From: Kenichi Handa Date: Fri, 6 Nov 2009 06:31:48 +0000 (+0000) Subject: (unidata-gen-table): Fix for the case that the block data and the X-Git-Tag: emacs-pretest-23.1.90~553 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=149353a4eb5231e4f2c5af61d1bea2e9e909c679;p=emacs.git (unidata-gen-table): Fix for the case that the block data and the following per-char data fall into the same char-table leaf. --- diff --git a/admin/ChangeLog b/admin/ChangeLog index d6e75e04d98..7d56b2aeb5e 100644 --- a/admin/ChangeLog +++ b/admin/ChangeLog @@ -1,3 +1,9 @@ +2009-11-06 Kenichi Handa + + * unidata/unidata-gen.el (unidata-gen-table): Fix for the case + that the block data and the following per-char data fall into the + same char-table leaf. + 2009-10-01 Juanma Barranquero * unidata/UnicodeData.txt: Update to Unicode 5.2.0. diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index 9fee8e46c80..2b1f918cc64 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el @@ -93,6 +93,10 @@ (or (file-readable-p unidata-text-file) (error "File not readable: %s" unidata-text-file)) (with-temp-buffer + ;; Insert a file of this format: + ;; (CHAR NAME CATEGORY ...) + ;; where CHAR is a character code, the following elements are strings + ;; representing character properties. (insert-file-contents unidata-text-file) (goto-char (point-min)) (condition-case nil @@ -103,7 +107,7 @@ ;; Check this kind of block. ;; 4E00;;Lo;0;L;;;;;N;;;;; - ;; 9FA5;;Lo;0;L;;;;;N;;;;; + ;; 9FCB;;Lo;0;L;;;;;N;;;;; (if (and (= (aref name 0) ?<) (string-match ", First>$" name)) (let ((first char) @@ -224,7 +228,7 @@ Property value is a character." ;; a char-table described here to store such values. ;; ;; If succeeding 128 characters has no property, a char-table has the -;; symbol t is for them. Otherwise a char-table has a string of the +;; symbol t for them. Otherwise a char-table has a string of the ;; following format for them. 
;; ;; The first character of the string is FIRST-INDEX. @@ -480,7 +484,8 @@ Property value is a character." (prop-idx (unidata-prop-index prop)) (val-list (list t)) (vec (make-vector 128 0)) - tail elt range val val-code idx slot) + tail elt range val val-code idx slot + prev-range-data) (set-char-table-range table (cons 0 (max-char)) default-value) (setq tail unidata-list) (while tail @@ -489,12 +494,34 @@ Property value is a character." val (funcall val-func (nth prop-idx elt))) (setq val-code (if val (unidata-encode-val val-list val))) (if (consp range) - (if val-code - (set-char-table-range table range val)) + (when val-code + (set-char-table-range table range val) + (let ((from (car range)) (to (cdr range))) + ;; If RANGE doesn't end at the char-table boundary (each + ;; 128 characters), we may have to carry over the data + ;; for the last several characters (at most 127 chars) + ;; to the next loop. In that case, set PREV-RANGE-DATA + ;; to ((FROM . TO) . VAL-CODE) where (FROM . TO) + ;; specifies the range of characters handled in the next + ;; loop. + (when (< (logand to #x7F) #x7F) + (if (< from (logand to #x1FFF80)) + (setq from (logand to #x1FFF80))) + (setq prev-range-data (cons (cons from to) val-code))))) (let* ((start (lsh (lsh range -7) 7)) (limit (+ start 127)) str count new-val) (fillarray vec 0) + ;; See the comment above. + (when (and prev-range-data + (>= (cdr (car prev-range-data)) start)) + (let ((from (car (car prev-range-data))) + (to (cdr (car prev-range-data))) + (vcode (cdr prev-range-data))) + (while (<= from to) + (aset vec (- from start) vcode) + (setq from (1+ from))))) + (setq prev-range-data nil) (if val-code (aset vec (- range start) val-code)) (while (and (setq elt (car tail) range (car elt))