+2011-07-06 Kenichi Handa <handa@m17n.org>
+
+ * unidata/unidata-gen.el (unidata-dir): New variable.
+ (unidata-setup-list): Expand unidata-text-file in unidata-dir.
+ (unidata-prop-alist): INDEX element may be a function. New
+ optional element VAL-LIST (for general-category and bidi-class).
+ New entry `mirroring'.
+ (unidata-prop-default, unidata-prop-val-list): New subst.
+ (unidata-get-character, unidata-put-character): Delete them.
+ (unidata-gen-table-character): New arg IGNORE. Adjusted for the
+ above changes.
+ (unidata-get-symbol, unidata-get-integer, unidata-get-numeric)
+ (unidata-put-symbol, unidata-put-integer, unidata-put-numeric):
+ Delete them.
+ (unidata-encode-val): Assume that the first element of VAL-LIST is
+ a cons (nil . 0).
+ (unidata-gen-table): Change argument DEFAULT-VALUE to VAL-LIST.
+ Always store the encoded value.
+ (unidata-gen-table-symbol): New args DEFAULT-VALUE and VAL-LIST.
+ Set the 1st and the 2nd extra slots to index numbers for C
+ functions.
+ (unidata-gen-table-integer): Likewise.
+ (unidata-gen-table-numeric): Likewise.
+ (unidata-gen-table-name): New arg IGNORE.
+ (unidata-gen-table-decomposition): Likewise.
+ (unidata-describe-general-category): Add the case nil to the
+ description alist.
+ (unidata-gen-mirroring-list): New funciton.
+ (unidata-gen-files): New arg DATA-DIR. Adjusted for the change of
+ unidata-prop-alist. Handle the case of storing multiple
+ char-tables in a file.
+
+ * unidata/Makefile.in (${DSTDIR}/charprop.el): New arg to
+ unidata-gen-files.
+
2011-05-21 Glenn Morris <rgm@gnu.org>
* bzrmerge.el (bzrmerge-resolve): Suppress prompts about file-locals.
${DSTDIR}/charprop.el: unidata-gen.elc unidata.txt
ELC=`/bin/pwd`/unidata-gen.elc; \
- DATA=`/bin/pwd`/unidata.txt; \
+ DATADIR=`/bin/pwd`; \
+ DATA=unidata.txt; \
cd ${DSTDIR}; \
- ${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATA}
+ ${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATADIR} $${DATA}
../../src/biditype.h: UnicodeData.txt
gawk -F";" -f biditype.awk $< > $@
;;
;; charprop.el
;; It contains a series of forms of this format:
-;; (char-code-property-register PROP FILE)
+;; (define-char-code-property PROP FILE)
;; where PROP is a symbol representing a character property
-;; (name, generic-category, etc), and FILE is a name of one of
+;; (name, general-category, etc), and FILE is a name of one of
;; the following files.
;;
;; uni-name.el, uni-category.el, uni-combining.el, uni-bidi.el,
;; uni-decomposition.el, uni-decimal.el, uni-digit.el, uni-numeric.el,
;; uni-mirrored.el, uni-old-name.el, uni-comment.el, uni-uppercase.el,
;; uni-lowercase.el, uni-titlecase.el
-;; They each contain a single form of this format:
-;; (char-code-property-register PROP CHAR-TABLE)
+;; They contain one or more forms of this format:
+;; (define-char-code-property PROP CHAR-TABLE)
;; where PROP is the same as above, and CHAR-TABLE is a
;; char-table containing property values in a compressed format.
;;
;; When they are installed in .../lisp/international/, the file
;; "charprop.el" is preloaded in loadup.el. The other files are
-;; automatically loaded when the functions `get-char-code-property'
-;; and `put-char-code-property' are called.
+;; automatically loaded when the Lisp functions
+;; `get-char-code-property' and `put-char-code-property', and C
+;; function uniprop_table are called.
;;
;; FORMAT OF A CHAR TABLE
;;
;; data in a char-table as below.
;;
;; If succeeding 128*N characters have the same property value, we
-;; store that value for them. Otherwise, compress values for
-;; succeeding 128 characters into a single string and store it as a
-;; value for those characters. The way of compression depends on a
-;; property. See the section "SIMPLE TABLE", "RUN-LENGTH TABLE",
-;; and "WORD-LIST TABLE".
-
-;; The char table has four extra slots:
+;; store that value (or the encoded one) for them. Otherwise,
+;; compress values (or the encoded ones) for succeeding 128
+;; characters into a single string and store it for those
+;; characters. The way of compression depends on a property. See
+;; the section "SIMPLE TABLE", "RUN-LENGTH TABLE", and "WORD-LIST
+;; TABLE".
+
+;; The char table has five extra slots:
;; 1st: property symbol
-;; 2nd: function to call to get a property value
-;; 3nd: function to call to put a property value
-;; 4th: function to call to get a description of a property value
+;; 2nd: function to call to get a property value,
+;; or an index number of C function to decode the value,
+;; or nil if the value can be directly got from the table.
+;; 3nd: function to call to put a property value,
+;; or an index number of C function to encode the value,
+;; or nil if the value can be directly stored in the table.
+;; 4th: function to call to get a description of a property value, or nil
;; 5th: data referred by the above functions
;; List of elements of this form:
(defvar unidata-list nil)
+;; Name of the directory containing files of Unicode Character
+;; Database.
+
+(defvar unidata-dir nil)
+
(defun unidata-setup-list (unidata-text-file)
(let* ((table (list nil))
(tail table)
("^<.*Surrogate" . nil)
("^<.*Private Use" . PRIVATE\ USE)))
val char name)
+ (setq unidata-text-file (expand-file-name unidata-text-file unidata-dir))
(or (file-readable-p unidata-text-file)
(error "File not readable: %s" unidata-text-file))
(with-temp-buffer
(setq unidata-list (cdr table))))
;; Alist of this form:
-;; (PROP INDEX GENERATOR FILENAME)
+;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER VAL-LIST)
;; PROP: character property
-;; INDEX: index to each element of unidata-list for PROP
+;; INDEX: index to each element of unidata-list for PROP.
+;; It may be a function that generates an alist of character codes
+;; vs. the corresponding property values.
;; GENERATOR: function to generate a char-table
;; FILENAME: filename to store the char-table
+;; DOCSTRING: docstring for the property
;; DESCRIBER: function to call to get a description string of property value
+;; DEFAULT: the default value of the property
+;; VAL-LIST: list of specially ordered property values
(defconst unidata-prop-alist
'((name
Property value is one of the following symbols:
Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po,
Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn"
- unidata-describe-general-category)
+ unidata-describe-general-category
+ nil
+ ;; The order of elements must be in sync with unicode_category_t
+ ;; in src/character.h.
+ (Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po
+ Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co Cn))
(canonical-combining-class
3 unidata-gen-table-integer "uni-combining.el"
"Unicode canonical combining class.
Property value is one of the following symbols:
L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET,
AN, CS, NSM, BN, B, S, WS, ON"
- unidata-describe-bidi-class)
+ unidata-describe-bidi-class
+ L
+ ;; The order of elements must be in sync with bidi_type_t in
+ ;; src/dispextern.h.
+ (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON))
(decomposition
5 unidata-gen-table-decomposition "uni-decomposition.el"
"Unicode decomposition mapping.
(mirrored
9 unidata-gen-table-symbol "uni-mirrored.el"
"Unicode bidi mirrored flag.
-Property value is a symbol `Y' or `N'.")
+Property value is a symbol `Y' or `N'. See also the property `mirroring'.")
(old-name
10 unidata-gen-table-name "uni-old-name.el"
"Unicode old names as published in Unicode 1.0.
14 unidata-gen-table-character "uni-titlecase.el"
"Unicode simple titlecase mapping.
Property value is a character."
- string)))
+ string)
+ (mirroring
+ unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el"
+ "Unicode bidi-mirroring characters.
+Property value is a character that has the corresponding mirroring image,
+or nil for non-mirrored character.")))
;; Functions to access the above data.
(defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist)))
(defsubst unidata-prop-file (prop) (nth 3 (assq prop unidata-prop-alist)))
(defsubst unidata-prop-docstring (prop) (nth 4 (assq prop unidata-prop-alist)))
(defsubst unidata-prop-describer (prop) (nth 5 (assq prop unidata-prop-alist)))
+(defsubst unidata-prop-default (prop) (nth 6 (assq prop unidata-prop-alist)))
+(defsubst unidata-prop-val-list (prop) (nth 7 (assq prop unidata-prop-alist)))
\f
;; SIMPLE TABLE
;; values of succeeding character codes are usually different, we use
;; a char-table described here to store such values.
;;
-;; If succeeding 128 characters has no property, a char-table has the
-;; symbol t for them. Otherwise a char-table has a string of the
-;; following format for them.
+;; A char-table divides character code space (#x0..#x3FFFFF) into
+;; #x8000 blocks (each block contains 128 characters).
+
+;; If all characters of a block have no property, a char-table has the
+;; symbol nil for that block. Otherwise a char-table has a string of
+;; the following format for it.
;;
-;; The first character of the string is FIRST-INDEX.
-;; The Nth (N > 0) character of the string is a property value of the
-;; character (BLOCK-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is
-;; the first of the characters in the block.
+;; The first character of the string is ?\001.
+;; The second character of the string is FIRST-INDEX.
+;; The Nth (N > 1) character of the string is a property value of the
+;; character (BLOCK-HEAD + FIRST-INDEX + N - 2), where BLOCK-HEAD is
+;; the first character of the block.
;;
-;; The 4th extra slot of a char-table is nil.
-
-(defun unidata-get-character (char val table)
- (cond
- ((characterp val)
- val)
+;; This kind of char-table has these extra slots:
+;; 1st: the property symbol
+;; 2nd: nil
+;; 3rd: 0 (corresponding to uniprop_encode_character in chartab.c)
+;; 4th to 5th: nil
- ((stringp val)
- (let* ((len (length val))
- (block-head (lsh (lsh char -7) 7))
- (vec (make-vector 128 nil))
- (first-index (aref val 0)))
- (dotimes (i (1- len))
- (let ((elt (aref val (1+ i))))
- (if (> elt 0)
- (aset vec (+ first-index i) elt))))
- (dotimes (i 128)
- (aset table (+ block-head i) (aref vec i)))
- (aref vec (- char block-head))))))
-
-(defun unidata-put-character (char val table)
- (or (characterp val)
- (not val)
- (error "Not a character nor nil: %S" val))
- (let ((current-val (aref table char)))
- (unless (eq current-val val)
- (if (stringp current-val)
- (funcall (char-table-extra-slot table 1) char current-val table))
- (aset table char val))))
-
-(defun unidata-gen-table-character (prop)
+(defun unidata-gen-table-character (prop &rest ignore)
(let ((table (make-char-table 'char-code-property-table))
(prop-idx (unidata-prop-index prop))
(vec (make-vector 128 0))
(tail unidata-list)
elt range val idx slot)
- (set-char-table-range table (cons 0 (max-char)) t)
+ (if (functionp prop-idx)
+ (setq tail (funcall prop-idx)
+ prop-idx 1))
(while tail
(setq elt (car tail) tail (cdr tail))
(setq range (car elt)
(setq first-index last-index)))
(setq tail (cdr tail)))
(when first-index
- (let ((str (string first-index))
+ (let ((str (string 1 first-index))
c)
(while (<= first-index last-index)
(setq str (format "%s%c" str (or (aref vec first-index) 0))
(set-char-table-range table (cons start limit) str))))))
(set-char-table-extra-slot table 0 prop)
- (byte-compile 'unidata-get-character)
- (byte-compile 'unidata-put-character)
- (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-character))
- (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-character))
-
+ (set-char-table-extra-slot table 2 0)
table))
\f
;; RUN-LENGTH TABLE
;;
-;; If the type of character property value is symbol, integer,
-;; boolean, or character, we use a char-table described here to store
-;; the values.
+;; If many characters of successive character codes have the same
+;; property value, we use a char-table described here to store the
+;; values.
;;
-;; The 4th extra slot is a vector of property values (VAL-TABLE), and
-;; values for succeeding 128 characters are encoded into this
-;; character sequence:
+;; At first, instead of a value itself, we store an index number to
+;; the VAL-TABLE (5th extra slot) in the table. We call that index
+;; number as VAL-CODE here after.
+;;
+;; A char-table divides character code space (#x0..#x3FFFFF) into
+;; #x8000 blocks (each block contains 128 characters).
+;;
+;; If all characters of a block have the same value, a char-table has
+;; VAL-CODE for that block. Otherwise a char-table has a string of
+;; the following format for that block.
+;;
+;; The first character of the string is ?\002.
+;; The following characters has this form:
;; ( VAL-CODE RUN-LENGTH ? ) +
;; where:
-;; VAL-CODE (0..127):
-;; (VAL-CODE - 1) is an index into VAL-TABLE.
-;; The value 0 means no-value.
+;; VAL-CODE (0..127): index into VAL-TABLE.
;; RUN-LENGTH (130..255):
;; (RUN-LENGTH - 128) specifies how many characters have the same
;; value. If omitted, it means 1.
-
-
-;; Return a symbol-type character property value of CHAR. VAL is the
-;; current value of (aref TABLE CHAR).
-
-(defun unidata-get-symbol (char val table)
- (let ((val-table (char-table-extra-slot table 4)))
- (cond ((symbolp val)
- val)
- ((stringp val)
- (let ((first-char (lsh (lsh char -7) 7))
- (str val)
- (len (length val))
- (idx 0)
- this-val count)
- (set-char-table-range table (cons first-char (+ first-char 127))
- nil)
- (while (< idx len)
- (setq val (aref str idx) idx (1+ idx)
- count (if (< idx len) (aref str idx) 1))
- (setq val (and (> val 0) (aref val-table (1- val)))
- count (if (< count 128)
- 1
- (prog1 (- count 128) (setq idx (1+ idx)))))
- (dotimes (i count)
- (if val
- (aset table first-char val))
- (if (= first-char char)
- (setq this-val val))
- (setq first-char (1+ first-char))))
- this-val))
- ((> val 0)
- (aref val-table (1- val))))))
-
-;; Return a integer-type character property value of CHAR. VAL is the
-;; current value of (aref TABLE CHAR).
-
-(defun unidata-get-integer (char val table)
- (let ((val-table (char-table-extra-slot table 4)))
- (cond ((integerp val)
- val)
- ((stringp val)
- (let ((first-char (lsh (lsh char -7) 7))
- (str val)
- (len (length val))
- (idx 0)
- this-val count)
- (while (< idx len)
- (setq val (aref str idx) idx (1+ idx)
- count (if (< idx len) (aref str idx) 1))
- (setq val (and (> val 0) (aref val-table (1- val)))
- count (if (< count 128)
- 1
- (prog1 (- count 128) (setq idx (1+ idx)))))
- (dotimes (i count)
- (aset table first-char val)
- (if (= first-char char)
- (setq this-val val))
- (setq first-char (1+ first-char))))
- this-val)))))
-
-;; Return a numeric-type (integer or float) character property value
-;; of CHAR. VAL is the current value of (aref TABLE CHAR).
-
-(defun unidata-get-numeric (char val table)
- (cond
- ((numberp val)
- val)
- ((stringp val)
- (let ((val-table (char-table-extra-slot table 4))
- (first-char (lsh (lsh char -7) 7))
- (str val)
- (len (length val))
- (idx 0)
- this-val count)
- (while (< idx len)
- (setq val (aref str idx) idx (1+ idx)
- count (if (< idx len) (aref str idx) 1))
- (setq val (and (> val 0) (aref val-table (1- val)))
- count (if (< count 128)
- 1
- (prog1 (- count 128) (setq idx (1+ idx)))))
- (dotimes (i count)
- (aset table first-char val)
- (if (= first-char char)
- (setq this-val val))
- (setq first-char (1+ first-char))))
- this-val))))
-
-;; Store VAL (symbol) as a character property value of CHAR in TABLE.
-
-(defun unidata-put-symbol (char val table)
- (or (symbolp val)
- (error "Not a symbol: %S" val))
- (let ((current-val (aref table char)))
- (unless (eq current-val val)
- (if (stringp current-val)
- (funcall (char-table-extra-slot table 1) char current-val table))
- (aset table char val))))
-
-;; Store VAL (integer) as a character property value of CHAR in TABLE.
-
-(defun unidata-put-integer (char val table)
- (or (integerp val)
- (not val)
- (error "Not an integer nor nil: %S" val))
- (let ((current-val (aref table char)))
- (unless (eq current-val val)
- (if (stringp current-val)
- (funcall (char-table-extra-slot table 1) char current-val table))
- (aset table char val))))
-
-;; Store VAL (integer or float) as a character property value of CHAR
-;; in TABLE.
-
-(defun unidata-put-numeric (char val table)
- (or (numberp val)
- (not val)
- (error "Not a number nor nil: %S" val))
- (let ((current-val (aref table char)))
- (unless (equal current-val val)
- (if (stringp current-val)
- (funcall (char-table-extra-slot table 1) char current-val table))
- (aset table char val))))
+;;
+;; This kind of char-table has these extra slots:
+;; 1st: the property symbol
+;; 2nd: 0 (corresponding to uniprop_decode_value in chartab.c)
+;; 3rd: 1..3 (corresponding to uniprop_encode_xxx in chartab.c)
+;; 4th: function or nil
+;; 5th: VAL-TABLE
;; Encode the character property value VAL into an integer value by
;; VAL-LIST. By side effect, VAL-LIST is modified.
;; VAL-LIST has this form:
-;; (t (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...)
-;; If VAL is one of VALn, just return VAL-CODEn. Otherwise,
-;; VAL-LIST is modified to this:
-;; (t (VAL . (1+ VAL-CODE1)) (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...)
+;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ...)
+;; If VAL is one of VALn, just return n.
+;; Otherwise, VAL-LIST is modified to this:
+;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1))
(defun unidata-encode-val (val-list val)
(let ((slot (assoc val val-list))
val-code)
(if slot
(cdr slot)
- (setq val-code (if (cdr val-list) (1+ (cdr (nth 1 val-list))) 1))
- (setcdr val-list (cons (cons val val-code) (cdr val-list)))
+ (setq val-code (length val-list))
+ (nconc val-list (list (cons val val-code)))
val-code)))
;; Generate a char-table for the character property PROP.
-(defun unidata-gen-table (prop val-func default-value)
+(defun unidata-gen-table (prop val-func default-value val-list)
(let ((table (make-char-table 'char-code-property-table))
(prop-idx (unidata-prop-index prop))
- (val-list (list t))
(vec (make-vector 128 0))
tail elt range val val-code idx slot
prev-range-data)
- (set-char-table-range table (cons 0 (max-char)) default-value)
+ (setq val-list (cons nil (copy-sequence val-list)))
+ (setq tail val-list val-code 0)
+ ;; Convert (nil A B ...) to ((nil . 0) (A . 1) (B . 2) ...)
+ (while tail
+ (setcar tail (cons (car tail) val-code))
+ (setq tail (cdr tail) val-code (1+ val-code)))
+ (setq default-value (unidata-encode-val val-list default-value))
+ (set-char-table-range table t default-value)
+ (set-char-table-range table nil default-value)
(setq tail unidata-list)
(while tail
(setq elt (car tail) tail (cdr tail))
(setq val-code (if val (unidata-encode-val val-list val)))
(if (consp range)
(when val-code
- (set-char-table-range table range val)
+ (set-char-table-range table range val-code)
(let ((from (car range)) (to (cdr range)))
;; If RANGE doesn't end at the char-table boundary (each
;; 128 characters), we may have to carry over the data
(if val-code
(aset vec (- range start) val-code))
(setq tail (cdr tail)))
- (setq str "" val-code -1 count 0)
+ (setq str "\002" val-code -1 count 0)
(mapc #'(lambda (x)
(if (= val-code x)
(setq count (1+ count))
vec)
(if (= count 128)
(if val
- (set-char-table-range table (cons start limit) val))
+ (set-char-table-range table (cons start limit) val-code))
(if (= val-code 0)
(set-char-table-range table (cons start limit) str)
(if (> count 2)
(setq str (concat str (string val-code)))))
(set-char-table-range table (cons start limit) str))))))
- (setq val-list (nreverse (cdr val-list)))
(set-char-table-extra-slot table 0 prop)
(set-char-table-extra-slot table 4 (vconcat (mapcar 'car val-list)))
table))
-(defun unidata-gen-table-symbol (prop)
+(defun unidata-gen-table-symbol (prop default-value val-list)
(let ((table (unidata-gen-table prop
#'(lambda (x) (and (> (length x) 0)
(intern x)))
- 0)))
- (byte-compile 'unidata-get-symbol)
- (byte-compile 'unidata-put-symbol)
- (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-symbol))
- (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-symbol))
+ default-value val-list)))
+ (set-char-table-extra-slot table 1 0)
+ (set-char-table-extra-slot table 2 1)
table))
-(defun unidata-gen-table-integer (prop)
+(defun unidata-gen-table-integer (prop default-value val-list)
(let ((table (unidata-gen-table prop
#'(lambda (x) (and (> (length x) 0)
(string-to-number x)))
- t)))
- (byte-compile 'unidata-get-integer)
- (byte-compile 'unidata-put-integer)
- (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-integer))
- (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-integer))
+ default-value val-list)))
+ (set-char-table-extra-slot table 1 0)
+ (set-char-table-extra-slot table 2 1)
table))
-(defun unidata-gen-table-numeric (prop)
+(defun unidata-gen-table-numeric (prop default-value val-list)
(let ((table (unidata-gen-table prop
#'(lambda (x)
(if (string-match "/" x)
(substring x (match-end 0))))
(if (> (length x) 0)
(string-to-number x))))
- t)))
- (byte-compile 'unidata-get-numeric)
- (byte-compile 'unidata-put-numeric)
- (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-numeric))
- (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-numeric))
+ default-value val-list)))
+ (set-char-table-extra-slot table 1 0)
+ (set-char-table-extra-slot table 2 2)
table))
\f
word-table
block-list block-word-table block-end
tail elt range val idx slot)
- (set-char-table-range table (cons 0 (max-char)) 0)
(setq tail unidata-list)
(setq block-end -1)
(while tail
idx (1+ i)))))
(nreverse (cons (intern (substring str idx)) l))))))
-(defun unidata-gen-table-name (prop)
+(defun unidata-gen-table-name (prop &rest ignore)
(let* ((table (unidata-gen-table-word-list prop 'unidata-split-name))
(word-tables (char-table-extra-slot table 4)))
(byte-compile 'unidata-get-name)
(nreverse l)))))
-(defun unidata-gen-table-decomposition (prop)
+(defun unidata-gen-table-decomposition (prop &rest ignore)
(let* ((table (unidata-gen-table-word-list prop 'unidata-split-decomposition))
(word-tables (char-table-extra-slot table 4)))
(byte-compile 'unidata-get-decomposition)
\f
(defun unidata-describe-general-category (val)
(cdr (assq val
- '((Lu . "Letter, Uppercase")
+ '((nil . "Uknown")
+ (Lu . "Letter, Uppercase")
(Ll . "Letter, Lowercase")
(Lt . "Letter, Titlecase")
(Lm . "Letter, Modifier")
(string ?'))))
val " "))
+(defun unidata-gen-mirroring-list ()
+ (let ((head (list nil))
+ tail)
+ (with-temp-buffer
+ (insert-file-contents (expand-file-name "BidiMirroring.txt" unidata-dir))
+ (goto-char (point-min))
+ (setq tail head)
+ (while (re-search-forward "^\\([0-9A-F]+\\);\\s +\\([0-9A-F]+\\)" nil t)
+ (let ((char (string-to-number (match-string 1) 16))
+ (mirror (match-string 2)))
+ (setq tail (setcdr tail (list (list char mirror)))))))
+ (cdr head)))
+
;; Verify if we can retrieve correct values from the generated
;; char-tables.
;; The entry function. It generates files described in the header
;; comment of this file.
-(defun unidata-gen-files (&optional unidata-text-file)
- (or unidata-text-file
- (setq unidata-text-file (car command-line-args-left)
+(defun unidata-gen-files (&optional data-dir unidata-text-file)
+ (or data-dir
+ (setq data-dir (car command-line-args-left)
+ command-line-args-left (cdr command-line-args-left)
+ unidata-text-file (car command-line-args-left)
command-line-args-left (cdr command-line-args-left)))
- (unidata-setup-list unidata-text-file)
(let ((coding-system-for-write 'utf-8-unix)
- (charprop-file "charprop.el"))
+ (charprop-file "charprop.el")
+ (unidata-dir data-dir))
+ (dolist (elt unidata-prop-alist)
+ (let* ((prop (car elt))
+ (file (unidata-prop-file prop)))
+ (if (file-exists-p file)
+ (delete-file file))))
+ (unidata-setup-list unidata-text-file)
(with-temp-file charprop-file
(insert ";; Automatically generated by unidata-gen.el.\n")
(dolist (elt unidata-prop-alist)
(file (unidata-prop-file prop))
(docstring (unidata-prop-docstring prop))
(describer (unidata-prop-describer prop))
+ (default-value (unidata-prop-default prop))
+ (val-list (unidata-prop-val-list prop))
table)
;; Filename in this comment line is extracted by sed in
;; Makefile.
(insert (format ";; FILE: %s\n" file))
(insert (format "(define-char-code-property '%S %S\n %S)\n"
prop file docstring))
- (with-temp-file file
+ (with-temp-buffer
(message "Generating %s..." file)
- (setq table (funcall generator prop))
+ (when (file-exists-p file)
+ (insert-file-contents file)
+ (goto-char (point-max))
+ (search-backward ";; Local Variables:"))
+ (setq table (funcall generator prop default-value val-list))
(when describer
(unless (subrp (symbol-function describer))
(byte-compile describer)
(setq describer (symbol-function describer)))
(set-char-table-extra-slot table 3 describer))
- (insert ";; Copyright (C) 1991-2009 Unicode, Inc.
-;; This file was generated from the Unicode data file at
-;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt.
-;; See lisp/international/README for the copyright and permission notice.\n"
- (format "(define-char-code-property '%S %S %S)\n"
- prop table docstring)
- ";; Local Variables:\n"
- ";; coding: utf-8\n"
- ";; no-byte-compile: t\n"
- ";; End:\n\n"
- (format ";; %s ends here\n" file)))))
+ (if (bobp)
+ (insert ";; Copyright (C) 1991-2009 Unicode, Inc.
+;; This file was generated from the Unicode data files at
+;; http://www.unicode.org/Public/UNIDATA/.
+;; See lisp/international/README for the copyright and permission notice.\n"))
+ (insert (format "(define-char-code-property '%S %S %S)\n"
+ prop table docstring))
+ (if (eobp)
+ (insert ";; Local Variables:\n"
+ ";; coding: utf-8\n"
+ ";; no-byte-compile: t\n"
+ ";; End:\n\n"
+ (format ";; %s ends here\n" file)))
+ (write-file file)
+ (message "Generating %s...done" file))))
(message "Writing %s..." charprop-file)
(insert ";; Local Variables:\n"
";; coding: utf-8\n"
+2011-07-06 Kenichi Handa <handa@m17n.org>
+
+ * international/characters.el (build-unicode-category-table):
+ Delete it.
+ (unicode-category-table): Set it by
+ unicode-prroperty-table-internal.
+
+ * international/mule-cmds.el (char-code-property-alist): Moved to
+ to src/chartab.c.
+ (get-char-code-property): Call unicode-property-table-internal to
+ load a file. Call get-unicode-property-internal where necessary.
+ (put-char-code-property): Call unicode-property-table-internal to
+ load a file. Call put-unicode-property-internal where necessary.
+ put-unicode-property-internal where necessary.
+ (char-code-property-description): Call
+ unicode-property-table-internal to load a file.
+
+ * international/charprop.el:
+ * international/uni-bidi.el:
+ * international/uni-category.el:
+ * international/uni-combining.el:
+ * international/uni-comment.el:
+ * international/uni-decimal.el:
+ * international/uni-decomposition.el:
+ * international/uni-digit.el:
+ * international/uni-lowercase.el:
+ * international/uni-mirrored.el:
+ * international/uni-name.el:
+ * international/uni-numeric.el:
+ * international/uni-old-name.el:
+ * international/uni-titlecase.el:
+ * international/uni-uppercase.el: Regenerate.
+
+ * loadup.el: Load international/charprop.el before
+ international/characters.
+
2011-06-22 Richard Stallman <rms@gnu.org>
* mail/sendmail.el (mail-bury): If Rmail is in use, return nicely
\f
;;; Setting unicode-category-table.
-;; This macro is to build unicode-category-table at compile time so
-;; that C code can access the table efficiently.
-(defmacro build-unicode-category-table ()
- (let ((table (make-char-table 'unicode-category-table nil)))
- (dotimes (i #x110000)
- (if (or (< i #xD800)
- (and (>= i #xF900) (< i #x30000))
- (and (>= i #xE0000) (< i #xE0200)))
- (aset table i (get-char-code-property i 'general-category))))
- (set-char-table-range table '(#xE000 . #xF8FF) 'Co)
- (set-char-table-range table '(#xF0000 . #xFFFFD) 'Co)
- (set-char-table-range table '(#x100000 . #x10FFFD) 'Co)
- (optimize-char-table table 'eq)
- table))
-
-(setq unicode-category-table (build-unicode-category-table))
+(setq unicode-category-table
+ (unicode-property-table-internal 'general-category))
(map-char-table #'(lambda (key val)
(if (and val
(or (and (/= (aref (symbol-name val) 0) ?M)
-;; Copyright (C) 1991-2010 Unicode, Inc.
-;; This file was generated from the Unicode data file at
-;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt.
-;; See lisp/international/README for the copyright and permission notice.
-
+;; Automatically generated by unidata-gen.el.
;; FILE: uni-name.el
(define-char-code-property 'name "uni-name.el"
"Unicode character name.
;; FILE: uni-mirrored.el
(define-char-code-property 'mirrored "uni-mirrored.el"
"Unicode bidi mirrored flag.
-Property value is a symbol `Y' or `N'.")
+Property value is a symbol `Y' or `N'. See also the property `mirroring'.")
;; FILE: uni-old-name.el
(define-char-code-property 'old-name "uni-old-name.el"
"Unicode old names as published in Unicode 1.0.
(define-char-code-property 'titlecase "uni-titlecase.el"
"Unicode simple titlecase mapping.
Property value is a character.")
+;; FILE: uni-mirrored.el
+(define-char-code-property 'mirroring "uni-mirrored.el"
+ "Unicode bidi-mirroring characters.
+Property value is a character that has the corresponding mirroring image,
+or nil for non-mirrored character.")
;; Local Variables:
;; coding: utf-8
;; no-byte-compile: t
\f
;;; Character property
-;; Each element has the form (PROP . TABLE).
-;; PROP is a symbol representing a character property.
-;; TABLE is a char-table containing the property value for each character.
-;; TABLE may be a name of file to load to build a char-table.
-;; Don't modify this variable directly but use `define-char-code-property'.
-
-(defvar char-code-property-alist nil
- "Alist of character property name vs char-table containing property values.
-Internal use only.")
-
(put 'char-code-property-table 'char-table-extra-slots 5)
(defun define-char-code-property (name table &optional docstring)
(defun get-char-code-property (char propname)
"Return the value of CHAR's PROPNAME property."
- (let ((slot (assq propname char-code-property-alist)))
- (if slot
- (let (table value func)
- (if (stringp (cdr slot))
- (load (cdr slot) nil t))
- (setq table (cdr slot)
- value (aref table char)
- func (char-table-extra-slot table 1))
+ (let ((table (unicode-property-table-internal propname)))
+ (if table
+ (let ((func (char-table-extra-slot table 1)))
(if (functionp func)
- (setq value (funcall func char value table)))
- value)
+ (funcall func char (aref table char) table)
+ (get-unicode-property-internal table char)))
(plist-get (aref char-code-property-table char) propname))))
(defun put-char-code-property (char propname value)
"Store CHAR's PROPNAME property with VALUE.
It can be retrieved with `(get-char-code-property CHAR PROPNAME)'."
- (let ((slot (assq propname char-code-property-alist)))
- (if slot
- (let (table func)
- (if (stringp (cdr slot))
- (load (cdr slot) nil t))
- (setq table (cdr slot)
- func (char-table-extra-slot table 2))
+ (let ((table (unicode-property-table-internal propname)))
+ (if table
+ (let ((func (char-table-extra-slot table 2)))
(if (functionp func)
(funcall func char value table)
- (aset table char value)))
+ (put-unicode-property-internal table char value)))
(let* ((plist (aref char-code-property-table char))
(x (plist-put plist propname value)))
(or (eq x plist)
(defun char-code-property-description (prop value)
"Return a description string of character property PROP's value VALUE.
If there's no description string for VALUE, return nil."
- (let ((slot (assq prop char-code-property-alist)))
- (if slot
- (let (table func)
- (if (stringp (cdr slot))
- (load (cdr slot) nil t))
- (setq table (cdr slot)
- func (char-table-extra-slot table 3))
+ (let ((table (unicode-property-table-internal prop)))
+ (if table
+ (let ((func (char-table-extra-slot table 3)))
(if (functionp func)
(funcall func value))))))
;; multilingual text.
(load "international/mule-cmds")
(load "case-table")
-(load "international/characters")
-(load "composite")
;; This file doesn't exist when building a development version of Emacs
;; from the repository. It is generated just after temacs is built.
(load "international/charprop.el" t)
+(load "international/characters")
+(load "composite")
;; Load language-specific files.
(load "language/chinese")
+2011-07-06 Kenichi Handa <handa@m17n.org>
+
+ * character.h (unicode_category_t): New enum type.
+
+ * chartab.c (uniprop_decoder_t, uniprop_encoder_t): New types.
+ (Qchar_code_property_table): New variable.
+ (UNIPROP_TABLE_P, UNIPROP_GET_DECODER)
+ (UNIPROP_COMPRESSED_FORM_P): New macros.
+ (char_table_ascii): Uncompress the compressed values.
+ (sub_char_table_ref): New arg is_uniprop. Callers changed.
+ Uncompress the compressed values.
+ (sub_char_table_ref_and_range): Likewise.
+ (char_table_ref_and_range): Uncompress the compressed values.
+ (sub_char_table_set): New arg is_uniprop. Callers changed.
+ Uncompress the compressed values.
+ (sub_char_table_set_range): Args changed. Callers changed.
+ (char_table_set_range): Adjuted for the above change.
+ (map_sub_char_table): Delete args default_val and parent. Add arg
+ top. Give decoded values to a Lisp function.
+ (map_char_table): Adjusted for the above change. Give decoded
+ values to a Lisp function. Gcpro more variables.
+ (uniprop_table_uncompress)
+ (uniprop_decode_value_run_length): New functions.
+ (uniprop_decoder, uniprop_decoder_count): New variables.
+ (uniprop_get_decoder, uniprop_encode_value_character)
+ (uniprop_encode_value_run_length, uniprop_encode_value_numeric):
+ New functions.
+ (uniprop_encoder, uniprop_encoder_count): New variables.
+ (uniprop_get_encoder, uniprop_table)
+ (Funicode_property_table_internal, Fget_unicode_property_internal)
+ (Fput_unicode_property_internal): New functions.
+ (syms_of_chartab): DEFSYM Qchar_code_property_table, defsubr
+ Sunicode_property_table_internal, Sget_unicode_property_internal,
+ and Sput_unicode_property_internal. Defvar_lisp
+ char-code-property-alist.
+
+ * composite.c (CHAR_COMPOSABLE_P): Adjusted for the change of
+ Vunicode_category_table.
+
+ * font.c (font_range): Adjusted for the change of
+ Vunicode_category_table.
+
2011-06-22 Paul Eggert <eggert@cs.ucla.edu>
Fixes for GLYPH_DEBUG found by GCC 4.6.0 static checking.
: (c) <= 0xDFFF ? 2 \
: 0)
+/* Data type for Unicode general category.
+
+ The order of members must be in sync with the 8th element of the
+ member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for
+ Unicode character property `general-category'. */
+
+typedef enum {
+ UNICODE_CATEGORY_UNKNOWN = 0,
+ UNICODE_CATEGORY_Lu,
+ UNICODE_CATEGORY_Ll,
+ UNICODE_CATEGORY_Lt,
+ UNICODE_CATEGORY_Lm,
+ UNICODE_CATEGORY_Lo,
+ UNICODE_CATEGORY_Mn,
+ UNICODE_CATEGORY_Mc,
+ UNICODE_CATEGORY_Me,
+ UNICODE_CATEGORY_Nd,
+ UNICODE_CATEGORY_Nl,
+ UNICODE_CATEGORY_No,
+ UNICODE_CATEGORY_Pc,
+ UNICODE_CATEGORY_Pd,
+ UNICODE_CATEGORY_Ps,
+ UNICODE_CATEGORY_Pe,
+ UNICODE_CATEGORY_Pi,
+ UNICODE_CATEGORY_Pf,
+ UNICODE_CATEGORY_Po,
+ UNICODE_CATEGORY_Sm,
+ UNICODE_CATEGORY_Sc,
+ UNICODE_CATEGORY_Sk,
+ UNICODE_CATEGORY_So,
+ UNICODE_CATEGORY_Zs,
+ UNICODE_CATEGORY_Zl,
+ UNICODE_CATEGORY_Zp,
+ UNICODE_CATEGORY_Cc,
+ UNICODE_CATEGORY_Cf,
+ UNICODE_CATEGORY_Cs,
+ UNICODE_CATEGORY_Co,
+ UNICODE_CATEGORY_Cn
+} unicode_category_t;
extern int char_resolve_modifier_mask (int);
extern int char_string (unsigned, unsigned char *);
#define CHARTAB_IDX(c, depth, min_char) \
(((c) - (min_char)) >> chartab_bits[(depth)])
+\f
+/* Preamble for uniprop (Unicode character property) tables. See the
+ comment of "Unicode character property tables". */
+
+/* Purpose of uniprop tables. */
+static Lisp_Object Qchar_code_property_table;
+
+/* Types of decoder and encoder functions for uniprop values. */
+typedef Lisp_Object (*uniprop_decoder_t) (Lisp_Object, Lisp_Object);
+typedef Lisp_Object (*uniprop_encoder_t) (Lisp_Object, Lisp_Object);
+
+static Lisp_Object uniprop_table_uncompress (Lisp_Object, int);
+static uniprop_decoder_t uniprop_get_decoder (Lisp_Object);
+
+/* 1 iff TABLE is a uniprop table. */
+#define UNIPROP_TABLE_P(TABLE) \
+ (EQ (XCHAR_TABLE (TABLE)->purpose, Qchar_code_property_table) \
+ && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (TABLE)) == 5)
+
+/* Return a decoder for values in the uniprop table TABLE. */
+#define UNIPROP_GET_DECODER(TABLE) \
+ (UNIPROP_TABLE_P (TABLE) ? uniprop_get_decoder (TABLE) : NULL)
+/* Nonzero iff OBJ is a string representing uniprop values of 128
+ succeeding characters (the bottom level of a char-table) by a
+ compressed format. We are sure that no property value has a string
+ starting with '\001' nor '\002'. */
+#define UNIPROP_COMPRESSED_FORM_P(OBJ) \
+ (STRINGP (OBJ) && SCHARS (OBJ) > 0 \
+ && ((SREF (OBJ, 0) == 1 || (SREF (OBJ, 0) == 2))))
+
+\f
DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0,
doc: /* Return a newly created char-table, with purpose PURPOSE.
Each element is initialized to INIT, which defaults to nil.
static Lisp_Object
char_table_ascii (Lisp_Object table)
{
- Lisp_Object sub;
+ Lisp_Object sub, val;
sub = XCHAR_TABLE (table)->contents[0];
if (! SUB_CHAR_TABLE_P (sub))
sub = XSUB_CHAR_TABLE (sub)->contents[0];
if (! SUB_CHAR_TABLE_P (sub))
return sub;
- return XSUB_CHAR_TABLE (sub)->contents[0];
+ val = XSUB_CHAR_TABLE (sub)->contents[0];
+ if (UNIPROP_TABLE_P (table) && UNIPROP_COMPRESSED_FORM_P (val))
+ val = uniprop_table_uncompress (sub, 0);
+ return val;
}
static Lisp_Object
}
static Lisp_Object
-sub_char_table_ref (Lisp_Object table, int c)
+sub_char_table_ref (Lisp_Object table, int c, int is_uniprop)
{
struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
int depth = XINT (tbl->depth);
int min_char = XINT (tbl->min_char);
Lisp_Object val;
+ int idx = CHARTAB_IDX (c, depth, min_char);
- val = tbl->contents[CHARTAB_IDX (c, depth, min_char)];
+ val = tbl->contents[idx];
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
+ val = uniprop_table_uncompress (table, idx);
if (SUB_CHAR_TABLE_P (val))
- val = sub_char_table_ref (val, c);
+ val = sub_char_table_ref (val, c, is_uniprop);
return val;
}
{
val = tbl->contents[CHARTAB_IDX (c, 0, 0)];
if (SUB_CHAR_TABLE_P (val))
- val = sub_char_table_ref (val, c);
+ val = sub_char_table_ref (val, c, UNIPROP_TABLE_P (table));
}
if (NILP (val))
{
}
static Lisp_Object
-sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp_Object defalt)
+sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to,
+ Lisp_Object defalt, int is_uniprop)
{
struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
int depth = XINT (tbl->depth);
Lisp_Object val;
val = tbl->contents[chartab_idx];
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
+ val = uniprop_table_uncompress (table, chartab_idx);
if (SUB_CHAR_TABLE_P (val))
- val = sub_char_table_ref_and_range (val, c, from, to, defalt);
+ val = sub_char_table_ref_and_range (val, c, from, to, defalt, is_uniprop);
else if (NILP (val))
val = defalt;
c = min_char + idx * chartab_chars[depth] - 1;
idx--;
this_val = tbl->contents[idx];
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
+ this_val = uniprop_table_uncompress (table, idx);
if (SUB_CHAR_TABLE_P (this_val))
- this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt);
+ this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
+ is_uniprop);
else if (NILP (this_val))
this_val = defalt;
chartab_idx++;
this_val = tbl->contents[chartab_idx];
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
+ this_val = uniprop_table_uncompress (table, chartab_idx);
if (SUB_CHAR_TABLE_P (this_val))
- this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt);
+ this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt,
+ is_uniprop);
else if (NILP (this_val))
this_val = defalt;
if (! EQ (this_val, val))
struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
int chartab_idx = CHARTAB_IDX (c, 0, 0), idx;
Lisp_Object val;
+ int is_uniprop = UNIPROP_TABLE_P (table);
val = tbl->contents[chartab_idx];
if (*from < 0)
*from = 0;
if (*to < 0)
*to = MAX_CHAR;
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val))
+ val = uniprop_table_uncompress (table, chartab_idx);
if (SUB_CHAR_TABLE_P (val))
- val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt);
+ val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt,
+ is_uniprop);
else if (NILP (val))
val = tbl->defalt;
-
idx = chartab_idx;
while (*from < idx * chartab_chars[0])
{
c = idx * chartab_chars[0] - 1;
idx--;
this_val = tbl->contents[idx];
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
+ this_val = uniprop_table_uncompress (table, idx);
if (SUB_CHAR_TABLE_P (this_val))
this_val = sub_char_table_ref_and_range (this_val, c, from, to,
- tbl->defalt);
+ tbl->defalt, is_uniprop);
else if (NILP (this_val))
this_val = tbl->defalt;
chartab_idx++;
c = chartab_idx * chartab_chars[0];
this_val = tbl->contents[chartab_idx];
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val))
+ this_val = uniprop_table_uncompress (table, chartab_idx);
if (SUB_CHAR_TABLE_P (this_val))
this_val = sub_char_table_ref_and_range (this_val, c, from, to,
- tbl->defalt);
+ tbl->defalt, is_uniprop);
else if (NILP (this_val))
this_val = tbl->defalt;
if (! EQ (this_val, val))
static void
-sub_char_table_set (Lisp_Object table, int c, Lisp_Object val)
+sub_char_table_set (Lisp_Object table, int c, Lisp_Object val, int is_uniprop)
{
struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
int depth = XINT ((tbl)->depth);
sub = tbl->contents[i];
if (! SUB_CHAR_TABLE_P (sub))
{
- sub = make_sub_char_table (depth + 1,
- min_char + i * chartab_chars[depth], sub);
- tbl->contents[i] = sub;
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
+ sub = uniprop_table_uncompress (table, i);
+ else
+ {
+ sub = make_sub_char_table (depth + 1,
+ min_char + i * chartab_chars[depth],
+ sub);
+ tbl->contents[i] = sub;
+ }
}
- sub_char_table_set (sub, c, val);
+ sub_char_table_set (sub, c, val, is_uniprop);
}
}
sub = make_sub_char_table (1, i * chartab_chars[0], sub);
tbl->contents[i] = sub;
}
- sub_char_table_set (sub, c, val);
+ sub_char_table_set (sub, c, val, UNIPROP_TABLE_P (table));
if (ASCII_CHAR_P (c))
tbl->ascii = char_table_ascii (table);
}
}
static void
-sub_char_table_set_range (Lisp_Object *table, int depth, int min_char, int from, int to, Lisp_Object val)
+sub_char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val,
+ int is_uniprop)
{
- int max_char = min_char + chartab_chars[depth] - 1;
-
- if (depth == 3 || (from <= min_char && to >= max_char))
- *table = val;
- else
+ struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table);
+ int depth = XINT ((tbl)->depth);
+ int min_char = XINT ((tbl)->min_char);
+ int chars_in_block = chartab_chars[depth];
+ int i, c, lim = chartab_size[depth];
+
+ if (from < min_char)
+ from = min_char;
+ i = CHARTAB_IDX (from, depth, min_char);
+ c = min_char + chars_in_block * i;
+ for (; i <= lim; i++, c += chars_in_block)
{
- int i;
- unsigned j;
-
- depth++;
- if (! SUB_CHAR_TABLE_P (*table))
- *table = make_sub_char_table (depth, min_char, *table);
- if (from < min_char)
- from = min_char;
- if (to > max_char)
- to = max_char;
- i = CHARTAB_IDX (from, depth, min_char);
- j = CHARTAB_IDX (to, depth, min_char);
- min_char += chartab_chars[depth] * i;
- for (j++; i < j; i++, min_char += chartab_chars[depth])
- sub_char_table_set_range (XSUB_CHAR_TABLE (*table)->contents + i,
- depth, min_char, from, to, val);
+ if (c > to)
+ break;
+ if (from <= c && c + chars_in_block - 1 <= to)
+ tbl->contents[i] = val;
+ else
+ {
+ Lisp_Object sub = tbl->contents[i];
+ if (! SUB_CHAR_TABLE_P (sub))
+ {
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub))
+ sub = uniprop_table_uncompress (table, i);
+ else
+ {
+ sub = make_sub_char_table (depth + 1, c, sub);
+ tbl->contents[i] = sub;
+ }
+ }
+ sub_char_table_set_range (sub, from, to, val, is_uniprop);
+ }
}
}
{
struct Lisp_Char_Table *tbl = XCHAR_TABLE (table);
Lisp_Object *contents = tbl->contents;
- int i;
if (from == to)
char_table_set (table, from, val);
else
{
- unsigned lim = to / chartab_chars[0] + 1;
- for (i = CHARTAB_IDX (from, 0, 0); i < lim; i++)
- sub_char_table_set_range (contents + i, 0, i * chartab_chars[0],
- from, to, val);
+ int is_uniprop = UNIPROP_TABLE_P (table);
+ int lim = CHARTAB_IDX (to, 0, 0);
+ int i, c;
+
+ for (i = CHARTAB_IDX (from, 0, 0), c = 0; i <= lim;
+ i++, c += chartab_chars[0])
+ {
+ if (c > to)
+ break;
+ if (from <= c && c + chartab_chars[0] - 1 <= to)
+ tbl->contents[i] = val;
+ else
+ {
+ Lisp_Object sub = tbl->contents[i];
+ if (! SUB_CHAR_TABLE_P (sub))
+ {
+ sub = make_sub_char_table (1, i * chartab_chars[0], sub);
+ tbl->contents[i] = sub;
+ }
+ sub_char_table_set_range (sub, from, to, val, is_uniprop);
+ }
+ }
if (ASCII_CHAR_P (from))
tbl->ascii = char_table_ascii (table);
}
(Lisp_Object char_table, Lisp_Object n, Lisp_Object value)
{
CHECK_CHAR_TABLE (char_table);
+ if (EQ (XCHAR_TABLE (char_table)->purpose, Qchar_code_property_table))
+ error ("Can't change extra-slot of char-code-property-table");
CHECK_NUMBER (n);
if (XINT (n) < 0
|| XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table)))
CHECK_CHARACTER_CAR (range);
CHECK_CHARACTER_CDR (range);
- val = char_table_ref_and_range (char_table, XFASTINT (XCAR (range)),
- &from, &to);
+ from = XFASTINT (XCAR (range));
+ to = XFASTINT (XCDR (range));
+ val = char_table_ref_and_range (char_table, from, &from, &to);
/* Not yet implemented. */
}
else
/* Map C_FUNCTION or FUNCTION over TABLE (top or sub char-table),
calling it for each character or group of characters that share a
value. RANGE is a cons (FROM . TO) specifying the range of target
- characters, VAL is a value of FROM in TABLE, DEFAULT_VAL is the
- default value of the char-table, PARENT is the parent of the
+ characters, VAL is a value of FROM in TABLE, TOP is the top
char-table.
ARG is passed to C_FUNCTION when that is called.
static Lisp_Object
map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
Lisp_Object function, Lisp_Object table, Lisp_Object arg, Lisp_Object val,
- Lisp_Object range, Lisp_Object default_val, Lisp_Object parent)
+ Lisp_Object range, Lisp_Object top)
{
/* Pointer to the elements of TABLE. */
Lisp_Object *contents;
int chars_in_block;
int from = XINT (XCAR (range)), to = XINT (XCDR (range));
int i, c;
+ int is_uniprop = UNIPROP_TABLE_P (top);
+ uniprop_decoder_t decoder = UNIPROP_GET_DECODER (top);
if (SUB_CHAR_TABLE_P (table))
{
for (c = min_char + chars_in_block * i; c <= max_char;
i++, c += chars_in_block)
{
- Lisp_Object this = contents[i];
+ Lisp_Object this = (SUB_CHAR_TABLE_P (table)
+ ? XSUB_CHAR_TABLE (table)->contents[i]
+ : XCHAR_TABLE (table)->contents[i]);
int nextc = c + chars_in_block;
+ if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this))
+ this = uniprop_table_uncompress (table, i);
if (SUB_CHAR_TABLE_P (this))
{
if (to >= nextc)
XSETCDR (range, make_number (nextc - 1));
val = map_sub_char_table (c_function, function, this, arg,
- val, range, default_val, parent);
+ val, range, top);
}
else
{
if (NILP (this))
- this = default_val;
+ this = XCHAR_TABLE (top)->defalt;
if (!EQ (val, this))
{
int different_value = 1;
if (NILP (val))
{
- if (! NILP (parent))
+ if (! NILP (XCHAR_TABLE (top)->parent))
{
+ Lisp_Object parent = XCHAR_TABLE (top)->parent;
Lisp_Object temp = XCHAR_TABLE (parent)->parent;
/* This is to get a value of FROM in PARENT
XSETCDR (range, make_number (c - 1));
val = map_sub_char_table (c_function, function,
parent, arg, val, range,
- XCHAR_TABLE (parent)->defalt,
- XCHAR_TABLE (parent)->parent);
+ parent);
if (EQ (val, this))
different_value = 0;
}
if (c_function)
(*c_function) (arg, XCAR (range), val);
else
- call2 (function, XCAR (range), val);
+ {
+ if (decoder)
+ val = decoder (top, val);
+ call2 (function, XCAR (range), val);
+ }
}
else
{
if (c_function)
(*c_function) (arg, range, val);
else
- call2 (function, range, val);
+ {
+ if (decoder)
+ val = decoder (top, val);
+ call2 (function, range, val);
+ }
}
}
val = this;
ARG is passed to C_FUNCTION when that is called. */
void
-map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object table, Lisp_Object arg)
+map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object),
+ Lisp_Object function, Lisp_Object table, Lisp_Object arg)
{
- Lisp_Object range, val;
- struct gcpro gcpro1, gcpro2, gcpro3;
+ Lisp_Object range, val, parent;
+ struct gcpro gcpro1, gcpro2, gcpro3, gcpro4;
+ uniprop_decoder_t decoder = UNIPROP_GET_DECODER (table);
range = Fcons (make_number (0), make_number (MAX_CHAR));
- GCPRO3 (table, arg, range);
+ parent = XCHAR_TABLE (table)->parent;
+
+ GCPRO4 (table, arg, range, parent);
val = XCHAR_TABLE (table)->ascii;
if (SUB_CHAR_TABLE_P (val))
val = XSUB_CHAR_TABLE (val)->contents[0];
val = map_sub_char_table (c_function, function, table, arg, val, range,
- XCHAR_TABLE (table)->defalt,
- XCHAR_TABLE (table)->parent);
+ table);
+
/* If VAL is nil and TABLE has a parent, we must consult the parent
recursively. */
while (NILP (val) && ! NILP (XCHAR_TABLE (table)->parent))
{
- Lisp_Object parent = XCHAR_TABLE (table)->parent;
- Lisp_Object temp = XCHAR_TABLE (parent)->parent;
+ Lisp_Object temp;
int from = XINT (XCAR (range));
+ parent = XCHAR_TABLE (table)->parent;
+ temp = XCHAR_TABLE (parent)->parent;
/* This is to get a value of FROM in PARENT without checking the
parent of PARENT. */
XCHAR_TABLE (parent)->parent = Qnil;
val = CHAR_TABLE_REF (parent, from);
XCHAR_TABLE (parent)->parent = temp;
val = map_sub_char_table (c_function, function, parent, arg, val, range,
- XCHAR_TABLE (parent)->defalt,
- XCHAR_TABLE (parent)->parent);
+ parent);
table = parent;
}
if (c_function)
(*c_function) (arg, XCAR (range), val);
else
- call2 (function, XCAR (range), val);
+ {
+ if (decoder)
+ val = decoder (table, val);
+ call2 (function, XCAR (range), val);
+ }
}
else
{
if (c_function)
(*c_function) (arg, range, val);
else
- call2 (function, range, val);
+ {
+ if (decoder)
+ val = decoder (table, val);
+ call2 (function, range, val);
+ }
}
}
UNGCPRO;
}
+\f
+/* Unicode character property tables.
+
+ This section provides a convenient and efficient way to get a
+ Unicode character property from C code (from Lisp, you must use
+ get-char-code-property).
+
+ The typical usage is to get a char-table for a specific property at
+ a proper initialization time as this:
+
+ Lisp_Object bidi_class_table = uniprop_table (intern ("bidi-class"));
+
+ and get a property value for character CH as this:
+
+ Lisp_Object bidi_class = CHAR_TABLE_REF (CH, bidi_class_table);
+
+ In this case, what you actually get is an index number to the
+ vector of property values (symbols nil, L, R, etc).
+
+ A table for Unicode character property has these characteristics:
+
+ o The purpose is `char-code-property-table', which implies that the
+ table has 5 extra slots.
+
+ o The second extra slot is a Lisp function, an index (integer) to
+ the array uniprop_decoder[], or nil. If it is a Lisp function, we
+ can't use such a table from C (at the moment). If it is nil, it
+ means that we don't have to decode values.
+
+ o The third extra slot is a Lisp function, an index (integer) to
+ the array uniprop_enncoder[], or nil. If it is a Lisp function, we
+ can't use such a table from C (at the moment). If it is nil, it
+ means that we don't have to encode values. */
+
+
+/* Uncompress the IDXth element of sub-char-table TABLE. */
+
+static Lisp_Object
+uniprop_table_uncompress (Lisp_Object table, int idx)
+{
+ Lisp_Object val = XSUB_CHAR_TABLE (table)->contents[idx];
+ int min_char = (XINT (XSUB_CHAR_TABLE (table)->min_char)
+ + chartab_chars[2] * idx);
+ Lisp_Object sub = make_sub_char_table (3, min_char, Qnil);
+ struct Lisp_Sub_Char_Table *subtbl = XSUB_CHAR_TABLE (sub);
+ const unsigned char *p, *pend;
+ int i;
+
+ XSUB_CHAR_TABLE (table)->contents[idx] = sub;
+ p = SDATA (val), pend = p + SBYTES (val);
+ if (*p == 1)
+ {
+ /* SIMPLE TABLE */
+ p++;
+ idx = STRING_CHAR_ADVANCE (p);
+ while (p < pend && idx < chartab_chars[2])
+ {
+ int v = STRING_CHAR_ADVANCE (p);
+ subtbl->contents[idx++] = v > 0 ? make_number (v) : Qnil;
+ }
+ }
+ else if (*p == 2)
+ {
+ /* RUN-LENGTH TABLE */
+ p++;
+ for (idx = 0; p < pend; )
+ {
+ int v = STRING_CHAR_ADVANCE (p);
+ int count = 1;
+ int len;
+
+ if (p < pend)
+ {
+ count = STRING_CHAR_AND_LENGTH (p, len);
+ if (count < 128)
+ count = 1;
+ else
+ {
+ count -= 128;
+ p += len;
+ }
+ }
+ while (count-- > 0)
+ subtbl->contents[idx++] = make_number (v);
+ }
+ }
+/* It seems that we don't need this function because C code won't need
+ to get a property that is compressed in this form. */
+#if 0
+ else if (*p == 0)
+ {
+ /* WORD-LIST TABLE */
+ }
+#endif
+ return sub;
+}
+
+
+/* Decode VALUE as an elemnet of char-table TABLE. */
+
+static Lisp_Object
+uniprop_decode_value_run_length (Lisp_Object table, Lisp_Object value)
+{
+ if (VECTORP (XCHAR_TABLE (table)->extras[4]))
+ {
+ Lisp_Object valvec = XCHAR_TABLE (table)->extras[4];
+
+ if (XINT (value) >= 0 && XINT (value) < ASIZE (valvec))
+ value = AREF (valvec, XINT (value));
+ }
+ return value;
+}
+
+static uniprop_decoder_t uniprop_decoder [] =
+ { uniprop_decode_value_run_length };
+
+static int uniprop_decoder_count
+ = (sizeof uniprop_decoder) / sizeof (uniprop_decoder[0]);
+
+
+/* Return the decoder of char-table TABLE or nil if none. */
+
+static uniprop_decoder_t
+uniprop_get_decoder (Lisp_Object table)
+{
+ int i;
+
+ if (! INTEGERP (XCHAR_TABLE (table)->extras[1]))
+ return NULL;
+ i = XINT (XCHAR_TABLE (table)->extras[1]);
+ if (i < 0 || i >= uniprop_decoder_count)
+ return NULL;
+ return uniprop_decoder[i];
+}
+
+
+/* Encode VALUE as an element of char-table TABLE which contains
+ characters as elements. */
+
+static Lisp_Object
+uniprop_encode_value_character (Lisp_Object table, Lisp_Object value)
+{
+ if (! NILP (value) && ! CHARACTERP (value))
+ wrong_type_argument (Qintegerp, value);
+ return value;
+}
+
+
+/* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
+ compression. */
+
+static Lisp_Object
+uniprop_encode_value_run_length (Lisp_Object table, Lisp_Object value)
+{
+ Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
+ int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
+
+ for (i = 0; i < size; i++)
+ if (EQ (value, value_table[i]))
+ break;
+ if (i == size)
+ wrong_type_argument (build_string ("Unicode property value"), value);
+ return make_number (i);
+}
+
+
+/* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH
+ compression and contains numbers as elements . */
+
+static Lisp_Object
+uniprop_encode_value_numeric (Lisp_Object table, Lisp_Object value)
+{
+ Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents;
+ int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]);
+
+ CHECK_NUMBER (value);
+ for (i = 0; i < size; i++)
+ if (EQ (value, value_table[i]))
+ break;
+ value = make_number (i);
+ if (i == size)
+ {
+ Lisp_Object args[2];
+
+ args[0] = XCHAR_TABLE (table)->extras[4];
+ args[1] = Fmake_vector (make_number (1), value);
+ XCHAR_TABLE (table)->extras[4] = Fvconcat (2, args);
+ }
+ return make_number (i);
+}
+
+static uniprop_encoder_t uniprop_encoder[] =
+ { uniprop_encode_value_character,
+ uniprop_encode_value_run_length,
+ uniprop_encode_value_numeric };
+
+static int uniprop_encoder_count
+ = (sizeof uniprop_encoder) / sizeof (uniprop_encoder[0]);
+
+
+/* Return the encoder of char-table TABLE or nil if none. */
+
+static uniprop_decoder_t
+uniprop_get_encoder (Lisp_Object table)
+{
+ int i;
+
+ if (! INTEGERP (XCHAR_TABLE (table)->extras[2]))
+ return NULL;
+ i = XINT (XCHAR_TABLE (table)->extras[2]);
+ if (i < 0 || i >= uniprop_encoder_count)
+ return NULL;
+ return uniprop_encoder[i];
+}
+
+/* Return a char-table for Unicode character property PROP. This
+ function may load a Lisp file and thus may cause
+ garbage-collection. */
+
+Lisp_Object
+uniprop_table (Lisp_Object prop)
+{
+ Lisp_Object val, table, result;
+
+ val = Fassq (prop, Vchar_code_property_alist);
+ if (! CONSP (val))
+ return Qnil;
+ table = XCDR (val);
+ if (STRINGP (table))
+ {
+ struct gcpro gcpro1;
+ GCPRO1 (val);
+ result = Fload (concat2 (build_string ("international/"), table),
+ Qt, Qt, Qt, Qt);
+ UNGCPRO;
+ if (NILP (result))
+ return Qnil;
+ table = XCDR (val);
+ }
+ if (! CHAR_TABLE_P (table)
+ || ! UNIPROP_TABLE_P (table))
+ return Qnil;
+ val = XCHAR_TABLE (table)->extras[1];
+ if (INTEGERP (val)
+ ? (XINT (val) < 0 || XINT (val) >= uniprop_decoder_count)
+ : ! NILP (val))
+ return Qnil;
+ /* Prepare ASCII values in advance for CHAR_TABLE_REF. */
+ XCHAR_TABLE (table)->ascii = char_table_ascii (table);
+ return table;
+}
+
+DEFUN ("unicode-property-table-internal", Funicode_property_table_internal,
+ Sunicode_property_table_internal, 1, 1, 0,
+ doc: /* Return a char-table for Unicode character property PROP.
+Use `get-unicode-property-internal' and
+`put-unicode-property-internal' instead of `aref' and `aset' to get
+and put an element value. */)
+ (Lisp_Object prop)
+{
+ Lisp_Object table = uniprop_table (prop);
+
+ if (CHAR_TABLE_P (table))
+ return table;
+ return Fcdr (Fassq (prop, Vchar_code_property_alist));
+}
+
+DEFUN ("get-unicode-property-internal", Fget_unicode_property_internal,
+ Sget_unicode_property_internal, 2, 2, 0,
+ doc: /* Return an element of CHAR-TABLE for character CH.
+CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
+ (Lisp_Object char_table, Lisp_Object ch)
+{
+ Lisp_Object val;
+ uniprop_decoder_t decoder;
+
+ CHECK_CHAR_TABLE (char_table);
+ CHECK_CHARACTER (ch);
+ if (! UNIPROP_TABLE_P (char_table))
+ error ("Invalid Unicode property table");
+ val = CHAR_TABLE_REF (char_table, XINT (ch));
+ decoder = uniprop_get_decoder (char_table);
+ return (decoder ? decoder (char_table, val) : val);
+}
+
+DEFUN ("put-unicode-property-internal", Fput_unicode_property_internal,
+ Sput_unicode_property_internal, 3, 3, 0,
+ doc: /* Set an element of CHAR-TABLE for character CH to VALUE.
+CHAR-TABLE must be what returned by `unicode-property-table-internal'. */)
+ (Lisp_Object char_table, Lisp_Object ch, Lisp_Object value)
+{
+ uniprop_encoder_t encoder;
+
+ CHECK_CHAR_TABLE (char_table);
+ CHECK_CHARACTER (ch);
+ if (! UNIPROP_TABLE_P (char_table))
+ error ("Invalid Unicode property table");
+ encoder = uniprop_get_encoder (char_table);
+ if (encoder)
+ value = encoder (char_table, value);
+ CHAR_TABLE_SET (char_table, XINT (ch), value);
+ return Qnil;
+}
+
\f
void
syms_of_chartab (void)
{
+ DEFSYM (Qchar_code_property_table, "char-code-property-table");
+
defsubr (&Smake_char_table);
defsubr (&Schar_table_parent);
defsubr (&Schar_table_subtype);
defsubr (&Sset_char_table_default);
defsubr (&Soptimize_char_table);
defsubr (&Smap_char_table);
+ defsubr (&Sunicode_property_table_internal);
+ defsubr (&Sget_unicode_property_internal);
+ defsubr (&Sput_unicode_property_internal);
+
+ /* Each element has the form (PROP . TABLE).
+ PROP is a symbol representing a character property.
+ TABLE is a char-table containing the property value for each character.
+ TABLE may be a name of file to load to build a char-table.
+ This variable should be modified only through
+ `define-char-code-property'. */
+
+ DEFVAR_LISP ("char-code-property-alist", Vchar_code_property_alist,
+ doc: /* Alist of character property name vs char-table containing property values.
+Internal use only. */);
+ Vchar_code_property_alist = Qnil;
}
((C) > ' ' \
&& ((C) == 0x200C || (C) == 0x200D \
|| (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \
- (SYMBOLP (_work_val) \
- && (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C' \
- && _work_char != 'Z'))))
+ (INTEGERP (_work_val) \
+ && (XINT (_work_val) <= UNICODE_CATEGORY_So)))))
/* Update cmp_it->stop_pos to the next position after CHARPOS (and
BYTEPOS) where character composition may happen. If BYTEPOS is
/* Data type for describing the bidirectional character types. The
first 7 must be at the beginning, because they are the only values
valid in the `bidi_type' member of `struct glyph'; we only reserve
- 3 bits for it, so we cannot use there values larger than 7. */
+ 3 bits for it, so we cannot use there values larger than 7.
+
+ The order of members must be in sync with the 8th element of the
+ member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for
+ Unicode character property `bidi-class'. */
typedef enum {
UNKNOWN_BT = 0,
STRONG_L, /* strong left-to-right */
else
FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string, pos, pos_byte);
category = CHAR_TABLE_REF (Vunicode_category_table, c);
- if (EQ (category, QCf)
- || CHAR_VARIATION_SELECTOR_P (c))
+ if (INTEGERP (category)
+ && (XINT (category) == UNICODE_CATEGORY_Cf
+ || CHAR_VARIATION_SELECTOR_P (c)))
continue;
if (NILP (font_object))
{