From: Kenichi Handa Date: Wed, 6 Jul 2011 22:43:48 +0000 (+0900) Subject: Add C interface for Unicode character property table. X-Git-Tag: emacs-pretest-24.0.90~104^2~462 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=c805dec0b5fa81b5c9f2b724e2ec12a17d723aca;p=emacs.git Add C interface for Unicode character property table. --- diff --git a/admin/ChangeLog b/admin/ChangeLog index 7aaeb1d5ee2..3632a0992a6 100644 --- a/admin/ChangeLog +++ b/admin/ChangeLog @@ -1,3 +1,38 @@ +2011-07-06 Kenichi Handa + + * unidata/unidata-gen.el (unidata-dir): New variable. + (unidata-setup-list): Expand unidata-text-file in unidata-dir. + (unidata-prop-alist): INDEX element may be a function. New + optional element VAL-LIST (for general-category and bidi-class). + New entry `mirroring'. + (unidata-prop-default, unidata-prop-val-list): New subst. + (unidata-get-character, unidata-put-character): Delete them. + (unidata-gen-table-character): New arg IGNORE. Adjusted for the + above changes. + (unidata-get-symbol, unidata-get-integer, unidata-get-numeric) + (unidata-put-symbol, unidata-put-integer, unidata-put-numeric): + Delete them. + (unidata-encode-val): Assume that the first element of VAL-LIST is + a cons (nil . 0). + (unidata-gen-table): Change argument DEFAULT-VALUE to VAL-LIST. + Always store the encoded value. + (unidata-gen-table-symbol): New args DEFAULT-VALUE and VAL-LIST. + Set the 1st and the 2nd extra slots to index numbers for C + functions. + (unidata-gen-table-integer): Likewise. + (unidata-gen-table-numeric): Likewise. + (unidata-gen-table-name): New arg IGNORE. + (unidata-gen-table-decomposition): Likewise. + (unidata-describe-general-category): Add the case nil to the + description alist. + (unidata-gen-mirroring-list): New funciton. + (unidata-gen-files): New arg DATA-DIR. Adjusted for the change of + unidata-prop-alist. Handle the case of storing multiple + char-tables in a file. + + * unidata/Makefile.in (${DSTDIR}/charprop.el): New arg to + unidata-gen-files. + 2011-05-21 Glenn Morris * bzrmerge.el (bzrmerge-resolve): Suppress prompts about file-locals. diff --git a/admin/unidata/Makefile.in b/admin/unidata/Makefile.in index 04f2f1d4380..e1fe247631f 100644 --- a/admin/unidata/Makefile.in +++ b/admin/unidata/Makefile.in @@ -33,9 +33,10 @@ unidata.txt: UnicodeData.txt ${DSTDIR}/charprop.el: unidata-gen.elc unidata.txt ELC=`/bin/pwd`/unidata-gen.elc; \ - DATA=`/bin/pwd`/unidata.txt; \ + DATADIR=`/bin/pwd`; \ + DATA=unidata.txt; \ cd ${DSTDIR}; \ - ${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATA} + ${RUNEMACS} -batch --load $${ELC} -f unidata-gen-files $${DATADIR} $${DATA} ../../src/biditype.h: UnicodeData.txt gawk -F";" -f biditype.awk $< > $@ diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index 9f898668526..ab1dcd134ac 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el @@ -33,24 +33,25 @@ ;; ;; charprop.el ;; It contains a series of forms of this format: -;; (char-code-property-register PROP FILE) +;; (define-char-code-property PROP FILE) ;; where PROP is a symbol representing a character property -;; (name, generic-category, etc), and FILE is a name of one of +;; (name, general-category, etc), and FILE is a name of one of ;; the following files. ;; ;; uni-name.el, uni-category.el, uni-combining.el, uni-bidi.el, ;; uni-decomposition.el, uni-decimal.el, uni-digit.el, uni-numeric.el, ;; uni-mirrored.el, uni-old-name.el, uni-comment.el, uni-uppercase.el, ;; uni-lowercase.el, uni-titlecase.el -;; They each contain a single form of this format: -;; (char-code-property-register PROP CHAR-TABLE) +;; They contain one or more forms of this format: +;; (define-char-code-property PROP CHAR-TABLE) ;; where PROP is the same as above, and CHAR-TABLE is a ;; char-table containing property values in a compressed format. ;; ;; When they are installed in .../lisp/international/, the file ;; "charprop.el" is preloaded in loadup.el. The other files are -;; automatically loaded when the functions `get-char-code-property' -;; and `put-char-code-property' are called. +;; automatically loaded when the Lisp functions +;; `get-char-code-property' and `put-char-code-property', and C +;; function uniprop_table are called. ;; ;; FORMAT OF A CHAR TABLE ;; @@ -62,17 +63,22 @@ ;; data in a char-table as below. ;; ;; If succeeding 128*N characters have the same property value, we -;; store that value for them. Otherwise, compress values for -;; succeeding 128 characters into a single string and store it as a -;; value for those characters. The way of compression depends on a -;; property. See the section "SIMPLE TABLE", "RUN-LENGTH TABLE", -;; and "WORD-LIST TABLE". - -;; The char table has four extra slots: +;; store that value (or the encoded one) for them. Otherwise, +;; compress values (or the encoded ones) for succeeding 128 +;; characters into a single string and store it for those +;; characters. The way of compression depends on a property. See +;; the section "SIMPLE TABLE", "RUN-LENGTH TABLE", and "WORD-LIST +;; TABLE". + +;; The char table has five extra slots: ;; 1st: property symbol -;; 2nd: function to call to get a property value -;; 3nd: function to call to put a property value -;; 4th: function to call to get a description of a property value +;; 2nd: function to call to get a property value, +;; or an index number of C function to decode the value, +;; or nil if the value can be directly got from the table. +;; 3nd: function to call to put a property value, +;; or an index number of C function to encode the value, +;; or nil if the value can be directly stored in the table. +;; 4th: function to call to get a description of a property value, or nil ;; 5th: data referred by the above functions ;; List of elements of this form: @@ -82,6 +88,11 @@ (defvar unidata-list nil) +;; Name of the directory containing files of Unicode Character +;; Database. + +(defvar unidata-dir nil) + (defun unidata-setup-list (unidata-text-file) (let* ((table (list nil)) (tail table) @@ -90,6 +101,7 @@ ("^<.*Surrogate" . nil) ("^<.*Private Use" . PRIVATE\ USE))) val char name) + (setq unidata-text-file (expand-file-name unidata-text-file unidata-dir)) (or (file-readable-p unidata-text-file) (error "File not readable: %s" unidata-text-file)) (with-temp-buffer @@ -134,12 +146,17 @@ (setq unidata-list (cdr table)))) ;; Alist of this form: -;; (PROP INDEX GENERATOR FILENAME) +;; (PROP INDEX GENERATOR FILENAME DOCSTRING DESCRIBER VAL-LIST) ;; PROP: character property -;; INDEX: index to each element of unidata-list for PROP +;; INDEX: index to each element of unidata-list for PROP. +;; It may be a function that generates an alist of character codes +;; vs. the corresponding property values. ;; GENERATOR: function to generate a char-table ;; FILENAME: filename to store the char-table +;; DOCSTRING: docstring for the property ;; DESCRIBER: function to call to get a description string of property value +;; DEFAULT: the default value of the property +;; VAL-LIST: list of specially ordered property values (defconst unidata-prop-alist '((name @@ -152,7 +169,12 @@ Property value is a string.") Property value is one of the following symbols: Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn" - unidata-describe-general-category) + unidata-describe-general-category + nil + ;; The order of elements must be in sync with unicode_category_t + ;; in src/character.h. + (Lu Ll Lt Lm Lo Mn Mc Me Nd Nl No Pc Pd Ps Pe Pi Pf Po + Sm Sc Sk So Zs Zl Zp Cc Cf Cs Co Cn)) (canonical-combining-class 3 unidata-gen-table-integer "uni-combining.el" "Unicode canonical combining class. @@ -164,7 +186,11 @@ Property value is an integer." Property value is one of the following symbols: L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, AN, CS, NSM, BN, B, S, WS, ON" - unidata-describe-bidi-class) + unidata-describe-bidi-class + L + ;; The order of elements must be in sync with bidi_type_t in + ;; src/dispextern.h. + (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) (decomposition 5 unidata-gen-table-decomposition "uni-decomposition.el" "Unicode decomposition mapping. @@ -188,7 +214,7 @@ Property value is an integer or a floating point.") (mirrored 9 unidata-gen-table-symbol "uni-mirrored.el" "Unicode bidi mirrored flag. -Property value is a symbol `Y' or `N'.") +Property value is a symbol `Y' or `N'. See also the property `mirroring'.") (old-name 10 unidata-gen-table-name "uni-old-name.el" "Unicode old names as published in Unicode 1.0. @@ -211,7 +237,12 @@ Property value is a character." 14 unidata-gen-table-character "uni-titlecase.el" "Unicode simple titlecase mapping. Property value is a character." - string))) + string) + (mirroring + unidata-gen-mirroring-list unidata-gen-table-character "uni-mirrored.el" + "Unicode bidi-mirroring characters. +Property value is a character that has the corresponding mirroring image, +or nil for non-mirrored character."))) ;; Functions to access the above data. (defsubst unidata-prop-index (prop) (nth 1 (assq prop unidata-prop-alist))) @@ -219,6 +250,8 @@ Property value is a character." (defsubst unidata-prop-file (prop) (nth 3 (assq prop unidata-prop-alist))) (defsubst unidata-prop-docstring (prop) (nth 4 (assq prop unidata-prop-alist))) (defsubst unidata-prop-describer (prop) (nth 5 (assq prop unidata-prop-alist))) +(defsubst unidata-prop-default (prop) (nth 6 (assq prop unidata-prop-alist))) +(defsubst unidata-prop-val-list (prop) (nth 7 (assq prop unidata-prop-alist))) ;; SIMPLE TABLE @@ -227,52 +260,34 @@ Property value is a character." ;; values of succeeding character codes are usually different, we use ;; a char-table described here to store such values. ;; -;; If succeeding 128 characters has no property, a char-table has the -;; symbol t for them. Otherwise a char-table has a string of the -;; following format for them. +;; A char-table divides character code space (#x0..#x3FFFFF) into +;; #x8000 blocks (each block contains 128 characters). + +;; If all characters of a block have no property, a char-table has the +;; symbol nil for that block. Otherwise a char-table has a string of +;; the following format for it. ;; -;; The first character of the string is FIRST-INDEX. -;; The Nth (N > 0) character of the string is a property value of the -;; character (BLOCK-HEAD + FIRST-INDEX + N - 1), where BLOCK-HEAD is -;; the first of the characters in the block. +;; The first character of the string is ?\001. +;; The second character of the string is FIRST-INDEX. +;; The Nth (N > 1) character of the string is a property value of the +;; character (BLOCK-HEAD + FIRST-INDEX + N - 2), where BLOCK-HEAD is +;; the first character of the block. ;; -;; The 4th extra slot of a char-table is nil. - -(defun unidata-get-character (char val table) - (cond - ((characterp val) - val) +;; This kind of char-table has these extra slots: +;; 1st: the property symbol +;; 2nd: nil +;; 3rd: 0 (corresponding to uniprop_encode_character in chartab.c) +;; 4th to 5th: nil - ((stringp val) - (let* ((len (length val)) - (block-head (lsh (lsh char -7) 7)) - (vec (make-vector 128 nil)) - (first-index (aref val 0))) - (dotimes (i (1- len)) - (let ((elt (aref val (1+ i)))) - (if (> elt 0) - (aset vec (+ first-index i) elt)))) - (dotimes (i 128) - (aset table (+ block-head i) (aref vec i))) - (aref vec (- char block-head)))))) - -(defun unidata-put-character (char val table) - (or (characterp val) - (not val) - (error "Not a character nor nil: %S" val)) - (let ((current-val (aref table char))) - (unless (eq current-val val) - (if (stringp current-val) - (funcall (char-table-extra-slot table 1) char current-val table)) - (aset table char val)))) - -(defun unidata-gen-table-character (prop) +(defun unidata-gen-table-character (prop &rest ignore) (let ((table (make-char-table 'char-code-property-table)) (prop-idx (unidata-prop-index prop)) (vec (make-vector 128 0)) (tail unidata-list) elt range val idx slot) - (set-char-table-range table (cons 0 (max-char)) t) + (if (functionp prop-idx) + (setq tail (funcall prop-idx) + prop-idx 1)) (while tail (setq elt (car tail) tail (cdr tail)) (setq range (car elt) @@ -301,7 +316,7 @@ Property value is a character." (setq first-index last-index))) (setq tail (cdr tail))) (when first-index - (let ((str (string first-index)) + (let ((str (string 1 first-index)) c) (while (<= first-index last-index) (setq str (format "%s%c" str (or (aref vec first-index) 0)) @@ -309,184 +324,78 @@ Property value is a character." (set-char-table-range table (cons start limit) str)))))) (set-char-table-extra-slot table 0 prop) - (byte-compile 'unidata-get-character) - (byte-compile 'unidata-put-character) - (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-character)) - (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-character)) - + (set-char-table-extra-slot table 2 0) table)) ;; RUN-LENGTH TABLE ;; -;; If the type of character property value is symbol, integer, -;; boolean, or character, we use a char-table described here to store -;; the values. +;; If many characters of successive character codes have the same +;; property value, we use a char-table described here to store the +;; values. ;; -;; The 4th extra slot is a vector of property values (VAL-TABLE), and -;; values for succeeding 128 characters are encoded into this -;; character sequence: +;; At first, instead of a value itself, we store an index number to +;; the VAL-TABLE (5th extra slot) in the table. We call that index +;; number as VAL-CODE here after. +;; +;; A char-table divides character code space (#x0..#x3FFFFF) into +;; #x8000 blocks (each block contains 128 characters). +;; +;; If all characters of a block have the same value, a char-table has +;; VAL-CODE for that block. Otherwise a char-table has a string of +;; the following format for that block. +;; +;; The first character of the string is ?\002. +;; The following characters has this form: ;; ( VAL-CODE RUN-LENGTH ? ) + ;; where: -;; VAL-CODE (0..127): -;; (VAL-CODE - 1) is an index into VAL-TABLE. -;; The value 0 means no-value. +;; VAL-CODE (0..127): index into VAL-TABLE. ;; RUN-LENGTH (130..255): ;; (RUN-LENGTH - 128) specifies how many characters have the same ;; value. If omitted, it means 1. - - -;; Return a symbol-type character property value of CHAR. VAL is the -;; current value of (aref TABLE CHAR). - -(defun unidata-get-symbol (char val table) - (let ((val-table (char-table-extra-slot table 4))) - (cond ((symbolp val) - val) - ((stringp val) - (let ((first-char (lsh (lsh char -7) 7)) - (str val) - (len (length val)) - (idx 0) - this-val count) - (set-char-table-range table (cons first-char (+ first-char 127)) - nil) - (while (< idx len) - (setq val (aref str idx) idx (1+ idx) - count (if (< idx len) (aref str idx) 1)) - (setq val (and (> val 0) (aref val-table (1- val))) - count (if (< count 128) - 1 - (prog1 (- count 128) (setq idx (1+ idx))))) - (dotimes (i count) - (if val - (aset table first-char val)) - (if (= first-char char) - (setq this-val val)) - (setq first-char (1+ first-char)))) - this-val)) - ((> val 0) - (aref val-table (1- val)))))) - -;; Return a integer-type character property value of CHAR. VAL is the -;; current value of (aref TABLE CHAR). - -(defun unidata-get-integer (char val table) - (let ((val-table (char-table-extra-slot table 4))) - (cond ((integerp val) - val) - ((stringp val) - (let ((first-char (lsh (lsh char -7) 7)) - (str val) - (len (length val)) - (idx 0) - this-val count) - (while (< idx len) - (setq val (aref str idx) idx (1+ idx) - count (if (< idx len) (aref str idx) 1)) - (setq val (and (> val 0) (aref val-table (1- val))) - count (if (< count 128) - 1 - (prog1 (- count 128) (setq idx (1+ idx))))) - (dotimes (i count) - (aset table first-char val) - (if (= first-char char) - (setq this-val val)) - (setq first-char (1+ first-char)))) - this-val))))) - -;; Return a numeric-type (integer or float) character property value -;; of CHAR. VAL is the current value of (aref TABLE CHAR). - -(defun unidata-get-numeric (char val table) - (cond - ((numberp val) - val) - ((stringp val) - (let ((val-table (char-table-extra-slot table 4)) - (first-char (lsh (lsh char -7) 7)) - (str val) - (len (length val)) - (idx 0) - this-val count) - (while (< idx len) - (setq val (aref str idx) idx (1+ idx) - count (if (< idx len) (aref str idx) 1)) - (setq val (and (> val 0) (aref val-table (1- val))) - count (if (< count 128) - 1 - (prog1 (- count 128) (setq idx (1+ idx))))) - (dotimes (i count) - (aset table first-char val) - (if (= first-char char) - (setq this-val val)) - (setq first-char (1+ first-char)))) - this-val)))) - -;; Store VAL (symbol) as a character property value of CHAR in TABLE. - -(defun unidata-put-symbol (char val table) - (or (symbolp val) - (error "Not a symbol: %S" val)) - (let ((current-val (aref table char))) - (unless (eq current-val val) - (if (stringp current-val) - (funcall (char-table-extra-slot table 1) char current-val table)) - (aset table char val)))) - -;; Store VAL (integer) as a character property value of CHAR in TABLE. - -(defun unidata-put-integer (char val table) - (or (integerp val) - (not val) - (error "Not an integer nor nil: %S" val)) - (let ((current-val (aref table char))) - (unless (eq current-val val) - (if (stringp current-val) - (funcall (char-table-extra-slot table 1) char current-val table)) - (aset table char val)))) - -;; Store VAL (integer or float) as a character property value of CHAR -;; in TABLE. - -(defun unidata-put-numeric (char val table) - (or (numberp val) - (not val) - (error "Not a number nor nil: %S" val)) - (let ((current-val (aref table char))) - (unless (equal current-val val) - (if (stringp current-val) - (funcall (char-table-extra-slot table 1) char current-val table)) - (aset table char val)))) +;; +;; This kind of char-table has these extra slots: +;; 1st: the property symbol +;; 2nd: 0 (corresponding to uniprop_decode_value in chartab.c) +;; 3rd: 1..3 (corresponding to uniprop_encode_xxx in chartab.c) +;; 4th: function or nil +;; 5th: VAL-TABLE ;; Encode the character property value VAL into an integer value by ;; VAL-LIST. By side effect, VAL-LIST is modified. ;; VAL-LIST has this form: -;; (t (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...) -;; If VAL is one of VALn, just return VAL-CODEn. Otherwise, -;; VAL-LIST is modified to this: -;; (t (VAL . (1+ VAL-CODE1)) (VAL1 . VAL-CODE1) (VAL2 . VAL-CODE2) ...) +;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ...) +;; If VAL is one of VALn, just return n. +;; Otherwise, VAL-LIST is modified to this: +;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1)) (defun unidata-encode-val (val-list val) (let ((slot (assoc val val-list)) val-code) (if slot (cdr slot) - (setq val-code (if (cdr val-list) (1+ (cdr (nth 1 val-list))) 1)) - (setcdr val-list (cons (cons val val-code) (cdr val-list))) + (setq val-code (length val-list)) + (nconc val-list (list (cons val val-code))) val-code))) ;; Generate a char-table for the character property PROP. -(defun unidata-gen-table (prop val-func default-value) +(defun unidata-gen-table (prop val-func default-value val-list) (let ((table (make-char-table 'char-code-property-table)) (prop-idx (unidata-prop-index prop)) - (val-list (list t)) (vec (make-vector 128 0)) tail elt range val val-code idx slot prev-range-data) - (set-char-table-range table (cons 0 (max-char)) default-value) + (setq val-list (cons nil (copy-sequence val-list))) + (setq tail val-list val-code 0) + ;; Convert (nil A B ...) to ((nil . 0) (A . 1) (B . 2) ...) + (while tail + (setcar tail (cons (car tail) val-code)) + (setq tail (cdr tail) val-code (1+ val-code))) + (setq default-value (unidata-encode-val val-list default-value)) + (set-char-table-range table t default-value) + (set-char-table-range table nil default-value) (setq tail unidata-list) (while tail (setq elt (car tail) tail (cdr tail)) @@ -495,7 +404,7 @@ Property value is a character." (setq val-code (if val (unidata-encode-val val-list val))) (if (consp range) (when val-code - (set-char-table-range table range val) + (set-char-table-range table range val-code) (let ((from (car range)) (to (cdr range))) ;; If RANGE doesn't end at the char-table boundary (each ;; 128 characters), we may have to carry over the data @@ -534,7 +443,7 @@ Property value is a character." (if val-code (aset vec (- range start) val-code)) (setq tail (cdr tail))) - (setq str "" val-code -1 count 0) + (setq str "\002" val-code -1 count 0) (mapc #'(lambda (x) (if (= val-code x) (setq count (1+ count)) @@ -549,7 +458,7 @@ Property value is a character." vec) (if (= count 128) (if val - (set-char-table-range table (cons start limit) val)) + (set-char-table-range table (cons start limit) val-code)) (if (= val-code 0) (set-char-table-range table (cons start limit) str) (if (> count 2) @@ -559,34 +468,29 @@ Property value is a character." (setq str (concat str (string val-code))))) (set-char-table-range table (cons start limit) str)))))) - (setq val-list (nreverse (cdr val-list))) (set-char-table-extra-slot table 0 prop) (set-char-table-extra-slot table 4 (vconcat (mapcar 'car val-list))) table)) -(defun unidata-gen-table-symbol (prop) +(defun unidata-gen-table-symbol (prop default-value val-list) (let ((table (unidata-gen-table prop #'(lambda (x) (and (> (length x) 0) (intern x))) - 0))) - (byte-compile 'unidata-get-symbol) - (byte-compile 'unidata-put-symbol) - (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-symbol)) - (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-symbol)) + default-value val-list))) + (set-char-table-extra-slot table 1 0) + (set-char-table-extra-slot table 2 1) table)) -(defun unidata-gen-table-integer (prop) +(defun unidata-gen-table-integer (prop default-value val-list) (let ((table (unidata-gen-table prop #'(lambda (x) (and (> (length x) 0) (string-to-number x))) - t))) - (byte-compile 'unidata-get-integer) - (byte-compile 'unidata-put-integer) - (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-integer)) - (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-integer)) + default-value val-list))) + (set-char-table-extra-slot table 1 0) + (set-char-table-extra-slot table 2 1) table)) -(defun unidata-gen-table-numeric (prop) +(defun unidata-gen-table-numeric (prop default-value val-list) (let ((table (unidata-gen-table prop #'(lambda (x) (if (string-match "/" x) @@ -595,11 +499,9 @@ Property value is a character." (substring x (match-end 0)))) (if (> (length x) 0) (string-to-number x)))) - t))) - (byte-compile 'unidata-get-numeric) - (byte-compile 'unidata-put-numeric) - (set-char-table-extra-slot table 1 (symbol-function 'unidata-get-numeric)) - (set-char-table-extra-slot table 2 (symbol-function 'unidata-put-numeric)) + default-value val-list))) + (set-char-table-extra-slot table 1 0) + (set-char-table-extra-slot table 2 2) table)) @@ -892,7 +794,6 @@ Property value is a character." word-table block-list block-word-table block-end tail elt range val idx slot) - (set-char-table-range table (cons 0 (max-char)) 0) (setq tail unidata-list) (setq block-end -1) (while tail @@ -1025,7 +926,7 @@ Property value is a character." idx (1+ i))))) (nreverse (cons (intern (substring str idx)) l)))))) -(defun unidata-gen-table-name (prop) +(defun unidata-gen-table-name (prop &rest ignore) (let* ((table (unidata-gen-table-word-list prop 'unidata-split-name)) (word-tables (char-table-extra-slot table 4))) (byte-compile 'unidata-get-name) @@ -1064,7 +965,7 @@ Property value is a character." (nreverse l))))) -(defun unidata-gen-table-decomposition (prop) +(defun unidata-gen-table-decomposition (prop &rest ignore) (let* ((table (unidata-gen-table-word-list prop 'unidata-split-decomposition)) (word-tables (char-table-extra-slot table 4))) (byte-compile 'unidata-get-decomposition) @@ -1080,7 +981,8 @@ Property value is a character." (defun unidata-describe-general-category (val) (cdr (assq val - '((Lu . "Letter, Uppercase") + '((nil . "Uknown") + (Lu . "Letter, Uppercase") (Ll . "Letter, Lowercase") (Lt . "Letter, Titlecase") (Lm . "Letter, Modifier") @@ -1171,6 +1073,19 @@ Property value is a character." (string ?')))) val " ")) +(defun unidata-gen-mirroring-list () + (let ((head (list nil)) + tail) + (with-temp-buffer + (insert-file-contents (expand-file-name "BidiMirroring.txt" unidata-dir)) + (goto-char (point-min)) + (setq tail head) + (while (re-search-forward "^\\([0-9A-F]+\\);\\s +\\([0-9A-F]+\\)" nil t) + (let ((char (string-to-number (match-string 1) 16)) + (mirror (match-string 2))) + (setq tail (setcdr tail (list (list char mirror))))))) + (cdr head))) + ;; Verify if we can retrieve correct values from the generated ;; char-tables. @@ -1212,13 +1127,21 @@ Property value is a character." ;; The entry function. It generates files described in the header ;; comment of this file. -(defun unidata-gen-files (&optional unidata-text-file) - (or unidata-text-file - (setq unidata-text-file (car command-line-args-left) +(defun unidata-gen-files (&optional data-dir unidata-text-file) + (or data-dir + (setq data-dir (car command-line-args-left) + command-line-args-left (cdr command-line-args-left) + unidata-text-file (car command-line-args-left) command-line-args-left (cdr command-line-args-left))) - (unidata-setup-list unidata-text-file) (let ((coding-system-for-write 'utf-8-unix) - (charprop-file "charprop.el")) + (charprop-file "charprop.el") + (unidata-dir data-dir)) + (dolist (elt unidata-prop-alist) + (let* ((prop (car elt)) + (file (unidata-prop-file prop))) + (if (file-exists-p file) + (delete-file file)))) + (unidata-setup-list unidata-text-file) (with-temp-file charprop-file (insert ";; Automatically generated by unidata-gen.el.\n") (dolist (elt unidata-prop-alist) @@ -1227,31 +1150,41 @@ Property value is a character." (file (unidata-prop-file prop)) (docstring (unidata-prop-docstring prop)) (describer (unidata-prop-describer prop)) + (default-value (unidata-prop-default prop)) + (val-list (unidata-prop-val-list prop)) table) ;; Filename in this comment line is extracted by sed in ;; Makefile. (insert (format ";; FILE: %s\n" file)) (insert (format "(define-char-code-property '%S %S\n %S)\n" prop file docstring)) - (with-temp-file file + (with-temp-buffer (message "Generating %s..." file) - (setq table (funcall generator prop)) + (when (file-exists-p file) + (insert-file-contents file) + (goto-char (point-max)) + (search-backward ";; Local Variables:")) + (setq table (funcall generator prop default-value val-list)) (when describer (unless (subrp (symbol-function describer)) (byte-compile describer) (setq describer (symbol-function describer))) (set-char-table-extra-slot table 3 describer)) - (insert ";; Copyright (C) 1991-2009 Unicode, Inc. -;; This file was generated from the Unicode data file at -;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt. -;; See lisp/international/README for the copyright and permission notice.\n" - (format "(define-char-code-property '%S %S %S)\n" - prop table docstring) - ";; Local Variables:\n" - ";; coding: utf-8\n" - ";; no-byte-compile: t\n" - ";; End:\n\n" - (format ";; %s ends here\n" file))))) + (if (bobp) + (insert ";; Copyright (C) 1991-2009 Unicode, Inc. +;; This file was generated from the Unicode data files at +;; http://www.unicode.org/Public/UNIDATA/. +;; See lisp/international/README for the copyright and permission notice.\n")) + (insert (format "(define-char-code-property '%S %S %S)\n" + prop table docstring)) + (if (eobp) + (insert ";; Local Variables:\n" + ";; coding: utf-8\n" + ";; no-byte-compile: t\n" + ";; End:\n\n" + (format ";; %s ends here\n" file))) + (write-file file) + (message "Generating %s...done" file)))) (message "Writing %s..." charprop-file) (insert ";; Local Variables:\n" ";; coding: utf-8\n" diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 89b33dc7a62..b85a1680286 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,39 @@ +2011-07-06 Kenichi Handa + + * international/characters.el (build-unicode-category-table): + Delete it. + (unicode-category-table): Set it by + unicode-prroperty-table-internal. + + * international/mule-cmds.el (char-code-property-alist): Moved to + to src/chartab.c. + (get-char-code-property): Call unicode-property-table-internal to + load a file. Call get-unicode-property-internal where necessary. + (put-char-code-property): Call unicode-property-table-internal to + load a file. Call put-unicode-property-internal where necessary. + put-unicode-property-internal where necessary. + (char-code-property-description): Call + unicode-property-table-internal to load a file. + + * international/charprop.el: + * international/uni-bidi.el: + * international/uni-category.el: + * international/uni-combining.el: + * international/uni-comment.el: + * international/uni-decimal.el: + * international/uni-decomposition.el: + * international/uni-digit.el: + * international/uni-lowercase.el: + * international/uni-mirrored.el: + * international/uni-name.el: + * international/uni-numeric.el: + * international/uni-old-name.el: + * international/uni-titlecase.el: + * international/uni-uppercase.el: Regenerate. + + * loadup.el: Load international/charprop.el before + international/characters. + 2011-06-22 Richard Stallman * mail/sendmail.el (mail-bury): If Rmail is in use, return nicely diff --git a/lisp/international/characters.el b/lisp/international/characters.el index 455cbe697d6..a9657c17b9f 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@ -1206,22 +1206,8 @@ Setup char-width-table appropriate for non-CJK language environment." ;;; Setting unicode-category-table. -;; This macro is to build unicode-category-table at compile time so -;; that C code can access the table efficiently. -(defmacro build-unicode-category-table () - (let ((table (make-char-table 'unicode-category-table nil))) - (dotimes (i #x110000) - (if (or (< i #xD800) - (and (>= i #xF900) (< i #x30000)) - (and (>= i #xE0000) (< i #xE0200))) - (aset table i (get-char-code-property i 'general-category)))) - (set-char-table-range table '(#xE000 . #xF8FF) 'Co) - (set-char-table-range table '(#xF0000 . #xFFFFD) 'Co) - (set-char-table-range table '(#x100000 . #x10FFFD) 'Co) - (optimize-char-table table 'eq) - table)) - -(setq unicode-category-table (build-unicode-category-table)) +(setq unicode-category-table + (unicode-property-table-internal 'general-category)) (map-char-table #'(lambda (key val) (if (and val (or (and (/= (aref (symbol-name val) 0) ?M) diff --git a/lisp/international/charprop.el b/lisp/international/charprop.el index 5c3efcc9d07..919666010b1 100644 --- a/lisp/international/charprop.el +++ b/lisp/international/charprop.el @@ -1,8 +1,4 @@ -;; Copyright (C) 1991-2010 Unicode, Inc. -;; This file was generated from the Unicode data file at -;; http://www.unicode.org/Public/UNIDATA/UnicodeData.txt. -;; See lisp/international/README for the copyright and permission notice. - +;; Automatically generated by unidata-gen.el. ;; FILE: uni-name.el (define-char-code-property 'name "uni-name.el" "Unicode character name. @@ -45,7 +41,7 @@ Property value is an integer or a floating point.") ;; FILE: uni-mirrored.el (define-char-code-property 'mirrored "uni-mirrored.el" "Unicode bidi mirrored flag. -Property value is a symbol `Y' or `N'.") +Property value is a symbol `Y' or `N'. See also the property `mirroring'.") ;; FILE: uni-old-name.el (define-char-code-property 'old-name "uni-old-name.el" "Unicode old names as published in Unicode 1.0. @@ -66,6 +62,11 @@ Property value is a character.") (define-char-code-property 'titlecase "uni-titlecase.el" "Unicode simple titlecase mapping. Property value is a character.") +;; FILE: uni-mirrored.el +(define-char-code-property 'mirroring "uni-mirrored.el" + "Unicode bidi-mirroring characters. +Property value is a character that has the corresponding mirroring image, +or nil for non-mirrored character.") ;; Local Variables: ;; coding: utf-8 ;; no-byte-compile: t diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index b3f17bb3fcf..e75a22d6415 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el @@ -2709,16 +2709,6 @@ See also `locale-charset-language-names', `locale-language-names', ;;; Character property -;; Each element has the form (PROP . TABLE). -;; PROP is a symbol representing a character property. -;; TABLE is a char-table containing the property value for each character. -;; TABLE may be a name of file to load to build a char-table. -;; Don't modify this variable directly but use `define-char-code-property'. - -(defvar char-code-property-alist nil - "Alist of character property name vs char-table containing property values. -Internal use only.") - (put 'char-code-property-table 'char-table-extra-slots 5) (defun define-char-code-property (name table &optional docstring) @@ -2770,32 +2760,23 @@ See also the documentation of `get-char-code-property' and (defun get-char-code-property (char propname) "Return the value of CHAR's PROPNAME property." - (let ((slot (assq propname char-code-property-alist))) - (if slot - (let (table value func) - (if (stringp (cdr slot)) - (load (cdr slot) nil t)) - (setq table (cdr slot) - value (aref table char) - func (char-table-extra-slot table 1)) + (let ((table (unicode-property-table-internal propname))) + (if table + (let ((func (char-table-extra-slot table 1))) (if (functionp func) - (setq value (funcall func char value table))) - value) + (funcall func char (aref table char) table) + (get-unicode-property-internal table char))) (plist-get (aref char-code-property-table char) propname)))) (defun put-char-code-property (char propname value) "Store CHAR's PROPNAME property with VALUE. It can be retrieved with `(get-char-code-property CHAR PROPNAME)'." - (let ((slot (assq propname char-code-property-alist))) - (if slot - (let (table func) - (if (stringp (cdr slot)) - (load (cdr slot) nil t)) - (setq table (cdr slot) - func (char-table-extra-slot table 2)) + (let ((table (unicode-property-table-internal propname))) + (if table + (let ((func (char-table-extra-slot table 2))) (if (functionp func) (funcall func char value table) - (aset table char value))) + (put-unicode-property-internal table char value))) (let* ((plist (aref char-code-property-table char)) (x (plist-put plist propname value))) (or (eq x plist) @@ -2805,13 +2786,9 @@ It can be retrieved with `(get-char-code-property CHAR PROPNAME)'." (defun char-code-property-description (prop value) "Return a description string of character property PROP's value VALUE. If there's no description string for VALUE, return nil." - (let ((slot (assq prop char-code-property-alist))) - (if slot - (let (table func) - (if (stringp (cdr slot)) - (load (cdr slot) nil t)) - (setq table (cdr slot) - func (char-table-extra-slot table 3)) + (let ((table (unicode-property-table-internal prop))) + (if table + (let ((func (char-table-extra-slot table 3))) (if (functionp func) (funcall func value)))))) diff --git a/lisp/international/uni-bidi.el b/lisp/international/uni-bidi.el index 9e571ef9d0d..e7682c6d8ff 100644 Binary files a/lisp/international/uni-bidi.el and b/lisp/international/uni-bidi.el differ diff --git a/lisp/international/uni-category.el b/lisp/international/uni-category.el index 80538f7b416..a4455decc52 100644 Binary files a/lisp/international/uni-category.el and b/lisp/international/uni-category.el differ diff --git a/lisp/international/uni-combining.el b/lisp/international/uni-combining.el index 2ee74d8b818..227b9d0af79 100644 Binary files a/lisp/international/uni-combining.el and b/lisp/international/uni-combining.el differ diff --git a/lisp/international/uni-comment.el b/lisp/international/uni-comment.el index dcc717977c7..c9743064bd4 100644 Binary files a/lisp/international/uni-comment.el and b/lisp/international/uni-comment.el differ diff --git a/lisp/international/uni-decimal.el b/lisp/international/uni-decimal.el index 22207a224b0..2c424ffb5de 100644 Binary files a/lisp/international/uni-decimal.el and b/lisp/international/uni-decimal.el differ diff --git a/lisp/international/uni-decomposition.el b/lisp/international/uni-decomposition.el index f35bcebfed8..b0bf07bbe85 100644 Binary files a/lisp/international/uni-decomposition.el and b/lisp/international/uni-decomposition.el differ diff --git a/lisp/international/uni-digit.el b/lisp/international/uni-digit.el index 692dea1edc8..fc52fd8c28c 100644 Binary files a/lisp/international/uni-digit.el and b/lisp/international/uni-digit.el differ diff --git a/lisp/international/uni-lowercase.el b/lisp/international/uni-lowercase.el index 7cc601159f0..41890018204 100644 Binary files a/lisp/international/uni-lowercase.el and b/lisp/international/uni-lowercase.el differ diff --git a/lisp/international/uni-mirrored.el b/lisp/international/uni-mirrored.el index 5129a93396d..006cf575591 100644 Binary files a/lisp/international/uni-mirrored.el and b/lisp/international/uni-mirrored.el differ diff --git a/lisp/international/uni-name.el b/lisp/international/uni-name.el index 5b9e8323d21..7fac18b278d 100644 Binary files a/lisp/international/uni-name.el and b/lisp/international/uni-name.el differ diff --git a/lisp/international/uni-numeric.el b/lisp/international/uni-numeric.el index 278ad683fe4..d16e8c00870 100644 Binary files a/lisp/international/uni-numeric.el and b/lisp/international/uni-numeric.el differ diff --git a/lisp/international/uni-old-name.el b/lisp/international/uni-old-name.el index 2e283492408..4e704e5cdd0 100644 Binary files a/lisp/international/uni-old-name.el and b/lisp/international/uni-old-name.el differ diff --git a/lisp/international/uni-titlecase.el b/lisp/international/uni-titlecase.el index 729a469d103..b8098c81876 100644 Binary files a/lisp/international/uni-titlecase.el and b/lisp/international/uni-titlecase.el differ diff --git a/lisp/international/uni-uppercase.el b/lisp/international/uni-uppercase.el index 0714b14794f..899276eb725 100644 Binary files a/lisp/international/uni-uppercase.el and b/lisp/international/uni-uppercase.el differ diff --git a/lisp/loadup.el b/lisp/loadup.el index 4c677523689..792827dd913 100644 --- a/lisp/loadup.el +++ b/lisp/loadup.el @@ -123,11 +123,11 @@ ;; multilingual text. (load "international/mule-cmds") (load "case-table") -(load "international/characters") -(load "composite") ;; This file doesn't exist when building a development version of Emacs ;; from the repository. It is generated just after temacs is built. (load "international/charprop.el" t) +(load "international/characters") +(load "composite") ;; Load language-specific files. (load "language/chinese") diff --git a/src/ChangeLog b/src/ChangeLog index 78fca60ca28..1a56298ee20 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,45 @@ +2011-07-06 Kenichi Handa + + * character.h (unicode_category_t): New enum type. + + * chartab.c (uniprop_decoder_t, uniprop_encoder_t): New types. + (Qchar_code_property_table): New variable. + (UNIPROP_TABLE_P, UNIPROP_GET_DECODER) + (UNIPROP_COMPRESSED_FORM_P): New macros. + (char_table_ascii): Uncompress the compressed values. + (sub_char_table_ref): New arg is_uniprop. Callers changed. + Uncompress the compressed values. + (sub_char_table_ref_and_range): Likewise. + (char_table_ref_and_range): Uncompress the compressed values. + (sub_char_table_set): New arg is_uniprop. Callers changed. + Uncompress the compressed values. + (sub_char_table_set_range): Args changed. Callers changed. + (char_table_set_range): Adjuted for the above change. + (map_sub_char_table): Delete args default_val and parent. Add arg + top. Give decoded values to a Lisp function. + (map_char_table): Adjusted for the above change. Give decoded + values to a Lisp function. Gcpro more variables. + (uniprop_table_uncompress) + (uniprop_decode_value_run_length): New functions. + (uniprop_decoder, uniprop_decoder_count): New variables. + (uniprop_get_decoder, uniprop_encode_value_character) + (uniprop_encode_value_run_length, uniprop_encode_value_numeric): + New functions. + (uniprop_encoder, uniprop_encoder_count): New variables. + (uniprop_get_encoder, uniprop_table) + (Funicode_property_table_internal, Fget_unicode_property_internal) + (Fput_unicode_property_internal): New functions. + (syms_of_chartab): DEFSYM Qchar_code_property_table, defsubr + Sunicode_property_table_internal, Sget_unicode_property_internal, + and Sput_unicode_property_internal. Defvar_lisp + char-code-property-alist. + + * composite.c (CHAR_COMPOSABLE_P): Adjusted for the change of + Vunicode_category_table. + + * font.c (font_range): Adjusted for the change of + Vunicode_category_table. + 2011-06-22 Paul Eggert Fixes for GLYPH_DEBUG found by GCC 4.6.0 static checking. diff --git a/src/character.h b/src/character.h index 9a45e7f0033..d8e77c50953 100644 --- a/src/character.h +++ b/src/character.h @@ -597,6 +597,45 @@ along with GNU Emacs. If not, see . */ : (c) <= 0xDFFF ? 2 \ : 0) +/* Data type for Unicode general category. + + The order of members must be in sync with the 8th element of the + member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for + Unicode character property `general-category'. */ + +typedef enum { + UNICODE_CATEGORY_UNKNOWN = 0, + UNICODE_CATEGORY_Lu, + UNICODE_CATEGORY_Ll, + UNICODE_CATEGORY_Lt, + UNICODE_CATEGORY_Lm, + UNICODE_CATEGORY_Lo, + UNICODE_CATEGORY_Mn, + UNICODE_CATEGORY_Mc, + UNICODE_CATEGORY_Me, + UNICODE_CATEGORY_Nd, + UNICODE_CATEGORY_Nl, + UNICODE_CATEGORY_No, + UNICODE_CATEGORY_Pc, + UNICODE_CATEGORY_Pd, + UNICODE_CATEGORY_Ps, + UNICODE_CATEGORY_Pe, + UNICODE_CATEGORY_Pi, + UNICODE_CATEGORY_Pf, + UNICODE_CATEGORY_Po, + UNICODE_CATEGORY_Sm, + UNICODE_CATEGORY_Sc, + UNICODE_CATEGORY_Sk, + UNICODE_CATEGORY_So, + UNICODE_CATEGORY_Zs, + UNICODE_CATEGORY_Zl, + UNICODE_CATEGORY_Zp, + UNICODE_CATEGORY_Cc, + UNICODE_CATEGORY_Cf, + UNICODE_CATEGORY_Cs, + UNICODE_CATEGORY_Co, + UNICODE_CATEGORY_Cn +} unicode_category_t; extern int char_resolve_modifier_mask (int); extern int char_string (unsigned, unsigned char *); diff --git a/src/chartab.c b/src/chartab.c index ed5b238646e..4a9a76bdd60 100644 --- a/src/chartab.c +++ b/src/chartab.c @@ -53,7 +53,38 @@ static const int chartab_bits[4] = #define CHARTAB_IDX(c, depth, min_char) \ (((c) - (min_char)) >> chartab_bits[(depth)]) + +/* Preamble for uniprop (Unicode character property) tables. See the + comment of "Unicode character property tables". */ + +/* Purpose of uniprop tables. */ +static Lisp_Object Qchar_code_property_table; + +/* Types of decoder and encoder functions for uniprop values. */ +typedef Lisp_Object (*uniprop_decoder_t) (Lisp_Object, Lisp_Object); +typedef Lisp_Object (*uniprop_encoder_t) (Lisp_Object, Lisp_Object); + +static Lisp_Object uniprop_table_uncompress (Lisp_Object, int); +static uniprop_decoder_t uniprop_get_decoder (Lisp_Object); + +/* 1 iff TABLE is a uniprop table. */ +#define UNIPROP_TABLE_P(TABLE) \ + (EQ (XCHAR_TABLE (TABLE)->purpose, Qchar_code_property_table) \ + && CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (TABLE)) == 5) + +/* Return a decoder for values in the uniprop table TABLE. */ +#define UNIPROP_GET_DECODER(TABLE) \ + (UNIPROP_TABLE_P (TABLE) ? uniprop_get_decoder (TABLE) : NULL) +/* Nonzero iff OBJ is a string representing uniprop values of 128 + succeeding characters (the bottom level of a char-table) by a + compressed format. We are sure that no property value has a string + starting with '\001' nor '\002'. */ +#define UNIPROP_COMPRESSED_FORM_P(OBJ) \ + (STRINGP (OBJ) && SCHARS (OBJ) > 0 \ + && ((SREF (OBJ, 0) == 1 || (SREF (OBJ, 0) == 2)))) + + DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0, doc: /* Return a newly created char-table, with purpose PURPOSE. Each element is initialized to INIT, which defaults to nil. @@ -107,7 +138,7 @@ make_sub_char_table (int depth, int min_char, Lisp_Object defalt) static Lisp_Object char_table_ascii (Lisp_Object table) { - Lisp_Object sub; + Lisp_Object sub, val; sub = XCHAR_TABLE (table)->contents[0]; if (! SUB_CHAR_TABLE_P (sub)) @@ -115,7 +146,10 @@ char_table_ascii (Lisp_Object table) sub = XSUB_CHAR_TABLE (sub)->contents[0]; if (! SUB_CHAR_TABLE_P (sub)) return sub; - return XSUB_CHAR_TABLE (sub)->contents[0]; + val = XSUB_CHAR_TABLE (sub)->contents[0]; + if (UNIPROP_TABLE_P (table) && UNIPROP_COMPRESSED_FORM_P (val)) + val = uniprop_table_uncompress (sub, 0); + return val; } static Lisp_Object @@ -169,16 +203,19 @@ copy_char_table (Lisp_Object table) } static Lisp_Object -sub_char_table_ref (Lisp_Object table, int c) +sub_char_table_ref (Lisp_Object table, int c, int is_uniprop) { struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); int depth = XINT (tbl->depth); int min_char = XINT (tbl->min_char); Lisp_Object val; + int idx = CHARTAB_IDX (c, depth, min_char); - val = tbl->contents[CHARTAB_IDX (c, depth, min_char)]; + val = tbl->contents[idx]; + if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val)) + val = uniprop_table_uncompress (table, idx); if (SUB_CHAR_TABLE_P (val)) - val = sub_char_table_ref (val, c); + val = sub_char_table_ref (val, c, is_uniprop); return val; } @@ -198,7 +235,7 @@ char_table_ref (Lisp_Object table, int c) { val = tbl->contents[CHARTAB_IDX (c, 0, 0)]; if (SUB_CHAR_TABLE_P (val)) - val = sub_char_table_ref (val, c); + val = sub_char_table_ref (val, c, UNIPROP_TABLE_P (table)); } if (NILP (val)) { @@ -210,7 +247,8 @@ char_table_ref (Lisp_Object table, int c) } static Lisp_Object -sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp_Object defalt) +sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, + Lisp_Object defalt, int is_uniprop) { struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); int depth = XINT (tbl->depth); @@ -219,8 +257,10 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp Lisp_Object val; val = tbl->contents[chartab_idx]; + if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val)) + val = uniprop_table_uncompress (table, chartab_idx); if (SUB_CHAR_TABLE_P (val)) - val = sub_char_table_ref_and_range (val, c, from, to, defalt); + val = sub_char_table_ref_and_range (val, c, from, to, defalt, is_uniprop); else if (NILP (val)) val = defalt; @@ -232,8 +272,11 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp c = min_char + idx * chartab_chars[depth] - 1; idx--; this_val = tbl->contents[idx]; + if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val)) + this_val = uniprop_table_uncompress (table, idx); if (SUB_CHAR_TABLE_P (this_val)) - this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt); + this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt, + is_uniprop); else if (NILP (this_val)) this_val = defalt; @@ -251,8 +294,11 @@ sub_char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to, Lisp chartab_idx++; this_val = tbl->contents[chartab_idx]; + if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val)) + this_val = uniprop_table_uncompress (table, chartab_idx); if (SUB_CHAR_TABLE_P (this_val)) - this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt); + this_val = sub_char_table_ref_and_range (this_val, c, from, to, defalt, + is_uniprop); else if (NILP (this_val)) this_val = defalt; if (! EQ (this_val, val)) @@ -277,17 +323,20 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to) struct Lisp_Char_Table *tbl = XCHAR_TABLE (table); int chartab_idx = CHARTAB_IDX (c, 0, 0), idx; Lisp_Object val; + int is_uniprop = UNIPROP_TABLE_P (table); val = tbl->contents[chartab_idx]; if (*from < 0) *from = 0; if (*to < 0) *to = MAX_CHAR; + if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (val)) + val = uniprop_table_uncompress (table, chartab_idx); if (SUB_CHAR_TABLE_P (val)) - val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt); + val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt, + is_uniprop); else if (NILP (val)) val = tbl->defalt; - idx = chartab_idx; while (*from < idx * chartab_chars[0]) { @@ -296,9 +345,11 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to) c = idx * chartab_chars[0] - 1; idx--; this_val = tbl->contents[idx]; + if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val)) + this_val = uniprop_table_uncompress (table, idx); if (SUB_CHAR_TABLE_P (this_val)) this_val = sub_char_table_ref_and_range (this_val, c, from, to, - tbl->defalt); + tbl->defalt, is_uniprop); else if (NILP (this_val)) this_val = tbl->defalt; @@ -315,9 +366,11 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to) chartab_idx++; c = chartab_idx * chartab_chars[0]; this_val = tbl->contents[chartab_idx]; + if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this_val)) + this_val = uniprop_table_uncompress (table, chartab_idx); if (SUB_CHAR_TABLE_P (this_val)) this_val = sub_char_table_ref_and_range (this_val, c, from, to, - tbl->defalt); + tbl->defalt, is_uniprop); else if (NILP (this_val)) this_val = tbl->defalt; if (! EQ (this_val, val)) @@ -332,7 +385,7 @@ char_table_ref_and_range (Lisp_Object table, int c, int *from, int *to) static void -sub_char_table_set (Lisp_Object table, int c, Lisp_Object val) +sub_char_table_set (Lisp_Object table, int c, Lisp_Object val, int is_uniprop) { struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); int depth = XINT ((tbl)->depth); @@ -347,11 +400,17 @@ sub_char_table_set (Lisp_Object table, int c, Lisp_Object val) sub = tbl->contents[i]; if (! SUB_CHAR_TABLE_P (sub)) { - sub = make_sub_char_table (depth + 1, - min_char + i * chartab_chars[depth], sub); - tbl->contents[i] = sub; + if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub)) + sub = uniprop_table_uncompress (table, i); + else + { + sub = make_sub_char_table (depth + 1, + min_char + i * chartab_chars[depth], + sub); + tbl->contents[i] = sub; + } } - sub_char_table_set (sub, c, val); + sub_char_table_set (sub, c, val, is_uniprop); } } @@ -376,7 +435,7 @@ char_table_set (Lisp_Object table, int c, Lisp_Object val) sub = make_sub_char_table (1, i * chartab_chars[0], sub); tbl->contents[i] = sub; } - sub_char_table_set (sub, c, val); + sub_char_table_set (sub, c, val, UNIPROP_TABLE_P (table)); if (ASCII_CHAR_P (c)) tbl->ascii = char_table_ascii (table); } @@ -384,30 +443,40 @@ char_table_set (Lisp_Object table, int c, Lisp_Object val) } static void -sub_char_table_set_range (Lisp_Object *table, int depth, int min_char, int from, int to, Lisp_Object val) +sub_char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val, + int is_uniprop) { - int max_char = min_char + chartab_chars[depth] - 1; - - if (depth == 3 || (from <= min_char && to >= max_char)) - *table = val; - else + struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); + int depth = XINT ((tbl)->depth); + int min_char = XINT ((tbl)->min_char); + int chars_in_block = chartab_chars[depth]; + int i, c, lim = chartab_size[depth]; + + if (from < min_char) + from = min_char; + i = CHARTAB_IDX (from, depth, min_char); + c = min_char + chars_in_block * i; + for (; i <= lim; i++, c += chars_in_block) { - int i; - unsigned j; - - depth++; - if (! SUB_CHAR_TABLE_P (*table)) - *table = make_sub_char_table (depth, min_char, *table); - if (from < min_char) - from = min_char; - if (to > max_char) - to = max_char; - i = CHARTAB_IDX (from, depth, min_char); - j = CHARTAB_IDX (to, depth, min_char); - min_char += chartab_chars[depth] * i; - for (j++; i < j; i++, min_char += chartab_chars[depth]) - sub_char_table_set_range (XSUB_CHAR_TABLE (*table)->contents + i, - depth, min_char, from, to, val); + if (c > to) + break; + if (from <= c && c + chars_in_block - 1 <= to) + tbl->contents[i] = val; + else + { + Lisp_Object sub = tbl->contents[i]; + if (! SUB_CHAR_TABLE_P (sub)) + { + if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (sub)) + sub = uniprop_table_uncompress (table, i); + else + { + sub = make_sub_char_table (depth + 1, c, sub); + tbl->contents[i] = sub; + } + } + sub_char_table_set_range (sub, from, to, val, is_uniprop); + } } } @@ -417,16 +486,33 @@ char_table_set_range (Lisp_Object table, int from, int to, Lisp_Object val) { struct Lisp_Char_Table *tbl = XCHAR_TABLE (table); Lisp_Object *contents = tbl->contents; - int i; if (from == to) char_table_set (table, from, val); else { - unsigned lim = to / chartab_chars[0] + 1; - for (i = CHARTAB_IDX (from, 0, 0); i < lim; i++) - sub_char_table_set_range (contents + i, 0, i * chartab_chars[0], - from, to, val); + int is_uniprop = UNIPROP_TABLE_P (table); + int lim = CHARTAB_IDX (to, 0, 0); + int i, c; + + for (i = CHARTAB_IDX (from, 0, 0), c = 0; i <= lim; + i++, c += chartab_chars[0]) + { + if (c > to) + break; + if (from <= c && c + chartab_chars[0] - 1 <= to) + tbl->contents[i] = val; + else + { + Lisp_Object sub = tbl->contents[i]; + if (! SUB_CHAR_TABLE_P (sub)) + { + sub = make_sub_char_table (1, i * chartab_chars[0], sub); + tbl->contents[i] = sub; + } + sub_char_table_set_range (sub, from, to, val, is_uniprop); + } + } if (ASCII_CHAR_P (from)) tbl->ascii = char_table_ascii (table); } @@ -504,6 +590,8 @@ DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot, (Lisp_Object char_table, Lisp_Object n, Lisp_Object value) { CHECK_CHAR_TABLE (char_table); + if (EQ (XCHAR_TABLE (char_table)->purpose, Qchar_code_property_table)) + error ("Can't change extra-slot of char-code-property-table"); CHECK_NUMBER (n); if (XINT (n) < 0 || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table))) @@ -532,8 +620,9 @@ a cons of character codes (for characters in the range), or a character code. * CHECK_CHARACTER_CAR (range); CHECK_CHARACTER_CDR (range); - val = char_table_ref_and_range (char_table, XFASTINT (XCAR (range)), - &from, &to); + from = XFASTINT (XCAR (range)); + to = XFASTINT (XCDR (range)); + val = char_table_ref_and_range (char_table, from, &from, &to); /* Not yet implemented. */ } else @@ -655,8 +744,7 @@ equivalent and can be merged. It defaults to `equal'. */) /* Map C_FUNCTION or FUNCTION over TABLE (top or sub char-table), calling it for each character or group of characters that share a value. RANGE is a cons (FROM . TO) specifying the range of target - characters, VAL is a value of FROM in TABLE, DEFAULT_VAL is the - default value of the char-table, PARENT is the parent of the + characters, VAL is a value of FROM in TABLE, TOP is the top char-table. ARG is passed to C_FUNCTION when that is called. @@ -669,7 +757,7 @@ equivalent and can be merged. It defaults to `equal'. */) static Lisp_Object map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object table, Lisp_Object arg, Lisp_Object val, - Lisp_Object range, Lisp_Object default_val, Lisp_Object parent) + Lisp_Object range, Lisp_Object top) { /* Pointer to the elements of TABLE. */ Lisp_Object *contents; @@ -681,6 +769,8 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), int chars_in_block; int from = XINT (XCAR (range)), to = XINT (XCDR (range)); int i, c; + int is_uniprop = UNIPROP_TABLE_P (top); + uniprop_decoder_t decoder = UNIPROP_GET_DECODER (top); if (SUB_CHAR_TABLE_P (table)) { @@ -710,28 +800,33 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), for (c = min_char + chars_in_block * i; c <= max_char; i++, c += chars_in_block) { - Lisp_Object this = contents[i]; + Lisp_Object this = (SUB_CHAR_TABLE_P (table) + ? XSUB_CHAR_TABLE (table)->contents[i] + : XCHAR_TABLE (table)->contents[i]); int nextc = c + chars_in_block; + if (is_uniprop && UNIPROP_COMPRESSED_FORM_P (this)) + this = uniprop_table_uncompress (table, i); if (SUB_CHAR_TABLE_P (this)) { if (to >= nextc) XSETCDR (range, make_number (nextc - 1)); val = map_sub_char_table (c_function, function, this, arg, - val, range, default_val, parent); + val, range, top); } else { if (NILP (this)) - this = default_val; + this = XCHAR_TABLE (top)->defalt; if (!EQ (val, this)) { int different_value = 1; if (NILP (val)) { - if (! NILP (parent)) + if (! NILP (XCHAR_TABLE (top)->parent)) { + Lisp_Object parent = XCHAR_TABLE (top)->parent; Lisp_Object temp = XCHAR_TABLE (parent)->parent; /* This is to get a value of FROM in PARENT @@ -742,8 +837,7 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), XSETCDR (range, make_number (c - 1)); val = map_sub_char_table (c_function, function, parent, arg, val, range, - XCHAR_TABLE (parent)->defalt, - XCHAR_TABLE (parent)->parent); + parent); if (EQ (val, this)) different_value = 0; } @@ -756,14 +850,22 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), if (c_function) (*c_function) (arg, XCAR (range), val); else - call2 (function, XCAR (range), val); + { + if (decoder) + val = decoder (top, val); + call2 (function, XCAR (range), val); + } } else { if (c_function) (*c_function) (arg, range, val); else - call2 (function, range, val); + { + if (decoder) + val = decoder (top, val); + call2 (function, range, val); + } } } val = this; @@ -783,35 +885,39 @@ map_sub_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), ARG is passed to C_FUNCTION when that is called. */ void -map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object table, Lisp_Object arg) +map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), + Lisp_Object function, Lisp_Object table, Lisp_Object arg) { - Lisp_Object range, val; - struct gcpro gcpro1, gcpro2, gcpro3; + Lisp_Object range, val, parent; + struct gcpro gcpro1, gcpro2, gcpro3, gcpro4; + uniprop_decoder_t decoder = UNIPROP_GET_DECODER (table); range = Fcons (make_number (0), make_number (MAX_CHAR)); - GCPRO3 (table, arg, range); + parent = XCHAR_TABLE (table)->parent; + + GCPRO4 (table, arg, range, parent); val = XCHAR_TABLE (table)->ascii; if (SUB_CHAR_TABLE_P (val)) val = XSUB_CHAR_TABLE (val)->contents[0]; val = map_sub_char_table (c_function, function, table, arg, val, range, - XCHAR_TABLE (table)->defalt, - XCHAR_TABLE (table)->parent); + table); + /* If VAL is nil and TABLE has a parent, we must consult the parent recursively. */ while (NILP (val) && ! NILP (XCHAR_TABLE (table)->parent)) { - Lisp_Object parent = XCHAR_TABLE (table)->parent; - Lisp_Object temp = XCHAR_TABLE (parent)->parent; + Lisp_Object temp; int from = XINT (XCAR (range)); + parent = XCHAR_TABLE (table)->parent; + temp = XCHAR_TABLE (parent)->parent; /* This is to get a value of FROM in PARENT without checking the parent of PARENT. */ XCHAR_TABLE (parent)->parent = Qnil; val = CHAR_TABLE_REF (parent, from); XCHAR_TABLE (parent)->parent = temp; val = map_sub_char_table (c_function, function, parent, arg, val, range, - XCHAR_TABLE (parent)->defalt, - XCHAR_TABLE (parent)->parent); + parent); table = parent; } @@ -822,14 +928,22 @@ map_char_table (void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object), Lisp if (c_function) (*c_function) (arg, XCAR (range), val); else - call2 (function, XCAR (range), val); + { + if (decoder) + val = decoder (table, val); + call2 (function, XCAR (range), val); + } } else { if (c_function) (*c_function) (arg, range, val); else - call2 (function, range, val); + { + if (decoder) + val = decoder (table, val); + call2 (function, range, val); + } } } @@ -983,10 +1097,316 @@ map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object), UNGCPRO; } + +/* Unicode character property tables. + + This section provides a convenient and efficient way to get a + Unicode character property from C code (from Lisp, you must use + get-char-code-property). + + The typical usage is to get a char-table for a specific property at + a proper initialization time as this: + + Lisp_Object bidi_class_table = uniprop_table (intern ("bidi-class")); + + and get a property value for character CH as this: + + Lisp_Object bidi_class = CHAR_TABLE_REF (CH, bidi_class_table); + + In this case, what you actually get is an index number to the + vector of property values (symbols nil, L, R, etc). + + A table for Unicode character property has these characteristics: + + o The purpose is `char-code-property-table', which implies that the + table has 5 extra slots. + + o The second extra slot is a Lisp function, an index (integer) to + the array uniprop_decoder[], or nil. If it is a Lisp function, we + can't use such a table from C (at the moment). If it is nil, it + means that we don't have to decode values. + + o The third extra slot is a Lisp function, an index (integer) to + the array uniprop_enncoder[], or nil. If it is a Lisp function, we + can't use such a table from C (at the moment). If it is nil, it + means that we don't have to encode values. */ + + +/* Uncompress the IDXth element of sub-char-table TABLE. */ + +static Lisp_Object +uniprop_table_uncompress (Lisp_Object table, int idx) +{ + Lisp_Object val = XSUB_CHAR_TABLE (table)->contents[idx]; + int min_char = (XINT (XSUB_CHAR_TABLE (table)->min_char) + + chartab_chars[2] * idx); + Lisp_Object sub = make_sub_char_table (3, min_char, Qnil); + struct Lisp_Sub_Char_Table *subtbl = XSUB_CHAR_TABLE (sub); + const unsigned char *p, *pend; + int i; + + XSUB_CHAR_TABLE (table)->contents[idx] = sub; + p = SDATA (val), pend = p + SBYTES (val); + if (*p == 1) + { + /* SIMPLE TABLE */ + p++; + idx = STRING_CHAR_ADVANCE (p); + while (p < pend && idx < chartab_chars[2]) + { + int v = STRING_CHAR_ADVANCE (p); + subtbl->contents[idx++] = v > 0 ? make_number (v) : Qnil; + } + } + else if (*p == 2) + { + /* RUN-LENGTH TABLE */ + p++; + for (idx = 0; p < pend; ) + { + int v = STRING_CHAR_ADVANCE (p); + int count = 1; + int len; + + if (p < pend) + { + count = STRING_CHAR_AND_LENGTH (p, len); + if (count < 128) + count = 1; + else + { + count -= 128; + p += len; + } + } + while (count-- > 0) + subtbl->contents[idx++] = make_number (v); + } + } +/* It seems that we don't need this function because C code won't need + to get a property that is compressed in this form. */ +#if 0 + else if (*p == 0) + { + /* WORD-LIST TABLE */ + } +#endif + return sub; +} + + +/* Decode VALUE as an elemnet of char-table TABLE. */ + +static Lisp_Object +uniprop_decode_value_run_length (Lisp_Object table, Lisp_Object value) +{ + if (VECTORP (XCHAR_TABLE (table)->extras[4])) + { + Lisp_Object valvec = XCHAR_TABLE (table)->extras[4]; + + if (XINT (value) >= 0 && XINT (value) < ASIZE (valvec)) + value = AREF (valvec, XINT (value)); + } + return value; +} + +static uniprop_decoder_t uniprop_decoder [] = + { uniprop_decode_value_run_length }; + +static int uniprop_decoder_count + = (sizeof uniprop_decoder) / sizeof (uniprop_decoder[0]); + + +/* Return the decoder of char-table TABLE or nil if none. */ + +static uniprop_decoder_t +uniprop_get_decoder (Lisp_Object table) +{ + int i; + + if (! INTEGERP (XCHAR_TABLE (table)->extras[1])) + return NULL; + i = XINT (XCHAR_TABLE (table)->extras[1]); + if (i < 0 || i >= uniprop_decoder_count) + return NULL; + return uniprop_decoder[i]; +} + + +/* Encode VALUE as an element of char-table TABLE which contains + characters as elements. */ + +static Lisp_Object +uniprop_encode_value_character (Lisp_Object table, Lisp_Object value) +{ + if (! NILP (value) && ! CHARACTERP (value)) + wrong_type_argument (Qintegerp, value); + return value; +} + + +/* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH + compression. */ + +static Lisp_Object +uniprop_encode_value_run_length (Lisp_Object table, Lisp_Object value) +{ + Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents; + int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]); + + for (i = 0; i < size; i++) + if (EQ (value, value_table[i])) + break; + if (i == size) + wrong_type_argument (build_string ("Unicode property value"), value); + return make_number (i); +} + + +/* Encode VALUE as an element of char-table TABLE which adopts RUN-LENGTH + compression and contains numbers as elements . */ + +static Lisp_Object +uniprop_encode_value_numeric (Lisp_Object table, Lisp_Object value) +{ + Lisp_Object *value_table = XVECTOR (XCHAR_TABLE (table)->extras[4])->contents; + int i, size = ASIZE (XCHAR_TABLE (table)->extras[4]); + + CHECK_NUMBER (value); + for (i = 0; i < size; i++) + if (EQ (value, value_table[i])) + break; + value = make_number (i); + if (i == size) + { + Lisp_Object args[2]; + + args[0] = XCHAR_TABLE (table)->extras[4]; + args[1] = Fmake_vector (make_number (1), value); + XCHAR_TABLE (table)->extras[4] = Fvconcat (2, args); + } + return make_number (i); +} + +static uniprop_encoder_t uniprop_encoder[] = + { uniprop_encode_value_character, + uniprop_encode_value_run_length, + uniprop_encode_value_numeric }; + +static int uniprop_encoder_count + = (sizeof uniprop_encoder) / sizeof (uniprop_encoder[0]); + + +/* Return the encoder of char-table TABLE or nil if none. */ + +static uniprop_decoder_t +uniprop_get_encoder (Lisp_Object table) +{ + int i; + + if (! INTEGERP (XCHAR_TABLE (table)->extras[2])) + return NULL; + i = XINT (XCHAR_TABLE (table)->extras[2]); + if (i < 0 || i >= uniprop_encoder_count) + return NULL; + return uniprop_encoder[i]; +} + +/* Return a char-table for Unicode character property PROP. This + function may load a Lisp file and thus may cause + garbage-collection. */ + +Lisp_Object +uniprop_table (Lisp_Object prop) +{ + Lisp_Object val, table, result; + + val = Fassq (prop, Vchar_code_property_alist); + if (! CONSP (val)) + return Qnil; + table = XCDR (val); + if (STRINGP (table)) + { + struct gcpro gcpro1; + GCPRO1 (val); + result = Fload (concat2 (build_string ("international/"), table), + Qt, Qt, Qt, Qt); + UNGCPRO; + if (NILP (result)) + return Qnil; + table = XCDR (val); + } + if (! CHAR_TABLE_P (table) + || ! UNIPROP_TABLE_P (table)) + return Qnil; + val = XCHAR_TABLE (table)->extras[1]; + if (INTEGERP (val) + ? (XINT (val) < 0 || XINT (val) >= uniprop_decoder_count) + : ! NILP (val)) + return Qnil; + /* Prepare ASCII values in advance for CHAR_TABLE_REF. */ + XCHAR_TABLE (table)->ascii = char_table_ascii (table); + return table; +} + +DEFUN ("unicode-property-table-internal", Funicode_property_table_internal, + Sunicode_property_table_internal, 1, 1, 0, + doc: /* Return a char-table for Unicode character property PROP. +Use `get-unicode-property-internal' and +`put-unicode-property-internal' instead of `aref' and `aset' to get +and put an element value. */) + (Lisp_Object prop) +{ + Lisp_Object table = uniprop_table (prop); + + if (CHAR_TABLE_P (table)) + return table; + return Fcdr (Fassq (prop, Vchar_code_property_alist)); +} + +DEFUN ("get-unicode-property-internal", Fget_unicode_property_internal, + Sget_unicode_property_internal, 2, 2, 0, + doc: /* Return an element of CHAR-TABLE for character CH. +CHAR-TABLE must be what returned by `unicode-property-table-internal'. */) + (Lisp_Object char_table, Lisp_Object ch) +{ + Lisp_Object val; + uniprop_decoder_t decoder; + + CHECK_CHAR_TABLE (char_table); + CHECK_CHARACTER (ch); + if (! UNIPROP_TABLE_P (char_table)) + error ("Invalid Unicode property table"); + val = CHAR_TABLE_REF (char_table, XINT (ch)); + decoder = uniprop_get_decoder (char_table); + return (decoder ? decoder (char_table, val) : val); +} + +DEFUN ("put-unicode-property-internal", Fput_unicode_property_internal, + Sput_unicode_property_internal, 3, 3, 0, + doc: /* Set an element of CHAR-TABLE for character CH to VALUE. +CHAR-TABLE must be what returned by `unicode-property-table-internal'. */) + (Lisp_Object char_table, Lisp_Object ch, Lisp_Object value) +{ + uniprop_encoder_t encoder; + + CHECK_CHAR_TABLE (char_table); + CHECK_CHARACTER (ch); + if (! UNIPROP_TABLE_P (char_table)) + error ("Invalid Unicode property table"); + encoder = uniprop_get_encoder (char_table); + if (encoder) + value = encoder (char_table, value); + CHAR_TABLE_SET (char_table, XINT (ch), value); + return Qnil; +} + void syms_of_chartab (void) { + DEFSYM (Qchar_code_property_table, "char-code-property-table"); + defsubr (&Smake_char_table); defsubr (&Schar_table_parent); defsubr (&Schar_table_subtype); @@ -998,4 +1418,19 @@ syms_of_chartab (void) defsubr (&Sset_char_table_default); defsubr (&Soptimize_char_table); defsubr (&Smap_char_table); + defsubr (&Sunicode_property_table_internal); + defsubr (&Sget_unicode_property_internal); + defsubr (&Sput_unicode_property_internal); + + /* Each element has the form (PROP . TABLE). + PROP is a symbol representing a character property. + TABLE is a char-table containing the property value for each character. + TABLE may be a name of file to load to build a char-table. + This variable should be modified only through + `define-char-code-property'. */ + + DEFVAR_LISP ("char-code-property-alist", Vchar_code_property_alist, + doc: /* Alist of character property name vs char-table containing property values. +Internal use only. */); + Vchar_code_property_alist = Qnil; } diff --git a/src/composite.c b/src/composite.c index 796c5a58de6..7123b505e68 100644 --- a/src/composite.c +++ b/src/composite.c @@ -976,9 +976,8 @@ static int _work_char; ((C) > ' ' \ && ((C) == 0x200C || (C) == 0x200D \ || (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \ - (SYMBOLP (_work_val) \ - && (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C' \ - && _work_char != 'Z')))) + (INTEGERP (_work_val) \ + && (XINT (_work_val) <= UNICODE_CATEGORY_So))))) /* Update cmp_it->stop_pos to the next position after CHARPOS (and BYTEPOS) where character composition may happen. If BYTEPOS is diff --git a/src/dispextern.h b/src/dispextern.h index 57fa09d3bfc..c0a67690a5c 100644 --- a/src/dispextern.h +++ b/src/dispextern.h @@ -1773,7 +1773,11 @@ extern int face_change_count; /* Data type for describing the bidirectional character types. The first 7 must be at the beginning, because they are the only values valid in the `bidi_type' member of `struct glyph'; we only reserve - 3 bits for it, so we cannot use there values larger than 7. */ + 3 bits for it, so we cannot use there values larger than 7. + + The order of members must be in sync with the 8th element of the + member of unidata-prop-alist (in admin/unidata/unidata-getn.el) for + Unicode character property `bidi-class'. */ typedef enum { UNKNOWN_BT = 0, STRONG_L, /* strong left-to-right */ diff --git a/src/font.c b/src/font.c index 14390335f3c..5aff20b1346 100644 --- a/src/font.c +++ b/src/font.c @@ -3739,8 +3739,9 @@ font_range (EMACS_INT pos, EMACS_INT *limit, struct window *w, struct face *face else FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string, pos, pos_byte); category = CHAR_TABLE_REF (Vunicode_category_table, c); - if (EQ (category, QCf) - || CHAR_VARIATION_SELECTOR_P (c)) + if (INTEGERP (category) + && (XINT (category) == UNICODE_CATEGORY_Cf + || CHAR_VARIATION_SELECTOR_P (c))) continue; if (NILP (font_object)) {