From ad82612405604b7d20d86fe6b3283f91bee5e60a Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Wed, 4 Dec 2013 18:58:05 +0200 Subject: [PATCH] Fix bug #16043 with crashes when displaying new bidi control characters. src/bidi.c (bidi_get_type, bidi_get_category): Handle the isolate directional control characters. Update type and category determination according to the UBA from Unicode v6.3. (bidi_category_t): New category EXPLICIT_FORMATTING. src/dispextern.h (bidi_type_t): Update to include new bidirectional properties introduced with Unicode v6.3. admin/unidata/unidata-gen.el (unidata-prop-alist): Update bidi-class to include the new isolate-related classes introduced with Unicode v6.3. (unidata-encode-val): Accept an additional optional argument, a warning message to emit when UnicodeData.txt defines bidi-class values that are not in unidata-prop-alist. Add a comment explaining what maintainers should do if/when such a warning ever appears. (unidata-gen-table): Call unidata-encode-val with 3rd arg non-nil when generating uni-bidi.el. --- admin/ChangeLog | 13 ++++++++ admin/unidata/unidata-gen.el | 32 ++++++++++++++++---- src/ChangeLog | 10 +++++++ src/bidi.c | 58 +++++++++++++++++++++--------------- src/dispextern.h | 4 +++ 5 files changed, 87 insertions(+), 30 deletions(-) diff --git a/admin/ChangeLog b/admin/ChangeLog index 7d23542a84e..730253e616f 100644 --- a/admin/ChangeLog +++ b/admin/ChangeLog @@ -1,3 +1,16 @@ +2013-12-04 Eli Zaretskii + + * unidata/unidata-gen.el (unidata-prop-alist): Update bidi-class + to include the new isolate-related classes introduced with Unicode + v6.3. + (unidata-encode-val): Accept an additional optional argument, a + warning message to emit when UnicodeData.txt defines bidi-class + values that are not in unidata-prop-alist. Add a comment + explaining what maintainers should do if/when such a warning ever + appears. + (unidata-gen-table): Call unidata-encode-val with 3rd arg non-nil + when generating uni-bidi.el. 
+ 2013-12-01 Glenn Morris * unidata/Makefile.in (${DSTDIR}/charprop.el): diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index 42e1cc0bc3c..fa8f81636e3 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el @@ -194,8 +194,8 @@ Property value is an integer." 4 unidata-gen-table-symbol "uni-bidi.el" "Unicode bidi class. Property value is one of the following symbols: - L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET, - AN, CS, NSM, BN, B, S, WS, ON" + L, LRE, LRO, LRI, R, AL, RLE, RLO, RLI, FSI, PDF, PDI, + EN, ES, ET, AN, CS, NSM, BN, B, S, WS, ON" unidata-describe-bidi-class ;; The assignment of default values to blocks of code points ;; follows the file DerivedBidiClass.txt from the Unicode @@ -205,7 +205,8 @@ Property value is one of the following symbols: (#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R)) ;; The order of elements must be in sync with bidi_type_t in ;; src/dispextern.h. - (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON)) + (L R EN AN BN B AL LRE LRO RLE RLO PDF LRI RLI FSI PDI + ES ET CS NSM S WS ON)) (decomposition 5 unidata-gen-table-decomposition "uni-decomposition.el" "Unicode decomposition mapping. @@ -397,12 +398,17 @@ is the character itself."))) ;; If VAL is one of VALn, just return n. ;; Otherwise, VAL-LIST is modified to this: ;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1)) +;; +;; WARN is an optional warning to display when the value list is +;; extended, for property values that need to be in sync with other +;; parts of Emacs; currently only used for bidi-class. 
-(defun unidata-encode-val (val-list val) +(defun unidata-encode-val (val-list val &optional warn) (let ((slot (assoc val val-list)) val-code) (if slot (cdr slot) + (if warn (message warn val)) (setq val-code (length val-list)) (nconc val-list (list (cons val val-code))) val-code))) @@ -413,6 +419,16 @@ is the character itself."))) (let ((table (make-char-table 'char-code-property-table)) (prop-idx (unidata-prop-index prop)) (vec (make-vector 128 0)) + ;; When this warning is printed, there's a need to make the + ;; following changes: + ;; (1) update unidata-prop-alist with the new bidi-class values; + ;; (2) extend bidi_type_t enumeration on src/dispextern.h to + ;; include the new classes; + ;; (3) possibly update the assertion in bidi.c:bidi_check_type; and + ;; (4) possibly update the switch cases in + ;; bidi.c:bidi_get_type and bidi.c:bidi_get_category. + (bidi-warning "\ +** Found new bidi-class '%s', please update bidi.c and dispextern.h") tail elt range val val-code idx slot prev-range-data) (setq val-list (cons nil (copy-sequence val-list))) @@ -438,7 +454,9 @@ is the character itself."))) (setq elt (car tail) tail (cdr tail)) (setq range (car elt) val (funcall val-func (nth prop-idx elt))) - (setq val-code (if val (unidata-encode-val val-list val))) + (setq val-code (if val (unidata-encode-val val-list val + (and (eq prop 'bidi-class) + bidi-warning)))) (if (consp range) (when val-code (set-char-table-range table range val-code) @@ -486,7 +504,9 @@ is the character itself."))) (setq new-val (funcall val-func (nth prop-idx elt))) (if (not (eq val new-val)) (setq val new-val - val-code (if val (unidata-encode-val val-list val)))) + val-code (if val (unidata-encode-val + val-list val (and (eq prop 'bidi-class) + bidi-warning))))) (if val-code (aset vec (- range start) val-code)) (setq tail (cdr tail))) diff --git a/src/ChangeLog b/src/ChangeLog index a877bc885e7..4c7b3015877 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,13 @@ +2013-12-04 Eli 
Zaretskii + + * bidi.c (bidi_get_type, bidi_get_category): Handle the isolate + directional control characters. Update type and category + determination according to the UBA from Unicode v6.3. + (bidi_category_t): New category EXPLICIT_FORMATTING. + + * dispextern.h (bidi_type_t): Update to include new bidirectional + properties introduced with Unicode v6.3. (Bug#16043) + 2013-12-04 Martin Rudalics * xterm.c (XTflash): Fix coordinate of bottom area to flash diff --git a/src/bidi.c b/src/bidi.c index 98c3c17b78f..d3a617651f6 100644 --- a/src/bidi.c +++ b/src/bidi.c @@ -76,7 +76,8 @@ typedef enum { UNKNOWN_BC, NEUTRAL, WEAK, - STRONG + STRONG, + EXPLICIT_FORMATTING } bidi_category_t; /* UAX#9 says to search only for L, AL, or R types of characters, and @@ -115,13 +116,9 @@ bidi_get_type (int ch, bidi_dir_t override) if (default_type == UNKNOWN_BT) emacs_abort (); - if (override == NEUTRAL_DIR) - return default_type; - switch (default_type) { - /* Although UAX#9 does not tell, it doesn't make sense to - override NEUTRAL_B and LRM/RLM characters. */ + case WEAK_BN: case NEUTRAL_B: case LRE: case LRO: @@ -129,20 +126,20 @@ bidi_get_type (int ch, bidi_dir_t override) case RLO: case PDF: return default_type; + /* FIXME: The isolate controls are treated as BN until we add + support for UBA v6.3. */ + case LRI: + case RLI: + case FSI: + case PDI: + return WEAK_BN; default: - switch (ch) - { - case LRM_CHAR: - case RLM_CHAR: - return default_type; - default: - if (override == L2R) /* X6 */ - return STRONG_L; - else if (override == R2L) - return STRONG_R; - else - emacs_abort (); /* can't happen: handled above */ - } + if (override == L2R) + return STRONG_L; + else if (override == R2L) + return STRONG_R; + else + return default_type; } } @@ -163,12 +160,7 @@ bidi_get_category (bidi_type_t type) case STRONG_L: case STRONG_R: case STRONG_AL: - case LRE: - case LRO: - case RLE: - case RLO: return STRONG; - case PDF: /* ??? really?? 
*/ case WEAK_EN: case WEAK_ES: case WEAK_ET: @@ -176,12 +168,30 @@ bidi_get_category (bidi_type_t type) case WEAK_CS: case WEAK_NSM: case WEAK_BN: + /* FIXME */ + case LRI: + case RLI: + case FSI: + case PDI: return WEAK; case NEUTRAL_B: case NEUTRAL_S: case NEUTRAL_WS: case NEUTRAL_ON: return NEUTRAL; + case LRE: + case LRO: + case RLE: + case RLO: + case PDF: +#if 0 + /* FIXME: This awaits implementation of isolate support. */ + case LRI: + case RLI: + case FSI: + case PDI: +#endif + return EXPLICIT_FORMATTING; default: emacs_abort (); } diff --git a/src/dispextern.h b/src/dispextern.h index 2ce0a8f4c99..7de4edf2196 100644 --- a/src/dispextern.h +++ b/src/dispextern.h @@ -1895,6 +1895,10 @@ typedef enum { RLE, /* right-to-left embedding */ RLO, /* right-to-left override */ PDF, /* pop directional format */ + LRI, /* left-to-right isolate */ + RLI, /* right-to-left isolate */ + FSI, /* first strong isolate */ + PDI, /* pop directional isolate */ WEAK_ES, /* european number separator */ WEAK_ET, /* european number terminator */ WEAK_CS, /* common separator */ -- 2.39.2