4 unidata-gen-table-symbol "uni-bidi.el"
"Unicode bidi class.
Property value is one of the following symbols:
- L, LRE, LRO, R, AL, RLE, RLO, PDF, EN, ES, ET,
- AN, CS, NSM, BN, B, S, WS, ON"
+ L, LRE, LRO, LRI, R, AL, RLE, RLO, RLI, FSI, PDF, PDI,
+ EN, ES, ET, AN, CS, NSM, BN, B, S, WS, ON"
unidata-describe-bidi-class
;; The assignment of default values to blocks of code points
;; follows the file DerivedBidiClass.txt from the Unicode
(#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R))
;; The order of elements must be in sync with bidi_type_t in
;; src/dispextern.h.
- (L R EN AN BN B AL LRE LRO RLE RLO PDF ES ET CS NSM S WS ON))
+ (L R EN AN BN B AL LRE LRO RLE RLO PDF LRI RLI FSI PDI
+ ES ET CS NSM S WS ON))
(decomposition
5 unidata-gen-table-decomposition "uni-decomposition.el"
"Unicode decomposition mapping.
;; If VAL is one of VALn, just return n.
;; Otherwise, VAL-LIST is modified to this:
;; ((nil . 0) (VAL1 . 1) (VAL2 . 2) ... (VAL . n+1))
+;;
+;; WARN, if non-nil, is a format string for `message' (called with
+;; VAL as its argument) that is displayed when the value list is
+;; extended.  It is meant for property values that need to be in sync
+;; with other parts of Emacs; currently only used for bidi-class.
-(defun unidata-encode-val (val-list val)
+(defun unidata-encode-val (val-list val &optional warn)
(let ((slot (assoc val val-list)) ; existing (VAL . CODE) entry, if any
val-code)
(if slot
(cdr slot) ; VAL is already in VAL-LIST: return its code
+ (if warn (message warn val)) ; WARN is used as a `message' format string with VAL as its argument
(setq val-code (length val-list)) ; the new code is the current length of VAL-LIST
(nconc val-list (list (cons val val-code))) ; destructively append (VAL . VAL-CODE) to VAL-LIST
val-code))) ; return the newly assigned code
(let ((table (make-char-table 'char-code-property-table))
(prop-idx (unidata-prop-index prop))
(vec (make-vector 128 0))
+ ;; When this warning is printed, there's a need to make the
+ ;; following changes:
+ ;; (1) update unidata-prop-alist with the new bidi-class values;
+ ;; (2) extend the bidi_type_t enumeration in src/dispextern.h to
+ ;; include the new classes;
+ ;; (3) possibly update the assertion in bidi.c:bidi_check_type; and
+ ;; (4) possibly update the switch cases in
+ ;; bidi.c:bidi_get_type and bidi.c:bidi_get_category.
+ (bidi-warning "\
+** Found new bidi-class '%s', please update bidi.c and dispextern.h")
tail elt range val val-code idx slot
prev-range-data)
(setq val-list (cons nil (copy-sequence val-list)))
(setq elt (car tail) tail (cdr tail))
(setq range (car elt)
val (funcall val-func (nth prop-idx elt)))
- (setq val-code (if val (unidata-encode-val val-list val)))
+ (setq val-code (if val (unidata-encode-val val-list val
+ (and (eq prop 'bidi-class)
+ bidi-warning))))
(if (consp range)
(when val-code
(set-char-table-range table range val-code)
(setq new-val (funcall val-func (nth prop-idx elt)))
(if (not (eq val new-val))
(setq val new-val
- val-code (if val (unidata-encode-val val-list val))))
+ val-code (if val (unidata-encode-val
+ val-list val (and (eq prop 'bidi-class)
+ bidi-warning)))))
(if val-code
(aset vec (- range start) val-code))
(setq tail (cdr tail)))
UNKNOWN_BC,
NEUTRAL,
WEAK,
- STRONG
+ STRONG,
+ EXPLICIT_FORMATTING
} bidi_category_t;
/* UAX#9 says to search only for L, AL, or R types of characters, and
if (default_type == UNKNOWN_BT)
emacs_abort ();
- if (override == NEUTRAL_DIR)
- return default_type;
-
switch (default_type)
{
- /* Although UAX#9 does not tell, it doesn't make sense to
- override NEUTRAL_B and LRM/RLM characters. */
+ case WEAK_BN:
case NEUTRAL_B:
case LRE:
case LRO:
case RLO:
case PDF:
return default_type;
+ /* FIXME: The isolate controls are treated as BN until we add
+ support for UBA v6.3. */
+ case LRI:
+ case RLI:
+ case FSI:
+ case PDI:
+ return WEAK_BN;
default:
- switch (ch)
- {
- case LRM_CHAR:
- case RLM_CHAR:
- return default_type;
- default:
- if (override == L2R) /* X6 */
- return STRONG_L;
- else if (override == R2L)
- return STRONG_R;
- else
- emacs_abort (); /* can't happen: handled above */
- }
+ if (override == L2R)
+ return STRONG_L;
+ else if (override == R2L)
+ return STRONG_R;
+ else
+ return default_type;
}
}
case STRONG_L:
case STRONG_R:
case STRONG_AL:
- case LRE:
- case LRO:
- case RLE:
- case RLO:
return STRONG;
- case PDF: /* ??? really?? */
case WEAK_EN:
case WEAK_ES:
case WEAK_ET:
case WEAK_CS:
case WEAK_NSM:
case WEAK_BN:
+ /* FIXME: The isolate controls are categorized as WEAK until isolate support is implemented.  */
+ case LRI:
+ case RLI:
+ case FSI:
+ case PDI:
return WEAK;
case NEUTRAL_B:
case NEUTRAL_S:
case NEUTRAL_WS:
case NEUTRAL_ON:
return NEUTRAL;
+ case LRE:
+ case LRO:
+ case RLE:
+ case RLO:
+ case PDF:
+#if 0
+ /* FIXME: This awaits implementation of isolate support. */
+ case LRI:
+ case RLI:
+ case FSI:
+ case PDI:
+#endif
+ return EXPLICIT_FORMATTING;
default:
emacs_abort ();
}
RLE, /* right-to-left embedding */
RLO, /* right-to-left override */
PDF, /* pop directional format */
+ LRI, /* left-to-right isolate */
+ RLI, /* right-to-left isolate */
+ FSI, /* first strong isolate */
+ PDI, /* pop directional isolate */
WEAK_ES, /* european number separator */
WEAK_ET, /* european number terminator */
WEAK_CS, /* common separator */