From 8f17e4de2d7d5ab21e752b27a355d28b1e17d971 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii <eliz@gnu.org> Date: Wed, 4 May 2022 11:14:45 +0300 Subject: [PATCH] Fix 'bidi-class' property of unassigned codepoints * admin/unidata/unidata-gen.el (unidata-file-alist): Update the default values of 'bidi-class' according to the latest Unicode Standard. * admin/notes/unicode: Mention possible changes in DerivedBidiClass.txt that need to be reflected in unidata-gen.el. * lisp/international/characters.el (#xfb50, #xfdf0): Fix the Arabic block characters. (Bug#55256) --- admin/notes/unicode | 6 ++++++ admin/unidata/unidata-gen.el | 12 +++++++++--- lisp/international/characters.el | 3 ++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/admin/notes/unicode b/admin/notes/unicode index 41661995000..f699f4fb1c0 100644 --- a/admin/notes/unicode +++ b/admin/notes/unicode @@ -36,6 +36,12 @@ copyright.html in admin/unidata (some of them might need trailing whitespace removed before they can be committed to the Emacs repository). +Next, review the assignment of default values of the Bidi Class +property to blocks in the file extracted/DerivedBidiClass.txt from the +UCD (search for "unassigned" in that file). Any changes should be +reflected in the unidata-gen.el file, where it sets up the default +values around line 210. + Then Emacs should be rebuilt for them to take effect. Rebuilding Emacs updates several derived files elsewhere in the Emacs source tree, mainly in lisp/international/. diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index ad72eed9955..149f7535588 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el @@ -209,9 +209,15 @@ Property value is one of the following symbols: ;; The assignment of default values to blocks of code points ;; follows the file DerivedBidiClass.txt from the Unicode ;; Character Database (UCD). 
- (L (#x0600 #x06FF AL) (#xFB50 #xFDFF AL) (#xFE70 #xFEFF AL) - (#x0590 #x05FF R) (#x07C0 #x08FF R) - (#xFB1D #xFB4F R) (#x10800 #x10FFF R) (#x1E800 #x1EFFF R)) + (L (#x0600 #x07BF AL) (#x0860 #x08FF AL) (#xFB50 #xFDCF AL) + (#xFDF0 #xFDFF AL) (#xFE70 #xFEFF AL) (#x10D00 #x10D3F AL) + (#x10F30 #x10F6F AL) (#x1EC70 #x1ECBF AL) (#x1ED00 #x1ED4F AL) + (#x1EE00 #x1EEFF AL) + (#x0590 #x05FF R) (#x07C0 #x085F R) (#xFB1D #xFB4F R) + (#x10800 #x10CFF R) (#x10D40 #x10F2F R) (#x10F70 #x10FFF R) + (#x1E800 #x1EC6F R) (#x1ECC0 #x1ECFF R) (#x1ED50 #x1EDFF R) + (#x1EF00 #x1EFFF R) + (#x20A0 #x20CF ET)) ;; The order of elements must be in sync with bidi_type_t in ;; src/dispextern.h. (L R EN AN BN B AL LRE LRO RLE RLO PDF LRI RLI FSI PDI diff --git a/lisp/international/characters.el b/lisp/international/characters.el index 63ac455ea6a..03fb1810866 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@ -303,7 +303,8 @@ with L, LRE, or LRO Unicode bidi character type.") (setq charsets (cdr charsets)))) (modify-category-entry '(#x600 . #x6ff) ?b) (modify-category-entry '(#x870 . #x8ff) ?b) -(modify-category-entry '(#xfb50 . #xfdff) ?b) +(modify-category-entry '(#xfb50 . #xfdcf) ?b) +(modify-category-entry '(#xfdf0 . #xfdff) ?b) (modify-category-entry '(#xfe70 . #xfefe) ?b) ;; Cyrillic character set (ISO-8859-5) -- 2.39.5