From bec8e1d21a1474a8dbc1a4847f3282d41de7d1ba Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Thu, 12 Sep 2024 21:12:09 +0300 Subject: [PATCH] Update Emacs sources for Unicode 16.0 * lisp/international/ucs-normalize.el (check-range): Update ranges of character codes with decompositions. * lisp/international/mule-cmds.el (ucs-names): Update unused ranges. * lisp/international/fontset.el (script-representative-chars) (otf-script-alist, setup-default-fontset): Add new scripts. * lisp/international/characters.el: * admin/unidata/blocks.awk: * test/manual/BidiCharacterTest.txt: * test/lisp/international/ucs-normalize-tests.el (ucs-normalize-tests--failing-lines-part2): Update for Unicode 16.0. * etc/NEWS: Announce support for Unicode 16.0. (cherry picked from commit 04e8ad6489ebec121ace7ea6d582429a96af8f04) --- admin/unidata/blocks.awk | 1 + etc/NEWS | 3 ++ lisp/international/characters.el | 30 +++++++++++++++---- lisp/international/fontset.el | 20 +++++++++++-- lisp/international/mule-cmds.el | 11 +++---- lisp/international/ucs-normalize.el | 3 +- .../lisp/international/ucs-normalize-tests.el | 27 ++++++++--------- test/manual/BidiCharacterTest.txt | 9 +++--- 8 files changed, 72 insertions(+), 32 deletions(-) diff --git a/admin/unidata/blocks.awk b/admin/unidata/blocks.awk index 40cefe0d563..6393b7bdc63 100755 --- a/admin/unidata/blocks.awk +++ b/admin/unidata/blocks.awk @@ -57,6 +57,7 @@ BEGIN { alias["block elements"] = "symbol" alias["miscellaneous symbols"] = "symbol" alias["symbols for legacy computing"] = "symbol" + alias["symbols for legacy computing supplement"] = "symbol" alias["cjk strokes"] = "cjk-misc" alias["cjk symbols and punctuation"] = "cjk-misc" alias["halfwidth and fullwidth forms"] = "cjk-misc" diff --git a/etc/NEWS b/etc/NEWS index c2779738264..6beddd6cffa 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -92,6 +92,9 @@ It is equivalent to running ‘project-any-command’ with ‘find-file’. 
** Internationalization +--- +*** Emacs now supports Unicode version 16.0. + --- *** New language-environment and input method for Tifinagh. The Tifinagh script is used to write the Berber languages. diff --git a/lisp/international/characters.el b/lisp/international/characters.el index 1e5963f89f3..b13d5f9d7a3 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@ -1181,7 +1181,9 @@ with L, LRE, or LRO Unicode bidi character type.") (#x10A01 . #x10A0F) (#x10A38 . #x10A3F) (#x10AE5 . #x10AE6) + (#x10D69 . #x10D6D) (#x10EAB . #x10EAC) + (#x10EFC . #x10EFF) (#x11001 . #x11001) (#x11038 . #x11046) (#x1107F . #x11081) @@ -1207,6 +1209,11 @@ with L, LRE, or LRO Unicode bidi character type.") (#x11340 . #x11340) (#x11366 . #x1136C) (#x11370 . #x11374) + (#x113BB . #x113C0) + (#x113CE . #x113CE) + (#x113D0 . #x113D0) + (#x113D2 . #x113D2) + (#x113E1 . #x113E2) (#x11438 . #x1143F) (#x11442 . #x11444) (#x11446 . #x11446) @@ -1236,12 +1243,18 @@ with L, LRE, or LRO Unicode bidi character type.") (#x11CAA . #x11CB0) (#x11CB2 . #x11CB3) (#x11CB5 . #x11CB6) + (#x11F5A . #x11F5A) + (#x13430 . #x13440) + (#x13447 . #x13455) + (#x1611E . #x16129) + (#x1612D . #x1612F) (#x16AF0 . #x16AF4) (#x16B30 . #x16B36) (#x16F8F . #x16F92) (#x16FE4 . #x16FE4) (#x1BC9D . #x1BC9E) (#x1BCA0 . #x1BCA3) + (#x1CF00 . #x1CF02) (#x1D167 . #x1D169) (#x1D173 . #x1D182) (#x1D185 . #x1D18B) @@ -1258,6 +1271,7 @@ with L, LRE, or LRO Unicode bidi character type.") (#x1E01B . #x1E021) (#x1E023 . #x1E024) (#x1E026 . #x1E02A) + (#x1E5EE . #x1E5EF) (#x1E8D0 . #x1E8D6) (#x1E944 . #x1E94A) (#xE0001 . #xE01EF)))) @@ -1273,8 +1287,10 @@ with L, LRE, or LRO Unicode bidi character type.") (#x23F3 . #x23F3) (#x25FD . #x25FE) (#x2614 . #x2615) + (#x2630 . #x2637) (#x2648 . #x2653) (#x267F . #x267F) + (#x268A . #x268F) (#x2693 . #x2693) (#x26A1 . #x26A1) (#x26AA . #x26AB) @@ -1308,7 +1324,7 @@ with L, LRE, or LRO Unicode bidi character type.") (#x3041 . #x3096) (#x3099 . #x30FF) (#x3105 . 
#x312F) - (#x3131 . #x31E3) + (#x3131 . #x31E5) (#x31EF . #x31EF) (#x31F0 . #x3247) (#x3250 . #x4DBF) @@ -1326,6 +1342,7 @@ with L, LRE, or LRO Unicode bidi character type.") (#x17000 . #x187F7) (#x18800 . #x18AFF) (#x18B00 . #x18CD5) + (#x18CFF . #x18CFF) (#x18D00 . #x18D08) (#x1AFF0 . #x1AFF3) (#x1AFF5 . #x1AFFB) @@ -1336,6 +1353,8 @@ with L, LRE, or LRO Unicode bidi character type.") (#x1B155 . #x1B155) (#x1B164 . #x1B167) (#x1B170 . #x1B2FB) + (#x1D300 . #x1D356) + (#x1D360 . #x1D376) (#x1F004 . #x1F004) (#x1F0CF . #x1F0CF) (#x1F18E . #x1F18E) @@ -1383,11 +1402,10 @@ with L, LRE, or LRO Unicode bidi character type.") (#x1FA60 . #x1FA6D) (#x1FA70 . #x1FA74) (#x1FA78 . #x1FA7C) - (#x1FA80 . #x1FA88) - (#x1FA90 . #x1FABD) - (#x1FABF . #x1FAC5) - (#x1FACE . #x1FADB) - (#x1FAE0 . #x1FAE8) + (#x1FA80 . #x1FA89) + (#x1FA8F . #x1FAC6) + (#x1FACE . #x1FADC) + (#x1FADF . #x1FAE9) (#x1FAF0 . #x1FAF8) (#x1FB00 . #x1FB92) (#x20000 . #x2FFFF) diff --git a/lisp/international/fontset.el b/lisp/international/fontset.el index 02d60a88aa3..2d91a5f2329 100644 --- a/lisp/international/fontset.el +++ b/lisp/international/fontset.el @@ -237,6 +237,7 @@ (elbasan #x10500) (caucasian-albanian #x10530) (vithkuqi #x10570) + (todhri #x105C0 #x105ED) (linear-a #x10600) (cypriot-syllabary #x10800) (palmyrene #x10860) @@ -246,6 +247,7 @@ (kharoshthi #x10A00) (manichaean #x10AC0) (hanifi-rohingya #x10D00 #x10D24 #x10D39) + (garay #x10D50 #x10D70 #x10D4A #x10D41) (yezidi #x10E80) (old-sogdian #x10F00) (sogdian #x10F30) @@ -259,6 +261,7 @@ (khojki #x11200) (khudawadi #x112B0) (grantha #x11315 #x1133E #x11374) + (tulu-tigalari #x11380 #x113B8) (newa #x11400) (tirhuta #x11481 #x1148F #x114D0) (siddham #x1158E #x115AF #x115D4) @@ -271,6 +274,7 @@ (zanabazar-square #x11A00) (soyombo #x11A50) (pau-cin-hau #x11AC0) + (sunuwar #x11BC0 #x11BF1) (bhaiksuki #x11C00) (marchen #x11C72) (masaram-gondi #x11D00) @@ -280,10 +284,12 @@ (cuneiform #x12000) (cypro-minoan #x12F90) (egyptian #x13000) + (gurung-khema 
#x16100 #x1611E #x16131) (mro #x16A40) (tangsa #x16A70 #x16AC0) (bassa-vah #x16AD0) (pahawh-hmong #x16B11) + (kirat-rai #x16D43 #x16D63 #x16D71) (medefaidrin #x16E40) (tangut #x17000) (khitan-small-script #x18B00) @@ -300,6 +306,7 @@ (toto #x1E290 #x1E295 #x1E2AD) (wancho #x1E2C0 #x1E2E8 #x1E2EF) (nag-mundari #x1E4D0 #x1E4EB #x1E4F0) + (ol-onal #x1E5D0 #x1E5F2) (mende-kikakui #x1E810 #x1E8A6) (adlam #x1E900 #x1E943) (indic-siyaq-number #x1EC71 #x1EC9F) @@ -311,7 +318,7 @@ (defvar otf-script-alist) -;; The below was synchronized with the latest Sep 12, 2021 version of +;; The below was synchronized with the latest May 31, 2024 version of ;; https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags (setq otf-script-alist '((adlm . adlam) @@ -356,6 +363,7 @@ (elba . elbasan) (elym . elymaic) (ethi . ethiopic) + (gara . garay) (geor . georgian) (glag . glagolitic) (goth . gothic) @@ -366,6 +374,7 @@ (gong . gunjala-gondi) (guru . gurmukhi) (gur2 . gurmukhi) + (gukh . gurung-khema) (hani . han) (hang . hangul) (jamo . hangul) ; Not recommended; use 'hang' instead. @@ -388,6 +397,7 @@ (khmr . khmer) (khoj . khojki) (sind . khudawadi) + (krai . kirat-rai) (lao\ . lao) (latn . latin) (lepc . lepcha) @@ -428,6 +438,7 @@ (hmnp . nyiakeng-puachue-hmong) (ogam . ogham) (olck . ol-chiki) + (onao . ol-onal) (ital . old-italic) (xpeo . old-persian) (narb . old-north-arabian) @@ -461,6 +472,7 @@ (sora . sora-sompeng) (soyo . soyombo) (sund . sundanese) + (sunu . sunuwar) (sylo . syloti-nagri) (syrc . syriac) (tglg . tagalog) @@ -481,7 +493,9 @@ (tibt . tibetan) (tfng . tifinagh) (tirh . tirhuta) + (todr . todhri) (toto . toto) + (tutg . tulu-tigalari) (ugar . ugaritic) (vith . vithkuqi) (vai\ . 
vai) @@ -872,7 +886,9 @@ mahjong-tile domino-tile emoji - chess-symbol)) + chess-symbol + garay + sunuwar)) (set-fontset-font "fontset-default" script (font-spec :registry "iso10646-1" :script script) nil 'append)) diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index 5207bf5a3b9..c29fd9d3448 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el @@ -3109,10 +3109,11 @@ on encoding." ;; (#x4E00 . #x9FFF) CJK Unified Ideographs (#xA000 . #xD7FF) ;; (#xD800 . #xF8FF) Surrogate/Private - (#xFB00 . #x134FF) - ;; (#x13500 . #x143FF) unused + (#xFB00 . #x143FA) (#x14400 . #x14646) - ;; (#x14647 . #x167FF) unused + ;; (#x14647 . #x160FF) unused + (#x16100 . #x16139) + ;; (#x1613A . #x167FF) unused (#x16800 . #x16F9F) (#x16FE0 . #x16FF1) ;; (#x17000 . #x187FF) Tangut Ideographs @@ -3128,8 +3129,8 @@ on encoding." (#x1B170 . #x1B2FF) ;; (#x1B300 . #x1BBFF) unused (#x1BC00 . #x1BCAF) - ;; (#x1BCB0 . #x1CEFF) unused - (#x1CF00 . #x1FFFF) + ;; (#x1BCB0 . #x1CBFF) unused + (#x1CC00 . #x1FFFF) ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused (#xE0000 . #xE01FF))) (gc-cons-threshold (max gc-cons-threshold 10000000)) diff --git a/lisp/international/ucs-normalize.el b/lisp/international/ucs-normalize.el index ccb2022375c..94712a92bf7 100644 --- a/lisp/international/ucs-normalize.el +++ b/lisp/international/ucs-normalize.el @@ -142,7 +142,8 @@ (defvar check-range nil) (setq check-range '((#x00A0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x11100) - (#x11100 . #x11A00) (#x1D000 . #x1E100) (#x1EE00 . #x1F300) + (#x11100 . #x11A00) (#x16100 . #x16DFF) (#x1CCD0 . #x1CCFF) + (#x1D000 . #x1E100) (#x1EE00 . #x1F300) (#x1FBF0 . #x1FC00) (#x2F800 . 
#x2FB00))) ;; Basic normalization functions diff --git a/test/lisp/international/ucs-normalize-tests.el b/test/lisp/international/ucs-normalize-tests.el index 7b17ee56fb9..b7231a85345 100644 --- a/test/lisp/international/ucs-normalize-tests.el +++ b/test/lisp/international/ucs-normalize-tests.el @@ -247,20 +247,19 @@ Must be called with `ucs-normalize-tests--norm-buf' as current buffer." ucs-normalize-tests--failing-lines-part1))) (defconst ucs-normalize-tests--failing-lines-part2 - (list 17789 17790 17801 17802 17807 17808 17811 17812 - 17815 17816 17821 17822 17829 17830 17907 17908 - 18023 18024 18049 18050 18055 18056 18459 18460 - 18605 18606 18617 18618 18621 18622 18625 18626 - 18627 18628 18631 18632 18633 18634 18663 18664 - 18669 18670 18673 18674 18679 18680 18685 18686 - 18691 18692 18695 18697 18699 18701 18703 18704 - 18705 18707 18709 18711 18713 18715 18717 18719 - 18721 18723 18725 18727 18729 18731 18733 18735 - 18737 18739 18740 18741 18742 18743 18889 18891 - 18893 18895 18897 18899 18901 18903 18905 18907 - 18909 18911 18913 18914 18915 18916 18917 18919 - 18921 18923 18925 18927 18929 18931 18933 18935 - 18937 18939 18941 18943 18945 18947 18948)) + (list 17867 17868 17879 17880 17885 17886 17889 17890 + 17893 17894 17899 17900 17907 17908 17985 17986 + 18101 18102 18127 18128 18133 18134 18537 18538 + 18693 18694 18705 18706 18709 18710 18713 18714 + 18715 18716 18719 18720 18721 18722 18757 18758 + 18763 18764 18767 18768 18773 18774 18779 18780 + 18785 18786 18789 18791 18793 18795 18797 18798 + 18799 18801 18803 18805 18807 18835 18836 18837 + 18838 18839 18985 18987 18989 18991 18993 18995 + 18997 18999 19001 19003 19005 19007 19009 19010 + 19011 19012 19013 19015 19017 19019 19021 19023 + 19025 19027 19029 19031 19033 19035 19037 19039 + 19041 19043 19045 19047 19048)) (ert-deftest ucs-normalize-part2 () :tags '(:expensive-test) diff --git a/test/manual/BidiCharacterTest.txt b/test/manual/BidiCharacterTest.txt index 
6b3ef016036..304b6ca4670 100644 --- a/test/manual/BidiCharacterTest.txt +++ b/test/manual/BidiCharacterTest.txt @@ -1,7 +1,8 @@ -# BidiCharacterTest-15.1.0.txt -# Date: 2023-01-05 -# © 2023 Unicode®, Inc. -# For terms of use, see https://www.unicode.org/terms_of_use.html +# BidiCharacterTest-16.0.0.txt +# Date: 2024-02-02 +# © 2024 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use and license, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see https://www.unicode.org/reports/tr44/ -- 2.39.5