* lisp/international/ucs-normalize.el (check-range): Update ranges
of character codes with decompositions.
* lisp/international/mule-cmds.el (ucs-names): Update unused
ranges.
* lisp/international/fontset.el (script-representative-chars)
(otf-script-alist, setup-default-fontset): Add new scripts.
* lisp/international/characters.el:
* admin/unidata/blocks.awk:
* test/manual/BidiCharacterTest.txt:
* test/lisp/international/ucs-normalize-tests.el
(ucs-normalize-tests--failing-lines-part2): Update for Unicode
16.0.
* etc/NEWS: Announce support for Unicode 16.0.
(cherry picked from commit 04e8ad6489ebec121ace7ea6d582429a96af8f04)
alias["block elements"] = "symbol"
alias["miscellaneous symbols"] = "symbol"
alias["symbols for legacy computing"] = "symbol"
+ alias["symbols for legacy computing supplement"] = "symbol"
alias["cjk strokes"] = "cjk-misc"
alias["cjk symbols and punctuation"] = "cjk-misc"
alias["halfwidth and fullwidth forms"] = "cjk-misc"
** Internationalization
+---
+*** Emacs now supports Unicode version 16.0.
+
---
*** New language-environment and input method for Tifinagh.
The Tifinagh script is used to write the Berber languages.
(#x10A01 . #x10A0F)
(#x10A38 . #x10A3F)
(#x10AE5 . #x10AE6)
+ (#x10D69 . #x10D6D)
(#x10EAB . #x10EAC)
+ (#x10EFC . #x10EFF)
(#x11001 . #x11001)
(#x11038 . #x11046)
(#x1107F . #x11081)
(#x11340 . #x11340)
(#x11366 . #x1136C)
(#x11370 . #x11374)
+ (#x113BB . #x113C0)
+ (#x113CE . #x113CE)
+ (#x113D0 . #x113D0)
+ (#x113D2 . #x113D2)
+ (#x113E1 . #x113E2)
(#x11438 . #x1143F)
(#x11442 . #x11444)
(#x11446 . #x11446)
(#x11CAA . #x11CB0)
(#x11CB2 . #x11CB3)
(#x11CB5 . #x11CB6)
+ (#x11F5A . #x11F5A)
+ (#x13430 . #x13440)
+ (#x13447 . #x13455)
+ (#x1611E . #x16129)
+ (#x1612D . #x1612F)
(#x16AF0 . #x16AF4)
(#x16B30 . #x16B36)
(#x16F8F . #x16F92)
(#x16FE4 . #x16FE4)
(#x1BC9D . #x1BC9E)
(#x1BCA0 . #x1BCA3)
+ (#x1CF00 . #x1CF02)
(#x1D167 . #x1D169)
(#x1D173 . #x1D182)
(#x1D185 . #x1D18B)
(#x1E01B . #x1E021)
(#x1E023 . #x1E024)
(#x1E026 . #x1E02A)
+ (#x1E5EE . #x1E5EF)
(#x1E8D0 . #x1E8D6)
(#x1E944 . #x1E94A)
(#xE0001 . #xE01EF))))
(#x23F3 . #x23F3)
(#x25FD . #x25FE)
(#x2614 . #x2615)
+ (#x2630 . #x2637)
(#x2648 . #x2653)
(#x267F . #x267F)
+ (#x268A . #x268F)
(#x2693 . #x2693)
(#x26A1 . #x26A1)
(#x26AA . #x26AB)
(#x3041 . #x3096)
(#x3099 . #x30FF)
(#x3105 . #x312F)
- (#x3131 . #x31E3)
+ (#x3131 . #x31E5)
(#x31EF . #x31EF)
(#x31F0 . #x3247)
(#x3250 . #x4DBF)
(#x17000 . #x187F7)
(#x18800 . #x18AFF)
(#x18B00 . #x18CD5)
+ (#x18CFF . #x18CFF)
(#x18D00 . #x18D08)
(#x1AFF0 . #x1AFF3)
(#x1AFF5 . #x1AFFB)
(#x1B155 . #x1B155)
(#x1B164 . #x1B167)
(#x1B170 . #x1B2FB)
+ (#x1D300 . #x1D356)
+ (#x1D360 . #x1D376)
(#x1F004 . #x1F004)
(#x1F0CF . #x1F0CF)
(#x1F18E . #x1F18E)
(#x1FA60 . #x1FA6D)
(#x1FA70 . #x1FA74)
(#x1FA78 . #x1FA7C)
- (#x1FA80 . #x1FA88)
- (#x1FA90 . #x1FABD)
- (#x1FABF . #x1FAC5)
- (#x1FACE . #x1FADB)
- (#x1FAE0 . #x1FAE8)
+ (#x1FA80 . #x1FA89)
+ (#x1FA8F . #x1FAC6)
+ (#x1FACE . #x1FADC)
+ (#x1FADF . #x1FAE9)
(#x1FAF0 . #x1FAF8)
(#x1FB00 . #x1FB92)
(#x20000 . #x2FFFF)
(elbasan #x10500)
(caucasian-albanian #x10530)
(vithkuqi #x10570)
+ (todhri #x105C0 #x105ED)
(linear-a #x10600)
(cypriot-syllabary #x10800)
(palmyrene #x10860)
(kharoshthi #x10A00)
(manichaean #x10AC0)
(hanifi-rohingya #x10D00 #x10D24 #x10D39)
+ (garay #x10D50 #x10D70 #x10D4A #x10D41)
(yezidi #x10E80)
(old-sogdian #x10F00)
(sogdian #x10F30)
(khojki #x11200)
(khudawadi #x112B0)
(grantha #x11315 #x1133E #x11374)
+ (tulu-tigalari #x11380 #x113B8)
(newa #x11400)
(tirhuta #x11481 #x1148F #x114D0)
(siddham #x1158E #x115AF #x115D4)
(zanabazar-square #x11A00)
(soyombo #x11A50)
(pau-cin-hau #x11AC0)
+ (sunuwar #x11BC0 #x11BF1)
(bhaiksuki #x11C00)
(marchen #x11C72)
(masaram-gondi #x11D00)
(cuneiform #x12000)
(cypro-minoan #x12F90)
(egyptian #x13000)
+ (gurung-khema #x16100 #x1611E #x16131)
(mro #x16A40)
(tangsa #x16A70 #x16AC0)
(bassa-vah #x16AD0)
(pahawh-hmong #x16B11)
+ (kirat-rai #x16D43 #x16D63 #x16D71)
(medefaidrin #x16E40)
(tangut #x17000)
(khitan-small-script #x18B00)
(toto #x1E290 #x1E295 #x1E2AD)
(wancho #x1E2C0 #x1E2E8 #x1E2EF)
(nag-mundari #x1E4D0 #x1E4EB #x1E4F0)
+ (ol-onal #x1E5D0 #x1E5F2)
(mende-kikakui #x1E810 #x1E8A6)
(adlam #x1E900 #x1E943)
(indic-siyaq-number #x1EC71 #x1EC9F)
(defvar otf-script-alist)
-;; The below was synchronized with the latest Sep 12, 2021 version of
+;; The below was synchronized with the latest May 31, 2024 version of
;; https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
(setq otf-script-alist
'((adlm . adlam)
(elba . elbasan)
(elym . elymaic)
(ethi . ethiopic)
+ (gara . garay)
(geor . georgian)
(glag . glagolitic)
(goth . gothic)
(gong . gunjala-gondi)
(guru . gurmukhi)
(gur2 . gurmukhi)
+ (gukh . gurung-khema)
(hani . han)
(hang . hangul)
(jamo . hangul) ; Not recommended; use 'hang' instead.
(khmr . khmer)
(khoj . khojki)
(sind . khudawadi)
+ (krai . kirat-rai)
(lao\ . lao)
(latn . latin)
(lepc . lepcha)
(hmnp . nyiakeng-puachue-hmong)
(ogam . ogham)
(olck . ol-chiki)
+ (onao . ol-onal)
(ital . old-italic)
(xpeo . old-persian)
(narb . old-north-arabian)
(sora . sora-sompeng)
(soyo . soyombo)
(sund . sundanese)
+ (sunu . sunuwar)
(sylo . syloti-nagri)
(syrc . syriac)
(tglg . tagalog)
(tibt . tibetan)
(tfng . tifinagh)
(tirh . tirhuta)
+ (todr . todhri)
(toto . toto)
+ (tutg . tulu-tigalari)
(ugar . ugaritic)
(vith . vithkuqi)
(vai\ . vai)
mahjong-tile
domino-tile
emoji
- chess-symbol))
+ chess-symbol
+ garay
+ sunuwar))
(set-fontset-font "fontset-default"
script (font-spec :registry "iso10646-1" :script script)
nil 'append))
;; (#x4E00 . #x9FFF) CJK Unified Ideographs
(#xA000 . #xD7FF)
;; (#xD800 . #xF8FF) Surrogate/Private
- (#xFB00 . #x134FF)
- ;; (#x13500 . #x143FF) unused
+ (#xFB00 . #x143FA)
(#x14400 . #x14646)
- ;; (#x14647 . #x167FF) unused
+ ;; (#x14647 . #x160FF) unused
+ (#x16100 . #x16139)
+ ;; (#x1613A . #x167FF) unused
(#x16800 . #x16F9F)
(#x16FE0 . #x16FF1)
;; (#x17000 . #x187FF) Tangut Ideographs
(#x1B170 . #x1B2FF)
;; (#x1B300 . #x1BBFF) unused
(#x1BC00 . #x1BCAF)
- ;; (#x1BCB0 . #x1CEFF) unused
- (#x1CF00 . #x1FFFF)
+ ;; (#x1BCB0 . #x1CBFF) unused
+ (#x1CC00 . #x1FFFF)
;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused
(#xE0000 . #xE01FF)))
(gc-cons-threshold (max gc-cons-threshold 10000000))
(defvar check-range nil)
(setq check-range
'((#x00A0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x11100)
- (#x11100 . #x11A00) (#x1D000 . #x1E100) (#x1EE00 . #x1F300)
+ (#x11100 . #x11A00) (#x16100 . #x16DFF) (#x1CCD0 . #x1CCFF)
+ (#x1D000 . #x1E100) (#x1EE00 . #x1F300)
(#x1FBF0 . #x1FC00) (#x2F800 . #x2FB00)))
;; Basic normalization functions
ucs-normalize-tests--failing-lines-part1)))
(defconst ucs-normalize-tests--failing-lines-part2
- (list 17789 17790 17801 17802 17807 17808 17811 17812
- 17815 17816 17821 17822 17829 17830 17907 17908
- 18023 18024 18049 18050 18055 18056 18459 18460
- 18605 18606 18617 18618 18621 18622 18625 18626
- 18627 18628 18631 18632 18633 18634 18663 18664
- 18669 18670 18673 18674 18679 18680 18685 18686
- 18691 18692 18695 18697 18699 18701 18703 18704
- 18705 18707 18709 18711 18713 18715 18717 18719
- 18721 18723 18725 18727 18729 18731 18733 18735
- 18737 18739 18740 18741 18742 18743 18889 18891
- 18893 18895 18897 18899 18901 18903 18905 18907
- 18909 18911 18913 18914 18915 18916 18917 18919
- 18921 18923 18925 18927 18929 18931 18933 18935
- 18937 18939 18941 18943 18945 18947 18948))
+ (list 17867 17868 17879 17880 17885 17886 17889 17890
+ 17893 17894 17899 17900 17907 17908 17985 17986
+ 18101 18102 18127 18128 18133 18134 18537 18538
+ 18693 18694 18705 18706 18709 18710 18713 18714
+ 18715 18716 18719 18720 18721 18722 18757 18758
+ 18763 18764 18767 18768 18773 18774 18779 18780
+ 18785 18786 18789 18791 18793 18795 18797 18798
+ 18799 18801 18803 18805 18807 18835 18836 18837
+ 18838 18839 18985 18987 18989 18991 18993 18995
+ 18997 18999 19001 19003 19005 19007 19009 19010
+ 19011 19012 19013 19015 19017 19019 19021 19023
+ 19025 19027 19029 19031 19033 19035 19037 19039
+ 19041 19043 19045 19047 19048))
(ert-deftest ucs-normalize-part2 ()
:tags '(:expensive-test)
-# BidiCharacterTest-15.1.0.txt
-# Date: 2023-01-05
-# © 2023 Unicode®, Inc.
-# For terms of use, see https://www.unicode.org/terms_of_use.html
+# BidiCharacterTest-16.0.0.txt
+# Date: 2024-02-02
+# © 2024 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use and license, see https://www.unicode.org/terms_of_use.html
#
# Unicode Character Database
# For documentation, see https://www.unicode.org/reports/tr44/