From bec8e1d21a1474a8dbc1a4847f3282d41de7d1ba Mon Sep 17 00:00:00 2001
From: Eli Zaretskii <eliz@gnu.org>
Date: Thu, 12 Sep 2024 21:12:09 +0300
Subject: [PATCH] Update Emacs sources for Unicode 16.0

* lisp/international/ucs-normalize.el (check-range): Update ranges
of character codes with decompositions.
* lisp/international/mule-cmds.el (ucs-names): Update unused
ranges.
* lisp/international/fontset.el (script-representative-chars)
(otf-script-alist, setup-default-fontset): Add new scripts.
* lisp/international/characters.el:
* admin/unidata/blocks.awk:
* test/manual/BidiCharacterTest.txt:
* test/lisp/international/ucs-normalize-tests.el
(ucs-normalize-tests--failing-lines-part2): Update for Unicode
16.0.

* etc/NEWS: Announce support for Unicode 16.0.

(cherry picked from commit 04e8ad6489ebec121ace7ea6d582429a96af8f04)
---
 admin/unidata/blocks.awk                      |  1 +
 etc/NEWS                                      |  3 ++
 lisp/international/characters.el              | 30 +++++++++++++++----
 lisp/international/fontset.el                 | 20 +++++++++++--
 lisp/international/mule-cmds.el               | 11 +++----
 lisp/international/ucs-normalize.el           |  3 +-
 .../lisp/international/ucs-normalize-tests.el | 27 ++++++++---------
 test/manual/BidiCharacterTest.txt             |  9 +++---
 8 files changed, 72 insertions(+), 32 deletions(-)

diff --git a/admin/unidata/blocks.awk b/admin/unidata/blocks.awk
index 40cefe0d563..6393b7bdc63 100755
--- a/admin/unidata/blocks.awk
+++ b/admin/unidata/blocks.awk
@@ -57,6 +57,7 @@ BEGIN {
     alias["block elements"] = "symbol"
     alias["miscellaneous symbols"] = "symbol"
     alias["symbols for legacy computing"] = "symbol"
+    alias["symbols for legacy computing supplement"] = "symbol"
     alias["cjk strokes"] = "cjk-misc"
     alias["cjk symbols and punctuation"] = "cjk-misc"
     alias["halfwidth and fullwidth forms"] = "cjk-misc"
diff --git a/etc/NEWS b/etc/NEWS
index c2779738264..6beddd6cffa 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -92,6 +92,9 @@ It is equivalent to running ‘project-any-command’ with ‘find-file’.
 
 ** Internationalization
 
+---
+*** Emacs now supports Unicode version 16.0.
+
 ---
 *** New language-environment and input method for Tifinagh.
 The Tifinagh script is used to write the Berber languages.
diff --git a/lisp/international/characters.el b/lisp/international/characters.el
index 1e5963f89f3..b13d5f9d7a3 100644
--- a/lisp/international/characters.el
+++ b/lisp/international/characters.el
@@ -1181,7 +1181,9 @@ with L, LRE, or LRO Unicode bidi character type.")
 	   (#x10A01 . #x10A0F)
 	   (#x10A38 . #x10A3F)
 	   (#x10AE5 . #x10AE6)
+           (#x10D69 . #x10D6D)
 	   (#x10EAB . #x10EAC)
+           (#x10EFC . #x10EFF)
 	   (#x11001 . #x11001)
 	   (#x11038 . #x11046)
 	   (#x1107F . #x11081)
@@ -1207,6 +1209,11 @@ with L, LRE, or LRO Unicode bidi character type.")
 	   (#x11340 . #x11340)
 	   (#x11366 . #x1136C)
 	   (#x11370 . #x11374)
+           (#x113BB . #x113C0)
+           (#x113CE . #x113CE)
+           (#x113D0 . #x113D0)
+           (#x113D2 . #x113D2)
+           (#x113E1 . #x113E2)
 	   (#x11438 . #x1143F)
 	   (#x11442 . #x11444)
 	   (#x11446 . #x11446)
@@ -1236,12 +1243,18 @@ with L, LRE, or LRO Unicode bidi character type.")
 	   (#x11CAA . #x11CB0)
 	   (#x11CB2 . #x11CB3)
 	   (#x11CB5 . #x11CB6)
+           (#x11F5A . #x11F5A)
+           (#x13430 . #x13440)
+           (#x13447 . #x13455)
+           (#x1611E . #x16129)
+           (#x1612D . #x1612F)
 	   (#x16AF0 . #x16AF4)
 	   (#x16B30 . #x16B36)
 	   (#x16F8F . #x16F92)
 	   (#x16FE4 . #x16FE4)
 	   (#x1BC9D . #x1BC9E)
 	   (#x1BCA0 . #x1BCA3)
+           (#x1CF00 . #x1CF02)
 	   (#x1D167 . #x1D169)
 	   (#x1D173 . #x1D182)
 	   (#x1D185 . #x1D18B)
@@ -1258,6 +1271,7 @@ with L, LRE, or LRO Unicode bidi character type.")
 	   (#x1E01B . #x1E021)
 	   (#x1E023 . #x1E024)
 	   (#x1E026 . #x1E02A)
+           (#x1E5EE . #x1E5EF)
 	   (#x1E8D0 . #x1E8D6)
 	   (#x1E944 . #x1E94A)
 	   (#xE0001 . #xE01EF))))
@@ -1273,8 +1287,10 @@ with L, LRE, or LRO Unicode bidi character type.")
 	   (#x23F3 . #x23F3)
 	   (#x25FD . #x25FE)
 	   (#x2614 . #x2615)
+           (#x2630 . #x2637)
 	   (#x2648 . #x2653)
 	   (#x267F . #x267F)
+           (#x268A . #x268F)
 	   (#x2693 . #x2693)
 	   (#x26A1 . #x26A1)
 	   (#x26AA . #x26AB)
@@ -1308,7 +1324,7 @@ with L, LRE, or LRO Unicode bidi character type.")
 	   (#x3041 . #x3096)
            (#x3099 . #x30FF)
            (#x3105 . #x312F)
-           (#x3131 . #x31E3)
+           (#x3131 . #x31E5)
            (#x31EF . #x31EF)
            (#x31F0 . #x3247)
 	   (#x3250 . #x4DBF)
@@ -1326,6 +1342,7 @@ with L, LRE, or LRO Unicode bidi character type.")
 	   (#x17000 . #x187F7)
 	   (#x18800 . #x18AFF)
 	   (#x18B00 . #x18CD5)
+           (#x18CFF . #x18CFF)
            (#x18D00 . #x18D08)
 	   (#x1AFF0 . #x1AFF3)
            (#x1AFF5 . #x1AFFB)
@@ -1336,6 +1353,8 @@ with L, LRE, or LRO Unicode bidi character type.")
            (#x1B155 . #x1B155)
 	   (#x1B164 . #x1B167)
 	   (#x1B170 . #x1B2FB)
+           (#x1D300 . #x1D356)
+           (#x1D360 . #x1D376)
 	   (#x1F004 . #x1F004)
 	   (#x1F0CF . #x1F0CF)
 	   (#x1F18E . #x1F18E)
@@ -1383,11 +1402,10 @@ with L, LRE, or LRO Unicode bidi character type.")
 	   (#x1FA60 . #x1FA6D)
 	   (#x1FA70 . #x1FA74)
 	   (#x1FA78 . #x1FA7C)
-	   (#x1FA80 . #x1FA88)
-	   (#x1FA90 . #x1FABD)
-	   (#x1FABF . #x1FAC5)
-	   (#x1FACE . #x1FADB)
-	   (#x1FAE0 . #x1FAE8)
+	   (#x1FA80 . #x1FA89)
+	   (#x1FA8F . #x1FAC6)
+	   (#x1FACE . #x1FADC)
+	   (#x1FADF . #x1FAE9)
 	   (#x1FAF0 . #x1FAF8)
 	   (#x1FB00 . #x1FB92)
 	   (#x20000 . #x2FFFF)
diff --git a/lisp/international/fontset.el b/lisp/international/fontset.el
index 02d60a88aa3..2d91a5f2329 100644
--- a/lisp/international/fontset.el
+++ b/lisp/international/fontset.el
@@ -237,6 +237,7 @@
 	(elbasan #x10500)
 	(caucasian-albanian #x10530)
 	(vithkuqi #x10570)
+        (todhri #x105C0 #x105ED)
 	(linear-a #x10600)
 	(cypriot-syllabary #x10800)
 	(palmyrene #x10860)
@@ -246,6 +247,7 @@
 	(kharoshthi #x10A00)
 	(manichaean #x10AC0)
 	(hanifi-rohingya #x10D00 #x10D24 #x10D39)
+        (garay #x10D50 #x10D70 #x10D4A #x10D41)
 	(yezidi #x10E80)
 	(old-sogdian #x10F00)
 	(sogdian #x10F30)
@@ -259,6 +261,7 @@
 	(khojki #x11200)
 	(khudawadi #x112B0)
 	(grantha #x11315 #x1133E #x11374)
+        (tulu-tigalari #x11380 #x113B8)
 	(newa #x11400)
 	(tirhuta #x11481 #x1148F #x114D0)
 	(siddham #x1158E #x115AF #x115D4)
@@ -271,6 +274,7 @@
 	(zanabazar-square #x11A00)
 	(soyombo #x11A50)
 	(pau-cin-hau #x11AC0)
+        (sunuwar #x11BC0 #x11BF1)
 	(bhaiksuki #x11C00)
 	(marchen #x11C72)
 	(masaram-gondi #x11D00)
@@ -280,10 +284,12 @@
 	(cuneiform #x12000)
 	(cypro-minoan #x12F90)
 	(egyptian #x13000)
+        (gurung-khema #x16100 #x1611E #x16131)
 	(mro #x16A40)
 	(tangsa #x16A70 #x16AC0)
 	(bassa-vah #x16AD0)
 	(pahawh-hmong #x16B11)
+        (kirat-rai #x16D43 #x16D63 #x16D71)
 	(medefaidrin #x16E40)
 	(tangut #x17000)
 	(khitan-small-script #x18B00)
@@ -300,6 +306,7 @@
 	(toto #x1E290 #x1E295 #x1E2AD)
 	(wancho #x1E2C0 #x1E2E8 #x1E2EF)
         (nag-mundari #x1E4D0 #x1E4EB #x1E4F0)
+        (ol-onal #x1E5D0 #x1E5F2)
 	(mende-kikakui #x1E810 #x1E8A6)
 	(adlam #x1E900 #x1E943)
 	(indic-siyaq-number #x1EC71 #x1EC9F)
@@ -311,7 +318,7 @@
 
 (defvar otf-script-alist)
 
-;; The below was synchronized with the latest Sep 12, 2021 version of
+;; The below was synchronized with the latest May 31, 2024 version of
 ;; https://docs.microsoft.com/en-us/typography/opentype/spec/scripttags
 (setq otf-script-alist
       '((adlm . adlam)
@@ -356,6 +363,7 @@
 	(elba . elbasan)
 	(elym . elymaic)
 	(ethi . ethiopic)
+        (gara . garay)
 	(geor . georgian)
 	(glag . glagolitic)
 	(goth . gothic)
@@ -366,6 +374,7 @@
 	(gong . gunjala-gondi)
 	(guru . gurmukhi)
 	(gur2 . gurmukhi)
+        (gukh . gurung-khema)
 	(hani . han)
 	(hang . hangul)
 	(jamo . hangul) ; Not recommended; use 'hang' instead.
@@ -388,6 +397,7 @@
 	(khmr . khmer)
 	(khoj . khojki)
 	(sind . khudawadi)
+        (krai . kirat-rai)
 	(lao\  . lao)
 	(latn . latin)
 	(lepc . lepcha)
@@ -428,6 +438,7 @@
 	(hmnp . nyiakeng-puachue-hmong)
 	(ogam . ogham)
 	(olck . ol-chiki)
+        (omao . ol-onal)
 	(ital . old-italic)
 	(xpeo . old-persian)
 	(narb . old-north-arabian)
@@ -461,6 +472,7 @@
 	(sora . sora-sompeng)
 	(soyo . soyombo)
 	(sund . sundanese)
+        (sunu . sunuwar)
 	(sylo . syloti-nagri)
 	(syrc . syriac)
 	(tglg . tagalog)
@@ -481,7 +493,9 @@
 	(tibt . tibetan)
 	(tfng . tifinagh)
 	(tirh . tirhuta)
+        (todr . todhri)
         (toto . toto)
+        (tutg . tulu-tigalari)
 	(ugar . ugaritic)
         (vith . vithkuqi)
 	(vai\  . vai)
@@ -872,7 +886,9 @@
 		    mahjong-tile
 		    domino-tile
                     emoji
-                    chess-symbol))
+                    chess-symbol
+                    garay
+                    sunuwar))
     (set-fontset-font "fontset-default"
 		      script (font-spec :registry "iso10646-1" :script script)
 		      nil 'append))
diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el
index 5207bf5a3b9..c29fd9d3448 100644
--- a/lisp/international/mule-cmds.el
+++ b/lisp/international/mule-cmds.el
@@ -3109,10 +3109,11 @@ on encoding."
 	       ;; (#x4E00 . #x9FFF) CJK Unified Ideographs
 	       (#xA000 . #xD7FF)
 	       ;; (#xD800 . #xF8FF) Surrogate/Private
-	       (#xFB00 . #x134FF)
-	       ;; (#x13500 . #x143FF) unused
+	       (#xFB00 . #x143FA)
                (#x14400 . #x14646)
-	       ;; (#x14647 . #x167FF) unused
+	       ;; (#x14647 . #x160FF) unused
+               (#x16100 . #x16139)
+               ;; (#x1613A . #x167FF) unused
 	       (#x16800 . #x16F9F)
                (#x16FE0 . #x16FF1)
                ;; (#x17000 . #x187FF) Tangut Ideographs
@@ -3128,8 +3129,8 @@ on encoding."
                (#x1B170 . #x1B2FF)
 	       ;; (#x1B300 . #x1BBFF) unused
                (#x1BC00 . #x1BCAF)
-	       ;; (#x1BCB0 . #x1CEFF) unused
-	       (#x1CF00 . #x1FFFF)
+	       ;; (#x1BCB0 . #x1CBFF) unused
+               (#x1CC00 . #x1FFFF)
 	       ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused
 	       (#xE0000 . #xE01FF)))
             (gc-cons-threshold (max gc-cons-threshold 10000000))
diff --git a/lisp/international/ucs-normalize.el b/lisp/international/ucs-normalize.el
index ccb2022375c..94712a92bf7 100644
--- a/lisp/international/ucs-normalize.el
+++ b/lisp/international/ucs-normalize.el
@@ -142,7 +142,8 @@
   (defvar check-range nil)
     (setq check-range
           '((#x00A0 . #x3400) (#xA600 . #xAC00) (#xF900 . #x11100)
-            (#x11100 . #x11A00) (#x1D000 . #x1E100) (#x1EE00 . #x1F300)
+            (#x11100 . #x11A00) (#x16100 . #x16DFF) (#x1CCD0 . #x1CCFF)
+            (#x1D000 . #x1E100) (#x1EE00 . #x1F300)
             (#x1FBF0 . #x1FC00) (#x2F800 . #x2FB00)))
 
   ;; Basic normalization functions
diff --git a/test/lisp/international/ucs-normalize-tests.el b/test/lisp/international/ucs-normalize-tests.el
index 7b17ee56fb9..b7231a85345 100644
--- a/test/lisp/international/ucs-normalize-tests.el
+++ b/test/lisp/international/ucs-normalize-tests.el
@@ -247,20 +247,19 @@ Must be called with `ucs-normalize-tests--norm-buf' as current buffer."
     ucs-normalize-tests--failing-lines-part1)))
 
 (defconst ucs-normalize-tests--failing-lines-part2
-  (list 17789 17790 17801 17802 17807 17808 17811 17812
-        17815 17816 17821 17822 17829 17830 17907 17908
-        18023 18024 18049 18050 18055 18056 18459 18460
-        18605 18606 18617 18618 18621 18622 18625 18626
-        18627 18628 18631 18632 18633 18634 18663 18664
-        18669 18670 18673 18674 18679 18680 18685 18686
-        18691 18692 18695 18697 18699 18701 18703 18704
-        18705 18707 18709 18711 18713 18715 18717 18719
-        18721 18723 18725 18727 18729 18731 18733 18735
-        18737 18739 18740 18741 18742 18743 18889 18891
-        18893 18895 18897 18899 18901 18903 18905 18907
-        18909 18911 18913 18914 18915 18916 18917 18919
-        18921 18923 18925 18927 18929 18931 18933 18935
-        18937 18939 18941 18943 18945 18947 18948))
+  (list 17867 17868 17879 17880 17885 17886 17889 17890
+        17893 17894 17899 17900 17907 17908 17985 17986
+        18101 18102 18127 18128 18133 18134 18537 18538
+        18693 18694 18705 18706 18709 18710 18713 18714
+        18715 18716 18719 18720 18721 18722 18757 18758
+        18763 18764 18767 18768 18773 18774 18779 18780
+        18785 18786 18789 18791 18793 18795 18797 18798
+        18799 18801 18803 18805 18807 18835 18836 18837
+        18838 18839 18985 18987 18989 18991 18993 18995
+        18997 18999 19001 19003 19005 19007 19009 19010
+        19011 19012 19013 19015 19017 19019 19021 19023
+        19025 19027 19029 19031 19033 19035 19037 19039
+        19041 19043 19045 19047 19048))
 
 (ert-deftest ucs-normalize-part2 ()
   :tags '(:expensive-test)
diff --git a/test/manual/BidiCharacterTest.txt b/test/manual/BidiCharacterTest.txt
index 6b3ef016036..304b6ca4670 100644
--- a/test/manual/BidiCharacterTest.txt
+++ b/test/manual/BidiCharacterTest.txt
@@ -1,7 +1,8 @@
-# BidiCharacterTest-15.1.0.txt
-# Date: 2023-01-05
-# © 2023 Unicode®, Inc.
-# For terms of use, see https://www.unicode.org/terms_of_use.html
+# BidiCharacterTest-16.0.0.txt
+# Date: 2024-02-02
+# © 2024 Unicode®, Inc.
+# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
+# For terms of use and license, see https://www.unicode.org/terms_of_use.html
 #
 # Unicode Character Database
 # For documentation, see https://www.unicode.org/reports/tr44/
-- 
2.39.5