From 38757723e14960260a5a1843715a83e4de26522f Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Sun, 17 Sep 2023 11:40:06 +0300 Subject: [PATCH] Support Unicode version 15.1 * admin/unidata/BidiBrackets.txt: * admin/unidata/BidiMirroring.txt: * admin/unidata/Blocks.txt: * admin/unidata/IdnaMappingTable.txt: * admin/unidata/NormalizationTest.txt: * admin/unidata/PropertyValueAliases.txt: * admin/unidata/ScriptExtensions.txt: * admin/unidata/Scripts.txt: * admin/unidata/SpecialCasing.txt: * admin/unidata/UnicodeData.txt: * admin/unidata/confusables.txt: * admin/unidata/copyright.html: * test/manual/BidiCharacterTest.txt: * admin/unidata/emoji-data.txt: * admin/unidata/emoji-sequences.txt: * admin/unidata/emoji-test.txt: * admin/unidata/emoji-variation-sequences.txt: * admin/unidata/emoji-zwj-sequences.txt: Update from Unicode data files. * admin/notes/unicode: Update instructions. * lisp/international/characters.el: Update 'char-width-table' data. * etc/NEWS: Announce support for Unicode 15.1. --- admin/notes/unicode | 13 +- admin/unidata/BidiBrackets.txt | 10 +- admin/unidata/BidiMirroring.txt | 8 +- admin/unidata/Blocks.txt | 7 +- admin/unidata/IdnaMappingTable.txt | 23 +- admin/unidata/NormalizationTest.txt | 6 +- admin/unidata/PropertyValueAliases.txt | 38 +- admin/unidata/ScriptExtensions.txt | 47 +- admin/unidata/Scripts.txt | 14 +- admin/unidata/SpecialCasing.txt | 6 +- admin/unidata/UnicodeData.txt | 7 + admin/unidata/confusables.txt | 10 +- admin/unidata/copyright.html | 20 +- admin/unidata/emoji-data.txt | 6 +- admin/unidata/emoji-sequences.txt | 17 +- admin/unidata/emoji-test.txt | 337 ++++- admin/unidata/emoji-variation-sequences.txt | 1458 ++++++++++--------- admin/unidata/emoji-zwj-sequences.txt | 144 +- etc/NEWS | 2 + lisp/international/characters.el | 3 +- test/manual/BidiCharacterTest.txt | 6 +- 21 files changed, 1341 insertions(+), 841 deletions(-) diff --git a/admin/notes/unicode b/admin/notes/unicode index f51393e8d4e..3748989e2fe 100644 --- a/admin/notes/unicode +++ b/admin/notes/unicode @@ -39,9 +39,9 @@ repository). Next, review the assignment of default values of the Bidi Class property to blocks in the file extracted/DerivedBidiClass.txt from the -UCD (search for "unassigned" in that file). Any changes should be -reflected in the unidata-gen.el file, where it sets up the default -values around line 210. +UCD (search for "unassigned" and "@missing" in that file). Any +changes should be reflected in the unidata-gen.el file, where it sets +up the default values around line 210. Then Emacs should be rebuilt for them to take effect. Rebuilding Emacs updates several derived files elsewhere in the Emacs source @@ -61,9 +61,10 @@ Next, review the changes in UnicodeData.txt vs the previous version used by Emacs. Any changes, be it introduction of new scripts or addition of codepoints to existing scripts, might need corresponding changes in the data used for filling the category-table, case-table, -and char-width-table. The additional scripts should cause automatic -updates in charscript.el, but it is a good idea to look at the results -and see if any changes in admin/unidata/blocks.awk are required. +and char-width-table in characters.el. The additional scripts should +cause automatic updates in charscript.el, but it is a good idea to +look at the results and see if any changes in admin/unidata/blocks.awk +are required. The setting of char-width-table around line 1200 of characters.el should be checked against the latest version of the Unicode file diff --git a/admin/unidata/BidiBrackets.txt b/admin/unidata/BidiBrackets.txt index e138e7f5bea..8cebea41544 100644 --- a/admin/unidata/BidiBrackets.txt +++ b/admin/unidata/BidiBrackets.txt @@ -1,6 +1,6 @@ -# BidiBrackets-15.0.0.txt -# Date: 2022-05-03, 18:42:00 GMT [AG, LI, KW] -# © 2022 Unicode®, Inc. +# BidiBrackets-15.1.0.txt +# Date: 2023-01-18 +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # @@ -12,11 +12,11 @@ # This file is a normative contributory data file in the Unicode # Character Database. # -# Bidi_Paired_Bracket is a normative property of type Miscellaneous, +# Bidi_Paired_Bracket is a normative property # which establishes a mapping between characters that are treated as # bracket pairs by the Unicode Bidirectional Algorithm. # -# Bidi_Paired_Bracket_Type is a normative property of type Enumeration, +# Bidi_Paired_Bracket_Type is a normative property # which classifies characters into opening and closing paired brackets # for the purposes of the Unicode Bidirectional Algorithm. # diff --git a/admin/unidata/BidiMirroring.txt b/admin/unidata/BidiMirroring.txt index 5861d6e7f4b..7e58cc4d715 100644 --- a/admin/unidata/BidiMirroring.txt +++ b/admin/unidata/BidiMirroring.txt @@ -1,6 +1,6 @@ -# BidiMirroring-15.0.0.txt -# Date: 2022-05-03, 18:47:00 GMT [KW, RP] -# © 2022 Unicode®, Inc. +# BidiMirroring-15.1.0.txt +# Date: 2023-01-05 +# © 2023 Unicode®, Inc. # For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database @@ -15,7 +15,7 @@ # value, for which there is another Unicode character that typically has a glyph # that is the mirror image of the original character's glyph. # -# The repertoire covered by the file is Unicode 15.0.0. +# The repertoire covered by the file is Unicode 15.1.0. # # The file contains a list of lines with mappings from one code point # to another one for character-based mirroring. diff --git a/admin/unidata/Blocks.txt b/admin/unidata/Blocks.txt index 12684594c9f..8fa3eaad04a 100644 --- a/admin/unidata/Blocks.txt +++ b/admin/unidata/Blocks.txt @@ -1,6 +1,6 @@ -# Blocks-15.0.0.txt -# Date: 2022-01-28, 20:58:00 GMT [KW] -# © 2022 Unicode®, Inc. +# Blocks-15.1.0.txt +# Date: 2023-07-28, 15:47:20 GMT +# © 2023 Unicode®, Inc. # For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Character Database @@ -352,6 +352,7 @@ FFF0..FFFF; Specials 2B740..2B81F; CJK Unified Ideographs Extension D 2B820..2CEAF; CJK Unified Ideographs Extension E 2CEB0..2EBEF; CJK Unified Ideographs Extension F +2EBF0..2EE5F; CJK Unified Ideographs Extension I 2F800..2FA1F; CJK Compatibility Ideographs Supplement 30000..3134F; CJK Unified Ideographs Extension G 31350..323AF; CJK Unified Ideographs Extension H diff --git a/admin/unidata/IdnaMappingTable.txt b/admin/unidata/IdnaMappingTable.txt index e4c06117929..3bf6b2668a4 100644 --- a/admin/unidata/IdnaMappingTable.txt +++ b/admin/unidata/IdnaMappingTable.txt @@ -1,11 +1,11 @@ # IdnaMappingTable.txt -# Date: 2022-05-02, 19:29:26 GMT -# © 2022 Unicode®, Inc. +# Date: 2023-08-10, 22:32:27 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode IDNA Compatible Preprocessing for UTS #46 -# Version: 15.0.0 +# Version: 15.1.0 # # For documentation and usage, see https://www.unicode.org/reports/tr46 # @@ -2036,7 +2036,7 @@ 1E9A ; mapped ; 0061 02BE # 1.1 LATIN SMALL LETTER A WITH RIGHT HALF RING 1E9B ; mapped ; 1E61 # 2.0 LATIN SMALL LETTER LONG S WITH DOT ABOVE 1E9C..1E9D ; valid # 5.1 LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE..LATIN SMALL LETTER LONG S WITH HIGH STROKE -1E9E ; mapped ; 0073 0073 # 5.1 LATIN CAPITAL LETTER SHARP S +1E9E ; mapped ; 00DF # 5.1 LATIN CAPITAL LETTER SHARP S 1E9F ; valid # 5.1 LATIN SMALL LETTER DELTA 1EA0 ; mapped ; 1EA1 # 1.1 LATIN CAPITAL LETTER A WITH DOT BELOW 1EA1 ; valid # 1.1 LATIN SMALL LETTER A WITH DOT BELOW @@ -2565,11 +2565,7 @@ 222E ; valid ; ; NV8 # 1.1 CONTOUR INTEGRAL 222F ; mapped ; 222E 222E # 1.1 SURFACE INTEGRAL 2230 ; mapped ; 222E 222E 222E #1.1 VOLUME INTEGRAL -2231..225F ; valid ; ; NV8 # 1.1 CLOCKWISE INTEGRAL..QUESTIONED EQUAL TO -2260 ; disallowed_STD3_valid # 1.1 NOT EQUAL TO -2261..226D ; valid ; ; NV8 # 1.1 IDENTICAL TO..NOT EQUIVALENT TO -226E..226F ; disallowed_STD3_valid # 1.1 NOT LESS-THAN..NOT GREATER-THAN -2270..22F1 ; valid ; ; NV8 # 1.1 NEITHER LESS-THAN NOR EQUAL TO..DOWN RIGHT DIAGONAL ELLIPSIS +2231..22F1 ; valid ; ; NV8 # 1.1 CLOCKWISE INTEGRAL..DOWN RIGHT DIAGONAL ELLIPSIS 22F2..22FF ; valid ; ; NV8 # 3.2 ELEMENT OF WITH LONG HORIZONTAL STROKE..Z NOTATION BAG MEMBERSHIP 2300 ; valid ; ; NV8 # 1.1 DIAMETER SIGN 2301 ; valid ; ; NV8 # 3.0 ELECTRIC ARROW @@ -3273,7 +3269,7 @@ 2FD5 ; mapped ; 9FA0 # 3.0 KANGXI RADICAL FLUTE 2FD6..2FEF ; disallowed # NA .. 2FF0..2FFB ; disallowed # 3.0 IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID -2FFC..2FFF ; disallowed # NA .. +2FFC..2FFF ; disallowed # 15.1 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION 3000 ; disallowed_STD3_mapped ; 0020 # 1.1 IDEOGRAPHIC SPACE 3001 ; valid ; ; NV8 # 1.1 IDEOGRAPHIC COMMA 3002 ; mapped ; 002E # 1.1 IDEOGRAPHIC FULL STOP @@ -3425,7 +3421,8 @@ 31BB..31BF ; valid # 13.0 BOPOMOFO FINAL LETTER G..BOPOMOFO LETTER AH 31C0..31CF ; valid ; ; NV8 # 4.1 CJK STROKE T..CJK STROKE N 31D0..31E3 ; valid ; ; NV8 # 5.1 CJK STROKE H..CJK STROKE Q -31E4..31EF ; disallowed # NA .. +31E4..31EE ; disallowed # NA .. +31EF ; disallowed # 15.1 IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 31F0..31FF ; valid # 3.2 KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO 3200 ; disallowed_STD3_mapped ; 0028 1100 0029 #1.1 PARENTHESIZED HANGUL KIYEOK 3201 ; disallowed_STD3_mapped ; 0028 1102 0029 #1.1 PARENTHESIZED HANGUL NIEUN @@ -8450,7 +8447,9 @@ FFFE..FFFF ; disallowed # 1.1 .. 2CEB0..2EBE0 ; valid # 10.0 CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 -2EBE1..2F7FF ; disallowed # NA .. +2EBE1..2EBEF ; disallowed # NA .. +2EBF0..2EE5D ; valid # 15.1 CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D +2EE5E..2F7FF ; disallowed # NA .. 2F800 ; mapped ; 4E3D # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F800 2F801 ; mapped ; 4E38 # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F801 2F802 ; mapped ; 4E41 # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F802 diff --git a/admin/unidata/NormalizationTest.txt b/admin/unidata/NormalizationTest.txt index e75b4801c9b..2e88574243d 100644 --- a/admin/unidata/NormalizationTest.txt +++ b/admin/unidata/NormalizationTest.txt @@ -1,6 +1,6 @@ -# NormalizationTest-15.0.0.txt -# Date: 2022-04-02, 01:29:09 GMT -# © 2022 Unicode®, Inc. +# NormalizationTest-15.1.0.txt +# Date: 2023-01-05, 20:34:44 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # diff --git a/admin/unidata/PropertyValueAliases.txt b/admin/unidata/PropertyValueAliases.txt index 9346fcf03ee..6d308108818 100644 --- a/admin/unidata/PropertyValueAliases.txt +++ b/admin/unidata/PropertyValueAliases.txt @@ -1,6 +1,6 @@ -# PropertyValueAliases-15.0.0.txt -# Date: 2022-08-05, 23:42:17 GMT -# © 2022 Unicode®, Inc. +# PropertyValueAliases-15.1.0.txt +# Date: 2023-08-07, 15:21:34 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # @@ -91,6 +91,7 @@ age; 12.1 ; V12_1 age; 13.0 ; V13_0 age; 14.0 ; V14_0 age; 15.0 ; V15_0 +age; 15.1 ; V15_1 age; NA ; Unassigned # Alphabetic (Alpha) @@ -208,6 +209,7 @@ blk; CJK_Ext_E ; CJK_Unified_Ideographs_Extension_E blk; CJK_Ext_F ; CJK_Unified_Ideographs_Extension_F blk; CJK_Ext_G ; CJK_Unified_Ideographs_Extension_G blk; CJK_Ext_H ; CJK_Unified_Ideographs_Extension_H +blk; CJK_Ext_I ; CJK_Unified_Ideographs_Extension_I blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement blk; CJK_Strokes ; CJK_Strokes blk; CJK_Symbols ; CJK_Symbols_And_Punctuation @@ -817,6 +819,21 @@ IDSB; Y ; Yes ; T IDST; N ; No ; F ; False IDST; Y ; Yes ; T ; True +# IDS_Unary_Operator (IDSU) + +IDSU; N ; No ; F ; False +IDSU; Y ; Yes ; T ; True + +# ID_Compat_Math_Continue (ID_Compat_Math_Continue) + +ID_Compat_Math_Continue; N ; No ; F ; False +ID_Compat_Math_Continue; Y ; Yes ; T ; True + +# ID_Compat_Math_Start (ID_Compat_Math_Start) + +ID_Compat_Math_Start; N ; No ; F ; False +ID_Compat_Math_Start; Y ; Yes ; T ; True + # ID_Continue (IDC) IDC; N ; No ; F ; False @@ -836,6 +853,13 @@ IDS; Y ; Yes ; T Ideo; N ; No ; F ; False Ideo; Y ; Yes ; T ; True +# Indic_Conjunct_Break (InCB) + +InCB; Consonant ; Consonant +InCB; Extend ; Extend +InCB; Linker ; Linker +InCB; None ; None + # Indic_Positional_Category (InPC) InPC; Bottom ; Bottom @@ -1074,7 +1098,10 @@ jt ; U ; Non_Joining # Line_Break (lb) lb ; AI ; Ambiguous +lb ; AK ; Aksara lb ; AL ; Alphabetic +lb ; AP ; Aksara_Prebase +lb ; AS ; Aksara_Start lb ; B2 ; Break_Both lb ; BA ; Break_After lb ; BB ; Break_Before @@ -1112,6 +1139,8 @@ lb ; SA ; Complex_Context lb ; SG ; Surrogate lb ; SP ; Space lb ; SY ; Break_Symbols +lb ; VF ; Virama_Final +lb ; VI ; Virama lb ; WJ ; Word_Joiner lb ; XX ; Unknown lb ; ZW ; ZWSpace @@ -1156,6 +1185,9 @@ NFKC_QC; M ; Maybe NFKC_QC; N ; No NFKC_QC; Y ; Yes +# NFKC_Simple_Casefold (NFKC_SCF) + + # NFKD_Quick_Check (NFKD_QC) NFKD_QC; N ; No diff --git a/admin/unidata/ScriptExtensions.txt b/admin/unidata/ScriptExtensions.txt index 2f5a1727e33..23141fb8241 100644 --- a/admin/unidata/ScriptExtensions.txt +++ b/admin/unidata/ScriptExtensions.txt @@ -1,6 +1,6 @@ -# ScriptExtensions-15.0.0.txt -# Date: 2022-02-02, 00:57:11 GMT -# © 2022 Unicode®, Inc. +# ScriptExtensions-15.1.0.txt +# Date: 2023-02-01, 23:02:24 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # @@ -136,20 +136,20 @@ # ================================================ -# Script_Extensions=Arab Rohg +# Script_Extensions=Arab Nkoo -06D4 ; Arab Rohg # Po ARABIC FULL STOP +FD3E ; Arab Nkoo # Pe ORNATE LEFT PARENTHESIS +FD3F ; Arab Nkoo # Ps ORNATE RIGHT PARENTHESIS -# Total code points: 1 +# Total code points: 2 # ================================================ -# Script_Extensions=Arab Nkoo +# Script_Extensions=Arab Rohg -FD3E ; Arab Nkoo # Pe ORNATE LEFT PARENTHESIS -FD3F ; Arab Nkoo # Ps ORNATE RIGHT PARENTHESIS +06D4 ; Arab Rohg # Po ARABIC FULL STOP -# Total code points: 2 +# Total code points: 1 # ================================================ @@ -553,17 +553,17 @@ FF64..FF65 ; Bopo Hang Hani Hira Kana Yiii # Po [2] HALFWIDTH IDEOGRAPHIC C # ================================================ -# Script_Extensions=Beng Deva Gran Knda Nand Orya Telu Tirh +# Script_Extensions=Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc -1CF2 ; Beng Deva Gran Knda Nand Orya Telu Tirh # Lo VEDIC SIGN ARDHAVISARGA +0640 ; Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc # Lm ARABIC TATWEEL # Total code points: 1 # ================================================ -# Script_Extensions=Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc +# Script_Extensions=Beng Deva Gran Knda Mlym Nand Orya Sinh Telu Tirh -0640 ; Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc # Lm ARABIC TATWEEL +1CF2 ; Beng Deva Gran Knda Mlym Nand Orya Sinh Telu Tirh # Lo VEDIC SIGN ARDHAVISARGA # Total code points: 1 @@ -572,10 +572,9 @@ FF64..FF65 ; Bopo Hang Hani Hira Kana Yiii # Po [2] HALFWIDTH IDEOGRAPHIC C # Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh A836..A837 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK -A838 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # Sc NORTH INDIC RUPEE MARK A839 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So NORTH INDIC QUANTITY MARK -# Total code points: 4 +# Total code points: 3 # ================================================ @@ -587,6 +586,14 @@ A839 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So # ================================================ +# Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Shrd Sind Takr Tirh + +A838 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Shrd Sind Takr Tirh # Sc NORTH INDIC RUPEE MARK + +# Total code points: 1 + +# ================================================ + # Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh 0951 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh # Mn DEVANAGARI STRESS SIGN UDATTA @@ -595,17 +602,17 @@ A839 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So # ================================================ -# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Sind Takr Tirh +# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Shrd Sind Takr Tirh -A833..A835 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE SIXTEENTH..NORTH INDIC FRACTION THREE SIXTEENTHS +A833..A835 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Shrd Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE SIXTEENTH..NORTH INDIC FRACTION THREE SIXTEENTHS # Total code points: 3 # ================================================ -# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Sind Takr Tirh +# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Shrd Sind Takr Tirh -A830..A832 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE QUARTERS +A830..A832 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Shrd Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE QUARTERS # Total code points: 3 diff --git a/admin/unidata/Scripts.txt b/admin/unidata/Scripts.txt index 2b138bffb88..0b3f717cb20 100644 --- a/admin/unidata/Scripts.txt +++ b/admin/unidata/Scripts.txt @@ -1,6 +1,6 @@ -# Scripts-15.0.0.txt -# Date: 2022-04-26, 23:15:02 GMT -# © 2022 Unicode®, Inc. +# Scripts-15.1.0.txt +# Date: 2023-07-28, 16:01:07 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # @@ -357,7 +357,7 @@ 2E5B ; Common # Ps BOTTOM HALF LEFT PARENTHESIS 2E5C ; Common # Pe BOTTOM HALF RIGHT PARENTHESIS 2E5D ; Common # Pd OBLIQUE HYPHEN -2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +2FF0..2FFF ; Common # So [16] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION 3000 ; Common # Zs IDEOGRAPHIC SPACE 3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK 3004 ; Common # So JAPANESE INDUSTRIAL STANDARD SYMBOL @@ -399,6 +399,7 @@ 3192..3195 ; Common # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK 3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK 31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q +31EF ; Common # So IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION 3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN 322A..3247 ; Common # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO 3248..324F ; Common # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE @@ -629,7 +630,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 8301 +# Total code points: 8306 # ================================================ @@ -1593,11 +1594,12 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D 2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2EBF0..2EE5D ; Han # Lo [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D 2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Han # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 98408 +# Total code points: 99030 # ================================================ diff --git a/admin/unidata/SpecialCasing.txt b/admin/unidata/SpecialCasing.txt index 08d04fa9421..de08450a6b9 100644 --- a/admin/unidata/SpecialCasing.txt +++ b/admin/unidata/SpecialCasing.txt @@ -1,6 +1,6 @@ -# SpecialCasing-15.0.0.txt -# Date: 2022-02-02, 23:35:52 GMT -# © 2022 Unicode®, Inc. +# SpecialCasing-15.1.0.txt +# Date: 2023-01-05, 20:35:03 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # diff --git a/admin/unidata/UnicodeData.txt b/admin/unidata/UnicodeData.txt index ea963a7162c..bdcc41850d7 100644 --- a/admin/unidata/UnicodeData.txt +++ b/admin/unidata/UnicodeData.txt @@ -11231,6 +11231,10 @@ 2FF9;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT;So;0;ON;;;;;N;;;;; 2FFA;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT;So;0;ON;;;;;N;;;;; 2FFB;IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID;So;0;ON;;;;;N;;;;; +2FFC;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM RIGHT;So;0;ON;;;;;N;;;;; +2FFD;IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER RIGHT;So;0;ON;;;;;N;;;;; +2FFE;IDEOGRAPHIC DESCRIPTION CHARACTER HORIZONTAL REFLECTION;So;0;ON;;;;;N;;;;; +2FFF;IDEOGRAPHIC DESCRIPTION CHARACTER ROTATION;So;0;ON;;;;;N;;;;; 3000;IDEOGRAPHIC SPACE;Zs;0;WS; 0020;;;;N;;;;; 3001;IDEOGRAPHIC COMMA;Po;0;ON;;;;;N;;;;; 3002;IDEOGRAPHIC FULL STOP;Po;0;ON;;;;;N;IDEOGRAPHIC PERIOD;;;; @@ -11705,6 +11709,7 @@ 31E1;CJK STROKE HZZZG;So;0;ON;;;;;N;;;;; 31E2;CJK STROKE PG;So;0;ON;;;;;N;;;;; 31E3;CJK STROKE Q;So;0;ON;;;;;N;;;;; +31EF;IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION;So;0;ON;;;;;N;;;;; 31F0;KATAKANA LETTER SMALL KU;Lo;0;L;;;;;N;;;;; 31F1;KATAKANA LETTER SMALL SI;Lo;0;L;;;;;N;;;;; 31F2;KATAKANA LETTER SMALL SU;Lo;0;L;;;;;N;;;;; @@ -34035,6 +34040,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 2CEA1;;Lo;0;L;;;;;N;;;;; 2CEB0;;Lo;0;L;;;;;N;;;;; 2EBE0;;Lo;0;L;;;;;N;;;;; +2EBF0;;Lo;0;L;;;;;N;;;;; +2EE5D;;Lo;0;L;;;;;N;;;;; 2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;; 2F801;CJK COMPATIBILITY IDEOGRAPH-2F801;Lo;0;L;4E38;;;;N;;;;; 2F802;CJK COMPATIBILITY IDEOGRAPH-2F802;Lo;0;L;4E41;;;;N;;;;; diff --git a/admin/unidata/confusables.txt b/admin/unidata/confusables.txt index 24b61d519af..5e056ed5a35 100644 --- a/admin/unidata/confusables.txt +++ b/admin/unidata/confusables.txt @@ -1,11 +1,11 @@ # confusables.txt -# Date: 2022-08-26, 16:49:08 GMT -# © 2022 Unicode®, Inc. +# Date: 2023-08-11, 17:46:40 GMT +# © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html # # Unicode Security Mechanisms for UTS #39 -# Version: 15.0.0 +# Version: 15.1.0 # # For documentation and usage, see https://www.unicode.org/reports/tr39 # @@ -349,8 +349,8 @@ A4FA ; 002E 002E ; MA # ( ꓺ → .. ) LISU LETTER TONE MYA CYA → FULL STOP, F A6F4 ; A6F3 A6F3 ; MA #* ( ꛴ → ꛳꛳ ) BAMUM COLON → BAMUM FULL STOP, BAMUM FULL STOP # -30FB ; 00B7 ; MA #* ( ・ → · ) KATAKANA MIDDLE DOT → MIDDLE DOT # →•→ -FF65 ; 00B7 ; MA #* ( ・ → · ) HALFWIDTH KATAKANA MIDDLE DOT → MIDDLE DOT # →•→ +30FB ; 00B7 ; MA # ( ・ → · ) KATAKANA MIDDLE DOT → MIDDLE DOT # →•→ +FF65 ; 00B7 ; MA # ( ・ → · ) HALFWIDTH KATAKANA MIDDLE DOT → MIDDLE DOT # →•→ 16EB ; 00B7 ; MA #* ( ᛫ → · ) RUNIC SINGLE PUNCTUATION → MIDDLE DOT # 0387 ; 00B7 ; MA # ( · → · ) GREEK ANO TELEIA → MIDDLE DOT # 2E31 ; 00B7 ; MA #* ( ⸱ → · ) WORD SEPARATOR MIDDLE DOT → MIDDLE DOT # diff --git a/admin/unidata/copyright.html b/admin/unidata/copyright.html index 567c54e72ac..fe6dd16903e 100644 --- a/admin/unidata/copyright.html +++ b/admin/unidata/copyright.html @@ -13,7 +13,7 @@ Unicode Terms of Use +href="http://www.unicode.org/webscripts/standard_styles.css">