From d129cb7b56f9137bf932d6afb593ca4ce03d3909 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Sat, 17 Aug 2024 11:01:12 +0300 Subject: [PATCH] Fix script for characters in 3300..3357 range * admin/unidata/blocks.awk: The characters in the 3300..3357 range are Katakana according to Unicode's Scripts.txt. (cherry picked from commit c70ac0fef12f5c84b2a62a4a8b1bdf78f69300ee) --- admin/unidata/blocks.awk | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/admin/unidata/blocks.awk b/admin/unidata/blocks.awk index 122164ce5b6..40cefe0d563 100755 --- a/admin/unidata/blocks.awk +++ b/admin/unidata/blocks.awk @@ -146,6 +146,19 @@ FILENAME ~ "Blocks.txt" && /^[0-9A-F]/ { end[i] = fix_end[e] ? fix_end[e]: e name[i] = $0 + # Hard-coded splits that must be processed before name2alias and + # before combining same-named adjacent ranges. + if (start[i] == "3300") # See Scripts.txt + { + end[i] = "3357" + name[i] = "Katakana" + alt[i] = "kana" + i++ + start[i] = "3358" + end[i] = "33FF" + name[i] = "CJK Compatibility" + } + alt[i] = name2alias(name[i]) if (!alt[i]) -- 2.39.2