From 9774df2d3a3f318d5267946bd109987580ab4512 Mon Sep 17 00:00:00 2001
From: Kenichi Handa
Date: Thu, 25 Jun 2009 00:33:55 +0000
Subject: [PATCH] For the UNICODE format files, do reverse sort and don't
 compact the map. This is to prefer the first one in the duplicated mappings
 (e.g. 0x20->U+0020, 0x20->U+00A0).

---
 admin/ChangeLog        | 6 ++++++
 admin/charsets/mapconv | 6 ++++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/admin/ChangeLog b/admin/ChangeLog
index 0ceb348825c..88aac773bc9 100644
--- a/admin/ChangeLog
+++ b/admin/ChangeLog
@@ -1,3 +1,9 @@
+2009-06-24 Kenichi Handa
+
+ * charsets/mapconv: For the UNICODE format files, do reverse sort
+ and don't compact the map. This is to prefer the first one in the
+ duplicated mappings (e.g. 0x20->U+0020, 0x20->U+00A0).
+
 2009-06-12 Kenichi Handa
 
  * charsets/Makefile (JISX213A.map): Make it depend on
diff --git a/admin/charsets/mapconv b/admin/charsets/mapconv
index 1913449f898..64c0772af52 100755
--- a/admin/charsets/mapconv
+++ b/admin/charsets/mapconv
@@ -30,7 +30,7 @@
 # $1: source map file
 # $2: address pattern for sed (optionally with substitution command)
 # $3: format of source map file
-# GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE YASUOKA
+# GLIBC-1 GLIBC-2 GLIBC-2-7 CZYBORRA IANA UNICODE UNICODE2 YASUOKA
 # $4: awk script
 
 FILE="admin/charsets/$1"
@@ -115,9 +115,11 @@ elif [ "$3" = "IANA" ] ; then
 elif [ "$3" = "UNICODE" ] ; then
     # Source format is:
     # YYYY XX
+    # We perform reverse sort to prefer the first one in the
+    # duplicated mappings (e.g. 0x20->U+0020, 0x20->U+00A0).
     zcat $1 | sed -n -e "$2 p" \
         | sed -e 's/\([0-9A-F]*\)[^0-9A-F]*\([0-9A-F]*\).*/0x\2 0x\1/' \
-        | sort | ${AWKPROG}
+        | sort -r
 elif [ "$3" = "UNICODE2" ] ; then
     # Source format is:
     # 0xXXXX 0xYYYY # ...
-- 
2.39.2