--- /dev/null
+# Digit values of the hexadecimal letters, in upper and lower case.
+# decode_hex looks up every character that is not a decimal digit here.
+BEGIN {
+  tohex["A"] = tohex["a"] = 10;
+  tohex["B"] = tohex["b"] = 11;
+  tohex["C"] = tohex["c"] = 12;
+  tohex["D"] = tohex["d"] = 13;
+  tohex["E"] = tohex["e"] = 14;
+  tohex["F"] = tohex["f"] = 15;
+}
+
+# Return the numeric value of the hexadecimal string STR.
+# Decimal digits are converted directly; any other character is looked
+# up in tohex, and a character missing from tohex contributes 0.
+# The names after STR are locals, not arguments: extra parameters are
+# awk's only way to declare them.  Without this the loop index would
+# overwrite the global `i' that the END rule also iterates with.
+function decode_hex(str,    n, len, i, c) {
+  n = 0;
+  len = length(str);
+  for (i = 1; i <= len; i++)
+    {
+      c = substr(str, i, 1);
+      if (c >= "0" && c <= "9")
+        n = n * 16 + (c - "0");
+      else
+        n = n * 16 + tohex[c];
+    }
+  return n;
+}
+
+# Map the two-byte code GB (lead byte * 256 + trail byte) to a dense
+# zero-based index: 190 slots per lead byte counted from 0x81, trail
+# bytes counted from 0x40 with the value 0x7F left out.
+# b0, b1 and idx are locals (declared as extra parameters) so the call
+# does not overwrite the caller's own `idx'.
+function gb_to_index(gb,    b0, b1, idx) {
+  b0 = int(gb / 256);
+  b1 = gb % 256;
+  idx = (b0 - 129) * 190 + b1 - 64;
+  if (b1 >= 128)
+    idx--;
+  return idx;
+}
+
+# Inverse of gb_to_index: turn the dense index IDX back into the
+# two-byte code (lead byte * 256 + trail byte).  Trail bytes start at
+# 0x40 and step over 0x7F.
+# b0 and b1 are locals, declared as extra parameters.
+function index_to_gb(idx,    b0, b1) {
+  b0 = int(idx / 190) + 129;
+  b1 = (idx % 190) + 64;
+  if (b1 >= 127)
+    b1++;
+  return (b0 * 256 + b1);
+}
+# Decode STR of the form "\xHH\xHH" into lead byte * 256 + trail byte.
+# The hex digits are taken from character positions 3-4 and 7-8.
+# b0 and b1 are locals, declared as extra parameters.
+function decode_gb(str,    b0, b1) {
+  b0 = decode_hex(substr(str, 3, 2));
+  b1 = decode_hex(substr(str, 7, 2));
+  return (b0 * 256 + b1);
+}
+
+# For records that start with <Uxxxx> and whose second field is exactly
+# a two-byte sequence \xHH\xHH, store the Unicode value in gb_table
+# under the dense index of that two-byte code.
+/^<U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]>/ && $2 ~ /^\\x[0-9A-F][0-9A-F]\\x[0-9A-F][0-9A-F]$/ {
+  unicode = decode_hex(substr($1, 3, 4));
+  gb = decode_gb($2);
+  idx = gb_to_index(gb);
+  gb_table[idx] = unicode;
+}
+
+# Print one table line for the run of indices FIRST..LAST whose first
+# entry maps to UCS: "0xGGGG 0xUUUU" for a single code, otherwise
+# "0xGGGG-0xGGGG 0xUUUU".
+function print_run(first, last, ucs) {
+  if (first == last)
+    printf ("0x%04X 0x%04X\n", index_to_gb(first), ucs);
+  else
+    printf ("0x%04X-0x%04X 0x%04X\n",
+            index_to_gb(first), index_to_gb(last), ucs);
+}
+
+# Walk every index from 0 up to the index of 0xFEFE and print the table
+# as runs in which the two-byte code and the Unicode value advance in
+# step.  An index with no gb_table entry counts as Unicode 0.
+END {
+  last_idx = gb_to_index(decode_hex("FEFE"));
+  from_idx = 0;
+  from_unicode = gb_table[0];
+  for (i = 1; i <= last_idx; i++)
+    {
+      unicode = gb_table[i];
+      # The run breaks as soon as the Unicode offset differs from the
+      # index offset.
+      if (i - from_idx != unicode - from_unicode)
+        {
+          print_run(from_idx, i - 1, from_unicode);
+          from_idx = i;
+          from_unicode = unicode;
+        }
+    }
+  # The run still open when the loop ends has not been printed yet; it
+  # gets the same single/range formatting as every other run.
+  print_run(from_idx, last_idx, from_unicode);
+}
--- /dev/null
+# Digit values of the hexadecimal letters, in upper and lower case.
+# decode_hex looks up every character that is not a decimal digit here.
+BEGIN {
+  tohex["A"] = tohex["a"] = 10;
+  tohex["B"] = tohex["b"] = 11;
+  tohex["C"] = tohex["c"] = 12;
+  tohex["D"] = tohex["d"] = 13;
+  tohex["E"] = tohex["e"] = 14;
+  tohex["F"] = tohex["f"] = 15;
+}
+
+# Return the numeric value of the hexadecimal string STR.
+# Decimal digits are converted directly; any other character is looked
+# up in tohex, and a character missing from tohex contributes 0.
+# The names after STR are locals, not arguments: extra parameters are
+# awk's only way to declare them.  Without this the loop index would
+# overwrite the global `i' that the END rule also iterates with.
+function decode_hex(str,    n, len, i, c) {
+  n = 0;
+  len = length(str);
+  for (i = 1; i <= len; i++)
+    {
+      c = substr(str, i, 1);
+      if (c >= "0" && c <= "9")
+        n = n * 16 + (c - "0");
+      else
+        n = n * 16 + tohex[c];
+    }
+  return n;
+}
+
+# Linear index of the four-byte code B0 B1 B2 B3.  B0 and B2 are
+# counted from 0x81 and B1 and B3 from 0x30; B3 has 10 values, B2 has
+# 126 and B1 has 10, which gives the positional weights below.
+function gb_to_index(b0,b1,b2,b3) {
+  return (b0 - 129) * 12600 + (b1 - 48) * 1260 + (b2 - 129) * 10 + (b3 - 48);
+}
+
+# Inverse of gb_to_index: return the four bytes of the code with linear
+# index IDX as eight upper-case hex digits (no "0x" prefix).
+# awk's `/' is floating-point division, so each quotient is truncated
+# with int() explicitly instead of relying on %X to drop the fraction.
+# b0..b3 are locals, declared as extra parameters.
+function index_to_gb(idx,    b0, b1, b2, b3) {
+  b3 = (idx % 10) + 48;
+  idx = int(idx / 10);
+  b2 = (idx % 126) + 129;
+  idx = int(idx / 126);
+  b1 = (idx % 10) + 48;
+  b0 = int(idx / 10) + 129;
+  return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3);
+}
+
+# Decode STR of the form "\xHH\xHH\xHH\xHH" and return the linear index
+# of that four-byte code.  The hex digits are taken from character
+# positions 3-4, 7-8, 11-12 and 15-16.
+# b0..b3 are locals, declared as extra parameters.
+function decode_gb(str,    b0, b1, b2, b3) {
+  b0 = decode_hex(substr(str, 3, 2));
+  b1 = decode_hex(substr(str, 7, 2));
+  b2 = decode_hex(substr(str, 11, 2));
+  b3 = decode_hex(substr(str, 15, 2));
+  return gb_to_index(b0, b1, b2, b3);
+}
+
+# Print one table line for the run of linear indices FROM..TO:
+# "0xGGGGGGGG 0xUUUU" for a single code, otherwise
+# "0xGGGGGGGG-0xGGGGGGGG 0xUUUU".  The Unicode value printed is the one
+# gbtable holds for FROM.
+# fromgb and fromuni are locals (declared as extra parameters).  The
+# END rule keeps its run start in a global that is also called fromgb,
+# so assigning it here would replace that index with a hex string.
+function printline(from, to,    fromgb, fromuni) {
+  fromgb = index_to_gb(from);
+  fromuni = gbtable[from];
+  if (from == to)
+    printf ("0x%s 0x%04X\n", fromgb, fromuni);
+  else
+    printf ("0x%s-0x%s 0x%04X\n", fromgb, index_to_gb(to), fromuni);
+}
+
+# For records that start with <Uxxxx>, remember in unitable the linear
+# index of the four-byte code in the second field, or -1 when that
+# field does not contain a four-byte sequence with lead byte 0x81-0x84.
+/^<U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]>/ {
+  unicode = decode_hex(substr($1, 3, 4));
+  unitable[unicode] = ($2 ~ /\\x8[1-4]\\x3[0-9]\\x[8-9A-F][0-9A-F]\\x3[0-9]/) ? decode_gb($2) : -1;
+}
+
+# Complete the Unicode -> four-byte index table and print it as runs.
+# unitable values: > 0 is a four-byte index, -1 was set by the record
+# rule for anything else, and 0 or unset means nothing was recorded.
+# U+0080 is handled outside the loops, which therefore start at 129.
+END {
+  surrogate_min = decode_hex("D800");
+  surrogate_max = decode_hex("DFFF");
+
+  # Pass 1: give every non-surrogate code point that has no entry the
+  # index following the previous four-byte index, and build the
+  # reverse table gbtable (index -> Unicode).
+  lastgb = unitable[128];
+  gbtable[lastgb] = 128;
+  for (i = 129; i < 65536; i++)
+    {
+      if (unitable[i] == 0 && (i < surrogate_min || i > surrogate_max))
+        {
+          lastgb++;
+          gbtable[lastgb] = i;
+          unitable[i] = lastgb;
+        }
+      else if (unitable[i] > 0)
+        {
+          lastgb = unitable[i];
+          gbtable[lastgb] = i;
+        }
+    }
+
+  # Pass 2: print maximal runs of consecutive indices.  lastgb == -1
+  # means no run is currently open.
+  fromgb = lastgb = unitable[128];
+  for (i = 129; i < 65536; i++)
+    {
+      if (unitable[i] > 0)
+        {
+          if (lastgb + 1 == unitable[i])
+            {
+              lastgb++;
+            }
+          else
+            {
+              if (lastgb >= 0)
+                printline(fromgb, lastgb);
+              fromgb = lastgb = unitable[i];
+            }
+        }
+      else # unitable[i] is -1, or 0 for a surrogate: no four-byte code
+        {
+          if (lastgb >= 0)
+            printline(fromgb, lastgb);
+          lastgb = -1;
+        }
+    }
+  # Flush the run still open after U+FFFF, if there is one.  When one
+  # is open, lastgb equals unitable[65535].
+  if (lastgb >= 0)
+    printline(fromgb, lastgb);
+}
--- /dev/null
+# Mapping table for GB18030 0x81308130..0x8439FE39
+# generated from gb18030.ucm distributed with ICU
+0x81308130-0x81308435 0x0080
+0x81308436-0x81308437 0x00A5
+0x81308438-0x81308534 0x00A9
+0x81308535-0x81308539 0x00B2
+0x81308630-0x81308930 0x00B8
+0x81308931-0x81308938 0x00D8
+0x81308939-0x81308A34 0x00E2
+0x81308A35 0x00EB
+0x81308A36-0x81308A39 0x00EE
+0x81308B30-0x81308B32 0x00F4
+0x81308B33 0x00F8
+0x81308B34 0x00FB
+0x81308B35-0x81308B38 0x00FD
+0x81308B39-0x81308D35 0x0102
+0x81308D36-0x81308E32 0x0114
+0x81308E33-0x81308F37 0x011C
+0x81308F38-0x81309231 0x012C
+0x81309232-0x81309234 0x0145
+0x81309235-0x81309238 0x0149
+0x81309239-0x81309537 0x014E
+0x81309538-0x81309F35 0x016C
+0x81309F36 0x01CF
+0x81309F37 0x01D1
+0x81309F38 0x01D3
+0x81309F39 0x01D5
+0x8130A030 0x01D7
+0x8130A031 0x01D9
+0x8130A032 0x01DB
+0x8130A033-0x8130A330 0x01DD
+0x8130A331-0x8130AB37 0x01FA
+0x8130AB38-0x8130AD32 0x0252
+0x8130AD33-0x8130B733 0x0262
+0x8130B734 0x02C8
+0x8130B735-0x8130B837 0x02CC
+0x8130B838-0x8130CB30 0x02DA
+0x8130CB31 0x03A2
+0x8130CB32-0x8130CB38 0x03AA
+0x8130CB39 0x03C2
+0x8130CC30-0x8130D134 0x03CA
+0x8130D135-0x8130D238 0x0402
+0x8130D239 0x0450
+0x8130D330-0x8136A531 0x0452
+0x8136A532-0x8136A533 0x2011
+0x8136A534 0x2017
+0x8136A535-0x8136A536 0x201A
+0x8136A537-0x8136A633 0x201E
+0x8136A634-0x8136A732 0x2027
+0x8136A733 0x2031
+0x8136A734 0x2034
+0x8136A735-0x8136A739 0x2036
+0x8136A830-0x8136B331 0x203C
+0x8136B332-0x8136BB37 0x20AD
+0x8136BB38 0x2104
+0x8136BB39-0x8136BC31 0x2106
+0x8136BC32-0x8136BD33 0x210A
+0x8136BD34-0x8136BE33 0x2117
+0x8136BE34-0x8136C435 0x2122
+0x8136C436-0x8136C439 0x216C
+0x8136C530-0x8136C731 0x217A
+0x8136C732-0x8136C733 0x2194
+0x8136C734-0x8136D233 0x219A
+0x8136D234-0x8136D239 0x2209
+0x8136D330 0x2210
+0x8136D331-0x8136D333 0x2212
+0x8136D334-0x8136D337 0x2216
+0x8136D338-0x8136D339 0x221B
+0x8136D430-0x8136D431 0x2221
+0x8136D432 0x2224
+0x8136D433 0x2226
+0x8136D434-0x8136D435 0x222C
+0x8136D436-0x8136D530 0x222F
+0x8136D531-0x8136D535 0x2238
+0x8136D536-0x8136D635 0x223E
+0x8136D636-0x8136D638 0x2249
+0x8136D639-0x8136D733 0x224D
+0x8136D734-0x8136D836 0x2253
+0x8136D837-0x8136D838 0x2262
+0x8136D839-0x8136D934 0x2268
+0x8136D935-0x8136DD31 0x2270
+0x8136DD32-0x8136DD34 0x2296
+0x8136DD35-0x8136DE35 0x229A
+0x8136DE36-0x8136E130 0x22A6
+0x8136E131-0x8136E932 0x22C0
+0x8136E933-0x81378C35 0x2313
+0x81378C36-0x81378D35 0x246A
+0x81378D36-0x81379735 0x249C
+0x81379736-0x81379739 0x254C
+0x81379830-0x81379932 0x2574
+0x81379933-0x81379935 0x2590
+0x81379936-0x81379A35 0x2596
+0x81379A36-0x81379C31 0x25A2
+0x81379C32-0x81379C39 0x25B4
+0x81379D30-0x81379D37 0x25BE
+0x81379D38-0x81379E30 0x25C8
+0x81379E31-0x81379E32 0x25CC
+0x81379E33-0x8137A030 0x25D0
+0x8137A031-0x8137A331 0x25E6
+0x8137A332-0x8137A333 0x2607
+0x8137A334-0x8137A837 0x260A
+0x8137A838 0x2641
+0x8137A839-0x8138FD38 0x2643
+0x8138FD39-0x8138FE30 0x2E82
+0x8138FE31-0x8138FE33 0x2E85
+0x8138FE34-0x8138FE35 0x2E89
+0x8138FE36-0x81398135 0x2E8D
+0x81398136-0x81398330 0x2E98
+0x81398331-0x81398332 0x2EA8
+0x81398333-0x81398335 0x2EAB
+0x81398336-0x81398339 0x2EAF
+0x81398430-0x81398431 0x2EB4
+0x81398432-0x81398434 0x2EB8
+0x81398435-0x81398538 0x2EBC
+0x81398539-0x8139A331 0x2ECB
+0x8139A332-0x8139A335 0x2FFC
+0x8139A336 0x3004
+0x8139A337-0x8139A431 0x3018
+0x8139A432-0x8139A433 0x301F
+0x8139A434-0x8139A633 0x302A
+0x8139A634-0x8139A635 0x303F
+0x8139A636-0x8139A732 0x3094
+0x8139A733-0x8139A734 0x309F
+0x8139A735-0x8139A739 0x30F7
+0x8139A830-0x8139A835 0x30FF
+0x8139A836-0x8139C131 0x312A
+0x8139C132-0x8139C138 0x322A
+0x8139C139-0x8139CD31 0x3232
+0x8139CD32-0x8139E435 0x32A4
+0x8139E436-0x8139E537 0x3390
+0x8139E538-0x8139E539 0x339F
+0x8139E630-0x8139E933 0x33A2
+0x8139E934-0x8139EA32 0x33C5
+0x8139EA33-0x8139EA34 0x33CF
+0x8139EA35-0x8139EA36 0x33D3
+0x8139EA37-0x8139F539 0x33D6
+0x8139F630-0x8139FA32 0x3448
+0x8139FA33-0x82309A30 0x3474
+0x82309A31-0x8230A531 0x359F
+0x8230A532-0x8230A632 0x360F
+0x8230A633-0x8230F237 0x361B
+0x8230F238-0x8230FB32 0x3919
+0x8230FB33-0x82318638 0x396F
+0x82318639-0x82318832 0x39D1
+0x82318833-0x82319639 0x39E0
+0x82319730-0x8231AC37 0x3A74
+0x8231AC38-0x8231C934 0x3B4F
+0x8231C935-0x8231D437 0x3C6F
+0x8231D438-0x8232AF32 0x3CE1
+0x8232AF33-0x8232C936 0x4057
+0x8232C937-0x8232F837 0x4160
+0x8232F838-0x82338633 0x4338
+0x82338634-0x82338637 0x43AD
+0x82338638-0x82338B30 0x43B2
+0x82338B31-0x8233A338 0x43DE
+0x8233A339-0x8233C931 0x44D7
+0x8233C932-0x8233CB31 0x464D
+0x8233CB32-0x8233DE34 0x4662
+0x8233DE35-0x8233DE39 0x4724
+0x8233DF30-0x8233E731 0x472A
+0x8233E732-0x8233E837 0x477D
+0x8233E838-0x82349638 0x478E
+0x82349639-0x82349B38 0x4948
+0x82349B39-0x82349C30 0x497B
+0x82349C31-0x82349C34 0x497E
+0x82349C35 0x4984
+0x82349C36-0x82349E35 0x4987
+0x82349E36-0x82349E38 0x499C
+0x82349E39-0x8234A130 0x49A0
+0x8234A131-0x8234E733 0x49B8
+0x8234E734-0x8234EB32 0x4C78
+0x8234EB33-0x8234F633 0x4CA4
+0x8234F634-0x82358731 0x4D1A
+0x82358732-0x82358F32 0x4DAF
+0x82358F33-0x8336C738 0x9FA6
+0x8336C739 0xE76C
+0x8336C830 0xE7C8
+0x8336C831-0x8336C933 0xE7E7
+0x8336C934 0xE815
+0x8336C935-0x8336C939 0xE819
+0x8336CA30-0x8336CA36 0xE81F
+0x8336CA37-0x8336CB30 0xE827
+0x8336CB31-0x8336CB34 0xE82D
+0x8336CB35-0x8336CC32 0xE833
+0x8336CC33-0x8336CC39 0xE83C
+0x8336CD30-0x8336CE35 0xE844
+0x8336CE36-0x8336CF39 0xE856
+0x8336D030-0x84308534 0xE865
+0x84308535-0x84308D30 0xF92D
+0x84308D31-0x84308F37 0xF97A
+0x84308F38-0x84309738 0xF996
+0x84309739-0x84309837 0xF9E8
+0x84309838-0x84309B33 0xF9F2
+0x84309B34 0xFA10
+0x84309B35 0xFA12
+0x84309B36-0x84309B38 0xFA15
+0x84309B39-0x84309C34 0xFA19
+0x84309C35 0xFA22
+0x84309C36-0x84309C37 0xFA25
+0x84309C38-0x84318537 0xFA2A
+0x84318538 0xFE32
+0x84318539-0x84318632 0xFE45
+0x84318633 0xFE53
+0x84318634 0xFE58
+0x84318635 0xFE67
+0x84318636-0x84319534 0xFE6C
+0x84319535-0x8431A233 0xFF5F
+0x8431A234-0x8431A439 0xFFE6