--- /dev/null
+0x81308130-0x81308435 U+0080-U+00A3
+0x81308436-0x81308437 U+00A5-U+00A6
+0x81308438-0x81308534 U+00A9-U+00AF
+0x81308535-0x81308539 U+00B2-U+00B6
+0x81308630-0x81308930 U+00B8-U+00D6
+0x81308931-0x81308938 U+00D8-U+00DF
+0x81308939-0x81308A34 U+00E2-U+00E7
+0x81308A35 U+00EB
+0x81308A36-0x81308A39 U+00EE-U+00F1
+0x81308B30-0x81308B32 U+00F4-U+00F6
+0x81308B33 U+00F8
+0x81308B34 U+00FB
+0x81308B35-0x81308B38 U+00FD-U+0100
+0x81308B39-0x81308D35 U+0102-U+0112
+0x81308D36-0x81308E32 U+0114-U+011A
+0x81308E33-0x81308F37 U+011C-U+012A
+0x81308F38-0x81309231 U+012C-U+0143
+0x81309232-0x81309234 U+0145-U+0147
+0x81309235-0x81309238 U+0149-U+014C
+0x81309239-0x81309537 U+014E-U+016A
+0x81309538-0x81309F35 U+016C-U+01CD
+0x81309F36 U+01CF
+0x81309F37 U+01D1
+0x81309F38 U+01D3
+0x81309F39 U+01D5
+0x8130A030 U+01D7
+0x8130A031 U+01D9
+0x8130A032 U+01DB
+0x8130A033-0x8130A330 U+01DD-U+01F8
+0x8130A331-0x8130AB37 U+01FA-U+0250
+0x8130AB38-0x8130AD32 U+0252-U+0260
+0x8130AD33-0x8130B733 U+0262-U+02C6
+0x8130B734 U+02C8
+0x8130B735-0x8130B837 U+02CC-U+02D8
+0x8130B838-0x8130CB30 U+02DA-U+0390
+0x8130CB31 U+03A2
+0x8130CB32-0x8130CB38 U+03AA-U+03B0
+0x8130CB39 U+03C2
+0x8130CC30-0x8130D134 U+03CA-U+0400
+0x8130D135-0x8130D238 U+0402-U+040F
+0x8130D239 U+0450
+0x8130D330-0x8136A531 U+0452-U+200F
+0x8136A532-0x8136A533 U+2011-U+2012
+0x8136A534 U+2017
+0x8136A535-0x8136A536 U+201A-U+201B
+0x8136A537-0x8136A633 U+201E-U+2024
+0x8136A634-0x8136A732 U+2027-U+202F
+0x8136A733 U+2031
+0x8136A734 U+2034
+0x8136A735-0x8136A739 U+2036-U+203A
+0x8136A830-0x8136B331 U+203C-U+20AB
+0x8136B332-0x8136BB37 U+20AD-U+2102
+0x8136BB38 U+2104
+0x8136BB39-0x8136BC31 U+2106-U+2108
+0x8136BC32-0x8136BD33 U+210A-U+2115
+0x8136BD34-0x8136BE33 U+2117-U+2120
+0x8136BE34-0x8136C435 U+2122-U+215F
+0x8136C436-0x8136C439 U+216C-U+216F
+0x8136C530-0x8136C731 U+217A-U+218F
+0x8136C732-0x8136C733 U+2194-U+2195
+0x8136C734-0x8136D233 U+219A-U+2207
+0x8136D234-0x8136D239 U+2209-U+220E
+0x8136D330 U+2210
+0x8136D331-0x8136D333 U+2212-U+2214
+0x8136D334-0x8136D337 U+2216-U+2219
+0x8136D338-0x8136D339 U+221B-U+221C
+0x8136D430-0x8136D431 U+2221-U+2222
+0x8136D432 U+2224
+0x8136D433 U+2226
+0x8136D434-0x8136D435 U+222C-U+222D
+0x8136D436-0x8136D530 U+222F-U+2233
+0x8136D531-0x8136D535 U+2238-U+223C
+0x8136D536-0x8136D635 U+223E-U+2247
+0x8136D636-0x8136D638 U+2249-U+224B
+0x8136D639-0x8136D733 U+224D-U+2251
+0x8136D734-0x8136D836 U+2253-U+225F
+0x8136D837-0x8136D838 U+2262-U+2263
+0x8136D839-0x8136D934 U+2268-U+226D
+0x8136D935-0x8136DD31 U+2270-U+2294
+0x8136DD32-0x8136DD34 U+2296-U+2298
+0x8136DD35-0x8136DE35 U+229A-U+22A4
+0x8136DE36-0x8136E130 U+22A6-U+22BE
+0x8136E131-0x8136E932 U+22C0-U+2311
+0x8136E933-0x81378C35 U+2313-U+245F
+0x81378C36-0x81378D35 U+246A-U+2473
+0x81378D36-0x81379735 U+249C-U+24FF
+0x81379736-0x81379739 U+254C-U+254F
+0x81379830-0x81379932 U+2574-U+2580
+0x81379933-0x81379935 U+2590-U+2592
+0x81379936-0x81379A35 U+2596-U+259F
+0x81379A36-0x81379C31 U+25A2-U+25B1
+0x81379C32-0x81379C39 U+25B4-U+25BB
+0x81379D30-0x81379D37 U+25BE-U+25C5
+0x81379D38-0x81379E30 U+25C8-U+25CA
+0x81379E31-0x81379E32 U+25CC-U+25CD
+0x81379E33-0x8137A030 U+25D0-U+25E1
+0x8137A031-0x8137A331 U+25E6-U+2604
+0x8137A332-0x8137A333 U+2607-U+2608
+0x8137A334-0x8137A837 U+260A-U+263F
+0x8137A838 U+2641
+0x8137A839-0x8138FD38 U+2643-U+2E80
+0x8138FD39-0x8138FE30 U+2E82-U+2E83
+0x8138FE31-0x8138FE33 U+2E85-U+2E87
+0x8138FE34-0x8138FE35 U+2E89-U+2E8A
+0x8138FE36-0x81398135 U+2E8D-U+2E96
+0x81398136-0x81398330 U+2E98-U+2EA6
+0x81398331-0x81398332 U+2EA8-U+2EA9
+0x81398333-0x81398335 U+2EAB-U+2EAD
+0x81398336-0x81398339 U+2EAF-U+2EB2
+0x81398430-0x81398431 U+2EB4-U+2EB5
+0x81398432-0x81398434 U+2EB8-U+2EBA
+0x81398435-0x81398538 U+2EBC-U+2EC9
+0x81398539-0x8139A331 U+2ECB-U+2FEF
+0x8139A332-0x8139A335 U+2FFC-U+2FFF
+0x8139A336 U+3004
+0x8139A337-0x8139A431 U+3018-U+301C
+0x8139A432-0x8139A433 U+301F-U+3020
+0x8139A434-0x8139A633 U+302A-U+303D
+0x8139A634-0x8139A635 U+303F-U+3040
+0x8139A636-0x8139A732 U+3094-U+309A
+0x8139A733-0x8139A734 U+309F-U+30A0
+0x8139A735-0x8139A739 U+30F7-U+30FB
+0x8139A830-0x8139A835 U+30FF-U+3104
+0x8139A836-0x8139C131 U+312A-U+321F
+0x8139C132-0x8139C138 U+322A-U+3230
+0x8139C139-0x8139CD31 U+3232-U+32A2
+0x8139CD32-0x8139E435 U+32A4-U+338D
+0x8139E436-0x8139E537 U+3390-U+339B
+0x8139E538-0x8139E539 U+339F-U+33A0
+0x8139E630-0x8139E933 U+33A2-U+33C3
+0x8139E934-0x8139EA32 U+33C5-U+33CD
+0x8139EA33-0x8139EA34 U+33CF-U+33D0
+0x8139EA35-0x8139EA36 U+33D3-U+33D4
+0x8139EA37-0x8139F539 U+33D6-U+3446
+0x8139F630-0x8139FA32 U+3448-U+3472
+0x8139FA33-0x82309A30 U+3474-U+359D
+0x82309A31-0x8230A531 U+359F-U+360D
+0x8230A532-0x8230A632 U+360F-U+3619
+0x8230A633-0x8230F237 U+361B-U+3917
+0x8230F238-0x8230FB32 U+3919-U+396D
+0x8230FB33-0x82318638 U+396F-U+39CE
+0x82318639-0x82318832 U+39D1-U+39DE
+0x82318833-0x82319639 U+39E0-U+3A72
+0x82319730-0x8231AC37 U+3A74-U+3B4D
+0x8231AC38-0x8231C934 U+3B4F-U+3C6D
+0x8231C935-0x8231D437 U+3C6F-U+3CDF
+0x8231D438-0x8232AF32 U+3CE1-U+4055
+0x8232AF33-0x8232C936 U+4057-U+415E
+0x8232C937-0x8232F837 U+4160-U+4336
+0x8232F838-0x82338633 U+4338-U+43AB
+0x82338634-0x82338637 U+43AD-U+43B0
+0x82338638-0x82338B30 U+43B2-U+43DC
+0x82338B31-0x8233A338 U+43DE-U+44D5
+0x8233A339-0x8233C931 U+44D7-U+464B
+0x8233C932-0x8233CB31 U+464D-U+4660
+0x8233CB32-0x8233DE34 U+4662-U+4722
+0x8233DE35-0x8233DE39 U+4724-U+4728
+0x8233DF30-0x8233E731 U+472A-U+477B
+0x8233E732-0x8233E837 U+477D-U+478C
+0x8233E838-0x82349638 U+478E-U+4946
+0x82349639-0x82349B38 U+4948-U+4979
+0x82349B39-0x82349C30 U+497B-U+497C
+0x82349C31-0x82349C34 U+497E-U+4981
+0x82349C35 U+4984
+0x82349C36-0x82349E35 U+4987-U+499A
+0x82349E36-0x82349E38 U+499C-U+499E
+0x82349E39-0x8234A130 U+49A0-U+49B5
+0x8234A131-0x8234E733 U+49B8-U+4C76
+0x8234E734-0x8234EB32 U+4C78-U+4C9E
+0x8234EB33-0x8234F633 U+4CA4-U+4D12
+0x8234F634-0x82358731 U+4D1A-U+4DAD
+0x82358732-0x82358F32 U+4DAF-U+4DFF
+0x82358F33-0x8336C738 U+9FA6-U+D7FF
+0x8336C739 U+E76C
+0x8336C830 U+E7C8
+0x8336C831-0x8336C933 U+E7E7-U+E7F3
+0x8336C934 U+E815
+0x8336C935-0x8336C939 U+E819-U+E81D
+0x8336CA30-0x8336CA36 U+E81F-U+E825
+0x8336CA37-0x8336CB30 U+E827-U+E82A
+0x8336CB31-0x8336CB34 U+E82D-U+E830
+0x8336CB35-0x8336CC32 U+E833-U+E83A
+0x8336CC33-0x8336CC39 U+E83C-U+E842
+0x8336CD30-0x8336CE35 U+E844-U+E853
+0x8336CE36-0x8336CF39 U+E856-U+E863
+0x8336D030-0x84308534 U+E865-U+F92B
+0x84308535-0x84308D30 U+F92D-U+F978
+0x84308D31-0x84308F37 U+F97A-U+F994
+0x84308F38-0x84309738 U+F996-U+F9E6
+0x84309739-0x84309837 U+F9E8-U+F9F0
+0x84309838-0x84309B33 U+F9F2-U+FA0B
+0x84309B34 U+FA10
+0x84309B35 U+FA12
+0x84309B36-0x84309B38 U+FA15-U+FA17
+0x84309B39-0x84309C34 U+FA19-U+FA1E
+0x84309C35 U+FA22
+0x84309C36-0x84309C37 U+FA25-U+FA26
+0x84309C38-0x84318537 U+FA2A-U+FE2F
+0x84318538 U+FE32
+0x84318539-0x84318632 U+FE45-U+FE48
+0x84318633 U+FE53
+0x84318634 U+FE58
+0x84318635 U+FE67
+0x84318636-0x84319534 U+FE6C-U+FF00
+0x84319535-0x8431A233 U+FF5F-U+FFDF
+0x8431A234-0x8431A439 U+FFE6-U+FFFF
--- /dev/null
+# Set up tohex[], which maps each hexadecimal letter digit (upper and
+# lower case) to its numeric value 10..15.  Decimal digits are not stored
+# here; decode_hex handles them arithmetically.
+BEGIN {
+  tohex["A"] = tohex["a"] = 10;
+  tohex["B"] = tohex["b"] = 11;
+  tohex["C"] = tohex["c"] = 12;
+  tohex["D"] = tohex["d"] = 13;
+  tohex["E"] = tohex["e"] = 14;
+  tohex["F"] = tohex["f"] = 15;
+}
+
+# Convert a string of hexadecimal digits to its numeric value.
+#
+#   str  hex digits in either case, without a "0x" prefix
+#
+# Returns the number; an empty string yields 0.  Decimal digits are
+# converted arithmetically, letters are looked up in the global tohex[]
+# table; any character found in neither contributes 0 for its position.
+#
+# The names after the extra spaces in the parameter list are locals (awk
+# has no other way to declare them).  Without this, the loop counter i
+# would be a global and would collide with the i used by the END loops.
+function decode_hex(str,    n, len, i, c) {
+  n = 0;
+  len = length(str);
+  for (i = 1; i <= len; i++)
+    {
+      c = substr(str, i, 1);
+      if (c >= "0" && c <= "9")
+        n = n * 16 + (c - "0");
+      else
+        n = n * 16 + tohex[c];
+    }
+  return n;
+}
+
+# Map the four byte values of a GB18030 four-byte sequence to a linear
+# index.  The bytes act as mixed-radix digits: b0 and b2 are counted from
+# 129 (0x81), b1 and b3 from 48 (0x30); b3 and b1 have radix 10 and b2 has
+# radix 126.  Hence the positional weights 1, 10, 1260 and 12600, and
+# 0x81 0x30 0x81 0x30 maps to index 0.
+function gb_to_index(b0,b1,b2,b3) {
+  return (b0 - 129) * 12600 + (b1 - 48) * 1260 + (b2 - 129) * 10 + (b3 - 48);
+}
+
+# Inverse of gb_to_index: turn a linear index into the eight upper-case
+# hex digits of the corresponding four-byte sequence (no "0x" prefix).
+#
+#   idx  non-negative integer index
+#
+# awk's "/" is floating-point division, so every quotient is truncated
+# with int() to keep the intermediate values integral rather than relying
+# on sprintf's %X to discard fractional parts.  b0..b3 are declared as
+# locals so the function does not overwrite globals of the same name.
+function index_to_gb(idx,    b0, b1, b2, b3) {
+  b3 = (idx % 10) + 48;
+  idx = int(idx / 10);
+  b2 = (idx % 126) + 129;
+  idx = int(idx / 126);
+  b1 = (idx % 10) + 48;
+  b0 = int(idx / 10) + 129;
+  return sprintf("%02X%02X%02X%02X", b0, b1, b2, b3);
+}
+
+# Convert the textual form of a four-byte sequence to its linear index.
+#
+#   str  four escaped bytes, each taking four characters with the two hex
+#        digits in the last two positions (e.g. \x81\x30\x81\x30), so the
+#        digit pairs start at offsets 3, 7, 11 and 15
+#
+# Returns gb_to_index() of the four decoded byte values.  b0..b3 are
+# declared as locals so that globals of the same name are left untouched.
+function decode_gb(str,    b0, b1, b2, b3) {
+  b0 = decode_hex(substr(str, 3, 2));
+  b1 = decode_hex(substr(str, 7, 2));
+  b2 = decode_hex(substr(str, 11, 2));
+  b3 = decode_hex(substr(str, 15, 2));
+  return gb_to_index(b0, b1, b2, b3);
+}
+
+# Print one line of the range table on standard output.
+#
+#   from, to  first and last linear index of a run of consecutive indices
+#             whose code points are consecutive as well
+#
+# A single-element run is printed as "0xGGGGGGGG U+XXXX", a longer one as
+# "0xGGGGGGGG-0xGGGGGGGG U+XXXX-U+XXXX".  The first code point is read
+# from the global gbtable[]; the last one is derived from the run length.
+#
+# fromgb and fromuni are declared as locals: the END block keeps its own
+# numeric range start in a global named fromgb, which this function would
+# otherwise overwrite with a hex string.
+function printline(from, to,    fromgb, fromuni) {
+  fromgb = index_to_gb(from);
+  fromuni = gbtable[from];
+  if (from == to)
+    printf ("0x%s U+%04X\n", fromgb, fromuni);
+  else
+    printf ("0x%s-0x%s U+%04X-U+%04X\n", fromgb, index_to_gb(to),
+            fromuni, fromuni + (to - from));
+}
+
+# For every input line that starts with <UXXXX> (four upper-case hex
+# digits), record in unitable[] how that code point is encoded: the
+# linear four-byte index when the second field is a four-byte sequence in
+# the range matched below, or -1 when it is encoded some other way.
+/^<U[0-9A-F][0-9A-F][0-9A-F][0-9A-F]>/ {
+  unicode = decode_hex(substr($1, 3, 4));
+  unitable[unicode] = ($2 ~ /\\x8[1-4]\\x3[0-9]\\x[8-9A-F][0-9A-F]\\x3[0-9]/) \
+                      ? decode_gb($2) : -1;
+}
+
+# Emit the range table once the whole input has been read.
+#
+# Pass 1 completes the two mappings for U+0080..U+FFFF:
+#   - a code point with no entry at all is given the index following the
+#     most recent four-byte index, unless it is a surrogate
+#     (U+D800..U+DFFF), which is marked as unmapped (-1);
+#   - a code point that already has a four-byte index is entered into the
+#     reverse table gbtable[] and becomes the new "most recent" index.
+# Presence is tested with "in" rather than by comparing against 0, because
+# 0 is itself a valid index (0x81308130); treating it as "unset" left
+# gbtable[0] empty and made the first output line start at U+0000.
+#
+# Pass 2 walks the code points in order and prints one line per maximal
+# run of consecutive code points that have consecutive indices.
+#
+# The loop variable and range bookkeeping use names that no helper
+# function assigns, so calls to the helpers cannot disturb them.
+END {
+  surrogate_min = decode_hex("D800");
+  surrogate_max = decode_hex("DFFF");
+
+  lastgb = -1;
+  for (cp = 128; cp < 65536; cp++)
+    {
+      if (!(cp in unitable))
+        {
+          if (cp < surrogate_min || cp > surrogate_max)
+            {
+              lastgb++;
+              gbtable[lastgb] = cp;
+              unitable[cp] = lastgb;
+            }
+          else
+            unitable[cp] = -1;
+        }
+      else if (unitable[cp] >= 0)
+        {
+          lastgb = unitable[cp];
+          gbtable[lastgb] = cp;
+        }
+    }
+
+  # range_first/range_last delimit the currently open run of indices;
+  # range_last < 0 means that no run is open.
+  range_first = range_last = -1;
+  for (cp = 128; cp < 65536; cp++)
+    {
+      cur = unitable[cp];
+      if (cur >= 0 && range_last >= 0 && cur == range_last + 1)
+        range_last = cur;
+      else
+        {
+          if (range_last >= 0)
+            printline(range_first, range_last);
+          if (cur >= 0)
+            range_first = range_last = cur;
+          else
+            range_first = range_last = -1;
+        }
+    }
+  # Flush the run that is still open at U+FFFF, if any.
+  if (range_last >= 0)
+    printline(range_first, range_last);
+}