From 5d1bff2082c0af3e5d00bd5b8873c2bad5201836 Mon Sep 17 00:00:00 2001
From: Kenichi Handa
Date: Tue, 27 Jan 2004 01:59:12 +0000
Subject: [PATCH] New file.

---
 admin/charsets/cp51932.awk  | 57 +++++++++++++++++++++++++
 admin/charsets/eucjp-ms.awk | 83 +++++++++++++++++++++++++++++++++++++
 2 files changed, 140 insertions(+)
 create mode 100644 admin/charsets/cp51932.awk
 create mode 100644 admin/charsets/eucjp-ms.awk

diff --git a/admin/charsets/cp51932.awk b/admin/charsets/cp51932.awk
new file mode 100644
index 00000000000..ac5498551d4
--- /dev/null
+++ b/admin/charsets/cp51932.awk
@@ -0,0 +1,57 @@
+# cp51932.awk -- Generate a translation table for CP51932.
+# Copyright (C) 2004
+# National Institute of Advanced Industrial Science and Technology (AIST)
+# Registration Number H13PRO009
+#
+# This file is part of GNU Emacs.
+#
+# GNU Emacs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# GNU Emacs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# Comment:
+
+# Generate a translation table for CP51932 (the Microsoft version of EUC-JP).
+# It maps invalid JISX0208 code points used by CP51932 to Unicode.
+# The 4th field of the input has these meanings:
+# 0: JISX0208 characters.
+# 1: NEC special characters.
+# 2: IBM extension characters.
+# 3: NEC selection of IBM extension characters.
+# Among them, 1 and 3 are the target characters. 2 should have
+# already been mapped to 1 or 3.
+
+BEGIN {  # Emit the banner of the generated Lisp file and open the quoted alist.
+  print ";;; cp51932.el -- translation table for CP51932. -*- no-byte-compile: t -*-";
+  print ";;; Automatically generated from CP932-2BYTE.map";
+  print "(let ((map";
+  printf " '(;JISEXT<->UNICODE";  # no newline here: every pair printed below starts with "\n"
+}
+
+/# [13]/ {  # Records containing "# 1" or "# 3": emit one (JISEXT . UNICODE) pair.
+  printf "\n (#x%s . #x%s)", $5, substr($2, 3, 4);  # skips the first 2 chars of $2 (presumably "0x")
+}
+
+END {  # Close the alist, then emit Lisp that defines the decode and encode tables.
+  print ")))";
+  print " (mapc #'(lambda (x)";  # decode table: replace each car by the JISX0208 character
+  print " (setcar x (decode-char 'japanese-jisx0208 (car x))))";
+  print " map)";
+  print " (define-translation-table 'cp51932-decode map)";
+  print " (mapc #'(lambda (x)";  # encode table: swap car and cdr of every pair in place
+  print " (let ((tmp (car x)))";
+  print " (setcar x (cdr x)) (setcdr x tmp)))";
+  print " map)";
+  print " (define-translation-table 'cp51932-encode map))";
+}
diff --git a/admin/charsets/eucjp-ms.awk b/admin/charsets/eucjp-ms.awk
new file mode 100644
index 00000000000..9dae6807570
--- /dev/null
+++ b/admin/charsets/eucjp-ms.awk
@@ -0,0 +1,83 @@
+# eucjp-ms.awk -- Generate a translation table for eucJP-ms.
+# Copyright (C) 2004
+# National Institute of Advanced Industrial Science and Technology (AIST)
+# Registration Number H13PRO009
+#
+# This file is part of GNU Emacs.
+#
+# GNU Emacs is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# GNU Emacs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Emacs; see the file COPYING. If not, write to the
+# Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+# Boston, MA 02111-1307, USA.
+
+# Comment:
+
+# eucJP-ms is one of the eucJP-open encodings defined at this page:
+# http://www.opengroup.or.jp/jvc/cde/appendix.html
+
+# Emit the banner of the generated Lisp file and open the quoted alist.
+BEGIN {
+  print ";;; eucjp-ms.el -- translation table for eucJP-ms. -*- no-byte-compile: t -*-";
+  print ";;; Automatically generated from eucJP-13th.txt, eucJP-udc.txt, eucJP-ibmext.txt";
+  print "(let ((map";
+  printf " '(;JISEXT<->UNICODE";
+}
+
+# Return the value of the hexadecimal digit string STR (no "0x" prefix).
+# Upper- and lowercase digits are accepted; any other character counts
+# as 0.  The names after STR are local variables, not arguments.
+function decode_hex(str,    n, len, i, pos) {
+  n = 0;
+  len = length(str);
+  for (i = 1; i <= len; i++) {
+    pos = index("0123456789ABCDEF", toupper(substr(str, i, 1)));
+    n = n * 16 + (pos > 0 ? pos - 1 : 0);
+  }
+  return n;
+}
+
+# Three-byte code (0x8F + two bytes): emit a list entry (CODE UNICODE),
+# which END treats as JISX0212.  Only field 1 is tested, so a Unicode
+# value such as 0x8F9E in field 2 cannot select this rule.
+$1 ~ /^0x8[Ff]/ {
+  code = decode_hex(substr($1, 5, 4));
+  code -= 32896;  # code -= 0x8080
+  printf "\n (#x%04x #x%s)", code, substr($2, 3, 4);
+  next;
+}
+
+# Two-byte code whose first byte is >= 0xA0: emit a dotted pair
+# (CODE . UNICODE), which END treats as JISX0208.
+$1 ~ /^0x[A-Fa-f]/ {
+  code = decode_hex(substr($1, 3, 4));
+  code -= 32896;  # code -= 0x8080
+  printf "\n (#x%04x . #x%s)", code, substr($2, 3, 4);
+}
+
+# Close the alist, then emit Lisp that builds the decode table and,
+# after swapping each pair, the encode table.
+END {
+  print ")))";
+  print " (mapc #'(lambda (x)";
+  print " (if (integerp (cdr x))";
+  print " (setcar x (decode-char 'japanese-jisx0208 (car x)))";
+  print " (setcar x (decode-char 'japanese-jisx0212 (car x)))";
+  print " (setcdr x (cadr x))))";
+  print " map)";
+  print " (define-translation-table 'eucjp-ms-decode map)";
+  print " (mapc #'(lambda (x)";
+  print " (let ((tmp (car x)))";
+  print " (setcar x (cdr x)) (setcdr x tmp)))";
+  print " map)";
+  print " (define-translation-table 'eucjp-ms-encode map))";
+}
-- 
2.39.2