Synchronized with the code in 21.4

author Kenichi Handa <handa@m17n.org>

Tue, 1 Oct 2002 06:58:37 +0000 (06:58 +0000)

committer Kenichi Handa <handa@m17n.org>

Tue, 1 Oct 2002 06:58:37 +0000 (06:58 +0000)
author Kenichi Handa <handa@m17n.org>
Tue, 1 Oct 2002 06:58:37 +0000 (06:58 +0000)
committer Kenichi Handa <handa@m17n.org>
Tue, 1 Oct 2002 06:58:37 +0000 (06:58 +0000)
diff --git a/lisp/international/ucs-tables.el b/lisp/international/ucs-tables.el

index 50472edb2deca3d2c4e747df20ab1b86450914a5..64b77aac220c37a04610d960dd18128001d49b25 100644 (file)
--- a/lisp/international/ucs-tables.el
+++ b/lisp/international/ucs-tables.el
@@ -18,8 +18,8 @@
  ;; GNU General Public License for more details.
  
  ;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING.  If not, write to
-;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; along with GNU Emacs; see the file COPYING.  If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  ;; Boston, MA 02111-1307, USA.
  
  ;;; Commentary:
@@ -48,13 +48,11 @@
  ;; all that users normally care about unifying although, for instance,
  ;; Greek occurs in as many as nine Emacs charsets.
  
-;; The translation table `ucs-mule-to-mule-unicode' is populated,
-;; which could be used for more general unification on decoding.  This
-;; is used by the `mule-utf-8' coding system to encode extra
-;; characters, and also by the coding systems set up by code-pages.el.
-;; The decoding tables here take account of
-;; `utf-8-fragment-on-decoding' which may specify decoding Greek and
-;; Cyrillic into 8859 charsets.
+;; The translation-table `utf-translation-table-for-encode' is
+;; populated, which could be used for more general unification on
+;; decoding.  This is used by the `mule-utf-8' coding system to encode
+;; extra characters, and also by the coding systems set up by
+;; code-pages.el.
  
  ;; Unification also puts a `translation-table-for-input' property on
  ;; relevant coding coding systems and arranges for the
@@ -89,13 +87,13 @@
  ;;; Define tables, to be populated later.
  
  (defvar ucs-mule-8859-to-ucs-table (make-translation-table)
-  "Translation table from Emacs ISO-8859 characters to Unicode.
+  "Char table from Emacs ISO-8859 characters to Unicode.
  This maps Emacs characters from the non-Latin-1
  ...-iso8859-... charsets to their Unicode code points.  This is a
  many-to-one mapping.")
  
  (defvar ucs-mule-8859-to-mule-unicode (make-translation-table)
-  "Translation table from Emacs ISO-8859 characters to Mule Unicode.
+  "Char table from Emacs ISO-8859 characters to Mule Unicode.
  This maps Emacs characters from the non-Latin-1
  ...-iso8859-... charsets to characters from the
  mule-unicode-... charsets.  This is a many-to-one mapping.  The
@@ -157,15 +155,7 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
    "Used as `translation-table-for-encode' for iso-8859-15.
  Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
  
-;; Probably defined by utf-8.el.
-(defvar ucs-mule-to-mule-unicode (make-translation-table))
-(unless (get 'ucs-mule-to-mule-unicode 'translation-table)
-  (define-translation-table 'ucs-mule-to-mule-unicode ucs-mule-to-mule-unicode)) 
-(defvar utf-8-translation-table-for-decode (make-translation-table))
-(unless (get 'utf-8-translation-table-for-decode 'translation-table)
-  (define-translation-table 'utf-8-translation-table-for-decode
-    utf-8-translation-table-for-decode))
-(defvar utf-8-fragment-on-decoding nil)
+(defvar translation-table-for-input (make-translation-table))
  
  ;;; Set up the tables.
  
@@ -742,6 +732,31 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
          (?\\e,H<\e(B . ?\x00BC) ;; VULGAR FRACTION ONE QUARTER
          (?\\e,H=\e(B . ?\x00BD) ;; VULGAR FRACTION ONE HALF
          (?\\e,H>\e(B . ?\x00BE) ;; VULGAR FRACTION THREE QUARTERS
+        ;; These are commented out since the current 8859-8 standard
+        ;; does not yet define these codepoints, although there are
+        ;; drafts which do.
+;       (?\\e,H@\e(B . ?\x05B0) ;; HEBREW POINT SHEVA
+;       (?\\e,HA\e(B . ?\x05B1) ;; HEBREW POINT HATAF SEGOL
+;       (?\\e,HB\e(B . ?\x05B2) ;; HEBREW POINT HATAF PATAH
+;       (?\\e,HC\e(B . ?\x05B3) ;; HEBREW POINT HATAF QAMATS
+;       (?\\e,HD\e(B . ?\x05B4) ;; HEBREW POINT HIRIQ
+;       (?\\e,HE\e(B . ?\x05B5) ;; HEBREW POINT TSERE
+;       (?\\e,HF\e(B . ?\x05B6) ;; HEBREW POINT SEGOL
+;       (?\\e,HG\e(B . ?\x05B7) ;; HEBREW POINT PATAH
+;       (?\\e,HH\e(B . ?\x05B8) ;; HEBREW POINT QAMATS
+;       (?\\e,HI\e(B . ?\x05B9) ;; HEBREW POINT HOLAM
+;       (?\\e,HK\e(B . ?\x05BB) ;; HEBREW POINT QUBUTS
+;       (?\\e,HL\e(B . ?\x05BC) ;; HEBREW POINT DAGESH
+;       (?\\e,HM\e(B . ?\x05BD) ;; HEBREW POINT METEG
+;       (?\\e,HN\e(B . ?\x05BE) ;; HEBREW POINT MAQAF
+;       (?\\e,HO\e(B . ?\x05BF) ;; HEBREW POINT RAFE
+;       (?\\e,HP\e(B . ?\x05C0) ;; HEBREW PUNCTUATION PASEQ
+;       (?\\e,HQ\e(B . ?\x05C1) ;; HEBREW POINT SHIN DOT
+;       (?\\e,HR\e(B . ?\x05C2) ;; HEBREW POINT SIN DOT
+;       (?\\e,HS\e(B . ?\x05C3) ;; HEBREW PUNCTUATION SOF PASUQ
+        (?\\e,H[\e(B . ?\x202D) ;; LEFT-TO-RIGHT OVERRIDE
+        (?\\e,H\\e(B . ?\x202E) ;; RIGHT-TO-LEFT OVERRIDE
+        (?\\e,H]\e(B . ?\x202C) ;; POP DIRECTIONAL FORMATTING
          (?\\e,H_\e(B . ?\x2017) ;; DOUBLE LOW LINE
          (?\\e,H`\e(B . ?\x05D0) ;; HEBREW LETTER ALEF
          (?\\e,Ha\e(B . ?\x05D1) ;; HEBREW LETTER BET
@@ -770,6 +785,8 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
          (?\\e,Hx\e(B . ?\x05E8) ;; HEBREW LETTER RESH
          (?\\e,Hy\e(B . ?\x05E9) ;; HEBREW LETTER SHIN
          (?\\e,Hz\e(B . ?\x05EA) ;; HEBREW LETTER TAV
+        (?\\e,H{\e(B . ?\x202A) ;; LEFT-TO-RIGHT EMBEDDING
+        (?\\e,H|\e(B . ?\x202B) ;; RIGHT-TO-LEFT EMBEDDING
          (?\\e,H}\e(B . ?\x200E) ;; LEFT-TO-RIGHT MARK
          (?\\e,H~\e(B . ?\x200F) ;; RIGHT-TO-LEFT MARK
          ))
@@ -1092,10 +1109,7 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
           ;;      (aset ucs-mule-unicode-to-mule-8859 mu mule)
           (aset ucs-mule-8859-to-mule-unicode mule mu)
           (aset ucs-mule-to-mule-unicode mule mu)))))
-  ;; The table optimizing here and elsewhere probably isn't very
-  ;; useful, but seems good practice.
-  (optimize-char-table ucs-mule-to-mule-unicode)
-  (optimize-char-table ucs-mule-8859-to-mule-unicode)
+
    ;; Derive tables that can be used as per-coding-system
    ;; `translation-table-for-encode's.
    (dolist (n (list 15 14 9 8 7 5 4 3 2 1))
@@ -1103,12 +1117,16 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
            (encode-translator (set (intern (format "ucs-8859-%d-encode-table"
                                                    n))
                                    (make-translation-table)))
+          (coding-system
+           (coding-system-base (intern (format "iso-8859-%d" n))))
+          (dependency (coding-system-get coding-system 'dependency))
            elt)
-      ;; Start with the mule-unicode component.
-      (dolist (pair alist)
-       (let ((mule (car pair))
-             (mu (decode-char 'ucs (cdr pair))))
-         (aset encode-translator mu mule)))
+      ;; Start with the mule-unicode component (except for latin-iso8859-1).
+      (if (/= n 1)
+         (dolist (pair alist)
+           (let ((mule (car pair))
+                 (mu (decode-char 'ucs (cdr pair))))
+             (aset encode-translator mu mule))))
        ;; Find characters from other 8859 sets which map to the same
        ;; unicode as some character in this set.
        (map-char-table (lambda (k v)
@@ -1116,16 +1134,16 @@ Translates from the iso8859 charsets and `mule-unicode-0100-24ff'.")
                                  (not (assq k alist)))
                             (aset encode-translator k (car elt))))
                       ucs-mule-8859-to-ucs-table)
-      (optimize-char-table encode-translator))))
+      (optimize-char-table encode-translator)
  
-;; Register for use in CCL.
-(define-translation-table 'ucs-mule-8859-to-mule-unicode
-  ucs-mule-8859-to-mule-unicode)
-(define-translation-table 'ucs-mule-to-mule-unicode
-  ucs-mule-to-mule-unicode)
+      (or (memq 'unify-8859-on-encoding-mode dependency)
+         (setq dependency (cons 'unify-8859-on-encoding-mode dependency)))
+      (or (memq 'unify-8859-on-decoding-mode dependency)
+         (setq dependency (cons 'unify-8859-on-decoding-mode dependency)))
+      (coding-system-put coding-system 'dependency dependency))))
  
-(defun ucs-unify-8859 (&optional encode-only)
-  "Set up translation tables for unifying characters from ISO 8859.
+(defun ucs-unify-8859 (for-encode for-decode)
+  "Set up translation-tables for unifying characters from ISO 8859.
  
  On decoding, non-ASCII characters are mapped into the `iso-latin-1'
  and `mule-unicode-0100-24ff' charsets.  On encoding, these are mapped
@@ -1133,140 +1151,104 @@ back appropriate for the coding system.
  
  With prefix arg, do unification on encoding only, i.e. don't unify
  everything on input operations."
-  (interactive "P")
-  (unless encode-only
+  (when for-decode
      ;; Unify 8859 on decoding.  (Non-CCL coding systems only.)
-    (if utf-8-fragment-on-decoding
-       (map-char-table
-        (lambda (k v)
-          (if v (aset ucs-mule-to-mule-unicode v nil)))
-        utf-8-translation-table-for-decode)
-      ;; Reset in case it was changed.
-      (map-char-table
-       (lambda (k v)
-        (if v (aset ucs-mule-to-mule-unicode v k)))
-       utf-8-translation-table-for-decode))
      (set-char-table-parent standard-translation-table-for-decode
                            ucs-mule-8859-to-mule-unicode)
+
      ;; Translate Quail input globally.
      (setq-default translation-table-for-input ucs-mule-to-mule-unicode)
      ;; In case these are set up, but we should use the global
-    ;; translation table.
+    ;; translation-table.
      (remove-hook 'quail-activate-hook 'ucs-quail-activate)
      (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))
-  ;; Adjust the 8859 coding systems to fragment the unified characters
-  ;; on encoding.
-  (dolist (n '(1 2 3 4 5 7 8 9 14 15))
-    (let* ((coding-system
-           (coding-system-base (intern (format "iso-8859-%d" n))))
-          (table (symbol-value
-                  (intern (format "ucs-8859-%d-encode-table" n))))
-          (safe (coding-system-get coding-system 'safe-chars)))
-      ;; Actually, the coding system's safe-chars are not normally
-      ;; used after they've been registered, but we might as well
-      ;; record them.  Setting the parent here is a convenience.
-      (set-char-table-parent safe table)
-      ;; Update the table of what encodes to what.
-      (register-char-codings coding-system table)
-      (coding-system-put coding-system 'translation-table-for-encode table)
-      (coding-system-put coding-system 'translation-table-for-input table)))
-  ;; Arrange local translation tables for Quail input.
-  (add-hook 'quail-activate-hook 'ucs-quail-activate)
-  (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))
-
-(defun ucs-fragment-8859 (&optional encode-only)
+
+  (when for-encode
+    ;; Make mule-utf-* encode all characters in ucs-mule-to-mule-unicode.
+    (let ((coding-list '(mule-utf-8 mule-utf-16-be mule-utf-16-le)))
+      (define-translation-table 'utf-translation-table-for-encode
+       ucs-mule-to-mule-unicode)
+      (dolist (coding coding-list)
+       (set-char-table-parent (coding-system-get coding 'safe-chars)
+                              ucs-mule-to-mule-unicode)
+       (register-char-codings coding ucs-mule-to-mule-unicode)))
+
+    ;; Adjust the 8859 coding systems to fragment the unified characters
+    ;; on encoding.
+    (dolist (n '(1 2 3 4 5 7 8 9 14 15))
+      (let* ((coding-system
+             (coding-system-base (intern (format "iso-8859-%d" n))))
+            (table (symbol-value
+                    (intern (format "ucs-8859-%d-encode-table" n))))
+            (safe (coding-system-get coding-system 'safe-chars)))
+       ;; Actually, the coding system's safe-chars are not normally
+       ;; used after they've been registered, but we might as well
+       ;; record them.  Setting the parent here is a convenience.
+       (set-char-table-parent safe table)
+       ;; Update the table of what encodes to what.
+       (register-char-codings coding-system table)
+       (coding-system-put coding-system 'translation-table-for-encode table)
+       (coding-system-put coding-system 'translation-table-for-input table)))
+    ;; Arrange local translation-tables for Quail input.
+    (add-hook 'quail-activate-hook 'ucs-quail-activate)
+    (add-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)))
+
+(defun ucs-fragment-8859 (for-encode for-decode)
    "Undo the unification done by `ucs-unify-8859'.
  With prefix arg, undo unification on encoding only, i.e. don't undo
  unification on input operations."
-  (interactive "P")
-  ;; Maybe fix decoding.
-  (unless encode-only
-    ;; Unify 8859 on decoding.  (Non-CCL coding systems only.)
+  (when for-decode
+    ;; Don't unify 8859 on decoding.
+    ;; For non-CCL coding systems (e.g. iso-latin-2).
      (set-char-table-parent standard-translation-table-for-decode nil)
+    ;; For Quail input.
      (setq-default translation-table-for-input nil))
-  ;; Fix encoding.  For each charset, remove the entries in
-  ;; `char-coding-system-table' added to its safe-chars table (as its
-  ;; parent).
-  (dolist (n '(1 2 3 4 5 7 8 9 14 15))
-    (let* ((coding-system
-           (coding-system-base (intern (format "iso-8859-%d" n))))
-          (table (symbol-value
-                  (intern (format "ucs-8859-%d-encode-table" n))))
-          (safe (coding-system-get coding-system 'safe-chars)))
+
+  (when for-encode
+    ;; Disable mule-utf-* for all characters in
+    ;; ucs-mule-to-mule-unicode except those originally supported.
+    (let ((coding-list '(mule-utf-8 mule-utf-16-be mule-utf-16-le))
+         (safe (coding-system-get 'mule-utf-8 'safe-chars)))
+      (dolist (coding coding-list)
+       (set-char-table-parent (coding-system-get coding 'safe-chars) nil))
+      ;; Here we assume that all mule-utf-* have the same character
+      ;; repertory, thus we can use SAFE for all of them.
        (map-char-table
         (lambda (key val)
-        (if (and (>= key 128) val)
-            (let ((codings (aref char-coding-system-table key)))
-              (aset char-coding-system-table key
-                    (delq coding-system codings)))))
-       (char-table-parent safe))
-      (set-char-table-parent safe nil)
-      (coding-system-put coding-system 'translation-table-for-encode nil)
-      (coding-system-put coding-system 'translation-table-for-input nil)))
-  (optimize-char-table char-coding-system-table)
-  (remove-hook 'quail-activate-hook 'ucs-quail-activate)
-  (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup))
-
-;;;###autoload
-(define-minor-mode unify-8859-on-encoding-mode
-  "Set up translation tables for unifying ISO 8859 characters on encoding.
-
-The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and
-8859-15 (Latin-9) differ only in a few characters.  Emacs normally
-distinguishes equivalent characters from those ISO-8859 character sets
-which are built in to Emacs.  This behaviour is essentially inherited
-from the European-originated international standards.  Treating them
-equivalently, by translating to and from a single representation is
-called `unification'.  (The `utf-8' coding system treats the
-characters of European scripts in a unified manner.)
-
-In this mode, on encoding -- i.e. output operations -- non-ASCII
-characters from the built-in ISO 8859 and `mule-unicode-0100-24ff'
-charsets are handled automatically by the coding system used if it can
-represent them.  Thus, say, an e-acute from the Latin-1 charset (the
-unified representation) in a buffer saved as Latin-9 will be encoded
-directly to a byte value 233.  By default, in contrast, you would be
-prompted for a general coding system to use for saving the file, which
-can cope with separate Latin-1 and Latin-9 representations of e-acute.
-
-Also sets hooks that arrange `translation-table-for-input' to be set
-up locally when Quail input methods are activated.  This will often
-allow input generated by Quail input methods to conform with what the
-buffer's file coding system can encode.  Thus you could use a Latin-2
-input method to search for e-acute in a Latin-1 buffer.
-
-See also command `unify-8859-on-decoding-mode'."
-  :group 'mule
-  :global t
-  :init-value nil
-  (if unify-8859-on-encoding-mode
-      (ucs-unify-8859 t)
-    (ucs-fragment-8859 t)))
-
-(custom-add-version 'unify-8859-on-encoding-mode "21.3") ; who knows?
-
-;;;###autoload
-(define-minor-mode unify-8859-on-decoding-mode
-  "Set up translation tables for unifying ISO 8859 characters on decoding.
-On decoding, i.e. input operations, non-ASCII characters from the
-built-in ISO 8859 charsets are unified by mapping them into the
-`iso-latin-1' and `mule-unicode-0100-24ff' charsets.
-
-Also sets `translation-table-for-input' globally, so that Quail input
-methods produce unified characters.
-
-See also command `unify-8859-on-encoding-mode' and the user option
-`utf-8-fragment-on-decoding'."
-  :group 'mule
-  :global t
-  :init-value nil
-  (if unify-8859-on-decoding-mode
-      (ucs-unify-8859)
-    (ucs-fragment-8859)))
-
-(custom-add-dependencies 'unify-8859-on-decoding-mode
-                        '(utf-8-fragment-on-decoding))
-(custom-add-version 'unify-8859-on-decoding-mode "21.3") ; who knows?
+        (if (and (>= key 128) val
+                 (not (aref safe key)))
+            (aset char-coding-system-table key
+                  (delq 'mule-utf-8
+                        (delq 'mule-utf-16-le
+                              (delq 'mule-utf-16-be
+                                    (aref char-coding-system-table key)))))))
+       ucs-mule-to-mule-unicode)
+
+      (define-translation-table 'utf-translation-table-for-encode))
+
+    ;; For each charset, remove the entries in
+    ;; `char-coding-system-table' added to its safe-chars table (as
+    ;; its parent).
+    (dolist (n '(1 2 3 4 5 7 8 9 14 15))
+      (let* ((coding-system
+             (coding-system-base (intern (format "iso-8859-%d" n))))
+            (table (symbol-value
+                    (intern (format "ucs-8859-%d-encode-table" n))))
+            (safe (coding-system-get coding-system 'safe-chars)))
+       (when (char-table-parent safe)
+         (map-char-table
+          (lambda (key val)
+            (if (and (>= key 128) val)
+                (let ((codings (aref char-coding-system-table key)))
+                  (aset char-coding-system-table key
+                        (delq coding-system codings)))))
+          (char-table-parent safe))
+         (set-char-table-parent safe nil))
+       (coding-system-put coding-system 'translation-table-for-encode nil)
+       (coding-system-put coding-system 'translation-table-for-input nil)))
+    (optimize-char-table char-coding-system-table)
+    (remove-hook 'quail-activate-hook 'ucs-quail-activate)
+    (remove-hook 'minibuffer-setup-hook 'ucs-minibuffer-setup)))
  
  (defun ucs-insert (arg)
    "Insert the Emacs character representation of the given Unicode.
@@ -1284,7 +1266,8 @@ Interactively, prompts for a hex string giving the code."
  ;; We only set up translation on encoding to utf-8.  Also translation
  ;; tables ucs-CS-encode-table are constructed for some coding systems
  ;; CS which could be used as `translation-table-for-encode', currently
-;; for in-is13194, lao, thai, tibetan-iso-8bit and vietnamese-viscii.
+;; for indian-is13194, lao, thai, tibetan-iso-8bit and
+;; vietnamese-viscii.
  
  ;; The alists here cover both coding systems (external charsets), like
  ;; VISCII, and individual Emacs charsets, like `ipa'.
@@ -2119,91 +2102,90 @@ Interactively, prompts for a hex string giving the code."
          (?\e$(3%$\e(B . ?\e$,1Q;\e(B)
          (?\e$(3%%\e(B . ?\e$,1Q<\e(B)))
  
-;; Uses post-21.1 definitions.
-;;       (in-is13194
-;;        '((?\e(5!\e(B . ?\e$,15A\e(B)
-;;      (?\e(5"\e(B . ?\e$,15B\e(B)
-;;      (?\e(5#\e(B . ?\e$,15C\e(B)
-;;      (?\e(5$\e(B . ?\e$,15E\e(B)
-;;      (?\e(5%\e(B . ?\e$,15F\e(B)
-;;      (?\e(5&\e(B . ?\e$,15G\e(B)
-;;      (?\e(5'\e(B . ?\e$,15H\e(B)
-;;      (?\e(5(\e(B . ?\e$,15I\e(B)
-;;      (?\e(5)\e(B . ?\e$,15J\e(B)
-;;      (?\e(5*\e(B . ?\e$,15K\e(B)
-;;      (?\e(5+\e(B . ?\e$,15N\e(B)
-;;      (?\e(5,\e(B . ?\e$,15O\e(B)
-;;      (?\e(5-\e(B . ?\e$,15P\e(B)
-;;      (?\e(5.\e(B . ?\e$,15M\e(B)
-;;      (?\e(5/\e(B . ?\e$,15R\e(B)
-;;      (?\e(50\e(B . ?\e$,15S\e(B)
-;;      (?\e(51\e(B . ?\e$,15T\e(B)
-;;      (?\e(52\e(B . ?\e$,15M\e(B)
-;;      (?\e(53\e(B . ?\e$,15U\e(B)
-;;      (?\e(54\e(B . ?\e$,15V\e(B)
-;;      (?\e(55\e(B . ?\e$,15W\e(B)
-;;      (?\e(56\e(B . ?\e$,15X\e(B)
-;;      (?\e(57\e(B . ?\e$,15Y\e(B)
-;;      (?\e(58\e(B . ?\e$,15Z\e(B)
-;;      (?\e(59\e(B . ?\e$,15[\e(B)
-;;      (?\e(5:\e(B . ?\e$,15\\e(B)
-;;      (?\e(5;\e(B . ?\e$,15]\e(B)
-;;      (?\e(5<\e(B . ?\e$,15^\e(B)
-;;      (?\e(5=\e(B . ?\e$,15_\e(B)
-;;      (?\e(5>\e(B . ?\e$,15`\e(B)
-;;      (?\e(5?\e(B . ?\e$,15a\e(B)
-;;      (?\e(5@\e(B . ?\e$,15b\e(B)
-;;      (?\e(5A\e(B . ?\e$,15c\e(B)
-;;      (?\e(5B\e(B . ?\e$,15d\e(B)
-;;      (?\e(5C\e(B . ?\e$,15e\e(B)
-;;      (?\e(5D\e(B . ?\e$,15f\e(B)
-;;      (?\e(5E\e(B . ?\e$,15g\e(B)
-;;      (?\e(5F\e(B . ?\e$,15h\e(B)
-;;      (?\e(5G\e(B . ?\e$,15i\e(B)
-;;      (?\e(5H\e(B . ?\e$,15j\e(B)
-;;      (?\e(5I\e(B . ?\e$,15k\e(B)
-;;      (?\e(5J\e(B . ?\e$,15l\e(B)
-;;      (?\e(5K\e(B . ?\e$,15m\e(B)
-;;      (?\e(5L\e(B . ?\e$,15n\e(B)
-;;      (?\e(5M\e(B . ?\e$,15o\e(B)
-;;      (?\e(5N\e(B . ?\e$,16?\e(B)
-;;      (?\e(5O\e(B . ?\e$,15p\e(B)
-;;      (?\e(5P\e(B . ?\e$,15q\e(B)
-;;      (?\e(5Q\e(B . ?\e$,15r\e(B)
-;;      (?\e(5R\e(B . ?\e$,15s\e(B)
-;;      (?\e(5S\e(B . ?\e$,15t\e(B)
-;;      (?\e(5T\e(B . ?\e$,15u\e(B)
-;;      (?\e(5U\e(B . ?\e$,15v\e(B)
-;;      (?\e(5V\e(B . ?\e$,15w\e(B)
-;;      (?\e(5W\e(B . ?\e$,15x\e(B)
-;;      (?\e(5X\e(B . ?\e$,15y\e(B)
-;;      (?\e(5Z\e(B . ?\e$,15~\e(B)
-;;      (?\e(5[\e(B . ?\e$,15\7f\e(B)
-;;      (?\e(5\\e(B . ?\e$,16 \e(B)
-;;      (?\e(5]\e(B . ?\e$,16!\e(B)
-;;      (?\e(5^\e(B . ?\e$,16"\e(B)
-;;      (?\e(5_\e(B . ?\e$,16#\e(B)
-;;      (?\e(5`\e(B . ?\e$,16&\e(B)
-;;      (?\e(5a\e(B . ?\e$,16'\e(B)
-;;      (?\e(5b\e(B . ?\e$,16(\e(B)
-;;      (?\e(5c\e(B . ?\e$,16%\e(B)
-;;      (?\e(5d\e(B . ?\e$,16*\e(B)
-;;      (?\e(5e\e(B . ?\e$,16+\e(B)
-;;      (?\e(5f\e(B . ?\e$,16,\e(B)
-;;      (?\e(5g\e(B . ?\e$,16)\e(B)
-;;      (?\e(5h\e(B . ?\e$,16-\e(B)
-;;      (?\e(5i\e(B . ?\e$,15|\e(B)
-;;      (?\e(5j\e(B . ?\e$,16D\e(B)
-;;      (?\e(5q\e(B . ?\e$,16F\e(B)
-;;      (?\e(5r\e(B . ?\e$,16G\e(B)
-;;      (?\e(5s\e(B . ?\e$,16H\e(B)
-;;      (?\e(5t\e(B . ?\e$,16I\e(B)
-;;      (?\e(5u\e(B . ?\e$,16J\e(B)
-;;      (?\e(5v\e(B . ?\e$,16K\e(B)
-;;      (?\e(5w\e(B . ?\e$,16L\e(B)
-;;      (?\e(5x\e(B . ?\e$,16M\e(B)
-;;      (?\e(5y\e(B . ?\e$,16N\e(B)
-;;      (?\e(5z\e(B . ?\e$,16O\e(B)))
+      (indian-is13194
+       '((?\e(5!\e(B . ?\e$,15A\e(B)
+        (?\e(5"\e(B . ?\e$,15B\e(B)
+        (?\e(5#\e(B . ?\e$,15C\e(B)
+        (?\e(5$\e(B . ?\e$,15E\e(B)
+        (?\e(5%\e(B . ?\e$,15F\e(B)
+        (?\e(5&\e(B . ?\e$,15G\e(B)
+        (?\e(5'\e(B . ?\e$,15H\e(B)
+        (?\e(5(\e(B . ?\e$,15I\e(B)
+        (?\e(5)\e(B . ?\e$,15J\e(B)
+        (?\e(5*\e(B . ?\e$,15K\e(B)
+        (?\e(5+\e(B . ?\e$,15N\e(B)
+        (?\e(5,\e(B . ?\e$,15O\e(B)
+        (?\e(5-\e(B . ?\e$,15P\e(B)
+        (?\e(5.\e(B . ?\e$,15M\e(B)
+        (?\e(5/\e(B . ?\e$,15R\e(B)
+        (?\e(50\e(B . ?\e$,15S\e(B)
+        (?\e(51\e(B . ?\e$,15T\e(B)
+        (?\e(52\e(B . ?\e$,15M\e(B)
+        (?\e(53\e(B . ?\e$,15U\e(B)
+        (?\e(54\e(B . ?\e$,15V\e(B)
+        (?\e(55\e(B . ?\e$,15W\e(B)
+        (?\e(56\e(B . ?\e$,15X\e(B)
+        (?\e(57\e(B . ?\e$,15Y\e(B)
+        (?\e(58\e(B . ?\e$,15Z\e(B)
+        (?\e(59\e(B . ?\e$,15[\e(B)
+        (?\e(5:\e(B . ?\e$,15\\e(B)
+        (?\e(5;\e(B . ?\e$,15]\e(B)
+        (?\e(5<\e(B . ?\e$,15^\e(B)
+        (?\e(5=\e(B . ?\e$,15_\e(B)
+        (?\e(5>\e(B . ?\e$,15`\e(B)
+        (?\e(5?\e(B . ?\e$,15a\e(B)
+        (?\e(5@\e(B . ?\e$,15b\e(B)
+        (?\e(5A\e(B . ?\e$,15c\e(B)
+        (?\e(5B\e(B . ?\e$,15d\e(B)
+        (?\e(5C\e(B . ?\e$,15e\e(B)
+        (?\e(5D\e(B . ?\e$,15f\e(B)
+        (?\e(5E\e(B . ?\e$,15g\e(B)
+        (?\e(5F\e(B . ?\e$,15h\e(B)
+        (?\e(5G\e(B . ?\e$,15i\e(B)
+        (?\e(5H\e(B . ?\e$,15j\e(B)
+        (?\e(5I\e(B . ?\e$,15k\e(B)
+        (?\e(5J\e(B . ?\e$,15l\e(B)
+        (?\e(5K\e(B . ?\e$,15m\e(B)
+        (?\e(5L\e(B . ?\e$,15n\e(B)
+        (?\e(5M\e(B . ?\e$,15o\e(B)
+        (?\e(5N\e(B . ?\e$,16?\e(B)
+        (?\e(5O\e(B . ?\e$,15p\e(B)
+        (?\e(5P\e(B . ?\e$,15q\e(B)
+        (?\e(5Q\e(B . ?\e$,15r\e(B)
+        (?\e(5R\e(B . ?\e$,15s\e(B)
+        (?\e(5S\e(B . ?\e$,15t\e(B)
+        (?\e(5T\e(B . ?\e$,15u\e(B)
+        (?\e(5U\e(B . ?\e$,15v\e(B)
+        (?\e(5V\e(B . ?\e$,15w\e(B)
+        (?\e(5W\e(B . ?\e$,15x\e(B)
+        (?\e(5X\e(B . ?\e$,15y\e(B)
+        (?\e(5Z\e(B . ?\e$,15~\e(B)
+        (?\e(5[\e(B . ?\e$,15\7f\e(B)
+        (?\e(5\\e(B . ?\e$,16 \e(B)
+        (?\e(5]\e(B . ?\e$,16!\e(B)
+        (?\e(5^\e(B . ?\e$,16"\e(B)
+        (?\e(5_\e(B . ?\e$,16#\e(B)
+        (?\e(5`\e(B . ?\e$,16&\e(B)
+        (?\e(5a\e(B . ?\e$,16'\e(B)
+        (?\e(5b\e(B . ?\e$,16(\e(B)
+        (?\e(5c\e(B . ?\e$,16%\e(B)
+        (?\e(5d\e(B . ?\e$,16*\e(B)
+        (?\e(5e\e(B . ?\e$,16+\e(B)
+        (?\e(5f\e(B . ?\e$,16,\e(B)
+        (?\e(5g\e(B . ?\e$,16)\e(B)
+        (?\e(5h\e(B . ?\e$,16-\e(B)
+        (?\e(5i\e(B . ?\e$,15|\e(B)
+        (?\e(5j\e(B . ?\e$,16D\e(B)
+        (?\e(5q\e(B . ?\e$,16F\e(B)
+        (?\e(5r\e(B . ?\e$,16G\e(B)
+        (?\e(5s\e(B . ?\e$,16H\e(B)
+        (?\e(5t\e(B . ?\e$,16I\e(B)
+        (?\e(5u\e(B . ?\e$,16J\e(B)
+        (?\e(5v\e(B . ?\e$,16K\e(B)
+        (?\e(5w\e(B . ?\e$,16L\e(B)
+        (?\e(5x\e(B . ?\e$,16M\e(B)
+        (?\e(5y\e(B . ?\e$,16N\e(B)
+        (?\e(5z\e(B . ?\e$,16O\e(B)))
  
        (katakana-jisx0201
         '((?\e(I!\e(B . ?\e$,3sa\e(B)
@@ -2413,8 +2395,7 @@ Interactively, prompts for a hex string giving the code."
         safe-charsets)
      (dolist (cs '(vietnamese-viscii lao chinese-sisheng ipa
                   katakana-jisx0201 thai-tis620 tibetan-iso-8bit
-                 ;; in-is13194
-                 ethiopic))
+                 indian-is13194 ethiopic))
        ;; These tables could be used as translation-table-for-encode by
        ;; the relevant coding systems.
        (let ((encode-translator
@@ -2441,21 +2422,80 @@ Interactively, prompts for a hex string giving the code."
                                   encode-translator))
               ((memq cs '(lao thai-tis620 tibetan-iso-8bit))
                (coding-system-put cs 'translation-table-for-input cs)))))
-    (optimize-char-table ucs-mule-to-mule-unicode)
      (dolist (c safe-charsets)
        (aset table (make-char c) t))))
  
-(defvar translation-table-for-input (make-translation-table))
+(define-minor-mode unify-8859-on-encoding-mode
+  "Set up translation-tables for unifying ISO 8859 characters on encoding.
+
+The ISO 8859 characters sets overlap, e.g. 8859-1 (Latin-1) and
+8859-15 (Latin-9) differ only in a few characters.  Emacs normally
+distinguishes equivalent characters from those ISO-8859 character sets
+which are built in to Emacs.  This behaviour is essentially inherited
+from the European-originated international standards.  Treating them
+equivalently, by translating to and from a single representation is
+called `unification'.  (The `utf-8' coding system treats the
+characters of European scripts in a unified manner.)
+
+In this mode, on encoding -- i.e. output operations -- non-ASCII
+characters from the built-in ISO 8859 and `mule-unicode-0100-24ff'
+charsets are handled automatically by the coding system used if it can
+represent them.  Thus, say, an e-acute from the Latin-1 charset (the
+unified representation) in a buffer saved as Latin-9 will be encoded
+directly to a byte value 233.  By default, in contrast, you would be
+prompted for a general coding system to use for saving the file, which
+can cope with separate Latin-1 and Latin-9 representations of e-acute.
+
+Also sets hooks that arrange `translation-table-for-input' to be set
+up locally when Quail input methods are activated.  This will often
+allow input generated by Quail input methods to conform with what the
+buffer's file coding system can encode.  Thus you could use a Latin-2
+input method to search for e-acute in a Latin-1 buffer.
+
+See also command `unify-8859-on-decoding-mode'."
+  :group 'mule
+  :global t
+  :init-value t
+  (if unify-8859-on-encoding-mode
+      (ucs-unify-8859 t nil)
+    (ucs-fragment-8859 t nil)))
+
+(custom-add-version 'unify-8859-on-encoding-mode "21.3")
  
-;; Arrange to set up the translation table for Quail.  This probably
+(define-minor-mode unify-8859-on-decoding-mode
+  "Set up translation-tables for unifying ISO 8859 characters on decoding.
+On decoding, i.e. input operations, non-ASCII characters from the
+built-in ISO 8859 charsets are unified by mapping them into the
+`iso-latin-1' and `mule-unicode-0100-24ff' charsets.
+
+Also sets `translation-table-for-input' globally, so that Quail input
+methods produce unified characters.
+
+See also command `unify-8859-on-encoding-mode'."
+  :group 'mule
+  :global t
+  :init-value nil
+  (if unify-8859-on-decoding-mode
+      (ucs-unify-8859 nil t)
+    (ucs-fragment-8859 nil t)))
+
+(custom-add-version 'unify-8859-on-decoding-mode "21.3")
+
+;; Synchronize the status with the initial value of
+;; unify-8859-on-encoding-mode and unify-8859-on-decoding-mode.
+(ucs-unify-8859 t nil)
+
+;; Arrange to set up the translation-table for Quail.  This probably
  ;; isn't foolproof.
  (defun ucs-quail-activate ()
    "Set up an appropriate `translation-table-for-input' for current buffer.
  Intended to be added to `quail-activate-hook'."
-  (let ((cs (coding-system-base buffer-file-coding-system)))
+  (let ((cs (and buffer-file-coding-system
+                (coding-system-base buffer-file-coding-system))))
      (if (eq cs 'undecided)
-       (setq cs (coding-system-base default-buffer-file-coding-system)))
-    (if (coding-system-get cs 'translation-table-for-input)
+       (setq cs (and default-buffer-file-coding-system
+                     (coding-system-base default-buffer-file-coding-system))))
+    (if (and cs (coding-system-get cs 'translation-table-for-input))
         (set (make-variable-buffer-local 'translation-table-for-input)
              (coding-system-get cs 'translation-table-for-input)))))
  
@@ -2463,136 +2503,11 @@ Intended to be added to `quail-activate-hook'."
  ;; the above to work in it.
  (defun ucs-minibuffer-setup ()
    "Set up an appropriate `buffer-file-coding-system' for current buffer.
-Does so by inheriting it from the cadr of the current buffer list.
  Intended to be added to `minibuffer-setup-hook'."
    (set (make-local-variable 'buffer-file-coding-system)
         (with-current-buffer (cadr (buffer-list))   ; value is read in the second buffer of (buffer-list)
          buffer-file-coding-system)))               ; presumably the buffer the minibuffer was entered from -- verify
  
-;; Modified to allow display of arbitrary characters with an
-;; iso-10646-encoded (`Unicode') font.
-(define-ccl-program ccl-encode-unicode-font
-  `(0
-    ((if (r0 == ,(charset-id 'ascii))
-        ((r2 = r1)
-         (r1 = 0))
-       (
-       ;; Look for a translation for non-ASCII chars.  For a 2D
-       ;; charset, produce a single code for the translation.
-       ;; Official 2D sets are in the charset id range [#x90,#x99],
-       ;; private ones in the range [#xf0,#xfe] (with #xff not used).
-       ;; Fixme: Is there a better way to do this?
-       (r3 = (r0 >= #x90))
-       (r3 &= (r0 <= #x99))
-       (r3 |= (r0 >= #xf0))
-       (if r3                          ; 2D input
-           (r1 = ((r1 << 7) | r2)))
-       (translate-character ucs-mule-to-mule-unicode r0 r1)
-       (r3 = (r0 >= #x90))
-       (r3 &= (r0 <= #x99))
-       (r3 |= (r0 >= #xf0))
-       (if r3                          ; 2D translation
-           ((r2 = (r1 & 127))
-            (r1 = (r1 >> 7))))
-       (if (r0 == ,(charset-id 'latin-iso8859-1))
-           ((r2 = (r1 + 128))
-            (r1 = 0))
-         (if (r0 == ,(charset-id 'mule-unicode-0100-24ff))
-             ((r1 *= 96)
-              (r1 += r2)
-              (r1 += ,(- #x100 (* 32 96) 32))
-              (r1 >8= 0)
-              (r2 = r7))
-           (if (r0 == ,(charset-id 'mule-unicode-2500-33ff))
-               ((r1 *= 96)
-                (r1 += r2)
-                (r1 += ,(- #x2500 (* 32 96) 32))
-                (r1 >8= 0)
-                (r2 = r7))
-             (if (r0 == ,(charset-id 'mule-unicode-e000-ffff))
-                 ((r1 *= 96)
-                  (r1 += r2)
-                  (r1 += ,(- #xe000 (* 32 96) 32))
-                  (r1 >8= 0)
-                  (r2 = r7))))))))))
-  "Encode characters for display with iso10646 font.
-Translate through table `ucs-mule-to-mule-unicode' initially.")
-
-;; Redefinitions of the versions in mule.el to take into account
-;; relevant translation tables.
-
-(defun decode-char (ccs code-point &optional restriction)
-  "Return character specified by coded character set CCS and CODE-POINT in it.
-Return nil if such a character is not supported.
-Currently the only supported coded character set is `ucs' (ISO/IEC
-10646: Universal Multi-Octet Coded Character Set), and the result is
-translated through the char table `utf-8-translation-table-for-decode'.
-
-Optional argument RESTRICTION specifies a way to map the pair of CCS
-and CODE-POINT to a character.  Currently not supported and just ignored."
-  (cond
-   ((eq ccs 'ucs)
-    (let ((c (cond
-             ((< code-point 160)
-              code-point)
-             ((< code-point 256)
-              (make-char 'latin-iso8859-1 code-point))
-             ((< code-point #x2500)
-              (setq code-point (- code-point #x0100))
-              (make-char 'mule-unicode-0100-24ff
-                         (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
-             ((< code-point #x3400)
-              (setq code-point (- code-point #x2500))
-              (make-char 'mule-unicode-2500-33ff
-                         (+ (/ code-point 96) 32) (+ (% code-point 96) 32)))
-             ((and (>= code-point #xe000) (< code-point #x10000))
-              (setq code-point (- code-point #xe000))
-              (make-char 'mule-unicode-e000-ffff
-                         (+ (/ code-point 96) 32) (+ (% code-point 96) 32))))))
-      (if (and c (aref utf-8-translation-table-for-decode c))
-         (aref utf-8-translation-table-for-decode c)
-       c)))))
-
-(defun encode-char (char ccs &optional restriction)
-  "Return code-point in coded character set CCS that corresponds to CHAR.
-Return nil if CHAR is not included in CCS.
-Currently the only supported coded character set is `ucs' (ISO/IEC
-10646: Universal Multi-Octet Coded Character Set), and CHAR is first
-translated through the char-table `ucs-mule-to-mule-unicode'.
-
-CHAR should be in one of these charsets:
-  ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff,
-  mule-unicode-e000-ffff, eight-bit-control
-Otherwise, return nil.
-
-Optional argument RESTRICTION specifies a way to map CHAR to a
-code-point in CCS.  Currently not supported and just ignored."
-  (let* ((split (split-char char))
-        (charset (car split))
-        trans)
-    (cond ((eq ccs 'ucs)
-          (setq trans (aref ucs-mule-to-mule-unicode char))
-          (if trans
-              (setq split (split-char trans)
-                    charset (car split)))
-          (cond ((eq charset 'ascii)
-                 char)
-                ((eq charset 'latin-iso8859-1)
-                 (+ (nth 1 split) 128))
-                ((eq charset 'mule-unicode-0100-24ff)
-                 (+ #x0100 (+ (* (- (nth 1 split) 32) 96)
-                              (- (nth 2 split) 32))))
-                ((eq charset 'mule-unicode-2500-33ff)
-                 (+ #x2500 (+ (* (- (nth 1 split) 32) 96)
-                              (- (nth 2 split) 32))))
-                ((eq charset 'mule-unicode-e000-ffff)
-                 (+ #xe000 (+ (* (- (nth 1 split) 32) 96)
-                              (- (nth 2 split) 32))))
-                ((eq charset 'eight-bit-control)
-                 char))))))
-
-(defalias 'ucs-tables-unload-hook 'ucs-fragment-8859)
-
  (provide 'ucs-tables)
  
  ;;; ucs-tables.el ends here
author	Kenichi Handa <handa@m17n.org>
	Tue, 1 Oct 2002 06:58:37 +0000 (06:58 +0000)
committer	Kenichi Handa <handa@m17n.org>
	Tue, 1 Oct 2002 06:58:37 +0000 (06:58 +0000)