From: Eli Zaretskii Date: Sun, 20 Dec 1998 15:07:03 +0000 (+0000) Subject: (IT-character-translations): New variable, an X-Git-Tag: emacs-20.4~1031 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=0064ab85ad90a26302b4841d8ee5601be2cb08a5;p=emacs.git (IT-character-translations): New variable, an alist used to display characters for which there's no glyphs in the current codepage. (IT-display-table-setup): New function, sets up the DOS terminal for a given codepage. (dos-codepage-setup): New function, sets up the MULE environment for the current value of dos-codepage. (top-level if): In the multibyte mode, turn on unibyte-display-via-language-environment. In the unibyte mode, set up the special syntax tables to map lower- to upper case and back. --- diff --git a/lisp/term/internal.el b/lisp/term/internal.el index 30669f5eb55..0cfa3025f50 100644 --- a/lisp/term/internal.el +++ b/lisp/term/internal.el @@ -1,6 +1,6 @@ ;;; internal.el --- support for PC internal terminal -*- coding: raw-text; -*- -;; Copyright (C) 1993, 1994 Free Software Foundation, Inc. +;; Copyright (C) 1993, 1994, 1998 Free Software Foundation, Inc. ;; Author: Morten Welinder @@ -23,9 +23,6 @@ ;;; Code: -;; --------------------------------------------------------------------------- -;; screen setup -- that's easy! -(standard-display-8bit 127 254) ;; --------------------------------------------------------------------------- ;; keyboard setup -- that's simple! (set-input-mode nil nil 0) @@ -50,7 +47,302 @@ (put 'clear 'ascii-character 12) (put 'return 'ascii-character 13) (put 'escape 'ascii-character ?\e) -;; --------------------------------------------------------------------------- + +;; ---------------------------------------------------------------------- +;; DOS display setup +;; ================= +;; +;; DOS can only support a single font. 
On most systems (with the +;; possible exception of Far Eastern DOS versions), this means that +;; two character sets are available at any given time: the ASCII +;; charset, and a single national charset, usually mapped to codes +;; above 128 (i.e., with the 8th bit set). Which national charset is +;; supported depends on the codepage loaded by the system when it +;; boots; usually, this codepage cannot be changed without +;; rebooting. +;; +;; Since each codepage can usually display characters of a single +;; MULE charset, Emacs can display a single MULE charset with the +;; glyphs of the current codepage. The mapping from DOS codepages +;; to MULE charsets is established by the charset property of the +;; cpNNN-decode-table variables in codepage.el, which also +;; defines translation tables for each such pair, and a bunch of +;; functions to generate coding systems that use those translation +;; tables to convert codepage-encoded text to the appropriate MULE +;; charset and back. When Emacs starts on DOS, it automatically +;; sets its default coding systems for file I/O and terminal output +;; according to the current DOS codepage, given by the +;; `dos-codepage' variable. +;; +;; This leaves us with the problem of displaying character sets +;; other than the one which maps directly into the current codepage. +;; The following functions and variables handle this nuisance by +;; defining a display table where each character that doesn't have a +;; glyph in some codepage is mapped to a string which represents it. +;; For example, a small c with cedilla is mapped to the string +;; "{,c}" (the braces serve as a sign that this is a single +;; character). A nice feature of the display tables is that Emacs +;; knows that the string represents a single character, and thus +;; cursor motion works as you'd expect: a single `C-f' moves past +;; the entire string which represents a single character.
+;; ---------------------------------------------------------------------- + +(defvar IT-character-translations + '( + (latin-iso8859-1 + . [255 "!I" "|c" "Pd" "$$" "Ye" "|" "SE" "\"" "(c)" + "_a" "<<" "~" "--" "(R)" "'-" "^o" "+-" "^2" "^3" + "'" "u" ".P" "^." "'," "^1" "_o" ">>" "1/4" "1/2" + "3/4" "?I" "`A" "A'" "A^" "~A" "\"A" "Ao" "AE" ",C" + "`E" "E'" "E^" "\"E" "`I" "I'" "I^" "\"I" "D-" "~N" + "`O" "O'" "O^" "~O" "\"O" "*x" "/O" "`U" "U'" "U^" + "\"U" "Y'" "TH" "ss" "`a" "a'" "a^" "~a" "\"a" "ao" + "ae" ",c" "`e" "e'" "e^" "\"e" "`i" "i'" "i^" "\"i" + "d-" "~n" "`o" "o'" "o^" "~o" "\"o" "-:" "/o" "`u" + "u'" "u^" "\"u" "y'" "th" "\"y"] + ) + (latin-iso8859-2 + . [255 "A;" "'(" "/L" "$$" "L<" "S'" "SE" "\"" "S<" + ",S" "T<" "Z'" "--" "Z<" "Z^." "^o" "a;" "';" "/l" + "'" "l<" "s'" "'<" "'," "s<" ",s" "t<" "z'" "'" + "z<" "z^." "R'" "A'" "A^" "A(" "\"A" "L'" "C'" ",C" + "C<" "E'" "E;" "E:" "E<" "I'" "I^" "D<" "/D" "N'" + "N<" "O'" "O^" "O''" "\"O" "*x" "R<" "U^0" "U'" "U''" + "\"U" "Y'" ",T" "ss" "r'" "a'" "a^" "a(" "\"a" "l'" + "c'" ",c" "c<" "e'" "e;" "\"e" "e<" "i'" "i^" "d<" + "/d" "n'" "n<" "o'" "o^" "o''" "\"o" "-:" "r<" "u^0" + "u'" "u''" "\"u" "y'" ",t" "'."] + ) + (latin-iso8859-3 + . [255 "/H" "'(" "Pd" "$$" " " "H^" "SE" "\"" "I^." + ",S" "G(" "J^" "--" " " "Z^." "^o" "/h" "^2" "^3" + "'" "u" "h^" "." "'," "i^." ",s" "g(" "j^" "1/2" + " " "z^." "`A" "A'" "A^" " " "\"A" "C^." "C^" ",C" + "`E" "E'" "E^" "\"E" "`I" "I'" "I^" "\"I" " " "~N" + "`O" "O'" "O^" "G^." "\"O" "*x" "G^" "`U" "U'" "U^" + "\"U" "U(" "S^" "ss" "`a" "a'" "a^" " " "\"a" "c^." + "c^" ",c" "`e" "e'" "e^" "\"e" "`i" "i'" "i^" "\"i" + " " "~n" "`o" "o'" "o^" "g^." "\"o" "-:" "g^" "`u" + "u'" "u^" "\"u" "u(" "s^" "^."] + ) + (latin-iso8859-4 + . [255 "A;" "kk" ",R" "$$" "?I" ",L" "SE" "\"" "S<" + "E-" ",G" "/T" "--" "Z<" "'-" "^o" "a;" "';" ",r" + "'" "~i" ",l" "'<" "'," "s<" "e-" ",g" "/t" "NG" + "z<" "ng" "A-" "A'" "A^" "~A" "\"A" "Ao" "AE" "I;" + "C<" "E'" "E;" "\"E" "E^." 
"I'" "I^" "I-" "/D" ",N" + "O-" ",K" "O^" "~O" "\"O" "*x" "/O" "U;" "U'" "U^" + "\"U" "~U" "U-" "ss" "a-" "a'" "a^" "~a" "\"a" "ao" + "ae" "i;" "c<" "e'" "e;" "\"e" "e^." "i'" "i^" "i-" + "/d" ",n" "o-" ",k" "o^" "~o" "\"o" "-:" "/o" "u;" + "u'" "u^" "\"u" "~u" "u-" "^."] + ) + (cyrillic-iso8859-5 + . [255 "\"E" "Dj" "Gj" "IE" "Dz" "Ii" "Ji" "JE" "Lj" + "Nj" "Ts" "Kj" 240 "V%" "Dzh" 65 "B=" 66 226 + 68 69 "Z%" 51 85 "J=" 75 "L=" 77 72 + 79 "P=" 80 67 84 89 232 88 "C=" "C%" + "S%" "Sc" "=\"" "Y=" "%\"" "Ee" "Yu" "Ya" 97 98 + "v=" "g=" 103 101 "z%" "z=" 117 "j=" 107 "l=" + "m=" "n=" 111 110 112 99 "t=" 121 "f=" 120 + "c=" "c%" "s%" "sc" "='" "y=" "%'" "ee" "yu" "ya" + "N0" "\"e" "dj" "gj" "ie" "dz" "ii" "ji" "je" "lj" + "nj" "ts" "kj" 21 "v%" "dzh"] + ) + (arabic-iso8859-6 + . [255 nil nil nil "$$" nil nil nil nil nil + nil nil ",+" "--" nil nil nil nil nil nil + nil nil nil nil nil nil nil ";+" nil nil + nil "?+" nil "H'" "aM" "aH" "wH" "ah" "yH" + "a+" "b+" "tm" "t+" "tk" "g+" "hk" "x+" "d+" "dk" + "r+" "z+" "s+" "sn" "c+" "dd" "tj" "zH" "e+" "i+" + nil nil nil nil nil "++" "f+" "q+" "k+" "l+" + "m+" "n+" "h+" "w+" "j+" "y+" ":+" "\"+" "=+" "/+" + "'+" "1+" "3+" "0+" nil nil nil nil nil nil + nil nil nil nil nil nil nil] + ) + (greek-iso8859-7 + . [255 "9'" "'9" "Pd" nil nil "|" "SE" "\"" "(c)" + nil "<<" "~" "--" nil "-M" "^o" "+-" "^2" "^3" + "'" "'%" "A%" "^." "E%" "Y%" "I%" ">>" "O%" "1/2" + "U%" "W%" "i3" "A*" "B*" "G*" "D*" "E*" "Z*" "Y*" + "H*" "I*" "K*" "L*" "M*" "N*" "C*" "O*" "P*" "R*" + nil "S*" "T*" "U*" "F*" "X*" "Q*" "W*" "J*" "V*" + "a%" "e%" "y%" "i%" "u3" "a*" "b*" "g*" "d*" "e*" + "z*" "y*" "h*" "i*" "k*" "l*" "m*" "n*" "c*" "o*" + "p*" "r*" "*s" "s*" "t*" "u*" "f*" "x*" "q*" "w*" + "j*" "v*" "o%" "u%" "w%" nil] + ) + (hebrew-iso8859-8 + . [255 nil "|c" "Pd" "$$" "Ye" "|" "SE" "\"" "(c)" + "*x" "<<" "~" "--" "(R)" "'-" "^o" "+-" "^2" "^3" + "'" "u" ".P" "^." 
"'," "^1" "-:" ">>" "1/4" "1/2" + "3/4" nil nil nil nil nil nil nil nil nil + nil nil nil nil nil nil nil nil nil nil + nil nil nil nil nil nil nil nil nil nil + nil nil nil "=2" "A+" "B+" "G+" "D+" "H+" "W+" + "Z+" "X+" "Tj" "J+" "K%" "K+" "L+" "M%" "M+" "N%" + "N+" "S+" "E+" "P%" "P+" "Zj" "ZJ" "Q+" "R+" "Sh" + "T+" nil nil nil nil nil] + ) + (latin-iso8859-9 + . [255 "!I" "|c" "Pd" "$$" "Ye" "|" "SE" "\"" "(c)" + "_a" "<<" "~" "--" "(R)" "'-" "^o" "+-" "^2" "^3" + "'" "u" ".P" "^." "'," "^1" "_o" ">>" "1/4" "1/2" + "3/4" "?I" "`A" "A'" "A^" "~A" "\"A" "Ao" "AE" ",C" + "`E" "E'" "E^" "\"E" "`I" "I'" "I^" "\"I" "G(" "~N" + "`O" "O'" "O^" "~O" "\"O" "*x" "/O" "`U" "U'" "U^" + "\"U" "I^." ",S" "ss" "`a" "a'" "a^" "~a" "\"a" "ao" + "ae" ",c" "`e" "e'" "e^" "\"e" "e^." "i'" "i^" "i-" + "g(" "~n" "`o" "o'" "o^" "~o" "\"o" "-:" "/o" "`u" + "u'" "u^" "\"u" "i^." ",s" "\"y"] + ) + ) + "An alist of MULE ISO-8859 character sets and the strings that +should be used to represent the characters from each set on a DOS +terminal which does not have corresponding glyphs built into the +installed codepage.") + +(defun IT-display-table-setup (codepage &optional table) + "Set up display table TABLE for a DOS terminal which supports a +glyphs built into the current codepage CODEPAGE. + +If TABLE is nil or omitted, `standard-display-table' is used." + (let* ((surrogates IT-character-translations) + (disp-tab (or table standard-display-table)) + (built-in-set (cp-charset-for-codepage codepage)) + (offset (cp-offset-for-codepage codepage)) + (cp-decoder + (symbol-value (intern-soft (format "%s-decode-table" codepage)))) + (cp-decoder-len (length cp-decoder)) + (c offset) + association chset) + ;; Undo the effects of previous call (where they may have used + ;; a different codepage) by reverting the display table for the + ;; built-in charset to its pristine shape. 
+ (while (< c (+ offset 128)) + (aset disp-tab (make-char built-in-set c) nil) + (setq c (1+ c))) + (while surrogates + (setq association (car surrogates)) + (setq chset (car association)) + (let* ((vector (cdr association)) + (veclen (length vector)) + (i 0) + glyph) + (while (< i veclen) + (setq glyph (aref vector i)) + (if (and glyph + (or (not (equal chset built-in-set)) + (>= i cp-decoder-len) + (null (aref cp-decoder i)))) + (aset disp-tab (make-char chset (+ i (logand offset 127))) + (vconcat + (if (numberp glyph) + (char-to-string glyph) + (if (> (length glyph) 1) (concat "{" glyph "}") + glyph))))) + (setq i (1+ i)))) + (setq surrogates (cdr surrogates))) + ;; Most Windows programs send out apostrophe's as \222. Most DOS + ;; fonts contain a different character at that position. Map it + ;; to the ASCII apostrophe. + (aset standard-display-table 146 [39]))) + +(defun dos-codepage-setup () + "Set up the MULE environment as appropriate for the installed DOS codepage. + +This function sets coding systems, display tables, and the language +environment options as appropriate for the current value of `dos-codepage'. + +This function is automatically run at startup via the `term-setup-hook' +list. You can (and should) also run it whenever the value of +`dos-codepage' changes." + (interactive) + (let* ((cp (format "cp%s" dos-codepage)) + (charset (cp-charset-for-codepage cp)) + (offset (cp-offset-for-codepage cp))) + (cp-make-coding-systems-for-codepage cp charset offset) + ;; This is done by set-language-environment. 
+ ;;(setq nonascii-translation-table + ;; (symbol-value (intern (concat cp "-nonascii-translation-table")))) + (set-language-environment (cp-language-for-codepage cp)) + (set-default-coding-systems (intern (concat cp "-dos"))) + (set-terminal-coding-system + (setq default-terminal-coding-system (intern (concat cp + "-unix")))) + (IT-display-table-setup cp) + ;; Some codepages have sporadic support for Latin-1, Greek, and + ;; symbol glyphs, which don't belong to their native character + ;; set. It's a nuisance to have all those glyphs here, for all + ;; the codepages (for starters, I don't even have references for + ;; all the codepages). So I provide a hook for those who want to + ;; squeeze every bit of support out of their terminal/font. + (run-hooks 'dos-codepage-setup-hook) + )) + +;; We want to delay the terminal and other codepage-related setup +;; until after the terminal is set and user's .emacs is processed, +;; because people might define their `dos-codepage-setup-hook' there. +(add-hook 'term-setup-hook 'dos-codepage-setup) + +;; In multibyte mode, we want unibyte buffers to be displayed using +;; the terminal coding system, so that they display correctly on the +;; DOS terminal; in unibyte mode we want to see all 8-bit characters +;; verbatim. In both cases, we want the entire range of 8-bit +;; characters to arrive at our display code verbatim. +(standard-display-8bit 127 254) + +(if default-enable-multibyte-characters + ;; We want this in multibyte version only, since unibyte version + ;; should not convert non-ASCII characters at all. + (setq unibyte-display-via-language-environment t) + ;; Let the unibyte version behave as Emacs 19 did. In particular, + ;; let it use and display native codepage-specific glyphs for + ;; non-ASCII characters. For this to work correctly, we need to + ;; establish the correspondence between lower-case letters and their + ;; upper-case brethren, as appropriate for the codepage in use. 
The + ;; code below makes this happen. + ;; (In the multibyte mode, the appropriate tables are prepared + ;; elsewhere, since multibyte Emacs uses normal MULE character sets, + ;; which are supported on all platforms.) + (let* ((i 128) + (modify (function + (lambda (ch sy) + (modify-syntax-entry ch sy text-mode-syntax-table) + (if (boundp 'tex-mode-syntax-table) + (modify-syntax-entry ch sy tex-mode-syntax-table)) + (modify-syntax-entry ch sy (standard-syntax-table)) + ))) + (table (standard-case-table)) + ;; The following are strings of letters, first lower then upper case. + ;; This will look funny on terminals which display other code pages. + ;; In particular, what is displayed as blanks are not blanks + ;; at all! (Use `C-x =' to see what they really are.) + (chars + (cond + ((= dos-codepage 850) + "‡€š‚ƒ¶„Ž…·†ÆÇ µˆÒ‰ÓŠÔ‹ØŒ×Þ¡Ö‘’“â”™•ã¢à›–ê£é—ë˜Yìí¡I£é¤¥ÐÑçè") + ((= dos-codepage 865) + "‡€š‚ƒA„Ž…A†ˆE‰EŠE‹IŒII‘’“O”™•O–U£U˜Y› A¡I¢O£U¤¥") + ;; default is 437 + (t "‡€š‚ƒA„Ž…A†ˆE‰EŠE‹IŒII‘’“O”™•O–U£U˜Y A¡I¢O£U¤¥")))) + + (while (< i 256) + (funcall modify i "_") + (setq i (1+ i))) + + (setq i 0) + (while (< i (length chars)) + (let ((ch1 (aref chars i)) + (ch2 (aref chars (1+ i)))) + (if (> ch2 127) + (set-case-syntax-pair ch2 ch1 table)) + (setq i (+ i 2)))) + (save-excursion + (mapcar (lambda (b) (set-buffer b) (set-case-table table)) + (buffer-list))) + (set-standard-case-table table))) ;;; internal.el ends here