From 6f27e0f93307f4810f9b45f22556e03921c6bf66 Mon Sep 17 00:00:00 2001 From: Dave Love Date: Wed, 17 Jul 2002 18:39:16 +0000 Subject: [PATCH] (cyrillic-koi8-r-decode-table) (cyrillic-alternativnyj-decode-table): Fill out with mule-unicode. (cyrillic-koi8-r-decode-table): Make unbound after use. (ccl-decode-koi8, ccl-decode-alternativnyj): Change expansion factor. Use utf-8-translation-table-for-decode. (ccl-encode-koi8, ccl-encode-koi8): Don't check r0. (cyrillic-unify-encoding): New function. (cyrillic-koi8): Modify safe-chars, valid-codes. Remove charset-origin-alist. (Cp878, cp866): New alias. (ccl-encode-8859-5-font): New. (font-ccl-encoder-alist): Add ccl-encode-8859-5-font. (ccl-encode-koi8-font, ccl-encode-alternativnyj-font): Remove redundant parens. ("Cyrillic-KOI8"): Add cyrillic-iso-8bit to coding-priority. (cyrillic-koi8-u-decode-table, ccl-decode-koi8-u) (ccl-encode-koi8-u, koi8-u, ccl-encode-koi8-u-font, "Ukrainian"): New. (cyrillic-alternativnyj): Modify safe-chars, add mime-charset. ("Cyrillic-ALT"): Change input method. ("Windows-1251", "Tajik", "Bulgarian", "Belarusian"): New. --- lisp/language/cyrillic.el | 326 +++++++++++++++++++++++++++++++------- 1 file changed, 266 insertions(+), 60 deletions(-) diff --git a/lisp/language/cyrillic.el b/lisp/language/cyrillic.el index a2f708fbf04..d0e428b4d0e 100644 --- a/lisp/language/cyrillic.el +++ b/lisp/language/cyrillic.el @@ -2,8 +2,10 @@ ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. ;; Licensed to the Free Software Foundation. +;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. -;; Keywords: multilingual, Cyrillic +;; Author: Kenichi Handa +;; Keywords: multilingual, Cyrillic, i18n ;; This file is part of GNU Emacs. @@ -24,15 +26,33 @@ ;;; Commentary: -;; The character set ISO8859-5 is supported. See -;; http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM. KOI-8 and -;; ALTERNATIVNYJ are converted to ISO8859-5 internally. +;; The character set ISO8859-5 is supported. 
KOI-8 and ALTERNATIVNYJ +;; are converted to Unicode internally. See +;; http://www.ecma.ch/ecma1/STAND/ECMA-113.HTM. For more info +;; on Cyrillic charsets, see +;; http://czyborra.com/charsets/cyrillic.html. The KOI and +;; Alternativnyj coding systems should live in code-pages.el, but +;; they've always been preloaded and the coding system autoload +;; mechanism didn't get accepted, so they have to stay here and +;; duplicate code-pages stuff. + +;; Note that 8859-5 maps directly onto the Unicode Cyrillic block, +;; apart from codepoints 160 (NBSP, c.f. U+0400), 173 (soft hyphen, +;; c.f. U+040D) and 253 (section sign, c.f. U+045D). The KOI-8 and +;; Alternativnyj coding systems encode both 8859-5 and Unicode. +;; ucs-tables.el provides unification for cyrillic-iso-8bit. + +;; Customizing `utf-8-fragment-on-decoding' allows decoding characters +;; from KOI and Alternativnyj into 8859-5 where that's possible. +;; cyrillic-iso8859-5 characters take half as much space in the buffer +;; as the mule-unicode-0100-24ff equivalents, though that's probably +;; not normally a big deal. ;;; Code: ;; Cyrillic (general) -;; ISO-8859-5 staff +;; ISO-8859-5 stuff (make-coding-system 'cyrillic-iso-8bit 2 ?5 @@ -48,7 +68,7 @@ "Cyrillic-ISO" '((charset cyrillic-iso8859-5) (coding-system cyrillic-iso-8bit) (coding-priority cyrillic-iso-8bit) - (input-method . "cyrillic-yawerty") + (input-method . "cyrillic-yawerty") ; fixme (nonascii-translation . cyrillic-iso8859-5) (unibyte-display . cyrillic-iso-8bit) (features cyril-util) @@ -56,8 +76,11 @@ (documentation . "Support for Cyrillic ISO-8859-5.")) '("Cyrillic")) -;; KOI-8 staff +;; KOI-8R stuff +;; The mule-unicode portion of this is from +;; http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT, +;; which references RFC 1489. 
(defvar cyrillic-koi8-r-decode-table [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 @@ -68,14 +91,25 @@ 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 - 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 - 160 161 162 ?,Lq(B 164 165 166 167 168 169 170 171 172 173 174 175 - 176 177 178 ?,L!(B 180 181 182 183 184 185 186 187 188 189 190 191 - ?,Ln(B ?,LP(B ?,LQ(B ?,Lf(B ?,LT(B ?,LU(B ?,Ld(B ?,LS(B ?,Le(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B - ?,L_(B ?,Lo(B ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,LV(B ?,LR(B ?,Ll(B ?,Lk(B ?,LW(B ?,Lh(B ?,Lm(B ?,Li(B ?,Lg(B ?,Lj(B - ?,LN(B ?,L0(B ?,L1(B ?,LF(B ?,L4(B ?,L5(B ?,LD(B ?,L3(B ?,LE(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B - ?,L?(B ?,LO(B ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,L6(B ?,L2(B ?,LL(B ?,LK(B ?,L7(B ?,LH(B ?,LM(B ?,LI(B ?,LG(B ?,LJ(B ] + ;; 8859-5 plus Unicode + ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B + ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1s"(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B + ?$,2 p(B ?$,2 q(B ?$,2 r(B ?,Lq(B ?$,2 s(B ?$,2 t(B ?$,2 u(B ?$,2 v(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,2 |(B ?$,2 }(B ?$,2 ~(B + ?$,2 (B ?$,2! 
(B ?$,2!!(B ?,L!(B ?$,2!"(B ?$,2!#(B ?$,2!$(B ?$,2!%(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,2!+(B ?$,2!,(B ?,A)(B + ?,Ln(B ?,LP(B ?,LQ(B ?,Lf(B ?,LT(B ?,LU(B ?,Ld(B ?,LS(B ?,Le(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B + ?,L_(B ?,Lo(B ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,LV(B ?,LR(B ?,Ll(B ?,Lk(B ?,LW(B ?,Lh(B ?,Lm(B ?,Li(B ?,Lg(B ?,Lj(B + ?,LN(B ?,L0(B ?,L1(B ?,LF(B ?,L4(B ?,L5(B ?,LD(B ?,L3(B ?,LE(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B + ?,L?(B ?,LO(B ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,L6(B ?,L2(B ?,LL(B ?,LK(B ?,L7(B ?,LH(B ?,LM(B ?,LI(B ?,LG(B ?,LJ(B + ;; All Unicode: +;; ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B +;; ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1s"(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,A (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B +;; ?$,2 p(B ?$,2 q(B ?$,2 r(B ?$,1(q(B ?$,2 s(B ?$,2 t(B ?$,2 u(B ?$,2 v(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,2 |(B ?$,2 }(B ?$,2 ~(B +;; ?$,2 (B ?$,2! 
(B ?$,2!!(B ?$,1(!(B ?$,2!"(B ?$,2!#(B ?$,2!$(B ?$,2!%(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,2!+(B ?$,2!,(B ?,A)(B +;; ?$,1(n(B ?$,1(P(B ?$,1(Q(B ?$,1(f(B ?$,1(T(B ?$,1(U(B ?$,1(d(B ?$,1(S(B ?$,1(e(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B +;; ?$,1(_(B ?$,1(o(B ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(V(B ?$,1(R(B ?$,1(l(B ?$,1(k(B ?$,1(W(B ?$,1(h(B ?$,1(m(B ?$,1(i(B ?$,1(g(B ?$,1(j(B +;; ?$,1(N(B ?$,1(0(B ?$,1(1(B ?$,1(F(B ?$,1(4(B ?$,1(5(B ?$,1(D(B ?$,1(3(B ?$,1(E(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B +;; ?$,1(?(B ?$,1(O(B ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(6(B ?$,1(2(B ?$,1(L(B ?$,1(K(B ?$,1(7(B ?$,1(H(B ?$,1(M(B ?$,1(I(B ?$,1(G(B ?$,1(J(B + ] "Cyrillic KOI8-R decoding table.") (let ((table (make-translation-table-from-vector @@ -84,57 +118,91 @@ (define-translation-table 'cyrillic-koi8-r-encode-table (char-table-extra-slot table 0))) +;; No point in keeping it around. (It can't be let-bound, since it's +;; needed for macro expansion.) +(makunbound 'cyrillic-koi8-r-decode-table) + (define-ccl-program ccl-decode-koi8 - `(3 + `(4 ((loop (r0 = 0) (read r1) (if (r1 < 128) (write-repeat r1) ((translate-character cyrillic-koi8-r-nonascii-translation-table r0 r1) + (translate-character utf-8-translation-table-for-decode r0 r1) (write-multibyte-character r0 r1) (repeat)))))) - "CCL program to decode KOI8.") + "CCL program to decode KOI8-R.") (define-ccl-program ccl-encode-koi8 `(1 ((loop (read-multibyte-character r0 r1) - (if (r0 == ,(charset-id 'cyrillic-iso8859-5)) - (translate-character cyrillic-koi8-r-encode-table r0 r1)) + (translate-character cyrillic-koi8-r-encode-table r0 r1) (write-repeat r1)))) - "CCL program to encode KOI8.") - + "CCL program to encode KOI8-R.") + +(defun cyrillic-unify-encoding (table) + "Set up equivalent characters in the encoding TABLE. +This works wheher or not the table is is Unicode-based or +8859-5-based. 
(Only appropriate for Cyrillic.)" + (let ((table (get table 'translation-table))) + (dotimes (i 96) + (let* ((c (make-char 'cyrillic-iso8859-5 (+ i 32))) + (u ; equivalent Unicode char + (cond ((eq c ?,L (B) ?,A (B) + ((eq c ?,L-(B) ?,A-(B) + ((eq c ?,L}(B) ?,A'(B) + (t (decode-char 'ucs (+ #x400 i))))) + (ec (aref table c)) ; encoding of 8859-5 + (uc (aref table u))) ; encoding of Unicode + (unless (memq c '(?,L (B ?,L-(B ?,L}(B)) ; 8859-5 exceptions + (unless uc + (aset table u ec)) + (unless ec + (aset table c uc))))))) + +(cyrillic-unify-encoding 'cyrillic-koi8-r-encode-table) + (make-coding-system 'cyrillic-koi8 4 ;; We used to use ?K. It is true that ?K is more strictly correct, ;; but it is also used for Korean. ;; So people who use koi8 for languages other than Russian ;; will have to forgive us. - ?R - "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)." + ?R "KOI8-R 8-bit encoding for Cyrillic (MIME: KOI8-R)." '(ccl-decode-koi8 . ccl-encode-koi8) - `((safe-chars . ,(let ((table (make-char-table 'safe-chars)) - (i 0)) - (while (< i 256) - (aset table (aref cyrillic-koi8-r-decode-table i) t) - (setq i (1+ i))) - table)) + `((safe-chars . cyrillic-koi8-r-encode-table) (mime-charset . koi8-r) - (valid-codes (0 . 127) 163 179 (192 . 255)) - (charset-origin-alist (cyrillic-iso8859-5 "KOI8-R" - cyrillic-encode-koi8-r-char)))) + (valid-codes (0 . 255)))) (define-coding-system-alias 'koi8-r 'cyrillic-koi8) (define-coding-system-alias 'koi8 'cyrillic-koi8) +(define-coding-system-alias 'cp878 'cyrillic-koi8) + +;; Allow displaying some of KOI & al with an 8859-5-encoded font. We +;; won't bother about the exceptions when encoding the font, since +;; NBSP will fall through below and work anyhow, and we'll have +;; avoided setting the fontset for the other two to 8859-5 -- they're +;; not in KOI and Alternativnyj anyhow. 
+(define-ccl-program ccl-encode-8859-5-font + `(0 + ((if (r0 == ,(charset-id 'cyrillic-iso8859-5)) + (r1 += 128) + (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) + (r1 = (r2 + 128)))))) + "Encode ISO 8859-5 and Cyrillic Unicode chars to 8859-5 font.") + +(add-to-list 'font-ccl-encoder-alist '("iso8859-5" . ccl-encode-8859-5-font)) +;; The table is set up later to encode both Unicode and 8859-5. (define-ccl-program ccl-encode-koi8-font `(0 - ((translate-character cyrillic-koi8-r-encode-table r0 r1))) + (translate-character cyrillic-koi8-r-encode-table r0 r1)) "CCL program to encode Cyrillic chars to KOI font.") -(setq font-ccl-encoder-alist - (cons '("koi8" . ccl-encode-koi8-font) font-ccl-encoder-alist)) +(add-to-list 'font-ccl-encoder-alist '("koi8" . ccl-encode-koi8-font)) (set-language-info-alist "Cyrillic-KOI8" `((charset cyrillic-iso8859-5) @@ -142,7 +210,7 @@ . ,(get 'cyrillic-koi8-r-nonascii-translation-table 'translation-table)) (coding-system cyrillic-koi8) - (coding-priority cyrillic-koi8) + (coding-priority cyrillic-koi8 cyrillic-iso-8bit) (input-method . "cyrillic-jcuken") (features cyril-util) (unibyte-display . cyrillic-koi8) @@ -150,7 +218,97 @@ (documentation . 
"Support for Cyrillic KOI8-R.")) '("Cyrillic")) -;;; ALTERNATIVNYJ staff + +(defvar cyrillic-koi8-u-decode-table + [ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 + 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 + 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 + 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 + 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 + 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 + 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 + 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 + ;; All Unicode: +;; ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B +;; ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1x9(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B +;; ?$,2 p(B ?$,2 q(B ?$,2 r(B ?$,1(q(B ?$,1(t(B ?$,2 t(B ?$,1(v(B ?$,1(w(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,1)Q(B ?$,2 }(B ?$,2 ~(B +;; ?$,2 (B ?$,2! 
(B ?$,2!!(B ?$,1(!(B ?$,1($(B ?$,2!#(B ?$,1(&(B ?$,1('(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,1)P(B ?$,2!,(B ?,A)(B +;; ?$,1(n(B ?$,1(P(B ?$,1(Q(B ?$,1(f(B ?$,1(T(B ?$,1(U(B ?$,1(d(B ?$,1(S(B ?$,1(e(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B +;; ?$,1(_(B ?$,1(o(B ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(V(B ?$,1(R(B ?$,1(l(B ?$,1(k(B ?$,1(W(B ?$,1(h(B ?$,1(m(B ?$,1(i(B ?$,1(g(B ?$,1(j(B +;; ?$,1(N(B ?$,1(0(B ?$,1(1(B ?$,1(F(B ?$,1(4(B ?$,1(5(B ?$,1(D(B ?$,1(3(B ?$,1(E(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B +;; ?$,1(?(B ?$,1(O(B ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(6(B ?$,1(2(B ?$,1(L(B ?$,1(K(B ?$,1(7(B ?$,1(H(B ?$,1(M(B ?$,1(I(B ?$,1(G(B ?$,1(J(B +;; 8859-5 plus Unicode: + ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B + ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1x9(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B + ?$,2 p(B ?$,2 q(B ?$,2 r(B ?,Lq(B ?,Lt(B ?$,2 t(B ?,Lv(B ?,Lw(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,1)Q(B ?$,2 }(B ?$,2 ~(B + ?$,2 (B ?$,2! 
(B ?$,2!!(B ?,L!(B ?,L$(B ?$,2!#(B ?,L&(B ?,L'(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,1)P(B ?$,2!,(B ?,A)(B + ?,Ln(B ?,LP(B ?,LQ(B ?,Lf(B ?,LT(B ?,LU(B ?,Ld(B ?,LS(B ?,Le(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B + ?,L_(B ?,Lo(B ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,LV(B ?,LR(B ?,Ll(B ?,Lk(B ?,LW(B ?,Lh(B ?,Lm(B ?,Li(B ?,Lg(B ?,Lj(B + ?,LN(B ?,L0(B ?,L1(B ?,LF(B ?,L4(B ?,L5(B ?,LD(B ?,L3(B ?,LE(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B + ?,L?(B ?,LO(B ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,L6(B ?,L2(B ?,LL(B ?,LK(B ?,L7(B ?,LH(B ?,LM(B ?,LI(B ?,LG(B ?,LJ(B + ] + "Cyrillic KOI8-U decoding table.") + +(let ((table (make-translation-table-from-vector + cyrillic-koi8-u-decode-table))) + (define-translation-table 'cyrillic-koi8-u-nonascii-translation-table table) + (define-translation-table 'cyrillic-koi8-u-encode-table + (char-table-extra-slot table 0))) + +(makunbound 'cyrillic-koi8-u-decode-table) + +(define-ccl-program ccl-decode-koi8-u + `(4 + ((loop + (r0 = 0) + (read r1) + (if (r1 < 128) + (write-repeat r1) + ((translate-character cyrillic-koi8-u-nonascii-translation-table r0 r1) + (translate-character utf-8-translation-table-for-decode r0 r1) + (write-multibyte-character r0 r1) + (repeat)))))) + "CCL program to decode KOI8-U.") + +(define-ccl-program ccl-encode-koi8-u + `(1 + ((loop + (read-multibyte-character r0 r1) + (translate-character cyrillic-koi8-u-encode-table r0 r1) + (write-repeat r1)))) + "CCL program to encode KOI8-U.") + +(cyrillic-unify-encoding 'cyrillic-koi8-u-encode-table) + +(make-coding-system + 'koi8-u 4 + ?U "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)" + '(ccl-decode-koi8-u . ccl-encode-koi8-u) + `((safe-chars . cyrillic-koi8-u-encode-table) + (mime-charset . koi8-u) + (valid-codes (0 . 255)))) + +(define-ccl-program ccl-encode-koi8-u-font + `(0 + (translate-character cyrillic-koi8-u-encode-table r0 r1)) + "CCL program to encode Cyrillic chars to KOI-U font.") + +(add-to-list 'font-ccl-encoder-alist '("koi8-u" . 
ccl-encode-koi8-u-font)) + +(set-language-info-alist + "Ukrainian" `((coding-system koi8-u) + (coding-priority koi8-u) + (nonascii-translation + . ,(get 'cyrillic-koi8-r-nonascii-translation-table + 'translation-table)) + (input-method . "ukrainian-computer") + (features code-pages) + (documentation + . "Support for Ukrainian with koi8-u character set.")) + '("Cyrillic")) + +;;; ALTERNATIVNYJ stuff (defvar cyrillic-alternativnyj-decode-table [ @@ -162,12 +320,21 @@ 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 +;; ?$,1(0(B ?$,1(1(B ?$,1(2(B ?$,1(3(B ?$,1(4(B ?$,1(5(B ?$,1(6(B ?$,1(7(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B ?$,1(?(B +;; ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(D(B ?$,1(E(B ?$,1(F(B ?$,1(G(B ?$,1(H(B ?$,1(I(B ?$,1(J(B ?$,1(K(B ?$,1(L(B ?$,1(M(B ?$,1(N(B ?$,1(O(B +;; ?$,1(P(B ?$,1(Q(B ?$,1(R(B ?$,1(S(B ?$,1(T(B ?$,1(U(B ?$,1(V(B ?$,1(W(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B ?$,1(_(B +;; ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,2 "(B ?$,2 D(B ?$,2!!(B ?$,2!"(B ?$,2 v(B ?$,2 u(B ?$,2!#(B ?$,2 q(B ?$,2 w(B ?$,2 }(B ?$,2 |(B ?$,2 {(B ?$,2 0(B +;; ?$,2 4(B ?$,2 T(B ?$,2 L(B ?$,2 <(B ?$,2 (B ?$,2 \(B ?$,2 ~(B ?$,2 (B ?$,2 z(B ?$,2 t(B ?$,2!)(B ?$,2!&(B ?$,2! 
(B ?$,2 p(B ?$,2!,(B ?$,2!'(B +;; ?$,2!((B ?$,2!$(B ?$,2!%(B ?$,2 y(B ?$,2 x(B ?$,2 r(B ?$,2 s(B ?$,2!+(B ?$,2!*(B ?$,2 8(B ?$,2 ,(B ?$,2!H(B ?$,2!D(B ?$,2!L(B ?$,2!P(B ?$,2!@(B +;; ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(d(B ?$,1(e(B ?$,1(f(B ?$,1(g(B ?$,1(h(B ?$,1(i(B ?$,1(j(B ?$,1(k(B ?$,1(l(B ?$,1(m(B ?$,1(n(B ?$,1(o(B +;; ?$,1(!(B ?$,1(q(B ?$,1($(B ?$,1(t(B ?$,1('(B ?$,1(w(B ?$,1(.(B ?$,1(~(B ?,A0(B ?$,1s"(B ?,A7(B ?$,1x:(B ?$,1uV(B ?,A$(B ?$,2!`(B ?,A (B ; +;; 8859+Unicode ?,L0(B ?,L1(B ?,L2(B ?,L3(B ?,L4(B ?,L5(B ?,L6(B ?,L7(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B ?,L?(B ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,LD(B ?,LE(B ?,LF(B ?,LG(B ?,LH(B ?,LI(B ?,LJ(B ?,LK(B ?,LL(B ?,LM(B ?,LN(B ?,LO(B ?,LP(B ?,LQ(B ?,LR(B ?,LS(B ?,LT(B ?,LU(B ?,LV(B ?,LW(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B ?,L_(B - 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 - 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 - 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 + ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,2 "(B ?$,2 D(B ?$,2!!(B ?$,2!"(B ?$,2 v(B ?$,2 u(B ?$,2!#(B ?$,2 q(B ?$,2 w(B ?$,2 }(B ?$,2 |(B ?$,2 {(B ?$,2 0(B + ?$,2 4(B ?$,2 T(B ?$,2 L(B ?$,2 <(B ?$,2 (B ?$,2 \(B ?$,2 ~(B ?$,2 (B ?$,2 z(B ?$,2 t(B ?$,2!)(B ?$,2!&(B ?$,2! 
(B ?$,2 p(B ?$,2!,(B ?$,2!'(B + ?$,2!((B ?$,2!$(B ?$,2!%(B ?$,2 y(B ?$,2 x(B ?$,2 r(B ?$,2 s(B ?$,2!+(B ?$,2!*(B ?$,2 8(B ?$,2 ,(B ?$,2!H(B ?$,2!D(B ?$,2!L(B ?$,2!P(B ?$,2!@(B ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,Ld(B ?,Le(B ?,Lf(B ?,Lg(B ?,Lh(B ?,Li(B ?,Lj(B ?,Lk(B ?,Ll(B ?,Lm(B ?,Ln(B ?,Lo(B ?,L!(B ?,Lq(B ?,L$(B ?,Lt(B ?,L'(B ?,Lw(B ?,L.(B ?,L~(B 248 249 250 251 ?,Lp(B 253 254 ?,L (B] "Cyrillic ALTERNATIVNYJ decoding table.") @@ -179,9 +346,10 @@ (define-translation-table 'cyrillic-alternativnyj-encode-table (char-table-extra-slot table 0))) +(makunbound 'cyrillic-alternativnyj-decode-table) (define-ccl-program ccl-decode-alternativnyj - `(3 + `(4 ((loop (r0 = 0) (read r1) @@ -189,6 +357,7 @@ (write-repeat r1) ((translate-character cyrillic-alternativnyj-nonascii-translation-table r0 r1) + (translate-character utf-8-translation-table-for-decode r0 r1) (write-multibyte-character r0 r1) (repeat)))))) "CCL program to decode Alternativnyj.") @@ -200,33 +369,27 @@ (translate-character cyrillic-alternativnyj-encode-table r0 r1) (write-repeat r1)))) "CCL program to encode Alternativnyj.") - + +(cyrillic-unify-encoding 'cyrillic-alternativnyj-encode-table) + (make-coding-system 'cyrillic-alternativnyj 4 ?A - "ALTERNATIVNYJ 8-bit encoding for Cyrillic." + "ALTERNATIVNYJ (CP866) 8-bit encoding for Cyrillic." '(ccl-decode-alternativnyj . ccl-encode-alternativnyj) - `((safe-chars . ,(let ((table (make-char-table 'safe-chars)) - (i 0)) - (while (< i 256) - (aset table (aref cyrillic-alternativnyj-decode-table i) - t) - (setq i (1+ i))) - table)) - (valid-codes (0 . 175) (224 . 241) 255) - (charset-origin-alist (cyrillic-iso8859-5 "ALTERNATIVNYJ" - cyrillic-encode-koi8-r-char)))) - + `((safe-chars . cyrillic-alternativnyj-encode-table) + (valid-codes (0 . 255)) + (mime-charset . 
cp866))) (define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj) +(define-coding-system-alias 'cp866 'cyrillic-alternativnyj) (define-ccl-program ccl-encode-alternativnyj-font - '(0 - ((translate-character cyrillic-alternativnyj-encode-table r0 r1))) + `(0 + (translate-character cyrillic-alternativnyj-encode-table r0 r1)) "CCL program to encode Cyrillic chars to Alternativnyj font.") -(setq font-ccl-encoder-alist - (cons '("alternativnyj" . ccl-encode-alternativnyj-font) - font-ccl-encoder-alist)) +(add-to-list 'font-ccl-encoder-alist + '("alternativnyj" . ccl-encode-alternativnyj-font)) (set-language-info-alist "Cyrillic-ALT" `((charset cyrillic-iso8859-5) @@ -235,13 +398,56 @@ 'translation-table)) (coding-system cyrillic-alternativnyj) (coding-priority cyrillic-alternativnyj) - (input-method . "cyrillic-jcuken") + (input-method . "russian-typewriter") (features cyril-util) (unibyte-display . cyrillic-alternativnyj) (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") (documentation . "Support for Cyrillic ALTERNATIVNYJ.")) '("Cyrillic")) +(set-language-info-alist + "Windows-1251" `((coding-system windows-1251) + (coding-priority windows-1251) + (nonascii-translation + . ,(get 'decode-windows-1252 'translation-table)) + (input-method . "russian-typewriter") ; fixme? + (features code-pages) + (documentation . "Support for windows-1251 character set.")) + '("Cyrillic")) + +(set-language-info-alist + "Tajik" `((coding-system cyrillic-koi8-t) + (coding-priority cyrillic-koi8-t) + (nonascii-translation + . ,(get 'decode-koi8-t 'translation-table)) + (input-method . "russian-typewriter") ; fixme? + (features code-pages) + (documentation . "Support for Tajik using KOI8-T.")) + '("Cyrillic")) + +(set-language-info-alist + "Bulgarian" `((coding-system windows-1251) + (coding-priority windows-1251) + (nonascii-translation + . ,(get 'decode-windows-1251 'translation-table)) + (input-method . "bulgarian-standard") + (features code-pages) + (documentation + . 
"Support for Bulgarian with windows-1251 character set.")) + '("Cyrillic")) + +(set-language-info-alist + "Belarusian" `((coding-system windows-1251) + (coding-priority windows-1251) + (nonascii-translation + . ,(get 'decode-windows-1251 'translation-table)) + (input-method . "belarusian") + (features code-pages) + (documentation + . "Support for Belarusian with windows-1251 character set. +\(The name Belarusian replaced Byelorussian in the early 1990s.)")) + '("Cyrillic")) + (provide 'cyrillic) ;;; cyrillic.el ends here -- 2.39.5