From: Kenichi Handa Date: Mon, 31 Mar 2003 01:48:48 +0000 (+0000) Subject: (ccl-decode-mule-utf-16-le): Don't assume the signature bytes. X-Git-Tag: ttn-vms-21-2-B4~10720 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=2217b8e1e67a18aec2c1693038d21a12b1d02f76;p=emacs.git (ccl-decode-mule-utf-16-le): Don't assume the signature bytes. (ccl-decode-mule-utf-16-be): Likewise. (ccl-encode-mule-utf-16-le): Don't produce the signature bytes. (ccl-encode-mule-utf-16-be): Likewise. --- diff --git a/lisp/ChangeLog b/lisp/ChangeLog index a280903e00b..fc5951f3558 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,11 @@ +2003-03-31 Kenichi Handa + + * international/utf-16.el (ccl-decode-mule-utf-16-le): Don't + assume the signature bytes. + (ccl-decode-mule-utf-16-be): Likewise. + (ccl-encode-mule-utf-16-le): Don't produce the signature bytes. + (ccl-encode-mule-utf-16-be): Likewise. + 2003-03-30 Kevin Ryde * info-look.el (sh-mode): Add doc-specs for commands and variables. diff --git a/lisp/international/utf-16.el b/lisp/international/utf-16.el index e9b7c801f91..6359648e40c 100644 --- a/lisp/international/utf-16.el +++ b/lisp/international/utf-16.el @@ -68,8 +68,9 @@ ;; Needed in macro expansion, so can't be let-bound. Zapped after use. (eval-and-compile (defconst utf-16-decode-ucs - ;; We have the unicode in r1. Output is charset ID in r0, code point - ;; in r1. + ;; We have the unicode in r1. Output is charset ID in r0, code + ;; point in r1. As r6 keeps endian information, the value should + ;; not be changed. `((lookup-integer utf-subst-table-for-decode r1 r3) (if r7 ; got a translation ((r0 = r1) (r1 = r3)) @@ -114,15 +115,14 @@ (define-ccl-program ccl-decode-mule-utf-16-le `(2 ; 2 bytes -> 1 to 4 bytes - ((read r0 r1) ; signature - (loop + ((loop (read r3 r4) (r1 = (r4 <8 r3)) ,utf-16-decode-ucs (translate-character utf-translation-table-for-decode r0 r1) (write-multibyte-character r0 r1) (repeat)))) - "Decode little endian UTF-16 (ignoring signature bytes). + "Decode UTF-16LE (little endian without signature bytes). Basic decoding is done into the charsets ascii, latin-iso8859-1 and mule-unicode-*. Un-representable Unicode characters are decoded as U+fffd. The result is run through the translation-table named @@ -130,15 +130,14 @@ U+fffd. The result is run through the translation-table named (define-ccl-program ccl-decode-mule-utf-16-be `(2 ; 2 bytes -> 1 to 4 bytes - ((read r0 r1) ; signature - (loop + ((loop (read r3 r4) (r1 = (r3 <8 r4)) ,utf-16-decode-ucs (translate-character utf-translation-table-for-decode r0 r1) (write-multibyte-character r0 r1) (repeat)))) - "Decode big endian UTF-16 (ignoring signature bytes). + "Decode UTF-16BE (big endian without signature bytes). Basic decoding is done into the charsets ascii, latin-iso8859-1 and mule-unicode-*. Un-representable Unicode characters are decoded as U+fffd. The result is run through the translation-table of @@ -173,9 +172,7 @@ name `utf-translation-table-for-decode'.") (define-ccl-program ccl-encode-mule-utf-16-le `(1 - ((write #xff) - (write #xfe) - (loop + ((loop (read-multibyte-character r0 r1) (lookup-character utf-subst-table-for-encode r0 r1) (if (r7 == 0) @@ -184,7 +181,7 @@ name `utf-translation-table-for-decode'.") (write (r0 & 255)) (write (r0 >> 8)) (repeat)))) - "Encode to little endian UTF-16 with signature. + "Encode to UTF-16LE (little endian without signature). Characters from the charsets ascii, eight-bit-control, eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded after translation through the translation-table of name @@ -193,9 +190,7 @@ Others are encoded as U+FFFD.") (define-ccl-program ccl-encode-mule-utf-16-be `(1 - ((write #xfe) - (write #xff) - (loop + ((loop (read-multibyte-character r0 r1) (lookup-character utf-subst-table-for-encode r0 r1) (if (r7 == 0) @@ -204,7 +199,7 @@ Others are encoded as U+FFFD.") (write (r0 >> 8)) (write (r0 & 255)) (repeat)))) - "Encode to big endian UTF-16 with signature. + "Encode to UTF-16BE (big endian without signature). Characters from the charsets ascii, eight-bit-control, eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded after translation through the translation-table named @@ -215,8 +210,6 @@ Others are encoded as U+FFFD.") (let ((doc " -Assumes and ignores the leading two-byte signature. - It supports Unicode characters of these ranges: U+0000..U+33FF, U+E000..U+FFFF. They correspond to these Emacs character sets: