From: Kenichi Handa Date: Tue, 6 May 2003 07:00:00 +0000 (+0000) Subject: (utf-16-le-decode-loop) X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=79d2a4dbcd0f77c862fefa3d5c7c0460f0124c46;p=emacs.git (utf-16-le-decode-loop) (utf-16-be-decode-loop): New temporary variables. (ccl-decode-mule-utf-16-le): Use utf-16-le-decode-loop. (ccl-decode-mule-utf-16-be): Use utf-16-be-decode-loop (ccl-decode-mule-utf-16-le-with-signature) (ccl-decode-mule-utf-16-be-with-signature): New CCL programs. (utf-16-le-encode-loop, utf-16-be-encode-loop): New temporary variables. (ccl-encode-mule-utf-16-le): Use utf-16-le-encode-loop. (ccl-encode-mule-utf-16-be): Use utf-16-be-encode-loop (ccl-encode-mule-utf-16-le-with-signature) (ccl-encode-mule-utf-16-be-with-signature): New CCL programs. (mule-utf-16-le-with-signature, mule-utf-16-be-with-signature): New coding systems. (utf-16-le-with-signature, utf-16-be-with-signature): Aliases of the above coding systems. --- diff --git a/lisp/international/utf-16.el b/lisp/international/utf-16.el index 97417041efd..69497bf0e47 100644 --- a/lisp/international/utf-16.el +++ b/lisp/international/utf-16.el @@ -107,37 +107,57 @@ (r1 -= #xe000) (r2 = (((r1 / 96) + 32) << 7)) (r1 %= 96) - (r1 += (r2 + 32)))))))))))) + (r1 += (r2 + 32))))))))))) + +(defconst utf-16-le-decode-loop + `(loop + (read r3 r4) + (r1 = (r4 <8 r3)) + ,utf-16-decode-ucs + (write-multibyte-character r0 r1) + (repeat))) + +(defconst utf-16-be-decode-loop + `(loop + (read r3 r4) + (r1 = (r3 <8 r4)) + ,utf-16-decode-ucs + (write-multibyte-character r0 r1) + (repeat))) + +) (define-ccl-program ccl-decode-mule-utf-16-le `(2 ; 2 bytes -> 1 to 4 bytes - ((read r0 r1) ; signature - (loop - (read r3 r4) - (r1 = (r4 <8 r3)) - ,utf-16-decode-ucs - (write-multibyte-character r0 r1) - (repeat)))) - "Decode little endian UTF-16 (ignoring signature bytes). + ,utf-16-le-decode-loop) + "Decode UTF-16LE (little endian without signature bytes). Basic decoding is done into the charsets ascii, latin-iso8859-1 and mule-unicode-*. Un-representable Unicode characters are decoded as U+fffd.") (define-ccl-program ccl-decode-mule-utf-16-be `(2 ; 2 bytes -> 1 to 4 bytes - ((read r0 r1) ; signature - (loop - (read r3 r4) - (r1 = (r3 <8 r4)) - ,utf-16-decode-ucs - (write-multibyte-character r0 r1) - (repeat)))) - "Decode big endian UTF-16 (ignoring signature bytes). + ,utf-16-be-decode-loop) + "Decode UTF-16BE (big endian without signature bytes). Basic decoding is done into the charsets ascii, latin-iso8859-1 and mule-unicode-*. Un-representable Unicode characters are decoded as U+fffd.") +(define-ccl-program ccl-decode-mule-utf-16-le-with-signature + `(2 + ((read r3 r4) + ,utf-16-le-decode-loop)) + "Like ccl-decode-utf-16-le but skip the first 2-byte BOM.") + +(define-ccl-program ccl-decode-mule-utf-16-be-with-signature + `(2 + ((read r3 r4) + ,utf-16-be-decode-loop)) + "Like ccl-decode-utf-16-be but skip the first 2-byte BOM.") + (makunbound 'utf-16-decode-ucs) ; done with it +(makunbound 'utf-16-le-decode-loop) +(makunbound 'utf-16-be-decode-loop) (eval-and-compile (defconst utf-16-decode-to-ucs @@ -162,20 +182,32 @@ decoded as U+fffd.") (r0 = (r3 + #x2500)) (if (r0 == ,(charset-id 'mule-unicode-e000-ffff)) (r0 = (r3 + #xe000)) - (r0 = #xfffd))))))))))) + (r0 = #xfffd)))))))))) + +(defconst utf-16-le-encode-loop + `(loop + (read-multibyte-character r0 r1) + (translate-character utf-translation-table-for-encode r0 r1) + ,utf-16-decode-to-ucs + (write (r0 & 255)) + (write (r0 >> 8)) + (repeat))) + +(defconst utf-16-be-encode-loop + `(loop + (read-multibyte-character r0 r1) + (translate-character utf-translation-table-for-encode r0 r1) + ,utf-16-decode-to-ucs + (write (r0 >> 8)) + (write (r0 & 255)) + (repeat))) +) + (define-ccl-program ccl-encode-mule-utf-16-le `(1 - ((write #xff) - (write #xfe) - (loop - (read-multibyte-character r0 r1) - (translate-character utf-translation-table-for-encode r0 r1) - ,utf-16-decode-to-ucs - (write (r0 & 255)) - (write (r0 >> 8)) - (repeat)))) - "Encode to little endian UTF-16 with signature. + ,utf-16-le-encode-loop) + "Encode to UTF-16LE (little endian without signature). Characters from the charsets ascii, eight-bit-control, eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded after translation through the translation-table of name @@ -184,16 +216,32 @@ Others are encoded as U+FFFD.") (define-ccl-program ccl-encode-mule-utf-16-be `(1 - ((write #xfe) - (write #xff) - (loop - (read-multibyte-character r0 r1) - (translate-character utf-translation-table-for-encode r0 r1) - ,utf-16-decode-to-ucs - (write (r0 >> 8)) - (write (r0 & 255)) - (repeat)))) - "Encode to big endian UTF-16 with signature. + ,utf-16-be-encode-loop) + "Encode to UTF-16BE (big endian without signature). +Characters from the charsets ascii, eight-bit-control, +eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded +after translation through the translation-table named +`utf-translation-table-for-encode'. +Others are encoded as U+FFFD.") + +(define-ccl-program ccl-encode-mule-utf-16-le-with-signature + `(1 + ((write #xFF) + (write #xFE) + ,utf-16-le-encode-loop)) + "Encode to UTF-16 (little endian with signature). +Characters from the charsets ascii, eight-bit-control, +eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded +after translation through the translation-table of name +`utf-translation-table-for-encode'. +Others are encoded as U+FFFD.") + +(define-ccl-program ccl-encode-mule-utf-16-be-with-signature + `(1 + ((write #xFE) + (write #xFF) + ,utf-16-be-encode-loop)) + "Encode to UTF-16 (big endian with signature). Characters from the charsets ascii, eight-bit-control, eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded after translation through the translation-table named @@ -201,11 +249,11 @@ after translation through the translation-table named Others are encoded as U+FFFD.") (makunbound 'utf-16-decode-to-ucs) +(makunbound 'utf-16-le-encode-loop) +(makunbound 'utf-16-be-encode-loop) (let ((doc " -Assumes and ignores the leading two-byte signature. - It supports Unicode characters of these ranges: U+0000..U+33FF, U+E000..U+FFFF. They correspond to these Emacs character sets: @@ -235,9 +283,7 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER).")) mule-unicode-2500-33ff mule-unicode-e000-ffff) (mime-charset . utf-16le) - (coding-category . coding-category-utf-16-le) (valid-codes (0 . 255)) - (pre-write-conversion . utf-16-le-pre-write-conversion) (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode))) @@ -256,13 +302,57 @@ sequence representing U+FFFD (REPLACEMENT CHARACTER).")) mule-unicode-2500-33ff mule-unicode-e000-ffff) (mime-charset . utf-16be) + (valid-codes (0 . 255)) + (dependency unify-8859-on-encoding-mode + unify-8859-on-decoding-mode))) + + (make-coding-system + 'mule-utf-16-le-with-signature 4 ?u + (concat + "Little endian UTF-16 (with BOM) for Emacs-supported Unicode characters." + doc) + + '(ccl-decode-mule-utf-16-le-with-signature + . ccl-encode-mule-utf-16-le-with-signature) + '((safe-charsets + ascii + eight-bit-control + latin-iso8859-1 + mule-unicode-0100-24ff + mule-unicode-2500-33ff + mule-unicode-e000-ffff) + (coding-category . coding-category-utf-16-le) + (mime-charset . utf-16) + (valid-codes (0 . 255)) + (dependency unify-8859-on-encoding-mode + unify-8859-on-decoding-mode))) + + (make-coding-system + 'mule-utf-16-be-with-signature 4 ?u + (concat + "Big endian UTF-16 (with BOM) for Emacs-supported Unicode characters." + doc) + + '(ccl-decode-mule-utf-16-be-with-signature + . ccl-encode-mule-utf-16-be-with-signature) + '((safe-charsets + ascii + eight-bit-control + latin-iso8859-1 + mule-unicode-0100-24ff + mule-unicode-2500-33ff + mule-unicode-e000-ffff) (coding-category . coding-category-utf-16-be) + (mime-charset . utf-16) (valid-codes (0 . 255)) - (pre-write-conversion . utf-16-be-pre-write-conversion) (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode)))) (define-coding-system-alias 'utf-16-le 'mule-utf-16-le) (define-coding-system-alias 'utf-16-be 'mule-utf-16-be) +(define-coding-system-alias 'utf-16-le-with-signature + 'mule-utf-16-le-with-signature) +(define-coding-system-alias 'utf-16-be-with-signature + 'mule-utf-16-be-with-signature) ;;; utf-16.el ends here