(r1 -= #xe000)
(r2 = (((r1 / 96) + 32) << 7))
(r1 %= 96)
- (r1 += (r2 + 32))))))))))))
+ (r1 += (r2 + 32)))))))))))
+
+(defconst utf-16-le-decode-loop
+ `(loop
+ (read r3 r4)
+ (r1 = (r4 <8 r3))
+ ,utf-16-decode-ucs
+ (write-multibyte-character r0 r1)
+ (repeat)))
+
+(defconst utf-16-be-decode-loop
+ `(loop
+ (read r3 r4)
+ (r1 = (r3 <8 r4))
+ ,utf-16-decode-ucs
+ (write-multibyte-character r0 r1)
+ (repeat)))
+
+)
(define-ccl-program ccl-decode-mule-utf-16-le
`(2 ; 2 bytes -> 1 to 4 bytes
- ((read r0 r1) ; signature
- (loop
- (read r3 r4)
- (r1 = (r4 <8 r3))
- ,utf-16-decode-ucs
- (write-multibyte-character r0 r1)
- (repeat))))
- "Decode little endian UTF-16 (ignoring signature bytes).
+ ,utf-16-le-decode-loop)
+ "Decode UTF-16LE (little endian without signature bytes).
Basic decoding is done into the charsets ascii, latin-iso8859-1 and
mule-unicode-*. Un-representable Unicode characters are decoded as
U+fffd.")
(define-ccl-program ccl-decode-mule-utf-16-be
`(2 ; 2 bytes -> 1 to 4 bytes
- ((read r0 r1) ; signature
- (loop
- (read r3 r4)
- (r1 = (r3 <8 r4))
- ,utf-16-decode-ucs
- (write-multibyte-character r0 r1)
- (repeat))))
- "Decode big endian UTF-16 (ignoring signature bytes).
+ ,utf-16-be-decode-loop)
+ "Decode UTF-16BE (big endian without signature bytes).
Basic decoding is done into the charsets ascii, latin-iso8859-1 and
mule-unicode-*. Un-representable Unicode characters are
decoded as U+fffd.")
+(define-ccl-program ccl-decode-mule-utf-16-le-with-signature
+ `(2
+ ((read r3 r4)
+ ,utf-16-le-decode-loop))
+ "Like ccl-decode-utf-16-le but skip the first 2-byte BOM.")
+
+(define-ccl-program ccl-decode-mule-utf-16-be-with-signature
+ `(2
+ ((read r3 r4)
+ ,utf-16-be-decode-loop))
+ "Like ccl-decode-utf-16-be but skip the first 2-byte BOM.")
+
(makunbound 'utf-16-decode-ucs) ; done with it
+(makunbound 'utf-16-le-decode-loop)
+(makunbound 'utf-16-be-decode-loop)
(eval-and-compile
(defconst utf-16-decode-to-ucs
(r0 = (r3 + #x2500))
(if (r0 == ,(charset-id 'mule-unicode-e000-ffff))
(r0 = (r3 + #xe000))
- (r0 = #xfffd)))))))))))
+ (r0 = #xfffd))))))))))
+
+(defconst utf-16-le-encode-loop
+ `(loop
+ (read-multibyte-character r0 r1)
+ (translate-character utf-translation-table-for-encode r0 r1)
+ ,utf-16-decode-to-ucs
+ (write (r0 & 255))
+ (write (r0 >> 8))
+ (repeat)))
+
+(defconst utf-16-be-encode-loop
+ `(loop
+ (read-multibyte-character r0 r1)
+ (translate-character utf-translation-table-for-encode r0 r1)
+ ,utf-16-decode-to-ucs
+ (write (r0 >> 8))
+ (write (r0 & 255))
+ (repeat)))
+)
+
(define-ccl-program ccl-encode-mule-utf-16-le
`(1
- ((write #xff)
- (write #xfe)
- (loop
- (read-multibyte-character r0 r1)
- (translate-character utf-translation-table-for-encode r0 r1)
- ,utf-16-decode-to-ucs
- (write (r0 & 255))
- (write (r0 >> 8))
- (repeat))))
- "Encode to little endian UTF-16 with signature.
+ ,utf-16-le-encode-loop)
+ "Encode to UTF-16LE (little endian without signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
after translation through the translation-table of name
(define-ccl-program ccl-encode-mule-utf-16-be
`(1
- ((write #xfe)
- (write #xff)
- (loop
- (read-multibyte-character r0 r1)
- (translate-character utf-translation-table-for-encode r0 r1)
- ,utf-16-decode-to-ucs
- (write (r0 >> 8))
- (write (r0 & 255))
- (repeat))))
- "Encode to big endian UTF-16 with signature.
+ ,utf-16-be-encode-loop)
+ "Encode to UTF-16BE (big endian without signature).
+Characters from the charsets ascii, eight-bit-control,
+eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
+after translation through the translation-table named
+`utf-translation-table-for-encode'.
+Others are encoded as U+FFFD.")
+
+(define-ccl-program ccl-encode-mule-utf-16-le-with-signature
+ `(1
+ ((write #xFF)
+ (write #xFE)
+ ,utf-16-le-encode-loop))
+ "Encode to UTF-16 (little endian with signature).
+Characters from the charsets ascii, eight-bit-control,
+eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
+after translation through the translation-table of name
+`utf-translation-table-for-encode'.
+Others are encoded as U+FFFD.")
+
+(define-ccl-program ccl-encode-mule-utf-16-be-with-signature
+ `(1
+ ((write #xFE)
+ (write #xFF)
+ ,utf-16-be-encode-loop))
+ "Encode to UTF-16 (big endian with signature).
Characters from the charsets ascii, eight-bit-control,
eight-bit-graphic, latin-iso8859-1 and mule-unicode-* are encoded
after translation through the translation-table named
Others are encoded as U+FFFD.")
(makunbound 'utf-16-decode-to-ucs)
+(makunbound 'utf-16-le-encode-loop)
+(makunbound 'utf-16-be-encode-loop)
(let ((doc "
-Assumes and ignores the leading two-byte signature.
-
It supports Unicode characters of these ranges:
U+0000..U+33FF, U+E000..U+FFFF.
They correspond to these Emacs character sets:
mule-unicode-2500-33ff
mule-unicode-e000-ffff)
(mime-charset . utf-16le)
- (coding-category . coding-category-utf-16-le)
(valid-codes (0 . 255))
- (pre-write-conversion . utf-16-le-pre-write-conversion)
(dependency unify-8859-on-encoding-mode
unify-8859-on-decoding-mode)))
mule-unicode-2500-33ff
mule-unicode-e000-ffff)
(mime-charset . utf-16be)
+ (valid-codes (0 . 255))
+ (dependency unify-8859-on-encoding-mode
+ unify-8859-on-decoding-mode)))
+
+ (make-coding-system
+ 'mule-utf-16-le-with-signature 4 ?u
+ (concat
+ "Little endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
+ doc)
+
+ '(ccl-decode-mule-utf-16-le-with-signature
+ . ccl-encode-mule-utf-16-le-with-signature)
+ '((safe-charsets
+ ascii
+ eight-bit-control
+ latin-iso8859-1
+ mule-unicode-0100-24ff
+ mule-unicode-2500-33ff
+ mule-unicode-e000-ffff)
+ (coding-category . coding-category-utf-16-le)
+ (mime-charset . utf-16)
+ (valid-codes (0 . 255))
+ (dependency unify-8859-on-encoding-mode
+ unify-8859-on-decoding-mode)))
+
+ (make-coding-system
+ 'mule-utf-16-be-with-signature 4 ?u
+ (concat
+ "Big endian UTF-16 (with BOM) for Emacs-supported Unicode characters."
+ doc)
+
+ '(ccl-decode-mule-utf-16-be-with-signature
+ . ccl-encode-mule-utf-16-be-with-signature)
+ '((safe-charsets
+ ascii
+ eight-bit-control
+ latin-iso8859-1
+ mule-unicode-0100-24ff
+ mule-unicode-2500-33ff
+ mule-unicode-e000-ffff)
(coding-category . coding-category-utf-16-be)
+ (mime-charset . utf-16)
(valid-codes (0 . 255))
- (pre-write-conversion . utf-16-be-pre-write-conversion)
(dependency unify-8859-on-encoding-mode
unify-8859-on-decoding-mode))))
(define-coding-system-alias 'utf-16-le 'mule-utf-16-le)
(define-coding-system-alias 'utf-16-be 'mule-utf-16-be)
+(define-coding-system-alias 'utf-16-le-with-signature
+ 'mule-utf-16-le-with-signature)
+(define-coding-system-alias 'utf-16-be-with-signature
+ 'mule-utf-16-be-with-signature)
;;; utf-16.el ends here