(utf-translate-cjk-load-tables))
(gethash code-point
(get 'utf-subst-table-for-decode 'translation-hash-table)))
-
+
;; Look up CHAR in the hash table stored under the
;; `translation-hash-table' property of the symbol
;; `utf-subst-table-for-encode'.  The visible condition tests
;; `utf-translate-cjk-mode' and, when that is non-nil, calls
;; `utf-translate-cjk-load-tables'.
;; NOTE(review): the parentheses do not balance as shown in this view
;; (the defun is left open by one), so a line may be missing here --
;; confirm against the full file whether the `gethash' call is guarded
;; by the `if' or always runs.
(defun utf-lookup-subst-table-for-encode (char)
(if (and utf-translate-cjk-mode
(utf-translate-cjk-load-tables))
(gethash char
(get 'utf-subst-table-for-encode 'translation-hash-table)))
-
+
(define-minor-mode utf-translate-cjk-mode
- "Whether the UTF based coding systems should decode/encode CJK characters.
+ "Toggle whether UTF based coding systems de/encode CJK characters.
+If ARG is an integer, enable if ARG is positive and disable if
+zero or negative. This is a minor mode.
Enabling this allows the coding systems mule-utf-8,
mule-utf-16le and mule-utf-16be to encode characters in the charsets
`korean-ksc5601', `chinese-gb2312', `chinese-big5-1',
turned on: ksc5601 for Korean, gb2312 for Chinese-GB, big5 for
Chinese-Big5 and jisx for other environments.
-This option is on by default. If you are not interested in CJK
+This mode is on by default. If you are not interested in CJK
characters and want to avoid some overhead on encoding/decoding
-by the above coding systems, you can customize this option to nil."
+by the above coding systems, you can customize the user option
+`utf-translate-cjk-mode' to nil."
:init-value t
:version "21.4"
:type 'boolean
;; The UTF-8 decoder generates a UTF-8 sequence represented by a
;; sequence of eight-bit-control/graphic chars for an untranslatable
;; character and an invalid byte.
- ;;
+ ;;
;; This CCL parses that sequence (the first byte is already in r1),
;; writes out the original bytes of that sequence, and sets r5 to
;; -1.
(read-multibyte-character r5 r6)
(r0 = (r5 != ,(charset-id 'eight-bit-control)))
(if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
- ((write r1) ; invalid UTF-8
+ ((write r1) ; invalid UTF-8
(r1 = -1)
(end)))
(r1 = -1)
;; Read the 3rd byte.
(read-multibyte-character r5 r6)
- (r0 = (r5 != ,(charset-id 'eight-bit-control)))
+ (r0 = (r5 != ,(charset-id 'eight-bit-control)))
(if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
(end)) ; invalid UTF-8
(write r6)
(end)))
;; Read the 4th byte.
(read-multibyte-character r5 r6)
- (r0 = (r5 != ,(charset-id 'eight-bit-control)))
+ (r0 = (r5 != ,(charset-id 'eight-bit-control)))
(if ((r5 != ,(charset-id 'eight-bit-graphic)) & r0)
(end)) ; invalid UTF-8
;; 4-byte sequence for an untranslated character.