From 5c88a01e1e8a7f7fefda2ee3c1e16e0782fa02e5 Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Mon, 29 Jul 2002 05:05:19 +0000 Subject: [PATCH] (ctext-post-read-conversion): Add support for emboded utf-8 encodng (ESC % G ... ESC % @). --- lisp/international/mule.el | 124 +++++++++++++++++++++---------------- 1 file changed, 72 insertions(+), 52 deletions(-) diff --git a/lisp/international/mule.el b/lisp/international/mule.el index 346133053cd..fc5b10bcb9b 100644 --- a/lisp/international/mule.el +++ b/lisp/international/mule.el @@ -429,7 +429,8 @@ code-point in CCS. Currently not supported and just ignored." "Return the coding type of CODING-SYSTEM. A coding type is an integer value indicating the encoding method of CODING-SYSTEM. See the function `make-coding-system' for more detail." - (aref (coding-system-spec coding-system) coding-spec-type-idx)) + (let ((spec (coding-system-spec coding-system))) + (if spec (aref spec coding-spec-type-idx)))) (defun coding-system-mnemonic (coding-system) "Return the mnemonic character of CODING-SYSTEM. @@ -440,18 +441,21 @@ to indicate the coding system. If the arg is nil, return ?-." (defun coding-system-doc-string (coding-system) "Return the documentation string for CODING-SYSTEM." - (aref (coding-system-spec coding-system) coding-spec-doc-string-idx)) + (let ((spec (coding-system-spec coding-system))) + (if spec (aref spec coding-spec-doc-string-idx)))) (defun coding-system-plist (coding-system) "Return the property list of CODING-SYSTEM." - (aref (coding-system-spec coding-system) coding-spec-plist-idx)) + (let ((spec (coding-system-spec coding-system))) + (if spec (aref spec coding-spec-plist-idx)))) (defun coding-system-flags (coding-system) "Return `flags' of CODING-SYSTEM. A `flags' of a coding system is a vector of length 32 indicating detailed information of a coding system. See the function `make-coding-system' for more detail." - (aref (coding-system-spec coding-system) coding-spec-flags-idx)) + (let ((spec (coding-system-spec coding-system))) + (if spec (aref spec coding-spec-flags-idx)))) (defun coding-system-get (coding-system prop) "Extract a value from CODING-SYSTEM's property list for property PROP." @@ -462,8 +466,8 @@ for more detail." (let ((plist (coding-system-plist coding-system))) (if plist (plist-put plist prop val) - (aset (coding-system-spec coding-system) coding-spec-plist-idx - (list prop val))))) + (let ((spec (coding-system-spec coding-system))) + (if spec (aset spec coding-spec-plist-idx (list prop val))))))) (defun coding-system-category (coding-system) "Return the coding category of CODING-SYSTEM. @@ -1307,10 +1311,15 @@ ARG is a list of coding categories ordered by priority." charsets or coding systems.") ;; Functions to support "Non-Standard Character Set Encodings" defined -;; by the ICCCM spec. We support that by converting the leading -;; sequence of the ``extended segment'' to the corresponding ISO-2022 -;; sequences (if the leading sequence names an Emacs charset), or decode -;; the segment (if it names a coding system). Encoding does the reverse. +;; by the COMPOUND-TEXT spec. +;; We support that by converting the leading sequence of the +;; ``extended segment'' to the corresponding ISO-2022 sequences (if +;; the leading sequence names an Emacs charset), or decode the segment +;; (if it names a coding system). Encoding does the reverse. +;; This function also supports "The UTF-8 encoding" described in the +;; section 7 of the documentation fo COMPOUND-TEXT distributed with +;; XFree86. + (defun ctext-post-read-conversion (len) "Decode LEN characters encoded as Compound Text with Extended Segments." (buffer-disable-undo) ; minimize consing due to insertions and deletions @@ -1324,54 +1333,65 @@ charsets or coding systems.") last-coding-system-used encoding textlen chset) (while (re-search-forward - "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002" + "\\(\e\\)%/[0-4]\\([\200-\377][\200-\377]\\)\\([^\002]+\\)\002\\|\e%G[^\e]+\e%@" nil 'move) (set-marker newpt (point)) (set-marker pt (match-beginning 0)) - (setq encoding (match-string 3)) - (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128) - (- (aref (match-string 2) 1) 128)) - (1+ (length encoding)))) - (setq - chset (cdr (assoc-ignore-case encoding - non-standard-icccm-encodings-alist))) - (cond ((null chset) - ;; This charset is not supported--leave this extended - ;; segment unaltered and skip over it. - (goto-char (+ (point) textlen))) - ((charsetp chset) - ;; If it's a charset, replace the leading escape sequence - ;; with a standard ISO-2022 sequence. We will decode all - ;; such segments later, in one go, when we exit the loop - ;; or find an extended segment that names a coding - ;; system, not a charset. - (replace-match - (concat "\\1" - (if (= 0 (charset-iso-graphic-plane chset)) - ;; GL charsets - (if (= 1 (charset-dimension chset)) "(" "$(") - ;; GR charsets - (if (= 96 (charset-chars chset)) - "-" - (if (= 1 (charset-dimension chset)) ")" "$)"))) - (string (charset-iso-final-char chset))) - t) - (goto-char (+ (point) textlen))) - ((coding-system-p chset) - ;; If it's a coding system, we need to decode the segment - ;; right away. But first, decode what we've skipped - ;; across until now. - (when (> pt oldpt) - (decode-coding-region oldpt pt 'ctext-no-compositions)) - (delete-region pt newpt) - (set-marker newpt (+ newpt textlen)) - (decode-coding-region pt newpt chset) - (goto-char newpt) - (set-marker oldpt newpt)))) + (if (= (preceding-char) ?@) + ;; We found embedded utf-8 sequence. + (progn + (delete-char -3) ; delete ESC % @ at the tail + (goto-char pt) + (delete-char 3) ; delete ESC % G at the head + (if (> pt oldpt) + (decode-coding-region oldpt pt 'ctext-no-compositions)) + (decode-coding-region pt newpt 'mule-utf-8) + (goto-char newpt) + (set-marker oldpt newpt)) + (setq encoding (match-string 3)) + (setq textlen (- (+ (* (- (aref (match-string 2) 0) 128) 128) + (- (aref (match-string 2) 1) 128)) + (1+ (length encoding)))) + (setq + chset (cdr (assoc-ignore-case encoding + non-standard-icccm-encodings-alist))) + (cond ((null chset) + ;; This charset is not supported--leave this extended + ;; segment unaltered and skip over it. + (goto-char (+ (point) textlen))) + ((charsetp chset) + ;; If it's a charset, replace the leading escape sequence + ;; with a standard ISO-2022 sequence. We will decode all + ;; such segments later, in one go, when we exit the loop + ;; or find an extended segment that names a coding + ;; system, not a charset. + (replace-match + (concat "\\1" + (if (= 0 (charset-iso-graphic-plane chset)) + ;; GL charsets + (if (= 1 (charset-dimension chset)) "(" "$(") + ;; GR charsets + (if (= 96 (charset-chars chset)) + "-" + (if (= 1 (charset-dimension chset)) ")" "$)"))) + (string (charset-iso-final-char chset))) + t) + (goto-char (+ (point) textlen))) + ((coding-system-p chset) + ;; If it's a coding system, we need to decode the segment + ;; right away. But first, decode what we've skipped + ;; across until now. + (when (> pt oldpt) + (decode-coding-region oldpt pt 'ctext-no-compositions)) + (delete-region pt newpt) + (set-marker newpt (+ newpt textlen)) + (decode-coding-region pt newpt chset) + (goto-char newpt) + (set-marker oldpt newpt))))) ;; Decode what's left. (when (> (point) oldpt) (decode-coding-region oldpt (point) 'ctext-no-compositions)) - ;; This buffer started as unibyte, because the string we get from + ;; This buffer started as unibyte, because the string we get from ;; the X selection is a unibyte string. We must now make it ;; multibyte, so that the decoded text is inserted as multibyte ;; into its buffer. -- 2.39.5