]> git.eshelyaron.com Git - emacs.git/commitdiff
(ccl-decode-mule-utf-8): Don't lose
authorKenichi Handa <handa@m17n.org>
Wed, 12 Mar 2003 00:46:32 +0000 (00:46 +0000)
committerKenichi Handa <handa@m17n.org>
Wed, 12 Mar 2003 00:46:32 +0000 (00:46 +0000)
bytes on handling an invalid byte sequence.

lisp/ChangeLog
lisp/international/utf-8.el

index da9568a6477873cbd8b29c6e51f6bc8979adfad6..c3ea8b2644d6f3ca76c18279c41acd20f422eaa5 100644 (file)
@@ -1,3 +1,8 @@
+2003-03-12  Kenichi Handa  <handa@etlken2>
+
+       * international/utf-8.el (ccl-decode-mule-utf-8): Don't lose
+       bytes on handling an invalid byte sequence.
+
 2003-03-11  Jason Rumney  <jasonr@gnu.org>
 
        * files.el (auto-mode-alist): Add .xsl for sgml-mode.
index 67a1f11a4f5cd7c7b6f6f62b1665509424e35675..c579a564ca75536a8919436515a4f30f569ebc40 100644 (file)
@@ -308,18 +308,20 @@ default.  Also, installing them may be rather slow."
     ((r5 = ,(charset-id 'eight-bit-control))
      (r6 = ,(charset-id 'eight-bit-graphic))
      (loop
+      (r0 = -1)
       (read r0)
 
       ;; 1byte encoding, i.e., ascii
       (if (r0 < #x80)
-         (write r0)
+         ((write r0))
        (if (r0 < #xc0)             ; continuation byte (invalid here)
-           (if (r0 < #xa0)
-               (write-multibyte-character r5 r0)
-             (write-multibyte-character r6 r0))
+           ((if (r0 < #xa0)
+                (write-multibyte-character r5 r0)
+              (write-multibyte-character r6 r0)))
          ;; 2 byte encoding 00000yyyyyxxxxxx = 110yyyyy 10xxxxxx
          (if (r0 < #xe0)
-             ((read r1)
+             ((r1 = -1)
+              (read r1)
 
               (if ((r1 & #b11000000) != #b10000000)
                   ;; Invalid 2-byte sequence
@@ -373,7 +375,9 @@ default.  Also, installing them may be rather slow."
            ;; 3byte encoding
            ;; zzzzyyyyyyxxxxxx = 1110zzzz 10yyyyyy 10xxxxxx
            (if (r0 < #xf0)
-               ((read r1 r2)
+               ((r1 = -1)
+                (r2 = -1)
+                (read r1 r2)
 
                 ;; This is set to 1 if the encoding is invalid.
                 (r4 = 0)
@@ -478,7 +482,10 @@ default.  Also, installing them may be rather slow."
                  ;; 4byte encoding
                  ;; keep those bytes as eight-bit-{control|graphic}
                  ;; Fixme: allow lookup in utf-subst-table-for-decode.
-                 ((read r1 r2 r3)
+                 ((r1 = -1)
+                  (r2 = -1)
+                  (r3 = -1)
+                  (read r1 r2 r3)
                   ;; r0 >= #xf0, thus eight-bit-graphic
                   (write-multibyte-character r6 r0)
                   (if (r1 < #xa0)
@@ -512,7 +519,33 @@ default.  Also, installing them may be rather slow."
                               (write-multibyte-character r6 r1)))))))
                ;; else invalid byte >= #xfe
                (write-multibyte-character r6 r0))))))
-      (repeat))))
+      (repeat)))
+
+    ;; At EOF...
+    (if (r0 >= 0)
+       ((if (r0 < #x80)
+            (write r0)
+          (if (r0 < #xa0)
+              (write-multibyte-character r5 r0)
+            ((write-multibyte-character r6 r0))))
+        (if (r1 >= 0)
+            ((if (r1 < #x80)
+                 (write r1)
+               (if (r1 < #xa0)
+                   (write-multibyte-character r5 r1)
+                 ((write-multibyte-character r6 r1))))
+             (if (r2 >= 0)
+                 ((if (r2 < #x80)
+                      (write r2)
+                    (if (r2 < #xa0)
+                        (write-multibyte-character r5 r2)
+                      ((write-multibyte-character r6 r2))))
+                  (if (r3 >= 0)
+                      (if (r3 < #x80)
+                          (write r3)
+                        (if (r3 < #xa0)
+                            (write-multibyte-character r5 r3)
+                          ((write-multibyte-character r6 r3))))))))))))
 
   "CCL program to decode UTF-8.
 Basic decoding is done into the charsets ascii, latin-iso8859-1 and