git.eshelyaron.com Git - emacs.git/commitdiff
(TWO_MORE_BYTES): New macro.
author Kenichi Handa <handa@m17n.org>
Wed, 14 Jan 2009 12:19:44 +0000 (12:19 +0000)
committer Kenichi Handa <handa@m17n.org>
Wed, 14 Jan 2009 12:19:44 +0000 (12:19 +0000)
(detect_coding_utf_16): Use TWO_MORE_BYTES instead of
ONE_MORE_BYTE.

src/ChangeLog
src/coding.c

index 5d048f7413b64832e79c2269d5eaeca3d7123239..4f11a1269ccda64726fac2bdbe29285479ccbe63 100644 (file)
@@ -1,3 +1,9 @@
+2009-01-14  Kenichi Handa  <handa@m17n.org>
+
+       * coding.c (TWO_MORE_BYTES): New macro.
+       (detect_coding_utf_16): Use TWO_MORE_BYTES instead of
+       ONE_MORE_BYTE.
+
 2009-01-13  Chong Yidong  <cyd@stupidchicken.com>
 
        * font.c (font_clear_prop): If clearing the family, clear the font
@@ -90,7 +96,7 @@
 2009-01-07  Kenichi Handa  <handa@m17n.org>
 
        * fileio.c (Finsert_file_contents): In the case of replace,
-       remeber the coding system used for decoding in
+       remember the coding system used for decoding in
        coding_system (Bug#1039).
 
        * coding.c (decode_coding_utf_8): Check byte_after_cr before
index 01878a37b5c0b5f45590f64f4f21d9f6016db41a..9a94bc6fb2a2e837b3142ec2c53ea6407104b0c2 100644 (file)
@@ -743,6 +743,47 @@ static struct coding_system coding_categories[coding_category_max];
     consumed_chars++;                                  \
   } while (0)
 
+/* Safely get two bytes from the source text pointed to by SRC, which
+   ends at SRC_END, and set C1 and C2 to them, advancing SRC.  If no
+   byte is left for C1, jump to `no_more_source'; if none is left for
+   C2, set C2 to -1.  If multibytep is nonzero and a byte has its high
+   bit set, a lead byte of 0xC0 or 0xC1 is merged with the byte after
+   it (read without checking SRC_END) into a single value; any other
+   such byte sets C2 to -1, and C1 too when found in the C1 position.
+   The caller must declare and set in advance: src, src_end,
+   multibytep, and must provide the label `no_more_source'.
+   It is intended that this macro is used in detect_coding_utf_16.  */
+
+#define TWO_MORE_BYTES(c1, c2)                 \
+  do {                                         \
+    if (src == src_end)                                \
+      goto no_more_source;                     \
+    c1 = *src++;                               \
+    if (multibytep && (c1 & 0x80))             \
+      {                                                \
+       if ((c1 & 0xFE) == 0xC0)                \
+         c1 = ((c1 & 1) << 6) | *src++;        \
+       else                                    \
+         {                                     \
+           c1 = c2 = -1;                       \
+           break;                              \
+         }                                     \
+      }                                                \
+    if (src == src_end)                                \
+      c2 = -1;                                 \
+    else                                       \
+      {                                                \
+       c2 = *src++;                            \
+       if (multibytep && (c2 & 0x80))          \
+         {                                     \
+           if ((c2 & 0xFE) == 0xC0)            \
+             c2 = ((c2 & 1) << 6) | *src++;    \
+           else                                \
+             c2 = -1;                          \
+         }                                     \
+      }                                                \
+  } while (0)
+
 
 #define ONE_MORE_BYTE_NO_CHECK(c)                      \
   do {                                                 \
@@ -1575,8 +1616,7 @@ detect_coding_utf_16 (coding, detect_info)
       return 0;
     }
 
-  ONE_MORE_BYTE (c1);
-  ONE_MORE_BYTE (c2);
+  TWO_MORE_BYTES (c1, c2);
   if ((c1 == 0xFF) && (c2 == 0xFE))
     {
       detect_info->found |= (CATEGORY_MASK_UTF_16_LE
@@ -1593,6 +1633,11 @@ detect_coding_utf_16 (coding, detect_info)
                                | CATEGORY_MASK_UTF_16_BE_NOSIG
                                | CATEGORY_MASK_UTF_16_LE_NOSIG);
     }
+  else if (c1 < 0 || c2 < 0)
+    {
+      detect_info->rejected |= CATEGORY_MASK_UTF_16;
+      return 0;
+    }
   else
     {
       /* We check the dispersion of Eth and Oth bytes where E is even and
@@ -1610,8 +1655,9 @@ detect_coding_utf_16 (coding, detect_info)
 
       while (1)
        {
-         ONE_MORE_BYTE (c1);
-         ONE_MORE_BYTE (c2);
+         TWO_MORE_BYTES (c1, c2);
+         if (c1 < 0 || c2 < 0)
+           break;
          if (! e[c1])
            {
              e[c1] = 1;