]> git.eshelyaron.com Git - emacs.git/commitdiff
(check_composing_code): If the current composing
authorKenichi Handa <handa@m17n.org>
Sat, 26 Sep 1998 04:20:48 +0000 (04:20 +0000)
committerKenichi Handa <handa@m17n.org>
Sat, 26 Sep 1998 04:20:48 +0000 (04:20 +0000)
sequence doesn't end properly, return -1.
(DECODE_CHARACTER_ASCII): Update coding->composed_chars.
(DECODE_CHARACTER_DIMENSION1): Likewise.
(decode_coding_iso2022): Check validity of a composing sequence.
(code_convert_string): If the length of text to be converted is
shrunk to zero, don't perform code conversion.
(shrink_decoding_region): Fix previous change.

src/coding.c

index fa2bbc620a01b23f59d9a1f9cdb809abd6140a02..5c3299b6b569df00beea9a6a38e2eac02345d5ff 100644 (file)
@@ -213,15 +213,18 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 
 /* Decode one ASCII character C.  */
 
-#define DECODE_CHARACTER_ASCII(c)                              \
-  do {                                                         \
-    if (COMPOSING_P (coding->composing))                       \
-      *dst++ = 0xA0, *dst++ = (c) | 0x80;                      \
-    else                                                       \
-      {                                                                \
-       *dst++ = (c);                                           \
-       coding->produced_char++;                                \
-      }                                                                \
+#define DECODE_CHARACTER_ASCII(c)              \
+  do {                                         \
+    if (COMPOSING_P (coding->composing))       \
+      {                                                \
+       *dst++ = 0xA0, *dst++ = (c) | 0x80;     \
+       coding->composed_chars++;               \
+      }                                                \
+    else                                       \
+      {                                                \
+       *dst++ = (c);                           \
+       coding->produced_char++;                \
+      }                                                \
   } while (0)
 
 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
@@ -231,7 +234,10 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
   do {                                                                 \
     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);  \
     if (COMPOSING_P (coding->composing))                               \
-      *dst++ = leading_code + 0x20;                                    \
+      {                                                                        \
+       *dst++ = leading_code + 0x20;                                   \
+       coding->composed_chars++;                                       \
+      }                                                                        \
     else                                                               \
       {                                                                        \
        *dst++ = leading_code;                                          \
@@ -997,9 +1003,7 @@ check_composing_code (coding, src, src_end)
            invalid_code_found = 1;
        }
     }
-  return (invalid_code_found
-         ? src - src_start
-         : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
+  return (invalid_code_found ? src - src_start : -1);
 }
 
 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
@@ -1030,6 +1034,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
     translation_table = Vstandard_translation_table_for_decode;
 
   coding->produced_char = 0;
+  coding->composed_chars = 0;
   coding->fake_multibyte = 0;
   while (src < src_end && (dst_bytes
                           ? (dst < adjusted_dst_end)
@@ -1243,7 +1248,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                    coding->composing = (c1 == '0'
                                         ? COMPOSING_NO_RULE_HEAD
                                         : COMPOSING_WITH_RULE_HEAD);
-                   coding->produced_char++;
+                   coding->composed_chars = 0;
                  }
                else if (result1 > 0)
                  {
@@ -1253,6 +1258,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                        src += result1;
                        dst += result1 + 2;
                        coding->produced_char += result1 + 2;
+                       coding->fake_multibyte = 1;
                      }
                    else
                      {
@@ -1266,6 +1272,28 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
              break;
 
            case '1':           /* end composing */
+             if (coding->composed_chars > 0)
+               {
+                 if (coding->composed_chars == 1)
+                   {
+                     unsigned char *this_char_start = dst;
+                     int this_bytes;
+
+                     /* Only one character is in the composing
+                        sequence.  Make it a normal character.  */
+                     while (*--this_char_start != LEADING_CODE_COMPOSITION);
+                     dst = (this_char_start
+                            + (coding->composing == COMPOSING_NO_RULE_TAIL
+                               ? 1 : 2));
+                     *dst -= 0x20;
+                     if (*dst == 0x80)
+                       *++dst &= 0x7F;
+                     this_bytes = BYTES_BY_CHAR_HEAD (*dst);
+                     while (this_bytes--) *this_char_start++ = *dst++;
+                     dst = this_char_start;
+                   }
+                 coding->produced_char++;
+               }
              coding->composing = COMPOSING_NO;
              break;
 
@@ -3938,30 +3966,45 @@ shrink_decoding_region (beg, end, coding, str)
 
        case CODING_CATEGORY_IDX_ISO_7:
        case CODING_CATEGORY_IDX_ISO_7_TIGHT:
-         /* We can skip all charactes at the tail except for ESC and
-             the following 2-byte at the tail.  */
-         if (eol_conversion)
-           while (begp < endp
-                  && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
-             endp--;
-         else
-           while (begp < endp
-                  && (c = endp[-1]) != ISO_CODE_ESC)
-             endp--;
-         /* Do not consider LF as ascii if preceded by CR, since that
-             confuses eol decoding. */
-         if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
-           endp++;
-         if (begp < endp && endp[-1] == ISO_CODE_ESC)
-           {
-             if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
-               /* This is an ASCII designation sequence.  We can
-                    surely skip the tail.  */
-               endp += 2;
-             else
-               /* Hmmm, we can't skip the tail.  */
-               endp = endp_orig;
-           }
+         {
+           /* We can skip all charactes at the tail except for 8-bit
+              codes and ESC and the following 2-byte at the tail.  */
+           unsigned char *eight_bit = NULL;
+
+           if (eol_conversion)
+             while (begp < endp
+                    && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
+               {
+                 if (!eight_bit && c & 0x80) eight_bit = endp;
+                 endp--;
+               }
+           else
+             while (begp < endp
+                    && (c = endp[-1]) != ISO_CODE_ESC)
+               {
+                 if (!eight_bit && c & 0x80) eight_bit = endp;
+                 endp--;
+               }
+           /* Do not consider LF as ascii if preceded by CR, since that
+              confuses eol decoding. */
+           if (begp < endp && endp < endp_orig
+               && endp[-1] == '\r' && endp[0] == '\n')
+             endp++;
+           if (begp < endp && endp[-1] == ISO_CODE_ESC)
+             {
+               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
+                 /* This is an ASCII designation sequence.  We can
+                    surely skip the tail.  But, if we have
+                    encountered an 8-bit code, skip only the codes
+                    after that.  */
+                 endp = eight_bit ? eight_bit : endp + 2;
+               else
+                 /* Hmmm, we can't skip the tail.  */
+                 endp = endp_orig;
+             }
+           else if (eight_bit)
+             endp = eight_bit;
+         }
        }
     }
   *beg += begp - begp_orig;
@@ -4524,9 +4567,7 @@ code_convert_string (str, coding, encodep, nocopy)
       else
        shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
     }
-  if (from == to_byte
-      && ! (coding->mode & CODING_MODE_LAST_BLOCK
-           && CODING_REQUIRE_FLUSHING (coding)))
+  if (from == to_byte)
     return (nocopy ? str : Fcopy_sequence (str));
 
   if (encodep)