(check_composing_code): If the current composing

author Kenichi Handa <handa@m17n.org>

Sat, 26 Sep 1998 04:20:48 +0000 (04:20 +0000)

committer Kenichi Handa <handa@m17n.org>

Sat, 26 Sep 1998 04:20:48 +0000 (04:20 +0000)
author Kenichi Handa <handa@m17n.org>
Sat, 26 Sep 1998 04:20:48 +0000 (04:20 +0000)
committer Kenichi Handa <handa@m17n.org>
Sat, 26 Sep 1998 04:20:48 +0000 (04:20 +0000)
diff --git a/src/coding.c b/src/coding.c

index fa2bbc620a01b23f59d9a1f9cdb809abd6140a02..5c3299b6b569df00beea9a6a38e2eac02345d5ff 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -213,15 +213,18 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
  
  /* Decode one ASCII character C.  */
  
-#define DECODE_CHARACTER_ASCII(c)                              \
-  do {                                                         \
-    if (COMPOSING_P (coding->composing))                       \
-      *dst++ = 0xA0, *dst++ = (c) | 0x80;                      \
-    else                                                       \
-      {                                                                \
-       *dst++ = (c);                                           \
-       coding->produced_char++;                                \
-      }                                                                \
+#define DECODE_CHARACTER_ASCII(c)              \
+  do {                                         \
+    if (COMPOSING_P (coding->composing))       \
+      {                                                \
+       *dst++ = 0xA0, *dst++ = (c) | 0x80;     \
+       coding->composed_chars++;               \
+      }                                                \
+    else                                       \
+      {                                                \
+       *dst++ = (c);                           \
+       coding->produced_char++;                \
+      }                                                \
    } while (0)
  
  /* Decode one DIMENSION1 character whose charset is CHARSET and whose
@@ -231,7 +234,10 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
    do {                                                                 \
      unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);  \
      if (COMPOSING_P (coding->composing))                               \
-      *dst++ = leading_code + 0x20;                                    \
+      {                                                                        \
+       *dst++ = leading_code + 0x20;                                   \
+       coding->composed_chars++;                                       \
+      }                                                                        \
      else                                                               \
        {                                                                        \
         *dst++ = leading_code;                                          \
@@ -997,9 +1003,7 @@ check_composing_code (coding, src, src_end)
             invalid_code_found = 1;
         }
      }
-  return (invalid_code_found
-         ? src - src_start
-         : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
+  return (invalid_code_found ? src - src_start : -1);
  }
  
  /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
@@ -1030,6 +1034,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
      translation_table = Vstandard_translation_table_for_decode;
  
    coding->produced_char = 0;
+  coding->composed_chars = 0;
    coding->fake_multibyte = 0;
    while (src < src_end && (dst_bytes
                            ? (dst < adjusted_dst_end)
@@ -1243,7 +1248,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                     coding->composing = (c1 == '0'
                                          ? COMPOSING_NO_RULE_HEAD
                                          : COMPOSING_WITH_RULE_HEAD);
-                   coding->produced_char++;
+                   coding->composed_chars = 0;
                   }
                 else if (result1 > 0)
                   {
@@ -1253,6 +1258,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                         src += result1;
                         dst += result1 + 2;
                         coding->produced_char += result1 + 2;
+                       coding->fake_multibyte = 1;
                       }
                     else
                       {
@@ -1266,6 +1272,28 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
               break;
  
             case '1':           /* end composing */
+             if (coding->composed_chars > 0)
+               {
+                 if (coding->composed_chars == 1)
+                   {
+                     unsigned char *this_char_start = dst;
+                     int this_bytes;
+
+                     /* Only one character is in the composing
+                        sequence.  Make it a normal character.  */
+                     while (*--this_char_start != LEADING_CODE_COMPOSITION);
+                     dst = (this_char_start
+                            + (coding->composing == COMPOSING_NO_RULE_TAIL
+                               ? 1 : 2));
+                     *dst -= 0x20;
+                     if (*dst == 0x80)
+                       *++dst &= 0x7F;
+                     this_bytes = BYTES_BY_CHAR_HEAD (*dst);
+                     while (this_bytes--) *this_char_start++ = *dst++;
+                     dst = this_char_start;
+                   }
+                 coding->produced_char++;
+               }
               coding->composing = COMPOSING_NO;
               break;
  
@@ -3938,30 +3966,45 @@ shrink_decoding_region (beg, end, coding, str)
  
         case CODING_CATEGORY_IDX_ISO_7:
         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
-         /* We can skip all charactes at the tail except for ESC and
-             the following 2-byte at the tail.  */
-         if (eol_conversion)
-           while (begp < endp
-                  && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
-             endp--;
-         else
-           while (begp < endp
-                  && (c = endp[-1]) != ISO_CODE_ESC)
-             endp--;
-         /* Do not consider LF as ascii if preceded by CR, since that
-             confuses eol decoding. */
-         if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
-           endp++;
-         if (begp < endp && endp[-1] == ISO_CODE_ESC)
-           {
-             if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
-               /* This is an ASCII designation sequence.  We can
-                    surely skip the tail.  */
-               endp += 2;
-             else
-               /* Hmmm, we can't skip the tail.  */
-               endp = endp_orig;
-           }
+         {
+           /* We can skip all characters at the tail except for 8-bit
+              codes and ESC and the following 2 bytes at the tail.  */
+           unsigned char *eight_bit = NULL;
+
+           if (eol_conversion)
+             while (begp < endp
+                    && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
+               {
+                 if (!eight_bit && c & 0x80) eight_bit = endp;
+                 endp--;
+               }
+           else
+             while (begp < endp
+                    && (c = endp[-1]) != ISO_CODE_ESC)
+               {
+                 if (!eight_bit && c & 0x80) eight_bit = endp;
+                 endp--;
+               }
+           /* Do not consider LF as ascii if preceded by CR, since that
+              confuses eol decoding. */
+           if (begp < endp && endp < endp_orig
+               && endp[-1] == '\r' && endp[0] == '\n')
+             endp++;
+           if (begp < endp && endp[-1] == ISO_CODE_ESC)
+             {
+               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
+                 /* This is an ASCII designation sequence.  We can
+                    surely skip the tail.  But, if we have
+                    encountered an 8-bit code, skip only the codes
+                    after that.  */
+                 endp = eight_bit ? eight_bit : endp + 2;
+               else
+                 /* Hmmm, we can't skip the tail.  */
+                 endp = endp_orig;
+             }
+           else if (eight_bit)
+             endp = eight_bit;
+         }
         }
      }
    *beg += begp - begp_orig;
@@ -4524,9 +4567,7 @@ code_convert_string (str, coding, encodep, nocopy)
        else
         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
      }
-  if (from == to_byte
-      && ! (coding->mode & CODING_MODE_LAST_BLOCK
-           && CODING_REQUIRE_FLUSHING (coding)))
+  if (from == to_byte)
      return (nocopy ? str : Fcopy_sequence (str));
  
    if (encodep)
author	Kenichi Handa <handa@m17n.org>
	Sat, 26 Sep 1998 04:20:48 +0000 (04:20 +0000)
committer	Kenichi Handa <handa@m17n.org>
	Sat, 26 Sep 1998 04:20:48 +0000 (04:20 +0000)