From: Kenichi Handa Date: Sat, 26 Sep 1998 04:20:48 +0000 (+0000) Subject: (check_composing_code): If the current composing X-Git-Tag: emacs-20.4~1624 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=de79a6a5ed49e728d1ee62efd9b1542cb72c095d;p=emacs.git (check_composing_code): If the current composing sequence doesn't end properly, return -1. (DECODE_CHARACTER_ASCII): Update coding->composed_chars. (DECODE_CHARACTER_DIMENSION1): Likewise. (decode_coding_iso2022): Check validity of a composing sequence. (code_convert_string): If the length of text to be converted is shrunk to zero, don't perform code conversion. (shrink_decoding_region): Fix previous change. --- diff --git a/src/coding.c b/src/coding.c index fa2bbc620a0..5c3299b6b56 100644 --- a/src/coding.c +++ b/src/coding.c @@ -213,15 +213,18 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) /* Decode one ASCII character C. */ -#define DECODE_CHARACTER_ASCII(c) \ - do { \ - if (COMPOSING_P (coding->composing)) \ - *dst++ = 0xA0, *dst++ = (c) | 0x80; \ - else \ - { \ - *dst++ = (c); \ - coding->produced_char++; \ - } \ +#define DECODE_CHARACTER_ASCII(c) \ + do { \ + if (COMPOSING_P (coding->composing)) \ + { \ + *dst++ = 0xA0, *dst++ = (c) | 0x80; \ + coding->composed_chars++; \ + } \ + else \ + { \ + *dst++ = (c); \ + coding->produced_char++; \ + } \ } while (0) /* Decode one DIMENSION1 character whose charset is CHARSET and whose @@ -231,7 +234,10 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes) do { \ unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \ if (COMPOSING_P (coding->composing)) \ - *dst++ = leading_code + 0x20; \ + { \ + *dst++ = leading_code + 0x20; \ + coding->composed_chars++; \ + } \ else \ { \ *dst++ = leading_code; \ @@ -997,9 +1003,7 @@ check_composing_code (coding, src, src_end) invalid_code_found = 1; } } - return (invalid_code_found - ? src - src_start - : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1)); + return (invalid_code_found ? src - src_start : -1); } /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ @@ -1030,6 +1034,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) translation_table = Vstandard_translation_table_for_decode; coding->produced_char = 0; + coding->composed_chars = 0; coding->fake_multibyte = 0; while (src < src_end && (dst_bytes ? (dst < adjusted_dst_end) @@ -1243,7 +1248,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) coding->composing = (c1 == '0' ? COMPOSING_NO_RULE_HEAD : COMPOSING_WITH_RULE_HEAD); - coding->produced_char++; + coding->composed_chars = 0; } else if (result1 > 0) { @@ -1253,6 +1258,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) src += result1; dst += result1 + 2; coding->produced_char += result1 + 2; + coding->fake_multibyte = 1; } else { @@ -1266,6 +1272,28 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) break; case '1': /* end composing */ + if (coding->composed_chars > 0) + { + if (coding->composed_chars == 1) + { + unsigned char *this_char_start = dst; + int this_bytes; + + /* Only one character is in the composing + sequence. Make it a normal character. */ + while (*--this_char_start != LEADING_CODE_COMPOSITION); + dst = (this_char_start + + (coding->composing == COMPOSING_NO_RULE_TAIL + ? 1 : 2)); + *dst -= 0x20; + if (*dst == 0x80) + *++dst &= 0x7F; + this_bytes = BYTES_BY_CHAR_HEAD (*dst); + while (this_bytes--) *this_char_start++ = *dst++; + dst = this_char_start; + } + coding->produced_char++; + } coding->composing = COMPOSING_NO; break; @@ -3938,30 +3966,45 @@ shrink_decoding_region (beg, end, coding, str) case CODING_CATEGORY_IDX_ISO_7: case CODING_CATEGORY_IDX_ISO_7_TIGHT: - /* We can skip all charactes at the tail except for ESC and - the following 2-byte at the tail. */ - if (eol_conversion) - while (begp < endp - && (c = endp[-1]) != ISO_CODE_ESC && c != '\r') - endp--; - else - while (begp < endp - && (c = endp[-1]) != ISO_CODE_ESC) - endp--; - /* Do not consider LF as ascii if preceded by CR, since that - confuses eol decoding. */ - if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n') - endp++; - if (begp < endp && endp[-1] == ISO_CODE_ESC) - { - if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') - /* This is an ASCII designation sequence. We can - surely skip the tail. */ - endp += 2; - else - /* Hmmm, we can't skip the tail. */ - endp = endp_orig; - } + { + /* We can skip all charactes at the tail except for 8-bit + codes and ESC and the following 2-byte at the tail. */ + unsigned char *eight_bit = NULL; + + if (eol_conversion) + while (begp < endp + && (c = endp[-1]) != ISO_CODE_ESC && c != '\r') + { + if (!eight_bit && c & 0x80) eight_bit = endp; + endp--; + } + else + while (begp < endp + && (c = endp[-1]) != ISO_CODE_ESC) + { + if (!eight_bit && c & 0x80) eight_bit = endp; + endp--; + } + /* Do not consider LF as ascii if preceded by CR, since that + confuses eol decoding. */ + if (begp < endp && endp < endp_orig + && endp[-1] == '\r' && endp[0] == '\n') + endp++; + if (begp < endp && endp[-1] == ISO_CODE_ESC) + { + if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B') + /* This is an ASCII designation sequence. We can + surely skip the tail. But, if we have + encountered an 8-bit code, skip only the codes + after that. */ + endp = eight_bit ? eight_bit : endp + 2; + else + /* Hmmm, we can't skip the tail. */ + endp = endp_orig; + } + else if (eight_bit) + endp = eight_bit; + } } } *beg += begp - begp_orig; @@ -4524,9 +4567,7 @@ code_convert_string (str, coding, encodep, nocopy) else shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data); } - if (from == to_byte - && ! (coding->mode & CODING_MODE_LAST_BLOCK - && CODING_REQUIRE_FLUSHING (coding))) + if (from == to_byte) return (nocopy ? str : Fcopy_sequence (str)); if (encodep)