From 88993dfd4edca9518a5ed149fc61a683f2b63e7a Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Mon, 28 Sep 1998 11:52:53 +0000 Subject: [PATCH] (check_composing_code): Fix previous change. Now it always returns 0 or -1. (decode_coding_iso2022): Adjusted for the above change. (encode_coding_iso2022): When encoding the last block, flush out trailing garbage bytes. (setup_coding_system): Delete unnecessary code. (shrink_decoding_region): Check translation table. If ASCII should be translated, give up shrinking. (shrink_encoding_region): Likewise. (shrink_conversion_region_threshhold): New variable. (SHRINK_CONVERSION_REGION): New macro. (code_convert_region): Call SHRINK_CONVERSION_REGION. Delete text properties here. (code_convert_region): In the case of encoding, always calculate correct character number. (code_convert_string): Call SHRINK_CONVERSION_REGION. (code_convert_region1): Don't delete text properties here. (check_composing_code): Fix previous change. Now it always returns 0 or -1. (decode_coding_iso2022): Adjusted for the above change. (encode_coding_iso2022): When encoding the last block, flush out trailing garbage bytes. (setup_coding_system): Delete unnecessary code. (shrink_decoding_region): Check translation table. If ASCII should be translated, give up shrinking. (shrink_encoding_region): Likewise. (shrink_conversion_region_threshhold): New variable. (SHRINK_CONVERSION_REGION): New macro. (code_convert_region): Call SHRINK_CONVERSION_REGION. Delete text properties here. (code_convert_region): In the case of encoding, always calculate correct character number. (code_convert_string): Call SHRINK_CONVERSION_REGION. (code_convert_region1): Don't delete text properties here. 
--- src/coding.c | 198 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 116 insertions(+), 82 deletions(-) diff --git a/src/coding.c b/src/coding.c index 5c3299b6b56..68855c05609 100644 --- a/src/coding.c +++ b/src/coding.c @@ -958,52 +958,49 @@ detect_coding_iso2022 (src, src_end) } \ } while (0) -/* Check if the current composing sequence contains only valid codes. - If the composing sequence doesn't end before SRC_END, return -1. - Else, if it contains only valid codes, return 0. - Else return the length of the composing sequence. */ +/* Return 0 if there's a valid composing sequence starting at SRC and + ending before SRC_END, else return -1. */ int check_composing_code (coding, src, src_end) struct coding_system *coding; unsigned char *src, *src_end; { - unsigned char *src_start = src; - int invalid_code_found = 0; int charset, c, c1, dim; while (src < src_end) { - if (*src++ != ISO_CODE_ESC) continue; - if (src >= src_end) break; - if ((c = *src++) == '1') /* end of compsition */ - return (invalid_code_found ? src - src_start : 0); - if (src + 2 >= src_end) break; - if (!coding->flags & CODING_FLAG_ISO_DESIGNATION) - invalid_code_found = 1; - else + c = *src++; + if (c >= 0x20) + continue; + if (c != ISO_CODE_ESC || src >= src_end) + return -1; + c = *src++; + if (c == '1') /* end of compsition */ + return 0; + if (src + 2 >= src_end + || !coding->flags & CODING_FLAG_ISO_DESIGNATION) + return -1; + + dim = (c == '$'); + if (dim == 1) + c = (*src >= '@' && *src <= 'B') ? '(' : *src++; + if (c >= '(' && c <= '/') { - dim = 0; - if (c == '$') - { - dim = 1; - c = (*src >= '@' && *src <= 'B') ? '(' : *src++; - } - if (c >= '(' && c <= '/') - { - c1 = *src++; - if ((c1 < ' ' || c1 >= 0x80) - || (charset = iso_charset_table[dim][c >= ','][c1]) < 0 - || ! 
coding->safe_charsets[charset] - || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) - == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) - invalid_code_found = 1; - } - else - invalid_code_found = 1; + c1 = *src++; + if ((c1 < ' ' || c1 >= 0x80) + || (charset = iso_charset_table[dim][c >= ','][c1]) < 0 + || ! coding->safe_charsets[charset] + || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) + == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) + return -1; } + else + return -1; } - return (invalid_code_found ? src - src_start : -1); + + /* We have not found the sequence "ESC 1". */ + return -1; } /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ @@ -1183,7 +1180,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) ONE_MORE_BYTE (c1); if (c1 >= '@' && c1 <= 'B') { /* designation of JISX0208.1978, GB2312.1980, - or JISX0208.1980 */ + or JISX0208.1980 */ DECODE_DESIGNATION (0, 2, 94, c1); } else if (c1 >= 0x28 && c1 <= 0x2B) @@ -1237,41 +1234,32 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) case '0': case '2': /* start composing */ /* Before processing composing, we must be sure that all characters being composed are supported by CODING. - If not, we must give up composing and insert the - bunch of codes for composing as is without decoding. */ - { - int result1; - - result1 = check_composing_code (coding, src, src_end); - if (result1 == 0) - { - coding->composing = (c1 == '0' - ? COMPOSING_NO_RULE_HEAD - : COMPOSING_WITH_RULE_HEAD); - coding->composed_chars = 0; - } - else if (result1 > 0) - { - if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst) - { - bcopy (src_base, dst, result1 + 2); - src += result1; - dst += result1 + 2; - coding->produced_char += result1 + 2; - coding->fake_multibyte = 1; - } - else - { - result = CODING_FINISH_INSUFFICIENT_DST; - goto label_end_of_loop_2; - } - } - else - goto label_end_of_loop; - } + If not, we must give up composing. 
*/ + if (check_composing_code (coding, src, src_end) == 0) + { + /* We are looking at a valid composition sequence. */ + coding->composing = (c1 == '0' + ? COMPOSING_NO_RULE_HEAD + : COMPOSING_WITH_RULE_HEAD); + coding->composed_chars = 0; + } + else + { + *dst++ = ISO_CODE_ESC; + *dst++ = c1; + coding->produced_char += 2; + } break; case '1': /* end composing */ + if (!coding->composing) + { + *dst++ = ISO_CODE_ESC; + *dst++ = c1; + coding->produced_char += 2; + break; + } + if (coding->composed_chars > 0) { if (coding->composed_chars == 1) @@ -2002,6 +1990,11 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) ENCODE_RESET_PLANE_AND_REGISTER; if (COMPOSING_P (coding->composing)) ENCODE_COMPOSITION_END; + if (result == CODING_FINISH_INSUFFICIENT_SRC) + { + while (src < src_end && dst < dst_end) + *dst++ = *src++; + } } coding->consumed = src - source; coding->produced = coding->produced_char = dst - destination; @@ -2876,8 +2869,6 @@ setup_coding_system (coding_system, coding) /* Initialize remaining fields. */ coding->composing = 0; - coding->translation_table_for_decode = Qnil; - coding->translation_table_for_encode = Qnil; /* Get values of coding system properties: `post-read-conversion', `pre-write-conversion', @@ -3862,6 +3853,7 @@ shrink_decoding_region (beg, end, coding, str) { unsigned char *begp_orig, *begp, *endp_orig, *endp, c; int eol_conversion; + Lisp_Object translation_table; if (coding->type == coding_type_ccl || coding->type == coding_type_undecided @@ -3877,6 +3869,21 @@ shrink_decoding_region (beg, end, coding, str) return; } + translation_table = coding->translation_table_for_decode; + if (NILP (translation_table) && !NILP (Venable_character_translation)) + translation_table = Vstandard_translation_table_for_decode; + if (CHAR_TABLE_P (translation_table)) + { + int i; + for (i = 0; i < 128; i++) + if (!NILP (CHAR_TABLE_REF (translation_table, i))) + break; + if (i < 128) + /* Some ASCII character should be tranlsated. 
We give up + shrinking. */ + return; + } + eol_conversion = (coding->eol_type != CODING_EOL_LF); if ((! eol_conversion) && (coding->heading_ascii >= 0)) @@ -4022,6 +4029,7 @@ shrink_encoding_region (beg, end, coding, str) { unsigned char *begp_orig, *begp, *endp_orig, *endp; int eol_conversion; + Lisp_Object translation_table; if (coding->type == coding_type_ccl) /* We can't skip any data. */ @@ -4033,6 +4041,21 @@ shrink_encoding_region (beg, end, coding, str) return; } + translation_table = coding->translation_table_for_encode; + if (NILP (translation_table) && !NILP (Venable_character_translation)) + translation_table = Vstandard_translation_table_for_encode; + if (CHAR_TABLE_P (translation_table)) + { + int i; + for (i = 0; i < 128; i++) + if (!NILP (CHAR_TABLE_REF (translation_table, i))) + break; + if (i < 128) + /* Some ASCII character should be tranlsated. We give up + shrinking. */ + return; + } + if (str) { begp_orig = begp = str + *beg; @@ -4097,6 +4120,20 @@ shrink_encoding_region (beg, end, coding, str) return; } +/* As shrinking conversion region requires some overhead, we don't try + shrinking if the length of conversion region is less than this + value. 
*/ +static int shrink_conversion_region_threshhold = 1024; + +#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep) \ + do { \ + if (*(end) - *(beg) > shrink_conversion_region_threshhold) \ + { \ + if (encodep) shrink_encoding_region (beg, end, coding, str); \ + else shrink_decoding_region (beg, end, coding, str); \ + } \ + } while (0) + /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by coding system CODING, and return the status code of code conversion @@ -4240,10 +4277,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) if (from < GPT && GPT < to) move_gap_both (from, from_byte); - if (encodep) - shrink_encoding_region (&from_byte, &to_byte, coding, NULL); - else - shrink_decoding_region (&from_byte, &to_byte, coding, NULL); + SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep); if (from_byte == to_byte && ! (coding->mode & CODING_MODE_LAST_BLOCK && CODING_REQUIRE_FLUSHING (coding))) @@ -4264,6 +4298,11 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) len -= total_skip; len_byte -= total_skip; } + /* The code conversion routine can not preserve text properties for + now. So, we must remove all text properties in the region. */ + if (replace) + Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil); + /* For converion, we must put the gap before the text in addition to making the gap larger for efficient decoding. The required gap size starts from 2000 which is the magic number used in make_gap. @@ -4439,8 +4478,9 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) if (src - dst > 0) *dst = 0; /* Put an anchor. 
*/ if (multibyte - && (fake_multibyte - || !encodep && (to - from) != (to_byte - from_byte))) + && (encodep + || fake_multibyte + || (to - from) != (to_byte - from_byte))) inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte); /* If we have shrinked the conversion area, adjust it now. */ @@ -4562,10 +4602,8 @@ code_convert_string (str, coding, encodep, nocopy) else { /* Try to skip the heading and tailing ASCIIs. */ - if (encodep) - shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data); - else - shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data); + SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data, + encodep); } if (from == to_byte) return (nocopy ? str : Fcopy_sequence (str)); @@ -4814,10 +4852,6 @@ code_convert_region1 (start, end, coding_system, encodep) if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data); - /* The code conversion routine can not preserve text properties for - now. So, we must remove all text properties in the region. */ - Fset_text_properties (start, end, Qnil, Qnil); - coding.mode |= CODING_MODE_LAST_BLOCK; code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to), &coding, encodep, 1); -- 2.39.2