From: Kenichi Handa Date: Mon, 29 Dec 2003 07:52:49 +0000 (+0000) Subject: (Qinsufficient_source, Qinconsistent_eol) X-Git-Tag: emacs-pretest-23.0.90~8295^2~1662 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=065e359516274ece9178f9ddef01ae8573ff18a7;p=emacs.git (Qinsufficient_source, Qinconsistent_eol) (Qinvalid_source, Qinterrupted, Qinsufficient_memory): New variables. (Vlast_code_conversion_error): New variables. (syms_of_coding): DEFSYM or DEFVAR_LISP them. (ONE_MORE_BYTE): Record error if any instead of signaling an error. If non-ASCII multibyte char is found, return the negative value of the code. All callers changed to check it. (ONE_MORE_BYTE_NO_CHECK): Likewise. (record_conversion_result): New function. All codes setting coding->result are changed to call this function. (detect_coding_utf_8): Don't use the local variable incomplete. (decode_coding_utf_8): Likewise. (emacs_mule_char): Change the second arg to `const'. (detect_coding_emacs_mule): Don't use the local variable incomplete. (detect_coding_sjis): Likewise. (detect_coding_big5): Likewise. (decode_coding): Fix of flushing out unprocessed data. (make_conversion_work_buffer): Fix making of a work buffer. (decode_coding_object): Return coding->dst_object; --- diff --git a/src/coding.c b/src/coding.c index 353f165f4ca..9da19e53c2e 100644 --- a/src/coding.c +++ b/src/coding.c @@ -318,6 +318,9 @@ Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; Lisp_Object Qstart_process, Qopen_network_stream; Lisp_Object Qtarget_idx; +Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source; +Lisp_Object Qinterrupted, Qinsufficient_memory; + int coding_system_require_warning; Lisp_Object Vselect_safe_coding_system_function; @@ -347,7 +350,8 @@ Lisp_Object Vcoding_system_for_read; Lisp_Object Vcoding_system_for_write; /* Coding-system actually used in the latest I/O. */ Lisp_Object Vlast_coding_system_used; - +/* Set to non-nil when an error is detected while code conversion. */ +Lisp_Object Vlast_code_conversion_error; /* A vector of length 256 which contains information about special Latin codes (especially for dealing with Microsoft codes). */ Lisp_Object Vlatin_extra_code_table; @@ -406,6 +410,8 @@ Lisp_Object Vsjis_coding_system; Lisp_Object Vbig5_coding_system; +static void record_conversion_result (struct coding_system *coding, + enum coding_result_code result); static int detect_coding_utf_8 P_ ((struct coding_system *, struct coding_detection_info *info)); static void decode_coding_utf_8 P_ ((struct coding_system *)); @@ -718,40 +724,52 @@ static struct coding_system coding_categories[coding_category_max]; /* Safely get one byte from the source text pointed by SRC which ends at SRC_END, and set C to that byte. If there are not enough bytes - in the source, it jumps to `no_more_source'. The caller - should declare and set these variables appropriately in advance: - src, src_end, multibytep -*/ + in the source, it jumps to `no_more_source'. If multibytep is + nonzero, and a multibyte character is found at SRC, set C to the + negative value of the character code. The caller should declare + and set these variables appropriately in advance: + src, src_end, multibytep */ -#define ONE_MORE_BYTE(c) \ - do { \ - if (src == src_end) \ - { \ - if (src_base < src) \ - coding->result = CODING_RESULT_INSUFFICIENT_SRC; \ - goto no_more_source; \ - } \ - c = *src++; \ - if (multibytep && (c & 0x80)) \ - { \ - if ((c & 0xFE) != 0xC0) \ - error ("Undecodable char found"); \ - c = ((c & 1) << 6) | *src++; \ - } \ - consumed_chars++; \ +#define ONE_MORE_BYTE(c) \ + do { \ + if (src == src_end) \ + { \ + if (src_base < src) \ + record_conversion_result \ + (coding, CODING_RESULT_INSUFFICIENT_SRC); \ + goto no_more_source; \ + } \ + c = *src++; \ + if (multibytep && (c & 0x80)) \ + { \ + if ((c & 0xFE) == 0xC0) \ + c = ((c & 1) << 6) | *src++; \ + else \ + { \ + c = - string_char (--src, &src, NULL); \ + record_conversion_result \ + (coding, CODING_RESULT_INVALID_SRC); \ + } \ + } \ + consumed_chars++; \ } while (0) -#define ONE_MORE_BYTE_NO_CHECK(c) \ - do { \ - c = *src++; \ - if (multibytep && (c & 0x80)) \ - { \ - if ((c & 0xFE) != 0xC0) \ - error ("Undecodable char found"); \ - c = ((c & 1) << 6) | *src++; \ - } \ - consumed_chars++; \ +#define ONE_MORE_BYTE_NO_CHECK(c) \ + do { \ + c = *src++; \ + if (multibytep && (c & 0x80)) \ + { \ + if ((c & 0xFE) == 0xC0) \ + c = ((c & 1) << 6) | *src++; \ + else \ + { \ + c = - string_char (--src, &src, NULL); \ + record_conversion_result \ + (coding, CODING_RESULT_INVALID_SRC); \ + } \ + } \ + consumed_chars++; \ } while (0) @@ -839,6 +857,31 @@ static struct coding_system coding_categories[coding_category_max]; } while (0) +static void +record_conversion_result (struct coding_system *coding, + enum coding_result_code result) +{ + coding->result = result; + switch (result) + { + case CODING_RESULT_INSUFFICIENT_SRC: + Vlast_code_conversion_error = Qinsufficient_source; + break; + case CODING_RESULT_INCONSISTENT_EOL: + Vlast_code_conversion_error = Qinconsistent_eol; + break; + case CODING_RESULT_INVALID_SRC: + Vlast_code_conversion_error = Qinvalid_source; + break; + case CODING_RESULT_INTERRUPT: + Vlast_code_conversion_error = Qinterrupted; + break; + case CODING_RESULT_INSUFFICIENT_MEM: + Vlast_code_conversion_error = Qinsufficient_memory; + break; + } +} + #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ do { \ charset_map_loaded = 0; \ @@ -971,7 +1014,7 @@ alloc_destination (coding, nbytes, dst) coding_alloc_by_making_gap (coding, nbytes); else coding_alloc_by_realloc (coding, nbytes); - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); coding_set_destination (coding); dst = coding->destination + offset; return dst; @@ -1049,12 +1092,11 @@ detect_coding_utf_8 (coding, detect_info) struct coding_system *coding; struct coding_detection_info *detect_info; { - const unsigned char *src = coding->source, *src_base = src; + const unsigned char *src = coding->source, *src_base; const unsigned char *src_end = coding->source + coding->src_bytes; int multibytep = coding->src_multibyte; int consumed_chars = 0; int found = 0; - int incomplete; detect_info->checked |= CATEGORY_MASK_UTF_8; /* A coding system of this category is always ASCII compatible. */ @@ -1064,13 +1106,12 @@ detect_coding_utf_8 (coding, detect_info) { int c, c1, c2, c3, c4; - incomplete = 0; + src_base = src; ONE_MORE_BYTE (c); - if (UTF_8_1_OCTET_P (c)) + if (c < 0 || UTF_8_1_OCTET_P (c)) continue; - incomplete = 1; ONE_MORE_BYTE (c1); - if (! UTF_8_EXTRA_OCTET_P (c1)) + if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1)) break; if (UTF_8_2_OCTET_LEADING_P (c)) { @@ -1078,7 +1119,7 @@ detect_coding_utf_8 (coding, detect_info) continue; } ONE_MORE_BYTE (c2); - if (! UTF_8_EXTRA_OCTET_P (c2)) + if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2)) break; if (UTF_8_3_OCTET_LEADING_P (c)) { @@ -1086,7 +1127,7 @@ detect_coding_utf_8 (coding, detect_info) continue; } ONE_MORE_BYTE (c3); - if (! UTF_8_EXTRA_OCTET_P (c3)) + if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3)) break; if (UTF_8_4_OCTET_LEADING_P (c)) { @@ -1094,7 +1135,7 @@ detect_coding_utf_8 (coding, detect_info) continue; } ONE_MORE_BYTE (c4); - if (! UTF_8_EXTRA_OCTET_P (c4)) + if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4)) break; if (UTF_8_5_OCTET_LEADING_P (c)) { @@ -1107,7 +1148,7 @@ detect_coding_utf_8 (coding, detect_info) return 0; no_more_source: - if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) + if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK) { detect_info->rejected |= CATEGORY_MASK_UTF_8; return 0; @@ -1143,14 +1184,18 @@ decode_coding_utf_8 (coding) break; ONE_MORE_BYTE (c1); - if (UTF_8_1_OCTET_P(c1)) + if (c1 < 0) + { + c = - c1; + } + else if (UTF_8_1_OCTET_P(c1)) { c = c1; } else { ONE_MORE_BYTE (c2); - if (! UTF_8_EXTRA_OCTET_P (c2)) + if (c2 < 0 || ! UTF_8_EXTRA_OCTET_P (c2)) goto invalid_code; if (UTF_8_2_OCTET_LEADING_P (c1)) { @@ -1164,7 +1209,7 @@ decode_coding_utf_8 (coding) else { ONE_MORE_BYTE (c3); - if (! UTF_8_EXTRA_OCTET_P (c3)) + if (c3 < 0 || ! UTF_8_EXTRA_OCTET_P (c3)) goto invalid_code; if (UTF_8_3_OCTET_LEADING_P (c1)) { @@ -1177,7 +1222,7 @@ decode_coding_utf_8 (coding) else { ONE_MORE_BYTE (c4); - if (! UTF_8_EXTRA_OCTET_P (c4)) + if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4)) goto invalid_code; if (UTF_8_4_OCTET_LEADING_P (c1)) { @@ -1189,7 +1234,7 @@ decode_coding_utf_8 (coding) else { ONE_MORE_BYTE (c5); - if (! UTF_8_EXTRA_OCTET_P (c5)) + if (c5 < 0 || ! UTF_8_EXTRA_OCTET_P (c5)) goto invalid_code; if (UTF_8_5_OCTET_LEADING_P (c1)) { @@ -1271,7 +1316,7 @@ encode_coding_utf_8 (coding) produced_chars++; } } - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->produced_char += produced_chars; coding->produced = dst - coding->destination; return 0; @@ -1331,7 +1376,7 @@ detect_coding_utf_16 (coding, detect_info) | CATEGORY_MASK_UTF_16_BE_NOSIG | CATEGORY_MASK_UTF_16_LE_NOSIG); } - else + else if (c1 >= 0 && c2 >= 0) { unsigned char b1[256], b2[256]; int b1_variants = 1, b2_variants = 1; @@ -1341,8 +1386,11 @@ detect_coding_utf_16 (coding, detect_info) b1[c1]++, b2[c2]++; for (n = 0; n < 256 && src < src_end; n++) { + src_base = src; ONE_MORE_BYTE (c1); ONE_MORE_BYTE (c2); + if (c1 < 0 || c2 < 0) + break; if (! b1[c1++]) b1_variants++; if (! b2[c2++]) b2_variants++; } @@ -1412,7 +1460,18 @@ decode_coding_utf_16 (coding) break; ONE_MORE_BYTE (c1); + if (c1 < 0) + { + *charbuf++ = -c1; + continue; + } ONE_MORE_BYTE (c2); + if (c2 < 0) + { + *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1); + *charbuf++ = -c2; + continue; + } c = (endian == utf_16_big_endian ? ((c1 << 8) | c2) : ((c2 << 8) | c1)); if (surrogate) @@ -1508,7 +1567,7 @@ encode_coding_utf_16 (coding) EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8); } } - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->produced = dst - coding->destination; coding->produced_char += produced_chars; return 0; @@ -1593,7 +1652,7 @@ char emacs_mule_bytes[256]; int emacs_mule_char (coding, src, nbytes, nchars, id) struct coding_system *coding; - unsigned char *src; + const unsigned char *src; int *nbytes, *nchars, *id; { const unsigned char *src_end = coding->source + coding->src_bytes; @@ -1605,58 +1664,78 @@ emacs_mule_char (coding, src, nbytes, nchars, id) int consumed_chars = 0; ONE_MORE_BYTE (c); - switch (emacs_mule_bytes[c]) + if (c < 0) { - case 2: - if (! (charset = emacs_mule_charset[c])) - goto invalid_code; - ONE_MORE_BYTE (c); - code = c & 0x7F; - break; - - case 3: - if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11 - || c == EMACS_MULE_LEADING_CODE_PRIVATE_12) + c = -c; + charset = emacs_mule_charset[0]; + } + else + { + switch (emacs_mule_bytes[c]) { - ONE_MORE_BYTE (c); + case 2: if (! (charset = emacs_mule_charset[c])) goto invalid_code; ONE_MORE_BYTE (c); + if (c < 0) + goto invalid_code; code = c & 0x7F; - } - else - { - if (! (charset = emacs_mule_charset[c])) + break; + + case 3: + if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11 + || c == EMACS_MULE_LEADING_CODE_PRIVATE_12) + { + ONE_MORE_BYTE (c); + if (c < 0 || ! (charset = emacs_mule_charset[c])) + goto invalid_code; + ONE_MORE_BYTE (c); + if (c < 0) + goto invalid_code; + code = c & 0x7F; + } + else + { + if (! (charset = emacs_mule_charset[c])) + goto invalid_code; + ONE_MORE_BYTE (c); + if (c < 0) + goto invalid_code; + code = (c & 0x7F) << 8; + ONE_MORE_BYTE (c); + if (c < 0) + goto invalid_code; + code |= c & 0x7F; + } + break; + + case 4: + ONE_MORE_BYTE (c); + if (c < 0 || ! (charset = emacs_mule_charset[c])) goto invalid_code; ONE_MORE_BYTE (c); + if (c < 0) + goto invalid_code; code = (c & 0x7F) << 8; ONE_MORE_BYTE (c); + if (c < 0) + goto invalid_code; code |= c & 0x7F; - } - break; - - case 4: - ONE_MORE_BYTE (c); - if (! (charset = emacs_mule_charset[c])) - goto invalid_code; - ONE_MORE_BYTE (c); - code = (c & 0x7F) << 8; - ONE_MORE_BYTE (c); - code |= c & 0x7F; - break; + break; - case 1: - code = c; - charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) - ? charset_ascii : charset_eight_bit); - break; + case 1: + code = c; + charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) + ? charset_ascii : charset_eight_bit); + break; - default: - abort (); + default: + abort (); + } + c = DECODE_CHAR (charset, code); + if (c < 0) + goto invalid_code; } - c = DECODE_CHAR (charset, code); - if (c < 0) - goto invalid_code; *nbytes = src - src_base; *nchars = consumed_chars; if (id) @@ -1680,13 +1759,12 @@ detect_coding_emacs_mule (coding, detect_info) struct coding_system *coding; struct coding_detection_info *detect_info; { - const unsigned char *src = coding->source, *src_base = src; + const unsigned char *src = coding->source, *src_base; const unsigned char *src_end = coding->source + coding->src_bytes; int multibytep = coding->src_multibyte; int consumed_chars = 0; int c; int found = 0; - int incomplete; detect_info->checked |= CATEGORY_MASK_EMACS_MULE; /* A coding system of this category is always ASCII compatible. */ @@ -1694,10 +1772,10 @@ detect_coding_emacs_mule (coding, detect_info) while (1) { - incomplete = 0; + src_base = src; ONE_MORE_BYTE (c); - incomplete = 1; - + if (c < 0) + continue; if (c == 0x80) { /* Perhaps the start of composite character. We simple skip @@ -1745,7 +1823,7 @@ detect_coding_emacs_mule (coding, detect_info) return 0; no_more_source: - if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) + if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK) { detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; return 0; @@ -1842,10 +1920,14 @@ detect_coding_emacs_mule (coding, detect_info) int nbytes, nchars; \ \ ONE_MORE_BYTE (c); \ + if (c < 0) \ + goto invalid_code; \ nbytes = c - 0xA0; \ if (nbytes < 3) \ goto invalid_code; \ ONE_MORE_BYTE (c); \ + if (c < 0) \ + goto invalid_code; \ nchars = c - 0xA0; \ from = coding->produced + char_offset; \ to = from + nchars; \ @@ -1952,8 +2034,12 @@ decode_coding_emacs_mule (coding) break; ONE_MORE_BYTE (c); - - if (c < 0x80) + if (c < 0) + { + *charbuf++ = -c; + char_offset++; + } + else if (c < 0x80) { *charbuf++ = c; char_offset++; @@ -1961,6 +2047,8 @@ decode_coding_emacs_mule (coding) else if (c == 0x80) { ONE_MORE_BYTE (c); + if (c < 0) + goto invalid_code; if (c - 0xF2 >= COMPOSITION_RELATIVE && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) DECODE_EMACS_MULE_21_COMPOSITION (c); @@ -2130,7 +2218,7 @@ encode_coding_emacs_mule (coding) } } } - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->produced_char += produced_chars; coding->produced = dst - coding->destination; return 0; @@ -2430,6 +2518,7 @@ detect_coding_iso_2022 (coding, detect_info) while (rejected != CATEGORY_MASK_ISO) { + src_base = src; ONE_MORE_BYTE (c); switch (c) { @@ -2543,6 +2632,8 @@ detect_coding_iso_2022 (coding, detect_info) goto check_extra_latin; default: + if (c < 0) + continue; if (c < 0x80) { single_shifting = 0; @@ -2816,6 +2907,8 @@ decode_coding_iso_2022 (coding) break; ONE_MORE_BYTE (c1); + if (c1 < 0) + goto invalid_code; /* We produce at most one character. */ switch (iso_code_class [c1]) @@ -3186,7 +3279,7 @@ decode_coding_iso_2022 (coding) src = src_base; consumed_chars = consumed_chars_base; ONE_MORE_BYTE (c); - *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); char_offset++; coding->errors++; continue; @@ -3745,7 +3838,7 @@ encode_coding_iso_2022 (coding) ASSURE_DESTINATION (safe_room); ENCODE_RESET_PLANE_AND_REGISTER (); } - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); CODING_ISO_BOL (coding) = bol_designation; coding->produced_char += produced_chars; coding->produced = dst - coding->destination; @@ -3798,13 +3891,12 @@ detect_coding_sjis (coding, detect_info) struct coding_system *coding; struct coding_detection_info *detect_info; { - const unsigned char *src = coding->source, *src_base = src; + const unsigned char *src = coding->source, *src_base; const unsigned char *src_end = coding->source + coding->src_bytes; int multibytep = coding->src_multibyte; int consumed_chars = 0; int found = 0; int c; - int incomplete; detect_info->checked |= CATEGORY_MASK_SJIS; /* A coding system of this category is always ASCII compatible. */ @@ -3812,9 +3904,8 @@ detect_coding_sjis (coding, detect_info) while (1) { - incomplete = 0; + src_base = src; ONE_MORE_BYTE (c); - incomplete = 1; if (c < 0x80) continue; if ((c >= 0x81 && c <= 0x9F) || (c >= 0xE0 && c <= 0xEF)) @@ -3833,7 +3924,7 @@ detect_coding_sjis (coding, detect_info) return 0; no_more_source: - if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) + if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK) { detect_info->rejected |= CATEGORY_MASK_SJIS; return 0; @@ -3851,13 +3942,12 @@ detect_coding_big5 (coding, detect_info) struct coding_system *coding; struct coding_detection_info *detect_info; { - const unsigned char *src = coding->source, *src_base = src; + const unsigned char *src = coding->source, *src_base; const unsigned char *src_end = coding->source + coding->src_bytes; int multibytep = coding->src_multibyte; int consumed_chars = 0; int found = 0; int c; - int incomplete; detect_info->checked |= CATEGORY_MASK_BIG5; /* A coding system of this category is always ASCII compatible. */ @@ -3865,9 +3955,8 @@ detect_coding_big5 (coding, detect_info) while (1) { - incomplete = 0; + src_base = src; ONE_MORE_BYTE (c); - incomplete = 1; if (c < 0x80) continue; if (c >= 0xA1) @@ -3884,7 +3973,7 @@ detect_coding_big5 (coding, detect_info) return 0; no_more_source: - if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) + if (src_base < src && coding->mode & CODING_MODE_LAST_BLOCK) { detect_info->rejected |= CATEGORY_MASK_BIG5; return 0; @@ -3932,7 +4021,8 @@ decode_coding_sjis (coding) break; ONE_MORE_BYTE (c); - + if (c < 0) + goto invalid_code; if (c < 0x80) charset = charset_roman; else @@ -3975,7 +4065,7 @@ decode_coding_sjis (coding) src = src_base; consumed_chars = consumed_chars_base; ONE_MORE_BYTE (c); - *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c); char_offset++; coding->errors++; } @@ -4023,6 +4113,8 @@ decode_coding_big5 (coding) ONE_MORE_BYTE (c); + if (c < 0) + goto invalid_code; if (c < 0x80) charset = charset_roman; else @@ -4053,7 +4145,7 @@ decode_coding_big5 (coding) src = src_base; consumed_chars = consumed_chars_base; ONE_MORE_BYTE (c); - *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + *charbuf++ = c < 0 ? -c : BYTE8_TO_CHAR (c); char_offset++; coding->errors++; } @@ -4143,7 +4235,7 @@ encode_coding_sjis (coding) EMIT_ONE_ASCII_BYTE (code & 0x7F); } } - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->produced_char += produced_chars; coding->produced = dst - coding->destination; return 0; @@ -4214,7 +4306,7 @@ encode_coding_big5 (coding) EMIT_ONE_ASCII_BYTE (code & 0x7F); } } - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->produced_char += produced_chars; coding->produced = dst - coding->destination; return 0; @@ -4233,7 +4325,7 @@ detect_coding_ccl (coding, detect_info) struct coding_system *coding; struct coding_detection_info *detect_info; { - const unsigned char *src = coding->source, *src_base = src; + const unsigned char *src = coding->source, *src_base; const unsigned char *src_end = coding->source + coding->src_bytes; int multibytep = coding->src_multibyte; int consumed_chars = 0; @@ -4252,8 +4344,10 @@ detect_coding_ccl (coding, detect_info) while (1) { int c; + + src_base = src; ONE_MORE_BYTE (c); - if (! valids[c]) + if (c < 0 || ! valids[c]) break; if ((valids[c] > 1)) found = CATEGORY_MASK_CCL; @@ -4329,16 +4423,16 @@ decode_coding_ccl (coding) switch (ccl.status) { case CCL_STAT_SUSPEND_BY_SRC: - coding->result = CODING_RESULT_INSUFFICIENT_SRC; + record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC); break; case CCL_STAT_SUSPEND_BY_DST: break; case CCL_STAT_QUIT: case CCL_STAT_INVALID_CMD: - coding->result = CODING_RESULT_INTERRUPT; + record_conversion_result (coding, CODING_RESULT_INTERRUPT); break; default: - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); break; } coding->consumed_char += consumed_chars; @@ -4390,17 +4484,17 @@ encode_coding_ccl (coding) switch (ccl.status) { case CCL_STAT_SUSPEND_BY_SRC: - coding->result = CODING_RESULT_INSUFFICIENT_SRC; + record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC); break; case CCL_STAT_SUSPEND_BY_DST: - coding->result = CODING_RESULT_INSUFFICIENT_DST; + record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_DST); break; case CCL_STAT_QUIT: case CCL_STAT_INVALID_CMD: - coding->result = CODING_RESULT_INTERRUPT; + record_conversion_result (coding, CODING_RESULT_INTERRUPT); break; default: - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); break; } @@ -4422,7 +4516,7 @@ decode_coding_raw_text (coding) coding->chars_at_source = 1; coding->consumed_char = 0; coding->consumed = 0; - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); } static int @@ -4500,7 +4594,7 @@ encode_coding_raw_text (coding) produced_chars = dst - (coding->destination + coding->dst_bytes); } } - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->produced_char += produced_chars; coding->produced = dst - coding->destination; return 0; @@ -4515,7 +4609,7 @@ detect_coding_charset (coding, detect_info) struct coding_system *coding; struct coding_detection_info *detect_info; { - const unsigned char *src = coding->source, *src_base = src; + const unsigned char *src = coding->source, *src_base; const unsigned char *src_end = coding->source + coding->src_bytes; int multibytep = coding->src_multibyte; int consumed_chars = 0; @@ -4535,7 +4629,10 @@ detect_coding_charset (coding, detect_info) { int c; + src_base = src; ONE_MORE_BYTE (c); + if (c < 0) + continue; if (NILP (AREF (valids, c))) break; if (c >= 0x80) @@ -4584,6 +4681,8 @@ decode_coding_charset (coding) break; ONE_MORE_BYTE (c); + if (c < 0) + goto invalid_code; code = c; val = AREF (valids, c); @@ -4643,7 +4742,7 @@ decode_coding_charset (coding) src = src_base; consumed_chars = consumed_chars_base; ONE_MORE_BYTE (c); - *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); char_offset++; coding->errors++; } @@ -4714,7 +4813,7 @@ encode_coding_charset (coding) } } - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->produced_char += produced_chars; coding->produced = dst - coding->destination; return 0; @@ -5480,7 +5579,8 @@ produce_chars (coding) { if (src == src_end) { - coding->result = CODING_RESULT_INSUFFICIENT_SRC; + record_conversion_result + (coding, CODING_RESULT_INSUFFICIENT_SRC); goto no_more_source; } if (*src == '\n') @@ -5669,7 +5769,7 @@ produce_charset (coding, charbuf) } \ if (! coding->charbuf) \ { \ - coding->result = CODING_RESULT_INSUFFICIENT_MEM; \ + record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \ return coding->result; \ } \ coding->charbuf_size = size; \ @@ -5759,7 +5859,7 @@ decode_coding (coding) coding->consumed = coding->consumed_char = 0; coding->produced = coding->produced_char = 0; coding->chars_at_source = 0; - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->errors = 0; ALLOC_CONVERSION_WORK_AREA (coding); @@ -5798,6 +5898,7 @@ decode_coding (coding) /* Flush out unprocessed data as binary chars. We are sure that the number of data is less than the size of coding->charbuf. */ + coding->charbuf_used = 0; while (nbytes-- > 0) { int c = *src++; @@ -6076,7 +6177,7 @@ encode_coding (coding) coding->consumed = coding->consumed_char = 0; coding->produced = coding->produced_char = 0; - coding->result = CODING_RESULT_SUCCESS; + record_conversion_result (coding, CODING_RESULT_SUCCESS); coding->errors = 0; ALLOC_CONVERSION_WORK_AREA (coding); @@ -6125,10 +6226,17 @@ make_conversion_work_buffer (multibyte) struct buffer *current; if (reused_workbuf_in_use++) - name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); + { + name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil); + workbuf = Fget_buffer_create (name); + } else - name = Vcode_conversion_workbuf_name; - workbuf = Fget_buffer_create (name); + { + name = Vcode_conversion_workbuf_name; + workbuf = Fget_buffer_create (name); + if (NILP (Vcode_conversion_reused_workbuf)) + Vcode_conversion_reused_workbuf = workbuf; + } current = current_buffer; set_buffer_internal (XBUFFER (workbuf)); Ferase_buffer (); @@ -6389,7 +6497,8 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, = (unsigned char *) xrealloc (destination, coding->produced); if (! destination) { - coding->result = CODING_RESULT_INSUFFICIENT_DST; + record_conversion_result (coding, + CODING_RESULT_INSUFFICIENT_DST); unbind_to (count, Qnil); return; } @@ -6419,7 +6528,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte, saved_pt_byte + (coding->produced - bytes)); } - unbind_to (count, Qnil); + unbind_to (count, coding->dst_object); } @@ -7363,9 +7472,6 @@ code_convert_region (start, end, coding_system, dst_object, encodep, norecord) if (! norecord) Vlast_coding_system_used = CODING_ID_NAME (coding.id); - if (coding.result != CODING_RESULT_SUCCESS) - error ("Code conversion error: %d", coding.result); - return (BUFFERP (dst_object) ? make_number (coding.produced_char) : coding.dst_object); @@ -7453,9 +7559,6 @@ code_convert_string (string, coding_system, dst_object, if (! norecord) Vlast_coding_system_used = CODING_ID_NAME (coding.id); - if (coding.result != CODING_RESULT_SUCCESS) - error ("Code conversion error: %d", coding.result); - return (BUFFERP (dst_object) ? make_number (coding.produced_char) : coding.dst_object); @@ -8740,6 +8843,12 @@ syms_of_coding () ASET (Vcoding_category_table, coding_category_undecided, intern ("coding-category-undecided")); + DEFSYM (Qinsufficient_source, "insufficient-source"); + DEFSYM (Qinconsistent_eol, "inconsistent-eol"); + DEFSYM (Qinvalid_source, "invalid-source"); + DEFSYM (Qinterrupted, "interrupted"); + DEFSYM (Qinsufficient_memory, "insufficient-memory"); + defsubr (&Scoding_system_p); defsubr (&Sread_coding_system); defsubr (&Sread_non_nil_coding_system); @@ -8835,6 +8944,23 @@ the value of `buffer-file-coding-system' is used. */); Coding system used in the latest file or process I/O. */); Vlast_coding_system_used = Qnil; + DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error, + doc: /* +Error status of the last code conversion. + +When an error was detected in the last code conversion, this variable +is set to one of the following symbols. + `insufficient-source' + `inconsistent-eol' + `invalid-source' + `interrupted' + `insufficient-memory' +When no error was detected, the value doesn't change. So, to check +the error status of a code conversion by this variable, you must +explicitly set this variable to nil before performing code +conversion. */); + Vlast_code_conversion_error = Qnil; + DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion, doc: /* *Non-nil means always inhibit code conversion of end-of-line format.