From 27901516b62b78a4b52c7787c7c115c8d00368d8 Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Thu, 28 Aug 1997 10:51:12 +0000 Subject: [PATCH] (Qno_conversion, Qundecided): New variables. (syms_of_coding): Initialize and staticpro them. (coding_category_name): Include "coding-category-raw-test". (setup_coding_system): Handle coding_type_raw_text. (detect_coding_mask): Include CODING_CATEGORY_MASK_RAW_TEXT in the return value instead of CODING_CATEGORY_MASK_BINARY. (detect_coding): Do not check the case that `mask' is 0, which never happens now. (detect_eol_type): If EOL format is inconsistent, return CODING_EOL_INCONSISTENT. (detect_eol): If EOL format of raw-text file is inconsistent, detect it as no-conversion. (decode_coding): Handle coding_type_raw_text. (encode_coding): Likewise. (Fdetect_coding_region): Ajusted for the above changes. (shrink_conversion_area): Handle coding_type_raw_text. --- src/coding.c | 129 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 87 insertions(+), 42 deletions(-) diff --git a/src/coding.c b/src/coding.c index 1e0e992ea35..66fbe517215 100644 --- a/src/coding.c +++ b/src/coding.c @@ -67,7 +67,13 @@ Boston, MA 02111-1307, USA. */ (all uppercase), we mean the coding system, and when we write "Big5" (capitalized), we mean the character set. - 4. Other + 4. Raw text + + A coding system to for a text containing random 8-bit code. Emacs + does no code conversion on such a text except for end-of-line + format. + + 5. Other If a user wants to read/write a text encoded in a coding system not listed above, he can supply a decoder and an encoder for it in CCL @@ -246,6 +252,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes, consumed) Lisp_Object Qcoding_system, Qeol_type; Lisp_Object Qbuffer_file_coding_system; Lisp_Object Qpost_read_conversion, Qpre_write_conversion; +Lisp_Object Qno_conversion, Qundecided; extern Lisp_Object Qinsert_file_contents, Qwrite_region; Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument; @@ -319,6 +326,7 @@ char *coding_category_name[CODING_CATEGORY_IDX_MAX] = { "coding-category-iso-7-else", "coding-category-iso-8-else", "coding-category-big5", + "coding-category-raw-text", "coding-category-binary" }; @@ -2546,6 +2554,10 @@ setup_coding_system (coding_system, coding) coding->require_flushing = 1; break; + case 5: + coding->type = coding_type_raw_text; + break; + default: if (EQ (type, Qt)) coding->type = coding_type_undecided; @@ -2687,7 +2699,7 @@ detect_coding_mask (src, src_bytes) /* If C is a special latin extra code, or is an ISO2022 specific control code of C1 (SS2 or SS3), or is an ISO2022 control-sequence-introducer (CSI), - we should also consider the possibility of someof ISO2022 codings. */ + we should also consider the possibility of ISO2022 codings. */ if ((VECTORP (Vlatin_extra_code_table) && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3) @@ -2700,14 +2712,14 @@ detect_coding_mask (src, src_bytes) mask = (detect_coding_iso2022 (src, src_end) | detect_coding_sjis (src, src_end) | detect_coding_emacs_mule (src, src_end) - | CODING_CATEGORY_MASK_BINARY); + | CODING_CATEGORY_MASK_RAW_TEXT); else - /* C is the first byte of SJIS character code, or a - leading-code of Emacs. */ + /* C is the first byte of SJIS character code, + or a leading-code of Emacs' internal format (emacs-mule). */ mask = (detect_coding_sjis (src, src_end) | detect_coding_emacs_mule (src, src_end) - | CODING_CATEGORY_MASK_BINARY); + | CODING_CATEGORY_MASK_RAW_TEXT); } else /* C is a character of ISO2022 in graphic plane right, @@ -2716,7 +2728,7 @@ detect_coding_mask (src, src_bytes) mask = (detect_coding_iso2022 (src, src_end) | detect_coding_sjis (src, src_end) | detect_coding_big5 (src, src_end) - | CODING_CATEGORY_MASK_BINARY); + | CODING_CATEGORY_MASK_RAW_TEXT); return mask; } @@ -2732,42 +2744,33 @@ detect_coding (coding, src, src_bytes) { int mask = detect_coding_mask (src, src_bytes); int idx; + Lisp_Object val = Vcoding_category_list; if (mask == CODING_CATEGORY_MASK_ANY) /* We found nothing other than ASCII. There's nothing to do. */ return; - if (!mask) - /* The source text seems to be encoded in unknown coding system. - Emacs regards the category of such a kind of coding system as - `coding-category-binary'. We assume that a user has assigned - an appropriate coding system for a `coding-category-binary'. */ - idx = CODING_CATEGORY_IDX_BINARY; - else - { - /* We found some plausible coding systems. Let's use a coding - system of the highest priority. */ - Lisp_Object val = Vcoding_category_list; + /* We found some plausible coding systems. Let's use a coding + system of the highest priority. */ - if (CONSP (val)) - while (!NILP (val)) - { - idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index)); - if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx))) - break; - val = XCONS (val)->cdr; - } - else - val = Qnil; + if (CONSP (val)) + while (!NILP (val)) + { + idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index)); + if ((idx < CODING_CATEGORY_IDX_MAX) && (mask & (1 << idx))) + break; + val = XCONS (val)->cdr; + } + else + val = Qnil; - if (NILP (val)) - { - /* For unknown reason, `Vcoding_category_list' contains none - of found categories. Let's use any of them. */ - for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++) - if (mask & (1 << idx)) - break; - } + if (NILP (val)) + { + /* For unknown reason, `Vcoding_category_list' contains none of + found categories. Let's use any of them. */ + for (idx = 0; idx < CODING_CATEGORY_IDX_MAX; idx++) + if (mask & (1 << idx)) + break; } setup_coding_system (XSYMBOL (coding_category_table[idx])->value, coding); } @@ -2807,8 +2810,8 @@ detect_eol_type (src, src_bytes) eol_type = this_eol_type; else if (eol_type != this_eol_type) /* The found type is different from what found before. - We had better not decode end-of-line. */ - return CODING_EOL_LF; + Let's notice the caller about this inconsistency. */ + return CODING_EOL_INCONSISTENT; } } @@ -2832,6 +2835,24 @@ detect_eol (coding, src, src_bytes) /* We found no end-of-line in the source text. */ return; + if (eol_type == CODING_EOL_INCONSISTENT) + { +#if 0 + /* This code is suppressed until we find a better way to + distinguish raw-text and binary. */ + + /* If we have already detected that the coding is raw-text, the + coding should actually be no-conversion. */ + if (coding->type == coding_type_raw_text) + { + setup_coding_system (Qno_conversion, coding); + return; + } + /* Else, let's decode only text code anyway. */ +#endif /* 0 */ + eol_type == CODING_EOL_LF; + } + coding_system = coding->symbol; while (!NILP (coding_system) && NILP (val = Fget (coding_system, Qeol_type))) @@ -2877,6 +2898,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes, consumed) case coding_type_emacs_mule: case coding_type_undecided: + case coding_type_raw_text: if (coding->eol_type == CODING_EOL_LF || coding->eol_type == CODING_EOL_UNDECIDED) goto label_no_conversion; @@ -2941,6 +2963,7 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes, consumed) case coding_type_emacs_mule: case coding_type_undecided: + case coding_type_raw_text: if (coding->eol_type == CODING_EOL_LF || coding->eol_type == CODING_EOL_UNDECIDED) goto label_no_conversion; @@ -3133,10 +3156,11 @@ If only ASCII characters are found, it returns `undecided'\n\ if (coding_mask == CODING_CATEGORY_MASK_ANY) { - val = intern ("undecided"); - if (eol_type != CODING_EOL_UNDECIDED) + val = Qundecided; + if (eol_type != CODING_EOL_UNDECIDED + && eol_type != CODING_EOL_INCONSISTENT) { - Lisp_Object val2 = Fget (val, Qeol_type); + Lisp_Object val2 = Fget (Qundecided, Qeol_type); if (VECTORP (val2)) val = XVECTOR (val2)->contents[eol_type]; } @@ -3155,13 +3179,26 @@ If only ASCII characters are found, it returns `undecided'\n\ int idx = XFASTINT (Fget (XCONS (val2)->car, Qcoding_category_index)); if (coding_mask & (1 << idx)) - val = Fcons (Fsymbol_value (XCONS (val2)->car), val); + { +#if 0 + /* This code is suppressed until we find a better way to + distinguish raw-text and binary. */ + + if (idx == CODING_CATEGORY_IDX_RAW_TEXT + && eol_type == CODING_EOL_INCONSISTENT) + val = Fcons (Qno_conversion, val); + else +#endif /* 0 */ + val = Fcons (Fsymbol_value (XCONS (val2)->car), val); + } } /* Then, change the order of the list, while getting subsidiary coding-systems. */ val2 = val; val = Qnil; + if (eol_type == CODING_EOL_INCONSISTENT) + eol_type == CODING_EOL_UNDECIDED; for (; !NILP (val2); val2 = XCONS (val2)->cdr) { if (eol_type == CODING_EOL_UNDECIDED) @@ -3206,6 +3243,7 @@ shrink_conversion_area (begp, endp, coding, encodep) case coding_type_no_conversion: case coding_type_emacs_mule: case coding_type_undecided: + case coding_type_raw_text: /* We need no conversion. */ *begp = *endp; return; @@ -3243,6 +3281,7 @@ shrink_conversion_area (begp, endp, coding, encodep) *begp = *endp; return; case coding_type_emacs_mule: + case coding_type_raw_text: if (coding->eol_type == CODING_EOL_LF) { /* We need no conversion. */ @@ -3857,6 +3896,12 @@ syms_of_coding () Qpre_write_conversion = intern ("pre-write-conversion"); staticpro (&Qpre_write_conversion); + Qno_conversion = intern ("no-conversion"); + staticpro (&Qno_conversion); + + Qundecided = intern ("undecided"); + staticpro (&Qundecided); + Qcoding_system_spec = intern ("coding-system-spec"); staticpro (&Qcoding_system_spec); -- 2.39.2