/*** TABLE OF CONTENTS ***
+ 0. General comments
1. Preamble
2. Emacs' internal format (emacs-mule) handlers
3. ISO2022 handlers
*/
+/*** 0. General comments ***/
+
+
/*** GENERAL NOTE on CODING SYSTEM ***
Coding system is an encoding mechanism of one or more character
/*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
- These functions decode SRC_BYTES length text at SOURCE encoded in
- CODING to Emacs' internal format (emacs-mule). The resulting text
- goes to a place pointed to by DESTINATION, the length of which
- should not exceed DST_BYTES. These functions set the information of
- original and decoded texts in the members produced, produced_char,
- consumed, and consumed_char of the structure *CODING.
+ These functions decode SRC_BYTES length of unibyte text at SOURCE
+ encoded in CODING to Emacs' internal format. The resulting
+ multibyte text goes to a place pointed to by DESTINATION, the length
+ of which should not exceed DST_BYTES.
- The return value is an integer (CODING_FINISH_XXX) indicating how
- the decoding finished.
+ These functions set the information of original and decoded texts in
+ the members produced, produced_char, consumed, and consumed_char of
+ the structure *CODING. They also set the member result to one of
+ CODING_FINISH_XXX indicating how the decoding finished.
DST_BYTES zero means that source area and destination area are
overlapped, which means that we can produce a decoded text until it
Below is a template of these functions. */
#if 0
+static void
decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
unsigned char *source, *destination;
/*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
These functions encode SRC_BYTES length text at SOURCE of Emacs'
- internal format (emacs-mule) to CODING. The resulting text goes to
- a place pointed to by DESTINATION, the length of which should not
- exceed DST_BYTES. These functions set the information of
- original and encoded texts in the members produced, produced_char,
- consumed, and consumed_char of the structure *CODING.
+ internal multibyte format to CODING. The resulting unibyte text
+ goes to a place pointed to by DESTINATION, the length of which
+ should not exceed DST_BYTES.
- The return value is an integer (CODING_FINISH_XXX) indicating how
- the encoding finished.
+ These functions set the information of original and encoded texts in
+ the members produced, produced_char, consumed, and consumed_char of
+ the structure *CODING. They also set the member result to one of
+ CODING_FINISH_XXX indicating how the encoding finished.
DST_BYTES zero means that source area and destination area are
- overlapped, which means that we can produce a decoded text until it
- reaches at the head of not-yet-decoded source text.
+ overlapped, which means that we can produce a encoded text until it
+ reaches at the head of not-yet-encoded source text.
Below is a template of these functions. */
#if 0
+static void
encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
unsigned char *source, *destination;
/*** COMMONLY USED MACROS ***/
-/* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
- THREE_MORE_BYTES safely get one, two, and three bytes from the
- source text respectively. If there are not enough bytes in the
- source, they jump to `label_end_of_loop'. The caller should set
- variables `src' and `src_end' to appropriate areas in advance. */
-
-#define ONE_MORE_BYTE(c1) \
- do { \
- if (src < src_end) \
- c1 = *src++; \
- else \
- goto label_end_of_loop; \
- } while (0)
+/* The following two macros ONE_MORE_BYTE and TWO_MORE_BYTES safely
+ get one, two, and three bytes from the source text respectively.
+ If there are not enough bytes in the source, they jump to
+ `label_end_of_loop'. The caller should set variables `coding',
+ `src' and `src_end' to appropriate pointer in advance. These
+ macros are called from decoding routines `decode_coding_XXX', thus
+ it is assumed that the source text is unibyte. */
-#define TWO_MORE_BYTES(c1, c2) \
- do { \
- if (src + 1 < src_end) \
- c1 = *src++, c2 = *src++; \
- else \
- goto label_end_of_loop; \
+#define ONE_MORE_BYTE(c1) \
+ do { \
+ if (src >= src_end) \
+ { \
+ coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
+ goto label_end_of_loop; \
+ } \
+ c1 = *src++; \
} while (0)
-#define THREE_MORE_BYTES(c1, c2, c3) \
- do { \
- if (src + 2 < src_end) \
- c1 = *src++, c2 = *src++, c3 = *src++; \
- else \
- goto label_end_of_loop; \
+#define TWO_MORE_BYTES(c1, c2) \
+ do { \
+ if (src + 1 >= src_end) \
+ { \
+ coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
+ goto label_end_of_loop; \
+ } \
+ c1 = *src++; \
+ c2 = *src++; \
} while (0)
-/* The following three macros DECODE_CHARACTER_ASCII,
- DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
- the multi-byte form of a character of each class at the place
- pointed by `dst'. The caller should set the variable `dst' to
- point to an appropriate area and the variable `coding' to point to
- the coding-system of the currently decoding text in advance. */
-/* Decode one ASCII character C. */
+/* Set C to the next character at the source text pointed by `src'.
+ If there are not enough characters in the source, jump to
+ `label_end_of_loop'. The caller should set variables `coding'
+ `src', `src_end', and `translation_table' to appropriate pointers
+ in advance. This macro is used in encoding routines
+ `encode_coding_XXX', thus it assumes that the source text is in
+ multibyte form except for 8-bit characters. 8-bit characters are
+ in multibyte form if coding->src_multibyte is nonzero, else they
+ are represented by a single byte. */
-#define DECODE_CHARACTER_ASCII(c) \
- do { \
- *dst++ = (c) & 0x7F; \
- coding->produced_char++; \
+#define ONE_MORE_CHAR(c) \
+ do { \
+ int len = src_end - src; \
+ int bytes; \
+ if (len <= 0) \
+ { \
+ coding->result = CODING_FINISH_INSUFFICIENT_SRC; \
+ goto label_end_of_loop; \
+ } \
+ if (coding->src_multibyte \
+ || UNIBYTE_STR_AS_MULTIBYTE_P (src, len, bytes)) \
+ c = STRING_CHAR_AND_LENGTH (src, len, bytes); \
+ else \
+ c = *src, bytes = 1; \
+ if (!NILP (translation_table)) \
+ c = translate_char (translation_table, c, 0, 0, 0); \
+ src += bytes; \
} while (0)
-/* Decode one DIMENSION1 character whose charset is CHARSET and whose
- position-code is C. */
-#define DECODE_CHARACTER_DIMENSION1(charset, c) \
+/* Produce a multibyte form of characater C to `dst'. Jump to
+ `label_end_of_loop' if there's not enough space at `dst'.
+
+ If we are now in the middle of composition sequence, the decoded
+ character may be ALTCHAR (for the current composition). In that
+ case, the character goes to coding->cmp_data->data instead of
+ `dst'.
+
+ This macro is used in decoding routines. */
+
+#define EMIT_CHAR(c) \
do { \
- unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \
+ if (! COMPOSING_P (coding) \
+ || coding->composing == COMPOSITION_RELATIVE \
+ || coding->composing == COMPOSITION_WITH_RULE) \
+ { \
+ int bytes = CHAR_BYTES (c); \
+ if ((dst + bytes) > (dst_bytes ? dst_end : src)) \
+ { \
+ coding->result = CODING_FINISH_INSUFFICIENT_DST; \
+ goto label_end_of_loop; \
+ } \
+ dst += CHAR_STRING (c, dst); \
+ coding->produced_char++; \
+ } \
\
- *dst++ = leading_code; \
- if ((leading_code = CHARSET_LEADING_CODE_EXT (charset)) > 0) \
- *dst++ = leading_code; \
- *dst++ = (c) | 0x80; \
- coding->produced_char++; \
+ if (COMPOSING_P (coding) \
+ && coding->composing != COMPOSITION_RELATIVE) \
+ { \
+ CODING_ADD_COMPOSITION_COMPONENT (coding, c); \
+ coding->composition_rule_follows \
+ = coding->composing != COMPOSITION_WITH_ALTCHARS; \
+ } \
} while (0)
-/* Decode one DIMENSION2 character whose charset is CHARSET and whose
- position-codes are C1 and C2. */
-#define DECODE_CHARACTER_DIMENSION2(charset, c1, c2) \
- do { \
- DECODE_CHARACTER_DIMENSION1 (charset, c1); \
- *dst++ = (c2) | 0x80; \
+#define EMIT_ONE_BYTE(c) \
+ do { \
+ if (dst >= (dst_bytes ? dst_end : src)) \
+ { \
+ coding->result = CODING_FINISH_INSUFFICIENT_DST; \
+ goto label_end_of_loop; \
+ } \
+ *dst++ = c; \
+ } while (0)
+
+#define EMIT_TWO_BYTES(c1, c2) \
+ do { \
+ if (dst + 2 > (dst_bytes ? dst_end : src)) \
+ { \
+ coding->result = CODING_FINISH_INSUFFICIENT_DST; \
+ goto label_end_of_loop; \
+ } \
+ *dst++ = c1, *dst++ = c2; \
+ } while (0)
+
+#define EMIT_BYTES(from, to) \
+ do { \
+ if (dst + (to - from) > (dst_bytes ? dst_end : src)) \
+ { \
+ coding->result = CODING_FINISH_INSUFFICIENT_DST; \
+ goto label_end_of_loop; \
+ } \
+ while (from < to) \
+ *dst++ = *from++; \
} while (0)
\f
/* Emacs' internal format for encoding multiple character sets is a
kind of multi-byte encoding, i.e. characters are encoded by
- variable-length sequences of one-byte codes. ASCII characters
- and control characters (e.g. `tab', `newline') are represented by
- one-byte sequences which are their ASCII codes, in the range 0x00
- through 0x7F. The other characters are represented by a sequence
- of `base leading-code', optional `extended leading-code', and one
- or two `position-code's. The length of the sequence is determined
- by the base leading-code. Leading-code takes the range 0x80
- through 0x9F, whereas extended leading-code and position-code take
- the range 0xA0 through 0xFF. See `charset.h' for more details
- about leading-code and position-code.
+ variable-length sequences of one-byte codes.
+
+ ASCII characters and control characters (e.g. `tab', `newline') are
+ represented by one-byte sequences which are their ASCII codes, in
+ the range 0x00 through 0x7F.
+
+ 8-bit characters of the range 0x80..0x9F are represented by
+ two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit
+ code + 0x20).
+
+ 8-bit characters of the range 0xA0..0xFF are represented by
+ one-byte sequences which are their 8-bit code.
+
+ The other characters are represented by a sequence of `base
+ leading-code', optional `extended leading-code', and one or two
+ `position-code's. The length of the sequence is determined by the
+ base leading-code. Leading-code takes the range 0x80 through 0x9F,
+ whereas extended leading-code and position-code take the range 0xA0
+ through 0xFF. See `charset.h' for more details about leading-code
+ and position-code.
--- CODE RANGE of Emacs' internal format ---
- (character set) (range)
- ASCII 0x00 .. 0x7F
- ELSE (1st byte) 0x81 .. 0x9F
- (rest bytes) 0xA0 .. 0xFF
+ character set range
+ ------------- -----
+ ascii 0x00..0x7F
+ eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF
+ eight-bit-graphic 0xA0..0xBF
+ ELSE 0x81..0x9F + [0xA0..0xFF]+
---------------------------------------------
*/
enum emacs_code_class_type emacs_code_class[256];
-/* Go to the next statement only if *SRC is accessible and the code is
- greater than 0xA0. */
-#define CHECK_CODE_RANGE_A0_FF \
- do { \
- if (src >= src_end) \
- goto label_end_of_switch; \
- else if (*src++ < 0xA0) \
- return 0; \
- } while (0)
-
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in Emacs' internal format. If it is,
return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */
int
detect_coding_emacs_mule (src, src_end)
- unsigned char *src, *src_end;
+ unsigned char *src, *src_end;
{
unsigned char c;
int composing = 0;
+ /* Dummy for ONE_MORE_BYTE. */
+ struct coding_system dummy_coding;
+ struct coding_system *coding = &dummy_coding;
- while (src < src_end)
+ while (1)
{
- c = *src++;
+ ONE_MORE_BYTE (c);
if (composing)
{
if (c < 0xA0)
composing = 0;
+ else if (c == 0xA0)
+ {
+ ONE_MORE_BYTE (c);
+ c &= 0x7F;
+ }
else
c -= 0x20;
}
- switch (emacs_code_class[c])
+ if (c < 0x20)
{
- case EMACS_ascii_code:
- case EMACS_linefeed_code:
- break;
-
- case EMACS_control_code:
if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
return 0;
- break;
+ }
+ else if (c >= 0x80 && c < 0xA0)
+ {
+ if (c == 0x80)
+ /* Old leading code for a composite character. */
+ composing = 1;
+ else
+ {
+ unsigned char *src_base = src - 1;
+ int bytes;
- case EMACS_invalid_code:
- return 0;
+ if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base,
+ bytes))
+ return 0;
+ src = src_base + bytes;
+ }
+ }
+ }
+ label_end_of_loop:
+ return CODING_CATEGORY_MASK_EMACS_MULE;
+}
- case EMACS_leading_code_4:
- CHECK_CODE_RANGE_A0_FF;
- /* fall down to check it two more times ... */
- case EMACS_leading_code_3:
- CHECK_CODE_RANGE_A0_FF;
- /* fall down to check it one more time ... */
+/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
- case EMACS_leading_code_2:
- CHECK_CODE_RANGE_A0_FF;
- break;
+static void
+decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
+ struct coding_system *coding;
+ unsigned char *source, *destination;
+ int src_bytes, dst_bytes;
+{
+ unsigned char *src = source;
+ unsigned char *src_end = source + src_bytes;
+ unsigned char *dst = destination;
+ unsigned char *dst_end = destination + dst_bytes;
+ /* SRC_BASE remembers the start position in source in each loop.
+ The loop will be exited when there's not enough source code, or
+ when there's not enough destination area to produce a
+ character. */
+ unsigned char *src_base;
- case 0x80: /* Old leading code for a composite character. */
- if (composing)
- CHECK_CODE_RANGE_A0_FF;
- else
- composing = 1;
- break;
+ coding->produced_char = 0;
+ while (src < src_end)
+ {
+ unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
+ int bytes;
- default:
- label_end_of_switch:
+ src_base = src;
+ if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+ {
+ p = src;
+ src += bytes;
+ }
+ else
+ {
+ bytes = CHAR_STRING (*src, tmp);
+ p = tmp;
+ src++;
+ }
+ if (dst + bytes >= (dst_bytes ? dst_end : src))
+ {
+ coding->result = CODING_FINISH_INSUFFICIENT_DST;
break;
}
+ while (bytes--) *dst++ = *p++;
+ coding->produced_char++;
}
- return CODING_CATEGORY_MASK_EMACS_MULE;
+ coding->consumed = coding->consumed_char = src_base - source;
+ coding->produced = dst - destination;
}
+#define encode_coding_emacs_mule(coding, source, destination, src_bytes, dst_bytes) \
+ encode_eol (coding, source, destination, src_bytes, dst_bytes)
+
+
\f
/*** 3. ISO2022 handlers ***/
is encoded using bytes less than 128. This may make the encoded
text a little bit longer, but the text passes more easily through
several gateways, some of which strip off MSB (Most Signigant Bit).
-
+
There are two kinds of character sets: control character set and
graphic character set. The former contains control characters such
as `newline' and `escape' to provide control functions (control
o ESC '2' -- start rule-base composition (*)
o ESC '3' -- start relative composition with alternate chars (**)
o ESC '4' -- start rule-base composition with alternate chars (**)
- Since these are not standard escape sequences of any ISO standard,
- the use of them for these meaning is restricted to Emacs only.
+ Since these are not standard escape sequences of any ISO standard,
+ the use of them for these meaning is restricted to Emacs only.
- (*) This form is used only in Emacs 20.5 and the older versions,
- but the newer versions can safely decode it.
- (**) This form is used only in Emacs 21.1 and the newer versions,
- and the older versions can't decode it.
+ (*) This form is used only in Emacs 20.5 and the older versions,
+ but the newer versions can safely decode it.
+ (**) This form is used only in Emacs 21.1 and the newer versions,
+ and the older versions can't decode it.
- Here's a list of examples usages of these composition escape
- sequences (categorized by `enum composition_method').
+ Here's a list of examples usages of these composition escape
+ sequences (categorized by `enum composition_method').
- COMPOSITION_RELATIVE:
+ COMPOSITION_RELATIVE:
ESC 0 CHAR [ CHAR ] ESC 1
- COMPOSITOIN_WITH_RULE:
+ COMPOSITOIN_WITH_RULE:
ESC 2 CHAR [ RULE CHAR ] ESC 1
- COMPOSITION_WITH_ALTCHARS:
+ COMPOSITION_WITH_ALTCHARS:
ESC 3 ALTCHAR [ ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1
- COMPOSITION_WITH_RULE_ALTCHARS:
+ COMPOSITION_WITH_RULE_ALTCHARS:
ESC 4 ALTCHAR [ RULE ALTCHAR ] ESC 0 CHAR [ CHAR ] ESC 1 */
enum iso_code_class_type iso_code_class[256];
int mask_found = 0;
int reg[4], shift_out = 0, single_shifting = 0;
int c, c1, i, charset;
+ /* Dummy for ONE_MORE_BYTE. */
+ struct coding_system dummy_coding;
+ struct coding_system *coding = &dummy_coding;
reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
while (mask && src < src_end)
{
- c = *src++;
+ ONE_MORE_BYTE (c);
switch (c)
{
case ISO_CODE_ESC:
single_shifting = 0;
- if (src >= src_end)
- break;
- c = *src++;
+ ONE_MORE_BYTE (c);
if (c >= '(' && c <= '/')
{
/* Designation sequence for a charset of dimension 1. */
- if (src >= src_end)
- break;
- c1 = *src++;
+ ONE_MORE_BYTE (c1);
if (c1 < ' ' || c1 >= 0x80
|| (charset = iso_charset_table[0][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
else if (c == '$')
{
/* Designation sequence for a charset of dimension 2. */
- if (src >= src_end)
- break;
- c = *src++;
+ ONE_MORE_BYTE (c);
if (c >= '@' && c <= 'B')
/* Designation for JISX0208.1978, GB2312, or JISX0208. */
reg[0] = charset = iso_charset_table[1][0][c];
else if (c >= '(' && c <= '/')
{
- if (src >= src_end)
- break;
- c1 = *src++;
+ ONE_MORE_BYTE (c1);
if (c1 < ' ' || c1 >= 0x80
|| (charset = iso_charset_table[1][c >= ','][c1]) < 0)
/* Invalid designation sequence. Just ignore. */
0xA0..0FF. If the byte length is odd, we exclude
CODING_CATEGORY_MASK_ISO_8_2. We can check this only
when we are not single shifting. */
- if (!single_shifting)
+ if (!single_shifting
+ && mask & CODING_CATEGORY_MASK_ISO_8_2)
{
- while (src < src_end && *src >= 0xA0)
- src++;
- if ((src - src_begin - 1) & 1 && src < src_end)
+ int i = 0;
+ while (src < src_end)
+ {
+ ONE_MORE_BYTE (c);
+ if (c < 0xA0)
+ break;
+ i++;
+ }
+
+ if (i & 1 && src < src_end)
mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
else
mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
break;
}
}
-
+ label_end_of_loop:
return (mask & mask_found);
}
-/* Decode a character of which charset is CHARSET and the 1st position
- code is C1. If dimension of CHARSET is 2, the 2nd position code is
- fetched from SRC and set to C2. If CHARSET is negative, it means
- that we are decoding ill formed text, and what we can do is just to
- read C1 as is.
+/* Decode a character of which charset is CHARSET, the 1st position
+ code is C1, the 2nd position code is C2, and return the decoded
+ character code. If the variable `translation_table' is non-nil,
+ returned the translated code. */
- If we are now in the middle of composition sequence, the decoded
- character may be ALTCHAR (see the comment above). In that case,
- the character goes to coding->cmp_data->data instead of DST. */
-
-#define DECODE_ISO_CHARACTER(charset, c1) \
- do { \
- int c_alt = -1, charset_alt = (charset); \
- if (charset_alt >= 0) \
- { \
- if (CHARSET_DIMENSION (charset_alt) == 2) \
- { \
- ONE_MORE_BYTE (c2); \
- if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F \
- && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0) \
- { \
- src--; \
- charset_alt = CHARSET_ASCII; \
- } \
- } \
- if (!NILP (translation_table) \
- && ((c_alt = translate_char (translation_table, \
- -1, charset_alt, c1, c2)) >= 0)) \
- SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
- } \
- if (! COMPOSING_P (coding) \
- || coding->composing == COMPOSITION_RELATIVE \
- || coding->composing == COMPOSITION_WITH_RULE) \
- { \
- if (charset_alt == CHARSET_ASCII || charset_alt < 0) \
- DECODE_CHARACTER_ASCII (c1); \
- else if (CHARSET_DIMENSION (charset_alt) == 1) \
- DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \
- else \
- DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
- } \
- if (COMPOSING_P (coding) \
- && coding->composing != COMPOSITION_RELATIVE) \
- { \
- if (c_alt < 0) \
- c_alt = MAKE_CHAR (charset_alt, c1, c2); \
- CODING_ADD_COMPOSITION_COMPONENT (coding, c_alt); \
- coding->composition_rule_follows \
- = coding->composing != COMPOSITION_WITH_ALTCHARS; \
- } \
- } while (0)
+#define DECODE_ISO_CHARACTER(charset, c1, c2) \
+ (NILP (translation_table) \
+ ? MAKE_CHAR (charset, c1, c2) \
+ : translate_char (translation_table, -1, charset, c1, c2))
/* Set designation state into CODING. */
#define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
>= COMPOSITION_DATA_SIZE) \
{ \
- result = CODING_FINISH_INSUFFICIENT_CMP; \
- goto label_end_of_loop_2; \
+ coding->result = CODING_FINISH_INSUFFICIENT_CMP; \
+ goto label_end_of_loop; \
} \
coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \
: c1 == '2' ? COMPOSITION_WITH_RULE \
if (nref == 4) nref = 10; \
rule = COMPOSITION_ENCODE_RULE (gref, nref); \
} \
- else if (c1 < 93) /* new format (after ver.21 */ \
+ else if (c1 < 93) /* new format (after ver.21) */ \
{ \
ONE_MORE_BYTE (c2); \
rule = COMPOSITION_ENCODE_RULE (c1 - 81, c2 - 32); \
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
-int
+static void
decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
unsigned char *source, *destination;
unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
- /* Since the maximum bytes produced by each loop is 7, we subtract 6
- from DST_END to assure that overflow checking is necessary only
- at the head of loop. */
- unsigned char *adjusted_dst_end = dst_end - 6;
- int charset;
/* Charsets invoked to graphic plane 0 and 1 respectively. */
int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
- Lisp_Object translation_table
- = coding->translation_table_for_decode;
- int result = CODING_FINISH_NORMAL;
+ /* SRC_BASE remembers the start position in source in each loop.
+ The loop will be exited when there's not enough source code
+ (within macro ONE_MORE_BYTE), or when there's not enough
+ destination area to produce a character (within macro
+ EMIT_CHAR). */
+ unsigned char *src_base;
+ int c, charset;
+ Lisp_Object translation_table;
- if (!NILP (Venable_character_translation) && NILP (translation_table))
- translation_table = Vstandard_translation_table_for_decode;
+ if (NILP (Venable_character_translation))
+ translation_table = Qnil;
+ else
+ {
+ translation_table = coding->translation_table_for_decode;
+ if (NILP (translation_table))
+ translation_table = Vstandard_translation_table_for_decode;
+ }
- coding->produced_char = 0;
- coding->fake_multibyte = 0;
- while (src < src_end && (dst_bytes
- ? (dst < adjusted_dst_end)
- : (dst < src - 6)))
+ coding->result = CODING_FINISH_NORMAL;
+
+ while (1)
{
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source text
- to analyze long escape sequence or 2-byte code (within macros
- ONE_MORE_BYTE or TWO_MORE_BYTES). In that case, SRC is reset
- to SRC_BASE before exiting. */
- unsigned char *src_base = src;
- int c1 = *src++, c2;
+ int c1, c2;
+
+ src_base = src;
+ ONE_MORE_BYTE (c1);
/* We produce no character or one character. */
switch (iso_code_class [c1])
if (COMPOSING_P (coding) && coding->composition_rule_follows)
{
DECODE_COMPOSITION_RULE (c1);
- break;
+ continue;
}
if (charset0 < 0 || CHARSET_CHARS (charset0) == 94)
{
/* This is SPACE or DEL. */
- *dst++ = c1;
- coding->produced_char++;
+ charset = CHARSET_ASCII;
break;
}
/* This is a graphic character, we fall down ... */
case ISO_graphic_plane_0:
if (COMPOSING_P (coding) && coding->composition_rule_follows)
- DECODE_COMPOSITION_RULE (c1);
- else
- DECODE_ISO_CHARACTER (charset0, c1);
+ {
+ DECODE_COMPOSITION_RULE (c1);
+ continue;
+ }
+ charset = charset0;
break;
case ISO_0xA0_or_0xFF:
/* This is a graphic character, we fall down ... */
case ISO_graphic_plane_1:
- if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
+ if (charset1 < 0)
goto label_invalid_code;
- DECODE_ISO_CHARACTER (charset1, c1);
+ charset = charset1;
break;
- case ISO_control_code:
+ case ISO_control_0:
if (COMPOSING_P (coding))
DECODE_COMPOSITION_END ('1');
&& (coding->eol_type == CODING_EOL_CR
|| coding->eol_type == CODING_EOL_CRLF))
{
- result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop_2;
+ coding->result = CODING_FINISH_INCONSISTENT_EOL;
+ goto label_end_of_loop;
}
- *dst++ = c1;
- coding->produced_char++;
+ charset = CHARSET_ASCII;
break;
+ case ISO_control_1:
+ if (COMPOSING_P (coding))
+ DECODE_COMPOSITION_END ('1');
+ goto label_invalid_code;
+
case ISO_carriage_return:
if (COMPOSING_P (coding))
DECODE_COMPOSITION_END ('1');
if (coding->eol_type == CODING_EOL_CR)
- *dst++ = '\n';
+ c1 = '\n';
else if (coding->eol_type == CODING_EOL_CRLF)
{
ONE_MORE_BYTE (c1);
- if (c1 == ISO_CODE_LF)
- *dst++ = '\n';
- else
+ if (c1 != ISO_CODE_LF)
{
if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
{
- result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop_2;
+ coding->result = CODING_FINISH_INCONSISTENT_EOL;
+ goto label_end_of_loop;
}
src--;
- *dst++ = '\r';
+ c1 = '\r';
}
}
- else
- *dst++ = c1;
- coding->produced_char++;
+ charset = CHARSET_ASCII;
break;
case ISO_shift_out:
goto label_invalid_code;
CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
- break;
+ continue;
case ISO_shift_in:
if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
goto label_invalid_code;
CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
- break;
+ continue;
case ISO_single_shift_2_7:
case ISO_single_shift_2:
}
else
goto label_invalid_code;
- break;
+ /* We must update these variables now. */
+ charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
+ charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
+ continue;
case 'n': /* invocation of locking-shift-2 */
if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
goto label_invalid_code;
CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
- break;
+ continue;
case 'o': /* invocation of locking-shift-3 */
if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
goto label_invalid_code;
CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
- break;
+ continue;
case 'N': /* invocation of single-shift-2 */
if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
|| CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
goto label_invalid_code;
- ONE_MORE_BYTE (c1);
charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
- DECODE_ISO_CHARACTER (charset, c1);
+ ONE_MORE_BYTE (c1);
break;
case 'O': /* invocation of single-shift-3 */
if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
|| CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
goto label_invalid_code;
- ONE_MORE_BYTE (c1);
charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
- DECODE_ISO_CHARACTER (charset, c1);
+ ONE_MORE_BYTE (c1);
break;
case '0': case '2': case '3': case '4': /* start composition */
DECODE_COMPOSITION_START (c1);
- break;
+ continue;
case '1': /* end composition */
DECODE_COMPOSITION_END (c1);
- break;
+ continue;
case '[': /* specification of direction */
if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
default:
goto label_invalid_code;
}
- break;
+ continue;
default:
if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
}
else
- {
- goto label_invalid_code;
- }
+ goto label_invalid_code;
+ /* We must update these variables now. */
+ charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
+ charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
+ continue;
}
- /* We must update these variables now. */
- charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
- charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
- break;
+ }
- label_invalid_code:
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
- coding->produced_char += src - src_base;
- while (src_base < src)
- *dst++ = (*src_base++) & 0x7F;
+ /* Now we know CHARSET and 1st position code C1 of a character.
+ Produce a multibyte sequence for that character while getting
+ 2nd position code C2 if necessary. */
+ if (CHARSET_DIMENSION (charset) == 2)
+ {
+ ONE_MORE_BYTE (c2);
+ if (c1 < 0x80 ? c2 < 0x20 || c2 >= 0x80 : c2 < 0xA0)
+ /* C2 is not in a valid range. */
+ goto label_invalid_code;
}
+ c = DECODE_ISO_CHARACTER (charset, c1, c2);
+ EMIT_CHAR (c);
continue;
- label_end_of_loop:
- result = CODING_FINISH_INSUFFICIENT_SRC;
- label_end_of_loop_2:
+ label_invalid_code:
+ coding->errors++;
+ if (COMPOSING_P (coding))
+ DECODE_COMPOSITION_END ('1');
src = src_base;
- break;
- }
-
- if (src < src_end)
- {
- if (result == CODING_FINISH_NORMAL)
- result = CODING_FINISH_INSUFFICIENT_DST;
- else if (result != CODING_FINISH_INCONSISTENT_EOL
- && coding->mode & CODING_MODE_LAST_BLOCK)
- {
- /* This is the last block of the text to be decoded. We had
- better just flush out all remaining codes in the text
- although they are not valid characters. */
- if (COMPOSING_P (coding))
- DECODE_COMPOSITION_END ('1');
- src_bytes = src_end - src;
- if (dst_bytes && (dst_end - dst < src_end - src))
- src_end = src + (dst_end - dst);
- coding->produced_char += src_end - src;
- while (src < src_end)
- *dst++ = (*src++) & 0x7F;
- }
+ c = *src++;
+ EMIT_CHAR (c);
}
- coding->consumed = coding->consumed_char = src - source;
+ label_end_of_loop:
+ coding->consumed = coding->consumed_char = src_base - source;
coding->produced = dst - destination;
- return result;
+ return;
}
+
/* ISO2022 encoding stuff. */
/*
*/
/* Produce codes (escape sequence) for designating CHARSET to graphic
- register REG. If <final-char> of CHARSET is '@', 'A', or 'B' and
- the coding system CODING allows, produce designation sequence of
- short-form. */
+ register REG at DST, and increment DST. If <final-char> of CHARSET is
+ '@', 'A', or 'B' and the coding system CODING allows, produce
+ designation sequence of short-form. */
#define ENCODE_DESIGNATION(charset, reg, coding) \
do { \
char *intermediate_char_94 = "()*+"; \
char *intermediate_char_96 = ",-./"; \
int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset); \
+ \
if (revision < 255) \
{ \
*dst++ = ISO_CODE_ESC; \
*dst++ = '&'; \
*dst++ = '@' + revision; \
} \
- *dst++ = ISO_CODE_ESC; \
+ *dst++ = ISO_CODE_ESC; \
if (CHARSET_DIMENSION (charset) == 1) \
{ \
if (CHARSET_CHARS (charset) == 94) \
*dst++ = '$'; \
if (CHARSET_CHARS (charset) == 94) \
{ \
- if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \
- || reg != 0 \
- || final_char < '@' || final_char > 'B') \
+ if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM) \
+ || reg != 0 \
+ || final_char < '@' || final_char > 'B') \
*dst++ = (unsigned char) (intermediate_char_94[reg]); \
} \
else \
- *dst++ = (unsigned char) (intermediate_char_96[reg]); \
+ *dst++ = (unsigned char) (intermediate_char_96[reg]); \
} \
- *dst++ = final_char; \
+ *dst++ = final_char; \
CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset; \
} while (0)
if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
*dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
else \
- { \
- *dst++ = ISO_CODE_SS2; \
- coding->fake_multibyte = 1; \
- } \
+ *dst++ = ISO_CODE_SS2; \
CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
} while (0)
if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
*dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
else \
- { \
- *dst++ = ISO_CODE_SS3; \
- coding->fake_multibyte = 1; \
- } \
+ *dst++ = ISO_CODE_SS3; \
CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
} while (0)
escape sequence) for ISO2022 locking-shift functions (shift-in,
shift-out, locking-shift-2, and locking-shift-3). */
-#define ENCODE_SHIFT_IN \
- do { \
- *dst++ = ISO_CODE_SI; \
+#define ENCODE_SHIFT_IN \
+ do { \
+ *dst++ = ISO_CODE_SI; \
CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
} while (0)
-#define ENCODE_SHIFT_OUT \
- do { \
- *dst++ = ISO_CODE_SO; \
+#define ENCODE_SHIFT_OUT \
+ do { \
+ *dst++ = ISO_CODE_SO; \
CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
} while (0)
CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
} while (0)
-#define ENCODE_LOCKING_SHIFT_3 \
- do { \
- *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
+#define ENCODE_LOCKING_SHIFT_3 \
+ do { \
+ *dst++ = ISO_CODE_ESC, *dst++ = 'o'; \
CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
} while (0)
CHARSET and whose position-code is C1. Designation and invocation
sequences are also produced in advance if necessary. */
-
#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1) \
do { \
if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding)) \
#define ENCODE_ISO_CHARACTER(charset, c1, c2) \
do { \
- int c_alt, charset_alt; \
+ int alt_charset = charset; \
\
- if (!NILP (translation_table) \
- && ((c_alt = translate_char (translation_table, -1, \
- charset, c1, c2)) \
- >= 0)) \
- SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
- else \
- charset_alt = charset; \
- if (CHARSET_DEFINED_P (charset_alt)) \
+ if (CHARSET_DEFINED_P (charset)) \
{ \
- if (CHARSET_DIMENSION (charset_alt) == 1) \
+ if (CHARSET_DIMENSION (charset) == 1) \
{ \
if (charset == CHARSET_ASCII \
&& coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
- charset_alt = charset_latin_jisx0201; \
- ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1); \
+ alt_charset = charset_latin_jisx0201; \
+ ENCODE_ISO_CHARACTER_DIMENSION1 (alt_charset, c1); \
} \
else \
{ \
if (charset == charset_jisx0208 \
&& coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
- charset_alt = charset_jisx0208_1978; \
- ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
+ alt_charset = charset_jisx0208_1978; \
+ ENCODE_ISO_CHARACTER_DIMENSION2 (alt_charset, c1, c2); \
} \
} \
else \
{ \
- if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
- { \
- *dst++ = charset & 0x7f; \
- *dst++ = c1 & 0x7f; \
- if (c2) \
- *dst++ = c2 & 0x7f; \
- } \
- else \
- { \
- *dst++ = charset; \
- *dst++ = c1; \
- if (c2) \
- *dst++ = c2; \
- } \
+ *dst++ = c1; \
+ if (c2 >= 0) \
+ *dst++ = c2; \
} \
- coding->consumed_char++; \
} while (0)
/* Produce designation and invocation codes at a place pointed by DST
break;
}
}
+
return dst;
}
/* The following three macros produce codes for indicating direction
of text. */
-#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
- do { \
+#define ENCODE_CONTROL_SEQUENCE_INTRODUCER \
+ do { \
if (coding->flags == CODING_FLAG_ISO_SEVEN_BITS) \
- *dst++ = ISO_CODE_ESC, *dst++ = '['; \
- else \
- *dst++ = ISO_CODE_CSI; \
+ *dst++ = ISO_CODE_ESC, *dst++ = '['; \
+ else \
+ *dst++ = ISO_CODE_CSI; \
} while (0)
#define ENCODE_DIRECTION_R2L \
- ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '2', *dst++ = ']'
+ ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '2', *dst++ = ']'
#define ENCODE_DIRECTION_L2R \
- ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']'
+ ENCODE_CONTROL_SEQUENCE_INTRODUCER (dst), *dst++ = '0', *dst++ = ']'
/* Produce codes for designation and invocation to reset the graphic
planes and registers to initial state. */
} while (0)
/* Produce designation sequences of charsets in the line started from
- SRC to a place pointed by *DSTP, and update DSTP.
+ SRC to a place pointed by DST, and return updated DST.
If the current block ends before any end-of-line, we may fail to
find all the necessary designations. */
-void
-encode_designation_at_bol (coding, table, src, src_end, dstp)
+static unsigned char *
+encode_designation_at_bol (coding, translation_table, src, src_end, dst)
struct coding_system *coding;
- Lisp_Object table;
- unsigned char *src, *src_end, **dstp;
+ Lisp_Object translation_table;
+ unsigned char *src, *src_end, *dst;
{
int charset, c, found = 0, reg;
/* Table of charsets to be designated to each graphic register. */
int r[4];
- unsigned char *dst = *dstp;
for (reg = 0; reg < 4; reg++)
r[reg] = -1;
- while (src < src_end && *src != '\n' && found < 4)
+ while (found < 4)
{
- int bytes = BYTES_BY_CHAR_HEAD (*src);
+ ONE_MORE_CHAR (c);
+ if (c == '\n')
+ break;
- if (NILP (table))
- charset = CHARSET_AT (src);
- else
- {
- int c_alt;
- unsigned char c1, c2;
-
- SPLIT_STRING(src, bytes, charset, c1, c2);
- if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0)
- charset = CHAR_CHARSET (c_alt);
- }
-
+ charset = CHAR_CHARSET (c);
reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
{
found++;
r[reg] = charset;
}
-
- src += bytes;
}
+ label_end_of_loop:
if (found)
{
for (reg = 0; reg < 4; reg++)
if (r[reg] >= 0
&& CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
ENCODE_DESIGNATION (r[reg], reg, coding);
- *dstp = dst;
}
+
+ return dst;
}
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */
-int
+static void
encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
unsigned char *source, *destination;
unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
- /* Since the maximum bytes produced by each loop is 14, we subtract 13
+ /* Since the maximum bytes produced by each loop is 20, we subtract 19
from DST_END to assure overflow checking is necessary only at the
head of loop. */
- unsigned char *adjusted_dst_end = dst_end - 13;
- Lisp_Object translation_table
- = coding->translation_table_for_encode;
- int result = CODING_FINISH_NORMAL;
+ unsigned char *adjusted_dst_end = dst_end - 19;
+ /* SRC_BASE remembers the start position in source in each loop.
+ The loop will be exited when there's not enough source text to
+ analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
+ there's not enough destination area to produce encoded codes
+ (within macro EMIT_BYTES). */
+ unsigned char *src_base;
+ int c;
+ Lisp_Object translation_table;
- if (!NILP (Venable_character_translation) && NILP (translation_table))
- translation_table = Vstandard_translation_table_for_encode;
+ if (NILP (Venable_character_translation))
+ translation_table = Qnil;
+ else
+ {
+ translation_table = coding->translation_table_for_encode;
+ if (NILP (translation_table))
+ translation_table = Vstandard_translation_table_for_encode;
+ }
coding->consumed_char = 0;
- coding->fake_multibyte = 0;
- while (src < src_end && (dst_bytes
- ? (dst < adjusted_dst_end)
- : (dst < src - 13)))
+ coding->errors = 0;
+ while (1)
{
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source text
- to analyze multi-byte codes (within macros ONE_MORE_BYTE,
- TWO_MORE_BYTES, and THREE_MORE_BYTES). In that case, SRC is
- reset to SRC_BASE before exiting. */
- unsigned char *src_base = src;
- int charset, c1, c2, c3, c4;
+ int charset, c1, c2;
+
+ src_base = src;
+
+ if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
+ {
+ coding->result = CODING_FINISH_INSUFFICIENT_DST;
+ break;
+ }
if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
&& CODING_SPEC_ISO_BOL (coding))
{
/* We have to produce designation sequences if any now. */
- encode_designation_at_bol (coding, translation_table,
- src, src_end, &dst);
+ dst = encode_designation_at_bol (coding, translation_table,
+ src, src_end, dst);
CODING_SPEC_ISO_BOL (coding) = 0;
}
{
SPLIT_CHAR (c, charset, c1, c2);
ENCODE_ISO_CHARACTER (charset, c1, c2);
- /* But, we didn't consume a character in SRC. */
- coding->consumed_char--;
if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
coding->composition_rule_follows = 1;
}
}
}
- c1 = *src++;
- /* Now encode one character. C1 is a control character, an
- ASCII character, or a leading-code of multi-byte character. */
- switch (emacs_code_class[c1])
- {
- case EMACS_ascii_code:
- c2 = 0;
- ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
- break;
-
- case EMACS_control_code:
- if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
- ENCODE_RESET_PLANE_AND_REGISTER;
- *dst++ = c1;
- coding->consumed_char++;
- break;
+ ONE_MORE_CHAR (c);
- case EMACS_carriage_return_code:
- if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
- {
- if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
- ENCODE_RESET_PLANE_AND_REGISTER;
- *dst++ = c1;
- coding->consumed_char++;
- break;
- }
- /* fall down to treat '\r' as '\n' ... */
-
- case EMACS_linefeed_code:
- if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
- ENCODE_RESET_PLANE_AND_REGISTER;
- if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
- bcopy (coding->spec.iso2022.initial_designation,
- coding->spec.iso2022.current_designation,
- sizeof coding->spec.iso2022.initial_designation);
- if (coding->eol_type == CODING_EOL_LF
- || coding->eol_type == CODING_EOL_UNDECIDED)
- *dst++ = ISO_CODE_LF;
- else if (coding->eol_type == CODING_EOL_CRLF)
- *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
- else
- *dst++ = ISO_CODE_CR;
- CODING_SPEC_ISO_BOL (coding) = 1;
- coding->consumed_char++;
- break;
-
- case EMACS_leading_code_2:
- ONE_MORE_BYTE (c2);
- c3 = 0;
- if (c2 < 0xA0)
+ /* Now encode the character C. */
+ if (c < 0x20 || c == 0x7F)
+ {
+ if (c == '\r')
{
- /* invalid sequence */
- *dst++ = c1;
- src--;
- coding->consumed_char++;
+ if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
+ {
+ if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
+ ENCODE_RESET_PLANE_AND_REGISTER;
+ *dst++ = c;
+ continue;
+ }
+ /* fall down to treat '\r' as '\n' ... */
+ c = '\n';
}
- else
- ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
- break;
-
- case EMACS_leading_code_3:
- TWO_MORE_BYTES (c2, c3);
- c4 = 0;
- if (c2 < 0xA0 || c3 < 0xA0)
+ if (c == '\n')
{
- /* invalid sequence */
- *dst++ = c1;
- src -= 2;
- coding->consumed_char++;
+ if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
+ ENCODE_RESET_PLANE_AND_REGISTER;
+ if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
+ bcopy (coding->spec.iso2022.initial_designation,
+ coding->spec.iso2022.current_designation,
+ sizeof coding->spec.iso2022.initial_designation);
+ if (coding->eol_type == CODING_EOL_LF
+ || coding->eol_type == CODING_EOL_UNDECIDED)
+ *dst++ = ISO_CODE_LF;
+ else if (coding->eol_type == CODING_EOL_CRLF)
+ *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
+ else
+ *dst++ = ISO_CODE_CR;
+ CODING_SPEC_ISO_BOL (coding) = 1;
}
- else if (c1 < LEADING_CODE_PRIVATE_11)
- ENCODE_ISO_CHARACTER (c1, c2, c3);
- else
- ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
- break;
-
- case EMACS_leading_code_4:
- THREE_MORE_BYTES (c2, c3, c4);
- if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
+ else
{
- /* invalid sequence */
- *dst++ = c1;
- src -= 3;
- coding->consumed_char++;
+ if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
+ ENCODE_RESET_PLANE_AND_REGISTER;
+ *dst++ = c;
}
- else
- ENCODE_ISO_CHARACTER (c2, c3, c4);
- break;
-
- case EMACS_invalid_code:
- if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
- ENCODE_RESET_PLANE_AND_REGISTER;
- *dst++ = c1;
- coding->consumed_char++;
- break;
}
- continue;
- label_end_of_loop:
- result = CODING_FINISH_INSUFFICIENT_SRC;
- src = src_base;
- break;
- }
-
- if (src < src_end && result == CODING_FINISH_NORMAL)
- result = CODING_FINISH_INSUFFICIENT_DST;
-
- /* If this is the last block of the text to be encoded, we must
- reset graphic planes and registers to the initial state, and
- flush out the carryover if any. */
- if (coding->mode & CODING_MODE_LAST_BLOCK)
- {
- ENCODE_RESET_PLANE_AND_REGISTER;
- if (COMPOSING_P (coding))
- *dst++ = ISO_CODE_ESC, *dst++ = '1';
- if (result == CODING_FINISH_INSUFFICIENT_SRC)
+ else if (ASCII_BYTE_P (c))
+ ENCODE_ISO_CHARACTER (CHARSET_ASCII, c, /* dummy */ c1);
+ else if (SINGLE_BYTE_CHAR_P (c))
{
- while (src < src_end && dst < dst_end)
- *dst++ = *src++;
+ *dst++ = c;
+ coding->errors++;
}
+ else
+ {
+ SPLIT_CHAR (c, charset, c1, c2);
+ ENCODE_ISO_CHARACTER (charset, c1, c2);
+ }
+
+ coding->consumed_char++;
}
- coding->consumed = src - source;
+
+ label_end_of_loop:
+ coding->consumed = src_base - source;
coding->produced = coding->produced_char = dst - destination;
- return result;
}
\f
b2 += b2 < 0x3F ? 0x40 : 0x62; \
} while (0)
-#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2) \
- do { \
- int c_alt, charset_alt = (charset); \
- if (!NILP (translation_table) \
- && ((c_alt = translate_char (translation_table, \
- -1, (charset), c1, c2)) >= 0)) \
- SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
- if (charset_alt == CHARSET_ASCII || charset_alt < 0) \
- DECODE_CHARACTER_ASCII (c1); \
- else if (CHARSET_DIMENSION (charset_alt) == 1) \
- DECODE_CHARACTER_DIMENSION1 (charset_alt, c1); \
- else \
- DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2); \
- } while (0)
-
-#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2) \
- do { \
- int c_alt, charset_alt; \
- if (!NILP (translation_table) \
- && ((c_alt = translate_char (translation_table, -1, \
- charset, c1, c2)) \
- >= 0)) \
- SPLIT_CHAR (c_alt, charset_alt, c1, c2); \
- else \
- charset_alt = charset; \
- if (charset_alt == charset_ascii) \
- *dst++ = c1; \
- else if (CHARSET_DIMENSION (charset_alt) == 1) \
- { \
- if (sjis_p && charset_alt == charset_katakana_jisx0201) \
- *dst++ = c1; \
- else if (sjis_p && charset_alt == charset_latin_jisx0201) \
- *dst++ = c1 & 0x7F; \
- else \
- { \
- *dst++ = charset_alt, *dst++ = c1; \
- coding->fake_multibyte = 1; \
- } \
- } \
- else \
- { \
- c1 &= 0x7F, c2 &= 0x7F; \
- if (sjis_p && (charset_alt == charset_jisx0208 \
- || charset_alt == charset_jisx0208_1978))\
- { \
- unsigned char s1, s2; \
- \
- ENCODE_SJIS (c1, c2, s1, s2); \
- *dst++ = s1, *dst++ = s2; \
- coding->fake_multibyte = 1; \
- } \
- else if (!sjis_p \
- && (charset_alt == charset_big5_1 \
- || charset_alt == charset_big5_2)) \
- { \
- unsigned char b1, b2; \
- \
- ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \
- *dst++ = b1, *dst++ = b2; \
- } \
- else \
- { \
- *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \
- coding->fake_multibyte = 1; \
- } \
- } \
- coding->consumed_char++; \
- } while (0)
-
/* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
Check if a text is encoded in SJIS. If it is, return
CODING_CATEGORY_MASK_SJIS, else return 0. */
detect_coding_sjis (src, src_end)
unsigned char *src, *src_end;
{
- unsigned char c;
+ int c;
+ /* Dummy for ONE_MORE_BYTE. */
+ struct coding_system dummy_coding;
+ struct coding_system *coding = &dummy_coding;
- while (src < src_end)
+ while (1)
{
- c = *src++;
+ ONE_MORE_BYTE (c);
if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
{
- if (src < src_end && *src++ < 0x40)
+ ONE_MORE_BYTE (c);
+ if (c < 0x40)
return 0;
}
}
+ label_end_of_loop:
return CODING_CATEGORY_MASK_SJIS;
}
detect_coding_big5 (src, src_end)
unsigned char *src, *src_end;
{
- unsigned char c;
+ int c;
+ /* Dummy for ONE_MORE_BYTE. */
+ struct coding_system dummy_coding;
+ struct coding_system *coding = &dummy_coding;
- while (src < src_end)
+ while (1)
{
- c = *src++;
+ ONE_MORE_BYTE (c);
if (c >= 0xA1)
{
- if (src >= src_end)
- break;
- c = *src++;
+ ONE_MORE_BYTE (c);
if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
return 0;
}
}
+ label_end_of_loop:
return CODING_CATEGORY_MASK_BIG5;
}
{
unsigned char c;
int seq_maybe_bytes;
+ /* Dummy for ONE_MORE_BYTE. */
+ struct coding_system dummy_coding;
+ struct coding_system *coding = &dummy_coding;
- while (src < src_end)
+ while (1)
{
- c = *src++;
+ ONE_MORE_BYTE (c);
if (UTF_8_1_OCTET_P (c))
continue;
else if (UTF_8_2_OCTET_LEADING_P (c))
do
{
- if (src >= src_end)
- return CODING_CATEGORY_MASK_UTF_8;
-
- c = *src++;
+ ONE_MORE_BYTE (c);
if (!UTF_8_EXTRA_OCTET_P (c))
return 0;
seq_maybe_bytes--;
while (seq_maybe_bytes > 0);
}
+ label_end_of_loop:
return CODING_CATEGORY_MASK_UTF_8;
}
detect_coding_utf_16 (src, src_end)
unsigned char *src, *src_end;
{
- if ((src + 1) >= src_end) return 0;
+ unsigned char c1, c2;
+ /* Dummy for TWO_MORE_BYTES. */
+ struct coding_system dummy_coding;
+ struct coding_system *coding = &dummy_coding;
- if ((src[0] == 0xFF) && (src[1] == 0xFE))
+ TWO_MORE_BYTES (c1, c2);
+
+ if ((c1 == 0xFF) && (c2 == 0xFE))
return CODING_CATEGORY_MASK_UTF_16_LE;
- else if ((src[0] == 0xFE) && (src[1] == 0xFF))
+ else if ((c1 == 0xFE) && (c2 == 0xFF))
return CODING_CATEGORY_MASK_UTF_16_BE;
+ label_end_of_loop:
return 0;
}
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */
-int
+static void
decode_coding_sjis_big5 (coding, source, destination,
src_bytes, dst_bytes, sjis_p)
struct coding_system *coding;
unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
- /* Since the maximum bytes produced by each loop is 4, we subtract 3
- from DST_END to assure overflow checking is necessary only at the
- head of loop. */
- unsigned char *adjusted_dst_end = dst_end - 3;
- Lisp_Object translation_table
- = coding->translation_table_for_decode;
- int result = CODING_FINISH_NORMAL;
+ /* SRC_BASE remembers the start position in source in each loop.
+ The loop will be exited when there's not enough source code
+ (within macro ONE_MORE_BYTE), or when there's not enough
+ destination area to produce a character (within macro
+ EMIT_CHAR). */
+ unsigned char *src_base;
+ Lisp_Object translation_table;
- if (!NILP (Venable_character_translation) && NILP (translation_table))
- translation_table = Vstandard_translation_table_for_decode;
+ if (NILP (Venable_character_translation))
+ translation_table = Qnil;
+ else
+ {
+ translation_table = coding->translation_table_for_decode;
+ if (NILP (translation_table))
+ translation_table = Vstandard_translation_table_for_decode;
+ }
coding->produced_char = 0;
- coding->fake_multibyte = 0;
- while (src < src_end && (dst_bytes
- ? (dst < adjusted_dst_end)
- : (dst < src - 3)))
+ while (1)
{
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source text
- to analyze two-byte character (within macro ONE_MORE_BYTE).
- In that case, SRC is reset to SRC_BASE before exiting. */
- unsigned char *src_base = src;
- unsigned char c1 = *src++, c2, c3, c4;
-
- if (c1 < 0x20)
+ int c, charset, c1, c2;
+
+ src_base = src;
+ ONE_MORE_BYTE (c1);
+
+ if (c1 < 0x80)
{
- if (c1 == '\r')
+ charset = CHARSET_ASCII;
+ if (c1 < 0x20)
{
- if (coding->eol_type == CODING_EOL_CRLF)
+ if (c1 == '\r')
{
- ONE_MORE_BYTE (c2);
- if (c2 == '\n')
- *dst++ = c2;
- else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+ if (coding->eol_type == CODING_EOL_CRLF)
{
- result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop_2;
+ ONE_MORE_BYTE (c2);
+ if (c2 == '\n')
+ c1 = c2;
+ else if (coding->mode
+ & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+ {
+ coding->result = CODING_FINISH_INCONSISTENT_EOL;
+ goto label_end_of_loop;
+ }
+ else
+ /* To process C2 again, SRC is subtracted by 1. */
+ src--;
}
- else
- /* To process C2 again, SRC is subtracted by 1. */
- *dst++ = c1, src--;
+ else if (coding->eol_type == CODING_EOL_CR)
+ c1 = '\n';
+ }
+ else if (c1 == '\n'
+ && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+ && (coding->eol_type == CODING_EOL_CR
+ || coding->eol_type == CODING_EOL_CRLF))
+ {
+ coding->result = CODING_FINISH_INCONSISTENT_EOL;
+ goto label_end_of_loop;
}
- else if (coding->eol_type == CODING_EOL_CR)
- *dst++ = '\n';
- else
- *dst++ = c1;
- }
- else if (c1 == '\n'
- && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- && (coding->eol_type == CODING_EOL_CR
- || coding->eol_type == CODING_EOL_CRLF))
- {
- result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop_2;
}
- else
- *dst++ = c1;
- coding->produced_char++;
}
- else if (c1 < 0x80)
- {
- c2 = 0; /* avoid warning */
- DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
- }
else
- {
+ {
if (sjis_p)
{
- if (c1 < 0xA0 || (c1 >= 0xE0 && c1 < 0xF0))
+ if (c1 >= 0xF0)
+ goto label_invalid_code;
+ if (c1 < 0xA0 || c1 >= 0xE0)
{
/* SJIS -> JISX0208 */
ONE_MORE_BYTE (c2);
- if (c2 >= 0x40 && c2 != 0x7F && c2 <= 0xFC)
- {
- DECODE_SJIS (c1, c2, c3, c4);
- DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
- }
- else
- goto label_invalid_code_2;
- }
- else if (c1 < 0xE0)
- /* SJIS -> JISX0201-Kana */
- {
- c2 = 0; /* avoid warning */
- DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
- /* dummy */ c2);
+ if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC)
+ goto label_invalid_code;
+ DECODE_SJIS (c1, c2, c1, c2);
+ charset = charset_jisx0208;
}
else
- goto label_invalid_code_1;
+ /* SJIS -> JISX0201-Kana */
+ charset = charset_katakana_jisx0201;
}
else
{
/* BIG5 -> Big5 */
- if (c1 >= 0xA1 && c1 <= 0xFE)
- {
- ONE_MORE_BYTE (c2);
- if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
- {
- int charset;
-
- DECODE_BIG5 (c1, c2, charset, c3, c4);
- DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
- }
- else
- goto label_invalid_code_2;
- }
- else
- goto label_invalid_code_1;
+ if (c1 < 0xA1 || c1 > 0xFE)
+ goto label_invalid_code;
+ ONE_MORE_BYTE (c2);
+ if (c2 < 0x40 || (c2 > 0x7E && c2 < 0xA1) || c2 > 0xFE)
+ goto label_invalid_code;
+ DECODE_BIG5 (c1, c2, charset, c1, c2);
}
}
- continue;
- label_invalid_code_1:
- *dst++ = c1;
- coding->produced_char++;
- coding->fake_multibyte = 1;
+ c = DECODE_ISO_CHARACTER (charset, c1, c2);
+ EMIT_CHAR (c);
continue;
- label_invalid_code_2:
- *dst++ = c1; *dst++= c2;
- coding->produced_char += 2;
- coding->fake_multibyte = 1;
- continue;
-
- label_end_of_loop:
- result = CODING_FINISH_INSUFFICIENT_SRC;
- label_end_of_loop_2:
+ label_invalid_code:
+ coding->errors++;
src = src_base;
- break;
- }
-
- if (src < src_end)
- {
- if (result == CODING_FINISH_NORMAL)
- result = CODING_FINISH_INSUFFICIENT_DST;
- else if (result != CODING_FINISH_INCONSISTENT_EOL
- && coding->mode & CODING_MODE_LAST_BLOCK)
- {
- src_bytes = src_end - src;
- if (dst_bytes && (dst_end - dst < src_bytes))
- src_bytes = dst_end - dst;
- bcopy (dst, src, src_bytes);
- src += src_bytes;
- dst += src_bytes;
- coding->fake_multibyte = 1;
- }
+ c = *src++;
+ EMIT_CHAR (c);
}
- coding->consumed = coding->consumed_char = src - source;
+ label_end_of_loop:
+ coding->consumed = coding->consumed_char = src_base - source;
coding->produced = dst - destination;
- return result;
+ return;
}
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
- This function can encode `charset_ascii', `charset_katakana_jisx0201',
- `charset_jisx0208', `charset_big5_1', and `charset_big5-2'. We are
- sure that all these charsets are registered as official charset
+ This function can encode charsets `ascii', `katakana-jisx0201',
+ `japanese-jisx0208', `chinese-big5-1', and `chinese-big5-2'. We
+ are sure that all these charsets are registered as official charset
(i.e. do not have extended leading-codes). Characters of other
charsets are produced without any encoding. If SJIS_P is 1, encode
SJIS text, else encode BIG5 text. */
-int
+static void
encode_coding_sjis_big5 (coding, source, destination,
src_bytes, dst_bytes, sjis_p)
struct coding_system *coding;
unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
- /* Since the maximum bytes produced by each loop is 2, we subtract 1
- from DST_END to assure overflow checking is necessary only at the
- head of loop. */
- unsigned char *adjusted_dst_end = dst_end - 1;
- Lisp_Object translation_table
- = coding->translation_table_for_encode;
- int result = CODING_FINISH_NORMAL;
-
- if (!NILP (Venable_character_translation) && NILP (translation_table))
- translation_table = Vstandard_translation_table_for_encode;
+ /* SRC_BASE remembers the start position in source in each loop.
+ The loop will be exited when there's not enough source text to
+ analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
+ there's not enough destination area to produce encoded codes
+ (within macro EMIT_BYTES). */
+ unsigned char *src_base;
+ Lisp_Object translation_table;
- coding->consumed_char = 0;
- coding->fake_multibyte = 0;
- while (src < src_end && (dst_bytes
- ? (dst < adjusted_dst_end)
- : (dst < src - 1)))
+ if (NILP (Venable_character_translation))
+ translation_table = Qnil;
+ else
{
- /* SRC_BASE remembers the start position in source in each loop.
- The loop will be exited when there's not enough source text
- to analyze multi-byte codes (within macros ONE_MORE_BYTE and
- TWO_MORE_BYTES). In that case, SRC is reset to SRC_BASE
- before exiting. */
- unsigned char *src_base = src;
- unsigned char c1 = *src++, c2, c3, c4;
-
- switch (emacs_code_class[c1])
- {
- case EMACS_ascii_code:
- ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
- break;
+ translation_table = coding->translation_table_for_decode;
+ if (NILP (translation_table))
+ translation_table = Vstandard_translation_table_for_decode;
+ }
- case EMACS_control_code:
- *dst++ = c1;
- coding->consumed_char++;
- break;
+ while (1)
+ {
+ int c, charset, c1, c2;
- case EMACS_carriage_return_code:
- if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
+ src_base = src;
+ ONE_MORE_CHAR (c);
+
+ /* Now encode the character C. */
+ if (SINGLE_BYTE_CHAR_P (c))
+ {
+ switch (c)
{
- *dst++ = c1;
- coding->consumed_char++;
- break;
+ case '\r':
+ if (!coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
+ {
+ EMIT_ONE_BYTE (c);
+ break;
+ }
+ c = '\n';
+ case '\n':
+ if (coding->eol_type == CODING_EOL_CRLF)
+ {
+ EMIT_TWO_BYTES ('\r', c);
+ break;
+ }
+ else if (coding->eol_type == CODING_EOL_CR)
+ c = '\r';
+ default:
+ EMIT_ONE_BYTE (c);
+ }
+ }
+ else
+ {
+ SPLIT_CHAR (c, charset, c1, c2);
+ if (sjis_p)
+ {
+ if (charset == charset_jisx0208
+ || charset == charset_jisx0208_1978)
+ {
+ ENCODE_SJIS (c1, c2, c1, c2);
+ EMIT_TWO_BYTES (c1, c2);
+ }
+ else if (charset == charset_latin_jisx0201)
+ EMIT_ONE_BYTE (c1);
+ else
+ /* There's no way other than producing the internal
+ codes as is. */
+ EMIT_BYTES (src_base, src);
}
- /* fall down to treat '\r' as '\n' ... */
-
- case EMACS_linefeed_code:
- if (coding->eol_type == CODING_EOL_LF
- || coding->eol_type == CODING_EOL_UNDECIDED)
- *dst++ = '\n';
- else if (coding->eol_type == CODING_EOL_CRLF)
- *dst++ = '\r', *dst++ = '\n';
else
- *dst++ = '\r';
- coding->consumed_char++;
- break;
-
- case EMACS_leading_code_2:
- ONE_MORE_BYTE (c2);
- ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
- break;
-
- case EMACS_leading_code_3:
- TWO_MORE_BYTES (c2, c3);
- ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
- break;
-
- case EMACS_leading_code_4:
- THREE_MORE_BYTES (c2, c3, c4);
- ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
- break;
-
- default: /* i.e. case EMACS_invalid_code: */
- *dst++ = c1;
- coding->consumed_char++;
+ {
+ if (charset == charset_big5_1 || charset == charset_big5_2)
+ {
+ ENCODE_BIG5 (charset, c1, c2, c1, c2);
+ EMIT_TWO_BYTES (c1, c2);
+ }
+ else
+ /* There's no way other than producing the internal
+ codes as is. */
+ EMIT_BYTES (src_base, src);
+ }
}
- continue;
-
- label_end_of_loop:
- result = CODING_FINISH_INSUFFICIENT_SRC;
- src = src_base;
- break;
+ coding->consumed_char++;
}
- if (result == CODING_FINISH_NORMAL
- && src < src_end)
- result = CODING_FINISH_INSUFFICIENT_DST;
- coding->consumed = src - source;
+ label_end_of_loop:
+ coding->consumed = src_base - source;
coding->produced = coding->produced_char = dst - destination;
- return result;
}
\f
unsigned char *src, *src_end;
{
unsigned char *valid;
+ int c;
+ /* Dummy for ONE_MORE_BYTE. */
+ struct coding_system dummy_coding;
+ struct coding_system *coding = &dummy_coding;
/* No coding system is assigned to coding-category-ccl. */
if (!coding_system_table[CODING_CATEGORY_IDX_CCL])
return 0;
valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
- while (src < src_end)
+ while (1)
{
- if (! valid[*src]) return 0;
- src++;
+ ONE_MORE_BYTE (c);
+ if (! valid[c])
+ return 0;
}
+ label_end_of_loop:
return CODING_CATEGORY_MASK_CCL;
}
\f
/*** 6. End-of-line handlers ***/
-/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
- This function is called only when `coding->eol_type' is
- CODING_EOL_CRLF or CODING_EOL_CR. */
+/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
-int
+static void
decode_eol (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
unsigned char *source, *destination;
int src_bytes, dst_bytes;
{
unsigned char *src = source;
- unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
- unsigned char *dst_end = destination + dst_bytes;
- unsigned char c;
- int result = CODING_FINISH_NORMAL;
-
- coding->fake_multibyte = 0;
-
- if (src_bytes <= 0)
- {
- coding->produced = coding->produced_char = 0;
- coding->consumed = coding->consumed_char = 0;
- return result;
- }
-
+ unsigned char *src_end = src + src_bytes;
+ unsigned char *dst_end = dst + dst_bytes;
+ Lisp_Object translation_table;
+ /* SRC_BASE remembers the start position in source in each loop.
+ The loop will be exited when there's not enough source code
+ (within macro ONE_MORE_BYTE), or when there's not enough
+ destination area to produce a character (within macro
+ EMIT_CHAR). */
+ unsigned char *src_base;
+ int c;
+
+ translation_table = Qnil;
switch (coding->eol_type)
{
case CODING_EOL_CRLF:
- {
- /* Since the maximum bytes produced by each loop is 2, we
- subtract 1 from DST_END to assure overflow checking is
- necessary only at the head of loop. */
- unsigned char *adjusted_dst_end = dst_end - 1;
-
- while (src < src_end && (dst_bytes
- ? (dst < adjusted_dst_end)
- : (dst < src - 1)))
- {
- unsigned char *src_base = src;
-
- c = *src++;
- if (c == '\r')
- {
- ONE_MORE_BYTE (c);
- if (c == '\n')
- *dst++ = c;
- else
- {
- if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
- {
- result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop_2;
- }
- src--;
- *dst++ = '\r';
- if (BASE_LEADING_CODE_P (c))
- coding->fake_multibyte = 1;
- }
- }
- else if (c == '\n'
- && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
- {
- result = CODING_FINISH_INCONSISTENT_EOL;
- goto label_end_of_loop_2;
- }
- else
- {
- *dst++ = c;
- if (BASE_LEADING_CODE_P (c))
- coding->fake_multibyte = 1;
- }
- continue;
-
- label_end_of_loop:
- result = CODING_FINISH_INSUFFICIENT_SRC;
- label_end_of_loop_2:
- src = src_base;
- break;
- }
- if (src < src_end)
- {
- if (result == CODING_FINISH_NORMAL)
- result = CODING_FINISH_INSUFFICIENT_DST;
- else if (result != CODING_FINISH_INCONSISTENT_EOL
- && coding->mode & CODING_MODE_LAST_BLOCK)
- {
- /* This is the last block of the text to be decoded.
- We flush out all remaining codes. */
- src_bytes = src_end - src;
- if (dst_bytes && (dst_end - dst < src_bytes))
- src_bytes = dst_end - dst;
- bcopy (src, dst, src_bytes);
- dst += src_bytes;
- src += src_bytes;
- }
- }
- }
- break;
-
- case CODING_EOL_CR:
- if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+ while (1)
{
- while (src < src_end)
+ src_base = src;
+ ONE_MORE_BYTE (c);
+ if (c == '\r')
{
- if ((c = *src++) == '\n')
- break;
- if (BASE_LEADING_CODE_P (c))
- coding->fake_multibyte = 1;
+ ONE_MORE_BYTE (c);
+ if (c != '\n')
+ {
+ if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+ {
+ coding->result = CODING_FINISH_INCONSISTENT_EOL;
+ goto label_end_of_loop;
+ }
+ src--;
+ c = '\r';
+ }
}
- if (*--src == '\n')
+ else if (c == '\n'
+ && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
{
- src_bytes = src - source;
- result = CODING_FINISH_INCONSISTENT_EOL;
+ coding->result = CODING_FINISH_INCONSISTENT_EOL;
+ goto label_end_of_loop;
}
+ EMIT_CHAR (c);
}
- if (dst_bytes && src_bytes > dst_bytes)
+ break;
+
+ case CODING_EOL_CR:
+ while (1)
{
- result = CODING_FINISH_INSUFFICIENT_DST;
- src_bytes = dst_bytes;
+ src_base = src;
+ ONE_MORE_BYTE (c);
+ if (c == '\n')
+ {
+ if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+ {
+ coding->result = CODING_FINISH_INCONSISTENT_EOL;
+ goto label_end_of_loop;
+ }
+ }
+ else if (c == '\r')
+ c = '\n';
+ EMIT_CHAR (c);
}
- if (dst_bytes)
- bcopy (source, destination, src_bytes);
- else
- safe_bcopy (source, destination, src_bytes);
- src = source + src_bytes;
- while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
break;
- default: /* i.e. case: CODING_EOL_LF */
- if (dst_bytes && src_bytes > dst_bytes)
+ default: /* no need for EOL handling */
+ while (1)
{
- result = CODING_FINISH_INSUFFICIENT_DST;
- src_bytes = dst_bytes;
+ src_base = src;
+ ONE_MORE_BYTE (c);
+ EMIT_CHAR (c);
}
- if (dst_bytes)
- bcopy (source, destination, src_bytes);
- else
- safe_bcopy (source, destination, src_bytes);
- src += src_bytes;
- dst += src_bytes;
- coding->fake_multibyte = 1;
- break;
}
- coding->consumed = coding->consumed_char = src - source;
- coding->produced = coding->produced_char = dst - destination;
- return result;
+ label_end_of_loop:
+ coding->consumed = coding->consumed_char = src_base - source;
+ coding->produced = dst - destination;
+ return;
}
/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode
- format of end-of-line according to `coding->eol_type'. If
- `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
- '\r' in source text also means end-of-line. */
+ format of end-of-line according to `coding->eol_type'. It also
+ convert multibyte form 8-bit characers to unibyte if
+ CODING->src_multibyte is nonzero. If `coding->mode &
+ CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text
+ also means end-of-line. */
-int
+static void
encode_eol (coding, source, destination, src_bytes, dst_bytes)
struct coding_system *coding;
unsigned char *source, *destination;
{
unsigned char *src = source;
unsigned char *dst = destination;
- int result = CODING_FINISH_NORMAL;
-
- coding->fake_multibyte = 0;
+ unsigned char *src_end = src + src_bytes;
+ unsigned char *dst_end = dst + dst_bytes;
+ Lisp_Object translation_table;
+ /* SRC_BASE remembers the start position in source in each loop.
+ The loop will be exited when there's not enough source text to
+ analyze multi-byte codes (within macro ONE_MORE_CHAR), or when
+ there's not enough destination area to produce encoded codes
+ (within macro EMIT_BYTES). */
+ unsigned char *src_base;
+ int c;
+ int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY;
+
+ translation_table = Qnil;
+ if (coding->src_multibyte
+ && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL)
+ {
+ src_end--;
+ src_bytes--;
+ coding->result = CODING_FINISH_INSUFFICIENT_SRC;
+ }
if (coding->eol_type == CODING_EOL_CRLF)
{
- unsigned char c;
- unsigned char *src_end = source + src_bytes;
- unsigned char *dst_end = destination + dst_bytes;
- /* Since the maximum bytes produced by each loop is 2, we
- subtract 1 from DST_END to assure overflow checking is
- necessary only at the head of loop. */
- unsigned char *adjusted_dst_end = dst_end - 1;
-
- while (src < src_end && (dst_bytes
- ? (dst < adjusted_dst_end)
- : (dst < src - 1)))
+ while (src < src_end)
{
+ src_base = src;
c = *src++;
- if (c == '\n'
- || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
- *dst++ = '\r', *dst++ = '\n';
+ if (c >= 0x20)
+ EMIT_ONE_BYTE (c);
+ else if (c == '\n' || (c == '\r' && selective_display))
+ EMIT_TWO_BYTES ('\r', '\n');
else
- {
- *dst++ = c;
- if (BASE_LEADING_CODE_P (c))
- coding->fake_multibyte = 1;
- }
+ EMIT_ONE_BYTE (c);
}
- if (src < src_end)
- result = CODING_FINISH_INSUFFICIENT_DST;
+ label_end_of_loop:
}
else
{
- unsigned char c;
-
- if (dst_bytes && src_bytes > dst_bytes)
+ if (src_bytes <= dst_bytes)
{
- src_bytes = dst_bytes;
- result = CODING_FINISH_INSUFFICIENT_DST;
+ safe_bcopy (src, dst, src_bytes);
+ src_base = src_end;
+ dst += src_bytes;
}
- if (dst_bytes)
- bcopy (source, destination, src_bytes);
else
- safe_bcopy (source, destination, src_bytes);
- dst_bytes = src_bytes;
+ {
+ if (coding->src_multibyte
+ && *(src + dst_bytes - 1) == LEADING_CODE_8_BIT_CONTROL)
+ dst_bytes--;
+ safe_bcopy (src, dst, dst_bytes);
+ src_base = src + dst_bytes;
+ dst = destination + dst_bytes;
+ coding->result = CODING_FINISH_INSUFFICIENT_DST;
+ }
if (coding->eol_type == CODING_EOL_CR)
{
- while (src_bytes--)
- {
- if ((c = *dst++) == '\n')
- dst[-1] = '\r';
- else if (BASE_LEADING_CODE_P (c))
- coding->fake_multibyte = 1;
- }
+ for (src = destination; src < dst; src++)
+ if (*src == '\n') *src = '\r';
}
- else
+ else if (selective_display)
{
- if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
- {
- while (src_bytes--)
- if (*dst++ == '\r') dst[-1] = '\n';
- }
- coding->fake_multibyte = 1;
+ for (src = destination; src < dst; src++)
+ if (*src == '\r') *src = '\n';
}
- src = source + dst_bytes;
- dst = destination + dst_bytes;
}
+ if (coding->src_multibyte)
+ dst = destination + str_as_unibyte (destination, dst - destination);
- coding->consumed = coding->consumed_char = src - source;
- coding->produced = coding->produced_char = dst - destination;
- return result;
+ coding->consumed = src_base - source;
+ coding->produced = dst - destination;
}
\f
if (VECTORP (tmp))
val = XVECTOR (tmp)->contents[coding->eol_type];
}
- setup_coding_system (val, coding);
- /* Set this again because setup_coding_system reset this member. */
- coding->heading_ascii = skip;
+
+ /* Setup this new coding system while preserving some slots. */
+ {
+ int src_multibyte = coding->src_multibyte;
+ int dst_multibyte = coding->dst_multibyte;
+
+ setup_coding_system (val, coding);
+ coding->src_multibyte = src_multibyte;
+ coding->dst_multibyte = dst_multibyte;
+ coding->heading_ascii = skip;
+ }
}
/* Detect how end-of-line of a text of length SRC_BYTES pointed by
val = Fget (coding->symbol, Qeol_type);
if (VECTORP (val) && XVECTOR (val)->size == 3)
{
+ int src_multibyte = coding->src_multibyte;
+ int dst_multibyte = coding->dst_multibyte;
+
setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
+ coding->src_multibyte = src_multibyte;
+ coding->dst_multibyte = dst_multibyte;
coding->heading_ascii = skip;
}
}
#define CONVERSION_BUFFER_EXTRA_ROOM 256
-#define DECODING_BUFFER_MAG(coding) \
- (coding->type == coding_type_iso2022 \
- ? 3 \
- : ((coding->type == coding_type_sjis || coding->type == coding_type_big5) \
- ? 2 \
- : (coding->type == coding_type_raw_text \
- ? 1 \
- : (coding->type == coding_type_ccl \
- ? coding->spec.ccl.decoder.buf_magnification \
- : 2))))
+#define DECODING_BUFFER_MAG(coding) \
+ (coding->type == coding_type_iso2022 \
+ ? 3 \
+ : (coding->type == coding_type_ccl \
+ ? coding->spec.ccl.decoder.buf_magnification \
+ : 2))
/* Return maximum size (bytes) of a buffer enough for decoding
SRC_BYTES of text encoded in CODING. */
if (coding->type == coding_type_ccl)
magnification = coding->spec.ccl.encoder.buf_magnification;
- else
+ else if (CODING_REQUIRE_ENCODING (coding))
magnification = 3;
+ else
+ magnification = 1;
return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
}
coding->produced = ccl_driver (ccl, source, destination,
src_bytes, dst_bytes, &(coding->consumed));
- coding->produced_char
- = (encodep
- ? coding->produced
- : multibyte_chars_in_text (destination, coding->produced));
- coding->consumed_char
- = multibyte_chars_in_text (source, coding->consumed);
+ if (encodep)
+ coding->produced_char = coding->produced;
+ else
+ {
+ int bytes
+ = dst_bytes ? dst_bytes : source + coding->consumed - destination;
+ coding->produced = str_as_multibyte (destination, bytes,
+ coding->produced,
+ &(coding->produced_char));
+ }
switch (ccl->status)
{
/* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
decoding, it may detect coding system and format of end-of-line if
- those are not yet decided.
-
- This function does not make full use of DESTINATION buffer. For
- instance, if coding->type is coding_type_iso2022, it uses only
- (DST_BYTES - 7) bytes of DESTINATION buffer. In the case that
- DST_BYTES is decided by the function decoding_buffer_size, it
- contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM).
- So, this function can decode the full SOURCE. But, in the other
- case, if you want to avoid carry over, you must supply at least 7
- bytes more area in DESTINATION buffer than expected maximum bytes
- that will be produced by this function. */
+ those are not yet decided. The source should be unibyte, the
+ result is multibyte if CODING->dst_multibyte is nonzero, else
+ unibyte. */
int
decode_coding (coding, source, destination, src_bytes, dst_bytes)
unsigned char *source, *destination;
int src_bytes, dst_bytes;
{
- int result;
-
- if (src_bytes <= 0
- && coding->type != coding_type_ccl
- && ! (coding->mode & CODING_MODE_LAST_BLOCK
- && CODING_REQUIRE_FLUSHING (coding)))
- {
- coding->produced = coding->produced_char = 0;
- coding->consumed = coding->consumed_char = 0;
- coding->fake_multibyte = 0;
- return CODING_FINISH_NORMAL;
- }
-
if (coding->type == coding_type_undecided)
detect_coding (coding, source, src_bytes);
if (coding->eol_type == CODING_EOL_UNDECIDED)
detect_eol (coding, source, src_bytes);
+ coding->produced = coding->produced_char = 0;
+ coding->consumed = coding->consumed_char = 0;
+ coding->errors = 0;
+ coding->result = CODING_FINISH_NORMAL;
+
switch (coding->type)
{
- case coding_type_emacs_mule:
- case coding_type_undecided:
- case coding_type_raw_text:
- if (coding->eol_type == CODING_EOL_LF
- || coding->eol_type == CODING_EOL_UNDECIDED)
- goto label_no_conversion;
- result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
- break;
-
case coding_type_sjis:
- result = decode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, 1);
+ decode_coding_sjis_big5 (coding, source, destination,
+ src_bytes, dst_bytes, 1);
break;
case coding_type_iso2022:
- result = decode_coding_iso2022 (coding, source, destination,
- src_bytes, dst_bytes);
+ decode_coding_iso2022 (coding, source, destination,
+ src_bytes, dst_bytes);
break;
case coding_type_big5:
- result = decode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, 0);
+ decode_coding_sjis_big5 (coding, source, destination,
+ src_bytes, dst_bytes, 0);
+ break;
+
+ case coding_type_emacs_mule:
+ decode_coding_emacs_mule (coding, source, destination,
+ src_bytes, dst_bytes);
break;
case coding_type_ccl:
- result = ccl_coding_driver (coding, source, destination,
- src_bytes, dst_bytes, 0);
+ ccl_coding_driver (coding, source, destination,
+ src_bytes, dst_bytes, 0);
break;
- default: /* i.e. case coding_type_no_conversion: */
- label_no_conversion:
- if (dst_bytes && src_bytes > dst_bytes)
- {
- coding->produced = dst_bytes;
- result = CODING_FINISH_INSUFFICIENT_DST;
- }
- else
+ default:
+ decode_eol (coding, source, destination, src_bytes, dst_bytes);
+ }
+
+ if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
+ && coding->consumed == src_bytes)
+ coding->result = CODING_FINISH_NORMAL;
+
+ if (coding->mode & CODING_MODE_LAST_BLOCK
+ && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
+ {
+ unsigned char *src = source + coding->consumed;
+ unsigned char *dst = destination + coding->produced;
+
+ src_bytes -= coding->consumed;
+ coding->errors++;
+ if (COMPOSING_P (coding))
+ DECODE_COMPOSITION_END ('1');
+ while (src_bytes--)
{
- coding->produced = src_bytes;
- result = CODING_FINISH_NORMAL;
+ int c = *src++;
+ dst += CHAR_STRING (c, dst);
+ coding->produced_char++;
}
- if (dst_bytes)
- bcopy (source, destination, coding->produced);
- else
- safe_bcopy (source, destination, coding->produced);
- coding->fake_multibyte = 1;
- coding->consumed
- = coding->consumed_char = coding->produced_char = coding->produced;
- break;
+ coding->consumed = coding->consumed_char = src - source;
+ coding->produced = dst - destination;
}
- return result;
-}
+ if (!coding->dst_multibyte)
+ {
+ coding->produced = str_as_unibyte (destination, coding->produced);
+ coding->produced_char = coding->produced;
+ }
-/* See "GENERAL NOTES about `encode_coding_XXX ()' functions".
+ return coding->result;
+}
- This function does not make full use of DESTINATION buffer. For
- instance, if coding->type is coding_type_iso2022, it uses only
- (DST_BYTES - 20) bytes of DESTINATION buffer. In the case that
- DST_BYTES is decided by the function encoding_buffer_size, it
- contains extra 256 bytes (defined by CONVERSION_BUFFER_EXTRA_ROOM).
- So, this function can encode the full SOURCE. But, in the other
- case, if you want to avoid carry over, you must supply at least 20
- bytes more area in DESTINATION buffer than expected maximum bytes
- that will be produced by this function. */
+/* See "GENERAL NOTES about `encode_coding_XXX ()' functions". The
+ multibyteness of the source is CODING->src_multibyte, the
+ multibyteness of the result is always unibyte. */
int
encode_coding (coding, source, destination, src_bytes, dst_bytes)
unsigned char *source, *destination;
int src_bytes, dst_bytes;
{
- int result;
-
- if (src_bytes <= 0
- && ! (coding->mode & CODING_MODE_LAST_BLOCK
- && CODING_REQUIRE_FLUSHING (coding)))
- {
- coding->produced = coding->produced_char = 0;
- coding->consumed = coding->consumed_char = 0;
- coding->fake_multibyte = 0;
- return CODING_FINISH_NORMAL;
- }
+ coding->produced = coding->produced_char = 0;
+ coding->consumed = coding->consumed_char = 0;
+ coding->errors = 0;
+ coding->result = CODING_FINISH_NORMAL;
switch (coding->type)
{
- case coding_type_emacs_mule:
- case coding_type_undecided:
- case coding_type_raw_text:
- if (coding->eol_type == CODING_EOL_LF
- || coding->eol_type == CODING_EOL_UNDECIDED)
- goto label_no_conversion;
- result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
- break;
-
case coding_type_sjis:
- result = encode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, 1);
+ encode_coding_sjis_big5 (coding, source, destination,
+ src_bytes, dst_bytes, 1);
break;
case coding_type_iso2022:
- result = encode_coding_iso2022 (coding, source, destination,
- src_bytes, dst_bytes);
+ encode_coding_iso2022 (coding, source, destination,
+ src_bytes, dst_bytes);
break;
case coding_type_big5:
- result = encode_coding_sjis_big5 (coding, source, destination,
- src_bytes, dst_bytes, 0);
+ encode_coding_sjis_big5 (coding, source, destination,
+ src_bytes, dst_bytes, 0);
+ break;
+
+ case coding_type_emacs_mule:
+ encode_coding_emacs_mule (coding, source, destination,
+ src_bytes, dst_bytes);
break;
case coding_type_ccl:
- result = ccl_coding_driver (coding, source, destination,
- src_bytes, dst_bytes, 1);
+ ccl_coding_driver (coding, source, destination,
+ src_bytes, dst_bytes, 1);
break;
- default: /* i.e. case coding_type_no_conversion: */
- label_no_conversion:
- if (dst_bytes && src_bytes > dst_bytes)
- {
- coding->produced = dst_bytes;
- result = CODING_FINISH_INSUFFICIENT_DST;
- }
- else
- {
- coding->produced = src_bytes;
- result = CODING_FINISH_NORMAL;
- }
- if (dst_bytes)
- bcopy (source, destination, coding->produced);
- else
- safe_bcopy (source, destination, coding->produced);
- if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
+ default:
+ encode_eol (coding, source, destination, src_bytes, dst_bytes);
+ }
+
+ if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
+ && coding->consumed == src_bytes)
+ coding->result = CODING_FINISH_NORMAL;
+
+ if (coding->mode & CODING_MODE_LAST_BLOCK)
+ {
+ unsigned char *src = source + coding->consumed;
+ unsigned char *src_end = src + src_bytes;
+ unsigned char *dst = destination + coding->produced;
+
+ if (coding->type == coding_type_iso2022)
+ ENCODE_RESET_PLANE_AND_REGISTER;
+ if (COMPOSING_P (coding))
+ *dst++ = ISO_CODE_ESC, *dst++ = '1';
+ if (coding->consumed < src_bytes)
{
- unsigned char *p = destination, *pend = p + coding->produced;
- while (p < pend)
- if (*p++ == '\015') p[-1] = '\n';
+ int len = src_bytes - coding->consumed;
+
+ BCOPY_SHORT (source + coding->consumed, dst, len);
+ if (coding->src_multibyte)
+ len = str_as_unibyte (dst, len);
+ dst += len;
+ coding->consumed = src_bytes;
}
- coding->fake_multibyte = 1;
- coding->consumed
- = coding->consumed_char = coding->produced_char = coding->produced;
- break;
+ coding->produced = coding->produced_char = dst - destination;
}
- return result;
+ return coding->result;
}
/* Scan text in the region between *BEG and *END (byte positions),
skip characters which we don't have to decode by coding system
CODING at the head and tail, then set *BEG and *END to the region
of the text we actually have to convert. The caller should move
- the gap out of the region in advance.
+ the gap out of the region in advance if the region is from a
+ buffer.
If STR is not NULL, *BEG and *END are indices into STR. */
if (coding->type == coding_type_ccl
|| coding->type == coding_type_undecided
- || !NILP (coding->post_read_conversion))
+ || coding->eol_type != CODING_EOL_LF
+ || !NILP (coding->post_read_conversion)
+ || coding->composing != COMPOSITION_DISABLED)
{
/* We can't skip any data. */
return;
}
- else if (coding->type == coding_type_no_conversion)
+ if (coding->type == coding_type_no_conversion
+ || coding->type == coding_type_raw_text
+ || coding->type == coding_type_emacs_mule)
{
/* We need no conversion, but don't have to skip any data here.
Decoding routine handles them effectively anyway. */
return;
}
- eol_conversion = (coding->eol_type != CODING_EOL_LF);
-
- if ((! eol_conversion) && (coding->heading_ascii >= 0))
+ if (coding->heading_ascii >= 0)
/* Detection routine has already found how much we can skip at the
head. */
*beg += coding->heading_ascii;
switch (coding->type)
{
- case coding_type_emacs_mule:
- case coding_type_raw_text:
- if (eol_conversion)
- {
- if (coding->heading_ascii < 0)
- while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
- while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
- endp--;
- /* Do not consider LF as ascii if preceded by CR, since that
- confuses eol decoding. */
- if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
- endp++;
- }
- else
- begp = endp;
- break;
-
case coding_type_sjis:
case coding_type_big5:
/* We can skip all ASCII characters at the head. */
endp++;
break;
- default: /* i.e. case coding_type_iso2022: */
+ case coding_type_iso2022:
if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
/* We can't skip any data. */
break;
endp = eight_bit;
}
}
+ break;
+
+ default:
+ abort ();
}
*beg += begp - begp_orig;
*end += endp - endp_orig;
int eol_conversion;
Lisp_Object translation_table;
- if (coding->type == coding_type_ccl)
- /* We can't skip any data. */
- return;
- else if (coding->type == coding_type_no_conversion)
+ if (coding->type == coding_type_ccl
+ || coding->eol_type == CODING_EOL_CRLF
+ || coding->eol_type == CODING_EOL_CR
+ || coding->cmp_data && coding->cmp_data->used > 0)
{
- /* We need no conversion. */
- *beg = *end;
+ /* We can't skip any data. */
+ return;
+ }
+ if (coding->type == coding_type_no_conversion
+ || coding->type == coding_type_raw_text
+ || coding->type == coding_type_emacs_mule
+ || coding->type == coding_type_undecided)
+ {
+ /* We need no conversion, but don't have to skip any data here.
+ Encoding routine handles them effectively anyway. */
return;
}
the caller is expected to have handled it already. */
switch (coding->type)
{
- case coding_type_undecided:
- case coding_type_emacs_mule:
- case coding_type_raw_text:
- if (eol_conversion)
- {
- while (begp < endp && *begp != '\n') begp++;
- while (begp < endp && endp[-1] != '\n') endp--;
- }
- else
- begp = endp;
- break;
-
case coding_type_iso2022:
if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII)
/* We can't skip any data. */
}
/* fall down ... */
- default:
+ case coding_type_sjis:
+ case coding_type_big5:
/* We can skip all ASCII characters at the head and tail. */
if (eol_conversion)
while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
else
while (begp < endp && *(endp - 1) < 0x80) endp--;
break;
+
+ default:
+ abort ();
}
*beg += begp - begp_orig;
If REPLACE is nonzero, we do various things as if the original text
is deleted and a new text is inserted. See the comments in
- replace_range (insdel.c) to know what we are doing. */
+ replace_range (insdel.c) to know what we are doing.
+
+ If REPLACE is zero, it is assumed that the source text is unibyte.
+ Otherwize, it is assumed that the source text is multibyte. */
int
code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
int require, inserted, inserted_byte;
int head_skip, tail_skip, total_skip = 0;
Lisp_Object saved_coding_symbol;
- int multibyte = !NILP (current_buffer->enable_multibyte_characters);
int first = 1;
- int fake_multibyte = 0;
unsigned char *src, *dst;
Lisp_Object deletion;
int orig_point = PT, orig_len = len;
int prev_Z;
+ int multibyte_p = !NILP (current_buffer->enable_multibyte_characters);
+
+ coding->src_multibyte = replace && multibyte_p;
+ coding->dst_multibyte = multibyte_p;
deletion = Qnil;
saved_coding_symbol = Qnil;
if (saved_from != from)
{
to = from + len;
- if (multibyte)
- from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
- else
- from_byte = from, to_byte = to;
+ from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
len_byte = to_byte - from_byte;
}
}
}
}
- if (encodep
- ? ! CODING_REQUIRE_ENCODING (coding)
- : ! CODING_REQUIRE_DECODING (coding))
- {
- coding->consumed_char = len;
- coding->consumed = len_byte;
- coding->produced = len_byte;
- if (multibyte
- && ! replace
- /* See the comment of the member heading_ascii in coding.h. */
- && coding->heading_ascii < len_byte)
- {
- /* We still may have to combine byte at the head and the
- tail of the text in the region. */
- if (from < GPT && GPT < to)
- move_gap_both (to, to_byte);
- len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
- adjust_after_insert (from, from_byte, to, to_byte, len);
- coding->produced_char = len;
- }
- else
- {
- if (!replace)
- adjust_after_insert (from, from_byte, to, to_byte, len_byte);
- coding->produced_char = len_byte;
- }
- return 0;
- }
-
/* Now we convert the text. */
/* For encoding, we must process pre-write-conversion in advance. */
- if (encodep
- && ! NILP (coding->pre_write_conversion)
+ if (! inhibit_pre_post_conversion
+ && encodep
&& SYMBOLP (coding->pre_write_conversion)
&& ! NILP (Ffboundp (coding->pre_write_conversion)))
{
orig_point = from;
orig_len = len;
to = from + len;
- from_byte = multibyte ? CHAR_TO_BYTE (from) : from_byte;
- to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
+ from_byte = CHAR_TO_BYTE (from);
+ to_byte = CHAR_TO_BYTE (to);
len_byte = to_byte - from_byte;
TEMP_SET_PT_BOTH (from, from_byte);
}
coding_allocate_composition_data (coding, from);
}
- /* For conversion by CCL program and for encoding with composition
- handling, we can't skip any character because we may convert or
- compose even ASCII characters. */
- if (coding->type != coding_type_ccl
- && (!encodep || coding->cmp_data == NULL))
- {
- /* Try to skip the heading and tailing ASCIIs. */
- int from_byte_orig = from_byte, to_byte_orig = to_byte;
-
- if (from < GPT && GPT < to)
- move_gap_both (from, from_byte);
- SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
- if (from_byte == to_byte
- && (encodep || NILP (coding->post_read_conversion))
- && ! CODING_REQUIRE_FLUSHING (coding))
- {
- coding->produced = len_byte;
- coding->produced_char = multibyte ? len : len_byte;
- if (!replace)
- /* We must record and adjust for this new text now. */
- adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
- return 0;
- }
-
- head_skip = from_byte - from_byte_orig;
- tail_skip = to_byte_orig - to_byte;
- total_skip = head_skip + tail_skip;
- from += head_skip;
- to -= tail_skip;
- len -= total_skip; len_byte -= total_skip;
+ /* Try to skip the heading and tailing ASCIIs. */
+ {
+ int from_byte_orig = from_byte, to_byte_orig = to_byte;
+
+ if (from < GPT && GPT < to)
+ move_gap_both (from, from_byte);
+ SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
+ if (from_byte == to_byte
+ && (encodep || NILP (coding->post_read_conversion))
+ && ! CODING_REQUIRE_FLUSHING (coding))
+ {
+ coding->produced = len_byte;
+ coding->produced_char = len;
+ if (!replace)
+ /* We must record and adjust for this new text now. */
+ adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
+ return 0;
+ }
- if (coding->cmp_data)
- coding->cmp_data->char_offset = from;
- }
+ head_skip = from_byte - from_byte_orig;
+ tail_skip = to_byte_orig - to_byte;
+ total_skip = head_skip + tail_skip;
+ from += head_skip;
+ to -= tail_skip;
+ len -= total_skip; len_byte -= total_skip;
+ }
/* The code conversion routine can not preserve text properties for
now. So, we must remove all text properties in the region.
if (Z - GPT < END_UNCHANGED)
END_UNCHANGED = Z - GPT;
+ if (!encodep && coding->src_multibyte)
+ {
+ /* Decoding routines expects that the source text is unibyte.
+ We must convert 8-bit characters of multibyte form to
+ unibyte. */
+ int len_byte_orig = len_byte;
+ len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte);
+ if (len_byte < len_byte_orig)
+ safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte,
+ len_byte);
+ coding->src_multibyte = 0;
+ }
+
for (;;)
{
int result;
/* The buffer memory is now:
- +--------+converted-text+---------+-------original-text------+---+
- |<-from->|<--inserted-->|---------|<-----------len---------->|---|
- |<------------------- GAP_SIZE -------------------->| */
+ +--------+converted-text+---------+-------original-text-------+---+
+ |<-from->|<--inserted-->|---------|<--------len_byte--------->|---|
+ |<---------------------- GAP ----------------------->| */
src = GAP_END_ADDR - len_byte;
dst = GPT_ADDR + inserted_byte;
result = decode_coding (coding, src, dst, len_byte, 0);
/* The buffer memory is now:
- +--------+-------converted-text--------+--+---original-text--+---+
- |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
- |<------------------- GAP_SIZE -------------------->| */
+ +--------+-------converted-text----+--+------original-text----+---+
+ |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---|
+ |<---------------------- GAP ----------------------->| */
- if (coding->fake_multibyte)
- fake_multibyte = 1;
-
- if (!encodep && !multibyte)
- coding->produced_char = coding->produced;
inserted += coding->produced_char;
inserted_byte += coding->produced;
len_byte -= coding->consumed;
inserted_byte += len_byte;
while (len_byte--)
*dst++ = *src++;
- fake_multibyte = 1;
break;
}
if (result == CODING_FINISH_INTERRUPT)
{
/* The conversion procedure was interrupted by a user. */
- fake_multibyte = 1;
break;
}
/* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */
{
/* It's quite strange to require more memory without
consuming any bytes. Perhaps CCL program bug. */
- fake_multibyte = 1;
break;
}
if (first)
}
if (src - dst > 0) *dst = 0; /* Put an anchor. */
- if (multibyte
- && (encodep
- || fake_multibyte
- || (to - from) != (to_byte - from_byte)))
- inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
+ if (encodep && coding->dst_multibyte)
+ {
+ /* The output is unibyte. We must convert 8-bit characters to
+ multibyte form. */
+ if (inserted_byte * 2 > GAP_SIZE)
+ {
+ GAP_SIZE -= inserted_byte;
+ ZV += inserted_byte; Z += inserted_byte;
+ ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
+ GPT += inserted_byte; GPT_BYTE += inserted_byte;
+ make_gap (inserted_byte - GAP_SIZE);
+ GAP_SIZE += inserted_byte;
+ ZV -= inserted_byte; Z -= inserted_byte;
+ ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte;
+ GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
+ }
+ inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte);
+ }
/* If we have shrinked the conversion area, adjust it now. */
if (total_skip > 0)
coding_restore_composition (coding, Fcurrent_buffer ());
coding_free_composition_data (coding);
- if (! encodep && ! NILP (coding->post_read_conversion))
+ if (! inhibit_pre_post_conversion
+ && ! encodep && ! NILP (coding->post_read_conversion))
{
Lisp_Object val;
int count = specpdl_ptr - specpdl;
}
Lisp_Object
-code_convert_string (str, coding, encodep, nocopy)
+run_pre_post_conversion_on_str (str, coding, encodep)
+ Lisp_Object str;
+ struct coding_system *coding;
+ int encodep;
+{
+ int count = specpdl_ptr - specpdl;
+ struct gcpro gcpro1;
+ struct buffer *prev = current_buffer;
+ int multibyte = STRING_MULTIBYTE (str);
+
+ record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
+ record_unwind_protect (code_convert_region_unwind, Qnil);
+ GCPRO1 (str);
+ temp_output_buffer_setup (" *code-converting-work*");
+ set_buffer_internal (XBUFFER (Vstandard_output));
+ /* We must insert the contents of STR as is without
+ unibyte<->multibyte conversion. For that, we adjust the
+ multibyteness of the working buffer to that of STR. */
+ Ferase_buffer ();
+ current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
+ insert_from_string (str, 0, 0,
+ XSTRING (str)->size, STRING_BYTES (XSTRING (str)), 0);
+ UNGCPRO;
+ inhibit_pre_post_conversion = 1;
+ if (encodep)
+ call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
+ else
+ call1 (coding->post_read_conversion, make_number (Z - BEG));
+ inhibit_pre_post_conversion = 0;
+ str = make_buffer_string (BEG, Z, 0);
+ return unbind_to (count, str);
+}
+
+Lisp_Object
+decode_coding_string (str, coding, nocopy)
Lisp_Object str;
struct coding_system *coding;
- int encodep, nocopy;
+ int nocopy;
{
int len;
char *buf;
- int from = 0, to = XSTRING (str)->size;
- int to_byte = STRING_BYTES (XSTRING (str));
+ int from, to, to_byte;
struct gcpro gcpro1;
Lisp_Object saved_coding_symbol;
int result;
- saved_coding_symbol = Qnil;
- if ((encodep && !NILP (coding->pre_write_conversion)
- || !encodep && !NILP (coding->post_read_conversion)))
- {
- /* Since we have to call Lisp functions which assume target text
- is in a buffer, after setting a temporary buffer, call
- code_convert_region. */
- int count = specpdl_ptr - specpdl;
- struct buffer *prev = current_buffer;
- int multibyte = STRING_MULTIBYTE (str);
-
- record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
- record_unwind_protect (code_convert_region_unwind, Qnil);
- inhibit_pre_post_conversion = 1;
- GCPRO1 (str);
- temp_output_buffer_setup (" *code-converting-work*");
- set_buffer_internal (XBUFFER (Vstandard_output));
- /* We must insert the contents of STR as is without
- unibyte<->multibyte conversion. For that, we adjust the
- multibyteness of the working buffer to that of STR. */
- Ferase_buffer (); /* for safety */
- current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
- insert_from_string (str, 0, 0, to, to_byte, 0);
- UNGCPRO;
- code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
- /* Make a unibyte string if we are encoding, otherwise make a
- multibyte string. */
- Fset_buffer_multibyte (encodep ? Qnil : Qt);
- str = make_buffer_string (BEGV, ZV, 0);
- return unbind_to (count, str);
- }
+ from = 0;
+ to = XSTRING (str)->size;
+ to_byte = STRING_BYTES (XSTRING (str));
- if (! encodep && CODING_REQUIRE_DETECTION (coding))
+ saved_coding_symbol = Qnil;
+ if (CODING_REQUIRE_DETECTION (coding))
{
/* See the comments in code_convert_region. */
if (coding->type == coding_type_undecided)
}
}
- if (encodep
- ? ! CODING_REQUIRE_ENCODING (coding)
- : ! CODING_REQUIRE_DECODING (coding))
- return (nocopy ? str : Fcopy_sequence (str));
-
- if (coding->composing != COMPOSITION_DISABLED)
+ if (! CODING_REQUIRE_DECODING (coding))
{
- if (encodep)
- coding_save_composition (coding, from, to, str);
- else
- coding_allocate_composition_data (coding, from);
+ if (!STRING_MULTIBYTE (str))
+ {
+ str = Fstring_as_multibyte (str);
+ nocopy = 1;
+ }
+ return (nocopy ? str : Fcopy_sequence (str));
}
- /* For conversion by CCL program and for encoding with composition
- handling, we can't skip any character because we may convert or
- compose even ASCII characters. */
- if (coding->type != coding_type_ccl
- && (!encodep || coding->cmp_data == NULL))
+ if (STRING_MULTIBYTE (str))
{
- /* Try to skip the heading and tailing ASCIIs. */
- int from_orig = from;
+ /* Decoding routines expect the source text to be unibyte. */
+ str = Fstring_as_unibyte (str);
+ nocopy = 1;
+ coding->src_multibyte = 0;
+ }
+ coding->dst_multibyte = 1;
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
- encodep);
- if (from == to_byte)
- return (nocopy ? str : Fcopy_sequence (str));
+ if (coding->composing != COMPOSITION_DISABLED)
+ coding_allocate_composition_data (coding, from);
- if (coding->cmp_data)
- coding->cmp_data->char_offset = from;
- }
+ /* Try to skip the heading and tailing ASCIIs. */
+ {
+ int from_orig = from;
- if (encodep)
- len = encoding_buffer_size (coding, to_byte - from);
- else
- len = decoding_buffer_size (coding, to_byte - from);
+ SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+ 0);
+ if (from == to_byte)
+ return (nocopy ? str : Fcopy_sequence (str));
+ }
+
+ len = decoding_buffer_size (coding, to_byte - from);
len += from + STRING_BYTES (XSTRING (str)) - to_byte;
GCPRO1 (str);
buf = get_conversion_buffer (len);
if (from > 0)
bcopy (XSTRING (str)->data, buf, from);
- result = (encodep
- ? encode_coding (coding, XSTRING (str)->data + from,
- buf + from, to_byte - from, len)
- : decode_coding (coding, XSTRING (str)->data + from,
- buf + from, to_byte - from, len));
- if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
+ result = decode_coding (coding, XSTRING (str)->data + from,
+ buf + from, to_byte - from, len);
+ if (result == CODING_FINISH_INCONSISTENT_EOL)
{
/* We simply try to decode the whole string again but without
eol-conversion this time. */
coding->eol_type = CODING_EOL_LF;
coding->symbol = saved_coding_symbol;
coding_free_composition_data (coding);
- return code_convert_string (str, coding, encodep, nocopy);
+ return decode_coding_string (str, coding, nocopy);
}
bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
STRING_BYTES (XSTRING (str)) - to_byte);
len = from + STRING_BYTES (XSTRING (str)) - to_byte;
- if (encodep)
- str = make_unibyte_string (buf, len + coding->produced);
- else
+ str = make_multibyte_string (buf, len + coding->produced_char,
+ len + coding->produced);
+
+ if (coding->cmp_data && coding->cmp_data->used)
+ coding_restore_composition (coding, str);
+ coding_free_composition_data (coding);
+
+ if (SYMBOLP (coding->post_read_conversion)
+ && !NILP (Ffboundp (coding->post_read_conversion)))
+ str = run_pre_post_conversion_on_str (str, 0);
+
+ return str;
+}
+
+Lisp_Object
+encode_coding_string (str, coding, nocopy)
+ Lisp_Object str;
+ struct coding_system *coding;
+ int nocopy;
+{
+ int len;
+ char *buf;
+ int from, to, to_byte;
+ struct gcpro gcpro1;
+ Lisp_Object saved_coding_symbol;
+ int result;
+
+ if (SYMBOLP (coding->pre_write_conversion)
+ && !NILP (Ffboundp (coding->pre_write_conversion)))
+ str = run_pre_post_conversion_on_str (str, 1);
+
+ from = 0;
+ to = XSTRING (str)->size;
+ to_byte = STRING_BYTES (XSTRING (str));
+
+ saved_coding_symbol = Qnil;
+ if (! CODING_REQUIRE_ENCODING (coding))
{
- int chars= (coding->fake_multibyte
- ? multibyte_chars_in_text (buf + from, coding->produced)
- : coding->produced_char);
- str = make_multibyte_string (buf, len + chars, len + coding->produced);
+ if (STRING_MULTIBYTE (str))
+ {
+ str = Fstring_as_unibyte (str);
+ nocopy = 1;
+ }
+ return (nocopy ? str : Fcopy_sequence (str));
}
- if (!encodep && coding->cmp_data && coding->cmp_data->used)
- coding_restore_composition (coding, str);
+ /* Encoding routines determine the multibyteness of the source text
+ by coding->src_multibyte. */
+ coding->src_multibyte = STRING_MULTIBYTE (str);
+ coding->dst_multibyte = 0;
+
+ if (coding->composing != COMPOSITION_DISABLED)
+ coding_save_composition (coding, from, to, str);
+ /* Try to skip the heading and tailing ASCIIs. */
+ {
+ int from_orig = from;
+
+ SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+ 1);
+ if (from == to_byte)
+ return (nocopy ? str : Fcopy_sequence (str));
+ }
+
+ len = encoding_buffer_size (coding, to_byte - from);
+ len += from + STRING_BYTES (XSTRING (str)) - to_byte;
+ GCPRO1 (str);
+ buf = get_conversion_buffer (len);
+ UNGCPRO;
+
+ if (from > 0)
+ bcopy (XSTRING (str)->data, buf, from);
+ result = encode_coding (coding, XSTRING (str)->data + from,
+ buf + from, to_byte - from, len);
+ bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
+ STRING_BYTES (XSTRING (str)) - to_byte);
+
+ len = from + STRING_BYTES (XSTRING (str)) - to_byte;
+ str = make_unibyte_string (buf, len + coding->produced);
coding_free_composition_data (coding);
+
return str;
}
error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
coding.mode |= CODING_MODE_LAST_BLOCK;
+ coding.src_multibyte = coding.dst_multibyte
+ = !NILP (current_buffer->enable_multibyte_characters);
code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
&coding, encodep, 1);
Vlast_coding_system_used = coding.symbol;
error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
coding.mode |= CODING_MODE_LAST_BLOCK;
- string = code_convert_string (string, &coding, encodep, !NILP (nocopy));
+ string = (encodep
+ ? encode_coding_string (string, &coding, !NILP (nocopy))
+ : decode_coding_string (string, &coding, !NILP (nocopy)));
Vlast_coding_system_used = coding.symbol;
return string;
coding.composing = COMPOSITION_DISABLED;
coding.mode |= CODING_MODE_LAST_BLOCK;
- return code_convert_string (string, &coding, encodep, 1);
+ return (encodep
+ ? encode_coding_string (string, &coding, 1)
+ : decode_coding_string (string, &coding, 1));
}
\f
DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
if (s2 < 0x80)
XSETFASTINT (val, s2);
else if (s2 >= 0xA0 || s2 <= 0xDF)
- XSETFASTINT (val,
- MAKE_NON_ASCII_CHAR (charset_katakana_jisx0201, s2, 0));
+ XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0));
else
error ("Invalid Shift JIS code: %x", XFASTINT (code));
}
|| (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC))
error ("Invalid Shift JIS code: %x", XFASTINT (code));
DECODE_SJIS (s1, s2, c1, c2);
- XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
+ XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2));
}
return val;
}
|| (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE))
error ("Invalid BIG5 code: %x", XFASTINT (code));
DECODE_BIG5 (b1, b2, charset, c1, c2);
- XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
+ XSETFASTINT (val, MAKE_CHAR (charset, c1, c2));
}
return val;
}
terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
/* Characer composition should be disabled. */
terminal_coding.composing = COMPOSITION_DISABLED;
+ terminal_coding.src_multibyte = 1;
+ terminal_coding.dst_multibyte = 0;
return Qnil;
}
&safe_terminal_coding);
/* Characer composition should be disabled. */
safe_terminal_coding.composing = COMPOSITION_DISABLED;
+ safe_terminal_coding.src_multibyte = 1;
+ safe_terminal_coding.dst_multibyte = 0;
return Qnil;
}
/* ISO2022 specific initialize routine. */
for (i = 0; i < 0x20; i++)
- iso_code_class[i] = ISO_control_code;
+ iso_code_class[i] = ISO_control_0;
for (i = 0x21; i < 0x7F; i++)
iso_code_class[i] = ISO_graphic_plane_0;
for (i = 0x80; i < 0xA0; i++)
- iso_code_class[i] = ISO_control_code;
+ iso_code_class[i] = ISO_control_1;
for (i = 0xA1; i < 0xFF; i++)
iso_code_class[i] = ISO_graphic_plane_1;
iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;