unification_table = Vstandard_character_unification_table_for_decode;
coding->produced_char = 0;
+ coding->fake_multibyte = 0;
while (src < src_end && (dst_bytes
? (dst < adjusted_dst_end)
: (dst < src - 6)))
case ISO_0xA0_or_0xFF:
if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
|| coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
- {
- /* Invalid code. */
- *dst++ = c1;
- coding->produced_char++;
- break;
- }
+ goto label_invalid_code;
/* This is a graphic character, we fall down ... */
case ISO_graphic_plane_1:
if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
- {
- /* Invalid code. */
- *dst++ = c1;
- coding->produced_char++;
- }
+ goto label_invalid_code;
else
DECODE_ISO_CHARACTER (charset1, c1);
break;
break;
label_invalid_code:
- coding->produced_char += src - src_base;
while (src_base < src)
*dst++ = *src_base++;
+ coding->fake_multibyte = 1;
}
continue;
break;
}
- if (result == CODING_FINISH_NORMAL
- && src < src_end)
- result = CODING_FINISH_INSUFFICIENT_DST;
-
- /* If this is the last block of the text to be decoded, we had
- better just flush out all remaining codes in the text although
- they are not valid characters. */
- if (coding->mode & CODING_MODE_LAST_BLOCK)
+ if (src < src_end)
{
- bcopy (src, dst, src_end - src);
- dst += (src_end - src);
- src = src_end;
+ if (result == CODING_FINISH_NORMAL)
+ result = CODING_FINISH_INSUFFICIENT_DST;
+ else if (result != CODING_FINISH_INCONSISTENT_EOL
+ && coding->mode & CODING_MODE_LAST_BLOCK)
+ {
+ /* This is the last block of the text to be decoded. We had
+ better just flush out all remaining codes in the text
+ although they are not valid characters. */
+ src_bytes = src_end - src;
+ if (dst_bytes && (dst_end - dst < src_bytes))
+ src_bytes = dst_end - dst;
+ bcopy (src, dst, src_bytes);
+ dst += src_bytes;
+ src += src_bytes;
+ coding->fake_multibyte = 1;
+ }
}
+
coding->consumed = coding->consumed_char = src - source;
coding->produced = dst - destination;
return result;
if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
*dst++ = ISO_CODE_ESC, *dst++ = 'N'; \
else \
- *dst++ = ISO_CODE_SS2; \
+ { \
+ *dst++ = ISO_CODE_SS2; \
+ coding->fake_multibyte = 1; \
+ } \
CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
} while (0)
-#define ENCODE_SINGLE_SHIFT_3 \
- do { \
+#define ENCODE_SINGLE_SHIFT_3 \
+ do { \
if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) \
- *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
- else \
- *dst++ = ISO_CODE_SS3; \
+ *dst++ = ISO_CODE_ESC, *dst++ = 'O'; \
+ else \
+ { \
+ *dst++ = ISO_CODE_SS3; \
+ coding->fake_multibyte = 1; \
+ } \
CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1; \
} while (0)
unification_table = Vstandard_character_unification_table_for_encode;
coding->consumed_char = 0;
+ coding->fake_multibyte = 0;
while (src < src_end && (dst_bytes
? (dst < adjusted_dst_end)
: (dst < src - 19)))
break;
}
- if (result == CODING_FINISH_NORMAL
- && src < src_end)
- result = CODING_FINISH_INSUFFICIENT_DST;
-
- /* If this is the last block of the text to be encoded, we must
- reset graphic planes and registers to the initial state, and
- flush out the carryover if any. */
- if (coding->mode & CODING_MODE_LAST_BLOCK)
- ENCODE_RESET_PLANE_AND_REGISTER;
+ if (src < src_end && result == CODING_FINISH_NORMAL)
+ result = CODING_FINISH_INSUFFICIENT_DST;
+
+ /* If this is the last block of the text to be encoded, we must
+ reset graphic planes and registers to the initial state, and
+ flush out the carryover if any. Do this unconditionally: a
+ last block whose source was consumed completely needs the reset
+ too, so it must not depend on SRC < SRC_END or on RESULT. */
+ if (coding->mode & CODING_MODE_LAST_BLOCK)
+ ENCODE_RESET_PLANE_AND_REGISTER;
coding->consumed = src - source;
coding->produced = coding->produced_char = dst - destination;
if (sjis_p && charset_alt == charset_katakana_jisx0201) \
*dst++ = c1; \
else \
- *dst++ = charset_alt, *dst++ = c1; \
+ { \
+ *dst++ = charset_alt, *dst++ = c1; \
+ coding->fake_multibyte = 1; \
+ } \
} \
else \
{ \
if (sjis_p && charset_alt == charset_jisx0208) \
{ \
unsigned char s1, s2; \
- \
+ \
ENCODE_SJIS (c1, c2, s1, s2); \
*dst++ = s1, *dst++ = s2; \
+ coding->fake_multibyte = 1; \
} \
else if (!sjis_p \
&& (charset_alt == charset_big5_1 \
|| charset_alt == charset_big5_2)) \
{ \
unsigned char b1, b2; \
- \
+ \
ENCODE_BIG5 (charset_alt, c1, c2, b1, b2); \
*dst++ = b1, *dst++ = b2; \
} \
else \
- *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \
+ { \
+ *dst++ = charset_alt, *dst++ = c1, *dst++ = c2; \
+ coding->fake_multibyte = 1; \
+ } \
} \
coding->consumed_char++; \
} while (0);
unification_table = Vstandard_character_unification_table_for_decode;
coding->produced_char = 0;
+ coding->fake_multibyte = 0;
while (src < src_end && (dst_bytes
? (dst < adjusted_dst_end)
: (dst < src - 3)))
}
else if (c1 < 0x80)
DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
- else if (c1 < 0xA0 || c1 >= 0xE0)
+ else if (c1 < 0xA0)
{
- /* SJIS -> JISX0208, BIG5 -> Big5 (only if 0xE0 <= c1 < 0xFF) */
+ /* SJIS -> JISX0208 */
if (sjis_p)
{
ONE_MORE_BYTE (c2);
- DECODE_SJIS (c1, c2, c3, c4);
- DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
+ if (c2 >= 0x40)
+ {
+ DECODE_SJIS (c1, c2, c3, c4);
+ DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
+ }
+ else
+ goto label_invalid_code_2;
}
- else if (c1 >= 0xE0 && c1 < 0xFF)
+ else
+ goto label_invalid_code_1;
+ }
+ else if (c1 < 0xE0)
+ {
+ /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
+ if (sjis_p)
+ DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
+ /* dummy */ c2);
+ else
{
int charset;
ONE_MORE_BYTE (c2);
- DECODE_BIG5 (c1, c2, charset, c3, c4);
- DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
- }
- else /* Invalid code */
- {
- *dst++ = c1;
- coding->produced_char++;
+ if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
+ {
+ DECODE_BIG5 (c1, c2, charset, c3, c4);
+ DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
+ }
+ else
+ goto label_invalid_code_2;
}
}
- else
+ else /* C1 >= 0xE0 */
{
- /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
+ /* SJIS -> JISX0208, BIG5 -> Big5 */
if (sjis_p)
- DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
- /* dummy */ c2);
+ {
+ ONE_MORE_BYTE (c2);
+ if (c2 >= 0x40)
+ {
+ DECODE_SJIS (c1, c2, c3, c4);
+ DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
+ }
+ else
+ goto label_invalid_code_2;
+ }
else
{
int charset;
ONE_MORE_BYTE (c2);
- DECODE_BIG5 (c1, c2, charset, c3, c4);
- DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
+ if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
+ {
+ DECODE_BIG5 (c1, c2, charset, c3, c4);
+ DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
+ }
+ else
+ goto label_invalid_code_2;
}
}
continue;
+ label_invalid_code_1:
+ *dst++ = c1;
+ coding->produced_char++;
+ coding->fake_multibyte = 1;
+ continue;
+
+ label_invalid_code_2:
+ *dst++ = c1; *dst++= c2;
+ coding->produced_char += 2;
+ coding->fake_multibyte = 1;
+ continue;
+
label_end_of_loop:
result = CODING_FINISH_INSUFFICIENT_SRC;
label_end_of_loop_2:
break;
}
- if (result == CODING_FINISH_NORMAL
- && src < src_end)
- result = CODING_FINISH_INSUFFICIENT_DST;
+ if (src < src_end)
+ {
+ if (result == CODING_FINISH_NORMAL)
+ result = CODING_FINISH_INSUFFICIENT_DST;
+ else if (result != CODING_FINISH_INCONSISTENT_EOL
+ && coding->mode & CODING_MODE_LAST_BLOCK)
+ {
+ /* This is the last block of the text to be decoded. Flush
+ out the remaining source bytes as they are, limited to the
+ room left in the destination when DST_BYTES is nonzero. */
+ src_bytes = src_end - src;
+ if (dst_bytes && (dst_end - dst < src_bytes))
+ src_bytes = dst_end - dst;
+ /* bcopy takes (FROM, TO, LENGTH): copy source into destination. */
+ bcopy (src, dst, src_bytes);
+ src += src_bytes;
+ dst += src_bytes;
+ coding->fake_multibyte = 1;
+ }
+ }
coding->consumed = coding->consumed_char = src - source;
coding->produced = dst - destination;
unification_table = Vstandard_character_unification_table_for_encode;
coding->consumed_char = 0;
+ coding->fake_multibyte = 0;
while (src < src_end && (dst_bytes
? (dst < adjusted_dst_end)
: (dst < src - 1)))
unsigned char *src_end = source + src_bytes;
unsigned char *dst = destination;
unsigned char *dst_end = destination + dst_bytes;
+ unsigned char c;
int result = CODING_FINISH_NORMAL;
+ coding->fake_multibyte = 0;
+
if (src_bytes <= 0)
return result;
: (dst < src - 1)))
{
unsigned char *src_base = src;
- unsigned char c = *src++;
+
+ c = *src++;
if (c == '\r')
{
ONE_MORE_BYTE (c);
goto label_end_of_loop_2;
}
*dst++ = '\r';
+ if (BASE_LEADING_CODE_P (c))
+ coding->fake_multibyte = 1;
}
*dst++ = c;
}
goto label_end_of_loop_2;
}
else
- *dst++ = c;
+ {
+ *dst++ = c;
+ if (BASE_LEADING_CODE_P (c))
+ coding->fake_multibyte = 1;
+ }
continue;
label_end_of_loop:
case CODING_EOL_CR:
if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
{
- while (src < src_end) if (*src++ == '\n') break;
+ while (src < src_end)
+ {
+ if ((c = *src++) == '\n')
+ break;
+ if (BASE_LEADING_CODE_P (c))
+ coding->fake_multibyte = 1;
+ }
if (*--src == '\n')
{
src_bytes = src - source;
safe_bcopy (source, destination, src_bytes);
src += src_bytes;
dst += dst_bytes;
+ coding->fake_multibyte = 1;
break;
}
unsigned char *dst = destination;
int result = CODING_FINISH_NORMAL;
+ coding->fake_multibyte = 0;
+
if (coding->eol_type == CODING_EOL_CRLF)
{
unsigned char c;
|| (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
*dst++ = '\r', *dst++ = '\n';
else
- *dst++ = c;
+ {
+ *dst++ = c;
+ if (BASE_LEADING_CODE_P (c))
+ coding->fake_multibyte = 1;
+ }
}
if (src < src_end)
result = CODING_FINISH_INSUFFICIENT_DST;
}
else
{
+ unsigned char c;
+
if (dst_bytes && src_bytes > dst_bytes)
{
src_bytes = dst_bytes;
if (dst_bytes)
bcopy (source, destination, src_bytes);
else
- safe_bcopy (source, destination, src_bytes);
+ {
+ safe_bcopy (source, destination, src_bytes);
+ dst_bytes = src_bytes;
+ }
if (coding->eol_type == CODING_EOL_CRLF)
{
while (src_bytes--)
- if (*dst++ == '\n') dst[-1] = '\r';
+ {
+ if ((c = *dst++) == '\n')
+ dst[-1] = '\r';
+ else if (BASE_LEADING_CODE_P (c))
+ coding->fake_multibyte = 1;
+ }
}
- else if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
+ else
{
- while (src_bytes--)
- if (*dst++ == '\r') dst[-1] = '\n';
+ if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
+ {
+ while (src_bytes--)
+ if (*dst++ == '\r') dst[-1] = '\n';
+ }
+ coding->fake_multibyte = 1;
}
- src += src_bytes;
- dst += src_bytes;
+ src = source + dst_bytes;
+ dst = destination + dst_bytes;
}
coding->consumed = coding->consumed_char = src - source;
{
coding->produced = coding->produced_char = 0;
coding->consumed = coding->consumed_char = 0;
+ coding->fake_multibyte = 0;
return CODING_FINISH_NORMAL;
}
bcopy (source, destination, coding->produced);
else
safe_bcopy (source, destination, coding->produced);
+ coding->fake_multibyte = 1;
coding->consumed
= coding->consumed_char = coding->produced_char = coding->produced;
break;
{
coding->produced = coding->produced_char = 0;
coding->consumed = coding->consumed_char = 0;
+ coding->fake_multibyte = 0;
return CODING_FINISH_NORMAL;
}
while (p < pend)
if (*p++ == '\015') p[-1] = '\n';
}
+ coding->fake_multibyte = 1;
coding->consumed
= coding->consumed_char = coding->produced_char = coding->produced;
break;
return result;
}
-/* Scan text in the region between *BEG and *END, skip characters
- which we don't have to decode by coding system CODING at the head
- and tail, then set *BEG and *END to the region of the text we
- actually have to convert.
+/* Scan text in the region between *BEG and *END (byte positions),
+ skip characters which we don't have to decode by coding system
+ CODING at the head and tail, then set *BEG and *END to the region
+ of the text we actually have to convert. The caller should move
+ the gap out of the region in advance.
If STR is not NULL, *BEG and *END are indices into STR. */
struct coding_system *coding;
unsigned char *str;
{
- unsigned char *begp_orig, *begp, *endp_orig, *endp;
+ unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
int eol_conversion;
if (coding->type == coding_type_ccl
}
else if (coding->type == coding_type_no_conversion)
{
- /* We need no conversion. */
- *beg = *end;
+ /* We need no conversion, but don't have to skip any data here.
+ Decoding routine handles them effectively anyway. */
return;
}
}
else
{
- move_gap (*beg);
- begp_orig = begp = GAP_END_ADDR;
+ begp_orig = begp = BYTE_POS_ADDR (*beg);
endp_orig = endp = begp + *end - *beg;
}
if (eol_conversion)
{
if (coding->heading_ascii < 0)
- while (begp < endp && *begp != '\r') begp++;
- while (begp < endp && *(endp - 1) != '\r') endp--;
+ while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
+ while (begp < endp && *(endp - 1) != '\r' && *(endp - 1) < 0x80)
+ endp--;
}
else
begp = endp;
default: /* i.e. case coding_type_iso2022: */
if (coding->heading_ascii < 0)
{
- unsigned char c;
-
/* We can skip all ASCII characters at the head except for a
few control codes. */
while (begp < endp && (c = *begp) < 0x80
case CODING_CATEGORY_IDX_ISO_8_2:
/* We can skip all ASCII characters at the tail. */
if (eol_conversion)
- while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
+ while (begp < endp && (c = endp[-1]) < 0x80 && c != '\n') endp--;
else
while (begp < endp && endp[-1] < 0x80) endp--;
break;
/* We can skip all charactes at the tail except for ESC and
the following 2-byte at the tail. */
if (eol_conversion)
- while (begp < endp && endp[-1] != ISO_CODE_ESC && endp[-1] != '\n')
+ while (begp < endp
+ && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\n')
endp--;
else
- while (begp < endp && endp[-1] != ISO_CODE_ESC)
+ while (begp < endp
+ && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
endp--;
if (begp < endp && endp[-1] == ISO_CODE_ESC)
{
}
else
{
- move_gap (*beg);
- begp_orig = begp = GAP_END_ADDR;
+ begp_orig = begp = BYTE_POS_ADDR (*beg);
endp_orig = endp = begp + *end - *beg;
}
}
/* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
- text from FROM to TO by coding system CODING, and return number of
- characters in the resulting text.
+ text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
+ coding system CODING, and return the status code of code conversion
+ (currently, this value has no meaning).
+
+ How many characters (and bytes) are converted to how many
+ characters (and bytes) are recorded in members of the structure
+ CODING.
If ADJUST is nonzero, we do various things as if the original text
is deleted and a new text is inserted. See the comments in
pre-write-conversion functions (if any) should be processed. */
int
-code_convert_region (from, to, coding, encodep, adjust)
- int from, to, encodep, adjust;
+code_convert_region (from, from_byte, to, to_byte, coding, encodep, adjust)
+ int from, from_byte, to, to_byte, encodep, adjust;
struct coding_system *coding;
{
- int len = to - from, require, inserted, inserted_byte;
- int from_byte, to_byte, len_byte;
+ int len = to - from, len_byte = to_byte - from_byte;
+ int require, inserted, inserted_byte;
int from_byte_orig, to_byte_orig;
Lisp_Object saved_coding_symbol = Qnil;
+ int multibyte = !NILP (current_buffer->enable_multibyte_characters);
+ int first = 1;
+ int fake_multibyte = 0;
+ unsigned char *src, *dst;
if (adjust)
{
+ int saved_from = from;
+
prepare_to_modify_buffer (from, to, &from);
- to = from + len;
+ if (saved_from != from)
+ {
+ to = from + len;
+ if (multibyte)
+ from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
+ else
+ from_byte = from, to_byte = to;
+ len_byte = to_byte - from_byte;
+ }
}
- from_byte = CHAR_TO_BYTE (from); to_byte = CHAR_TO_BYTE (to);
- len_byte = to_byte - from_byte;
if (! encodep && CODING_REQUIRE_DETECTION (coding))
{
move_gap_both (from, from_byte);
if (coding->type == coding_type_undecided)
{
- detect_coding (coding, BYTE_POS_ADDR (from), len);
+ detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
if (coding->type == coding_type_undecided)
coding->type = coding_type_emacs_mule;
}
}
}
+ coding->consumed_char = len, coding->consumed = len_byte;
+
if (encodep
? ! CODING_REQUIRE_ENCODING (coding)
: ! CODING_REQUIRE_DECODING (coding))
- return len;
+ {
+ coding->produced = len_byte;
+ if (multibyte)
+ {
+ if (GPT < from || GPT > to)
+ move_gap_both (from, from_byte);
+ coding->produced_char
+ = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
+ if (coding->produced_char != len)
+ {
+ int diff = coding->produced_char - len;
+
+ if (adjust)
+ adjust_before_replace (from, from_byte, to, to_byte);
+ ZV += diff; Z += diff; GPT += diff;
+ if (adjust)
+ adjust_after_replace (from, from_byte, to, to_byte,
+ diff, 0);
+ }
+ }
+ else
+ coding->produced_char = len_byte;
+ return 0;
+ }
/* Now we convert the text. */
len = ZV - BEGV;
new = current_buffer;
set_buffer_internal_1 (prev);
- del_range (from, to);
+ del_range_2 (from, to, from_byte, to_byte);
insert_from_buffer (new, BEG, len, 0);
to = from + len;
- to_byte = CHAR_TO_BYTE (to);
+ to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
len_byte = to_byte - from_byte;
}
}
/* Try to skip the heading and tailing ASCIIs. */
from_byte_orig = from_byte; to_byte_orig = to_byte;
+ if (from < GPT && GPT < to)
+ move_gap (from);
if (encodep)
shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
else
shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
if (from_byte == to_byte)
- return len;
+ {
+ coding->produced = len_byte;
+ coding->produced_char = multibyte ? len : len_byte;
+ return 0;
+ }
+
/* Here, the excluded region by shrinking contains only ASCIIs. */
from += (from_byte - from_byte_orig);
to += (to_byte - to_byte_orig);
len = to - from;
len_byte = to_byte - from_byte;
- /* For converion, we must put the gap before the text to be decoded
- in addition to make the gap larger for efficient decoding. The
- required gap size starts from 2000 which is the magic number used
- in make_gap. But, after one batch of conversion, it will be
- incremented if we find that it is not enough . */
+ /* For conversion, we must put the gap before the text in addition to
+ making the gap larger for efficient decoding. The required gap
+ size starts from 2000 which is the magic number used in make_gap.
+ But, after one batch of conversion, it will be incremented if we
+ find that it is not enough. */
require = 2000;
if (GAP_SIZE < require)
end_unchanged = Z - GPT;
inserted = inserted_byte = 0;
+ src = GAP_END_ADDR, dst = GPT_ADDR;
+
+ GAP_SIZE += len_byte;
+ ZV -= len;
+ Z -= len;
+ ZV_BYTE -= len_byte;
+ Z_BYTE -= len_byte;
+
for (;;)
{
- int result, diff_char, diff_byte;
+ int result;
/* The buffer memory is changed from:
- +--------+converted-text+------------+-----original-text-----+---+
- |<-from->|<--inserted-->|<-GAP_SIZE->|<---------len--------->|---| */
-
+ +--------+converted-text+---------+-------original-text------+---+
+ |<-from->|<--inserted-->|---------|<-----------len---------->|---|
+ |<------------------- GAP_SIZE -------------------->| */
if (encodep)
- result = encode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0);
+ result = encode_coding (coding, src, dst, len_byte, 0);
else
- result = decode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0);
+ result = decode_coding (coding, src, dst, len_byte, 0);
/* to:
+--------+-------converted-text--------+--+---original-text--+---+
- |<-from->|<----(inserted+produced)---->|--|<-(len-consumed)->|---| */
-
- diff_char = coding->produced_char - coding->consumed_char;
- diff_byte = coding->produced - coding->consumed;
-
- GAP_SIZE -= diff_byte;
- ZV += diff_char; ZV_BYTE += diff_byte;
- Z += diff_char; Z_BYTE += diff_byte;
- GPT += coding->produced_char; GPT_BYTE += coding->produced;
+ |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
+ |<------------------- GAP_SIZE -------------------->| */
+ if (coding->fake_multibyte)
+ fake_multibyte = 1;
+ if (!encodep && !multibyte)
+ coding->produced_char = coding->produced;
inserted += coding->produced_char;
inserted_byte += coding->produced;
- len -= coding->consumed_char;
len_byte -= coding->consumed;
+ src += coding->consumed;
+ /* Advance DST by this batch only. INSERTED_BYTE is the running
+ total, so adding it would overshoot from the second batch on. */
+ dst += coding->produced;
if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
{
- unsigned char *p = GPT_ADDR - inserted_byte, *pend = GPT_ADDR;
+ unsigned char *pend = dst, *p = pend - inserted_byte;
/* Encode LFs back to the original eol format (CR or CRLF). */
if (coding->eol_type == CODING_EOL_CR)
}
else
{
- unsigned char *p2 = p;
int count = 0;
- while (p2 < pend) if (*p2++ == '\n') count++;
- if (GAP_SIZE < count)
- make_gap (count - GAP_SIZE);
- p2 = GPT_ADDR + count;
- while (p < pend)
+ while (p < pend) if (*p++ == '\n') count++;
+ if (src - dst < count)
{
- *--p2 = *--pend;
- if (*pend == '\n') *--p2 = '\r';
+ /* We don't have sufficient room for putting LFs
+ back to CRLF. We must record converted and
+ not-yet-converted text back to the buffer
+ content, enlarge the gap, then record them out of
+ the buffer contents again. */
+ int add = len_byte + inserted_byte;
+
+ GAP_SIZE -= add;
+ ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
+ GPT += inserted_byte; GPT_BYTE += inserted_byte;
+ make_gap (count - GAP_SIZE);
+ GAP_SIZE += add;
+ ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
+ GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
+ /* Don't forget to update SRC, DST, and PEND. */
+ src = GAP_END_ADDR - len_byte;
+ dst = GPT_ADDR + inserted_byte;
+ pend = dst;
}
- GPT += count; GAP_SIZE -= count; ZV += count; Z += count;
- ZV_BYTE += count; Z_BYTE += count;
- coding->produced += count;
- coding->produced_char += count;
inserted += count;
inserted_byte += count;
+ coding->produced += count;
+ p = dst = pend + count;
+ while (count)
+ {
+ *--p = *--pend;
+ if (*p == '\n') count--, *--p = '\r';
+ }
}
/* Suppress eol-format conversion in the further conversion. */
/* Restore the original symbol. */
coding->symbol = saved_coding_symbol;
+
+ continue;
}
if (len_byte <= 0)
break;
{
/* The source text ends in invalid codes. Let's just
make them valid buffer contents, and finish conversion. */
- inserted += len;
+ inserted += len_byte;
inserted_byte += len_byte;
+ /* Move the unconverted tail at SRC down to DST, right after the
+ text converted so far. */
+ while (len_byte--)
+ *dst++ = *src++;
+ fake_multibyte = 1;
break;
}
- if (inserted == coding->produced_char)
- /* We have just done the first batch of conversion. Let's
- reconsider the required gap size now.
-
- We have converted CONSUMED bytes into PRODUCED bytes. To
- convert the remaining LEN bytes, we may need REQUIRE bytes
- of gap, where:
- REQUIRE + LEN = (LEN * PRODUCED / CONSUMED)
- REQUIRE = LEN * (PRODUCED - CONSUMED) / CONSUMED
- = LEN * DIFF / CONSUMED
- Here, we are sure that DIFF is positive. */
- require = len_byte * diff_byte / coding->consumed;
- if (GAP_SIZE < require)
- make_gap (require - GAP_SIZE);
+ if (first)
+ {
+ /* We have just done the first batch of conversion which was
+ stopped because of insufficient gap. Let's reconsider the
+ required gap size (i.e. SRC - DST) now.
+
+ We have converted ORIG bytes (== coding->consumed) into
+ NEW bytes (coding->produced). To convert the remaining
+ LEN bytes, we may need REQUIRE bytes of gap, where:
+ REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
+ REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
+ Here, we are sure that NEW >= ORIG. */
+ require = (len_byte * (coding->produced - coding->consumed)
+ / coding->consumed);
+ first = 0;
+ }
+ if ((src - dst) < (require + 2000))
+ {
+ /* See the comment above the previous call of make_gap. */
+ int add = len_byte + inserted_byte;
+
+ GAP_SIZE -= add;
+ ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
+ GPT += inserted_byte; GPT_BYTE += inserted_byte;
+ make_gap (require + 2000);
+ GAP_SIZE += add;
+ ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
+ GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
+ /* Don't forget to update SRC, DST. */
+ src = GAP_END_ADDR - len_byte;
+ dst = GPT_ADDR + inserted_byte;
+ }
}
- if (GAP_SIZE > 0) *GPT_ADDR = 0; /* Put an anchor. */
+ if (src - dst > 0) *dst = 0; /* Put an anchor. */
+
+ if (multibyte && (fake_multibyte || !encodep && (to - from) != (to_byte - from_byte)))
+ inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
+
+ /* Update various buffer positions for the new text. */
+ GAP_SIZE -= inserted_byte;
+ ZV += inserted; Z+= inserted;
+ ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte;
+ GPT += inserted; GPT_BYTE += inserted_byte;
if (adjust)
{
if (pos >= from + orig_inserted)
temp_set_point (current_buffer, pos + (inserted - orig_inserted));
}
+ signal_after_change (from, to - from, inserted);
}
- return ((from_byte - from_byte_orig) + inserted + (to_byte_orig - to_byte));
+ {
+ int skip = (to_byte_orig - to_byte) + (from_byte - from_byte_orig);
+
+ coding->consumed = to_byte_orig - from_byte_orig;
+ coding->consumed_char = skip + (to - from);
+ coding->produced = skip + inserted_byte;
+ coding->produced_char = skip + inserted;
+ }
+ return 0;
}
Lisp_Object
insert_from_string (str, 0, 0, to_byte, to_byte, 0);
current_buffer->enable_multibyte_characters = Qt;
}
- code_convert_region (BEGV, ZV, coding, encodep, 1);
+ code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
if (encodep)
/* We must return the buffer contents as unibyte string. */
current_buffer->enable_multibyte_characters = Qnil;
error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
coding.mode |= CODING_MODE_LAST_BLOCK;
- len = code_convert_region (from, to, &coding, encodep, 1);
- return make_number (len);
+ code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
+ &coding, encodep, 1);
+ return make_number (coding.produced_char);
}
DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,