From a32a7dc7214bf5f618d50d0143fe5f8159445d2d Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Thu, 19 Jan 2012 22:19:21 +0800 Subject: [PATCH] Pay attention to buffer relocation on encoding (Bug#9318; backport from trunk). Backport of 2011-12-05T06:39:26Z!handa@m17n.org from trunk. --- src/ChangeLog | 26 ++++++++ src/coding.c | 171 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 157 insertions(+), 40 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 18b96b04195..5b25a68259a 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,29 @@ +2012-01-19 Kenichi Handa + + * coding.c (encode_designation_at_bol): New args charbuf_end and + dst. Return the number of produced bytes. Callers changed. + (coding_set_source): Return how many bytes coding->source was + relocated. + (coding_set_destination): Return how many bytes + coding->destination was relocated. + (CODING_DECODE_CHAR, CODING_ENCODE_CHAR, CODING_CHAR_CHARSET) + (CODING_CHAR_CHARSET_P): Adjusted for the above changes. + +2012-01-19 Kazuhiro Ito (tiny change) + + * coding.c (CODING_CHAR_CHARSET_P): New macro. + (encode_coding_emacs_mule, encode_coding_iso_2022): Use the above + macro (Bug#9318). + +2012-01-19 Andreas Schwab + + The following changes are to fix Bug#9318. + + * coding.c (CODING_ENCODE_CHAR, CODING_CHAR_CHARSET): New macros. + (encode_coding_emacs_mule, ENCODE_ISO_CHARACTER) + (encode_coding_iso_2022, encode_coding_sjis) + (encode_coding_big5, encode_coding_charset): Use the above macros. 
+ 2012-01-18 Yoshiaki Kasahara (tiny change) * buffer.c (init_buffer) [USE_MMAP_FOR_BUFFERS]: Adjust to diff --git a/src/coding.c b/src/coding.c index fbb028f658c..9a2c1f9c3f2 100644 --- a/src/coding.c +++ b/src/coding.c @@ -936,17 +936,16 @@ static int encode_coding_ccl P_ ((struct coding_system *)); static void decode_coding_raw_text P_ ((struct coding_system *)); static int encode_coding_raw_text P_ ((struct coding_system *)); -static void coding_set_source P_ ((struct coding_system *)); -static void coding_set_destination P_ ((struct coding_system *)); +static EMACS_INT coding_set_source P_ ((struct coding_system *)); +static EMACS_INT coding_set_destination P_ ((struct coding_system *)); static void coding_alloc_by_realloc P_ ((struct coding_system *, EMACS_INT)); static void coding_alloc_by_making_gap P_ ((struct coding_system *, EMACS_INT, EMACS_INT)); static unsigned char *alloc_destination P_ ((struct coding_system *, EMACS_INT, unsigned char *)); static void setup_iso_safe_charsets P_ ((Lisp_Object)); -static unsigned char *encode_designation_at_bol P_ ((struct coding_system *, - int *, int *, - unsigned char *)); +static int encode_designation_at_bol P_ ((struct coding_system *, + int *, int *, unsigned char *)); static int detect_eol P_ ((const unsigned char *, EMACS_INT, enum coding_category)); static Lisp_Object adjust_coding_eol_type P_ ((struct coding_system *, int)); @@ -1005,27 +1004,68 @@ record_conversion_result (struct coding_system *coding, } } -/* This wrapper macro is used to preserve validity of pointers into - buffer text across calls to decode_char, which could cause - relocation of buffers if it loads a charset map, because loading a - charset map allocates large structures. */ +/* These wrapper macros are used to preserve validity of pointers into + buffer text across calls to decode_char, encode_char, etc, which + could cause relocation of buffers if it loads a charset map, + because loading a charset map allocates large structures. 
*/ + #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ do { \ + EMACS_INT offset; \ + \ charset_map_loaded = 0; \ c = DECODE_CHAR (charset, code); \ - if (charset_map_loaded) \ + if (charset_map_loaded \ + && (offset = coding_set_source (coding))) \ { \ - const unsigned char *orig = coding->source; \ - EMACS_INT offset; \ - \ - coding_set_source (coding); \ - offset = coding->source - orig; \ src += offset; \ src_base += offset; \ src_end += offset; \ } \ } while (0) +#define CODING_ENCODE_CHAR(coding, dst, dst_end, charset, c, code) \ + do { \ + EMACS_INT offset; \ + \ + charset_map_loaded = 0; \ + code = ENCODE_CHAR (charset, c); \ + if (charset_map_loaded \ + && (offset = coding_set_destination (coding))) \ + { \ + dst += offset; \ + dst_end += offset; \ + } \ + } while (0) + +#define CODING_CHAR_CHARSET(coding, dst, dst_end, c, charset_list, code_return, charset) \ + do { \ + EMACS_INT offset; \ + \ + charset_map_loaded = 0; \ + charset = char_charset (c, charset_list, code_return); \ + if (charset_map_loaded \ + && (offset = coding_set_destination (coding))) \ + { \ + dst += offset; \ + dst_end += offset; \ + } \ + } while (0) + +#define CODING_CHAR_CHARSET_P(coding, dst, dst_end, c, charset, result) \ + do { \ + EMACS_INT offset; \ + \ + charset_map_loaded = 0; \ + result = CHAR_CHARSET_P (c, charset); \ + if (charset_map_loaded \ + && (offset = coding_set_destination (coding))) \ + { \ + dst += offset; \ + dst_end += offset; \ + } \ + } while (0) + /* If there are at least BYTES length of room at dst, allocate memory for coding->destination and update dst and dst_end. We don't have @@ -1105,10 +1145,15 @@ record_conversion_result (struct coding_system *coding, | ((p)[-1] & 0x3F)))) -static void +/* Update coding->source from coding->src_object, and return how many + bytes coding->source was changed. 
*/ + +static EMACS_INT coding_set_source (coding) struct coding_system *coding; { + const unsigned char *orig = coding->source; + if (BUFFERP (coding->src_object)) { struct buffer *buf = XBUFFER (coding->src_object); @@ -1126,12 +1171,19 @@ coding_set_source (coding) /* Otherwise, the source is C string and is never relocated automatically. Thus we don't have to update anything. */ ; + + return coding->source - orig; } -static void +/* Update coding->destination from coding->dst_object, and return how + many bytes coding->destination was changed. */ + +static EMACS_INT coding_set_destination (coding) struct coding_system *coding; { + const unsigned char *orig = coding->destination; + if (BUFFERP (coding->dst_object)) { if (coding->src_pos < 0) @@ -1155,6 +1207,8 @@ coding_set_destination (coding) /* Otherwise, the destination is C string and is never relocated automatically. Thus we don't have to update anything. */ ; + + return coding->destination - orig; } @@ -2778,14 +2832,19 @@ encode_coding_emacs_mule (coding) if (preferred_charset_id >= 0) { + int result; + charset = CHARSET_FROM_ID (preferred_charset_id); - if (CHAR_CHARSET_P (c, charset)) + CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); + if (result) code = ENCODE_CHAR (charset, c); else - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); } else - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); if (! 
charset) { c = coding->default_char; @@ -2794,7 +2853,8 @@ encode_coding_emacs_mule (coding) EMIT_ONE_ASCII_BYTE (c); continue; } - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); } dimension = CHARSET_DIMENSION (charset); emacs_mule_id = CHARSET_EMACS_MULE_ID (charset); @@ -4317,7 +4377,8 @@ decode_coding_iso_2022 (coding) #define ENCODE_ISO_CHARACTER(charset, c) \ do { \ - int code = ENCODE_CHAR ((charset),(c)); \ + int code; \ + CODING_ENCODE_CHAR (coding, dst, dst_end, (charset), (c), code); \ \ if (CHARSET_DIMENSION (charset) == 1) \ ENCODE_ISO_CHARACTER_DIMENSION1 ((charset), code); \ @@ -4441,17 +4502,20 @@ encode_invocation_designation (charset, coding, dst, p_nchars) /* Produce designation sequences of charsets in the line started from - SRC to a place pointed by DST, and return updated DST. + CHARBUF to a place pointed by DST, and return the number of + produced bytes. DST should not directly point a buffer text area + which may be relocated by char_charset call. If the current block ends before any end-of-line, we may fail to find all the necessary designations. */ -static unsigned char * +static int encode_designation_at_bol (coding, charbuf, charbuf_end, dst) struct coding_system *coding; int *charbuf, *charbuf_end; unsigned char *dst; { + unsigned char *orig = dst; /* Start of output, for the returned byte count. */ struct charset *charset; /* Table of charsets to be designated to each graphic register. */ int r[4]; @@ -4469,7 +4533,7 @@ encode_designation_at_bol (coding, charbuf, charbuf_end, dst) for (reg = 0; reg < 4; reg++) r[reg] = -1; - while (found < 4) + while (charbuf < charbuf_end && found < 4) { int id; @@ -4494,7 +4558,7 @@ encode_designation_at_bol (coding, charbuf, charbuf_end, dst) ENCODE_DESIGNATION (CHARSET_FROM_ID (r[reg]), reg, coding); } - return dst; + return dst - orig; } /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". 
*/ @@ -4539,13 +4603,26 @@ encode_coding_iso_2022 (coding) if (bol_designation) { - unsigned char *dst_prev = dst; - /* We have to produce designation sequences if any now. */ - dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst); - bol_designation = 0; + unsigned char desig_buf[16]; + int nbytes; + EMACS_INT offset; + + charset_map_loaded = 0; + nbytes = encode_designation_at_bol (coding, charbuf, charbuf_end, + desig_buf); + if (charset_map_loaded + && (offset = coding_set_destination (coding))) + { + dst += offset; + dst_end += offset; + } + memcpy (dst, desig_buf, nbytes); + dst += nbytes; /* We are sure that designation sequences are all ASCII bytes. */ - produced_chars += dst - dst_prev; + produced_chars += nbytes; + bol_designation = 0; + ASSURE_DESTINATION (safe_room); } c = *charbuf++; @@ -4616,12 +4693,17 @@ encode_coding_iso_2022 (coding) if (preferred_charset_id >= 0) { + int result; + charset = CHARSET_FROM_ID (preferred_charset_id); - if (! CHAR_CHARSET_P (c, charset)) - charset = char_charset (c, charset_list, NULL); + CODING_CHAR_CHARSET_P (coding, dst, dst_end, c, charset, result); + if (! 
result) + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + NULL, charset); } else - charset = char_charset (c, charset_list, NULL); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + NULL, charset); if (!charset) { if (coding->mode & CODING_MODE_SAFE_ENCODING) @@ -4632,7 +4714,8 @@ encode_coding_iso_2022 (coding) else { c = coding->default_char; - charset = char_charset (c, charset_list, NULL); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, + charset_list, NULL, charset); } } ENCODE_ISO_CHARACTER (charset, c); @@ -5064,7 +5147,9 @@ encode_coding_sjis (coding) else { unsigned code; - struct charset *charset = char_charset (c, charset_list, &code); + struct charset *charset; + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); if (!charset) { @@ -5076,7 +5161,8 @@ encode_coding_sjis (coding) else { c = coding->default_char; - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, + charset_list, &code, charset); } } if (code == CHARSET_INVALID_CODE (charset)) @@ -5153,7 +5239,9 @@ encode_coding_big5 (coding) else { unsigned code; - struct charset *charset = char_charset (c, charset_list, &code); + struct charset *charset; + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); if (! charset) { @@ -5165,7 +5253,8 @@ encode_coding_big5 (coding) else { c = coding->default_char; - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, + charset_list, &code, charset); } } if (code == CHARSET_INVALID_CODE (charset)) @@ -5747,7 +5836,9 @@ encode_coding_charset (coding) } else { - charset = char_charset (c, charset_list, &code); + CODING_CHAR_CHARSET (coding, dst, dst_end, c, charset_list, + &code, charset); + if (charset) { if (CHARSET_DIMENSION (charset) == 1) -- 2.39.5