From: K. Handa
Date: Sun, 4 Aug 2019 12:14:26 +0000 (+0900)
Subject: Add Unicode-safe UTF-8 converter
X-Git-Tag: emacs-27.0.90~1713
X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=a8026dfde9734a03ad03a9872ec801871dd1d81a;p=emacs.git

Add Unicode-safe UTF-8 converter

* src/coding.c (encode_string_utf_8, decode_string_utf_8): New functions.
* src/coding.h (encode_string_utf_8, decode_string_utf_8): Extern them.
---

diff --git a/src/coding.c b/src/coding.c
index 189a4b39d15..ab0e15119f3 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -9515,6 +9515,732 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
 }
 
+
+/* Return the gap address of BUFFER.  If the gap is not at point of
+   BUFFER, move it there first.  If the gap size is less than NBYTES,
+   enlarge the gap in advance.
+
+   NBYTES is a ptrdiff_t because callers pass byte counts of strings,
+   which do not necessarily fit in an int.  */
+
+static unsigned char *
+get_buffer_gap_address (Lisp_Object buffer, ptrdiff_t nbytes)
+{
+  struct buffer *buf = XBUFFER (buffer);
+
+  if (BUF_GPT (buf) != BUF_PT (buf))
+    {
+      struct buffer *oldb = current_buffer;
+
+      /* PT and PT_BYTE are evaluated while BUF is the current buffer,
+         so the gap is moved to point of BUF.  */
+      current_buffer = buf;
+      move_gap_both (PT, PT_BYTE);
+      current_buffer = oldb;
+    }
+  if (BUF_GAP_SIZE (buf) < nbytes)
+    make_gap_1 (buf, nbytes);
+  return BUF_GPT_ADDR (buf);
+}
+
+/* Return a pointer to the byte sequence for C, and set the length in
+   LEN.  This function is used to get a byte sequence for HANDLE_8_BIT
+   and HANDLE_OVER_UNI arguments of encode_string_utf_8 and
+   decode_string_utf_8 when those arguments are given by
+   characters.
+
+   The returned pointer refers to static storage holding the two most
+   recently requested characters; it stays valid only until this
+   function is called for two other characters.  */
+
+static unsigned char *
+get_char_bytes (int c, int *len)
+{
+  /* We use two caches, considering the situation that
+     encode/decode_string_utf_8 are called repeatedly with the same
+     values for HANDLE_8_BIT and HANDLE_OVER_UNI arguments.
+
+     The cache keys start as -1, which is never passed as C here.  If
+     they started as 0, the first request for character 0 would be
+     taken for a cache hit and yield a zero-length sequence.  */
+  static int chars[2] = { -1, -1 };
+  static unsigned char bytes[2][6];
+  static int nbytes[2];
+  static int last_index;
+
+  if (chars[last_index] == c)
+    {
+      *len = nbytes[last_index];
+      return bytes[last_index];
+    }
+  if (chars[1 - last_index] == c)
+    {
+      *len = nbytes[1 - last_index];
+      return bytes[1 - last_index];
+    }
+  /* Cache miss: overwrite the slot that was not used last.  */
+  last_index = 1 - last_index;
+  chars[last_index] = c;
+  *len = nbytes[last_index] = CHAR_STRING (c, bytes[last_index]);
+  return bytes[last_index];
+}
+
+/* Encode STRING by the coding system utf-8-unix.
+
+   Even if :pre-write-conversion and :encode-translation-table
+   properties are put to that coding system, they are ignored.
+
+   This function assumes that arguments have values as described
+   below.  The validity must be assured by callers.
+
+   STRING is a multibyte string or an ASCII-only unibyte string.
+
+   BUFFER is a unibyte buffer or Qnil.
+
+   If BUFFER is a unibyte buffer, the encoding result of UTF-8
+   sequence is inserted after point of the buffer, and the number of
+   inserted characters is returned.  Note that a caller should have
+   made BUFFER ready for modifying in advance (e.g. by calling
+   invalidate_buffer_caches).
+
+   If BUFFER is Qnil, a unibyte string is made from the encoding
+   result of UTF-8 sequence, and it is returned.  If NOCOPY is true
+   and STRING contains only Unicode characters (i.e. the encoding does
+   not change the byte sequence), STRING is returned even if it is
+   multibyte.
+
+   HANDLE-8-BIT and HANDLE-OVER-UNI specify how to handle a non-Unicode
+   character.  The former is for an eight-bit character (represented
+   by 2-byte overlong sequence in multibyte STRING).  The latter is
+   for an over-unicode character (a character whose code is greater
+   than the maximum Unicode character 0x10FFFF, and is represented by
+   4 or 5-byte sequence in multibyte STRING).
+
+   If they are unibyte strings (typically "\357\277\275"; UTF-8
+   sequence for the Unicode REPLACEMENT CHARACTER #xFFFD), a
+   non-Unicode character is encoded into that sequence.
+
+   If they are characters, a non-Unicode character is encoded into the
+   corresponding UTF-8 sequence.
+
+   If they are Qignored, a non-Unicode character is skipped on
+   encoding.
+
+   If HANDLE-8-BIT is Qt, an eight-bit character is encoded into one
+   byte of the same value.
+
+   If HANDLE-OVER-UNI is Qt, an over-unicode character is encoded
+   into the same 4 or 5-byte sequence.
+
+   If they are Qnil, Qnil is returned if STRING has a non-Unicode
+   character.  */
+
+Lisp_Object
+encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
+                     bool nocopy, Lisp_Object handle_8_bit,
+                     Lisp_Object handle_over_uni)
+{
+  ptrdiff_t nchars = SCHARS (string), nbytes = SBYTES (string);
+  if (NILP (buffer) && nchars == nbytes)
+    /* STRING contains only ASCII characters.  */
+    return string;
+
+  ptrdiff_t num_8_bit = 0;   /* number of eight-bit chars in STRING */
+  /* The following two vars are counted only if handle_over_uni is not Qt */
+  ptrdiff_t num_over_4 = 0; /* number of 4-byte non-Unicode chars in STRING */
+  ptrdiff_t num_over_5 = 0; /* number of 5-byte non-Unicode chars in STRING */
+  ptrdiff_t outbytes = nbytes; /* number of bytes of encoding result.  */
+  unsigned char *p = SDATA (string);
+  unsigned char *pend = p + nbytes;
+  unsigned char *src = NULL, *dst = NULL;
+  unsigned char *replace_8_bit = NULL, *replace_over_uni = NULL;
+  int replace_8_bit_len = 0, replace_over_uni_len = 0;
+  Lisp_Object val = Qnil;      /* the return value */
+
+  /* Scan bytes in STRING twice.  The first scan is to count non-Unicode
+     characters, and the second scan is to encode STRING.  If the
+     encoding is trivial (no need of changing the byte sequence),
+     the second scan is avoided.  */
+  for (int scan_count = 0; scan_count < 2; scan_count++)
+    {
+      while (p < pend)
+        {
+          if (nchars == pend - p)
+            /* There is no multibyte character remaining.  */
+            break;
+
+          int c = *p;
+          int len = BYTES_BY_CHAR_HEAD (c);
+
+          nchars--;
+          if (len == 1
+              || len == 3
+              || (len == 2 ? ! CHAR_BYTE8_HEAD_P (c)
+                  : (EQ (handle_over_uni, Qt)
+                     || (len == 4
+                         && string_char (p, NULL, NULL) <= MAX_UNICODE_CHAR))))
+            {
+              p += len;
+              continue;
+            }
+
+          /* A character to change the byte sequence on encoding was
+             found.  A rare case.  */
+          if (len == 2)
+            {
+              /* Handle an eight-bit character by handle_8_bit.  */
+              if (scan_count == 0)
+                {
+                  if (NILP (handle_8_bit))
+                    return Qnil;
+                  num_8_bit++;
+                }
+              else
+                {
+                  /* Flush the unchanged bytes seen since the last
+                     replaced character.  */
+                  if (src < p)
+                    {
+                      memcpy (dst, src, p - src);
+                      dst += p - src;
+                    }
+                  if (replace_8_bit_len > 0)
+                    {
+                      memcpy (dst, replace_8_bit, replace_8_bit_len);
+                      dst += replace_8_bit_len;
+                    }
+                  else if (EQ (handle_8_bit, Qt))
+                    {
+                      int char8 = STRING_CHAR (p);
+                      *dst++ = CHAR_TO_BYTE8 (char8);
+                    }
+                  /* Otherwise (Qignored or an empty replacement)
+                     nothing is output for this character.  */
+                }
+            }
+          else                  /* len == 4 or 5 */
+            {
+              /* Handle an over-unicode character by handle_over_uni.  */
+              if (scan_count == 0)
+                {
+                  if (NILP (handle_over_uni))
+                    return Qnil;
+                  if (len == 4)
+                    num_over_4++;
+                  else
+                    num_over_5++;
+                }
+              else
+                {
+                  if (src < p)
+                    {
+                      memcpy (dst, src, p - src);
+                      dst += p - src;
+                    }
+                  if (replace_over_uni_len > 0)
+                    {
+                      memcpy (dst, replace_over_uni, replace_over_uni_len);
+                      dst += replace_over_uni_len;
+                    }
+                }
+            }
+          p += len;
+          src = p;
+        }
+
+      if (scan_count == 0)
+        {
+          /* End of the first scan.  */
+          if (num_8_bit == 0
+              && (num_over_4 + num_over_5 == 0 || EQ (handle_over_uni, Qt)))
+            {
+              /* We can break the loop because there is no need of
+                 changing the byte sequence.  This is the typical
+                 case.  */
+              scan_count = 1;
+            }
+          else
+            {
+              /* Prepare for the next scan to handle non-Unicode
+                 characters, and compute the size of the result.  */
+              if (num_8_bit > 0)
+                {
+                  if (CHARACTERP (handle_8_bit))
+                    replace_8_bit = get_char_bytes (XFIXNUM (handle_8_bit),
+                                                    &replace_8_bit_len);
+                  else if (STRINGP (handle_8_bit))
+                    {
+                      replace_8_bit = SDATA (handle_8_bit);
+                      replace_8_bit_len = SBYTES (handle_8_bit);
+                    }
+                  if (replace_8_bit)
+                    outbytes += (replace_8_bit_len - 2) * num_8_bit;
+                  else if (EQ (handle_8_bit, Qignored))
+                    outbytes -= 2 * num_8_bit;
+                  else if (EQ (handle_8_bit, Qt))
+                    /* Two bytes in STRING become one raw byte.  */
+                    outbytes -= num_8_bit;
+                  else
+                    return Qnil;
+                }
+              if (num_over_4 + num_over_5 > 0)
+                {
+                  if (CHARACTERP (handle_over_uni))
+                    replace_over_uni = get_char_bytes (XFIXNUM (handle_over_uni),
+                                                       &replace_over_uni_len);
+                  else if (STRINGP (handle_over_uni))
+                    {
+                      replace_over_uni = SDATA (handle_over_uni);
+                      replace_over_uni_len = SBYTES (handle_over_uni);
+                    }
+                  if (num_over_4 > 0)
+                    {
+                      if (replace_over_uni)
+                        outbytes += (replace_over_uni_len - 4) * num_over_4;
+                      else if (EQ (handle_over_uni, Qignored))
+                        outbytes -= 4 * num_over_4;
+                      else if (! EQ (handle_over_uni, Qt))
+                        return Qnil;
+                    }
+                  if (num_over_5 > 0)
+                    {
+                      if (replace_over_uni)
+                        outbytes += (replace_over_uni_len - 5) * num_over_5;
+                      else if (EQ (handle_over_uni, Qignored))
+                        outbytes -= 5 * num_over_5;
+                      else if (! EQ (handle_over_uni, Qt))
+                        return Qnil;
+                    }
+                }
+            }
+
+          /* Prepare a return value and a space to store the encoded bytes.  */
+          if (BUFFERP (buffer))
+            {
+              val = make_fixnum (outbytes);
+              /* The gap must hold the whole result.  OUTBYTES is larger
+                 than NBYTES when a replacement is longer than the
+                 sequence it replaces, so NBYTES would be too small.  */
+              dst = get_buffer_gap_address (buffer, outbytes);
+            }
+          else
+            {
+              if (nocopy && (num_8_bit + num_over_4 + num_over_5) == 0)
+                return string;
+              val = make_uninit_string (outbytes);
+              dst = SDATA (val);
+            }
+          p = src = SDATA (string);
+          /* The first scan consumed NCHARS; restore it so that the
+             short cut for an ASCII-only tail works in the second scan
+             too.  */
+          nchars = SCHARS (string);
+        }
+    }
+
+  /* Copy the bytes remaining after the last replaced character (the
+     whole of STRING when nothing had to be changed).  */
+  if (src < pend)
+    memcpy (dst, src, pend - src);
+  if (BUFFERP (buffer))
+    {
+      struct buffer *oldb = current_buffer;
+
+      current_buffer = XBUFFER (buffer);
+      insert_from_gap (outbytes, outbytes, false);
+      current_buffer = oldb;
+    }
+  return val;
+}
+
+/* Decode STRING by the coding system utf-8-unix.
+
+   Even if :post-read-conversion and :decode-translation-table
+   properties are put to that coding system, they are ignored.
+
+   This function assumes that arguments have values as described
+   below.  The validity must be assured by callers.
+
+   STRING is a unibyte string or an ASCII-only multibyte string.
+
+   BUFFER is a multibyte buffer or Qnil.
+
+   If BUFFER is a multibyte buffer, the decoding result of Unicode
+   characters is inserted after point of the buffer, and the number
+   of inserted characters is returned.  Note that a caller should have
+   made BUFFER ready for modifying in advance (e.g. by calling
+   invalidate_buffer_caches).
+
+   If BUFFER is Qnil, a multibyte string is made from the decoding
+   result of Unicode characters, and it is returned.  As a special
+   case, STRING itself is returned in the following cases:
+   1. STRING contains only ASCII characters.
+   2. NOCOPY, and STRING contains only valid UTF-8 sequences.
+
+   HANDLE-8-BIT and HANDLE-OVER-UNI specify how to handle an invalid
+   byte sequence.  The former is for a 1-byte invalid sequence that
+   violates the fundamental UTF-8 encoding rule.  The latter is for a
+   4 or 5-byte invalid sequence that Emacs internally uses to
+   represent an over-unicode character (a character of code greater
+   than #x10FFFF).  Note that this function does not treat an overlong
+   UTF-8 sequence as invalid.
+
+   If they are strings (typically a 1-char string of the Unicode
+   REPLACEMENT CHARACTER #xFFFD), an invalid sequence is decoded into
+   that string.  They must be multibyte strings if they contain a
+   non-ASCII character.
+
+   If they are characters, an invalid sequence is decoded into the
+   corresponding multibyte representation of the characters.
+
+   If they are Qignored, an invalid sequence is skipped on decoding.
+
+   If HANDLE-8-BIT is Qt, a 1-byte invalid sequence is decoded into
+   the corresponding eight-bit character.
+
+   If HANDLE-OVER-UNI is Qt, a 4 or 5-byte invalid sequence that
+   follows Emacs' representation for an over-unicode character is
+   decoded into the corresponding character.
+
+   If they are Qnil, Qnil is returned if STRING has an invalid sequence.
+
+   A lead byte that is not followed by as many extra octets as it
+   announces is handled as a 1-byte invalid sequence (i.e. by
+   HANDLE-8-BIT); the bytes after it are then examined on their own.
+   When a replacement is a string of several characters, all of them
+   are counted in the number of resulting characters.  */
+
+Lisp_Object
+decode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
+                     bool nocopy, Lisp_Object handle_8_bit,
+                     Lisp_Object handle_over_uni)
+{
+  /* This is like BYTES_BY_CHAR_HEAD, but it is assured that C >= 0x80
+     and it returns 0 for invalid sequence.  */
+#define UTF_8_SEQUENCE_LENGTH(c)	\
+  ((c) < 0xC2 ? 0			\
+   : (c) < 0xE0 ? 2			\
+   : (c) < 0xF0 ? 3			\
+   : (c) < 0xF8 ? 4			\
+   : (c) == 0xF8 ? 5			\
+   : 0)
+
+  ptrdiff_t nbytes = SBYTES (string);
+  unsigned char *p = SDATA (string), *pend = p + nbytes;
+  ptrdiff_t num_8_bit = 0;   /* number of invalid 1-byte sequences.  */
+  ptrdiff_t num_over_4 = 0;  /* number of invalid 4-byte sequences.  */
+  ptrdiff_t num_over_5 = 0;  /* number of invalid 5-byte sequences.  */
+  ptrdiff_t outbytes = nbytes; /* number of decoded bytes.  */
+  ptrdiff_t outchars = 0;    /* number of decoded characters.  */
+  unsigned char *src = NULL, *dst = NULL;
+  bool change_byte_sequence = false;
+
+  /* Scan bytes in STRING twice.  The first scan is to count invalid
+     sequences, and the second scan is to decode STRING.  If the
+     decoding is trivial (no need of changing the byte sequence),
+     the second scan is avoided.  */
+  while (p < pend)
+    {
+      src = p;
+      /* Try short cut for an ASCII-only case.  */
+      while (p < pend && *p < 0x80) p++;
+      outchars += (p - src);
+      if (p == pend)
+        break;
+      int c = *p;
+      outchars++;
+      int len = UTF_8_SEQUENCE_LENGTH (c);
+      /* len == 0, 2, 3, 4, 5 */
+
+      /* Count the lead byte plus the extra octets that really follow
+         it, never looking beyond PEND.  */
+      int mlen = 1;
+      while (mlen < len && mlen < pend - p && UTF_8_EXTRA_OCTET_P (p[mlen]))
+        mlen++;
+
+      if (len == 0 || mlen < len)
+        {
+          /* An invalid lead byte, or a sequence that lacks some of its
+             extra octets.  Only this one byte is consumed, so that the
+             sizes computed below agree with what the second scan
+             writes.  */
+          if (NILP (handle_8_bit))
+            return Qnil;
+          num_8_bit++;
+          change_byte_sequence = true;
+          p++;
+          continue;
+        }
+
+      if (len <= 3
+          || (len == 4 && string_char (p, NULL, NULL) <= MAX_UNICODE_CHAR))
+        {
+          /* A sequence that is kept as is.  */
+          p += len;
+          continue;
+        }
+
+      /* A complete 4 or 5-byte sequence for an over-unicode character
+         was found.  A rare case.  */
+      if (NILP (handle_over_uni))
+        return Qnil;
+      if (len == 4)
+        num_over_4++;
+      else
+        num_over_5++;
+      change_byte_sequence = true;
+      p += len;
+    }
+
+  Lisp_Object val;             /* the return value.  */
+
+  if (! change_byte_sequence
+      && NILP (buffer))
+    {
+      if (nocopy)
+        return string;
+      val = make_uninit_multibyte_string (outchars, outbytes);
+      memcpy (SDATA (val), SDATA (string), nbytes);
+      return val;
+    }
+
+  /* Count the number of resulting chars and bytes.  */
+  unsigned char *replace_8_bit = NULL, *replace_over_uni = NULL;
+  int replace_8_bit_len = 0, replace_over_uni_len = 0;
+
+  if (change_byte_sequence)
+    {
+      if (num_8_bit > 0)
+        {
+          /* Number of characters in the replacement; 1 unless the
+             replacement is given by a string.  */
+          ptrdiff_t replace_chars = 1;
+
+          if (CHARACTERP (handle_8_bit))
+            replace_8_bit = get_char_bytes (XFIXNUM (handle_8_bit),
+                                            &replace_8_bit_len);
+          else if (STRINGP (handle_8_bit))
+            {
+              replace_8_bit = SDATA (handle_8_bit);
+              replace_8_bit_len = SBYTES (handle_8_bit);
+              replace_chars = SCHARS (handle_8_bit);
+            }
+          if (replace_8_bit)
+            {
+              outbytes += (replace_8_bit_len - 1) * num_8_bit;
+              outchars += (replace_chars - 1) * num_8_bit;
+            }
+          else if (EQ (handle_8_bit, Qignored))
+            {
+              outbytes -= num_8_bit;
+              outchars -= num_8_bit;
+            }
+          else /* EQ (handle_8_bit, Qt)) */
+            /* One byte becomes a 2-byte eight-bit character.  */
+            outbytes += num_8_bit;
+        }
+      /* This must not be an `else' of the above: STRING may contain
+         both kinds of invalid sequences.  */
+      if (num_over_4 + num_over_5 > 0)
+        {
+          ptrdiff_t replace_chars = 1;
+
+          if (CHARACTERP (handle_over_uni))
+            replace_over_uni = get_char_bytes (XFIXNUM (handle_over_uni),
+                                               &replace_over_uni_len);
+          else if (STRINGP (handle_over_uni))
+            {
+              replace_over_uni = SDATA (handle_over_uni);
+              replace_over_uni_len = SBYTES (handle_over_uni);
+              replace_chars = SCHARS (handle_over_uni);
+            }
+          if (replace_over_uni)
+            {
+              outbytes += ((replace_over_uni_len - 4) * num_over_4
+                           + (replace_over_uni_len - 5) * num_over_5);
+              outchars += (replace_chars - 1) * (num_over_4 + num_over_5);
+            }
+          else if (EQ (handle_over_uni, Qignored))
+            {
+              outbytes -= 4 * num_over_4 + 5 * num_over_5;
+              outchars -= num_over_4 + num_over_5;
+            }
+          /* Otherwise HANDLE_OVER_UNI is Qt and the sequences are kept
+             as they are.  */
+        }
+    }
+
+  /* Prepare a return value and a space to store the decoded bytes.  */
+  if (BUFFERP (buffer))
+    {
+      val = make_fixnum (outchars);
+      dst = get_buffer_gap_address (buffer, outbytes);
+    }
+  else
+    {
+      val = make_uninit_multibyte_string (outchars, outbytes);
+      dst = SDATA (val);
+    }
+
+  src = SDATA (string);
+  if (change_byte_sequence)
+    {
+      p = src;
+      while (p < pend)
+        {
+          int c = *p;
+          if (c < 0x80)
+            {
+              p++;
+              continue;
+            }
+          /* Classify the sequence exactly as the first scan did.  */
+          int len = UTF_8_SEQUENCE_LENGTH (c);
+          int mlen = 1;
+          while (mlen < len && mlen < pend - p
+                 && UTF_8_EXTRA_OCTET_P (p[mlen]))
+            mlen++;
+          bool invalid_byte = (len == 0 || mlen < len);
+
+          if (! invalid_byte
+              && (len <= 3
+                  || (len == 4
+                      && string_char (p, NULL, NULL) <= MAX_UNICODE_CHAR)
+                  || EQ (handle_over_uni, Qt)))
+            {
+              p += len;
+              continue;
+            }
+
+          /* Flush the unchanged bytes seen since the last replaced
+             sequence.  */
+          if (src < p)
+            {
+              memcpy (dst, src, p - src);
+              dst += p - src;
+            }
+          if (invalid_byte)
+            {
+              if (replace_8_bit)
+                {
+                  memcpy (dst, replace_8_bit, replace_8_bit_len);
+                  dst += replace_8_bit_len;
+                }
+              else if (EQ (handle_8_bit, Qt))
+                {
+                  dst += BYTE8_STRING (c, dst);
+                }
+              len = 1;
+            }
+          else                  /* len == 4 or 5 */
+            {
+              /* Handle p[0]... by handle_over_uni */
+              if (replace_over_uni)
+                {
+                  memcpy (dst, replace_over_uni, replace_over_uni_len);
+                  dst += replace_over_uni_len;
+                }
+            }
+          p += len;
+          src = p;
+        }
+    }
+
+  /* Copy the bytes remaining after the last replaced sequence (the
+     whole of STRING when nothing had to be changed).  */
+  if (src < pend)
+    memcpy (dst, src, pend - src);
+  if (BUFFERP (buffer))
+    {
+      struct buffer *oldb = current_buffer;
+
+      current_buffer = XBUFFER (buffer);
+      insert_from_gap (outchars, outbytes, false);
+      current_buffer = oldb;
+    }
+  return val;
+}
+
+/* #define ENABLE_UTF_8_CONVERTER_TEST */
+
+#ifdef ENABLE_UTF_8_CONVERTER_TEST
+
+/* These functions are useful for testing and benchmarking
+   encode_string_utf_8 and decode_string_utf_8.  */
+
+/* ENCODE_METHOD specifies which internal encoder to use.
+   If it is Qnil, use encode_string_utf_8.
+   Otherwise, use code_convert_string.
+
+   COUNT is checked by CHECK_FIXNUM; it specifies how many times to
+   call those functions with the same arguments (for benchmarking).
+   The value of the last call is returned, or Qnil if no call was
+   made.  */
+
+DEFUN ("internal-encode-string-utf-8", Finternal_encode_string_utf_8,
+       Sinternal_encode_string_utf_8, 7, 7, 0,
+       doc: /* Internal use only.*/)
+  (Lisp_Object string, Lisp_Object buffer, Lisp_Object nocopy,
+   Lisp_Object handle_8_bit, Lisp_Object handle_over_uni,
+   Lisp_Object encode_method, Lisp_Object count)
+{
+  /* Validate the arguments.  Any invalid argument makes us return
+     Qnil.  */
+  if (! STRINGP (string))
+    return Qnil;
+
+  /* BUFFER must be Qnil or a buffer whose enable_multibyte_characters
+     is nil.  */
+  bool buffer_ok
+    = (NILP (buffer)
+       || (BUFFERP (buffer)
+           && NILP (BVAR (XBUFFER (buffer), enable_multibyte_characters))));
+  if (! buffer_ok)
+    return Qnil;
+
+  /* Each handler must be Qnil, Qt, Qignored, a character, or a
+     unibyte string.  */
+  bool handle_8_bit_ok
+    = (NILP (handle_8_bit) || EQ (handle_8_bit, Qt)
+       || EQ (handle_8_bit, Qignored)
+       || CHARACTERP (handle_8_bit)
+       || (STRINGP (handle_8_bit) && ! STRING_MULTIBYTE (handle_8_bit)));
+  if (! handle_8_bit_ok)
+    return Qnil;
+
+  bool handle_over_uni_ok
+    = (NILP (handle_over_uni) || EQ (handle_over_uni, Qt)
+       || EQ (handle_over_uni, Qignored)
+       || CHARACTERP (handle_over_uni)
+       || (STRINGP (handle_over_uni) && ! STRING_MULTIBYTE (handle_over_uni)));
+  if (! handle_over_uni_ok)
+    return Qnil;
+
+  CHECK_FIXNUM (count);
+  int repeat_count = XFIXNUM (count);
+  bool nocopy_p = ! NILP (nocopy);
+  Lisp_Object val = Qnil;
+
+  /* Run an encoder according to ENCODE_METHOD.  */
+  bool use_utf_8_encoder = NILP (encode_method);
+  for (int remaining = repeat_count; remaining > 0; remaining--)
+    {
+      if (use_utf_8_encoder)
+        val = encode_string_utf_8 (string, buffer, nocopy_p,
+                                   handle_8_bit, handle_over_uni);
+      else
+        val = code_convert_string (string, Qutf_8_unix, Qnil, true,
+                                   nocopy_p, true);
+    }
+  return val;
+}
+
+/* DECODE_METHOD specifies which internal decoder to use.
+   If it is Qnil, use decode_string_utf_8.
+   If it is Qt, use code_convert_string.
+   Otherwise, use make_string_from_utf8.
+
+   COUNT is checked by CHECK_FIXNUM; it specifies how many times to
+   call those functions with the same arguments (for benchmarking).
+   The value of the last call is returned, or Qnil if no call was
+   made.  */
+
+DEFUN ("internal-decode-string-utf-8", Finternal_decode_string_utf_8,
+       Sinternal_decode_string_utf_8, 7, 7, 0,
+       doc: /* Internal use only.*/)
+  (Lisp_Object string, Lisp_Object buffer, Lisp_Object nocopy,
+   Lisp_Object handle_8_bit, Lisp_Object handle_over_uni,
+   Lisp_Object decode_method, Lisp_Object count)
+{
+  /* Validate the arguments.  Any invalid argument makes us return
+     Qnil.  */
+  if (! STRINGP (string))
+    return Qnil;
+
+  /* BUFFER must be Qnil or a buffer whose enable_multibyte_characters
+     is non-nil.  */
+  bool buffer_ok
+    = (NILP (buffer)
+       || (BUFFERP (buffer)
+           && ! NILP (BVAR (XBUFFER (buffer), enable_multibyte_characters))));
+  if (! buffer_ok)
+    return Qnil;
+
+  /* Each handler must be Qnil, Qt, Qignored, a character, or a
+     multibyte string.  */
+  bool handle_8_bit_ok
+    = (NILP (handle_8_bit) || EQ (handle_8_bit, Qt)
+       || EQ (handle_8_bit, Qignored)
+       || CHARACTERP (handle_8_bit)
+       || (STRINGP (handle_8_bit) && STRING_MULTIBYTE (handle_8_bit)));
+  if (! handle_8_bit_ok)
+    return Qnil;
+
+  bool handle_over_uni_ok
+    = (NILP (handle_over_uni) || EQ (handle_over_uni, Qt)
+       || EQ (handle_over_uni, Qignored)
+       || CHARACTERP (handle_over_uni)
+       || (STRINGP (handle_over_uni) && STRING_MULTIBYTE (handle_over_uni)));
+  if (! handle_over_uni_ok)
+    return Qnil;
+
+  CHECK_FIXNUM (count);
+  int repeat_count = XFIXNUM (count);
+  bool nocopy_p = ! NILP (nocopy);
+  Lisp_Object val = Qnil;
+
+  /* Run a decoder according to DECODE_METHOD.  */
+  if (NILP (decode_method))
+    {
+      for (int remaining = repeat_count; remaining > 0; remaining--)
+        val = decode_string_utf_8 (string, buffer, nocopy_p,
+                                   handle_8_bit, handle_over_uni);
+    }
+  else if (EQ (decode_method, Qt))
+    {
+      /* Without a destination buffer, pass Qt to code_convert_string
+         instead.  */
+      if (! BUFFERP (buffer))
+        buffer = Qt;
+      for (int remaining = repeat_count; remaining > 0; remaining--)
+        val = code_convert_string (string, Qutf_8_unix, buffer, false,
+                                   nocopy_p, true);
+    }
+  else
+    {
+      for (int remaining = repeat_count; remaining > 0; remaining--)
+        val = make_string_from_utf8 ((char *) SDATA (string), SBYTES (string));
+    }
+  return val;
+}
+
+#endif /* ENABLE_UTF_8_CONVERTER_TEST */
+
 /* Encode or decode a file name, to or from a unibyte string suitable
    for passing to C library functions.
*/ Lisp_Object @@ -10974,6 +11700,10 @@ syms_of_coding (void) defsubr (&Sencode_coding_region); defsubr (&Sdecode_coding_string); defsubr (&Sencode_coding_string); +#ifdef ENABLE_UTF_8_CONVERTER_TEST + defsubr (&Sinternal_encode_string_utf_8); + defsubr (&Sinternal_decode_string_utf_8); +#endif /* ENABLE_UTF_8_CONVERTER_TEST */ defsubr (&Sdecode_sjis_char); defsubr (&Sencode_sjis_char); defsubr (&Sdecode_big5_char); diff --git a/src/coding.h b/src/coding.h index 70690d42d30..8efddbf55c4 100644 --- a/src/coding.h +++ b/src/coding.h @@ -689,6 +689,10 @@ extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object, Lisp_Object, bool, bool, bool); extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object, bool); +extern Lisp_Object encode_string_utf_8 (Lisp_Object, Lisp_Object, bool, + Lisp_Object, Lisp_Object); +extern Lisp_Object decode_string_utf_8 (Lisp_Object, Lisp_Object, bool, + Lisp_Object, Lisp_Object); extern Lisp_Object encode_file_name (Lisp_Object); extern Lisp_Object decode_file_name (Lisp_Object); extern Lisp_Object raw_text_coding_system (Lisp_Object);