From 75ee20364c5ed4c175b13debaa53a2ba14168999 Mon Sep 17 00:00:00 2001 From: Philipp Stephani Date: Sun, 28 Apr 2019 12:28:27 +0200 Subject: [PATCH] Refactoring: move UTF-8 decoding functions into coding.h. json_make_string and json_build_string are generally useful and not JSON-specific. Move them to coding.[ch]. * src/coding.h (build_utf8_string): Move from json.c. * src/coding.c (make_utf8_string): Move from json.c. * src/json.c (json_make_string, json_build_string): Move to coding.[ch]. Split out JSON-specific comment. (json_parse_error, Fjson_serialize, json_to_lisp): Fix callers. * src/emacs-module.c (module_make_function, module_make_string): Use new functions. (module_decode, module_decode_copy): Remove. --- src/coding.c | 19 +++++++++++++++++ src/coding.h | 12 +++++++++++ src/emacs-module.c | 23 +++----------------- src/json.c | 52 +++++++--------------------------------------- 4 files changed, 42 insertions(+), 64 deletions(-) diff --git a/src/coding.c b/src/coding.c index 2c6b2c4d051..71f687a14e3 100644 --- a/src/coding.c +++ b/src/coding.c @@ -6353,6 +6353,25 @@ utf8_string_p (Lisp_Object string) return check_utf_8 (&coding) != -1; } +Lisp_Object +make_utf8_string (const char *data, ptrdiff_t size) +{ + ptrdiff_t chars, bytes; + parse_str_as_multibyte ((const unsigned char *) data, size, &chars, &bytes); + /* If DATA is a valid UTF-8 string, we can convert it to a Lisp + string directly. Otherwise, we need to decode it. */ + if (chars == size || bytes == size) + return make_specified_string (data, chars, size, true); + else + { + struct coding_system coding; + setup_coding_system (Qutf_8_unix, &coding); + coding.mode |= CODING_MODE_LAST_BLOCK; + coding.source = (const unsigned char *) data; + decode_coding_object (&coding, Qnil, 0, 0, size, size, Qt); + return coding.dst_object; + } +} /* Detect how end-of-line of a text of length SRC_BYTES pointed by SOURCE is encoded. 
If CATEGORY is one of diff --git a/src/coding.h b/src/coding.h index 0c03d1a44ed..773df9abb90 100644 --- a/src/coding.h +++ b/src/coding.h @@ -695,6 +695,7 @@ extern Lisp_Object raw_text_coding_system (Lisp_Object); extern bool raw_text_coding_system_p (struct coding_system *); extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object); extern Lisp_Object complement_process_encoding_system (Lisp_Object); +extern Lisp_Object make_utf8_string (const char *, ptrdiff_t); extern void decode_coding_gap (struct coding_system *, ptrdiff_t, ptrdiff_t); @@ -762,6 +763,17 @@ surrogates_to_codepoint (int low, int high) return 0x10000 + (low - 0xDC00) + ((high - 0xD800) * 0x400); } +/* Create a multibyte Lisp string from the NUL-terminated UTF-8 string + beginning at DATA. If the string is not a valid UTF-8 string, an + unspecified string is returned. */ + +INLINE Lisp_Object +build_utf8_string (const char *data) +{ + return make_utf8_string (data, strlen (data)); +} + + extern Lisp_Object preferred_coding_system (void); /* Coding system to be used to encode text for terminal display when diff --git a/src/emacs-module.c b/src/emacs-module.c index 80a04bafc2d..b9050942559 100644 --- a/src/emacs-module.c +++ b/src/emacs-module.c @@ -223,8 +223,6 @@ static void module_reset_handlerlist (struct handler **); static bool value_storage_contains_p (const struct emacs_value_storage *, emacs_value, ptrdiff_t *); static Lisp_Object module_encode (Lisp_Object); -static Lisp_Object module_decode (Lisp_Object); -static Lisp_Object module_decode_copy (Lisp_Object); static bool module_assertions = false; @@ -532,10 +530,7 @@ module_make_function (emacs_env *env, ptrdiff_t min_arity, ptrdiff_t max_arity, function->data = data; if (documentation) - { - AUTO_STRING (unibyte_doc, documentation); - function->documentation = module_decode_copy (unibyte_doc); - } + function->documentation = build_utf8_string (documentation); Lisp_Object result; XSET_MODULE_FUNCTION (result, function); @@ 
-668,8 +663,8 @@ module_make_string (emacs_env *env, const char *str, ptrdiff_t length) MODULE_FUNCTION_BEGIN (NULL); if (! (0 <= length && length <= STRING_BYTES_BOUND)) overflow_error (); - Lisp_Object lstr = make_unibyte_string (str, length); - return lisp_to_value (env, module_decode (lstr)); + Lisp_Object lstr = make_utf8_string (str, length); + return lisp_to_value (env, lstr); } static emacs_value @@ -1030,18 +1025,6 @@ module_encode (Lisp_Object string) return code_convert_string (string, Qutf_8_unix, Qt, true, true, true); } -static Lisp_Object -module_decode (Lisp_Object string) -{ - return code_convert_string (string, Qutf_8_unix, Qt, false, true, true); -} - -static Lisp_Object -module_decode_copy (Lisp_Object string) -{ - return code_convert_string (string, Qutf_8_unix, Qt, false, false, true); -} - /* Value conversion. */ diff --git a/src/json.c b/src/json.c index 03468e9f338..cc98914423b 100644 --- a/src/json.c +++ b/src/json.c @@ -215,47 +215,11 @@ json_has_suffix (const char *string, const char *suffix) #endif -/* Create a multibyte Lisp string from the UTF-8 string in - [DATA, DATA + SIZE). If the range [DATA, DATA + SIZE) does not - contain a valid UTF-8 string, the returned string will include raw - bytes. - Note that all callers below either pass only value UTF-8 strings or - use this function for formatting error messages; in the latter case - correctness isn't critical. */ - -static Lisp_Object -json_make_string (const char *data, ptrdiff_t size) -{ - ptrdiff_t chars, bytes; - parse_str_as_multibyte ((const unsigned char *) data, size, &chars, &bytes); - /* If DATA is a valid UTF-8 string, we can convert it to a Lisp - string directly. Otherwise, we need to decode it. 
*/ - if (chars == size || bytes == size) - return make_specified_string (data, chars, size, true); - else - { - struct coding_system coding; - setup_coding_system (Qutf_8_unix, &coding); - coding.mode |= CODING_MODE_LAST_BLOCK; - coding.source = (const unsigned char *) data; - decode_coding_object (&coding, Qnil, 0, 0, size, size, Qt); - return coding.dst_object; - } -} - -/* Create a multibyte Lisp string from the NUL-terminated UTF-8 - string beginning at DATA. If the string is not a valid UTF-8 - string, an unspecified string is returned. Note that all callers - below either pass only value UTF-8 strings or use this function for +/* Note that all callers of make_utf8_string and build_utf8_string + below either pass only valid UTF-8 strings or use the functions for formatting error messages; in the latter case correctness isn't critical. */ -static Lisp_Object -json_build_string (const char *data) -{ - return json_make_string (data, strlen (data)); -} - /* Return a unibyte string containing the sequence of UTF-8 encoding units of the UTF-8 representation of STRING. 
If STRING does not represent a sequence of Unicode scalar values, return a string with @@ -303,8 +267,8 @@ json_parse_error (const json_error_t *error) symbol = Qjson_parse_error; #endif xsignal (symbol, - list5 (json_build_string (error->text), - json_build_string (error->source), INT_TO_INTEGER (error->line), + list5 (build_utf8_string (error->text), + build_utf8_string (error->source), INT_TO_INTEGER (error->line), INT_TO_INTEGER (error->column), INT_TO_INTEGER (error->position))); } @@ -648,7 +612,7 @@ usage: (json-serialize OBJECT &rest ARGS) */) json_out_of_memory (); record_unwind_protect_ptr (json_free, string); - return unbind_to (count, json_build_string (string)); + return unbind_to (count, build_utf8_string (string)); } struct json_buffer_and_size @@ -855,7 +819,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf) case JSON_REAL: return make_float (json_real_value (json)); case JSON_STRING: - return json_make_string (json_string_value (json), + return make_utf8_string (json_string_value (json), json_string_length (json)); case JSON_ARRAY: { @@ -915,7 +879,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf) json_t *value; json_object_foreach (json, key_str, value) { - Lisp_Object key = json_build_string (key_str); + Lisp_Object key = build_utf8_string (key_str); EMACS_UINT hash; ptrdiff_t i = hash_lookup (h, key, &hash); /* Keys in JSON objects are unique, so the key can't @@ -932,7 +896,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf) json_t *value; json_object_foreach (json, key_str, value) { - Lisp_Object key = Fintern (json_build_string (key_str), Qnil); + Lisp_Object key = Fintern (build_utf8_string (key_str), Qnil); result = Fcons (Fcons (key, json_to_lisp (value, conf)), result); -- 2.39.2