Refactoring: move UTF-8 decoding functions into coding.h.

author Philipp Stephani <phst@google.com>

Sun, 28 Apr 2019 10:28:27 +0000 (12:28 +0200)

committer Philipp Stephani <phst@google.com>

Sun, 28 Apr 2019 10:28:27 +0000 (12:28 +0200)
author Philipp Stephani <phst@google.com>
Sun, 28 Apr 2019 10:28:27 +0000 (12:28 +0200)
committer Philipp Stephani <phst@google.com>
Sun, 28 Apr 2019 10:28:27 +0000 (12:28 +0200)
diff --git a/src/coding.c b/src/coding.c

index 2c6b2c4d0512ea4fa993338679bca76c9a163fcc..71f687a14e3d050faf87d1a4945bfd557fbe45f2 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -6353,6 +6353,25 @@ utf8_string_p (Lisp_Object string)
    return check_utf_8 (&coding) != -1;
  }
  
+/* Create a multibyte Lisp string from the UTF-8 string in
+   [DATA, DATA + SIZE).  If the range [DATA, DATA + SIZE) does not
+   contain a valid UTF-8 string, the returned string will include raw
+   bytes.  */
+
+Lisp_Object
+make_utf8_string (const char *data, ptrdiff_t size)
+{
+  ptrdiff_t chars, bytes;
+  parse_str_as_multibyte ((const unsigned char *) data, size, &chars, &bytes);
+  /* If DATA is a valid UTF-8 string, we can convert it to a Lisp
+     string directly.  Otherwise, we need to decode it.  */
+  if (chars == size || bytes == size)
+    return make_specified_string (data, chars, size, true);
+  else
+    {
+      struct coding_system coding;
+      setup_coding_system (Qutf_8_unix, &coding);
+      coding.mode |= CODING_MODE_LAST_BLOCK;
+      coding.source = (const unsigned char *) data;
+      decode_coding_object (&coding, Qnil, 0, 0, size, size, Qt);
+      return coding.dst_object;
+    }
+}
  
  /* Detect how end-of-line of a text of length SRC_BYTES pointed by
     SOURCE is encoded.  If CATEGORY is one of
diff --git a/src/coding.h b/src/coding.h

index 0c03d1a44edd5e0d5f3b6aa76345314e20a4f2b4..773df9abb904b118687e6c33c09604732f29525c 100644 (file)
--- a/src/coding.h
+++ b/src/coding.h
@@ -695,6 +695,7 @@ extern Lisp_Object raw_text_coding_system (Lisp_Object);
  extern bool raw_text_coding_system_p (struct coding_system *);
  extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
  extern Lisp_Object complement_process_encoding_system (Lisp_Object);
+extern Lisp_Object make_utf8_string (const char *, ptrdiff_t);
  
  extern void decode_coding_gap (struct coding_system *,
                                ptrdiff_t, ptrdiff_t);
@@ -762,6 +763,17 @@ surrogates_to_codepoint (int low, int high)
    return 0x10000 + (low - 0xDC00) + ((high - 0xD800) * 0x400);
  }
  
+/* Create a multibyte Lisp string from the NUL-terminated UTF-8 string
+   beginning at DATA.  The length is measured with strlen and the
+   conversion is delegated to make_utf8_string.  If the string is not
+   a valid UTF-8 string, an unspecified string is returned.  */
+
+INLINE Lisp_Object
+build_utf8_string (const char *data)
+{
+  return make_utf8_string (data, strlen (data));
+}
+
+
  extern Lisp_Object preferred_coding_system (void);
  
  /* Coding system to be used to encode text for terminal display when
diff --git a/src/emacs-module.c b/src/emacs-module.c

index 80a04bafc2dd46375a13146d8acf0fa48987a94d..b90509425591f31856a76cb2f6fc8dadedb570e3 100644 (file)
--- a/src/emacs-module.c
+++ b/src/emacs-module.c
@@ -223,8 +223,6 @@ static void module_reset_handlerlist (struct handler **);
  static bool value_storage_contains_p (const struct emacs_value_storage *,
                                        emacs_value, ptrdiff_t *);
  static Lisp_Object module_encode (Lisp_Object);
-static Lisp_Object module_decode (Lisp_Object);
-static Lisp_Object module_decode_copy (Lisp_Object);
  
  static bool module_assertions = false;
  \f
@@ -532,10 +530,7 @@ module_make_function (emacs_env *env, ptrdiff_t min_arity, ptrdiff_t max_arity,
    function->data = data;
  
    if (documentation)
-    {
-      AUTO_STRING (unibyte_doc, documentation);
-      function->documentation = module_decode_copy (unibyte_doc);
-    }
+    function->documentation = build_utf8_string (documentation);
  
    Lisp_Object result;
    XSET_MODULE_FUNCTION (result, function);
@@ -668,8 +663,8 @@ module_make_string (emacs_env *env, const char *str, ptrdiff_t length)
    MODULE_FUNCTION_BEGIN (NULL);
    if (! (0 <= length && length <= STRING_BYTES_BOUND))
      overflow_error ();
-  Lisp_Object lstr = make_unibyte_string (str, length);
-  return lisp_to_value (env, module_decode (lstr));
+  Lisp_Object lstr = make_utf8_string (str, length);
+  return lisp_to_value (env, lstr);
  }
  
  static emacs_value
@@ -1030,18 +1025,6 @@ module_encode (Lisp_Object string)
    return code_convert_string (string, Qutf_8_unix, Qt, true, true, true);
  }
  
-static Lisp_Object
-module_decode (Lisp_Object string)
-{
-  return code_convert_string (string, Qutf_8_unix, Qt, false, true, true);
-}
-
-static Lisp_Object
-module_decode_copy (Lisp_Object string)
-{
-  return code_convert_string (string, Qutf_8_unix, Qt, false, false, true);
-}
-
  \f
  /* Value conversion.  */
  
diff --git a/src/json.c b/src/json.c

index 03468e9f3380248efaff18b7378a07df4dab331a..cc98914423b4e125aa4d53f48c0c318a8230f984 100644 (file)
--- a/src/json.c
+++ b/src/json.c
@@ -215,47 +215,11 @@ json_has_suffix (const char *string, const char *suffix)
  
  #endif
  
-/* Create a multibyte Lisp string from the UTF-8 string in
-   [DATA, DATA + SIZE).  If the range [DATA, DATA + SIZE) does not
-   contain a valid UTF-8 string, the returned string will include raw
-   bytes.
-   Note that all callers below either pass only value UTF-8 strings or
-   use this function for formatting error messages; in the latter case
-   correctness isn't critical.  */
-
-static Lisp_Object
-json_make_string (const char *data, ptrdiff_t size)
-{
-  ptrdiff_t chars, bytes;
-  parse_str_as_multibyte ((const unsigned char *) data, size, &chars, &bytes);
-  /* If DATA is a valid UTF-8 string, we can convert it to a Lisp
-     string directly.  Otherwise, we need to decode it.  */
-  if (chars == size || bytes == size)
-    return make_specified_string (data, chars, size, true);
-  else
-    {
-      struct coding_system coding;
-      setup_coding_system (Qutf_8_unix, &coding);
-      coding.mode |= CODING_MODE_LAST_BLOCK;
-      coding.source = (const unsigned char *) data;
-      decode_coding_object (&coding, Qnil, 0, 0, size, size, Qt);
-      return coding.dst_object;
-    }
-}
-
-/* Create a multibyte Lisp string from the NUL-terminated UTF-8
-   string beginning at DATA.  If the string is not a valid UTF-8
-   string, an unspecified string is returned.  Note that all callers
-   below either pass only value UTF-8 strings or use this function for
+/* Note that all callers of make_utf8_string and build_utf8_string
+   below either pass only valid UTF-8 strings or use the functions for
     formatting error messages; in the latter case correctness isn't
     critical.  */
  
-static Lisp_Object
-json_build_string (const char *data)
-{
-  return json_make_string (data, strlen (data));
-}
-
  /* Return a unibyte string containing the sequence of UTF-8 encoding
     units of the UTF-8 representation of STRING.  If STRING does not
     represent a sequence of Unicode scalar values, return a string with
@@ -303,8 +267,8 @@ json_parse_error (const json_error_t *error)
      symbol = Qjson_parse_error;
  #endif
    xsignal (symbol,
-           list5 (json_build_string (error->text),
-                  json_build_string (error->source), INT_TO_INTEGER (error->line),
+           list5 (build_utf8_string (error->text),
+                  build_utf8_string (error->source), INT_TO_INTEGER (error->line),
                    INT_TO_INTEGER (error->column), INT_TO_INTEGER (error->position)));
  }
  
@@ -648,7 +612,7 @@ usage: (json-serialize OBJECT &rest ARGS)  */)
      json_out_of_memory ();
    record_unwind_protect_ptr (json_free, string);
  
-  return unbind_to (count, json_build_string (string));
+  return unbind_to (count, build_utf8_string (string));
  }
  
  struct json_buffer_and_size
@@ -855,7 +819,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf)
      case JSON_REAL:
        return make_float (json_real_value (json));
      case JSON_STRING:
-      return json_make_string (json_string_value (json),
+      return make_utf8_string (json_string_value (json),
                                 json_string_length (json));
      case JSON_ARRAY:
        {
@@ -915,7 +879,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf)
                json_t *value;
                json_object_foreach (json, key_str, value)
                  {
-                  Lisp_Object key = json_build_string (key_str);
+                  Lisp_Object key = build_utf8_string (key_str);
                    EMACS_UINT hash;
                    ptrdiff_t i = hash_lookup (h, key, &hash);
                    /* Keys in JSON objects are unique, so the key can't
@@ -932,7 +896,7 @@ json_to_lisp (json_t *json, struct json_configuration *conf)
                json_t *value;
                json_object_foreach (json, key_str, value)
                  {
-                  Lisp_Object key = Fintern (json_build_string (key_str), Qnil);
+                  Lisp_Object key = Fintern (build_utf8_string (key_str), Qnil);
                    result
                      = Fcons (Fcons (key, json_to_lisp (value, conf)),
                               result);
author	Philipp Stephani <phst@google.com>
	Sun, 28 Apr 2019 10:28:27 +0000 (12:28 +0200)
committer	Philipp Stephani <phst@google.com>
	Sun, 28 Apr 2019 10:28:27 +0000 (12:28 +0200)
src/coding.c		patch \| blob \| history
src/coding.h		patch \| blob \| history
src/emacs-module.c		patch \| blob \| history
src/json.c		patch \| blob \| history