Avoid expensive recoding for ASCII identity cases (bug#40407)

author Mattias Engdegård <mattiase@acm.org>

Fri, 3 Apr 2020 14:01:01 +0000 (16:01 +0200)

committer Mattias Engdegård <mattiase@acm.org>

Sun, 5 Apr 2020 13:37:55 +0000 (15:37 +0200)
author Mattias Engdegård <mattiase@acm.org>
Fri, 3 Apr 2020 14:01:01 +0000 (16:01 +0200)
committer Mattias Engdegård <mattiase@acm.org>
Sun, 5 Apr 2020 13:37:55 +0000 (15:37 +0200)
diff --git a/src/coding.c b/src/coding.c

index 1049f1b755afa81d2320aa865f666d9d8e183c61..97a6eb949a8523ced03bbaf30ebf4ab7b9432c8a 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -9471,6 +9471,17 @@ not fully specified.)  */)
    return code_convert_region (start, end, coding_system, destination, 1, 0);
  }
  
+/* Return true if every byte of STR is in the ASCII range 0..127.  */
+static bool
+string_ascii_p (Lisp_Object str)
+{
+  ptrdiff_t nbytes = SBYTES (str);  /* Number of bytes to scan.  */
+  for (ptrdiff_t i = 0; i < nbytes; i++)
+    if (SREF (str, i) > 127)  /* Value above 127: not ASCII, stop early.  */
+      return false;
+  return true;  /* No such byte was found; also the result if NBYTES is 0.  */
+}
+
  Lisp_Object
  code_convert_string (Lisp_Object string, Lisp_Object coding_system,
                      Lisp_Object dst_object, bool encodep, bool nocopy,
@@ -9502,7 +9513,21 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
    chars = SCHARS (string);
    bytes = SBYTES (string);
  
-  if (BUFFERP (dst_object))
+  if (EQ (dst_object, Qt))
+    {
+      /* Fast path for ASCII-only input and an ASCII-compatible coding:
+         act as identity.  */
+      Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
+      if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
+          && (STRING_MULTIBYTE (string)
+              ? (chars == bytes) : string_ascii_p (string)))
+       return (nocopy
+                ? string
+                : (encodep
+                   ? make_unibyte_string (SDATA (string), bytes)
+                   : make_multibyte_string (SDATA (string), bytes, bytes)));
+    }
+  else if (BUFFERP (dst_object))
      {
        struct buffer *buf = XBUFFER (dst_object);
        ptrdiff_t buf_pt = BUF_PT (buf);
diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el

index 110ff1269645bcb83e8bebcd4831006cea8c2438..93e6709d442cf84e3b46888adf16b28f30c58dc1 100644 (file)
--- a/test/src/coding-tests.el
+++ b/test/src/coding-tests.el
@@ -383,6 +383,17 @@
      (should-not (eq (encode-coding-string s nil nil) s))
      (should (eq (encode-coding-string s nil t) s))))
  
+(ert-deftest coding-nocopy-ascii ()
+  "Check that the NOCOPY parameter works for ASCII-only strings."
+  (let* ((uni (apply #'string (number-sequence 0 127))) ; chars 0..127
+         (multi (string-to-multibyte uni)))             ; multibyte version
+    (dolist (s (list uni multi))        ; run every check on both strings
+      (dolist (coding '(us-ascii iso-latin-1 utf-8))
+        (should-not (eq (decode-coding-string s coding nil) s)) ; not S
+        (should-not (eq (encode-coding-string s coding nil) s)) ; not S
+        (should (eq (decode-coding-string s coding t) s))       ; S itself
+        (should (eq (encode-coding-string s coding t) s))))))   ; S itself
+
  ;; Local Variables:
  ;; byte-compile-warnings: (not obsolete)
  ;; End:
author	Mattias Engdegård <mattiase@acm.org>
	Fri, 3 Apr 2020 14:01:01 +0000 (16:01 +0200)
committer	Mattias Engdegård <mattiase@acm.org>
	Sun, 5 Apr 2020 13:37:55 +0000 (15:37 +0200)
src/coding.c		patch | blob | history
test/src/coding-tests.el		patch | blob | history