From: Eli Zaretskii Date: Thu, 9 Apr 2020 09:18:30 +0000 (+0300) Subject: Fix decoding ASCII strings with embedded CR characters X-Git-Tag: emacs-28.0.90~7627 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=faf996dc6e963a8dd74e9e794ded0467dd78ea18;p=emacs.git Fix decoding ASCII strings with embedded CR characters * src/coding.c (string_ascii_p): Return a negative value if an all-ASCII string STR includes the CR character, otherwise a positive value. (code_convert_string): If the string is ASCII, but includes CR characters, use the fast path only if EOL doesn't need to be decoded. (Bug#40519) * test/src/coding-tests.el (coding-nocopy-ascii): Add tests for bug#40519. --- diff --git a/src/coding.c b/src/coding.c index 49c1e625d57..24a832ff3ee 100644 --- a/src/coding.c +++ b/src/coding.c @@ -9471,15 +9471,22 @@ not fully specified.) */) return code_convert_region (start, end, coding_system, destination, 1, 0); } -/* Whether a string only contains chars in the 0..127 range. */ -static bool +/* Non-zero if STR contains only characters in the 0..127 range. + Positive if STR includes characters that don't need EOL conversion + on decoding, negative otherwise. */ +static int string_ascii_p (Lisp_Object str) { ptrdiff_t nbytes = SBYTES (str); + bool CR_Seen = false; for (ptrdiff_t i = 0; i < nbytes; i++) - if (SREF (str, i) > 127) - return false; - return true; + { + if (SREF (str, i) > 127) + return 0; + if (SREF (str, i) == '\r') + CR_Seen = true; + } + return CR_Seen ? -1 : 1; } Lisp_Object @@ -9517,15 +9524,23 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system, { /* Fast path for ASCII-only input and an ASCII-compatible coding: act as identity. */ + int ascii_p; Lisp_Object attrs = CODING_ID_ATTRS (coding.id); if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)) && (STRING_MULTIBYTE (string) - ? (chars == bytes) : string_ascii_p (string))) - return (nocopy - ? string - : (encodep - ? 
make_unibyte_string (SSDATA (string), bytes) - : make_multibyte_string (SSDATA (string), bytes, bytes))); + ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0))) + { + if (ascii_p > 0 + || (ascii_p < 0 + && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix) + || inhibit_eol_conversion))) + return (nocopy + ? string + : (encodep + ? make_unibyte_string (SSDATA (string), bytes) + : make_multibyte_string (SSDATA (string), + bytes, bytes))); + } } else if (BUFFERP (dst_object)) { diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el index 93e6709d442..83a06b8179e 100644 --- a/test/src/coding-tests.el +++ b/test/src/coding-tests.el @@ -387,6 +387,23 @@ "Check that the NOCOPY parameter works for ASCII-only strings." (let* ((uni (apply #'string (number-sequence 0 127))) (multi (string-to-multibyte uni))) + (dolist (s (list uni multi)) + (dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix)) + (should-not (eq (decode-coding-string s coding nil) s)) + (should-not (eq (encode-coding-string s coding nil) s)) + (should (eq (decode-coding-string s coding t) s)) + (should (eq (encode-coding-string s coding t) s))))) + (let* ((uni (apply #'string (number-sequence 15 127))) + (multi (string-to-multibyte uni))) + (dolist (s (list uni multi)) + (dolist (coding '(us-ascii iso-latin-1 utf-8)) + (should-not (eq (decode-coding-string s coding nil) s)) + (should-not (eq (encode-coding-string s coding nil) s)) + (should (eq (decode-coding-string s coding t) s)) + (should (eq (encode-coding-string s coding t) s))))) + (let* ((uni (apply #'string (number-sequence 0 127))) + (multi (string-to-multibyte uni)) + (inhibit-eol-conversion t)) (dolist (s (list uni multi)) (dolist (coding '(us-ascii iso-latin-1 utf-8)) (should-not (eq (decode-coding-string s coding nil) s))