From d3e2c88041b4844422bda64b1ee51678dc8a2e88 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?=
Date: Thu, 9 Apr 2020 12:04:22 +0200
Subject: [PATCH] Fix ASCII-only conversion logic (bug#40407)

To sidestep conversion altogether when EOL conversion applies, we must
either be encoding a string without NL, or decoding without CR.

* src/coding.c (string_ascii_p): Revert to a pure predicate.
(code_convert_string): Fix logic.  Don't use uninitialised
ascii_p (removed).  Use memchr to detect CR or LF in string when needed.
* test/src/coding-tests.el (coding-nocopy-ascii):
Update tests to include encodings with explicit EOL conversions.
---
 src/coding.c             | 46 +++++++++++++++-------------------------
 test/src/coding-tests.el | 33 +++++++++++++++++-----------
 2 files changed, 38 insertions(+), 41 deletions(-)

diff --git a/src/coding.c b/src/coding.c
index ffcb9cf0a1a..450c498f1e8 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -9474,22 +9474,15 @@ not fully specified.)  */)
   return code_convert_region (start, end, coding_system, destination, 1, 0);
 }
 
-/* Non-zero if STR contains only characters in the 0..127 range.
-   Positive if STR includes characters that don't need EOL conversion
-   on decoding, negative otherwise.  */
-static int
-string_ascii_p (Lisp_Object str)
+/* Whether STRING only contains chars in the 0..127 range.  */
+static bool
+string_ascii_p (Lisp_Object string)
 {
-  ptrdiff_t nbytes = SBYTES (str);
-  bool CR_Seen = false;
+  ptrdiff_t nbytes = SBYTES (string);
   for (ptrdiff_t i = 0; i < nbytes; i++)
-    {
-      if (SREF (str, i) > 127)
-        return 0;
-      if (SREF (str, i) == '\r')
-        CR_Seen = true;
-    }
-  return CR_Seen ? -1 : 1;
+    if (SREF (string, i) > 127)
+      return false;
+  return true;
 }
 
 Lisp_Object
@@ -9526,24 +9519,19 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
   if (EQ (dst_object, Qt))
     {
       /* Fast path for ASCII-only input and an ASCII-compatible coding:
-         act as identity if no EOL conversion is neede.  */
-      int ascii_p;
+         act as identity if no EOL conversion is needed.  */
       Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
       if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
           && (STRING_MULTIBYTE (string)
-              ? (chars == bytes) : ((ascii_p = string_ascii_p (string)) != 0)))
-        {
-          if (ascii_p > 0
-              || (ascii_p < 0
-                  && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
-                      || inhibit_eol_conversion)))
-            return (nocopy
-                    ? string
-                    : (encodep
-                       ? make_unibyte_string (SSDATA (string), bytes)
-                       : make_multibyte_string (SSDATA (string),
-                                                bytes, bytes)));
-        }
+              ? (chars == bytes) : string_ascii_p (string))
+          && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
+              || inhibit_eol_conversion
+              || ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes)))
+        return (nocopy
+                ? string
+                : (encodep
+                   ? make_unibyte_string (SSDATA (string), bytes)
+                   : make_multibyte_string (SSDATA (string), bytes, bytes)));
     }
   else if (BUFFERP (dst_object))
     {
diff --git a/test/src/coding-tests.el b/test/src/coding-tests.el
index 8d92bcdcd1a..9f6fac3edd8 100644
--- a/test/src/coding-tests.el
+++ b/test/src/coding-tests.el
@@ -388,29 +388,38 @@
   (let* ((uni (apply #'string (number-sequence 0 127)))
          (multi (string-to-multibyte uni)))
     (dolist (s (list uni multi))
+      ;; Encodings without EOL conversion.
       (dolist (coding '(us-ascii-unix iso-latin-1-unix utf-8-unix))
         (should-not (eq (decode-coding-string s coding nil) s))
         (should-not (eq (encode-coding-string s coding nil) s))
         (should (eq (decode-coding-string s coding t) s))
-        (should (eq (encode-coding-string s coding t) s)))))
-  (let* ((uni (apply #'string (number-sequence 15 127)))
+        (should (eq (encode-coding-string s coding t) s)))
+
+      ;; With EOL conversion inhibited.
+      (let ((inhibit-eol-conversion t))
+        (dolist (coding '(us-ascii iso-latin-1 utf-8))
+          (should-not (eq (decode-coding-string s coding nil) s))
+          (should-not (eq (encode-coding-string s coding nil) s))
+          (should (eq (decode-coding-string s coding t) s))
+          (should (eq (encode-coding-string s coding t) s))))))
+
+  ;; Check identity decoding with EOL conversion for ASCII except CR.
+  (let* ((uni (apply #'string (delq ?\r (number-sequence 0 127))))
          (multi (string-to-multibyte uni)))
     (dolist (s (list uni multi))
-      (dolist (coding '(us-ascii iso-latin-1 utf-8))
+      (dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac))
         (should-not (eq (decode-coding-string s coding nil) s))
-        (should-not (eq (encode-coding-string s coding nil) s))
-        (should (eq (decode-coding-string s coding t) s))
-        (should (eq (encode-coding-string s coding t) s)))))
-  (let* ((uni (apply #'string (number-sequence 0 127)))
-         (multi (string-to-multibyte uni))
-         (inhibit-eol-conversion t))
+        (should (eq (decode-coding-string s coding t) s)))))
+
+  ;; Check identity encoding with EOL conversion for ASCII except LF.
+  (let* ((uni (apply #'string (delq ?\n (number-sequence 0 127))))
+         (multi (string-to-multibyte uni)))
     (dolist (s (list uni multi))
-      (dolist (coding '(us-ascii iso-latin-1 utf-8))
-        (should-not (eq (decode-coding-string s coding nil) s))
+      (dolist (coding '(us-ascii-dos iso-latin-1-dos utf-8-dos mac-roman-mac))
         (should-not (eq (encode-coding-string s coding nil) s))
-        (should (eq (decode-coding-string s coding t) s))
         (should (eq (encode-coding-string s coding t) s))))))
 
+
 (ert-deftest coding-check-coding-systems-region ()
   (should (equal (check-coding-systems-region "aå" nil '(utf-8))
                  nil))
-- 
2.39.2