From 98e5639c3c3caf2424f35e4a9f9c53ff48f43897 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Wed, 12 May 2021 16:41:03 +0300 Subject: [PATCH] Fix the tests for 'string-limit' * test/lisp/emacs-lisp/subr-x-tests.el (subr-string-limit-coding): Fix the expected results of string-limit when encoding with UTF-16. Add tests for UTF-8 with BOM. (Bug#48324) * lisp/emacs-lisp/subr-x.el (string-limit): Add FIXME comment about the current implementation, which is faulty by design. --- lisp/emacs-lisp/subr-x.el | 12 ++++++++++++ test/lisp/emacs-lisp/subr-x-tests.el | 7 +++++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lisp/emacs-lisp/subr-x.el b/lisp/emacs-lisp/subr-x.el index 9c8c967ee9c..5a8885c0427 100644 --- a/lisp/emacs-lisp/subr-x.el +++ b/lisp/emacs-lisp/subr-x.el @@ -289,6 +289,18 @@ than this function." (let ((result nil) (result-length 0) (index (if end (1- (length string)) 0))) + ;; FIXME: This implementation, which uses encode-coding-char + ;; to encode the string one character at a time, is in general + ;; incorrect: coding-systems that produce prefix or suffix + ;; bytes, such as ISO-2022-based or UTF-8/16 with BOM, will + ;; produce those bytes for each character, instead of just + ;; once for the entire string. encode-coding-char attempts to + ;; remove those extra bytes at least in some situations, but + ;; it cannot do that in all cases. And in any case, producing + ;; what is supposed to be a UTF-16 or ISO-2022-CN encoded + ;; string which lacks the BOM bytes at the beginning and the + ;; charset designation sequences at the head and tail of the + ;; result will definitely surprise the callers in some cases. (while (let ((encoded (encode-coding-char (aref string index) coding-system))) (and (<= (+ (length encoded) result-length) length) diff --git a/test/lisp/emacs-lisp/subr-x-tests.el b/test/lisp/emacs-lisp/subr-x-tests.el index 112f3c1dac1..ef04cde3867 100644 --- a/test/lisp/emacs-lisp/subr-x-tests.el +++ b/test/lisp/emacs-lisp/subr-x-tests.el @@ -607,18 +607,21 @@ (should (equal (string-limit "foó" 4 nil 'utf-8) "fo\303\263")) (should (equal (string-limit "foóa" 4 nil 'utf-8) "fo\303\263")) (should (equal (string-limit "foóá" 4 nil 'utf-8) "fo\303\263")) + (should (equal (string-limit "foóá" 4 nil 'utf-8-with-signature) + "fo\303\263")) (should (equal (string-limit "foóa" 4 nil 'iso-8859-1) "fo\363a")) (should (equal (string-limit "foóá" 4 nil 'iso-8859-1) "fo\363\341")) - (should (equal (string-limit "foóá" 4 nil 'utf-16) "\376\377\000f")) + (should (equal (string-limit "foóá" 4 nil 'utf-16) "\000f\000o")) (should (equal (string-limit "foó" 10 t 'utf-8) "fo\303\263")) (should (equal (string-limit "foó" 3 t 'utf-8) "o\303\263")) (should (equal (string-limit "foó" 4 t 'utf-8) "fo\303\263")) (should (equal (string-limit "foóa" 4 t 'utf-8) "o\303\263a")) (should (equal (string-limit "foóá" 4 t 'utf-8) "\303\263\303\241")) + (should (equal (string-limit "foóá" 2 t 'utf-8-with-signature) "\303\241")) (should (equal (string-limit "foóa" 4 t 'iso-8859-1) "fo\363a")) (should (equal (string-limit "foóá" 4 t 'iso-8859-1) "fo\363\341")) - (should (equal (string-limit "foóá" 4 t 'utf-16) "\376\377\000\341"))) + (should (equal (string-limit "foóá" 4 t 'utf-16) "\000\363\000\341"))) (ert-deftest subr-string-lines () (should (equal (string-lines "foo") '("foo"))) -- 2.39.5