From 0526e4f63b92e4225488a0558120c5a2536d6c5e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Mon, 13 May 2024 10:44:05 +0200 Subject: [PATCH] Avoid resizing mutation in subst-char-in-string, take two This time we take care to preserve properties, and add a test. * lisp/subr.el (subst-char-in-string): Use string-replace to avoid resizing mutation and O(n^2) time. * test/lisp/subr-tests.el (subr--subst-char-in-string): New test. (cherry picked from commit 49e243c0c85d18fc775970d9ebd846eba3a6866e) --- lisp/subr.el | 26 ++++++++++++++++------ test/lisp/subr-tests.el | 48 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 7 deletions(-) diff --git a/lisp/subr.el b/lisp/subr.el index de5d5c72911..3ddc0d192d2 100644 --- a/lisp/subr.el +++ b/lisp/subr.el @@ -5694,13 +5694,25 @@ The SEPARATOR regexp defaults to \"\\s-+\"." (defun subst-char-in-string (fromchar tochar string &optional inplace) "Replace FROMCHAR with TOCHAR in STRING each time it occurs. Unless optional argument INPLACE is non-nil, return a new string." - (let ((i (length string)) - (newstr (if inplace string (copy-sequence string)))) - (while (> i 0) - (setq i (1- i)) - (if (eq (aref newstr i) fromchar) - (aset newstr i tochar))) - newstr)) + (if (and (not inplace) + (if (multibyte-string-p string) + (> (max fromchar tochar) 127) + (> tochar 255))) + ;; Avoid quadratic behaviour from resizing replacement. + (let ((res (string-replace (string fromchar) (string tochar) string))) + (unless (eq res string) + ;; Mend properties broken by the replacement. + ;; Not fast, but this case never was. + (dolist (p (object-intervals string)) + (set-text-properties (nth 0 p) (nth 1 p) (nth 2 p) res))) + res) + (let ((i (length string)) + (newstr (if inplace string (copy-sequence string)))) + (while (> i 0) + (setq i (1- i)) + (if (eq (aref newstr i) fromchar) + (aset newstr i tochar))) + newstr))) (defun string-replace (from-string to-string in-string) "Replace FROM-STRING with TO-STRING in IN-STRING each time it occurs." diff --git a/test/lisp/subr-tests.el b/test/lisp/subr-tests.el index 119c124f3a5..6f28e057342 100644 --- a/test/lisp/subr-tests.el +++ b/test/lisp/subr-tests.el @@ -1330,5 +1330,53 @@ final or penultimate step during initialization.")) (t x) (:success (1+ x))) '(error ""))))) +(ert-deftest subr--subst-char-in-string () + ;; Cross-validate `subst-char-in-string' with `string-replace', + ;; which should produce the same results when there are no properties. + (dolist (str '("ananas" "na\x80ma\x80s" "hétérogénéité" + "Ω, Ω, Ω" "é-\x80-\x80")) + (dolist (mb '(nil t)) + (unless (and (not mb) (multibyte-string-p str)) + (let ((str (if (and mb (not (multibyte-string-p str))) + (string-to-multibyte str) + str))) + (dolist (inplace '(nil t)) + (dolist (from '(?a ?é ?Ω #x80 #x3fff80)) + (dolist (to '(?o ?á ?ƒ ?☃ #x1313f #xff #x3fffc9)) + ;; Can't put a non-byte value in a non-ASCII unibyte string. + (unless (and (not mb) (> to #xff) + (not (string-match-p (rx bos (* ascii) eos) str))) + (let* ((in (copy-sequence str)) + (ref (if (and (not mb) (> from #xff)) + in ; nothing to replace + (string-replace + (if (and (not mb) (<= from #xff)) + (unibyte-string from) + (string from)) + (if (and (not mb) (<= to #xff)) + (unibyte-string to) + (string to)) + in))) + (out (subst-char-in-string from to in inplace))) + (should (equal out ref)) + (if inplace + (should (eq out in)) + (should (equal in str)))))))))))) + + ;; Verify that properties are preserved. + (dolist (str (list "cocoa" (string-to-multibyte "cocoa") "écalé")) + (dolist (from '(?a ?o ?c ?é)) + (dolist (to '(?i ?à ?☃)) + (let ((in (copy-sequence str))) + (put-text-property 0 5 'alpha 1 in) + (put-text-property 1 4 'beta 2 in) + (put-text-property 0 2 'gamma 3 in) + (put-text-property 1 4 'delta 4 in) + (put-text-property 2 3 'epsilon 5 in) + (let* ((props-in (copy-tree (object-intervals in))) + (out (subst-char-in-string from to in)) + (props-out (object-intervals out))) + (should (equal props-out props-in)))))))) + (provide 'subr-tests) ;;; subr-tests.el ends here -- 2.39.5