From: Eli Zaretskii Date: Sat, 21 Jun 2025 08:08:05 +0000 (+0300) Subject: Fix 'split-string' when TRIM is used X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=3d397d385a8f83ea59beee87f5bb28d0d41a3f25;p=emacs.git Fix 'split-string' when TRIM is used * lisp/subr.el (split-string): Support the case where STRING begins with a match for SEPARATORS, and a match for SEPARATORS also matches TRIM. Doc fix. (Bug#78690) * test/lisp/subr-tests.el (subr-test-split-string): New test. (cherry picked from commit 4607d3e426115ded71f0d8c19e7187ed45c19734) --- diff --git a/lisp/subr.el b/lisp/subr.el index a372c78901a..26c2e82fc9a 100644 --- a/lisp/subr.el +++ b/lisp/subr.el @@ -5347,9 +5347,9 @@ the substrings between the splitting points are collected as a list, which is returned. If SEPARATORS is non-nil, it should be a regular expression matching text -that separates, but is not part of, the substrings. If nil it defaults to -`split-string-default-separators', normally \"[ \\f\\t\\n\\r\\v]+\", and -OMIT-NULLS is forced to t. +that separates, but is not part of, the substrings. If omitted or nil, +it defaults to `split-string-default-separators', whose value is +normally \"[ \\f\\t\\n\\r\\v]+\", and OMIT-NULLS is then forced to t. If OMIT-NULLS is t, zero-length substrings are omitted from the list (so that for the default value of SEPARATORS leading and trailing whitespace @@ -5360,11 +5360,6 @@ If TRIM is non-nil, it should be a regular expression to match text to trim from the beginning and end of each substring. If trimming makes the substring empty, it is treated as null. -If you want to trim whitespace from the substrings, the reliably correct -way is using TRIM. Making SEPARATORS match that whitespace gives incorrect -results when there is whitespace at the start or end of STRING. If you -see such calls to `split-string', please fix them. - Note that the effect of `(split-string STRING)' is the same as `(split-string STRING split-string-default-separators t)'. In the rare case that you wish to retain zero-length substrings when splitting on @@ -5377,7 +5372,9 @@ Modifies the match data; use `save-match-data' if necessary." (start 0) this-start this-end notfirst + match-beg (list nil) + (strlen (length string)) (push-one ;; Push the substring in range THIS-START to THIS-END ;; onto LIST, trimming it and perhaps discarding it. @@ -5386,6 +5383,7 @@ Modifies the match data; use `save-match-data' if necessary." ;; Discard the trim from start of this substring. (let ((tem (string-match trim string this-start))) (and (eq tem this-start) + (<= (match-end 0) this-end) (setq this-start (match-end 0))))) (when (or keep-nulls (< this-start this-end)) @@ -5403,18 +5401,25 @@ Modifies the match data; use `save-match-data' if necessary." (while (and (string-match rexp string (if (and notfirst - (= start (match-beginning 0)) - (< start (length string))) + (= start match-beg) ; empty match + (< start strlen)) (1+ start) start)) - (< start (length string))) - (setq notfirst t) - (setq this-start start this-end (match-beginning 0) - start (match-end 0)) + (< start strlen)) + (setq notfirst t + match-beg (match-beginning 0)) + ;; If the separator is right at the beginning, produce an empty + ;; substring in the result list. + (if (= start match-beg) + (setq this-start (match-end 0) + this-end this-start) + ;; Otherwise produce a substring from start to the separator. + (setq this-start start this-end match-beg)) + (setq start (match-end 0)) (funcall push-one)) ;; Handle the substring at the end of STRING. - (setq this-start start this-end (length string)) + (setq this-start start this-end strlen) (funcall push-one) (nreverse list))) diff --git a/test/lisp/subr-tests.el b/test/lisp/subr-tests.el index 024cbe85bba..f598ee57f8d 100644 --- a/test/lisp/subr-tests.el +++ b/test/lisp/subr-tests.el @@ -1505,5 +1505,16 @@ final or penultimate step during initialization.")) (should (hash-table-contains-p 'cookie h)) (should (hash-table-contains-p 'milk h)))) +(ert-deftest subr-test-split-string () + (let ((text "-*- lexical-binding: t; -*-") + (seps "-\\*-") + (trim "[ \t\n\r-]+")) + (should (equal (split-string text seps nil trim) + '("" "lexical-binding: t;" ""))) + (should (equal (split-string text seps t trim) + '("lexical-binding: t;"))) + (should (equal (split-string text "[ \t\n\r-]*-\\*-[ \t\n\r-]*") + '("" "lexical-binding: t;" ""))))) + (provide 'subr-tests) ;;; subr-tests.el ends here