From 558b6dbca7bc933fe01255be9ebeffebd44a2645 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Wed, 25 Nov 2020 15:32:08 +0100 Subject: [PATCH] Fix replace-regexp-in-string substring match data translation For certain patterns, re-matching the same regexp on the matched substring does not produce correctly translated match data (bug#15107 and bug#44861). Using a new builtin function also improves performance since the number of calls to string-match is halved. Reported by Kevin Ryde and Shigeru Fukaya. * lisp/subr.el (replace-regexp-in-string): Translate the match data using match-data--translate instead of trusting a call to string-match on the matched string to do the job. * test/lisp/subr-tests.el (subr-replace-regexp-in-string): Add test cases. * src/search.c (Fmatch_data__translate): New internal function. (syms_of_search): Register it as a subroutine. --- lisp/subr.el | 7 +++---- src/search.c | 18 ++++++++++++++++++ test/lisp/subr-tests.el | 6 +++++- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/lisp/subr.el b/lisp/subr.el index 1fb0f9ab7e6..e009dcc2b9a 100644 --- a/lisp/subr.el +++ b/lisp/subr.el @@ -4546,10 +4546,9 @@ and replace a sub-expression, e.g. (when (= me mb) (setq me (min l (1+ mb)))) ;; Generate a replacement for the matched substring. ;; Operate on only the substring to minimize string consing. - ;; Set up match data for the substring for replacement; - ;; presumably this is likely to be faster than munging the - ;; match data directly in Lisp. - (string-match regexp (setq str (substring string mb me))) + ;; Translate the match data so that it applies to the matched substring. 
+ (match-data--translate (- mb)) + (setq str (substring string mb me)) (setq matches (cons (replace-match (if (stringp rep) rep diff --git a/src/search.c b/src/search.c index e7f90949464..4eb634a3c03 100644 --- a/src/search.c +++ b/src/search.c @@ -3031,6 +3031,23 @@ If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */) return Qnil; } +DEFUN ("match-data--translate", Fmatch_data__translate, Smatch_data__translate, + 1, 1, 0, + doc: /* Add N to all string positions in the match data. Internal. */) + (Lisp_Object n) +{ + CHECK_FIXNUM (n); + EMACS_INT delta = XFIXNUM (n); /* N as a C integer; it may be negative, e.g. replace-regexp-in-string passes (- mb). */ + if (EQ (last_thing_searched, Qt)) /* String match data only; in any other case the registers are left unchanged. */ + for (ptrdiff_t i = 0; i < search_regs.num_regs; i++) + if (search_regs.start[i] >= 0) /* Registers with a negative start are skipped (presumably groups that did not match -- TODO confirm). */ + { + search_regs.start[i] = max (0, search_regs.start[i] + delta); /* Clamp at zero so a shifted position is never negative. */ + search_regs.end[i] = max (0, search_regs.end[i] + delta); + } + return Qnil; +} + /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data if asynchronous code (filter or sentinel) is running. */ static void @@ -3388,6 +3405,7 @@ is to bind it with `let' around a small expression. */); defsubr (&Smatch_end); defsubr (&Smatch_data); defsubr (&Sset_match_data); + defsubr (&Smatch_data__translate); defsubr (&Sregexp_quote); defsubr (&Snewline_cache_check); diff --git a/test/lisp/subr-tests.el b/test/lisp/subr-tests.el index c77be511dc2..67f7fc97496 100644 --- a/test/lisp/subr-tests.el +++ b/test/lisp/subr-tests.el @@ -545,7 +545,11 @@ See https://debbugs.gnu.org/cgi/bugreport.cgi?bug=19350." (match-beginning 1) (match-end 1))) "babbcaacabc") "ba")) - ) + ;; anchors (bug#15107, bug#44861) + (should (equal (replace-regexp-in-string "a\\B" "b" "a aaaa") + "a bbba")) + (should (equal (replace-regexp-in-string "\\`\\|x" "z" "--xx--") + "z--zz--"))) (provide 'subr-tests) ;;; subr-tests.el ends here -- 2.39.5