From 36f5a1a7e74442272796413575f85ba9bd18cb53 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Sun, 23 Oct 2022 15:40:37 +0200 Subject: [PATCH] Fix regexp matching with atomic strings and optimised backtracking This bug occurs when an atomic pattern is matched at the end of a string and the on-failure-keep-string-jump optimisation is in effect, as in: (string-match "\\`\\(?:ab\\)*\\'" "a") which succeeded but clearly should not (bug#58726). Reported by Michael Heerdegen. * src/regex-emacs.c (PREFETCH): Add reset parameter. (re_match_2_internal): Use it for proper atomic pattern treatment. * test/src/regex-emacs-tests.el (regexp-atomic-failure): New test. --- src/regex-emacs.c | 14 +++++++++----- test/src/regex-emacs-tests.el | 5 +++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/regex-emacs.c b/src/regex-emacs.c index 9b2c14c413d..626560911f6 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c @@ -3446,14 +3446,18 @@ static bool bcmp_translate (re_char *, re_char *, ptrdiff_t, /* Call before fetching a character with *d. This switches over to string2 if necessary. + `reset' is executed before backtracking if there are no more characters. Check re_match_2_internal for a discussion of why end_match_2 might not be within string2 (but be equal to end_match_1 instead). */ -#define PREFETCH() \ +#define PREFETCH(reset) \ while (d == dend) \ { \ /* End of string2 => fail. */ \ if (dend == end_match_2) \ - goto fail; \ + { \ + reset; \ + goto fail; \ + } \ /* End of string1 => advance to string2. 
*/ \ d = string2; \ dend = end_match_2; \ @@ -4252,7 +4256,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, int pat_charlen, buf_charlen; int pat_ch, buf_ch; - PREFETCH (); + PREFETCH (d = dfail); if (multibyte) pat_ch = string_char_and_length (p, &pat_charlen); else @@ -4280,7 +4284,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, int pat_charlen; int pat_ch, buf_ch; - PREFETCH (); + PREFETCH (d = dfail); if (multibyte) { pat_ch = string_char_and_length (p, &pat_charlen); @@ -4486,7 +4490,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, if (d2 == dend2) break; /* If necessary, advance to next segment in data. */ - PREFETCH (); + PREFETCH (d = dfail); /* How many characters left in this segment to match. */ dcnt = dend - d; diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el index ff0d6be3f5d..b323f592dca 100644 --- a/test/src/regex-emacs-tests.el +++ b/test/src/regex-emacs-tests.el @@ -867,4 +867,9 @@ This evaluates the TESTS test cases from glibc." (should (equal (string-match "[[:lower:]]" "ẞ") 0)) (should (equal (string-match "[[:upper:]]" "ẞ") 0)))) +(ert-deftest regexp-atomic-failure () + "Bug#58726." + (should (equal (string-match "\\`\\(?:ab\\)*\\'" "a") nil)) + (should (equal (string-match "\\`a\\{2\\}*\\'" "a") nil))) + ;;; regex-emacs-tests.el ends here -- 2.39.5