]> git.eshelyaron.com Git - emacs.git/commitdiff
Fix regexp matching with atomic strings and optimised backtracking
authorMattias Engdegård <mattiase@acm.org>
Sun, 23 Oct 2022 13:40:37 +0000 (15:40 +0200)
committerMattias Engdegård <mattiase@acm.org>
Mon, 24 Oct 2022 09:50:13 +0000 (11:50 +0200)
This bug occurs when an atomic pattern is matched at the end of
a string and the on-failure-keep-string-jump optimisation is
in effect, as in:

  (string-match "\\'\\(?:ab\\)*\\'" "a")

which succeeded but clearly should not (bug#58726).

Reported by Michael Heerdegen.

* src/regex-emacs.c (PREFETCH): Add reset parameter.
(re_match_2_internal): Use it for proper atomic pattern treatment.
* test/src/regex-emacs-tests.el (regexp-atomic-failure): New test.

src/regex-emacs.c
test/src/regex-emacs-tests.el

index 9b2c14c413da09ae0475c97bd455135fbb5b48d4..626560911f672ce1e7336e0b10f0325afd7b7309 100644 (file)
@@ -3446,14 +3446,18 @@ static bool bcmp_translate (re_char *, re_char *, ptrdiff_t,
 
 /* Call before fetching a character with *d.  This switches over to
    string2 if necessary.
+   `reset' is executed before backtracking if there are no more characters.
    Check re_match_2_internal for a discussion of why end_match_2 might
    not be within string2 (but be equal to end_match_1 instead).  */
-#define PREFETCH()                                                     \
+#define PREFETCH(reset)                                                        \
   while (d == dend)                                                    \
     {                                                                  \
       /* End of string2 => fail.  */                                   \
       if (dend == end_match_2)                                         \
-       goto fail;                                                      \
+        {                                                              \
+         reset;                                                        \
+         goto fail;                                                    \
+       }                                                               \
       /* End of string1 => advance to string2.  */                     \
       d = string2;                                                     \
       dend = end_match_2;                                              \
@@ -4252,7 +4256,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
                int pat_charlen, buf_charlen;
                int pat_ch, buf_ch;
 
-               PREFETCH ();
+               PREFETCH (d = dfail);
                if (multibyte)
                  pat_ch = string_char_and_length (p, &pat_charlen);
                else
@@ -4280,7 +4284,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
                int pat_charlen;
                int pat_ch, buf_ch;
 
-               PREFETCH ();
+               PREFETCH (d = dfail);
                if (multibyte)
                  {
                    pat_ch = string_char_and_length (p, &pat_charlen);
@@ -4486,7 +4490,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
                if (d2 == dend2) break;
 
                /* If necessary, advance to next segment in data.  */
-               PREFETCH ();
+               PREFETCH (d = dfail);
 
                /* How many characters left in this segment to match.  */
                dcnt = dend - d;
index ff0d6be3f5d1cd0d0668a8fa591d1c152fa4a290..b323f592dca31a1d7bdcf63c202199db8af5b0b4 100644 (file)
@@ -867,4 +867,9 @@ This evaluates the TESTS test cases from glibc."
     (should (equal (string-match "[[:lower:]]" "ẞ") 0))
     (should (equal (string-match "[[:upper:]]" "ẞ") 0))))
 
+(ert-deftest regexp-atomic-failure ()
+  "Bug#58726."
+  (should (equal (string-match "\\`\\(?:ab\\)*\\'" "a") nil))
+  (should (equal (string-match "\\`a\\{2\\}*\\'" "a") nil)))
+
 ;;; regex-emacs-tests.el ends here