From 7c26501175f7dc657c677c151d49d04291ea67e7 Mon Sep 17 00:00:00 2001 From: Stefan Monnier Date: Thu, 28 Sep 2023 12:37:44 -0400 Subject: [PATCH] regex.c (mutually_exclusive_aux) <wordbound>: Remove optimization Another case that was too optimistic. Better use \> or \< rather than \b if you want your regexp to be handled efficiently. * src/regex-emacs.c (mutually_exclusive_aux) <wordbound>: Cancel optimization. * test/src/regex-emacs-tests.el (regexp-tests-backtrack-optimization): New test. --- src/regex-emacs.c | 17 +++++++++++++++-- test/src/regex-emacs-tests.el | 1 + 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/regex-emacs.c b/src/regex-emacs.c index ae82dd63917..ad140908609 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c @@ -3874,9 +3874,22 @@ mutually_exclusive_aux (struct re_pattern_buffer *bufp, re_char *p1, return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]); case wordbound: + /* FIXME: This optimization seems correct after the first iteration + of the loop, but not for the very first :-( + IOW we'd need to pull out the first iteration and do: + + syntaxspec w + on_failure_keep_string_jump end + loop: + syntaxspec w + goto loop + end: + wordbound + return (((re_opcode_t) *p1 == notsyntaxspec - || (re_opcode_t) *p1 == syntaxspec) - && p1[1] == Sword); + || (re_opcode_t) *p1 == syntaxspec) + && p1[1] == Sword); */ + return false; case categoryspec: return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]); diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el index d78f70ba409..621e4dbe2c0 100644 --- a/test/src/regex-emacs-tests.el +++ b/test/src/regex-emacs-tests.el @@ -907,6 +907,7 @@ This evaluates the TESTS test cases from glibc." ;; Regression check for overly optimistic optimization. (should (eq 0 (string-match "\\(ca*\\|ab\\)+d" "cabd"))) (should (string-match "\\(aa*\\|b\\)*c" "ababc")) + (should (string-match " \\sw*\\bfoo" " foo")) )) (ert-deftest regexp-tests-zero-width-assertion-repetition () -- 2.39.5