For historical compatibility, a repetition operator is treated as ordinary
if it appears at the start of a regular expression
-or after @samp{^}, @samp{\(}, @samp{\(?:} or @samp{\|}.
+or after @samp{^}, @samp{\`}, @samp{\(}, @samp{\(?:} or @samp{\|}.
For example, @samp{*foo} is treated as @samp{\*foo}, and
@samp{two\|^\@{2\@}} is treated as @samp{two\|^@{2@}}.
It is poor practice to depend on this behavior; use proper backslash
escaping anyway, regardless of where the repetition operator appears.
-Also, a repetition operator should not immediately follow a backslash escape
-that matches only empty strings, as Emacs has bugs in this area.
-For example, it is unwise to use @samp{\b*}, which can be omitted
-without changing the documented meaning of the regular expression.
As a @samp{\} is not special inside a bracket expression, it can
never remove the special meaning of @samp{-}, @samp{^} or @samp{]}.
/* Address of start of the most recently finished expression.
This tells, e.g., postfix * where to find the start of its
- operand. Reset at the beginning of groups and alternatives. */
+ operand. Reset at the beginning of groups and alternatives,
+ and after ^ and \` for dusty-deck compatibility. */
unsigned char *laststart = 0;
/* Address of beginning of regexp, or inside of last group. */
case '^':
if (! (p == pattern + 1 || at_begline_loc_p (pattern, p)))
goto normal_char;
+ /* Special case for compatibility: postfix ops after ^ become
+ literals. */
+ laststart = 0;
BUF_PUSH (begline);
break;
case '$':
if (! (p == pend || at_endline_loc_p (p, pend)))
goto normal_char;
+ laststart = b;
BUF_PUSH (endline);
break;
/* Star, etc. applied to an empty pattern is equivalent
to an empty pattern. */
- if (!laststart || laststart == b)
+ if (laststart == b)
break;
/* Now we know whether or not zero matches is allowed
break;
case 'b':
+ laststart = b;
BUF_PUSH (wordbound);
break;
case 'B':
+ laststart = b;
BUF_PUSH (notwordbound);
break;
case '`':
+ /* Special case for compatibility: postfix ops after \` become
+ literals, as for ^ (see above). */
+ laststart = 0;
BUF_PUSH (begbuf);
break;
case '\'':
+ laststart = b;
BUF_PUSH (endbuf);
break;
(should (looking-at "x*\\(=\\|:\\)*"))
(should (looking-at "x*=*?"))))
+(ert-deftest regexp-tests-zero-width-assertion-repetition ()
+ ;; Check compatibility behaviour with repetition operators after
+ ;; certain zero-width assertions (bug#64128).
+
+ ;; This function is just to hide ugly regexps from relint so that it
+ ;; doesn't complain about them.
+ (cl-flet ((smatch (re str) (string-match re str)))
+ ;; Postfix operators after ^ and \` become literals, for historical
+ ;; compatibility. Only the first character of a lazy operator (like *?)
+ ;; becomes a literal.
+ (should (equal (smatch "^*a" "x\n*a") 2))
+ (should (equal (smatch "^*?a" "x\n*a") 2))
+ (should (equal (smatch "^*?a" "x\na") 2))
+ (should (equal (smatch "^*?a" "x\n**a") nil))
+
+ (should (equal (smatch "\\`*a" "*a") 0))
+ (should (equal (smatch "\\`*?a" "*a") 0))
+ (should (equal (smatch "\\`*?a" "a") 0))
+ (should (equal (smatch "\\`*?a" "**a") nil))
+
+ ;; Other zero-width assertions are treated as normal elements, so postfix
+ ;; operators apply to them alone (which is pointless but valid).
+ (should (equal (smatch "\\b*!" "*!") 1))
+ (should (equal (smatch "!\\b+;" "!;") nil))
+ (should (equal (smatch "!\\b+a" "!a") 0))
+
+ (should (equal (smatch "\\B*!" "*!") 1))
+ (should (equal (smatch "!\\B+;" "!;") 0))
+ (should (equal (smatch "!\\B+a" "!a") nil))
+
+ (should (equal (smatch "\\<*b" "*b") 1))
+ (should (equal (smatch "a\\<*b" "ab") 0))
+ (should (equal (smatch ";\\<*b" ";b") 0))
+ (should (equal (smatch "a\\<+b" "ab") nil))
+ (should (equal (smatch ";\\<+b" ";b") 0))
+
+ (should (equal (smatch "\\>*;" "*;") 1))
+ (should (equal (smatch "a\\>*b" "ab") 0))
+ (should (equal (smatch "a\\>*;" "a;") 0))
+ (should (equal (smatch "a\\>+b" "ab") nil))
+ (should (equal (smatch "a\\>+;" "a;") 0))
+
+ (should (equal (smatch "a\\'" "ab") nil))
+ (should (equal (smatch "b\\'" "ab") 1))
+ (should (equal (smatch "a\\'*b" "ab") 0))
+ (should (equal (smatch "a\\'+" "ab") nil))
+ (should (equal (smatch "b\\'+" "ab") 1))
+ (should (equal (smatch "\\'+" "+") 1))
+
+ (should (equal (smatch "\\_<*b" "*b") 1))
+ (should (equal (smatch "a\\_<*b" "ab") 0))
+ (should (equal (smatch " \\_<*b" " b") 0))
+ (should (equal (smatch "a\\_<+b" "ab") nil))
+ (should (equal (smatch " \\_<+b" " b") 0))
+
+ (should (equal (smatch "\\_>*;" "*;") 1))
+ (should (equal (smatch "a\\_>*b" "ab") 0))
+ (should (equal (smatch "a\\_>* " "a ") 0))
+ (should (equal (smatch "a\\_>+b" "ab") nil))
+ (should (equal (smatch "a\\_>+ " "a ") 0))
+
+ (should (equal (smatch "\\=*b" "*b") 1))
+ (should (equal (smatch "a\\=*b" "a*b") nil))
+ (should (equal (smatch "a\\=*b" "ab") 0))
+ ))
+
;;; regex-emacs-tests.el ends here