From: Mattias Engdegård Date: Wed, 26 Feb 2020 13:46:01 +0000 (+0100) Subject: Signal an error for the regexp "[:alnum:]" X-Git-Tag: emacs-28.0.90~7829 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=8d5e8cddab732ac90e9ae930c63f7830f9dab24f;p=emacs.git Signal an error for the regexp "[:alnum:]" Omitting the extra brackets is a common mistake; see discussion at https://lists.gnu.org/archive/html/emacs-devel/2020-02/msg00215.html * src/regex-emacs.c (reg_errcode_t, re_error_msgid): Add REG_ECLASSBR. (regex_compile): Check for the mistake. * test/src/regex-emacs-tests.el (regexp-invalid): Test. * etc/NEWS: Announce. --- diff --git a/etc/NEWS b/etc/NEWS index ee3a3c19e7c..96a612b5340 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -202,6 +202,11 @@ Emacs now supports bignums so this old glitch is no longer needed. 'previous-system-time-locale' have been removed, as they were created by mistake and were not useful to Lisp code. +** The regexp mistake '[:digit:]' is now an error. +The correct syntax is '[[:digit:]]'. Previously, forgetting the extra +brackets silently resulted in a regexp that did not at all work as +intended. + * Lisp Changes in Emacs 28.1 diff --git a/src/regex-emacs.c b/src/regex-emacs.c index 694431c95e2..38824370e05 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c @@ -818,7 +818,8 @@ typedef enum REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ REG_ERPAREN, /* Unmatched ) or \); not returned from regcomp. */ REG_ERANGEX, /* Range striding over charsets. */ - REG_ESIZEBR /* n or m too big in \{n,m\} */ + REG_ESIZEBR, /* n or m too big in \{n,m\} */ + REG_ECLASSBR, /* Missing [] around [:class:]. 
*/ } reg_errcode_t; static const char *re_error_msgid[] = @@ -842,6 +843,7 @@ static const char *re_error_msgid[] = [REG_ERPAREN] = "Unmatched ) or \\)", [REG_ERANGEX ] = "Range striding over charsets", [REG_ESIZEBR ] = "Invalid content of \\{\\}", + [REG_ECLASSBR] = "Class syntax is [[:digit:]]; missing brackets", }; /* For 'regs_allocated'. */ @@ -2000,6 +2002,23 @@ regex_compile (re_char *pattern, ptrdiff_t size, laststart = b; + /* Check for the mistake of forgetting the extra square brackets, + as in "[:alpha:]". */ + if (*p == ':') + { + re_char *q = p + 1; + while (q != pend && *q != ']') + { + if (*q == ':') + { + if (q + 1 != pend && q[1] == ']' && q > p + 1) + FREE_STACK_RETURN (REG_ECLASSBR); + break; + } + q++; + } + } + /* Test '*p == '^' twice, instead of using an if statement, so we need only one BUF_PUSH. */ BUF_PUSH (*p == '^' ? charset_not : charset); diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el index f9372e37b11..661d416e6a7 100644 --- a/test/src/regex-emacs-tests.el +++ b/test/src/regex-emacs-tests.el @@ -803,4 +803,9 @@ This evaluates the TESTS test cases from glibc." (should-not (string-match "å" "\xe5")) (should-not (string-match "[å]" "\xe5"))) +(ert-deftest regexp-invalid () + ;; relint suppression: Duplicated + (should-error (string-match "[:space:]" "") + :type 'invalid-regexp)) + ;;; regex-emacs-tests.el ends here