From: Mattias Engdegård Date: Tue, 14 May 2019 09:43:49 +0000 (+0200) Subject: Add standard unmatchable regexp X-Git-Tag: emacs-27.0.90~2883 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=26f735ff198e52370aafe09ed5ed669e78f196ab;p=emacs.git Add standard unmatchable regexp Add `regexp-unmatchable' as a standard unmatchable regexp, defined as "\\`a\\`". Use it where such a regexp is needed, replacing slower expressions in several places. From a suggestion by Philippe Schnoebelen. * lisp/subr.el (regexp-unmatchable): New defconst. * etc/NEWS (Lisp Changes): Mention `regexp-unmatchable'. * doc/lispref/searching.texi (Regexp Functions): Document it. * lisp/emacs-lisp/regexp-opt.el (regexp-opt) * lisp/progmodes/cc-defs.el (cc-conditional-require-after-load) (c-make-keywords-re) * lisp/progmodes/cc-engine.el (c-beginning-of-statement-1) (c-forward-<>-arglist-recur, c-forward-decl-or-cast-1) (c-looking-at-decl-block) * lisp/progmodes/cc-fonts.el (c-doc-line-join-re) (c-doc-bright-comment-start-re) * lisp/progmodes/cc-langs.el (c-populate-syntax-table) (c-assignment-op-regexp) (c-block-comment-ender-regexp, c-font-lock-comment-end-skip) (c-block-comment-start-regexp, c-line-comment-start-regexp) (c-doc-comment-start-regexp, c-decl-start-colon-kwd-re) (c-type-decl-prefix-key, c-type-decl-operator-prefix-key) (c-pre-id-bracelist-key, c-enum-clause-introduction-re) (c-nonlabel-token-2-key) * lisp/progmodes/cc-mode.el (c-doc-fl-decl-start, c-doc-fl-decl-end) * lisp/progmodes/cc-vars.el (c-noise-macro-with-parens-name-re) (c-noise-macro-name-re, c-make-noise-macro-regexps) * lisp/progmodes/octave.el (octave-help-mode) * lisp/vc/vc-bzr.el (vc-bzr-log-view-mode, vc-bzr-revision-completion-table) * lisp/vc/vc-git.el (vc-git-log-view-mode) * lisp/vc/vc-hg.el (vc-hg-log-view-mode) * lisp/vc/vc-mtn.el (vc-mtn-log-view-mode): Use `regexp-unmatchable'. * lisp/textmodes/ispell.el (ispell-non-empty-string): Use `regexp-unmatchable', fixing a broken never-match regexp. 
--- diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index 8775254dd07..24f30b4dac6 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi @@ -1070,6 +1070,13 @@ list of characters @var{chars}. @c Internal functions: regexp-opt-group +@defvar regexp-unmatchable +This variable contains a regexp that is guaranteed not to match any +string at all. It is particularly useful as default value for +variables that may be set to a pattern that actually matches +something. +@end defvar + @node Regexp Search @section Regular Expression Searching @cindex regular expression searching diff --git a/etc/NEWS b/etc/NEWS index fc3ca1ea928..699a04b5246 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1990,6 +1990,10 @@ returns a regexp that never matches anything, which is an identity for this operation. Previously, the empty string was returned in this case. +** New constant 'regexp-unmatchable' contains a never-matching regexp. +It is a convenient and readable way to specify a regexp that should +not match anything, and is as fast as any such regexp can be. + * Changes in Emacs 27.1 on Non-Free Operating Systems diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index d883752d712..00f72e284ad 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el @@ -144,9 +144,9 @@ usually more efficient than that of a simplified version: (sort (copy-sequence strings) 'string-lessp))) (re (cond - ;; No strings: return a\` which cannot match anything. + ;; No strings: return an unmatchable regexp. ((null strings) - (concat (or open "\\(?:") "a\\`\\)")) + (concat (or open "\\(?:") regexp-unmatchable "\\)")) ;; If we cannot reorder, give up all attempts at ;; optimisation. There is room for improvement (Bug#34641). 
((and keep-order (regexp-opt--contains-prefix sorted-strings)) diff --git a/lisp/net/ange-ftp.el b/lisp/net/ange-ftp.el index 5af9ea75ed1..b0a1e1799f5 100644 --- a/lisp/net/ange-ftp.el +++ b/lisp/net/ange-ftp.el @@ -1989,7 +1989,7 @@ on the gateway machine to do the FTP instead." (make-local-variable 'comint-password-prompt-regexp) ;; This is a regexp that can't match anything. ;; ange-ftp has its own ways of handling passwords. - (setq comint-password-prompt-regexp "\\`a\\`") + (setq comint-password-prompt-regexp regexp-unmatchable) (make-local-variable 'paragraph-start) (setq paragraph-start comint-prompt-regexp)) diff --git a/lisp/progmodes/cc-defs.el b/lisp/progmodes/cc-defs.el index cd4ed6b352e..d20e3ef32d9 100644 --- a/lisp/progmodes/cc-defs.el +++ b/lisp/progmodes/cc-defs.el @@ -81,7 +81,7 @@ (progn (require 'font-lock) (let (font-lock-keywords) - (font-lock-compile-keywords '("a\\`")) ; doesn't match anything. + (font-lock-compile-keywords (list regexp-unmatchable)) font-lock-keywords)))) @@ -1890,8 +1890,8 @@ when it's needed. The default is the current language taken from ;; Produce a regexp that doesn't match anything. (if adorn - "\\(a\\`\\)" - "a\\`"))) + (concat "\\(" regexp-unmatchable "\\)") + regexp-unmatchable))) (put 'c-make-keywords-re 'lisp-indent-function 1) diff --git a/lisp/progmodes/cc-engine.el b/lisp/progmodes/cc-engine.el index ed8310d0e67..41bab270daa 100644 --- a/lisp/progmodes/cc-engine.el +++ b/lisp/progmodes/cc-engine.el @@ -907,7 +907,7 @@ comment at the start of cc-engine.el for more info." stack ;; Regexp which matches "for", "if", etc. (cond-key (or c-opt-block-stmt-key - "a\\`")) ; Doesn't match anything. + regexp-unmatchable)) ;; Return value. (ret 'same) ;; Positions of the last three sexps or bounds we've stopped at. @@ -7646,7 +7646,9 @@ comment at the start of cc-engine.el for more info." 
(progn (c-forward-syntactic-ws) (when (or (and c-record-type-identifiers all-types) - (not (equal c-inside-<>-type-key "\\(a\\`\\)"))) + (not (equal c-inside-<>-type-key + (concat + "\\(" regexp-unmatchable "\\)")))) (c-forward-syntactic-ws) (cond ((eq (char-after) ??) @@ -9253,7 +9255,7 @@ This function might do hidden buffer changes." ;; Skip over type decl prefix operators. (Note similar code in ;; `c-forward-declarator'.) (if (and c-recognize-typeless-decls - (equal c-type-decl-prefix-key "a\\`")) ; Regexp which doesn't match + (equal c-type-decl-prefix-key regexp-unmatchable)) (when (eq (char-after) ?\() (progn (setq paren-depth (1+ paren-depth)) @@ -10886,7 +10888,7 @@ comment at the start of cc-engine.el for more info." ;; legal because it's part of a "compound keyword" like ;; "enum class". Of course, if c-after-brace-list-key ;; is nil, we can skip the test. - (or (equal c-after-brace-list-key "a\\`") ; Regexp which doesn't match + (or (equal c-after-brace-list-key regexp-unmatchable) (save-match-data (save-excursion (not diff --git a/lisp/progmodes/cc-fonts.el b/lisp/progmodes/cc-fonts.el index 5f09be60a67..b3a9dd480b8 100644 --- a/lisp/progmodes/cc-fonts.el +++ b/lisp/progmodes/cc-fonts.el @@ -2580,14 +2580,14 @@ need for `pike-font-lock-extra-types'.") ;;; Doc comments. -(defvar c-doc-line-join-re "a\\`") +(defvar c-doc-line-join-re regexp-unmatchable) ;; Matches a join of two lines in a doc comment. ;; This should not be changed directly, but instead set by ;; `c-setup-doc-comment-style'. This variable is used in `c-find-decl-spots' ;; in (e.g.) autodoc style comments to bridge the gap between a "@\n" at an ;; EOL and the token following "//!" on the next line. -(defvar c-doc-bright-comment-start-re "a\\`") +(defvar c-doc-bright-comment-start-re regexp-unmatchable) ;; Matches the start of a "bright" comment, one whose contents may be ;; fontified by, e.g., `c-font-lock-declarations'. 
diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el index 30da10a6c03..9d2f689e584 100644 --- a/lisp/progmodes/cc-langs.el +++ b/lisp/progmodes/cc-langs.el @@ -945,7 +945,7 @@ file name in angle brackets or quotes." (c-make-keywords-re 'appendable (c-lang-const c-cpp-include-directives)) "[ \t]*") - "a\\`")) ; Doesn't match anything + regexp-unmatchable)) (c-lang-defvar c-cpp-include-key (c-lang-const c-cpp-include-key)) (c-lang-defconst c-opt-cpp-macro-define @@ -1331,7 +1331,7 @@ operators." (c--set-difference (c-lang-const c-assignment-operators) '("=") :test 'string-equal))) - "a\\`")) ; Doesn't match anything. + regexp-unmatchable)) (c-lang-defvar c-assignment-op-regexp (c-lang-const c-assignment-op-regexp)) @@ -1554,7 +1554,7 @@ properly." ;; language) t (if (c-lang-const c-block-comment-ender) (regexp-quote (c-lang-const c-block-comment-ender)) - "a\\`")) ; Doesn't match anything. + regexp-unmatchable)) (c-lang-defvar c-block-comment-ender-regexp (c-lang-const c-block-comment-ender-regexp)) @@ -1565,7 +1565,7 @@ properly." ;; `font-lock-comment-delimiter-face'. t (if (c-lang-const c-block-comment-ender) (concat "[ \t]*" (c-lang-const c-block-comment-ender-regexp)) - "a\\`")) ; Doesn't match anything. + regexp-unmatchable)) (c-lang-setvar font-lock-comment-end-skip (c-lang-const c-font-lock-comment-end-skip)) @@ -1584,7 +1584,7 @@ properly." ;; language) t (if (c-lang-const c-block-comment-starter) (regexp-quote (c-lang-const c-block-comment-starter)) - "a\\`")) ; Doesn't match anything. + regexp-unmatchable)) (c-lang-defvar c-block-comment-start-regexp (c-lang-const c-block-comment-start-regexp)) @@ -1593,7 +1593,7 @@ properly." ;; language; it does in all 7 CC Mode languages). t (if (c-lang-const c-line-comment-starter) (regexp-quote (c-lang-const c-line-comment-starter)) - "a\\`")) ; Doesn't match anything. 
+ regexp-unmatchable)) (c-lang-defvar c-line-comment-start-regexp (c-lang-const c-line-comment-start-regexp)) @@ -1628,7 +1628,7 @@ starter." (c-lang-defconst c-doc-comment-start-regexp "Regexp to match the start of documentation comments." - t "a\\`" ; Doesn't match anything. + t regexp-unmatchable ;; From font-lock.el: `doxygen' uses /*! while others use /**. (c c++ objc) "/\\*[*!]" java "/\\*\\*" @@ -3112,7 +3112,7 @@ Note that Java specific rules are currently applied to tell this from "Regexp matching a keyword that is followed by a colon, where the whole construct can precede a declaration. E.g. \"public:\" in C++." - t "a\\`" ; Doesn't match anything. + t regexp-unmatchable c++ (c-make-keywords-re t (c-lang-const c-protection-kwds))) (c-lang-defvar c-decl-start-colon-kwd-re (c-lang-const c-decl-start-colon-kwd-re)) @@ -3309,7 +3309,7 @@ Identifier syntax is in effect when this is matched \(see t (if (c-lang-const c-type-modifier-kwds) (concat (regexp-opt (c-lang-const c-type-modifier-kwds) t) "\\>") ;; Default to a regexp that never matches. - "a\\`") + regexp-unmatchable) ;; Check that there's no "=" afterwards to avoid matching tokens ;; like "*=". (c objc) (concat "\\(" @@ -3347,7 +3347,7 @@ that might precede the identifier in a declaration, e.g. the as the end of the operator. Identifier syntax is in effect when this is matched \(see `c-identifier-syntax-table')." t ;; Default to a regexp that never matches. - "a\\`" + regexp-unmatchable ;; Check that there's no "=" afterwards to avoid matching tokens ;; like "*=". (c objc) (concat "\\(\\*\\)" @@ -3506,7 +3506,7 @@ list." (c-lang-defconst c-pre-id-bracelist-key "A regexp matching tokens which, preceding an identifier, signify a bracelist. " - t "a\\`" ; Doesn't match anything. + t regexp-unmatchable c++ "new\\([^[:alnum:]_$]\\|$\\)\\|&&?\\(\\S.\\|$\\)") (c-lang-defvar c-pre-id-bracelist-key (c-lang-const c-pre-id-bracelist-key)) @@ -3562,7 +3562,7 @@ the invalidity of the putative template construct." 
;; before the '{' of the enum list, to avoid searching too far. "[^][{};/#=]*" "{") - "a\\`")) ; Doesn't match anything. + regexp-unmatchable)) (c-lang-defvar c-enum-clause-introduction-re (c-lang-const c-enum-clause-introduction-re)) @@ -3703,7 +3703,7 @@ Only used if `c-recognize-colon-labels' is set." "Regexp matching things that can't occur two symbols before a colon in a label construct. This catches C++'s inheritance construct \"class foo : bar\". Only used if `c-recognize-colon-labels' is set." - t "a\\`" ; Doesn't match anything. + t regexp-unmatchable c++ (c-make-keywords-re t '("class"))) (c-lang-defvar c-nonlabel-token-2-key (c-lang-const c-nonlabel-token-2-key)) diff --git a/lisp/progmodes/cc-mode.el b/lisp/progmodes/cc-mode.el index bd62fc754ab..e4ff9f019d3 100644 --- a/lisp/progmodes/cc-mode.el +++ b/lisp/progmodes/cc-mode.el @@ -1825,7 +1825,7 @@ Note that this is a strict tail, so won't match, e.g. \"0x....\".") ;; by `c-doc-line-join-re'), return the position of the first line of the ;; sequence. Otherwise, return nil. Point has no significance at entry to ;; and exit from this function. - (when (not (equal c-doc-line-join-re "a\\`")) + (when (not (equal c-doc-line-join-re regexp-unmatchable)) (goto-char pos) (back-to-indentation) (and (or (looking-at c-comment-start-regexp) @@ -1842,7 +1842,7 @@ Note that this is a strict tail, so won't match, e.g. \"0x....\".") ;; marker (as defined by `c-doc-line-join-re), return the position of ;; the BOL at the end of the sequence. Otherwise, return nil. Point has no ;; significance at entry to and exit from this function. 
- (when (not (equal c-doc-line-join-re "a\\`")) + (when (not (equal c-doc-line-join-re regexp-unmatchable)) (goto-char pos) (back-to-indentation) (let ((here (point))) diff --git a/lisp/progmodes/cc-vars.el b/lisp/progmodes/cc-vars.el index 6e8acd4c0dd..b818bced730 100644 --- a/lisp/progmodes/cc-vars.el +++ b/lisp/progmodes/cc-vars.el @@ -1648,9 +1648,9 @@ white space either before or after the operator, but not both." :group 'c) ;; Initialize the next two to a regexp which never matches. -(defvar c-noise-macro-with-parens-name-re "a\\`") +(defvar c-noise-macro-with-parens-name-re regexp-unmatchable) (make-variable-buffer-local 'c-noise-macro-with-parens-name-re) -(defvar c-noise-macro-name-re "a\\`") +(defvar c-noise-macro-name-re regexp-unmatchable) (make-variable-buffer-local 'c-noise-macro-name-re) (defcustom c-noise-macro-names nil @@ -1682,7 +1682,7 @@ These are recognized by CC Mode only in declarations." ;; Convert `c-noise-macro-names' and `c-noise-macro-with-parens-names' into ;; `c-noise-macro-name-re' and `c-noise-macro-with-parens-name-re'. (setq c-noise-macro-with-parens-name-re - (cond ((null c-noise-macro-with-parens-names) "a\\`") ; Never matches. + (cond ((null c-noise-macro-with-parens-names) regexp-unmatchable) ((consp c-noise-macro-with-parens-names) (concat (regexp-opt c-noise-macro-with-parens-names t) "\\([^[:alnum:]_$]\\|$\\)")) @@ -1691,7 +1691,7 @@ These are recognized by CC Mode only in declarations." (t (error "c-make-noise-macro-regexps: \ c-noise-macro-with-parens-names is invalid: %s" c-noise-macro-with-parens-names)))) (setq c-noise-macro-name-re - (cond ((null c-noise-macro-names) "a\\`") ; Never matches anything. 
+ (cond ((null c-noise-macro-names) regexp-unmatchable) ((consp c-noise-macro-names) (concat (regexp-opt c-noise-macro-names t) "\\([^[:alnum:]_$]\\|$\\)")) diff --git a/lisp/progmodes/grep.el b/lisp/progmodes/grep.el index 85f9078d46d..79178c4346e 100644 --- a/lisp/progmodes/grep.el +++ b/lisp/progmodes/grep.el @@ -837,7 +837,8 @@ This function is called from `compilation-filter-hook'." grep-mode-line-matches) ;; compilation-directory-matcher can't be nil, so we set it to a regexp that ;; can never match. - (set (make-local-variable 'compilation-directory-matcher) '("\\`a\\`")) + (set (make-local-variable 'compilation-directory-matcher) + (list regexp-unmatchable)) (set (make-local-variable 'compilation-process-setup-function) 'grep-process-setup) (set (make-local-variable 'compilation-disable-input) t) diff --git a/lisp/progmodes/octave.el b/lisp/progmodes/octave.el index 52e5fd477f4..8a7e24e5ada 100644 --- a/lisp/progmodes/octave.el +++ b/lisp/progmodes/octave.el @@ -1691,7 +1691,7 @@ code line." (eval-and-compile (require 'help-mode)) ;; Don't highlight `EXAMPLE' as elisp symbols by using a regexp that ;; can never match. - (setq-local help-xref-symbol-regexp "x\\`")) + (setq-local help-xref-symbol-regexp regexp-unmatchable)) (defun octave-help (fn) "Display the documentation of FN." diff --git a/lisp/subr.el b/lisp/subr.el index be21dc67a0d..05fb9fea68f 100644 --- a/lisp/subr.el +++ b/lisp/subr.el @@ -5544,4 +5544,8 @@ returned list are in the same order as in TREE. ;; for discoverability: (defalias 'flatten-list 'flatten-tree) +;; The initial anchoring is for better performance in searching matches. 
+(defconst regexp-unmatchable "\\`a\\`" + "Standard regexp guaranteed not to match any string at all.") + ;;; subr.el ends here diff --git a/lisp/textmodes/ispell.el b/lisp/textmodes/ispell.el index 6553a2799bb..0c5e6a183b5 100644 --- a/lisp/textmodes/ispell.el +++ b/lisp/textmodes/ispell.el @@ -4016,7 +4016,7 @@ You can bind this to the key C-c i in GNUS or mail by adding to (defun ispell-non-empty-string (string) (if (or (not string) (string-equal string "")) - "\\'\\`" ; An unmatchable string if string is null. + regexp-unmatchable (regexp-quote string))) diff --git a/lisp/vc/vc-bzr.el b/lisp/vc/vc-bzr.el index ab5a449cd3d..ee1646cae5a 100644 --- a/lisp/vc/vc-bzr.el +++ b/lisp/vc/vc-bzr.el @@ -702,7 +702,7 @@ or a superior directory.") (remove-hook 'log-view-mode-hook 'vc-bzr-log-view-mode) ;Deactivate the hack. (require 'add-log) (set (make-local-variable 'log-view-per-file-logs) nil) - (set (make-local-variable 'log-view-file-re) "\\`a\\`") + (set (make-local-variable 'log-view-file-re) regexp-unmatchable) (set (make-local-variable 'log-view-message-re) (if (eq vc-log-view-type 'short) "^ *\\([0-9.]+\\): \\(.*?\\)[ \t]+\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}\\)\\( \\[merge\\]\\)?" @@ -1319,7 +1319,8 @@ stream. Standard error output is discarded." ((string-match "\\`annotate:" string) (completion-table-with-context (substring string 0 (match-end 0)) - (apply-partially #'completion-table-with-terminator '(":" . "\\`a\\`") + (apply-partially #'completion-table-with-terminator + (cons ":" regexp-unmatchable) #'completion-file-name-table) (substring string (match-end 0)) pred action)) diff --git a/lisp/vc/vc-git.el b/lisp/vc/vc-git.el index 192e6cf68f6..61c13026cc5 100644 --- a/lisp/vc/vc-git.el +++ b/lisp/vc/vc-git.el @@ -1087,7 +1087,7 @@ If LIMIT is a revision string, use it as an end-revision." (define-derived-mode vc-git-log-view-mode log-view-mode "Git-Log-View" (require 'add-log) ;; We need the faces add-log. 
;; Don't have file markers, so use impossible regexp. - (set (make-local-variable 'log-view-file-re) "\\`a\\`") + (set (make-local-variable 'log-view-file-re) regexp-unmatchable) (set (make-local-variable 'log-view-per-file-logs) nil) (set (make-local-variable 'log-view-message-re) (if (not (eq vc-log-view-type 'long)) diff --git a/lisp/vc/vc-hg.el b/lisp/vc/vc-hg.el index d3f132dae70..876d824ceac 100644 --- a/lisp/vc/vc-hg.el +++ b/lisp/vc/vc-hg.el @@ -444,7 +444,7 @@ If LIMIT is non-nil, show no more than this many entries." (define-derived-mode vc-hg-log-view-mode log-view-mode "Hg-Log-View" (require 'add-log) ;; we need the add-log faces - (set (make-local-variable 'log-view-file-re) "\\`a\\`") + (set (make-local-variable 'log-view-file-re) regexp-unmatchable) (set (make-local-variable 'log-view-per-file-logs) nil) (set (make-local-variable 'log-view-message-re) (if (eq vc-log-view-type 'short) diff --git a/lisp/vc/vc-mtn.el b/lisp/vc/vc-mtn.el index f0b12489c1b..91cc28021cf 100644 --- a/lisp/vc/vc-mtn.el +++ b/lisp/vc/vc-mtn.el @@ -240,7 +240,7 @@ If LIMIT is non-nil, show no more than this many entries." (define-derived-mode vc-mtn-log-view-mode log-view-mode "Mtn-Log-View" ;; Don't match anything. - (set (make-local-variable 'log-view-file-re) "\\`a\\`") + (set (make-local-variable 'log-view-file-re) regexp-unmatchable) (set (make-local-variable 'log-view-per-file-logs) nil) ;; TODO: Use a more precise regexp than "[ |/]+" to avoid false positives ;; in the ChangeLog text.