From 7bda34af52687440632127b4b79986e951b978b1 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Mon, 25 Feb 2019 15:22:02 +0100 Subject: [PATCH] Correct regexp-opt return value for empty string list When regexp-opt is called with an empty list of strings, return a regexp that doesn't match anything instead of the empty string (Bug#20307). * doc/lispref/searching.texi (Regular Expression Functions): * etc/NEWS: Document the new behaviour. * lisp/emacs-lisp/regexp-opt.el (regexp-opt): Return a never-match regexp for empty inputs. --- doc/lispref/searching.texi | 3 +++ etc/NEWS | 6 ++++++ lisp/emacs-lisp/regexp-opt.el | 23 +++++++++++++++-------- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index fb7f48474d5..38e62040550 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi @@ -960,6 +960,9 @@ possible. A hand-tuned regular expression can sometimes be slightly more efficient, but is almost never worth the effort.}. @c E.g., see https://debbugs.gnu.org/2816 +If @var{strings} is the empty list, the return value is a regexp that +never matches anything. + The optional argument @var{paren} can be any of the following: @table @asis diff --git a/etc/NEWS b/etc/NEWS index 7c95988ff52..65eb9ba1af2 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1649,6 +1649,12 @@ in any order. If the new third argument is non-nil, the match is guaranteed to be performed in the order given, as if the strings were made into a regexp by joining them with '\|'. ++++ +** The function 'regexp-opt', when given an empty list of strings, now +returns a regexp that never matches anything, which is an identity for +this operation. Previously, the empty string was returned in this +case. + * Changes in Emacs 27.1 on Non-Free Operating Systems diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index d0c5f2d3fc4..4404b905a6f 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el @@ -90,6 +90,9 @@ Each string should be unique in STRINGS and should not contain any regexps, quoted or not. Optional PAREN specifies how the returned regexp is surrounded by grouping constructs. +If STRINGS is the empty list, the return value is a regexp that +never matches anything. + The optional argument PAREN can be any of the following: a string @@ -140,14 +143,18 @@ usually more efficient than that of a simplified version: (sorted-strings (delete-dups (sort (copy-sequence strings) 'string-lessp))) (re - ;; If NOREORDER is non-nil and the list contains a prefix - ;; of another string, we give up all attempts at optimisation. - ;; There is plenty of room for improvement (Bug#34641). - (if (and noreorder (regexp-opt--contains-prefix sorted-strings)) - (concat (or open "\\(?:") - (mapconcat #'regexp-quote strings "\\|") - "\\)") - (regexp-opt-group sorted-strings (or open t) (not open))))) + (cond + ;; No strings: return a\` which cannot match anything. + ((null strings) + (concat (or open "\\(?:") "a\\`\\)")) + ;; If we cannot reorder, give up all attempts at + ;; optimisation. There is room for improvement (Bug#34641). + ((and noreorder (regexp-opt--contains-prefix sorted-strings)) + (concat (or open "\\(?:") + (mapconcat #'regexp-quote strings "\\|") + "\\)")) + (t + (regexp-opt-group sorted-strings (or open t) (not open)))))) (cond ((eq paren 'words) (concat "\\<" re "\\>")) ((eq paren 'symbols) -- 2.39.2