;; Author: Simon Marshall <simon@gnu.ai.mit.edu>
;; Keywords: strings, regexps
-;; Version: 1.04.01
+;; Version: 1.05
;; This file is part of GNU Emacs.
;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i\\(se\\|ze\\)\\)".
;;
-;; This package generates a regexp from a given list of strings (that matches
-;; one of those strings) that is equivalent to but more efficient than:
+;; This package generates a regexp from a given list of strings (which matches
+;; one of those strings) so that the regexp generated by:
;;
-;; (mapconcat 'identity (mapcar 'regexp-quote strings) "\\|")
+;; (regexp-opt strings)
+;;
+;; is equivalent to, but more efficient than, the regexp generated by:
+;;
+;; (mapconcat 'regexp-quote strings "\\|")
;;
;; For example:
;;
;; (concat "(" (regexp-opt strings t) "\\>"))
;; => "(\\(c\\(atch\\|ond\\(ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(current-buffer\\|excursion\\|match-data\\|restriction\\|window-excursion\\)\\|throw\\|un\\(less\\|wind-protect\\)\\|wh\\(en\\|ile\\)\\)\\>"
;;
-;; Searching using the above example `regexp-opt' regexp is significantly
-;; faster than searching using the equivalent `mapconcat' regexp, taking
-;; approximately two-thirds of the time.
-;;
+;; Searching using the above example `regexp-opt' regexp takes approximately
+;; two-thirds of the time taken using the equivalent `mapconcat' regexp.
+
;; Since this package was written to produce efficient regexps, not regexps
;; efficiently, it is probably not a good idea to in-line too many calls in
;; your code, unless you use the following trick with `eval-when-compile':
;; (defvar definition-regexp
;; "^(\\(def\\(alias\\|const\\|macro\\|subst\\|un\\|var\\)\\)\\>")
;;
-;; Originally written for font-lock.el, from an idea from Stig's hl319.el.
+;; Note that if you use this trick for all instances of `regexp-opt' and
+;; `regexp-opt-depth' in your code, regexp-opt.el would only have to be loaded
+;; at compile time. But note also that using this trick means that should
+;; regexp-opt.el be changed, perhaps to fix a bug or to add a feature to
+;; improve the efficiency of `regexp-opt' regexps, you would have to recompile
+;; your code for such changes to have effect in your code.
+
+;; Originally written for font-lock.el, from an idea from Stig's hl319.el, with
+;; thanks for ideas also to Michael Ernst, Bob Glickstein and Dan Nicolaescu.
;; Please don't tell me that it doesn't produce optimal regexps; I know that
;; already. For example, the above explanation for the meaning of "opt" would
;; be more efficient as "optim\\(al\\|i[sz]e\\)", but this requires complex
;;;###autoload
(defun regexp-opt (strings &optional paren)
"Return a regexp to match a string in STRINGS.
+Each string should be unique in STRINGS and should not contain any regexps.
If optional PAREN non-nil, ensure that the returned regexp is enclosed by at
least one regexp grouping construct.
-Each string in STRINGS should be unique and should not contain any regexps.
The returned regexp is typically more efficient than the equivalent regexp:
- (mapconcat 'identity (mapcar 'regexp-quote STRINGS) \"\\\\|\")
+ (let ((open-paren (if PAREN \"\\\\(\" \"\")) (close-paren (if PAREN \"\\\\)\" \"\")))
+ (concat open-paren (mapconcat 'regexp-quote STRINGS \"\\\\|\") close-paren))
-but typically contains regexp grouping constructs. Use `regexp-opt-depth' to
-count them."
+but typically contains more regexp grouping constructs.
+Use `regexp-opt-depth' to count them."
(save-match-data
;; Recurse on the sorted list.
(let ((max-lisp-eval-depth (* 1024 1024))
close-group)))
;;
;; If there are several one-character strings, remove them and recurse
- ;; on the rest.
+ ;; on the rest (first so the final regexp finds the longest match).
((> (length letters) 1)
(let ((rest (let ((completion-regexp-list '("^..+$")))
(all-completions "" (mapcar 'list strings)))))
(concat open-group
- (regexp-opt-charset letters) "\\|" (regexp-opt-group rest)
+ (regexp-opt-group rest) "\\|" (regexp-opt-charset letters)
close-group)))
;;
;; Otherwise, divide the list into those that start with a particular
(bracket "") (dash "") (caret ""))
;;
;; Make a character map but extract character set meta characters.
- (let (char)
- (while chars
- (setq char (string-to-char (pop chars)))
- (cond ((eq char ?\])
- (setq bracket "]"))
- ((eq char ?^)
- (setq caret "^"))
- ((eq char ?-)
- (setq dash "-"))
- (t
- (aset charmap char t)))))
+ (dolist (char (mapcar 'string-to-char chars))
+ (case char
+ (?\]
+ (setq bracket "]"))
+ (?^
+ (setq caret "^"))
+ (?-
+ (setq dash "-"))
+ (otherwise
+ (aset charmap char t))))
;;
;; Make a character set from the map using ranges where applicable.
- (let ((elt 0) start)
- (while (< elt charwidth)
- (when (aref charmap elt)
- (setq start (1+ elt))
- (while (and (< start charwidth) (aref charmap start))
- (incf start))
- (if (< (- start elt) 4)
- (setq charset (format "%s%c" charset elt))
- (setq charset (format "%s%c-%c" charset elt (1- start))
- elt start)))
- (incf elt)))
+ (dotimes (elt charwidth)
+ (when (aref charmap elt)
+ (let ((start elt))
+ (while (and (< elt charwidth) (aref charmap elt))
+ (incf elt))
+ (if (> (- elt start) 3)
+ (setq charset (format "%s%c-%c" charset start (1- elt)))
+ (setq charset (format "%s%c" charset (setq elt start)))))))
;;
;; Make sure a caret is not first and a dash is first or last.
(if (and (string-equal charset "") (string-equal bracket ""))