From 25544ce1bd4992b990365f8a09070f963aefb67d Mon Sep 17 00:00:00 2001 From: Simon Marshall Date: Fri, 6 Jun 1997 07:10:24 +0000 Subject: [PATCH] emit charsets after strings so that the final regexp finds the longest match. --- lisp/emacs-lisp/regexp-opt.el | 82 +++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 37 deletions(-) diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index adf8e9e8da7..140fd8005dd 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el @@ -4,7 +4,7 @@ ;; Author: Simon Marshall ;; Keywords: strings, regexps -;; Version: 1.04.01 +;; Version: 1.05 ;; This file is part of GNU Emacs. @@ -27,10 +27,14 @@ ;; The "opt" in "regexp-opt" stands for "optim\\(al\\|i\\(se\\|ze\\)\\)". ;; -;; This package generates a regexp from a given list of strings (that matches -;; one of those strings) that is equivalent to but more efficient than: +;; This package generates a regexp from a given list of strings (which matches +;; one of those strings) so that the regexp generated by: ;; -;; (mapconcat 'identity (mapcar 'regexp-quote strings) "\\|") +;; (regexp-opt strings) +;; +;; is equivalent to, but more efficient than, the regexp generated by: +;; +;; (mapconcat 'regexp-quote strings "\\|") ;; ;; For example: ;; @@ -42,10 +46,9 @@ ;; (concat "(" (regexp-opt strings t) "\\>")) ;; => "(\\(c\\(atch\\|ond\\(ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(current-buffer\\|excursion\\|match-data\\|restriction\\|window-excursion\\)\\|throw\\|un\\(less\\|wind-protect\\)\\|wh\\(en\\|ile\\)\\)\\>" ;; -;; Searching using the above example `regexp-opt' regexp is significantly -;; faster than searching using the equivalent `mapconcat' regexp, taking -;; approximately two-thirds of the time. -;; +;; Searching using the above example `regexp-opt' regexp takes approximately +;; two-thirds of the time taken using the equivalent `mapconcat' regexp. + ;; Since this package was written to produce efficient regexps, not regexps ;; efficiently, it is probably not a good idea to in-line too many calls in ;; your code, unless you use the following trick with `eval-when-compile': @@ -62,7 +65,15 @@ ;; (defvar definition-regexp ;; "^(\\(def\\(alias\\|const\\|macro\\|subst\\|un\\|var\\)\\)\\>") ;; -;; Originally written for font-lock.el, from an idea from Stig's hl319.el. +;; Note that if you use this trick for all instances of `regexp-opt' and +;; `regexp-opt-depth' in your code, regexp-opt.el would only have to be loaded +;; at compile time. But note also that using this trick means that should +;; regexp-opt.el be changed, perhaps to fix a bug or to add a feature to +;; improve the efficiency of `regexp-opt' regexps, you would have to recompile +;; your code for such changes to have effect in your code. + +;; Originally written for font-lock.el, from an idea from Stig's hl319.el, with +;; thanks for ideas also to Michael Ernst, Bob Glickstein and Dan Nicolaescu. ;; Please don't tell me that it doesn't produce optimal regexps; I know that ;; already. For example, the above explanation for the meaning of "opt" would ;; be more efficient as "optim\\(al\\|i[sz]e\\)", but this requires complex @@ -73,15 +84,16 @@ ;;;###autoload (defun regexp-opt (strings &optional paren) "Return a regexp to match a string in STRINGS. +Each string should be unique in STRINGS and should not contain any regexps. If optional PAREN non-nil, ensure that the returned regexp is enclosed by at least one regexp grouping construct. -Each string in STRINGS should be unique and should not contain any regexps. The returned regexp is typically more efficient than the equivalent regexp: - (mapconcat 'identity (mapcar 'regexp-quote STRINGS) \"\\\\|\") + (let ((open-paren (if PAREN \"\\\\(\" \"\")) (close-paren (if PAREN \"\\\\)\" \"\"))) + (concat open-paren (mapconcat 'regexp-quote STRINGS \"\\\\|\") close-paren)) -but typically contains regexp grouping constructs. Use `regexp-opt-depth' to -count them." +but typically contains more regexp grouping constructs. +Use `regexp-opt-depth' to count them." (save-match-data ;; Recurse on the sorted list. (let ((max-lisp-eval-depth (* 1024 1024)) @@ -165,12 +177,12 @@ in REGEXP." close-group))) ;; ;; If there are several one-character strings, remove them and recurse - ;; on the rest. + ;; on the rest (first so the final regexp finds the longest match). ((> (length letters) 1) (let ((rest (let ((completion-regexp-list '("^..+$"))) (all-completions "" (mapcar 'list strings))))) (concat open-group - (regexp-opt-charset letters) "\\|" (regexp-opt-group rest) + (regexp-opt-group rest) "\\|" (regexp-opt-charset letters) close-group))) ;; ;; Otherwise, divide the list into those that start with a particular @@ -196,30 +208,26 @@ in REGEXP." (bracket "") (dash "") (caret "")) ;; ;; Make a character map but extract character set meta characters. - (let (char) - (while chars - (setq char (string-to-char (pop chars))) - (cond ((eq char ?\]) - (setq bracket "]")) - ((eq char ?^) - (setq caret "^")) - ((eq char ?-) - (setq dash "-")) - (t - (aset charmap char t))))) + (dolist (char (mapcar 'string-to-char chars)) + (case char + (?\] + (setq bracket "]")) + (?^ + (setq caret "^")) + (?- + (setq dash "-")) + (otherwise + (aset charmap char t)))) ;; ;; Make a character set from the map using ranges where applicable. - (let ((elt 0) start) - (while (< elt charwidth) - (when (aref charmap elt) - (setq start (1+ elt)) - (while (and (< start charwidth) (aref charmap start)) - (incf start)) - (if (< (- start elt) 4) - (setq charset (format "%s%c" charset elt)) - (setq charset (format "%s%c-%c" charset elt (1- start)) - elt start))) - (incf elt))) + (dotimes (elt charwidth) + (when (aref charmap elt) + (let ((start elt)) + (while (and (< elt charwidth) (aref charmap elt)) + (incf elt)) + (if (> (- elt start) 3) + (setq charset (format "%s%c-%c" charset start (1- elt))) + (setq charset (format "%s%c" charset (setq elt start))))))) ;; ;; Make sure a caret is not first and a dash is first or last. (if (and (string-equal charset "") (string-equal bracket "")) -- 2.39.2