From: Mattias Engdegård Date: Thu, 20 Feb 2020 14:45:44 +0000 (+0100) Subject: Remove subsumed repetitions in regexps X-Git-Tag: emacs-28.0.90~7858 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=770f76f050;p=emacs.git Remove subsumed repetitions in regexps Make regexps smaller and faster by removing terms that are superfluous by virtue of standing next to another term that matches more. See https://lists.gnu.org/archive/html/emacs-devel/2020-01/msg00949.html for details. * lisp/bs.el (bs--make-header-match-string): * lisp/gnus/deuglify.el (gnus-outlook-repair-attribution-block): * lisp/gnus/message.el (message-subject-trailing-was-ask-regexp) (message-subject-trailing-was-regexp): * lisp/informat.el (Info-validate): * lisp/net/browse-url.el (browse-url-button-regexp): * lisp/net/rcirc.el (rcirc-url-regexp): * lisp/org/ob-core.el (org-babel-remove-result): * lisp/org/ob-fortran.el (org-babel-fortran-ensure-main-wrap): * lisp/org/org-capture.el (org-capture-set-target-location): * lisp/org/org-table.el (org-table-expand-lhs-ranges): * lisp/org/org.el (org-maybe-keyword-time-regexp, org-ts-regexp) (org-ts-regexp-inactive, org-ts-regexp-both): * lisp/play/gametree.el (gametree-hack-file-layout): * lisp/progmodes/cc-mode.el (c-Java-defun-prompt-regexp): * lisp/progmodes/idlw-shell.el (idlwave-shell-halting-error): * lisp/progmodes/ruby-mode.el (ruby-mode-set-encoding): * lisp/progmodes/verilog-mode.el (verilog-error-font-lock-keywords) (verilog-verilint-off, verilog-case-indent-level) (verilog-within-translate-off, verilog-start-translate-off) (verilog-back-to-start-translate-off, verilog-end-translate-off) (verilog-expand-dirnames): * lisp/term.el (term-control-seq-regexp): * lisp/textmodes/reftex-vars.el (featurep): * lisp/url/url-gw.el (url-open-telnet): * lisp/vc/ediff-ptch.el (ediff-context-diff-label-regexp): * lisp/vc/pcvs-parse.el (cvs-parse-status): * test/src/regex-emacs-tests.el (regex-tests-PCRE): Remove subsumed repetitions. 
* lisp/progmodes/sh-script.el (sh-syntax-propertize-function): Simplify repetition of a repetition. --- diff --git a/lisp/bs.el b/lisp/bs.el index f5cb93b5169..337d22ecf83 100644 --- a/lisp/bs.el +++ b/lisp/bs.el @@ -173,7 +173,12 @@ return a string representing the column's value." (defun bs--make-header-match-string () "Return a regexp matching the first line of a Buffer Selection Menu buffer." - (concat "^\\(" (mapconcat #'car bs-attributes-list " *") " *$\\)")) + (concat "^\\(" + (apply #'concat (mapcan (lambda (e) + (and (not (equal (car e) "")) + (list " *" (car e)))) + bs-attributes-list)) + " *$\\)")) ;; Font-Lock-Settings (defvar bs-mode-font-lock-keywords diff --git a/lisp/gnus/deuglify.el b/lisp/gnus/deuglify.el index 0ce4a7d2928..647f643c962 100644 --- a/lisp/gnus/deuglify.el +++ b/lisp/gnus/deuglify.el @@ -403,9 +403,9 @@ NODISPLAY is non-nil, don't redisplay the article buffer." (gnus-with-article-buffer (article-goto-body) (when (re-search-forward - (concat "^[" cite-marks " \t]*--* ?[^-]+ [^-]+ ?--*\\s *\n" + (concat "^[" cite-marks " \t]*--*[^-]+ [^-]+--*\\s *\n" "[^\n:]+:[ \t]*\\([^\n]+\\)\n" - "\\([^\n:]+:[ \t]*[^\n]+\n\\)+") + "\\([^\n:]+:[^\n]+\n\\)+") nil t) (gnus-kill-all-overlays) (replace-match "\\1 wrote:\n") diff --git a/lisp/gnus/message.el b/lisp/gnus/message.el index 3abeec7e2f2..f1ace0eeb2e 100644 --- a/lisp/gnus/message.el +++ b/lisp/gnus/message.el @@ -322,7 +322,7 @@ used." :group 'message-various) (defcustom message-subject-trailing-was-ask-regexp - "[ \t]*\\([[(]+[Ww][Aa][Ss]:?[ \t]*.*[])]+\\)" + "[ \t]*\\([[(]+[Ww][Aa][Ss].*[])]+\\)" "Regexp matching \"(was: <old subject>)\" in the subject line. The function `message-strip-subject-trailing-was' uses this regexp if @@ -337,7 +337,7 @@ It is okay to create some false positives here, as the user is asked." :type 'regexp) (defcustom message-subject-trailing-was-regexp - "[ \t]*\\((*[Ww][Aa][Ss]:[ \t]*.*)\\)" + "[ \t]*\\((*[Ww][Aa][Ss]:.*)\\)" "Regexp matching \"(was: <old subject>)\" in the subject line. 
If `message-subject-trailing-was-query' is set to t, the subject is diff --git a/lisp/informat.el b/lisp/informat.el index 9873f66f215..7750ab00898 100644 --- a/lisp/informat.el +++ b/lisp/informat.el @@ -337,7 +337,7 @@ Check that every node pointer points to an existing node." (point)))) (Info-extract-menu-node-name)))) (goto-char (point-min)) - (while (re-search-forward "\\*note[ \n]*[^:\t]*:" nil t) + (while (re-search-forward "\\*note\\>[^:\t]*:" nil t) (goto-char (+ (match-beginning 0) 5)) (skip-chars-forward " \n") (Info-validate-node-name diff --git a/lisp/net/browse-url.el b/lisp/net/browse-url.el index 25aabf6d61d..7aad44b2876 100644 --- a/lisp/net/browse-url.el +++ b/lisp/net/browse-url.el @@ -216,7 +216,7 @@ be used instead." "\\(?:" ;; Match paired parentheses, e.g. in Wikipedia URLs: ;; http://thread.gmane.org/47B4E3B2.3050402@gmail.com - "[" chars punct "]+" "(" "[" chars punct "]+" "[" chars "]*)" + "[" chars punct "]+" "(" "[" chars punct "]+" ")" "\\(?:" "[" chars punct "]+" "[" chars "]" "\\)?" "\\|" "[" chars punct "]+" "[" chars "]" diff --git a/lisp/net/rcirc.el b/lisp/net/rcirc.el index ad06d31cf9a..a2234166903 100644 --- a/lisp/net/rcirc.el +++ b/lisp/net/rcirc.el @@ -2421,7 +2421,7 @@ keywords when no KEYWORD is given." (concat "\\(?:" ;; Match paired parentheses, e.g. 
in Wikipedia URLs: - "[" chars punct "]+" "(" "[" chars punct "]+" "[" chars "]*)" "[" chars "]" + "[" chars punct "]+" "(" "[" chars punct "]+" ")" "[" chars "]" "\\|" "[" chars punct "]+" "[" chars "]" "\\)")) diff --git a/lisp/org/ob-core.el b/lisp/org/ob-core.el index 651561a2018..a5ad97a468b 100644 --- a/lisp/org/ob-core.el +++ b/lisp/org/ob-core.el @@ -2437,7 +2437,7 @@ INFO may provide the values of these header arguments (in the (when location (save-excursion (goto-char location) - (when (looking-at (concat org-babel-result-regexp ".*$")) + (when (looking-at org-babel-result-regexp) (delete-region (if keep-keyword (line-beginning-position 2) (save-excursion diff --git a/lisp/org/ob-fortran.el b/lisp/org/ob-fortran.el index 154465f28e1..149058f05f4 100644 --- a/lisp/org/ob-fortran.el +++ b/lisp/org/ob-fortran.el @@ -106,7 +106,7 @@ its header arguments." (defun org-babel-fortran-ensure-main-wrap (body params) "Wrap body in a \"program ... end program\" block if none exists." - (if (string-match "^[ \t]*program[ \t]*.*" (capitalize body)) + (if (string-match "^[ \t]*program\\>" (capitalize body)) (let ((vars (org-babel--get-vars params))) (when vars (error "Cannot use :vars if `program' statement is present")) body) diff --git a/lisp/org/org-capture.el b/lisp/org/org-capture.el index 003cbef1fdf..ace51270175 100644 --- a/lisp/org/org-capture.el +++ b/lisp/org/org-capture.el @@ -1021,7 +1021,7 @@ Store them in the capture property list." (apply #'encode-time 0 0 org-extend-today-until (cl-cdddr (decode-time prompt-time)))) - ((string-match "\\([^ ]+\\)--?[^ ]+[ ]+\\(.*\\)" + ((string-match "\\([^ ]+\\)-[^ ]+[ ]+\\(.*\\)" org-read-date-final-answer) ;; Replace any time range by its start. 
(apply #'encode-time diff --git a/lisp/org/org-table.el b/lisp/org/org-table.el index 469e01be5d2..98702feb375 100644 --- a/lisp/org/org-table.el +++ b/lisp/org/org-table.el @@ -3099,7 +3099,7 @@ function assumes the table is already analyzed (i.e., using (let ((lhs (car e)) (rhs (cdr e))) (cond - ((string-match-p "\\`@-?[-+0-9]+\\$-?[0-9]+\\'" lhs) + ((string-match-p "\\`@[-+0-9]+\\$-?[0-9]+\\'" lhs) ;; This just refers to one fixed field. (push e res)) ((string-match-p "\\`[a-zA-Z][_a-zA-Z0-9]*\\'" lhs) diff --git a/lisp/org/org.el b/lisp/org/org.el index e8e1ef99f26..52b72817650 100644 --- a/lisp/org/org.el +++ b/lisp/org/org.el @@ -460,7 +460,7 @@ Matched keyword is in group 1.") org-clock-string) t) "\\)?" - " *\\([[<][0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\} ?[^]\r\n>]*?[]>]" + " *\\([[<][0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}[^]\r\n>]*[]>]" "\\|" "<%%([^\r\n>]*>\\)") "Matches a timestamp, possibly preceded by a keyword.") @@ -564,14 +564,14 @@ Effort estimates given in this property need to have the format H:MM.") ;;;; Timestamp -(defconst org-ts-regexp "<\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\} ?[^\r\n>]*?\\)>" +(defconst org-ts-regexp "<\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}[^\r\n>]*\\)>" "Regular expression for fast time stamp matching.") (defconst org-ts-regexp-inactive - "\\[\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\} ?[^\r\n>]*?\\)\\]" + "\\[\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}[^\r\n>]*\\)\\]" "Regular expression for fast inactive time stamp matching.") -(defconst org-ts-regexp-both "[[<]\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\} ?[^]\r\n>]*?\\)[]>]" +(defconst org-ts-regexp-both "[[<]\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}[^]\r\n>]*\\)[]>]" "Regular expression for fast time stamp matching.") (defconst org-ts-regexp0 diff --git a/lisp/play/gametree.el b/lisp/play/gametree.el index aa99b553244..ba74afce298 100644 --- a/lisp/play/gametree.el +++ b/lisp/play/gametree.el @@ -324,7 +324,7 @@ This value is simply the outline heading level of the current 
line." (defun gametree-hack-file-layout () (save-excursion (goto-char (point-min)) - (if (looking-at "[^\n]*-*-[^\n]*gametree-local-layout: \\([^;\n]*\\);") + (if (looking-at "[^\n]*-[^\n]*gametree-local-layout: \\([^;\n]*\\);") (progn (goto-char (match-beginning 1)) (delete-region (point) (match-end 1)) diff --git a/lisp/progmodes/cc-mode.el b/lisp/progmodes/cc-mode.el index 7496684d939..9c62b2bb065 100644 --- a/lisp/progmodes/cc-mode.el +++ b/lisp/progmodes/cc-mode.el @@ -2671,7 +2671,7 @@ Key bindings: ;; since it's practically impossible to write a regexp that reliably ;; matches such a construct. Other tools are necessary. (defconst c-Java-defun-prompt-regexp - "^[ \t]*\\(\\(\\(public\\|protected\\|private\\|const\\|abstract\\|synchronized\\|final\\|static\\|threadsafe\\|transient\\|native\\|volatile\\)\\s-+\\)*\\(\\(\\([[a-zA-Z][][_$.a-zA-Z0-9]*[][_$.a-zA-Z0-9]+\\|[[a-zA-Z]\\)\\s-*\\)\\s-+\\)\\)?\\(\\([[a-zA-Z][][_$.a-zA-Z0-9]*\\s-+\\)\\s-*\\)?\\([_a-zA-Z][^][ \t:;.,{}()\^?=]*\\|\\([_$a-zA-Z][_$.a-zA-Z0-9]*\\)\\)\\s-*\\(([^);{}]*)\\)?\\([] \t]*\\)\\(\\s-*\\<throws\\>\\s-*\\(\\([_$a-zA-Z][_$.a-zA-Z0-9]*\\)[, \t\n\r\f\v]*\\)+\\)?\\s-*") + "^[ \t]*\\(\\(\\(public\\|protected\\|private\\|const\\|abstract\\|synchronized\\|final\\|static\\|threadsafe\\|transient\\|native\\|volatile\\)\\s-+\\)*\\(\\(\\([[a-zA-Z][][_$.a-zA-Z0-9]+\\|[[a-zA-Z]\\)\\s-*\\)\\s-+\\)\\)?\\(\\([[a-zA-Z][][_$.a-zA-Z0-9]*\\s-+\\)\\s-*\\)?\\([_a-zA-Z][^][ \t:;.,{}()\^?=]*\\|\\([_$a-zA-Z][_$.a-zA-Z0-9]*\\)\\)\\s-*\\(([^);{}]*)\\)?\\([] \t]*\\)\\(\\s-*\\<throws\\>\\s-*\\(\\([_$a-zA-Z][_$.a-zA-Z0-9]*\\)[, \t\n\r\f\v]*\\)+\\)?\\s-*") (easy-menu-define c-java-menu java-mode-map "Java Mode Commands" (cons "Java" (c-lang-const c-mode-menu java))) diff --git a/lisp/progmodes/idlw-shell.el b/lisp/progmodes/idlw-shell.el index dba70cb2821..6770fbe8abc 100644 --- a/lisp/progmodes/idlw-shell.el +++ b/lisp/progmodes/idlw-shell.el @@ -1598,7 +1598,7 @@ number.") "A regular expression to match any IDL error.") (defvar 
idlwave-shell-halting-error - "^% .*\n\\([^%].*\n\\)*% Execution halted at:\\(\\s-*\\S-+\\s-*[0-9]+\\s-*.*\\)\n" + "^% .*\n\\([^%].*\n\\)*% Execution halted at:\\(\\s-*\\S-+\\s-*[0-9]+.*\\)\n" "A regular expression to match errors which halt execution.") (defvar idlwave-shell-cant-continue-error diff --git a/lisp/progmodes/ruby-mode.el b/lisp/progmodes/ruby-mode.el index 5da5577c108..e16225c7fa9 100644 --- a/lisp/progmodes/ruby-mode.el +++ b/lisp/progmodes/ruby-mode.el @@ -801,7 +801,7 @@ The style of the comment is controlled by `ruby-encoding-magic-comment-style'." (let ((coding-system (ruby--detect-encoding))) (when coding-system (if (looking-at "^#!") (beginning-of-line 2)) - (cond ((looking-at "\\s *#\\s *.*\\(en\\)?coding\\s *:\\s *\\([-a-z0-9_]*\\)") + (cond ((looking-at "\\s *#.*\\(en\\)?coding\\s *:\\s *\\([-a-z0-9_]*\\)") ;; update existing encoding comment if necessary (unless (string= (match-string 2) coding-system) (goto-char (match-beginning 2)) diff --git a/lisp/progmodes/sh-script.el b/lisp/progmodes/sh-script.el index a241a1e69ba..044d7820ee3 100644 --- a/lisp/progmodes/sh-script.el +++ b/lisp/progmodes/sh-script.el @@ -1096,7 +1096,7 @@ subshells can nest." (")" (0 (sh-font-lock-paren (match-beginning 0)))) ;; Highlight (possibly nested) subshells inside "" quoted ;; regions correctly. - ("\"\\(?:\\(?:[^\\\"]\\|\\\\.\\)*?\\)??\\(\\$(\\|`\\)" + ("\"\\(?:[^\\\"]\\|\\\\.\\)*?\\(\\$(\\|`\\)" (1 (ignore (if (nth 8 (save-excursion (syntax-ppss (match-beginning 0)))) (goto-char (1+ (match-beginning 0))) diff --git a/lisp/progmodes/verilog-mode.el b/lisp/progmodes/verilog-mode.el index 460957b7161..cc601601193 100644 --- a/lisp/progmodes/verilog-mode.el +++ b/lisp/progmodes/verilog-mode.el @@ -958,8 +958,8 @@ See `compilation-error-regexp-alist-alist' for the formatting. 
For XEmacs.") ("syntax error:.*\n\\([^ \t]+\\) *\\([0-9]+\\):" 1 bold t) ("syntax error:.*\n\\([^ \t]+\\) *\\([0-9]+\\):" 2 bold t) ;; verilog-verilator - (".*%?\\(Error\\|Warning\\)\\(-[^:]+\\|\\):[\n ]*\\([^ \t:]+\\):\\([0-9]+\\):" 3 bold t) - (".*%?\\(Error\\|Warning\\)\\(-[^:]+\\|\\):[\n ]*\\([^ \t:]+\\):\\([0-9]+\\):" 4 bold t) + (".*\\(Error\\|Warning\\)\\(-[^:]+\\|\\):[\n ]*\\([^ \t:]+\\):\\([0-9]+\\):" 3 bold t) + (".*\\(Error\\|Warning\\)\\(-[^:]+\\|\\):[\n ]*\\([^ \t:]+\\):\\([0-9]+\\):" 4 bold t) ;; verilog-leda ("^In file \\([^ \t]+\\)[ \t]+line[ \t]+\\([0-9]+\\):\n[^\n]*\n[^\n]*\n\\(Warning\\|Error\\|Failure\\)[^\n]*" 1 bold t) ("^In file \\([^ \t]+\\)[ \t]+line[ \t]+\\([0-9]+\\):\n[^\n]*\n[^\n]*\n\\(Warning\\|Error\\|Failure\\)[^\n]*" 2 bold t) @@ -5345,7 +5345,7 @@ becomes: (interactive) (save-excursion (beginning-of-line) - (when (looking-at "\\(.*\\)([WE]\\([0-9A-Z]+\\)).*,\\s +line\\s +[0-9]+:\\s +\\([^:\n]+\\):?.*$") + (when (looking-at "\\(.*\\)([WE]\\([0-9A-Z]+\\)).*,\\s +line\\s +[0-9]+:\\s +\\([^:\n]+\\).*$") (replace-match (format ;; %3s makes numbers 1-999 line up nicely "\\1//Verilint %3s off // WARNING: \\3" @@ -6788,7 +6788,7 @@ Do not count named blocks or case-statements." ((looking-at verilog-named-block-re) (current-column)) ((and (not (looking-at verilog-extended-case-re)) - (looking-at "^[^:;]+[ \t]*:")) + (looking-at "^[^:;]+:")) (verilog-re-search-forward ":" nil t) (skip-chars-forward " \t") (current-column)) @@ -7782,7 +7782,7 @@ If search fails, other files are checked based on "Return point if within translate-off region, else nil." 
(and (save-excursion (re-search-backward - (concat "//\\s-*.*\\s-*" verilog-directive-regexp "\\(on\\|off\\)\\>") + (concat "//.*" verilog-directive-regexp "\\(on\\|off\\)\\>") nil t)) (equal "off" (match-string 2)) (point))) @@ -7790,14 +7790,14 @@ If search fails, other files are checked based on (defun verilog-start-translate-off (limit) "Return point before translate-off directive if before LIMIT, else nil." (when (re-search-forward - (concat "//\\s-*.*\\s-*" verilog-directive-regexp "off\\>") + (concat "//.*" verilog-directive-regexp "off\\>") limit t) (match-beginning 0))) (defun verilog-back-to-start-translate-off (limit) "Return point before translate-off directive if before LIMIT, else nil." (when (re-search-backward - (concat "//\\s-*.*\\s-*" verilog-directive-regexp "off\\>") + (concat "//.*" verilog-directive-regexp "off\\>") limit t) (match-beginning 0))) @@ -7805,7 +7805,7 @@ If search fails, other files are checked based on "Return point after translate-on directive if before LIMIT, else nil." (re-search-forward (concat - "//\\s-*.*\\s-*" verilog-directive-regexp "on\\>") limit t)) + "//.*" verilog-directive-regexp "on\\>") limit t)) (defun verilog-match-translate-off (limit) "Match a translate-off block, setting `match-data' and returning t, else nil. @@ -9982,7 +9982,7 @@ Or, just the existing dirnames themselves if there are no wildcards." (while dirnames (setq dirname (car dirnames) dirnames (cdr dirnames)) - (cond ((string-match (concat "^\\(\\|[/\\]*[^*?]*[/\\]\\)" ; root + (cond ((string-match (concat "^\\(\\|[^*?]*[/\\]\\)" ; root "\\([^/\\]*[*?][^/\\]*\\)" ; filename with *? "\\(.*\\)") ; rest dirname) diff --git a/lisp/term.el b/lisp/term.el index 09dfeb61d17..b990c83cfcb 100644 --- a/lisp/term.el +++ b/lisp/term.el @@ -2796,12 +2796,12 @@ See `term-prompt-regexp'." 
"\\(?:[\r\n\000\007\t\b\016\017]\\|" ;; some Emacs specific control sequences, implemented by ;; `term-command-hook', - "\032[^\n]+\r?\n\\|" + "\032[^\n]+\n\\|" ;; a C1 escape coded character (see [ECMA-48] section 5.3 "Elements ;; of the C1 set"), "\e\\(?:[DM78c]\\|" ;; another Emacs specific control sequence, - "AnSiT[^\n]+\r?\n\\|" + "AnSiT[^\n]+\n\\|" ;; or an escape sequence (section 5.4 "Control Sequences"), "\\[\\([\x30-\x3F]*\\)[\x20-\x2F]*[\x40-\x7E]\\)\\)") "Regexp matching control sequences handled by term.el.") diff --git a/lisp/textmodes/reftex-vars.el b/lisp/textmodes/reftex-vars.el index 50dd6cd5f5a..c9fd19d2324 100644 --- a/lisp/textmodes/reftex-vars.el +++ b/lisp/textmodes/reftex-vars.el @@ -925,7 +925,7 @@ DOWNCASE t: Downcase words before using them." "\\