From 905c0a13f7929298cb36151f46dbef03f7bdcbe4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Thu, 16 Apr 2020 11:04:24 +0200 Subject: [PATCH] Fix bugs, inefficiencies and bad style in regexps Found by relint. See discussion at https://lists.gnu.org/archive/html/emacs-devel/2020-04/msg00265.html * lisp/org/org-table.el (org-table-finish-edit-field): * lisp/arc-mode.el (archive-rar-summarize): Avoid wrapped subsumption in repeated sequences. * lisp/erc/erc-dcc.el (erc-dcc-ctcp-query-send-regexp): Replace inefficient repeated empty-matching expression with a plain greedy form. (erc-dcc-handle-ctcp-send): Adjust group numbers. * lisp/net/puny.el (puny-encode-domain): Fix fast-path shortcut pattern so that it actually works as intended. * lisp/progmodes/gdb-mi.el (gdb-control-commands-regexp): * lisp/vc/diff-mode.el (diff-imenu-generic-expression): Remove superfluous backslashes. * lisp/progmodes/scheme.el (scheme-imenu-generic-expression): Correct confused definition-matching pattern which would match more than intended. * lisp/textmodes/sgml-mode.el (sgml-attrs-re): Avoid inefficient matching by using the fact that the first character cannot match the last char of sgml-name-re. --- lisp/arc-mode.el | 2 +- lisp/erc/erc-dcc.el | 14 +++++++------- lisp/net/puny.el | 2 +- lisp/org/org-table.el | 2 +- lisp/progmodes/gdb-mi.el | 2 +- lisp/progmodes/scheme.el | 2 +- lisp/textmodes/sgml-mode.el | 5 ++++- lisp/vc/diff-mode.el | 2 +- 8 files changed, 17 insertions(+), 14 deletions(-) diff --git a/lisp/arc-mode.el b/lisp/arc-mode.el index 4d366679690..c918f06c80e 100644 --- a/lisp/arc-mode.el +++ b/lisp/arc-mode.el @@ -2032,7 +2032,7 @@ This doesn't recover lost files, it just undoes changes in the buffer itself." 
(call-process "lsar" nil t nil "-l" (or file copy)) (if copy (delete-file copy))) (goto-char (point-min)) - (re-search-forward "^\\(\s+=+\s*\\)+\n") + (re-search-forward "^\\(?:\s+=+\\)+\s*\n") (while (looking-at (concat "^\s+[0-9.]+\s+D?-+\s+" ; Flags "\\([0-9-]+\\)\s+" ; Size "\\([-0-9.]+\\)%?\s+" ; Ratio diff --git a/lisp/erc/erc-dcc.el b/lisp/erc/erc-dcc.el index 26701cec1e4..8ccceec4594 100644 --- a/lisp/erc/erc-dcc.el +++ b/lisp/erc/erc-dcc.el @@ -627,11 +627,11 @@ that subcommand." ?q query ?n nick ?u login ?h host)))) (defconst erc-dcc-ctcp-query-send-regexp - (concat "^DCC SEND \\(" + (concat "^DCC SEND \\(?:" ;; Following part matches either filename without spaces ;; or filename enclosed in double quotes with any number ;; of escaped double quotes inside. - "\"\\(\\(.*?\\(\\\\\"\\)?\\)+?\\)\"\\|\\([^ ]+\\)" + "\"\\(\\(?:\\\\\"\\|[^\"\\]\\)+\\)\"\\|\\([^ ]+\\)" "\\) \\([0-9]+\\) \\([0-9]+\\) *\\([0-9]*\\)")) (define-inline erc-dcc-unquote-filename (filename) @@ -653,11 +653,11 @@ It extracts the information about the dcc request and adds it to ?r "SEND" ?n nick ?u login ?h host)) ((string-match erc-dcc-ctcp-query-send-regexp query) (let ((filename - (or (match-string 5 query) - (erc-dcc-unquote-filename (match-string 2 query)))) - (ip (erc-decimal-to-ip (match-string 6 query))) - (port (match-string 7 query)) - (size (match-string 8 query))) + (or (match-string 2 query) + (erc-dcc-unquote-filename (match-string 1 query)))) + (ip (erc-decimal-to-ip (match-string 3 query))) + (port (match-string 4 query)) + (size (match-string 5 query))) ;; FIXME: a warning really should also be sent ;; if the ip address != the host the dcc sender is on. (erc-display-message diff --git a/lisp/net/puny.el b/lisp/net/puny.el index 60a6c12e6c7..6987d253248 100644 --- a/lisp/net/puny.el +++ b/lisp/net/puny.el @@ -35,7 +35,7 @@ For instance, \"fśf.org\" => \"xn--ff-2sa.org\"." 
;; The vast majority of domain names are not IDNA domain names, so ;; add a check first to avoid doing unnecessary work. - (if (string-match "\\'[[:ascii:]]+\\'" domain) + (if (string-match "\\`[[:ascii:]]+\\'" domain) domain (mapconcat 'puny-encode-string (split-string domain "[.]") "."))) diff --git a/lisp/org/org-table.el b/lisp/org/org-table.el index 98702feb375..8927b1c2ed9 100644 --- a/lisp/org/org-table.el +++ b/lisp/org/org-table.el @@ -2005,7 +2005,7 @@ the table and kill the editing buffer." text) (goto-char (point-min)) (while (re-search-forward "^#.*\n?" nil t) (replace-match "")) - (while (re-search-forward "\\([ \t]*\n[ \t]*\\)+" nil t) + (while (re-search-forward "[ \t]*\\(?:\n[ \t]*\\)+" nil t) (replace-match " ")) (setq text (org-trim (buffer-string))) (set-window-configuration cw) diff --git a/lisp/progmodes/gdb-mi.el b/lisp/progmodes/gdb-mi.el index ba586981de6..c1184211d06 100644 --- a/lisp/progmodes/gdb-mi.el +++ b/lisp/progmodes/gdb-mi.el @@ -1867,7 +1867,7 @@ static char *magick[] = { "\\|def\\(i\\(ne?\\)?\\)?\\|doc\\(u\\(m\\(e\\(nt?\\)?\\)?\\)?\\)?\\|" gdb-python-guile-commands-regexp "\\|while-stepping\\|stepp\\(i\\(ng?\\)?\\)?\\|ws\\|actions" - "\\|expl\\(o\\(r\\e?\\)?\\)?" + "\\|expl\\(o\\(re?\\)?\\)?" "\\)\\([[:blank:]]+\\([^[:blank:]]*\\)\\)*$") "Regexp matching GDB commands that enter a recursive reading loop. 
As long as GDB is in the recursive reading loop, it does not expect diff --git a/lisp/progmodes/scheme.el b/lisp/progmodes/scheme.el index 751d7da5427..33ba0d11d80 100644 --- a/lisp/progmodes/scheme.el +++ b/lisp/progmodes/scheme.el @@ -116,7 +116,7 @@ (defvar scheme-imenu-generic-expression '((nil - "^(define\\(\\|-\\(generic\\(\\|-procedure\\)\\|method\\)\\)*\\s-+(?\\(\\sw+\\)" 4) + "^(define\\(?:-\\(?:generic\\(?:-procedure\\)?\\|method\\)\\)?\\s-+(?\\(\\sw+\\)" 1) ("Types" "^(define-class\\s-+(?\\(\\sw+\\)" 1) ("Macros" diff --git a/lisp/textmodes/sgml-mode.el b/lisp/textmodes/sgml-mode.el index 6152a8ad0a7..9b29b844d01 100644 --- a/lisp/textmodes/sgml-mode.el +++ b/lisp/textmodes/sgml-mode.el @@ -286,7 +286,10 @@ separated by a space." (defconst sgml-namespace-re "[_[:alpha:]][-_.[:alnum:]]*") (defconst sgml-name-re "[_:[:alpha:]][-_.:[:alnum:]]*") (defconst sgml-tag-name-re (concat "<\\([!/?]?" sgml-name-re "\\)")) -(defconst sgml-attrs-re "\\(?:[^\"'/><]\\|\"[^\"]*\"\\|'[^']*'\\)*") +(defconst sgml-attrs-re + ;; This pattern cannot begin with a character matched by the end of + ;; `sgml-name-re' above. + "\\(?:[^_.:\"'/><[:alnum:]-]\\(?:[^\"'/><]\\|\"[^\"]*\"\\|'[^']*'\\)*\\)?") (defconst sgml-start-tag-regex (concat "<" sgml-name-re sgml-attrs-re) "Regular expression that matches a non-empty start tag. Any terminating `>' or `/' is not matched.") diff --git a/lisp/vc/diff-mode.el b/lisp/vc/diff-mode.el index da2d5ed50e4..d194d6c0a0e 100644 --- a/lisp/vc/diff-mode.el +++ b/lisp/vc/diff-mode.el @@ -484,7 +484,7 @@ and the face `diff-added' for added lines.") ;; Prefer second name as first is most likely to be a backup or ;; version-control name. The [\t\n] at the end of the unidiff pattern ;; catches Debian source diff files (which lack the trailing date). - '((nil "\\+\\+\\+\\ \\([^\t\n]+\\)[\t\n]" 1) ; unidiffs + '((nil "\\+\\+\\+ \\([^\t\n]+\\)[\t\n]" 1) ; unidiffs (nil "^--- \\([^\t\n]+\\)\t.*\n\\*" 1))) ; context diffs ;;;; -- 2.39.5