From: Mattias EngdegÄrd Date: Sun, 28 Feb 2021 12:06:24 +0000 (+0100) Subject: Fix pcase 'rx' pattern match-data bug X-Git-Tag: emacs-28.0.90~3500 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=bdea1883cc8feb8a607c3d05191e7dc8d12f0aa0;p=emacs.git Fix pcase 'rx' pattern match-data bug The pcase 'rx' pattern would in some cases allow the match data to be clobbered before it is read. For example: (pcase "PQR" ((and (rx (let a nonl)) (rx ?z)) (list 'one a)) ((rx (let b ?Q)) (list 'two b))) The above returned (two "P") instead of the correct (two "Q"). This occurred because the calls to string-match and match-string were presented as separate patterns to pcase, which would interleave them with other patterns. As a remedy, combine string matching and match-data extraction into a single pcase pattern. This introduces a slight inefficiency for two or more submatches as they are grouped into a list structure which then has to be destructured. Found by Stefan Monnier. See discussion at https://lists.gnu.org/archive/html/emacs-devel/2021-02/msg02010.html * lisp/emacs-lisp/rx.el (rx--reduce-right): New helper. (rx [pcase macro]): Combine string-match and match-string calls into a single pcase pattern. * test/lisp/emacs-lisp/rx-tests.el (rx-pcase): Add test cases. --- diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index ffc21951b64..56e588ee0d5 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el @@ -1418,6 +1418,12 @@ into a plain rx-expression, collecting names into `rx--pcase-vars'." (cons head (mapcar #'rx--pcase-transform rest))) (_ rx))) +(defun rx--reduce-right (f l) + "Right-reduction on L by F. L must be non-empty." + (if (cdr l) + (funcall f (car l) (rx--reduce-right f (cdr l))) + (car l))) + ;;;###autoload (pcase-defmacro rx (&rest regexps) "A pattern that matches strings against `rx' REGEXPS in sexp form. @@ -1436,17 +1442,28 @@ following constructs: introduced by a previous (let REF ...) construct." (let* ((rx--pcase-vars nil) - (regexp (rx--to-expr (rx--pcase-transform (cons 'seq regexps))))) + (regexp (rx--to-expr (rx--pcase-transform (cons 'seq regexps)))) + (nvars (length rx--pcase-vars))) `(and (pred stringp) - ;; `pcase-let' takes a match for granted and discards all unnecessary - ;; conditions, which means that a `pred' clause cannot be used for - ;; the match condition. The following construct seems to survive. - (app (lambda (s) (string-match ,regexp s)) (pred identity)) - ,@(let ((i 0)) - (mapcar (lambda (name) - (setq i (1+ i)) - `(app (match-string ,i) ,name)) - (reverse rx--pcase-vars)))))) + ,(if (zerop nvars) + ;; No variables bound: a single predicate suffices. + `(pred (string-match ,regexp)) + ;; Pack the submatches into a dotted list which is then + ;; immediately destructured into individual variables again. + ;; This is of course slightly inefficient when NVARS > 1. + ;; A dotted list is used to reduce the number of conses + ;; to create and take apart. + `(app (lambda (s) + (and (string-match ,regexp s) + ,(rx--reduce-right + (lambda (a b) `(cons ,a ,b)) + (mapcar (lambda (i) `(match-string ,i s)) + (number-sequence 1 nvars))))) + ,(list '\` + (rx--reduce-right + #'cons + (mapcar (lambda (name) (list '\, name)) + (reverse rx--pcase-vars))))))))) ;; Obsolete internal symbol, used in old versions of the `flycheck' package. (define-obsolete-function-alias 'rx-submatch-n 'rx-to-string "27.1") diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el index fecdcf55aff..2dd1bca22d1 100644 --- a/test/lisp/emacs-lisp/rx-tests.el +++ b/test/lisp/emacs-lisp/rx-tests.el @@ -156,6 +156,8 @@ "....."))) (ert-deftest rx-pcase () + (should (equal (pcase "i18n" ((rx (let x (+ digit))) (list 'ok x))) + '(ok "18"))) (should (equal (pcase "a 1 2 3 1 1 b" ((rx (let u (+ digit)) space (let v (+ digit)) space @@ -176,6 +178,12 @@ ((rx nonl) 'wrong) (_ 'correct)) 'correct)) + (should (equal (pcase "PQR" + ((and (rx (let a nonl)) (rx ?z)) + (list 'one a)) + ((rx (let b ?Q)) + (list 'two b))) + '(two "Q"))) (should (equal (pcase-let (((rx ?B (let z nonl)) "ABC")) (list 'ok z)) '(ok "C")))