From d02c9bcd096c44b4e3d5e2834c75967b56cdecdd Mon Sep 17 00:00:00 2001 From: Stefan Monnier Date: Tue, 10 Aug 2010 15:18:14 +0200 Subject: [PATCH] * lisp/emacs-lisp/pcase.el: New file. --- etc/NEWS | 2 + lisp/ChangeLog | 16 +- lisp/emacs-lisp/pcase.el | 489 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 501 insertions(+), 6 deletions(-) create mode 100644 lisp/emacs-lisp/pcase.el diff --git a/etc/NEWS b/etc/NEWS index a92e0e3a658..bec5b3b3468 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -371,6 +371,8 @@ threads simultaneously. * New Modes and Packages in Emacs 24.1 +** pcase.el provides the ML-style pattern matching macro `pcase'. + ** smie.el is a package providing a simple generic indentation engine. ** secrets.el is an implementation of the Secret Service API, an diff --git a/lisp/ChangeLog b/lisp/ChangeLog index c8b49e4b5e7..e5b6c8182d8 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,10 +1,14 @@ +2010-08-10 Stefan Monnier + + * emacs-lisp/pcase.el: New file. + 2010-08-10 Michael Albinus * net/tramp.el (tramp-vc-registered-read-file-names): Read input as here-document, otherwise the command could exceed maximum length of command line. - (tramp-handle-vc-registered): Call script accordingly. Reported - by Toru TSUNEYOSHI . + (tramp-handle-vc-registered): Call script accordingly. + Reported by Toru TSUNEYOSHI . 2010-08-10 Kenichi Handa @@ -21,11 +25,11 @@ (package-installed-p, package-compute-transaction) (package-read-all-archive-contents) (package--add-to-archive-contents, package-buffer-info) - (package-tar-file-info, package-list-packages-internal): Use - version-to-list and version-list-*. + (package-tar-file-info, package-list-packages-internal): + Use version-to-list and version-list-*. - * emacs-lisp/package-x.el (package-upload-buffer-internal): Use - version-to-list. + * emacs-lisp/package-x.el (package-upload-buffer-internal): + Use version-to-list. (package-upload-buffer-internal): Use version-list-<=. 2010-08-09 Kenichi Handa diff --git a/lisp/emacs-lisp/pcase.el b/lisp/emacs-lisp/pcase.el new file mode 100644 index 00000000000..03d760b2df5 --- /dev/null +++ b/lisp/emacs-lisp/pcase.el @@ -0,0 +1,489 @@ +;;; pcase.el --- ML-style pattern-matching macro for Elisp + +;; Copyright (C) 2010 Stefan Monnier + +;; Author: Stefan Monnier +;; Keywords: + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see . + +;;; Commentary: + +;; ML-style pattern matching. +;; The entry points are autoloaded. + +;;; Code: + +(eval-when-compile (require 'cl)) + +;; Macro-expansion of pcase is reasonably fast, so it's not a problem +;; when byte-compiling a file, but when interpreting the code, if the pcase +;; is in a loop, the repeated macro-expansion becomes terribly costly, so we +;; memoize previous macro expansions to try and avoid recomputing them +;; over and over again. +(defconst pcase-memoize (make-hash-table :weakness t :test 'equal)) + +;;;###autoload +(defmacro pcase (exp &rest cases) + "Perform ML-style pattern matching on EXP. +CASES is a list of elements of the form (UPATTERN CODE...). + +UPatterns can take the following forms: + _ matches anything. + SYMBOL matches anything and binds it to SYMBOL. + (or UPAT...) matches if any of the patterns matches. + (and UPAT...) matches if all the patterns match. + `QPAT matches if the QPattern QPAT matches. + (pred PRED) matches if PRED applied to the object returns non-nil. + +QPatterns can take the following forms: + (QPAT1 . QPAT2) matches if QPAT1 matches the car and QPAT2 the cdr. + ,UPAT matches if the UPattern UPAT matches. + ATOM matches if the object is `eq' to ATOM. +QPatterns for vectors are not implemented yet. + +PRED can take the form + FUNCTION in which case it gets called with one argument. + (FUN ARG1 .. ARGN) in which case it gets called with N+1 arguments. +A PRED of the form FUNCTION is equivalent to one of the form (FUNCTION). +PRED patterns can refer to variables bound earlier in the pattern. +E.g. you can match pairs where the cdr is larger than the car with a pattern +like `(,a . ,(pred (< a))) or, with more checks: +`(,(and a (pred numberp)) . ,(and (pred numberp) (pred (< a))))" + (declare (indent 1) (debug case)) + (or (gethash (cons exp cases) pcase-memoize) + (puthash (cons exp cases) + (pcase-expand exp cases) + pcase-memoize))) + +;;;###autoload +(defmacro pcase-let* (bindings body) + "Like `let*' but where you can use `pcase' patterns for bindings. +BODY should be an expression, and BINDINGS should be a list of bindings +of the form (UPAT EXP)." + (if (null bindings) body + `(pcase ,(cadr (car bindings)) + (,(caar bindings) (plet* ,(cdr bindings) ,body)) + (t (error "Pattern match failure in `plet'"))))) + +;;;###autoload +(defmacro pcase-let (bindings body) + "Like `let' but where you can use `pcase' patterns for bindings. +BODY should be an expression, and BINDINGS should be a list of bindings +of the form (UPAT EXP)." + (if (null (cdr bindings)) + `(plet* ,bindings ,body) + (setq bindings (mapcar (lambda (x) (cons (make-symbol "x") x)) bindings)) + `(let ,(mapcar (lambda (binding) (list (nth 0 binding) (nth 2 binding))) + bindings) + (plet* ,(mapcar (lambda (binding) (list (nth 1 binding) (nth 0 binding))) + bindings) + ,body)))) + +(defun pcase-expand (exp cases) + (let* ((defs (if (symbolp exp) '() + (let ((sym (make-symbol "x"))) + (prog1 `((,sym ,exp)) (setq exp sym))))) + (seen '()) + (codegen + (lambda (code vars) + (let ((prev (assq code seen))) + (if (not prev) + (let ((res (pcase-codegen code vars))) + (push (list code vars res) seen) + res) + ;; Since we use a tree-based pattern matching + ;; technique, the leaves (the places that contain the + ;; code to run once a pattern is matched) can get + ;; copied a very large number of times, so to avoid + ;; code explosion, we need to keep track of how many + ;; times we've used each leaf and move it + ;; to a separate function if that number is too high. + ;; + ;; We've already used this branch. So it is shared. + (destructuring-bind (code prevvars res) prev + (unless (symbolp res) + ;; This is the first repeat, so we have to move + ;; the branch to a separate function. + (let ((bsym + (make-symbol (format "pcase-%d" (length defs))))) + (push `(,bsym (lambda ,(mapcar #'car prevvars) ,@code)) defs) + (setcar res 'funcall) + (setcdr res (cons bsym (mapcar #'cdr prevvars))) + (setcar (cddr prev) bsym) + (setq res bsym))) + (setq vars (copy-sequence vars)) + (let ((args (mapcar (lambda (pa) + (let ((v (assq (car pa) vars))) + (setq vars (delq v vars)) + (cdr v))) + prevvars))) + (when vars ;New additional vars. + (error "The vars %s are only bound in some paths" + (mapcar #'car vars))) + `(funcall ,res ,@args))))))) + (main + (pcase-u + (mapcar (lambda (case) + `((match ,exp . ,(car case)) + ,(apply-partially + (if (pcase-small-branch-p (cdr case)) + ;; Don't bother sharing multiple + ;; occurrences of this leaf since it's small. + #'pcase-codegen codegen) + (cdr case)))) + cases)))) + `(let ,defs ,main))) + +(defun pcase-codegen (code vars) + `(let ,(mapcar (lambda (b) (list (car b) (cdr b))) vars) + ,@code)) + +(defun pcase-small-branch-p (code) + (and (= 1 (length code)) + (or (not (consp (car code))) + (let ((small t)) + (dolist (e (car code)) + (if (consp e) (setq small nil))) + small)))) + +;; Try to use `cond' rather than a sequence of `if's, so as to reduce +;; the depth of the generated tree. +(defun pcase-if (test then else) + (cond + ((eq else :pcase-dontcare) then) + ((eq (car-safe else) 'if) + `(cond (,test ,then) + (,(nth 1 else) ,(nth 2 else)) + (t ,@(nthcdr 3 else)))) + ((eq (car-safe else) 'cond) + `(cond (,test ,then) + ,@(cdr else))) + (t `(if ,test ,then ,else)))) + +(defun pcase-upat (qpattern) + (cond + ((eq (car-safe qpattern) '\,) (cadr qpattern)) + (t (list '\` qpattern)))) + +;; Note about MATCH: +;; When we have patterns like `(PAT1 . PAT2), after performing the `consp' +;; check, we want to turn all the similar patterns into ones of the form +;; (and (match car PAT1) (match cdr PAT2)), so you naturally need conjunction. +;; Earlier code hence used branches of the form (MATCHES . CODE) where +;; MATCHES was a list (implicitly a conjunction) of (SYM . PAT). +;; But if we have a pattern of the form (or `(PAT1 . PAT2) PAT3), there is +;; no easy way to eliminate the `consp' check in such a representation. +;; So we replaced the MATCHES by the MATCH below which can be made up +;; of conjunctions and disjunctions, so if we know `foo' is a cons, we can +;; turn (match foo . (or `(PAT1 . PAT2) PAT3)) into +;; (or (and (match car . `PAT1) (match cdr . `PAT2)) (match foo . PAT3)). +;; The downside is that we now have `or' and `and' both in MATCH and +;; in PAT, so there are different equivalent representations and we +;; need to handle them all. We do not try to systematically +;; canonicalize them to one form over another, but we do occasionally +;; turn one into the other. + +(defun pcase-u (branches) + "Expand matcher for rules BRANCHES. +Each BRANCH has the form (MATCH CODE . VARS) where +CODE is the code generator for that branch. +VARS is the set of vars already bound by earlier matches. +MATCH is the pattern that needs to be matched, of the form: + (match VAR . UPAT) + (and MATCH ...) + (or MATCH ...)" + (when (setq branches (delq nil branches)) + (destructuring-bind (match code &rest vars) (car branches) + (pcase-u1 (list match) code vars (cdr branches))))) + +(defun pcase-and (match matches) + (if matches `(and ,match ,@matches) match)) + +(defun pcase-split-match (sym splitter match) + (case (car match) + ((match) + (if (not (eq sym (cadr match))) + (cons match match) + (let ((pat (cddr match))) + (cond + ;; Hoist `or' and `and' patterns to `or' and `and' matches. + ((memq (car-safe pat) '(or and)) + (pcase-split-match sym splitter + (cons (car pat) + (mapcar (lambda (alt) + `(match ,sym . ,alt)) + (cdr pat))))) + (t (let ((res (funcall splitter (cddr match)))) + (cons (or (car res) match) (or (cdr res) match)))))))) + ((or and) + (let ((then-alts '()) + (else-alts '()) + (neutral-elem (if (eq 'or (car match)) :pcase-fail :pcase-succeed)) + (zero-elem (if (eq 'or (car match)) :pcase-succeed :pcase-fail))) + (dolist (alt (cdr match)) + (let ((split (pcase-split-match sym splitter alt))) + (unless (eq (car split) neutral-elem) + (push (car split) then-alts)) + (unless (eq (cdr split) neutral-elem) + (push (cdr split) else-alts)))) + (cons (cond ((memq zero-elem then-alts) zero-elem) + ((null then-alts) neutral-elem) + ((null (cdr then-alts)) (car then-alts)) + (t (cons (car match) (nreverse then-alts)))) + (cond ((memq zero-elem else-alts) zero-elem) + ((null else-alts) neutral-elem) + ((null (cdr else-alts)) (car else-alts)) + (t (cons (car match) (nreverse else-alts))))))) + (t (error "Uknown MATCH %s" match)))) + +(defun pcase-split-rest (sym splitter rest) + (let ((then-rest '()) + (else-rest '())) + (dolist (branch rest) + (let* ((match (car branch)) + (code&vars (cdr branch)) + (splitted + (pcase-split-match sym splitter match))) + (unless (eq (car splitted) :pcase-fail) + (push (cons (car splitted) code&vars) then-rest)) + (unless (eq (cdr splitted) :pcase-fail) + (push (cons (cdr splitted) code&vars) else-rest)))) + (cons (nreverse then-rest) (nreverse else-rest)))) + +(defun pcase-split-consp (syma symd pat) + (cond + ;; A QPattern for a cons, can only go the `then' side. + ((and (eq (car-safe pat) '\`) (consp (cadr pat))) + (let ((qpat (cadr pat))) + (cons `(and (match ,syma . ,(pcase-upat (car qpat))) + (match ,symd . ,(pcase-upat (cdr qpat)))) + :pcase-fail))) + ;; A QPattern but not for a cons, can only go the `else' side. + ((eq (car-safe pat) '\`) (cons :pcase-fail nil)))) + +(defun pcase-split-eq (elem pat) + (cond + ;; The same match will give the same result. + ((and (eq (car-safe pat) '\`) (equal (cadr pat) elem)) + (cons :pcase-succeed :pcase-fail)) + ;; A different match will fail if this one succeeds. + ((and (eq (car-safe pat) '\`) + ;; (or (integerp (cadr pat)) (symbolp (cadr pat)) + ;; (consp (cadr pat))) + ) + (cons :pcase-fail nil)))) + +(defun pcase-split-memq (elems pat) + ;; Based on pcase-split-eq. + (cond + ;; The same match will give the same result. + ((and (eq (car-safe pat) '\`) (member (cadr pat) elems)) + (cons :pcase-succeed nil)) + ;; A different match will fail if this one succeeds. + ((and (eq (car-safe pat) '\`) + ;; (or (integerp (cadr pat)) (symbolp (cadr pat)) + ;; (consp (cadr pat))) + ) + (cons :pcase-fail nil)))) + +(defun pcase-split-pred (upat pat) + ;; FIXME: For predicates like (pred (> a)), two such predicates may + ;; actually refer to different variables `a'. + (if (equal upat pat) + (cons :pcase-succeed :pcase-fail))) + +(defun pcase-fgrep (vars sexp) + "Check which of the symbols VARS appear in SEXP." + (let ((res '())) + (while (consp sexp) + (dolist (var (pcase-fgrep vars (pop sexp))) + (unless (memq var res) (push var res)))) + (and (memq sexp vars) (not (memq sexp res)) (push sexp res)) + res)) + +;; It's very tempting to use `pcase' below, tho obviously, it'd create +;; bootstrapping problems. +(defun pcase-u1 (matches code vars rest) + "Return code that runs CODE (with VARS) if MATCHES match. +and otherwise defers to REST which is a list of branches of the form +\(ELSE-MATCH ELSE-CODE . ELSE-VARS)." + ;; Depending on the order in which we choose to check each of the MATCHES, + ;; the resulting tree may be smaller or bigger. So in general, we'd want + ;; to be careful to chose the "optimal" order. But predicate + ;; patterns make this harder because they create dependencies + ;; between matches. So we don't bother trying to reorder anything. + (cond + ((null matches) (funcall code vars)) + ((eq :pcase-fail (car matches)) (pcase-u rest)) + ((eq :pcase-succeed (car matches)) + (pcase-u1 (cdr matches) code vars rest)) + ((eq 'and (caar matches)) + (pcase-u1 (append (cdar matches) (cdr matches)) code vars rest)) + ((eq 'or (caar matches)) + (let* ((alts (cdar matches)) + (var (if (eq (caar alts) 'match) (cadr (car alts)))) + (simples '()) (others '())) + (when var + (dolist (alt alts) + (if (and (eq (car alt) 'match) (eq var (cadr alt)) + (let ((upat (cddr alt))) + (and (eq (car-safe upat) '\`) + (or (integerp (cadr upat)) (symbolp (cadr upat)))))) + (push (cddr alt) simples) + (push alt others)))) + (cond + ((null alts) (error "Please avoid it") (pcase-u rest)) + ((> (length simples) 1) + ;; De-hoist the `or' MATCH into an `or' pattern that will be + ;; turned into a `memq' below. + (pcase-u1 (cons `(match ,var or . ,(nreverse simples)) (cdr matches)) + code vars + (if (null others) rest + (cons (list* + (pcase-and (if (cdr others) + (cons 'or (nreverse others)) + (car others)) + (cdr matches)) + code vars) + rest)))) + (t + (pcase-u1 (cons (pop alts) (cdr matches)) code vars + (if (null alts) (progn (error "Please avoid it") rest) + (cons (list* + (pcase-and (if (cdr alts) + (cons 'or alts) (car alts)) + (cdr matches)) + code vars) + rest))))))) + ((eq 'match (caar matches)) + (destructuring-bind (op sym &rest upat) (pop matches) + (cond + ((memq upat '(t _)) (pcase-u1 matches code vars rest)) + ((eq upat 'dontcare) :pcase-dontcare) + ((functionp upat) (error "Feature removed, use (pred %s)" upat)) + ((eq (car-safe upat) 'pred) + (destructuring-bind (then-rest &rest else-rest) + (pcase-split-rest + sym (apply-partially 'pcase-split-pred upat) rest) + (pcase-if (if (symbolp (cadr upat)) + `(,(cadr upat) ,sym) + (let* ((exp (cadr upat)) + ;; `vs' is an upper bound on the vars we need. + (vs (pcase-fgrep (mapcar #'car vars) exp))) + (if vs + ;; Let's not replace `vars' in `exp' since it's + ;; too difficult to do it right, instead just + ;; let-bind `vars' around `exp'. + `(let ,(mapcar (lambda (var) + (list var (cdr (assq var vars)))) + vs) + ;; FIXME: `vars' can capture `sym'. E.g. + ;; (pcase x ((and `(,x . ,y) (pred (fun x))))) + (,@exp ,sym)) + `(,@exp ,sym)))) + (pcase-u1 matches code vars then-rest) + (pcase-u else-rest)))) + ((symbolp upat) + (pcase-u1 matches code (cons (cons upat sym) vars) rest)) + ((eq (car-safe upat) '\`) + (pcase-q1 sym (cadr upat) matches code vars rest)) + ((eq (car-safe upat) 'or) + (let ((all (> (length (cdr upat)) 1))) + (when all + (dolist (alt (cdr upat)) + (unless (and (eq (car-safe alt) '\`) + (or (symbolp (cadr alt)) (integerp (cadr alt)))) + (setq all nil)))) + (if all + ;; Use memq for (or `a `b `c `d) rather than a big tree. + (let ((elems (mapcar 'cadr (cdr upat)))) + (destructuring-bind (then-rest &rest else-rest) + (pcase-split-rest + sym (apply-partially 'pcase-split-memq elems) rest) + (pcase-if `(memq ,sym ',elems) + (pcase-u1 matches code vars then-rest) + (pcase-u else-rest)))) + (pcase-u1 (cons `(match ,sym ,@(cadr upat)) matches) code vars + (append (mapcar (lambda (upat) + `((and (match ,sym . ,upat) ,@matches) + ,code ,@vars)) + (cddr upat)) + rest))))) + ((eq (car-safe upat) 'and) + (pcase-u1 (append (mapcar (lambda (upat) `(match ,sym ,@upat)) (cdr upat)) + matches) + code vars rest)) + ((eq (car-safe upat) 'not) + ;; FIXME: The implementation below is naive and results in + ;; inefficient code. + ;; To make it work right, we would need to turn pcase-u1's + ;; `code' and `vars' into a single argument of the same form as + ;; `rest'. We would also need to split this new `then-rest' argument + ;; for every test (currently we don't bother to do it since + ;; it's only useful for odd patterns like (and `(PAT1 . PAT2) + ;; `(PAT3 . PAT4)) which the programmer can easily rewrite + ;; to the more efficient `(,(and PAT1 PAT3) . ,(and PAT2 PAT4))). + (pcase-u1 `((match ,sym . ,(cadr upat))) + (lexical-let ((rest rest)) + ;; FIXME: This codegen is not careful to share its + ;; code if used several times: code blow up is likely. + (lambda (vars) + ;; `vars' will likely contain bindings which are + ;; not always available in other paths to + ;; `rest', so there' no point trying to pass + ;; them down. + (pcase-u rest))) + vars + (list `((and . ,matches) ,code . ,vars)))) + (t (error "Unknown upattern `%s'" upat))))) + (t (error "Incorrect MATCH %s" (car matches))))) + +(defun pcase-q1 (sym qpat matches code vars rest) + "Return code that runs CODE if SYM matches QPAT and if MATCHES match. +and if not, defers to REST which is a list of branches of the form +\(OTHER_MATCH OTHER-CODE . OTHER-VARS)." + (cond + ((eq (car-safe qpat) '\,) (error "Can't use `,UPATTERN")) + ((floatp qpat) (error "Floating point patterns not supported")) + ((vectorp qpat) + ;; FIXME. + (error "Vector QPatterns not implemented yet")) + ((consp qpat) + (let ((syma (make-symbol "xcar")) + (symd (make-symbol "xcdr"))) + (destructuring-bind (then-rest &rest else-rest) + (pcase-split-rest sym (apply-partially 'pcase-split-consp syma symd) + rest) + (pcase-if `(consp ,sym) + `(let ((,syma (car ,sym)) + (,symd (cdr ,sym))) + ,(pcase-u1 `((match ,syma . ,(pcase-upat (car qpat))) + (match ,symd . ,(pcase-upat (cdr qpat))) + ,@matches) + code vars then-rest)) + (pcase-u else-rest))))) + ((or (integerp qpat) (symbolp qpat)) + (destructuring-bind (then-rest &rest else-rest) + (pcase-split-rest sym (apply-partially 'pcase-split-eq qpat) rest) + (pcase-if `(eq ,sym ',qpat) + (pcase-u1 matches code vars then-rest) + (pcase-u else-rest)))) + (t (error "Unkown QPattern %s" qpat)))) + + +(provide 'pcase) +;;; pcase.el ends here -- 2.39.2