From a6d961ae2fd0eb93938f2afd932f4d3cb63a0412 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Mon, 26 Dec 2022 17:16:59 -0800 Subject: [PATCH] Add a new tree-sitter query predicate 'pred' I realized that using an arbitrary function as the predicate in queries is very helpful for some queries I'm writing for python and javascript, and presumably most other languages[1]. Granted, we can already filter out unwanted nodes by using a function instead of a face for the capture name, and (1) determine whether the captured node is valid and (2) fontify that node if it's valid. However, such approach is a bit more cumbersome and more importantly gets in the way of another potential use of the fontification queries: context extraction. For example, I could use the query for the 'variable' feature to get all the variables in a certain region. In this use-case, we want the filtering happen before returning the captured nodes. Besides, the change is relatively small and straightforward: most code are already there, I just need to add some boilerplate. [1] For a code like aa.bb(cc), we want bb to be in function face, because obviously its a function. But for aa.bb, we want bb to be in property face, because it's a property. In the AST, bb is always a property, the difference between the two cases is the enclosing node: in the first case, aa.bb is in a "call_expression" node, indicating that bb is used as a function (a method). So we want a predicate function that checks whether bb is used as a function or a property, and determine whether it should be in function or property face. * doc/lispref/parsing.texi (Pattern Matching): Update manual. * src/treesit.c (Ftreesit_pattern_expand): Handle :pred. (treesit_predicate_capture_name_to_node): A new function extracted from treesit_predicate_capture_name_to_text. (treesit_predicate_capture_name_to_text): Use the newly extracted function. (treesit_predicate_pred): New predicate function. (treesit_eval_predicates): Add new predicate. Also fix a bug: we want to AND the results of each predicate. * test/src/treesit-tests.el (treesit--ert-pred-last-sibling): New helper function. (treesit-query-api): Test #pred predicate. --- doc/lispref/parsing.texi | 14 +++++++--- src/treesit.c | 57 ++++++++++++++++++++++++++++++--------- test/src/treesit-tests.el | 13 ++++++--- 3 files changed, 65 insertions(+), 19 deletions(-) diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 5d1b11935cf..63741b69c22 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -1266,10 +1266,11 @@ example, with the following pattern: @end example @noindent -tree-sitter only matches arrays where the first element equals to -the last element. To attach a predicate to a pattern, we need to -group them together. A predicate always starts with a @samp{#}. -Currently there are two predicates, @code{#equal} and @code{#match}. +tree-sitter only matches arrays where the first element equals to the +last element. To attach a predicate to a pattern, we need to group +them together. A predicate always starts with a @samp{#}. Currently +there are three predicates, @code{#equal}, @code{#match}, and +@code{#pred}. @deffn Predicate equal arg1 arg2 Matches if @var{arg1} equals to @var{arg2}. Arguments can be either @@ -1282,6 +1283,11 @@ Matches if the text that @var{capture-name}'s node spans in the buffer matches regular expression @var{regexp}. Matching is case-sensitive. @end deffn +@deffn Predicate pred fn &rest nodes +Matches if function @var{fn} returns non-@code{nil} when passed each +node in @var{nodes} as arguments. +@end deffn + Note that a predicate can only refer to capture names that appear in the same pattern. Indeed, it makes little sense to refer to capture names in other patterns. diff --git a/src/treesit.c b/src/treesit.c index ecc977745a6..813d4222f98 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -2170,6 +2170,8 @@ See Info node `(elisp)Pattern Matching' for detailed explanation. */) return build_pure_c_string ("#equal"); if (EQ (pattern, QCmatch)) return build_pure_c_string ("#match"); + if (EQ (pattern, QCpred)) + return build_pure_c_string ("#pred"); Lisp_Object opening_delimeter = build_pure_c_string (VECTORP (pattern) ? "[" : "("); Lisp_Object closing_delimiter @@ -2269,10 +2271,10 @@ treesit_predicates_for_pattern (TSQuery *query, uint32_t pattern_index) return Fnreverse (result); } -/* Translate a capture NAME (symbol) to the text of the captured node. +/* Translate a capture NAME (symbol) to a node. Signals treesit-query-error if such node is not captured. */ static Lisp_Object -treesit_predicate_capture_name_to_text (Lisp_Object name, +treesit_predicate_capture_name_to_node (Lisp_Object name, struct capture_range captures) { Lisp_Object node = Qnil; @@ -2292,6 +2294,16 @@ treesit_predicate_capture_name_to_text (Lisp_Object name, name, build_pure_c_string ("A predicate can only refer" " to captured nodes in the " "same pattern")); + return node; +} + +/* Translate a capture NAME (symbol) to the text of the captured node. + Signals treesit-query-error if such node is not captured. */ +static Lisp_Object +treesit_predicate_capture_name_to_text (Lisp_Object name, + struct capture_range captures) +{ + Lisp_Object node = treesit_predicate_capture_name_to_node (name, captures); struct buffer *old_buffer = current_buffer; set_buffer_internal (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer)); @@ -2365,13 +2377,30 @@ treesit_predicate_match (Lisp_Object args, struct capture_range captures) return false; } -/* About predicates: I decide to hard-code predicates in C instead of - implementing an extensible system where predicates are translated - to Lisp functions, and new predicates can be added by extending a - list of functions, because I really couldn't imagine any useful - predicates besides equal and match. If we later found out that - such system is indeed useful and necessary, it can be easily - added. */ +/* Handles predicate (#pred FN ARG...). Return true if FN returns + non-nil; return false otherwise. The arity of FN must match the + number of ARGs */ +static bool +treesit_predicate_pred (Lisp_Object args, struct capture_range captures) +{ + if (XFIXNUM (Flength (args)) < 2) + xsignal2 (Qtreesit_query_error, + build_pure_c_string ("Predicate `pred' requires " + "at least two arguments, " + "but was only given"), + Flength (args)); + + Lisp_Object fn = Fintern (XCAR (args), Qnil); + Lisp_Object nodes = Qnil; + Lisp_Object tail = XCDR (args); + FOR_EACH_TAIL (tail) + nodes = Fcons (treesit_predicate_capture_name_to_node (XCAR (tail), + captures), + nodes); + nodes = Fnreverse (nodes); + + return !NILP (CALLN (Fapply, fn, nodes)); +} /* If all predicates in PREDICATES passes, return true; otherwise return false. */ @@ -2387,14 +2416,17 @@ treesit_eval_predicates (struct capture_range captures, Lisp_Object predicates) Lisp_Object fn = XCAR (predicate); Lisp_Object args = XCDR (predicate); if (!NILP (Fstring_equal (fn, build_pure_c_string ("equal")))) - pass = treesit_predicate_equal (args, captures); + pass &= treesit_predicate_equal (args, captures); else if (!NILP (Fstring_equal (fn, build_pure_c_string ("match")))) - pass = treesit_predicate_match (args, captures); + pass &= treesit_predicate_match (args, captures); + else if (!NILP (Fstring_equal (fn, build_pure_c_string ("pred")))) + pass &= treesit_predicate_pred (args, captures); else xsignal3 (Qtreesit_query_error, build_pure_c_string ("Invalid predicate"), fn, build_pure_c_string ("Currently Emacs only supports" - " equal and match predicate")); + " equal, match, and pred" + " predicate")); } /* If all predicates passed, add captures to result list. */ return pass; @@ -3217,6 +3249,7 @@ syms_of_treesit (void) DEFSYM (QCanchor, ":anchor"); DEFSYM (QCequal, ":equal"); DEFSYM (QCmatch, ":match"); + DEFSYM (QCpred, ":pred"); DEFSYM (Qnot_found, "not-found"); DEFSYM (Qsymbol_error, "symbol-error"); diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index 3fe59a78d07..3770a4d01e5 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -335,6 +335,9 @@ BODY is the test body." ;;; Query +(defun treesit--ert-pred-last-sibling (node) + (null (treesit-node-next-sibling node t))) + (ert-deftest treesit-query-api () "Tests for query API." (skip-unless (treesit-language-available-p 'json)) @@ -357,13 +360,16 @@ BODY is the test body." (pair key: (_) @keyword) ((_) @bob (#match \"^B.b$\" @bob)) (number) @number -((number) @n3 (#equal \"3\" @n3)) " +((number) @n3 (#equal \"3\" @n3)) +((number) @n3p (#pred treesit--ert-pred-last-sibling @n3p))" ;; Sexp query. ((string) @string (pair key: (_) @keyword) ((_) @bob (:match "^B.b$" @bob)) (number) @number - ((number) @n3 (:equal "3" @n3))))) + ((number) @n3 (:equal "3" @n3)) + ((number) @n3p (:pred treesit--ert-pred-last-sibling + @n3p))))) ;; Test `treesit-query-compile'. (dolist (query (list query1 (treesit-query-compile 'json query1))) @@ -375,7 +381,8 @@ BODY is the test body." (string . "\"Bob\"") (bob . "Bob") (number . "3") - (n3 . "3")) + (n3 . "3") + (n3p . "3")) (mapcar (lambda (entry) (cons (car entry) (treesit-node-text -- 2.39.2