From eba65824364474bde89bdce5f57a772d74a2c409 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Sat, 24 Sep 2022 19:35:11 -0700 Subject: [PATCH] Add the treesit-search functions that supplant the removed ones The signatures also changed. treesit-traverse-depth-first -> treesit-search-subtree treesit-traverse-breadth-first -> treesit-traverse-forward -> treesit-search-forward treesit-search-forward -> treesit-search-forward-goto treesit-search-beginning/end -> treesit-search-forward-goto -> treesit-induce-sparse-tree * doc/lispref/parsing.texi (Retrieving Node): Add relevant manual sections. * lisp/treesit.el (treesit-search-forward-goto): New function. * src/treesit.c (ts_traverse_sibling_helper) (ts_traverse_match_predicate) (ts_search_dfs) (ts_search_forward) (treesit-search-subtree) (treesit-search-forward) (ts_build_sparse_tree) (Ftreesit_induce_sparse_tree): Add functions. * test/src/treesit-tests.el (treesit-node-supplemental): Add comments. --- doc/lispref/parsing.texi | 105 +++++++++++ lisp/treesit.el | 24 ++- src/treesit.c | 358 ++++++++++++++++++++++++++++++++++++++ test/src/treesit-tests.el | 5 + 4 files changed, 489 insertions(+), 3 deletions(-) diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 0dbc70ce2d3..868b9bc0744 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -580,11 +580,116 @@ for named child (@pxref{tree-sitter named node, named node}). @heading Searching for node +@defun treesit-search-subtree node predicate &optional all backward limit +This function traverses the subtree of @var{node}, and match +@var{predicate} with each node along the way. And @var{predicate} is +a regexp that matches against each node's type, or a function that +takes a node and returns nil/non-nil. If a node matches, that node is +returned, if no node ever matches, nil is returned. + +By default, this function only traverses named nodes, if @var{all} is +non-nil, it traverses all nodes. If @var{backward} is non-nil, it +traverse backwards. If @var{limit} is non-nil, it only traverses that +number of levels down in the tree. +@end defun + +@defun treesit-search-forward start predicate &optional all backward up +This function is somewhat similar to @code{treesit-search-subtree}. +It also traverse the parse tree and match each node with +@var{predicate} (except for @var{start}), where @var{predicate} can be +a regexp or a function. For a tree like the below where @var{start} +is marked 1, this function will traverse as numbered: + +@example +@group + o + | + 3--------4-----------8 + | | | +o--o-+--1 5--+--6 9---+-----12 +| | | | | | +o o 2 7 +-+-+ +--+--+ + | | | | | + 10 11 13 14 15 +@end group +@end example + +Same as in @code{treesit-search-subtree}, this function only searches +for named nodes by default. But if @var{all} is non-nil, it searches +for all nodes. And If @var{backward} is non-nil, it searches +backwards. +If @var{up} is non-nil, this function will only traverse to siblings +and parents. In that case, only 1 3 4 8 would be traversed. @end defun +@defun treesit-search-forward-goto start predicate side &optional all backward up +For those who want to not only search for a node but also move to it, +this is the function to use. Parameter @var{start}, @var{predicate}, +@var{all}, @var{backward}, and @var{up} are the same as in +@code{treesit-search-forward}. And @var{side} controls which side of +the matched no do we stop at, it can be @code{'start} or @code{'end}. + +Beware of this common pitfall: + +@example +@group +;; This will not move point forward. +(while (treesit-search-forward-goto + (treesit-node-at (point)) + "xxx" + 'start) + ...) + +;; This is will move point forward. +(let ((node (treesit-node-at (point)))) + (while (setq node (treesit-search-forward-goto + node "xxx" 'start)) + ...)) +@end group +@end example + +The exact reason why is left as an exercise for the reader. @end defun +@defun treesit-induce-sparse-tree root predicate &optional process-fn limit +This function creates a sparse tree of @var{root}'s subtree. + +Basically, it takes the subtree under @var{root}, and combs it so only +the nodes that match @var{predicate} are left, like picking out grapes +on the vine. Like previous functions, @var{predicate} can be a regexp +string that matches against each node's type, or a function that takes +a node and return nil/non-nil. + +For example, for a subtree on the left that consist of both numbers +and letters, if @var{predicate} is ``is letter'', the returned tree is +the one on the right. + +@example +@group + a a a + | | | ++---+---+ +---+---+ +---+---+ +| | | | | | | | | +b 1 2 b | | b c d + | | => | | => | + c +--+ c + e + | | | | | + +--+ d 4 +--+ d + | | | + e 5 e +@end group +@end example + +If @var{process-fn} is non-nil, instead of returning the matched +nodes, pass each node to @var{process-fn} use the return value +instead. If non-nil, @var{limit} is the number of levels to go down +from @var{root}. + +Each node in the returned tree looks like @code{(@var{node} +. (@var{child} ...))}. The root of this tree might be nil, if +@var{root} doesn't match @var{pred}. If no node matches +@var{predicate}, return nil. @end defun @heading More convenient functions diff --git a/lisp/treesit.el b/lisp/treesit.el index 2defd83dc6f..9bdff83da89 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -722,9 +722,27 @@ indentation (target) is in green, current indentation is in red." ;;; Search - - - +(defun treesit-search-forward-goto + (start predicate side &optional all backward up) + "Search for node in the parse tree and move point to it. + +Start traversing the tree from node START, and match PREDICATE with +each node along the way (except START). PREDICATE can be either a +regexp that matches against each node's type, or a function that takes +a node and returns nil/non-nil for match/no match. + +If a node matches, move to that node and return the node, +otherwise return nil. SIDE controls whether we move to the start +or end of the matches node, it can be either \\='start or +\\='end. + +ALL, BACKWARD, and UP are the same as in `treesit-search-forward'." + (when-let ((node (treesit-search-forward + start predicate all backward up))) + (pcase side + ('start (goto-char (treesit-node-start node))) + ('end (goto-char (treesit-node-end node)))) + node)) ;;; Debugging diff --git a/src/treesit.c b/src/treesit.c index 51261c34a26..f3efcbe5964 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1805,6 +1805,358 @@ query. */) return Fnreverse (result); } +/*** Navigation */ + +/* Return the next/previous named/unnamed sibling of NODE. FORWARD + controls the direction and NAMED controls the nameness. + */ +static TSNode +ts_traverse_sibling_helper (TSNode node, bool forward, bool named) +{ + if (forward) + { + if (named) + return ts_node_next_named_sibling (node); + else + return ts_node_next_sibling (node); + } + else + { + if (named) + return ts_node_prev_named_sibling (node); + else + return ts_node_prev_sibling (node); + } +} + +/* Return true if NODE matches PRED. PRED can be a string or a + function. This function doesn't check for PRED's type. */ +static bool +ts_traverse_match_predicate +(TSNode node, Lisp_Object pred, Lisp_Object parser) +{ + if (STRINGP (pred)) + { + const char *type = ts_node_type (node); + return (fast_c_string_match_ignore_case + (pred, type, strlen (type)) >= 0); + } + else + { + Lisp_Object lisp_node = make_ts_node (parser, node); + return !NILP (CALLN (Ffuncall, pred, lisp_node)); + } + +} + +/* Traverse the parse tree starting from ROOT (but ROOT is not + matches against PRED). PRED can be a function (takes a node and + returns nil/non-nil),or a string (treated as regexp matching the + node's type, ignores case, must be all single byte characters). If + the node satisfies PRED , terminate, set ROOT to that node, and + return true. If no node satisfies PRED, return FALSE. PARSER is + the parser of ROOT. + + LIMIT is the number of levels we descend in the tree. If NO_LIMIT + is true, LIMIT is ignored. FORWARD controls the direction in which + we traverse the tree, true means forward, false backward. If NAMED + is true, only traverse named nodes, if false, all nodes. If + SKIP_ROOT is true, don't match ROOT. */ +static bool +ts_search_dfs +(TSNode *root, Lisp_Object pred, Lisp_Object parser, + bool named, bool forward, ptrdiff_t limit, bool no_limit, + bool skip_root) +{ + /* TSTreeCursor doesn't allow us to move backward, so we can't use + it. We could use limit == -1 to indicate no_limit == true, but + separating them is safer. */ + TSNode node = *root; + + if (!skip_root && ts_traverse_match_predicate (node, pred, parser)) + { + *root = node; + return true; + } + + if (!no_limit && limit <= 0) + return false; + else + { + int count = named ? + ts_node_named_child_count( node) + : ts_node_child_count (node); + for (int offset=0; offset < count; offset++) + { + uint32_t idx = forward ? offset + : count - offset - 1; + TSNode child = ts_node_child (node, idx); + + if (!ts_node_is_null (child) + && ts_search_dfs (&child, pred, parser, named, + forward, limit - 1, no_limit, false)) + { + *root = child; + return true; + } + } + return false; + } +} + +/* Go thought the whole tree linearly depth first, starting from + START. PRED, PARSER, NAMED, FORWARD are the same as in + ts_search_subtre. If UP_ONLY is true, never go to children, only + sibling and parents. If SKIP_START is true, don'tt match + START. */ +static bool +ts_search_forward +(TSNode *start, Lisp_Object pred, Lisp_Object parser, + bool named, bool forward, bool up_only, bool skip_start) +{ + TSNode node = *start; + + if (!up_only && ts_search_dfs + (start, pred, parser, named, forward, 0, true, skip_start)) + return true; + + TSNode next = ts_traverse_sibling_helper(node, forward, named); + while (ts_node_is_null (next)) + { + node = ts_node_parent (node); + if (ts_node_is_null (node)) + return false; + + if (ts_traverse_match_predicate (node, pred, parser)) + { + *start = node; + return false; + } + next = ts_traverse_sibling_helper(node, forward, named); + } + if (ts_search_forward + (&next, pred, parser, named, forward, up_only, false)) + { + *start = next; + return true; + } + else + return false; +} + +DEFUN ("treesit-search-subtree", + Ftreesit_search_subtree, + Streesit_search_subtree, 2, 5, 0, + doc: /* Traverse the parse tree depth-first. + +Traverse the subtree of NODE, and match PREDICATE with each node along +the way. PREDICATE is a regexp string that matches against each +node's type, or a function that takes a node and returns nil/non-nil. + +By default, only traverse named nodes, if ALL is non-nil, traverse all +nodes. If BACKWARD is non-nil, traverse backwards. If LIMIT is +non-nil, we only traverse that number of levels down in the tree. + +Return the first matched node, or nil if none matches. */) + (Lisp_Object node, Lisp_Object predicate, Lisp_Object all, + Lisp_Object backward, Lisp_Object limit) +{ + CHECK_TS_NODE (node); + CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), + list3 (Qor, Qstringp, Qfunctionp), predicate); + CHECK_SYMBOL (all); + CHECK_SYMBOL (backward); + + ptrdiff_t the_limit = 0; + bool no_limit = false; + if (NILP (limit)) + no_limit = true; + else + { + CHECK_FIXNUM (limit); + the_limit = XFIXNUM (limit); + } + + TSNode ts_node = XTS_NODE (node)->node; + Lisp_Object parser = XTS_NODE (node)->parser; + if (ts_search_dfs + (&ts_node, predicate, parser, NILP (all), + NILP (backward), the_limit, no_limit, false)) + { + return make_ts_node (parser, ts_node); + } + else + return Qnil; +} + +DEFUN ("treesit-search-forward", + Ftreesit_search_forward, + Streesit_search_forward, 2, 5, 0, + doc: /* Search for node in the parse tree. + +Start traversing the tree from node START, and match PREDICATE with +each node along the way (except START). PREDICATE is a regexp string +that matches against each node's type, or a function that takes a node +and returns nil/non-nil. + +By default, only search for named nodes, if ALL is non-nil, search for +all nodes. If BACKWARD is non-nil, search backwards. + +Return the first matched node, or nil if none matches. + +For a tree like the below where START is marked 1, traverse as +numbered: + 16 + | + 3--------4-----------8 + | | | + o--o-+--1 5--+--6 9---+-----12 + | | | | | | + o o 2 7 +-+-+ +--+--+ + | | | | | + 10 11 13 14 15 + +If UP is non-nil, only traverse to siblings and parents. In that +case, only 1 3 4 8 16 would be traversed. */) + (Lisp_Object start, Lisp_Object predicate, Lisp_Object all, + Lisp_Object backward, Lisp_Object up) +{ + CHECK_TS_NODE (start); + CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), + list3 (Qor, Qstringp, Qfunctionp), predicate); + CHECK_SYMBOL (all); + CHECK_SYMBOL (backward); + CHECK_SYMBOL (up); + + TSNode ts_start = XTS_NODE (start)->node; + Lisp_Object parser = XTS_NODE (start)->parser; + if (ts_search_forward + (&ts_start, predicate, parser, NILP (all), + NILP (backward), !NILP (up), true)) + { + return make_ts_node (parser, ts_start); + } + else + return Qnil; +} + +/* Recursively traverse the tree under CURSOR, and append the result + subtree to PARENT's cdr. See more in `ts_build_sparse_tree'. */ +static void +ts_build_sparse_tree +(TSTreeCursor *cursor, Lisp_Object parent, Lisp_Object pred, + Lisp_Object process_fn, ptrdiff_t limit, + bool no_limit, Lisp_Object parser) +{ + + TSNode node = ts_tree_cursor_current_node (cursor); + bool match = ts_traverse_match_predicate (node, pred, parser); + if (match) + { + /* If this node matches pred, add a new node to the parent's + children list. */ + Lisp_Object lisp_node = make_ts_node (parser, node); + if (!NILP (process_fn)) + { + lisp_node = CALLN (Ffuncall, process_fn, lisp_node); + } + Lisp_Object this = Fcons (lisp_node, Qnil); + Fsetcdr (parent, Fcons (this, Fcdr (parent))); + /* Now for children nodes, this is the new parent. */ + parent = this; + } + /* Go through each child. */ + if ((no_limit || limit > 0) + && ts_tree_cursor_goto_first_child (cursor)) + { + do + { + /* Make sure not to use node after the recursive funcall. + Then C compilers should be smart enough not to copy NODE + to stack. */ + ts_build_sparse_tree + (cursor, parent, pred, process_fn, + limit - 1, no_limit, parser); + } + while (ts_tree_cursor_goto_next_sibling (cursor)); + /* Don't forget to come back to this node. */ + ts_tree_cursor_goto_parent (cursor); + } + /* Before we go, reverse children in the sparse tree. */ + if (match) + { + /* When match == true, "parent" is actually the node we added in + this layer (parent = this). */ + Fsetcdr (parent, Fnreverse (Fcdr (parent))); + } +} + +DEFUN ("treesit-induce-sparse-tree", + Ftreesit_induce_sparse_tree, + Streesit_induce_sparse_tree, 2, 4, 0, + doc: /* Create a sparse tree of ROOT's subtree. + +Basically, take the subtree under ROOT, and comb it so only the nodes +that match PREDICATE are left, like picking out grapes on the vine. +PREDICATE is a regexp string that matches against each node's type. + +For a subtree on the left that consist of both numbers and letters, if +PREDICATE is "is letter", the returned tree is the one on the right. + + a a a + | | | + +---+---+ +---+---+ +---+---+ + | | | | | | | | | + b 1 2 b | | b c d + | | => | | => | + c +--+ c + e + | | | | | + +--+ d 4 +--+ d + | | | + e 5 e + +If PROCESS-FN is non-nil, instead of returning the matched nodes, pass +each node to PROCESS-FN use the return value instead. If non-nil, +LIMIT is the number of levels to go down from ROOT. + +Each node in the returned tree looks like (NODE . (CHILD ...)). The +root of this tree might be nil, if ROOT doesn't match PREDICATE. If +no node matches PRED, return nil. + +PREDICATE can also be a function that takes a node and returns +nil/non-nil, but it is slower and more memory consuming than +regexp. */) + (Lisp_Object root, Lisp_Object predicate, Lisp_Object process_fn, + Lisp_Object limit) +{ + CHECK_TS_NODE (root); + CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate), + list3 (Qor, Qstringp, Qfunctionp), predicate); + + if (!NILP (process_fn)) + CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn); + ptrdiff_t the_limit = 0; + bool no_limit = false; + if (NILP (limit)) + no_limit = true; + else + { + CHECK_FIXNUM (limit); + the_limit = XFIXNUM (limit); + } + + TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node); + Lisp_Object parser = XTS_NODE (root)->parser; + Lisp_Object parent = Fcons (Qnil, Qnil); + ts_build_sparse_tree + (&cursor, parent, predicate, process_fn, + the_limit, no_limit, parser); + if (NILP (Fcdr (parent))) + return Qnil; + else + return parent; +} + /*** Initialization */ /* Initialize the tree-sitter routines. */ @@ -1835,6 +2187,8 @@ syms_of_treesit (void) "user-emacs-directory"); DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted"); + DEFSYM (Qor, "or"); + define_error (Qtreesit_error, "Generic tree-sitter error", Qerror); define_error (Qtreesit_query_error, "Query pattern is malformed", Qtreesit_error); @@ -1925,4 +2279,8 @@ dynamic libraries, in that order. */); defsubr (&Streesit_query_expand); defsubr (&Streesit_query_compile); defsubr (&Streesit_query_capture); + + defsubr (&Streesit_search_subtree); + defsubr (&Streesit_search_forward); + defsubr (&Streesit_induce_sparse_tree); } diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el index fbf99ff0874..6fa891a136a 100644 --- a/test/src/treesit-tests.el +++ b/test/src/treesit-tests.el @@ -434,12 +434,17 @@ visible_end.)" ;; `treesit-parent-while' ;; `treesit-node-children' ;; `treesit-node-field-name' + ;; `treesit-search-forward-goto' )) ;; TODO ;; - Functions in treesit.el ;; - treesit-load-name-override-list +;; - treesit-search-subtree ;; - treesit-search-forward +;; - treesit-induce-sparse-tree +;; - treesit-search-forward + (provide 'treesit-tests) ;;; treesit-tests.el ends here -- 2.39.5