From e492c21e81040b9539139b78f6baf98df17bbaab Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Fri, 23 Dec 2022 15:22:31 -0800 Subject: [PATCH] Fix treesit_cursor_helper (bug#60267) The cause of that bug is that in a particular parse tree, the node treesit_cursor_helper tries to go to is a missing node, not only is it a missing node, it is the first node of a subtree. So when treesit_cursor_helper follows the algorithm and goes down the tree, it goes down the previous subtree (because that subtree's end = end_pos, because the target node has zero width). o | o--+-o | | +-+ +-+-+ | | | | | o x t o o (We ended up in x when the target is t, because t has zero width.) One way to solve it is to go back up the tree if we are at a leaf node and still haven't matched the target node. That's too ugly and finicky so I resorted to recursion. Now one more functions will return give up (treesit_node_parent) if we are in a werid parse tree that is super deep. But since we already kind of give up on this kind of parse trees (bug#59426), it doesn't really hurt. * src/treesit.c (treesit_cursor_helper_1): New function. (treesit_cursor_helper): Use the new function. Change return type to bool, and accept a cursor pointer. (Ftreesit_node_parent) (Ftreesit_search_subtree) (Ftreesit_search_forward) (Ftreesit_induce_sparse_tree): Use the new signature. --- src/treesit.c | 129 +++++++++++++++++++++++++++++--------------------- 1 file changed, 76 insertions(+), 53 deletions(-) diff --git a/src/treesit.c b/src/treesit.c index c882d455137..dc2043e6109 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -1762,7 +1762,7 @@ If NODE is nil, return nil. */) return build_string (string); } -static TSTreeCursor treesit_cursor_helper (TSNode, Lisp_Object); +static bool treesit_cursor_helper (TSTreeCursor *, TSNode, Lisp_Object); DEFUN ("treesit-node-parent", Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0, @@ -1778,7 +1778,10 @@ Return nil if NODE has no parent. If NODE is nil, return nil. */) TSNode treesit_node = XTS_NODE (node)->node; Lisp_Object parser = XTS_NODE (node)->parser; - TSTreeCursor cursor = treesit_cursor_helper (treesit_node, parser); + TSTreeCursor cursor; + if (!treesit_cursor_helper (&cursor, treesit_node, parser)) + return return_value; + if (ts_tree_cursor_goto_parent (&cursor)) { TSNode parent = ts_tree_cursor_current_node (&cursor); @@ -2637,8 +2640,59 @@ treesit_assume_true (bool val) eassert (val == true); } +/* Tries to move CURSOR to point to TARGET. END_POS is the end of + TARGET. If success, return true, otherwise move CURSOR back to + starting position and return false. LIMIT is the recursion + limit. */ +static bool +treesit_cursor_helper_1 (TSTreeCursor *cursor, TSNode *target, + uint32_t end_pos, ptrdiff_t limit) +{ + if (limit <= 0) + return false; + + TSNode cursor_node = ts_tree_cursor_current_node (cursor); + if (ts_node_eq (cursor_node, *target)) + return true; + + if (!ts_tree_cursor_goto_first_child (cursor)) + return false; + + /* Skip nodes that definitely don't contain TARGET. */ + while (ts_node_end_byte (cursor_node) < end_pos) + { + if (!ts_tree_cursor_goto_next_sibling (cursor)) + break; + cursor_node = ts_tree_cursor_current_node (cursor); + } + + /* Go through each sibling that could contain TARGET. Because of + missing nodes (their width is 0), there could be multiple + siblings that could contain TARGET. */ + while (ts_node_start_byte (cursor_node) <= end_pos) + { + if (treesit_cursor_helper_1 (cursor, target, end_pos, limit - 1)) + return true; + + if (!ts_tree_cursor_goto_next_sibling (cursor)) + break; + cursor_node = ts_tree_cursor_current_node (cursor); + } + + /* Couldn't find TARGET, must be not in this subtree, move cursor + back and pray that other brothers and sisters can succeed. */ + treesit_assume_true (ts_tree_cursor_goto_parent (cursor)); + return false; +} + /* Create a TSTreeCursor pointing at NODE. PARSER is the lisp parser - that produced NODE. + that produced NODE. If success, return true, otherwise return + false. This function should almost always succeed, but if the parse + tree is strangely too deep and exceeds the recursion limit, this + function will fail and return false. + + If this function returns true, caller needs to free CURSOR; if + returns false, caller don't need to free CURSOR. The reason we need this instead of simply using ts_tree_cursor_new is that we have to create the cursor on the root node and traverse @@ -2646,56 +2700,16 @@ treesit_assume_true (bool val) Otherwise going to sibling or parent of NODE wouldn't work. (Wow perfect filling.) */ -static TSTreeCursor -treesit_cursor_helper (TSNode node, Lisp_Object parser) +static bool +treesit_cursor_helper (TSTreeCursor *cursor, TSNode node, Lisp_Object parser) { uint32_t end_pos = ts_node_end_byte (node); TSNode root = ts_tree_root_node (XTS_PARSER (parser)->tree); - TSTreeCursor cursor = ts_tree_cursor_new (root); - TSNode cursor_node = ts_tree_cursor_current_node (&cursor); - /* This is like treesit-node-at. We go down from the root node, - either to first child or next sibling, repeatedly, and finally - arrive at NODE. */ - while (!ts_node_eq (node, cursor_node)) - { - treesit_assume_true (ts_tree_cursor_goto_first_child (&cursor)); - cursor_node = ts_tree_cursor_current_node (&cursor); - /* ts_tree_cursor_goto_first_child_for_byte is not reliable, so - we just go through each sibling. */ - while (ts_node_is_missing (cursor_node) - || ts_node_end_byte (cursor_node) < end_pos) - { - /* A "missing" node has zero width, so it's possible that - its end = NODE.end but it's not NODE, so we skip them. - But we need to make sure this missing node is not the - node we are looking for before skipping it. */ - if (ts_node_is_missing (cursor_node) - && ts_node_eq (node, cursor_node)) - return cursor; - treesit_assume_true (ts_tree_cursor_goto_next_sibling (&cursor)); - cursor_node = ts_tree_cursor_current_node (&cursor); - } - /* Right now CURSOR.end >= NODE.end. But what if CURSOR.end = - NODE.end, and there are missing nodes after CURSOR, and the - missing node after CURSOR is the NODE we are looking for?? - Well, create a probe and look ahead. (This is tested by - treesit-cursor-helper-with-missing-node.) */ - TSTreeCursor probe = ts_tree_cursor_copy (&cursor); - TSNode probe_node; - while (ts_tree_cursor_goto_next_sibling (&probe)) - { - probe_node = ts_tree_cursor_current_node (&probe); - if (!ts_node_is_missing (probe_node)) - break; - if (ts_node_eq (probe_node, node)) - { - ts_tree_cursor_delete (&cursor); - return probe; - } - } - ts_tree_cursor_delete (&probe); - } - return cursor; + *cursor = ts_tree_cursor_new (root); + bool success = treesit_cursor_helper_1 (cursor, &node, end_pos, 1000); + if (!success) + ts_tree_cursor_delete (cursor); + return success; } /* Move CURSOR to the next/previous sibling. FORWARD controls the @@ -2968,7 +2982,10 @@ Return the first matched node, or nil if none matches. */) Lisp_Object parser = XTS_NODE (node)->parser; Lisp_Object return_value = Qnil; - TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (node)->node, parser); + TSTreeCursor cursor; + if (!treesit_cursor_helper (&cursor, XTS_NODE (node)->node, parser)) + return return_value; + if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward), NILP (all), the_limit, false)) { @@ -3022,7 +3039,10 @@ always traverse leaf nodes first, then upwards. */) Lisp_Object parser = XTS_NODE (start)->parser; Lisp_Object return_value = Qnil; - TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (start)->node, parser); + TSTreeCursor cursor; + if (!treesit_cursor_helper (&cursor, XTS_NODE (start)->node, parser)) + return return_value; + if (treesit_search_forward (&cursor, predicate, parser, NILP (backward), NILP (all))) { @@ -3141,7 +3161,10 @@ a regexp. */) Lisp_Object parser = XTS_NODE (root)->parser; Lisp_Object parent = Fcons (Qnil, Qnil); - TSTreeCursor cursor = treesit_cursor_helper (XTS_NODE (root)->node, parser); + TSTreeCursor cursor; + if (!treesit_cursor_helper (&cursor, XTS_NODE (root)->node, parser)) + return Qnil; + treesit_build_sparse_tree (&cursor, parent, predicate, process_fn, the_limit, parser); ts_tree_cursor_delete (&cursor); -- 2.39.2