From 0ba5125be31aac09857d9a0c3c9695ba68f1edc0 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Thu, 27 Feb 2025 03:07:34 -0800 Subject: [PATCH] Enable treesit-query-capture to return grouped captures This is needed for creating embedded parsers for embedded code blocks of which language cannot be known ahead of time. E.g., markdown and org mode's code block. * src/treesit.c (Ftreesit_query_capture): Add parameter GROUPED. (cherry picked from commit 625b2b02a3c9bad6d7abf57ea7f95ece29855906) --- doc/lispref/parsing.texi | 10 ++++++-- src/treesit.c | 52 +++++++++++++++++++++++++++++++++------- 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index f12104ea267..2b224036de0 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -1225,7 +1225,7 @@ example, the capture name @code{biexp}: @cindex query functions, tree-sitter Now we can introduce the @dfn{query functions}. -@defun treesit-query-capture node query &optional beg end node-only +@defun treesit-query-capture node query &optional beg end node-only grouped This function matches patterns in @var{query} within @var{node}. The argument @var{query} can be either an s-expression, a string, or a compiled query object. For now, we focus on the s-expression syntax; @@ -1247,6 +1247,12 @@ matching node whose span overlaps with the region between @var{beg} and @var{end} is captured; it doesn't have to be completely contained in the region. +If @var{grouped} is non-@code{nil}, instead of returning a list of +@w{@code{(@var{capture_name} . @var{node})}}, this function returns a +list of lists of them. The grouping is determined by @var{query}. +Captures in the same match of a pattern in @var{query} are grouped +together. 
+ @vindex treesit-query-error @findex treesit-query-validate This function raises the @code{treesit-query-error} error if @@ -1284,7 +1290,7 @@ For example, it could have two top-level patterns: @group (setq query '((binary_expression) @@biexp - (number_literal) @@number @@biexp) + (number_literal) @@number) @end group @end example diff --git a/src/treesit.c b/src/treesit.c index 16308193bf5..c8af17a5b8b 100644 --- a/src/treesit.c +++ b/src/treesit.c @@ -3272,7 +3272,7 @@ treesit_initialize_query (Lisp_Object query, const TSLanguage *lang, DEFUN ("treesit-query-capture", Ftreesit_query_capture, - Streesit_query_capture, 2, 5, 0, + Streesit_query_capture, 2, 6, 0, doc: /* Query NODE with patterns in QUERY. Return a list of (CAPTURE_NAME . NODE). CAPTURE_NAME is the name @@ -3289,7 +3289,11 @@ in which the query is executed. Any matching node whose span overlaps with the region between BEG and END are captured, it doesn't have to be completely in the region. -If NODE-ONLY is non-nil, return a list of nodes. +If GROUPED is non-nil, group captures into matches and return a list of +MATCH, where each MATCH is a list of (CAPTURE_NAME . NODE). + +If NODE-ONLY is non-nil, return nodes only, and don't include +CAPTURE_NAME. Besides a node, NODE can be a parser, in which case the root node of that parser is used. NODE can also be a language symbol, in which case @@ -3300,7 +3304,8 @@ Signal `treesit-query-error' if QUERY is malformed or something else goes wrong. You can use `treesit-query-validate' to validate and debug the query. */) (Lisp_Object node, Lisp_Object query, - Lisp_Object beg, Lisp_Object end, Lisp_Object node_only) + Lisp_Object beg, Lisp_Object end, Lisp_Object node_only, + Lisp_Object grouped) { if (!(TS_COMPILED_QUERY_P (query) || CONSP (query) || STRINGP (query))) @@ -3385,8 +3390,22 @@ the query. */) while (ts_query_cursor_next_match (cursor, &match)) { - /* Record the checkpoint that we may roll back to. 
*/ + /* Depending on the value of GROUPED, we have two modes of + operation. + + If GROUPED is nil (mode 1), we return a list of captures; in + this case, we append the captures first, and roll back if the + captures don't match. + + If GROUPED is non-nil (mode 2), we return a list of match + groups; in this case, we collect captures into a list first, + and append to the results after verifying that the group + matches. */ + + /* Mode 1: Record the checkpoint that we may roll back to. */ prev_result = result; + /* Mode 2: Create a list storing captures of this match group. */ + Lisp_Object match_group = Qnil; /* 1. Get captured nodes. */ const TSQueryCapture *captures = match.captures; for (int idx = 0; idx < match.capture_count; idx++) @@ -3408,7 +3427,10 @@ the query. */) else cap = captured_node; - result = Fcons (cap, result); + if (NILP (grouped)) + result = Fcons (cap, result); /* Mode 1. */ + else + match_group = Fcons (cap, match_group); /* Mode 2. */ } /* 2. Get predicates and check whether this match can be included in the result list. */ @@ -3421,15 +3443,27 @@ the query. */) } /* captures_lisp = Fnreverse (captures_lisp); */ + /* Mode 1. */ struct capture_range captures_range = { result, prev_result }; - bool match = treesit_eval_predicates (captures_range, predicates, - &predicate_signal_data); + /* Mode 2. */ + if (!NILP (grouped)) + { + captures_range.start = match_group; + captures_range.end = Qnil; + } + bool match + = treesit_eval_predicates (captures_range, predicates, + &predicate_signal_data); + if (!NILP (predicate_signal_data)) break; - /* Predicates didn't pass, roll back. */ - if (!match) + /* Mode 1: Predicates didn't pass, roll back. */ + if (!match && NILP (grouped)) result = prev_result; + /* Mode 2: Predicates pass, add this match group. */ + if (match && !NILP (grouped)) + result = Fcons (Fnreverse (match_group), result); } /* Final clean up. */ -- 2.39.5