From d05494a9ff89eb0a669274428994fbc58a6e4541 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Tue, 5 Sep 2023 19:57:34 -0700 Subject: [PATCH] Support tree-sitter local parsers * doc/lispref/parsing.texi (Multiple Languages): Update manual. * lisp/treesit.el (treesit-range-settings): Add LOCAL-P to range setting. (treesit-range-rules): Support :local keyword. (treesit-local-parsers-at) (treesit-local-parsers-in) (treesit--update-ranges-local): New functions. (treesit-update-ranges) (treesit-font-lock-fontify-region) (treesit--indent-1): Support local parsers and prioritize them over global parsers. --- doc/lispref/parsing.texi | 32 ++++++ lisp/treesit.el | 232 +++++++++++++++++++++++++++------------ 2 files changed, 191 insertions(+), 73 deletions(-) diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi index 738ce322c57..20d4b09ed4c 100644 --- a/doc/lispref/parsing.texi +++ b/doc/lispref/parsing.texi @@ -1714,6 +1714,19 @@ If @var{query} is a tree-sitter query, it should be preceded by two specifies the embedded language, and the @code{:host} keyword specifies the host language. +@cindex local parser +If the query is given a @code{:local} keyword, and the value is +@code{t}, the range set by this query has a dedicated local parser; +otherwise the range shares a parser with other ranges for the same +language. + +A parser views its ranges continuously, rather than viewing them +as independent segments. Therefore, if the embedded ranges are +semantically independent segments, use local parsers for them. + +The local parsers set for a range can be retrieved by +@code{treesit-local-parsers-at} and @code{treesit-local-parsers-in}. + @code{treesit-update-ranges} uses @var{query} to figure out how to set the ranges for parsers for the embedded language. It queries @var{query} in a host language parser, computes the ranges which the @@ -1749,6 +1762,23 @@ language of the buffer text at @var{pos}. This variable is used by @code{treesit-language-at}. 
@end defvar +@defun treesit-local-parsers-at &optional pos language +This function returns all the local parsers at @var{pos}. + +Local parsers are those that parse only a limited region marked by an +overlay. If @var{language} is non-@code{nil}, only return parsers for +that language. + +@var{pos} defaults to point. +@end defun + +@defun treesit-local-parsers-in &optional beg end language +This function is the same as @code{treesit-local-parsers-at}, but gets +the local parsers in a range instead of at a point. + +@var{beg} and @var{end} default to cover the whole buffer. +@end defun + @node Tree-sitter Major Modes @section Developing major modes with tree-sitter @cindex major mode, developing with tree-sitter @@ -1843,6 +1873,8 @@ add-log functions used by @code{add-log-current-defun}. If @code{treesit-simple-imenu-settings} (@pxref{Imenu}) is non-@code{nil}, it sets up Imenu. @end itemize + +@c TODO: Add treesit-thing-settings stuff once we finalize it. @end defun For more information on these built-in tree-sitter features, diff --git a/lisp/treesit.el b/lisp/treesit.el index 2c0361a8873..6cbf50b40c3 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -442,11 +442,13 @@ are ignored." (defvar-local treesit-range-settings nil "A list of range settings. -Each element of the list is of the form (QUERY LANGUAGE). +Each element of the list is of the form (QUERY LANGUAGE LOCAL-P). When updating the range of each parser in the buffer, `treesit-update-ranges' queries each QUERY, and sets LANGUAGE's range to the range spanned by captured nodes. QUERY must be a -compiled query. +compiled query. If LOCAL-P is t, give each range a separate +local parser rather than using a single parser for all the +ranges. Capture names generally don't matter, but names that starts with an underscore are ignored. 
@@ -487,15 +489,21 @@ this way: Emacs queries QUERY in the host language's parser, computes the ranges spanned by the captured nodes, and applies these ranges to parsers for the embedded language. +If there's a `:local' keyword with value t, the range computed by +this QUERY is given a dedicated local parser. Otherwise, the +range shares the same parser with other ranges. + QUERY can also be a function that takes two arguments, START and END. If QUERY is a function, it doesn't need the :KEYWORD VALUE pair preceding it. This function should set the ranges for parsers in the current buffer in the region between START and END. It is OK for this function to set ranges in a larger region that encompasses the region between START and END." - (let (host embed result) + (let (host embed result local) (while query-specs (pcase (pop query-specs) + (:local (when (eq t (pop query-specs)) + (setq local t))) (:host (let ((host-lang (pop query-specs))) (unless (symbolp host-lang) (signal 'treesit-error (list "Value of :host option should be a symbol" host-lang))) @@ -511,7 +519,7 @@ that encompasses the region between START and END." (when (null host) (signal 'treesit-error (list "Value of :host option cannot be omitted"))) (push (list (treesit-query-compile host query) - embed host) + embed local) result)) - (setq host nil embed nil)))) + (setq host nil embed nil local nil)))) (nreverse result))) @@ -562,6 +570,72 @@ those inside are kept." if (<= start (car range) (cdr range) end) collect range)) +(defun treesit-local-parsers-at (&optional pos language) + "Return all the local parsers at POS. + +Local parsers are those that parse only a limited region marked +by an overlay. If LANGUAGE is non-nil, only return parsers for +that language. + +POS defaults to point." 
+ (let ((res nil)) + (dolist (ov (overlays-at (or pos (point)))) + (when-let ((parser (overlay-get ov 'treesit-parser))) + (when (or (null language) + (eq (treesit-parser-language parser) + language)) + (push parser res)))) + (nreverse res))) + +(defun treesit-local-parsers-in (&optional beg end language) + "Return all the local parsers between BEG and END. + +Local parsers are those that have an `embedded' tag and parse +only a limited region marked by an overlay. If LANGUAGE is +non-nil, only return parsers for that language. + +BEG and END default to cover the whole buffer." + (let ((res nil)) + (dolist (ov (overlays-in (or beg (point-min)) (or end (point-max)))) + (when-let ((parser (overlay-get ov 'treesit-parser))) + (when (or (null language) + (eq (treesit-parser-language parser) + language)) + (push parser res)))) + (nreverse res))) + +(defun treesit--update-ranges-local + (query embedded-lang &optional beg end) + "Update range for local parsers between BEG and END. +Use QUERY to get the ranges, and make sure each range has a local +parser for EMBEDDED-LANG." + ;; Clean up. + (dolist (ov (overlays-in (or beg (point-min)) (or end (point-max)))) + (when-let ((parser (overlay-get ov 'treesit-parser))) + (when (eq (overlay-start ov) (overlay-end ov)) + (delete-overlay ov) + (treesit-parser-delete parser)))) + ;; Update range. + (let* ((host-lang (treesit-query-language query)) + (ranges (treesit-query-range host-lang query beg end))) + (pcase-dolist (`(,beg . ,end) ranges) + (let ((parsers nil)) + ;; Collect the local parsers that already cover this range. + (dolist (ov (overlays-in beg end)) + (let ((embedded-parser (overlay-get ov 'treesit-parser))) + (when (and embedded-parser + (eq (treesit-parser-language embedded-parser) + embedded-lang)) + (push embedded-parser parsers)))) + ;; Create overlay and local parser if there is none yet. 
+ (unless parsers + (push (treesit-parser-create embedded-lang nil t 'embedded) parsers) + (overlay-put (make-overlay beg end nil nil t) + 'treesit-parser (car parsers))) + ;; Confine every parser, including a newly created one, to the range. + (dolist (parser parsers) + (treesit-parser-set-included-ranges parser `((,beg . ,end)))))))) + (defun treesit-update-ranges (&optional beg end) "Update the ranges for each language in the current buffer. If BEG and END are non-nil, only update parser ranges in that @@ -574,9 +648,14 @@ region." (dolist (setting treesit-range-settings) (let ((query (nth 0 setting)) (language (nth 1 setting)) + (local (nth 2 setting)) (beg (or beg (point-min))) (end (or end (point-max)))) - (if (functionp query) (funcall query beg end) + (cond + ((functionp query) (funcall query beg end)) + (local + (treesit--update-ranges-local query language beg end)) + (t (let* ((host-lang (treesit-query-language query)) (parser (treesit-parser-create language)) (old-ranges (treesit-parser-included-ranges parser)) @@ -586,11 +665,9 @@ region." (treesit--merge-ranges old-ranges new-ranges beg end) (point-min) (point-max)))) - (dolist (parser (treesit-parser-list)) - (when (eq (treesit-parser-language parser) - language) - (treesit-parser-set-included-ranges - parser set-ranges)))))))) + (dolist (parser (treesit-parser-list nil language)) + (treesit-parser-set-included-ranges + parser set-ranges)))))))) (defun treesit-parser-range-on (parser beg &optional end) "Check if PARSER's range covers the portion between BEG and END. @@ -1042,70 +1119,77 @@ If LOUDLY is non-nil, display some debugging information." (message "Fontifying region: %s-%s" start end)) (treesit-update-ranges start end) (font-lock-unfontify-region start end) - (dolist (setting treesit-font-lock-settings) - (let* ((query (nth 0 setting)) - (enable (nth 1 setting)) - (override (nth 3 setting)) - (language (treesit-query-language query))) - - ;; Use deterministic way to decide whether to turn on "fast - ;; mode". (See bug#60691, bug#60223.) 
- (when (eq treesit--font-lock-fast-mode 'unspecified) - (pcase-let ((`(,max-depth ,max-width) - (treesit-subtree-stat - (treesit-buffer-root-node language)))) - (if (or (> max-depth 100) (> max-width 4000)) - (setq treesit--font-lock-fast-mode t) - (setq treesit--font-lock-fast-mode nil)))) - - (when-let* ((root (treesit-buffer-root-node language)) - (nodes (if (eq t treesit--font-lock-fast-mode) - (treesit--children-covering-range-recurse - root start end (* 4 jit-lock-chunk-size)) - (list (treesit-buffer-root-node language)))) - ;; Only activate if ENABLE flag is t. - (activate (eq t enable))) - (ignore activate) - - ;; Query each node. - (dolist (sub-node nodes) - (let* ((delta-start (car treesit--font-lock-query-expand-range)) - (delta-end (cdr treesit--font-lock-query-expand-range)) - (captures (treesit-query-capture - sub-node query - (max (- start delta-start) (point-min)) - (min (+ end delta-end) (point-max))))) - - ;; For each captured node, fontify that node. - (with-silent-modifications - (dolist (capture captures) - (let* ((face (car capture)) - (node (cdr capture)) - (node-start (treesit-node-start node)) - (node-end (treesit-node-end node))) - - ;; If node is not in the region, take them out. See - ;; comment #3 above for more detail. - (if (and (facep face) - (or (>= start node-end) (>= node-start end))) + (let* ((local-parsers (treesit-local-parsers-in start end)) + (global-parsers (treesit-parser-list)) + (root-nodes + (mapcar (lambda (parser) + (cons (treesit-parser-language parser) + (treesit-parser-root-node parser))) + (append local-parsers global-parsers)))) + (dolist (setting treesit-font-lock-settings) + (let* ((query (nth 0 setting)) + (enable (nth 1 setting)) + (override (nth 3 setting)) + (language (treesit-query-language query)) + (root (alist-get language root-nodes))) + + ;; Use deterministic way to decide whether to turn on "fast + ;; mode". (See bug#60691, bug#60223.) 
+ (when (eq treesit--font-lock-fast-mode 'unspecified) + (pcase-let ((`(,max-depth ,max-width) + (treesit-subtree-stat + (treesit-buffer-root-node language)))) + (if (or (> max-depth 100) (> max-width 4000)) + (setq treesit--font-lock-fast-mode t) + (setq treesit--font-lock-fast-mode nil)))) + + ;; Only activate if ENABLE flag is t. + (when-let ((activate (eq t enable)) + (nodes (if (eq t treesit--font-lock-fast-mode) + (treesit--children-covering-range-recurse + root start end (* 4 jit-lock-chunk-size)) + (list root)))) + (ignore activate) + + ;; Query each node. + (dolist (sub-node nodes) + (let* ((delta-start (car treesit--font-lock-query-expand-range)) + (delta-end (cdr treesit--font-lock-query-expand-range)) + (captures (treesit-query-capture + sub-node query + (max (- start delta-start) (point-min)) + (min (+ end delta-end) (point-max))))) + + ;; For each captured node, fontify that node. + (with-silent-modifications + (dolist (capture captures) + (let* ((face (car capture)) + (node (cdr capture)) + (node-start (treesit-node-start node)) + (node-end (treesit-node-end node))) + + ;; If node is not in the region, take them out. See + ;; comment #3 above for more detail. + (if (and (facep face) + (or (>= start node-end) (>= node-start end))) + (when (or loudly treesit--font-lock-verbose) + (message "Captured node %s(%s-%s) but it is outside of fontifing region" node node-start node-end)) + + (cond + ((facep face) + (treesit-fontify-with-override + (max node-start start) (min node-end end) + face override)) + ((functionp face) + (funcall face node override start end))) + + ;; Don't raise an error if FACE is neither a face nor + ;; a function. This is to allow intermediate capture + ;; names used for #match and #eq. 
+ (when (or loudly treesit--font-lock-verbose) - (message "Captured node %s(%s-%s) but it is outside of fontifing region" node node-start node-end)) - - (cond - ((facep face) - (treesit-fontify-with-override - (max node-start start) (min node-end end) - face override)) - ((functionp face) - (funcall face node override start end))) - - ;; Don't raise an error if FACE is neither a face nor - ;; a function. This is to allow intermediate capture - ;; names used for #match and #eq. - (when (or loudly treesit--font-lock-verbose) - (message "Fontifying text from %d to %d, Face: %s, Node: %s" - (max node-start start) (min node-end end) - face (treesit-node-type node)))))))))))) + (message "Fontifying text from %d to %d, Face: %s, Node: %s" + (max node-start start) (min node-end end) + face (treesit-node-type node))))))))))))) `(jit-lock-bounds ,start . ,end)) (defun treesit--font-lock-notifier (ranges parser) @@ -1522,8 +1606,10 @@ Return (ANCHOR . OFFSET). This function is used by (forward-line 0) (skip-chars-forward " \t") (point))) + (local-parsers (treesit-local-parsers-at bol)) (smallest-node (cond ((null (treesit-parser-list)) nil) + (local-parsers (treesit-node-at bol (car local-parsers))) ((eq 1 (length (treesit-parser-list))) (treesit-node-at bol)) ((treesit-language-at (point)) -- 2.39.2