From 31b58161bb5e2afd30ade92854241976ee71bae0 Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Wed, 26 Apr 2023 20:09:42 -0700 Subject: [PATCH] Fix FOR_EACH_TAIL in c-ts-mode (bug#62951) * lisp/progmodes/c-ts-mode.el (c-ts-mode--indent-styles): New indent rule. (c-ts-mode--for-each-tail-regexp) (c-ts-mode--for-each-tail-body-matcher) (c-ts-mode--emacs-c-range-query) (c-ts-mode--for-each-tail-ranges) (c-ts-mode--reverse-ranges) (c-ts-mode--emacs-set-ranges): New functions and variables. (c-ts-mode): Create an emacs-c parser. More setup for Emacs source support. * lisp/treesit.el (treesit-query-range): Ignore underscore-prefixed capture names. --- lisp/progmodes/c-ts-mode.el | 99 ++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 2 deletions(-) diff --git a/lisp/progmodes/c-ts-mode.el b/lisp/progmodes/c-ts-mode.el index 6100f00e3ba..8f7a0f26b44 100644 --- a/lisp/progmodes/c-ts-mode.el +++ b/lisp/progmodes/c-ts-mode.el @@ -357,7 +357,9 @@ PARENT, BOL, ARGS are the same as other anchor functions." "Indent rules supported by `c-ts-mode'. MODE is either `c' or `cpp'." (let ((common - `(((parent-is "translation_unit") column-0 0) + `((c-ts-mode--for-each-tail-body-matcher prev-line c-ts-mode-indent-offset) + + ((parent-is "translation_unit") column-0 0) ((query "(ERROR (ERROR)) @indent") column-0 0) ((node-is ")") parent 1) ((node-is "]") parent-bol 0) @@ -969,6 +971,81 @@ if `c-ts-mode-emacs-sources-support' is non-nil." (or (treesit-add-log-current-defun) (c-ts-mode--defun-name (c-ts-mode--emacs-defun-at-point)))) +;;; FOR_EACH_TAIL fix +;; +;; FOR_EACH_TAIL (and FOR_EACH_TAIL_SAFE) followed by an unbracketed +;; body will mess up the parser, which parses the thing as a function +;; declaration. We "fix" it by adding a shadow parser, emacs-c (which +;; is just c but under a different name). We use emacs-c to find each +;; FOR_EACH_TAIL with an unbracketed body, and set the ranges of the C +;; parser so that it skips those FOR_EACH_TAIL's. 
Note that we only +;; ignore FOR_EACH_TAIL's with an unbracketed body. Those with a +;; bracketed body parse more or less fine. + +(defvar c-ts-mode--for-each-tail-regexp + (rx "FOR_EACH_" (or "TAIL" "TAIL_SAFE" "ALIST_VALUE" + "LIVE_BUFFER" "FRAME")) + "A regexp matching all the FOR_EACH_TAIL variants.") + +(defun c-ts-mode--for-each-tail-body-matcher (_n _p bol &rest _) + "A matcher that matches the first line after a FOR_EACH_TAIL. +For BOL see `treesit-simple-indent-rules'." + (when c-ts-mode-emacs-sources-support + (save-excursion + (goto-char bol) + (forward-line -1) + (skip-chars-forward " \t") + (looking-at c-ts-mode--for-each-tail-regexp)))) + +(defvar c-ts-mode--emacs-c-range-query + (treesit-query-compile + 'emacs-c `(((declaration + type: (macro_type_specifier + name: (identifier) @_name) + @for-each-tail) + (:match ,c-ts-mode--for-each-tail-regexp + @_name)))) + "Query that finds the FOR_EACH_TAIL with an unbracketed body.") + +(defvar-local c-ts-mode--for-each-tail-ranges nil + "Ranges covering all the FOR_EACH_TAIL's in the buffer.") + +(defun c-ts-mode--reverse-ranges (ranges beg end) + "Reverse RANGES and return the new ranges between BEG and END. +Positions that were included in RANGES are not in the returned +ranges, and vice versa. + +Return nil if RANGES is nil. This way, passing the returned +ranges to `treesit-parser-set-included-ranges' will make the +parser parse the whole buffer." + (if (null ranges) + nil + (let ((new-ranges nil) + (prev-end beg)) + (dolist (range ranges) + (when (< prev-end (car range)) + (push (cons prev-end (car range)) new-ranges)) + (setq prev-end (cdr range))) + (when (< prev-end end) + (push (cons prev-end end) new-ranges)) + (nreverse new-ranges)))) + +(defun c-ts-mode--emacs-set-ranges (beg end) + "Set ranges for the C parser to skip some FOR_EACH_TAIL's. +BEG and END are described in `treesit-range-rules'." 
+ (let* ((c-parser (treesit-parser-create 'c)) + (old-ranges c-ts-mode--for-each-tail-ranges) + (new-ranges (treesit-query-range + 'emacs-c c-ts-mode--emacs-c-range-query beg end)) + (set-ranges (treesit--clip-ranges + (treesit--merge-ranges + old-ranges new-ranges beg end) + (point-min) (point-max))) + (reversed-ranges (c-ts-mode--reverse-ranges + set-ranges (point-min) (point-max)))) + (setq-local c-ts-mode--for-each-tail-ranges set-ranges) + (treesit-parser-set-included-ranges c-parser reversed-ranges))) + ;;; Modes (defvar-keymap c-ts-base-mode-map @@ -1072,6 +1149,17 @@ in your configuration." :after-hook (c-ts-mode-set-modeline) (when (treesit-ready-p 'c) + ;; Add a fake "emacs-c" language which is just C. Used for + ;; skipping FOR_EACH_TAIL, see `c-ts-mode--emacs-set-ranges'. + (setf (alist-get 'emacs-c treesit-load-name-override-list) + '("libtree-sitter-c" "tree_sitter_c")) + ;; If Emacs source support is enabled, make sure emacs-c parser is + ;; after c parser in the parser list. This way various tree-sitter + ;; functions will automatically use the c parser rather than the + ;; emacs-c parser. + (when c-ts-mode-emacs-sources-support + (treesit-parser-create 'emacs-c)) + (treesit-parser-create 'c) ;; Comments. (setq-local comment-start "/* ") @@ -1085,9 +1173,16 @@ in your configuration." (setq-local treesit-defun-tactic 'top-level) (treesit-major-mode-setup) + ;; Emacs source support: handle DEFUN and FOR_EACH_TAIL gracefully. (when c-ts-mode-emacs-sources-support (setq-local add-log-current-defun-function - #'c-ts-mode--emacs-current-defun-name)))) + #'c-ts-mode--emacs-current-defun-name) + + (setq-local treesit-range-settings + (treesit-range-rules 'c-ts-mode--emacs-set-ranges)) + + (setq-local treesit-language-at-point-function + (lambda (_pos) 'c))))) ;;;###autoload (define-derived-mode c++-ts-mode c-ts-base-mode "C++" -- 2.39.2