From 8997ebf8b443510cf755e0152845d603716971df Mon Sep 17 00:00:00 2001 From: Yuan Fu Date: Fri, 29 Nov 2024 20:43:09 -0800 Subject: [PATCH] Add baseline tree-sitter indent rule for C-like languages I found a really good baseline indent rule that handles a wide range of situations very well. Now major modes can just start with this rule and add exceptions on top. This is worth mentioning in the manual, but that'll be a large change, and doesn't have to be included in this commit. * lisp/progmodes/c-ts-common.el: (c-ts-common-list-indent-style): New variable. (c-ts-common--standalone-parent): (c-ts-common--prev-standalone-sibling): (c-ts-common-parent-ignore-preproc): (c-ts-common-baseline-indent-rule): New function. * lisp/treesit.el (treesit--indent-prev-line-node): New function. (treesit-simple-indent-presets): Add new preset. (cherry picked from commit e37cd4fa597beaec3b491edb1b15ea0c19e72be4) --- lisp/progmodes/c-ts-common.el | 163 ++++++++++++++++++++++++++++++++++ lisp/treesit.el | 14 +++ 2 files changed, 177 insertions(+) diff --git a/lisp/progmodes/c-ts-common.el b/lisp/progmodes/c-ts-common.el index cf55ddea8f7..7273f0f7dba 100644 --- a/lisp/progmodes/c-ts-common.el +++ b/lisp/progmodes/c-ts-common.el @@ -532,6 +532,169 @@ characters on the current line." (setq node (treesit-node-parent node))) (* level (symbol-value c-ts-common-indent-offset)))) +;;; Baseline indent rule + +(defvar c-ts-common-list-indent-style 'align + "Intructs `c-ts-common-baseline-indent-rule' how to indent lists. + +If the value is `align', indent lists like this: + +const a = [ + 1, 2, 3 + 4, 5, 6, + ]; + +If the value is `simple', indent lists like this: + +const a = [ + 1, 2, 3, + 4, 5, 6, +];") + +(defun c-ts-common--standalone-parent (parent) + "Find the first parent that starts on a new line. +Start searching from PARENT, so if PARENT satisfies the condition, it'll +be returned. Return the starting position of the parent, return nil if +no parent satisfies the condition." + (save-excursion + (catch 'term + (while parent + (goto-char (treesit-node-start parent)) + (when (looking-back (rx bol (* whitespace)) + (line-beginning-position)) + (throw 'term (point))) + (setq parent (treesit-node-parent parent)))))) + +(defun c-ts-common--prev-standalone-sibling (node) + "Return the previous sibling of NODE that starts on a new line. +Return nil if no sibling satisfies the condition." + (save-excursion + (setq node (treesit-node-prev-sibling node 'named)) + (goto-char (treesit-node-start node)) + (while (and node + (goto-char (treesit-node-start node)) + (not (looking-back (rx bol (* whitespace)) + (pos-bol)))) + (setq node (treesit-node-prev-sibling node 'named))) + node)) + +(defun c-ts-common-parent-ignore-preproc (node) + "Return the parent of NODE, skipping preproc nodes." + (let ((parent (treesit-node-parent node)) + (pred (if (treesit-thing-defined-p + 'preproc (or (and node (treesit-node-language node)) + (treesit-parser-language + treesit-primary-parser))) + 'preproc + "preproc"))) + (while (and parent (treesit-node-match-p parent pred)) + (setq parent (treesit-node-parent parent))) + parent)) + +(defun c-ts-common-baseline-indent-rule (node parent bol &rest _) + "Baseline indent rule for C-like languages. + +NODE PARENT, BOL are like in other simple indent rules. + +This rule works as follows: + +Let PREV-NODE be the largest node that starts on previous line, +basically the NODE we get if we were indenting previous line. + +0. Closing brace aligns with first parent that starts on a new line. + +1. If PREV-NODE and NODE are siblings, align this line to previous +line (PREV-NODE as the anchor, and offset is 0). + +2. If PARENT is a list, ie, (...) [...], align with NODE's first +sibling. For the first sibling and the closing paren or bracket, indent +according to `c-ts-common-list-indent-style'. This rule also handles +initializer lists like {...}, but initializer lists doesn't respect +`c-ts-common-list-indent-style'--they always indent in the `simple' +style. + +3. Otherwise, go up the parse tree from NODE and look for a parent that +starts on a new line. Use that parent as the anchor and indent one +level. But if the node is a top-level construct (ignoring preprocessor +nodes), don't indent it. + +This rule works for a wide range of scenarios including complex +situations. Major modes should use this as the fallback rule, and add +exception rules before it to cover the cases it doesn't apply. + +This rule tries to be smart and ignore proprocessor node in some +situations. By default, any node that has \"proproc\" in its type are +considered a preprocessor node. If that heuristic is inaccurate, define +a `preproc' thing in `treesit-thing-settings', and this rule will use +the thing definition instead." + (let ((prev-line-node (treesit--indent-prev-line-node bol)) + (offset (symbol-value c-ts-common-indent-offset))) + (cond + ;; Condition 0. + ((and (treesit-node-match-p node "}") + (treesit-node-match-p (treesit-node-child parent 0) "{")) + (cons (c-ts-common--standalone-parent parent) + 0)) + ;; Condition 1. + ((and (treesit-node-eq (treesit-node-parent prev-line-node) + parent) + (not (treesit-node-match-p node (rx (or ")" "]"))))) + (cons (treesit-node-start prev-line-node) + 0)) + ;; Condition 2. + ((treesit-node-match-p (treesit-node-child parent 0) + (rx (or "(" "["))) + (let ((first-sibling (treesit-node-child parent 0 'named))) + (cond + ;; Closing delimeters. + ((treesit-node-match-p node (rx (or ")" "]"))) + (if (eq c-ts-common-list-indent-style 'align) + (cons (treesit-node-start (treesit-node-child parent 0)) + 0) + (cons (c-ts-common--standalone-parent parent) + 0))) + ;; First sibling. + ((treesit-node-eq node first-sibling) + (if (eq c-ts-common-list-indent-style 'align) + (cons (treesit-node-start (treesit-node-child parent 0)) + 1) + (cons (c-ts-common--standalone-parent parent) + offset))) + ;; Not first sibling + (t (cons (treesit-node-start + (or (c-ts-common--prev-standalone-sibling node) + first-sibling)) + 0))))) + ;; Condition 2 for initializer list, only apply to + ;; second line. Eg, + ;; + ;; return { 1, 2, 3, + ;; 4, 5, 6, --> Handled by this condition. + ;; 7, 8, 9 }; --> Handled by condition 1. + ((and (treesit-node-match-p (treesit-node-child parent 0) "{") + (treesit-node-prev-sibling node 'named)) + ;; If first sibling is a comment, indent like code; otherwise + ;; align to first sibling. + (if (treesit-node-match-p + (treesit-node-child parent 0 'named) + c-ts-common--comment-regexp 'ignore-missing) + (cons (c-ts-common--standalone-parent parent) + offset) + (cons (treesit-node-start + (treesit-node-child parent 0 'named)) + 0))) + ;; Before we fallback to condition 3, make sure we don't indent + ;; top-level stuff. + ((treesit-node-eq (treesit-parser-root-node + (treesit-node-parser parent)) + (c-ts-common-parent-ignore-preproc node)) + (cons (pos-bol) 0)) + ;; Condition 3. + (t (cons (c-ts-common--standalone-parent parent) + offset))))) + + + (provide 'c-ts-common) ;;; c-ts-common.el ends here diff --git a/lisp/treesit.el b/lisp/treesit.el index 628a8b3c766..e4da8b19739 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -1585,6 +1585,14 @@ should take the same argument as MATCHER or ANCHOR. If it matches, return a cons (ANCHOR-POS . OFFSET), where ANCHOR-POS is a position and OFFSET is the indent offset; if it doesn't match, return nil.") +(defun treesit--indent-prev-line-node (pos) + "Return the largest node on the previous line of POS." + (save-excursion + (goto-char pos) + (when (eq (forward-line -1) 0) + (back-to-indentation) + (treesit--indent-largest-node-at (point))))) + (defvar treesit-simple-indent-presets (list (cons 'match (lambda @@ -1635,6 +1643,12 @@ OFFSET is the indent offset; if it doesn't match, return nil.") (lambda (node &rest _) (string-match-p type (or (treesit-node-type node) ""))))) + ;; FIXME: Add to manual. + (cons 'prev-line-is (lambda (type) + (lambda (_n _p bol &rest _) + (treesit-node-match-p + (treesit--indent-prev-line-node bol) + type)))) (cons 'field-is (lambda (name) (lambda (node &rest _) (string-match-p -- 2.39.5