From: Yuan Fu <casouri@gmail.com>
Date: Mon, 27 Feb 2023 02:05:13 +0000 (-0800)
Subject: New tree-sitter indent anchor standalone-parent used by c-ts-mode
X-Git-Tag: emacs-29.0.90~305
X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=0f15286c5396e3415e0f40c21b6f6d7554f46a5e;p=emacs.git

New tree-sitter indent anchor standalone-parent used by c-ts-mode

When writing c-ts-mode Theo used parent-bol which works well except
one case:

1 for (int i=0;
2      i < 5;
3      i++) {
4   func(i);
5 }

In this case, when indenting "func(i)", parent-bol returns the start
of "i++" on line 3, instead of the "correct" anchor, the start of
"for" on line 1.  parent-bol would have worked if the "for (...) {"
part is in one line.

To support this case I tried numerous things and added a bunch of
stuff, culminating in c-ts-common-statement-offset.  It's complicated,
requires extra setup, and slow.

Not anymore! I think the new anchor standalone-parent really captures
the logic behind how people expect indentation to work. It's simple
and fast, and requires no setup.

* doc/lispref/modes.texi (Parser-based Indentation): Update manual.

* lisp/progmodes/c-ts-mode.el:
(c-ts-mode--standalone-grandparent): New anchor.
(c-ts-mode--indent-styles): Replace c-ts-common-statement-offset with
standalone-parent.
(c-ts-base-mode): Add comment.

* lisp/treesit.el:
(treesit-simple-indent-presets): New anchor standalone-parent.
---

diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi
index 4c40f414ca0..11892aaa40e 100644
--- a/doc/lispref/modes.texi
+++ b/doc/lispref/modes.texi
@@ -5093,7 +5093,15 @@ This anchor is a function that is called with 3 arguments: @var{node},
 @item parent-bol
 This anchor is a function that is called with 3 arguments: @var{node},
 @var{parent}, and @var{bol}, and returns the first non-space character
-on the line of @var{parent}.
+on the line which @var{parent}'s start is on.
+
+@item parent-bol
+This anchor is a function that is called with 3 arguments: @var{node},
+@var{parent}, and @var{bol}.  It finds the first ancestor node
+(parent, grandparent, etc) of @var{node} that starts on its own line,
+and return the start of that node.  ``Starting on its own line'' means
+there is only whitespace character before the node on the line which
+the node's start is on.
 
 @item prev-sibling
 This anchor is a function that is called with 3 arguments: @var{node},
diff --git a/lisp/progmodes/c-ts-mode.el b/lisp/progmodes/c-ts-mode.el
index 041461f220a..2c79cf46308 100644
--- a/lisp/progmodes/c-ts-mode.el
+++ b/lisp/progmodes/c-ts-mode.el
@@ -279,6 +279,12 @@ doesn't have a child."
     ;; prev-sibling doesn't have a child.
     (treesit-node-start prev-sibling)))
 
+(defun c-ts-mode--standalone-grandparent (_node parent bol &rest args)
+  "Like the standalone-parent anchor but pass it the grandparent.
+PARENT, BOL, ARGS are the same as other anchor functions."
+  (apply (alist-get 'standalone-parent treesit-simple-indent-presets)
+         parent (treesit-node-parent parent) bol args))
+
 (defun c-ts-mode--indent-styles (mode)
   "Indent rules supported by `c-ts-mode'.
 MODE is either `c' or `cpp'."
@@ -300,9 +306,9 @@ MODE is either `c' or `cpp'."
            ((parent-is "comment") prev-adaptive-prefix 0)
 
            ;; Labels.
-           ((node-is "labeled_statement") parent-bol 0)
+           ((node-is "labeled_statement") standalone-parent 0)
            ((parent-is "labeled_statement")
-            point-min c-ts-common-statement-offset)
+            c-ts-mode--standalone-grandparent c-ts-mode-indent-offset)
 
            ((node-is "preproc") point-min 0)
            ((node-is "#endif") point-min 0)
@@ -330,7 +336,7 @@ MODE is either `c' or `cpp'."
            ;; Closing bracket.  This should be before initializer_list
            ;; (and probably others) rule because that rule (and other
            ;; similar rules) will match the closing bracket.  (Bug#61398)
-           ((node-is "}") point-min c-ts-common-statement-offset)
+           ((node-is "}") standalone-parent 0)
            ,@(when (eq mode 'cpp)
                '(((node-is "access_specifier") parent-bol 0)
                  ;; Indent the body of namespace definitions.
@@ -341,25 +347,25 @@ MODE is either `c' or `cpp'."
            ((match nil "initializer_list" nil 1 1) parent-bol c-ts-mode-indent-offset)
            ((match nil "initializer_list" nil 2) c-ts-mode--anchor-prev-sibling 0)
            ;; Statement in enum.
-           ((match nil "enumerator_list" nil 1 1) point-min c-ts-common-statement-offset)
+           ((match nil "enumerator_list" nil 1 1) standalone-parent c-ts-mode-indent-offset)
            ((match nil "enumerator_list" nil 2) c-ts-mode--anchor-prev-sibling 0)
            ;; Statement in struct and union.
-           ((match nil "field_declaration_list" nil 1 1) point-min c-ts-common-statement-offset)
+           ((match nil "field_declaration_list" nil 1 1) standalone-parent c-ts-mode-indent-offset)
            ((match nil "field_declaration_list" nil 2) c-ts-mode--anchor-prev-sibling 0)
 
            ;; Statement in {} blocks.
-           ((match nil "compound_statement" nil 1 1) point-min c-ts-common-statement-offset)
+           ((match nil "compound_statement" nil 1 1) standalone-parent c-ts-mode-indent-offset)
            ((match nil "compound_statement" nil 2) c-ts-mode--anchor-prev-sibling 0)
            ;; Opening bracket.
-           ((node-is "compound_statement") point-min c-ts-common-statement-offset)
+           ((node-is "compound_statement") standalone-parent c-ts-mode-indent-offset)
            ;; Bug#61291.
-           ((match "expression_statement" nil "body") point-min c-ts-common-statement-offset)
+           ((match "expression_statement" nil "body") standalone-parent c-ts-mode-indent-offset)
            ;; These rules are for cases where the body is bracketless.
            ;; Tested by the "Bracketless Simple Statement" test.
-           ((parent-is "if_statement") point-min c-ts-common-statement-offset)
-           ((parent-is "for_statement") point-min c-ts-common-statement-offset)
-           ((parent-is "while_statement") point-min c-ts-common-statement-offset)
-           ((parent-is "do_statement") point-min c-ts-common-statement-offset)
+           ((parent-is "if_statement") standalone-parent c-ts-mode-indent-offset)
+           ((parent-is "for_statement") standalone-parent c-ts-mode-indent-offset)
+           ((parent-is "while_statement") standalone-parent c-ts-mode-indent-offset)
+           ((parent-is "do_statement") standalone-parent c-ts-mode-indent-offset)
 
            ,@(when (eq mode 'cpp)
                `(((node-is "field_initializer_list") parent-bol ,(* c-ts-mode-indent-offset 2)))))))
@@ -836,6 +842,8 @@ the semicolon.  This function skips the semicolon."
   (when (eq c-ts-mode-indent-style 'linux)
     (setq-local indent-tabs-mode t))
   (setq-local c-ts-common-indent-offset 'c-ts-mode-indent-offset)
+  ;; This setup is not needed anymore, but we might find uses for it
+  ;; later, so I'm keeping it.
   (setq-local c-ts-common-indent-type-regexp-alist
               `((block . ,(rx (or "compound_statement"
                                   "field_declaration_list"
diff --git a/lisp/treesit.el b/lisp/treesit.el
index 1decfc3d7cf..6b4db2a990c 100644
--- a/lisp/treesit.el
+++ b/lisp/treesit.el
@@ -1227,6 +1227,16 @@ See `treesit-simple-indent-presets'.")
                               (goto-char (treesit-node-start parent))
                               (back-to-indentation)
                               (point))))
+        (cons 'standalone-parent
+              (lambda (_n parent &rest _)
+                (save-excursion
+                  (catch 'term
+                    (while parent
+                      (goto-char (treesit-node-start parent))
+                      (when (looking-back (rx bol (* whitespace))
+                                          (line-beginning-position))
+                        (throw 'term (point)))
+                      (setq parent (treesit-node-parent parent)))))))
         (cons 'prev-sibling (lambda (node &rest _)
                               (treesit-node-start
                                (treesit-node-prev-sibling node))))
@@ -1323,6 +1333,11 @@ parent-bol
     Returns the beginning of non-space characters on the line where
     PARENT is on.
 
+standalone-parent
+
+    Finds the first ancestor node (parent, grandparent, etc) that
+    starts on its own line, and return the start of that node.
+
 prev-sibling
 
     Returns the start of NODE's previous sibling.