From: Yuan Fu <casouri@gmail.com> Date: Mon, 27 Feb 2023 02:05:13 +0000 (-0800) Subject: New tree-sitter indent anchor standalone-parent used by c-ts-mode X-Git-Tag: emacs-29.0.90~305 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=0f15286c5396e3415e0f40c21b6f6d7554f46a5e;p=emacs.git New tree-sitter indent anchor standalone-parent used by c-ts-mode When writing c-ts-mode Theo used parent-bol which works well except one case: 1 for (int i=0; 2 i < 5; 3 i++) { 4 func(i); 5 } In this case, when indenting "func(i)", parent-bol returns the start of "i++" on line 3, instead of the "correct" anchor, the start of "for" on line 1. parent-bol would have worked if the "for (...) {" part is in one line. To support this case I tried numerous things and added a bunch of stuff, culminating in c-ts-common-statement-offset. It's complicated, requires extra setup, and slow. Not anymore! I think the new anchor standalone-parent really captures the logic behind how people expect indentation to work. It's simple and fast, and requires no setup. * doc/lispref/modes.texi (Parser-based Indentation): Update manual. * lisp/progmodes/c-ts-mode.el: (c-ts-mode--standalone-grandparent): New anchor. (c-ts-mode--indent-styles): Replace c-ts-common-statement-offset with standalone-parent. (c-ts-base-mode): Add comment. * lisp/treesit.el: (treesit-simple-indent-presets): New anchor standalone-parent. --- diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi index 4c40f414ca0..11892aaa40e 100644 --- a/doc/lispref/modes.texi +++ b/doc/lispref/modes.texi @@ -5093,7 +5093,15 @@ This anchor is a function that is called with 3 arguments: @var{node}, @item parent-bol This anchor is a function that is called with 3 arguments: @var{node}, @var{parent}, and @var{bol}, and returns the first non-space character -on the line of @var{parent}. +on the line which @var{parent}'s start is on. 
+ +@item standalone-parent +This anchor is a function that is called with 3 arguments: @var{node}, +@var{parent}, and @var{bol}. It finds the first ancestor node +(parent, grandparent, etc) of @var{node} that starts on its own line, +and returns the start of that node. ``Starting on its own line'' means +there is only whitespace characters before the node on the line which +the node's start is on. @item prev-sibling This anchor is a function that is called with 3 arguments: @var{node}, diff --git a/lisp/progmodes/c-ts-mode.el b/lisp/progmodes/c-ts-mode.el index 041461f220a..2c79cf46308 100644 --- a/lisp/progmodes/c-ts-mode.el +++ b/lisp/progmodes/c-ts-mode.el @@ -279,6 +279,12 @@ doesn't have a child." ;; prev-sibling doesn't have a child. (treesit-node-start prev-sibling))) +(defun c-ts-mode--standalone-grandparent (_node parent bol &rest args) + "Like the standalone-parent anchor but pass it the grandparent. +PARENT, BOL, ARGS are the same as other anchor functions." + (apply (alist-get 'standalone-parent treesit-simple-indent-presets) + parent (treesit-node-parent parent) bol args)) + (defun c-ts-mode--indent-styles (mode) "Indent rules supported by `c-ts-mode'. MODE is either `c' or `cpp'." @@ -300,9 +306,9 @@ MODE is either `c' or `cpp'." ((parent-is "comment") prev-adaptive-prefix 0) ;; Labels. - ((node-is "labeled_statement") parent-bol 0) + ((node-is "labeled_statement") standalone-parent 0) ((parent-is "labeled_statement") - point-min c-ts-common-statement-offset) + c-ts-mode--standalone-grandparent c-ts-mode-indent-offset) ((node-is "preproc") point-min 0) ((node-is "#endif") point-min 0) @@ -330,7 +336,7 @@ MODE is either `c' or `cpp'." ;; Closing bracket. This should be before initializer_list ;; (and probably others) rule because that rule (and other ;; similar rules) will match the closing bracket. 
(Bug#61398) - ((node-is "}") point-min c-ts-common-statement-offset) + ((node-is "}") standalone-parent 0) ,@(when (eq mode 'cpp) '(((node-is "access_specifier") parent-bol 0) ;; Indent the body of namespace definitions. @@ -341,25 +347,25 @@ MODE is either `c' or `cpp'." ((match nil "initializer_list" nil 1 1) parent-bol c-ts-mode-indent-offset) ((match nil "initializer_list" nil 2) c-ts-mode--anchor-prev-sibling 0) ;; Statement in enum. - ((match nil "enumerator_list" nil 1 1) point-min c-ts-common-statement-offset) + ((match nil "enumerator_list" nil 1 1) standalone-parent c-ts-mode-indent-offset) ((match nil "enumerator_list" nil 2) c-ts-mode--anchor-prev-sibling 0) ;; Statement in struct and union. - ((match nil "field_declaration_list" nil 1 1) point-min c-ts-common-statement-offset) + ((match nil "field_declaration_list" nil 1 1) standalone-parent c-ts-mode-indent-offset) ((match nil "field_declaration_list" nil 2) c-ts-mode--anchor-prev-sibling 0) ;; Statement in {} blocks. - ((match nil "compound_statement" nil 1 1) point-min c-ts-common-statement-offset) + ((match nil "compound_statement" nil 1 1) standalone-parent c-ts-mode-indent-offset) ((match nil "compound_statement" nil 2) c-ts-mode--anchor-prev-sibling 0) ;; Opening bracket. - ((node-is "compound_statement") point-min c-ts-common-statement-offset) + ((node-is "compound_statement") standalone-parent c-ts-mode-indent-offset) ;; Bug#61291. - ((match "expression_statement" nil "body") point-min c-ts-common-statement-offset) + ((match "expression_statement" nil "body") standalone-parent c-ts-mode-indent-offset) ;; These rules are for cases where the body is bracketless. ;; Tested by the "Bracketless Simple Statement" test. 
- ((parent-is "if_statement") point-min c-ts-common-statement-offset) - ((parent-is "for_statement") point-min c-ts-common-statement-offset) - ((parent-is "while_statement") point-min c-ts-common-statement-offset) - ((parent-is "do_statement") point-min c-ts-common-statement-offset) + ((parent-is "if_statement") standalone-parent c-ts-mode-indent-offset) + ((parent-is "for_statement") standalone-parent c-ts-mode-indent-offset) + ((parent-is "while_statement") standalone-parent c-ts-mode-indent-offset) + ((parent-is "do_statement") standalone-parent c-ts-mode-indent-offset) ,@(when (eq mode 'cpp) `(((node-is "field_initializer_list") parent-bol ,(* c-ts-mode-indent-offset 2))))))) @@ -836,6 +842,8 @@ the semicolon. This function skips the semicolon." (when (eq c-ts-mode-indent-style 'linux) (setq-local indent-tabs-mode t)) (setq-local c-ts-common-indent-offset 'c-ts-mode-indent-offset) + ;; This setup is not needed anymore, but we might find uses for it + ;; later, so I'm keeping it. (setq-local c-ts-common-indent-type-regexp-alist `((block . ,(rx (or "compound_statement" "field_declaration_list" diff --git a/lisp/treesit.el b/lisp/treesit.el index 1decfc3d7cf..6b4db2a990c 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -1227,6 +1227,16 @@ See `treesit-simple-indent-presets'.") (goto-char (treesit-node-start parent)) (back-to-indentation) (point)))) + (cons 'standalone-parent + (lambda (_n parent &rest _) + (save-excursion + (catch 'term + (while parent + (goto-char (treesit-node-start parent)) + (when (looking-back (rx bol (* whitespace)) + (line-beginning-position)) + (throw 'term (point))) + (setq parent (treesit-node-parent parent))))))) (cons 'prev-sibling (lambda (node &rest _) (treesit-node-start (treesit-node-prev-sibling node)))) @@ -1323,6 +1333,11 @@ parent-bol Returns the beginning of non-space characters on the line where PARENT is on. 
+standalone-parent + + Finds the first ancestor node (parent, grandparent, etc) that + starts on its own line, and returns the start of that node. + prev-sibling Returns the start of NODE's previous sibling.