From 0562006da3b6f0147069a9aea75c08a9a0a4e6d8 Mon Sep 17 00:00:00 2001 From: Perry Smith Date: Mon, 2 Jan 2023 02:57:38 +0200 Subject: [PATCH] Add ruby-ts-mode * etc/NEWS: Mention the new mode. * lisp/progmodes/ruby-ts-mode.el: New file. * test/lisp/progmodes/ruby-ts-mode-tests.el: New file. * lisp/progmodes/eglot.el (eglot-server-programs): Add ruby-ts-mode to the Ruby entry. Co-authored-by: Dmitry Gutov --- etc/NEWS | 5 + lisp/progmodes/eglot.el | 2 +- lisp/progmodes/ruby-ts-mode.el | 958 ++++++++++++++++++++++ test/lisp/progmodes/ruby-ts-mode-tests.el | 254 ++++++ 4 files changed, 1218 insertions(+), 1 deletion(-) create mode 100644 lisp/progmodes/ruby-ts-mode.el create mode 100644 test/lisp/progmodes/ruby-ts-mode-tests.el diff --git a/etc/NEWS b/etc/NEWS index 36044d26244..355ba6ba8aa 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -3261,6 +3261,11 @@ written in YAML. It is auto-enabled for files with the ".yaml" or A major mode based on the tree-sitter library for editing programs in the Rust language. It is auto-enabled for files with the ".rs" extension. +--- +*** New major mode 'ruby-ts-mode'. +An optional major mode based on the tree-sitter library for editing +programs in the Ruby language. + * Incompatible Lisp Changes in Emacs 29.1 diff --git a/lisp/progmodes/eglot.el b/lisp/progmodes/eglot.el index 791108001d2..6d192d9b333 100644 --- a/lisp/progmodes/eglot.el +++ b/lisp/progmodes/eglot.el @@ -205,7 +205,7 @@ chosen (interactively or automatically)." (((caml-mode :language-id "ocaml") (tuareg-mode :language-id "ocaml") reason-mode) . ("ocamllsp")) - (ruby-mode + ((ruby-mode ruby-ts-mode) . ("solargraph" "socket" "--port" :autoport)) (haskell-mode . 
("haskell-language-server-wrapper" "--lsp")) diff --git a/lisp/progmodes/ruby-ts-mode.el b/lisp/progmodes/ruby-ts-mode.el new file mode 100644 index 00000000000..2654f08fe85 --- /dev/null +++ b/lisp/progmodes/ruby-ts-mode.el @@ -0,0 +1,958 @@ +;;; ruby-ts-mode.el --- Major mode for editing Ruby files using tree-sitter -*- lexical-binding: t; -*- + +;; Copyright (C) 2022-2023 Free Software Foundation, Inc. + +;; Author: Perry Smith +;; Created: December 2022 +;; Keywords: ruby languages tree-sitter + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + +;; This file defines ruby-ts-mode which is a major mode for editing +;; Ruby files that uses Tree Sitter to parse the language. More +;; information about Tree Sitter can be found in the ELisp Info pages +;; as well as this website: https://tree-sitter.github.io/tree-sitter/ + +;; For this major mode to work, Emacs has to be compiled with +;; tree-sitter support, and the Ruby grammar has to be compiled and +;; put somewhere Emacs can find it. See the docstring of +;; `treesit-extra-load-path'. + +;; This mode doesn't associate itself with .rb files automatically. +;; You can do that either by prepending to the value of +;; `auto-mode-alist', or using `major-mode-remap-alist'. + +;; Tree Sitter brings a lot of power and versatility which can be +;; broken into these features. 
+ +;; * Font Lock + +;; The ability to color the source code is not new but what is new is +;; the versatility to enable and disable particular font lock rules. +;; I suggest reviewing variable treesit-font-lock-level and function +;; treesit-font-lock-recompute-features to get a better understanding +;; of the following. + +;; Currently treesit-font-lock-feature-list is set with the +;; following levels: +;; 1: comment method-definition +;; 2: keyword regexp string type +;; 3: builtin constant delimiter escape-sequence +;; global instance +;; interpolation literal symbol variable +;; 4: bracket error function operator punctuation + +;; Thus if treesit-font-lock-level is set to level 3 which is its +;; default, all the features listed in levels 1 through 3 above will +;; be enabled, i.e. those features will font lock or colorize the +;; code accordingly. Individual features can be added and removed via +;; treesit-font-lock-recompute-features. + +;; describe-face can be used to view how a face looks. + +;; * Indent + +;; ruby-ts-mode tries to adhere to the indentation related user +;; options from ruby-mode, such as ruby-indent-level, +;; ruby-indent-tabs-mode, and so on. + +;; * IMenu +;; * Navigation +;; * Which-func + +;;; Code: + +(require 'treesit) +(require 'ruby-mode) + +(declare-function treesit-parser-create "treesit.c") + +(defgroup ruby-ts nil + "Major mode for editing Ruby code." + :prefix "ruby-ts-" + :group 'languages) + +(defcustom ruby-ts-highlight-predefined-constants t + "When non-nil, the pre-defined constants are highlighted. +They will be highlighted the same way as the pre-defined variables." + :type 'boolean) + +(defvar ruby-ts--operators + '("+" "-" "*" "/" "%" "**" + "==" "!=" ">" "<" ">=" "<=" "<=>" "===" + "=" "+=" "-=" "*=" "/=" "%=" "**=" + "&" "|" "^" "~" "<<" ">>" + "!" "&&" "and" "not" "or" "||" + "?" ":" + ".." "..." + "defined?" + "." 
"::") + "Ruby operators for tree-sitter font-locking.") + +(defvar ruby-ts--delimiters '("," ";") + "Ruby's punctuation characters.") + +(defvar ruby-ts--predefined-constants + (rx string-start (or "ARGF" "ARGV" "DATA" "ENV" "RUBY_COPYRIGHT" + "RUBY_DESCRIPTION" "RUBY_ENGINE" "RUBY_ENGINE_VERSION" + "RUBY_PATCHLEVEL" "RUBY_PLATFORM" "RUBY_RELEASE_DATE" + "RUBY_REVISION" "RUBY_VERSION" "STDERR" "STDIN" "STDOUT" + "TOPLEVEL_BINDING") string-end) + "Regexp matching exactly one of Ruby's predefined global constants. +Highlighted when `ruby-ts-highlight-predefined-constants' is non-nil.") + +(defvar ruby-ts--predefined-variables + (rx string-start (or "$!" "$@" "$~" "$&" "$`" "$'" "$+" "$=" "$/" "$\\" "$," "$;" + "$." "$<" "$>" "$_" "$*" "$$" "$?" "$:" "$LOAD_PATH" + "$LOADED_FEATURES" "$DEBUG" "$FILENAME" "$stderr" "$stdin" + "$stdout" "$VERBOSE" "$-a" "$-i" "$-l" "$-p" + (seq "$" (+ digit))) string-end) + "Regexp matching exactly one of Ruby's predefined global variables.") + +(defconst ruby-ts--class-or-module-regex + (rx string-start + (or "class" "module" "singleton_class") + string-end) + "Regular expression that matches a class or module's node type.") + +(defconst ruby-ts--method-regex + (rx string-start + (or "method" "singleton_method") + string-end) + "Regular expression matching methods and singleton methods.") + +(defconst ruby-ts--statement-container-regexp + (rx string-start + (or "program" + "block_body" + "begin_block" + "end_block" + "do" + "else" + "then" + "ensure" + "body_statement" + "parenthesized_statements" + "interpolation") + string-end) + "Regular expression of the nodes that can contain statements.") + +(defun ruby-ts--lineno (node) + "Return line number of NODE's start." + (line-number-at-pos (treesit-node-start node))) + +;; doc/keywords.rdoc in the Ruby git repository considers these to be +;; reserved keywords. If these keywords are added to the list, it +;; causes the font-lock to stop working. 
+;; +;; "__ENCODING__" "__FILE__" "__LINE__" "false" "self" "super" "true" +;; +;; "nil" (which does not exhibit this issue) is also considered a +;; keyword but I removed it and added it as a constant. +;; +(defvar ruby-ts--keywords + '("BEGIN" "END" "alias" "and" "begin" "break" "case" "class" + "def" "defined?" "do" "else" "elsif" "end" "ensure" "for" + "if" "in" "module" "next" "not" "or" "redo" "rescue" + "retry" "return" "then" "undef" "unless" "until" "when" + "while" "yield") + "Ruby keywords for tree-sitter font-locking.") + +(defun ruby-ts--comment-font-lock (node override start end &rest _) + "Apply font lock to comment NODE within START and END. +Applies `font-lock-comment-delimiter-face' and +`font-lock-comment-face'. See `treesit-fontify-with-override' for +values of OVERRIDE." + ;; Empirically it appears as if (treesit-node-start node) will be + ;; where the # character is at and (treesit-node-end node) will be + ;; the end of the line. + (let* ((node-start (treesit-node-start node)) + (plus-1 (1+ node-start)) + (node-end (treesit-node-end node)) + (text (treesit-node-text node t))) + (if (and (>= node-start start) + (<= plus-1 end) + (string-match-p "\\`#" text)) + (treesit-fontify-with-override node-start plus-1 + font-lock-comment-delimiter-face override)) + (treesit-fontify-with-override (max plus-1 start) (min node-end end) + font-lock-comment-face override))) + +(defun ruby-ts--font-lock-settings (language) + "Tree-sitter font-lock settings for Ruby, built for treesit LANGUAGE." 
+ (treesit-font-lock-rules + :language language + :feature 'comment + '((comment) @ruby-ts--comment-font-lock) + + :language language + :feature 'builtin + `(((global_variable) @var (:match ,ruby-ts--predefined-variables @var)) @font-lock-builtin-face + ,@(when ruby-ts-highlight-predefined-constants + `(((constant) @var (:match ,ruby-ts--predefined-constants @var)) @font-lock-builtin-face))) + + :language language + :feature 'keyword + `([,@ruby-ts--keywords] @font-lock-keyword-face) + + :language language + :feature 'constant + '((true) @font-lock-doc-markup-face + (false) @font-lock-doc-markup-face + (nil) @font-lock-doc-markup-face + (self) @font-lock-doc-markup-face + (super) @font-lock-doc-markup-face) + + :language language + :feature 'symbol + '((bare_symbol) @font-lock-constant-face + (delimited_symbol (string_content) @font-lock-constant-face) + (hash_key_symbol) @font-lock-constant-face + (simple_symbol) @font-lock-constant-face) + + ;; Before 'operator so (unary) works. + :language language + :feature 'literal + '((unary ["+" "-"] [(integer) (rational) (float) (complex)]) @font-lock-number-face + (integer) @font-lock-number-face + (float) @font-lock-number-face + (complex) @font-lock-number-face + (rational) @font-lock-number-face) + + ;; Also before 'operator because % and / are operators + :language language + :feature 'regexp + '((regex "/" @font-lock-regexp-grouping-construct) + (regex _ (string_content) @font-lock-regexp-grouping-backslash)) + + :language language + :feature 'operator + `("!" @font-lock-negation-char-face + [,@ruby-ts--operators] @font-lock-operator-face) + + ;; TODO: Consider using a different face for string delimiters. + ;; font-lock-delimiter-face is not a good choice, though, because it + ;; looks like 'default' in the default theme, and its documented purpose + ;; is characters like commas, semicolons, etc. 
+ :language language + :feature 'string + '((delimited_symbol [ ":\"" "\"" ] @font-lock-string-face) + (string "\"" @font-lock-string-face) + (string_array [ "%w(" ")" ] @font-lock-delimiter-face) + (subshell "`" @font-lock-delimiter-face) + (symbol_array [ "%i(" ")"] @font-lock-delimiter-face)) + + :language language + :feature 'string + '((string_content) @font-lock-string-face + (heredoc_beginning) @font-lock-string-face + (heredoc_content) @font-lock-string-face + (heredoc_end) @font-lock-string-face) + + :language language + :feature 'interpolation + '((interpolation "#{" @font-lock-doc-face) + (interpolation "}" @font-lock-doc-face)) + + :language language + :feature 'type + '((constant) @font-lock-type-face) + + :language language + :feature 'global + '((global_variable) @font-lock-variable-name-face) + + :language language + :feature 'instance + '((instance_variable) @font-lock-variable-name-face) + + :language language + :feature 'method-definition + '((method + name: (identifier) @font-lock-function-name-face)) + + ;; Yuan recommends also putting method definitions into the + ;; 'function' category (thus keeping it in both). I've opted to + ;; just use separate categories for them -- dgutov. + :language language + :feature 'function + '((call + method: (identifier) @font-lock-function-name-face)) + + :language language + :feature 'error + '((ERROR) @font-lock-warning-face) + + :feature 'escape-sequence + :language language + :override t + '((escape_sequence) @font-lock-escape-face) + + :language language + :feature 'bracket + '((["(" ")" "[" "]" "{" "}"]) @font-lock-bracket-face) + + :language language + :feature 'punctuation + `(([,@ruby-ts--delimiters] @font-lock-delimiter-face)))) + +(defun ruby-ts--first-non-comment-child (node) + "Return the first named child of NODE that is not a comment." 
+ (let ((child (treesit-node-child node 0 t))) + (while (and child + (equal "comment" (treesit-node-type child))) + (setq child (treesit-node-next-sibling child t))) + child)) + +;; +;; These routines would be better added to treesit.el They are +;; intended to be used with indent rules +;; +;; I think this is over simplified but basically +;; treesit--simple-indent-eval calls the result with node, parent, and +;; bol. Thus all of these functions return a lambda that accepts three +;; arguments. Somewhere something explains that &rest should always +;; be used in case extra arguments are added in the future. +;; + +(defun ruby-ts--type-pred (regexp) + "Return predicate taking a node returning non-nil if REGEXP matches type of node." + (lambda (node) + (string-match-p regexp (treesit-node-type node)))) + +(defun ruby-ts--parent-node (_n parent &rest _) + "Return the PARENT node matching ident rule." + parent) + +(defun ruby-ts--align-keywords (pred) + "Return either start or bol of PRED. +PRED should specify a node that is listed in +`ruby-alignable-keywords'. If PRED is listed in user option +`ruby-align-to-stmt-keywords', then return the BOL of PRED. +Otherwise return start of PRED." + (lambda (node parent bol &rest rest) + (let* ((pred-node (funcall pred node parent bol rest)) + (temp (treesit-node-start pred-node)) + (keyword (treesit-node-type pred-node)) + (bol (ruby-smie--indent-to-stmt-p keyword))) + (when temp + (if bol + (save-excursion + (goto-char temp) + (back-to-indentation) + (point)) + temp))))) + +(defun ruby-ts--bol (pred) + "Return bol of PRED. +PRED should take (node parent bol &rest rest) and return a node. +Returns bol of the current line if PRED returns nil." 
+ (lambda (node parent bol &rest rest) + (save-excursion + (let ((temp (treesit-node-start (funcall pred node parent bol rest)))) + (if temp + (goto-char temp)) + (back-to-indentation) + (point))))) + +(defun ruby-ts--grand-parent-is (type) + "Check grand parent's type matches regexp TYPE." + (lambda (_n parent &rest _) + (string-match-p type (treesit-node-type (treesit-node-parent parent))))) + +(defun ruby-ts--grand-parent-node (_n parent &rest _) + "Return parent of PARENT node." + (treesit-node-parent parent)) + +(defun ruby-ts--ancestor-start (type) + "Return start of closest ancestor matching regexp TYPE." + (lambda (node &rest _) + (treesit-node-start (treesit-parent-until node (ruby-ts--type-pred type))))) + +(defun ruby-ts--ancestor-is (type) + "Check that ancestor's type matches regexp TYPE." + (lambda (node &rest _) + (treesit-parent-until node (ruby-ts--type-pred type)))) + +(defun ruby-ts--align-chain-p (&rest _) + "Return value of `ruby-align-chained-calls'." + ruby-align-chained-calls) + +(defun ruby-ts--parenless-call-arguments-indent-p (&rest _) + "Return value of `ruby-parenless-call-arguments-indent'." + ruby-parenless-call-arguments-indent) + +(defun ruby-ts--align-chain (_n parent &rest _) + "Align chained method call. +Align NODE which will be the dot (.) to the dot of the +first (outermost) call in the chain. See +`ruby-align-chained-calls' for details. PARENT will be the +\"call\" node. Called only when `ruby-align-chained-calls' is +non-nil." + (let* (first-call ) + (while (and parent + (setq first-call (treesit-node-parent parent)) + (string-match-p "call" (treesit-node-type first-call))) + (setq parent first-call)) + (treesit-node-start (treesit-search-subtree parent "\\." nil t)))) + +(defun ruby-ts--same-line-args-p (_n parent &rest _) + "Return non-nil when first argument is on the same line as the method. +PARENT will be argument_list. NODE can be the close paren." 
+ (let* ((method (treesit-node-parent parent)) + (first-param (ruby-ts--first-non-comment-child parent))) + (= (ruby-ts--lineno method) (ruby-ts--lineno first-param)))) + +(defun ruby-ts--same-line-params-p (_n parent &rest _) + "Return non-nil when first parameter is on the same line as the method. +PARENT will be method_parameters. NODE can be the close paren." + (let* ((method (treesit-node-parent parent)) + (first-param (ruby-ts--first-non-comment-child parent))) + (= (ruby-ts--lineno method) (ruby-ts--lineno first-param)))) + +(defun ruby-ts--param-indent (_n parent &rest _) + "Indent parameters that start on next line. +Given: NODE is the parameter. PARENT is +method_parameters. `ruby-ts--same-line-params-p' is nil. +Indent according to `ruby-method-params-indent'. + +If `ruby-method-params-indent' is 0 +def foo( + param1, + param2 +) + +Params start on next line, `ruby-method-params-indent' is t +def foo( + param1, + param2 + )" + (let ((method (treesit-node-parent parent))) + (if (eq t ruby-method-params-indent) + ;; For methods, the "name" is the name of the method but for + ;; singleton methods, we need to find "object" + (let* ((singleton (equal "singleton_method" (treesit-node-type method))) + (name-node (treesit-node-child-by-field-name + method + (if singleton "object" "name")))) + ;; (message "name-node: %S" name-node) + (treesit-node-start name-node)) + ;; Small Danger: if the method name plus the parent is less than + ;; `ruby-method-params-indent', then the addition will put the + ;; result on the next line and indented incorrectly. There are + ;; plausible ways to fix this but the probability seems rather + ;; remote. + (+ (treesit-node-start method) (or ruby-method-params-indent 0))))) + +(defun ruby-ts--true (&rest _) + "I have no idea why I can't just put t but I can put 0." + t) + +(defun ruby-ts--same-line-hash-array-p (_n parent &rest _) + "Return non-nil if first element and open brace are on the same line. 
+NODE is the element or closing brace or bracket. PARENT is the +array or hash." + (let* ((open-brace (treesit-node-child parent 0 nil)) + (first-child (ruby-ts--first-non-comment-child parent))) + (= (ruby-ts--lineno open-brace) (ruby-ts--lineno first-child)))) + +(defalias 'ancestor-node #'ruby-ts--ancestor-is + "Return ancestor node whose type matches regexp TYPE.") + +(defun ruby-ts--assignment-ancestor (node &rest _) + "Return the assignment ancestor of NODE if any." + (treesit-parent-until node (ruby-ts--type-pred "\\`assignment\\'"))) + +(defun ruby-ts--statement-ancestor (node &rest _) + "Return the statement ancestor of NODE if any. +A statement is defined as a child of a statement container where +a statement container is a node that matches +`ruby-ts--statement-container-regexp'." + (let* ((statement node) + (parent (treesit-node-parent statement))) + (while (and parent + statement + (not (string-match-p ruby-ts--statement-container-regexp + (treesit-node-type parent)))) + (setq statement parent + parent (treesit-node-parent parent))) + statement)) + +(defun ruby-ts--is-in-condition (node &rest _) + "Return the condition node if NODE is within a condition." + (while (and node + (not (equal "condition" (treesit-node-field-name node))) + (not (string-match-p ruby-ts--statement-container-regexp + (treesit-node-type node)))) + (setq node (treesit-node-parent node))) + (and (equal "condition" (treesit-node-field-name node)) node)) + +(defun ruby-ts--endless-method (node &rest _) + "Return the expression node if NODE is in an endless method. +i.e. expr of def foo(args) = expr is returned." 
+ (let* ((method node)) + (while (and method + (not (string-match-p ruby-ts--method-regex (treesit-node-type method)))) + (setq method (treesit-node-parent method))) + (when method + (if (equal "=" (treesit-node-type (treesit-node-child method 3 nil))) + (treesit-node-child method 4 nil))))) + +;; +;; end of functions that can be used for queries +;; + +(defun ruby-ts--indent-rules () + "Indent rules supported by `ruby-ts-mode'." + (let ((common + `( + ;; Slam all top level nodes to the left margin + ((parent-is "program") parent 0) + + ;; Do not indent here docs or the end. Not sure why it + ;; takes the grand-parent but ok fine. + ((n-p-gp nil nil "heredoc_body") no-indent 0) + ((parent-is "heredoc_body") no-indent 0) + ((node-is "heredoc_body") no-indent 0) + ;; Do not indent multiline regexp + ((n-p-gp nil nil "regex") no-indent 0) + ((parent-is "regex") no-indent 0) + + ;; if then else elseif notes: + ;; + ;; 1. The "then" starts at the end of the line that ends + ;; the if condition which can be on a different line + ;; from the "if". + ;; + ;; 2. If there is an "elsif", it is a sibling to the then + ;; BUT the "else" that follows is now a child of the + ;; "elsif". + ;; + ;; 3. The statements within each of these are direct + ;; children. There is no intermediate construct such + ;; as a block_statement. + ;; + ;; I'm using very restrictive patterns hoping to reduce rules + ;; triggering unintentionally. + ((match "else" "if") + (ruby-ts--align-keywords ruby-ts--parent-node) 0) + ((match "elsif" "if") + (ruby-ts--align-keywords ruby-ts--parent-node) 0) + ((match "end" "if") + (ruby-ts--align-keywords ruby-ts--parent-node) 0) + ((n-p-gp nil "then\\|else\\|elsif" "if\\|unless") + (ruby-ts--align-keywords ruby-ts--grand-parent-node) ruby-indent-level) + + ;; case expression: when, in_clause, and else are all + ;; children of case. when and in_clause have pattern and + ;; body as fields. body has "then" and then the statemets. + ;; i.e. 
the statements are not children of when but then. + ;; But for the statements are children of else. + ((match "when" "case") + (ruby-ts--align-keywords ruby-ts--parent-node) 0) + ((match "in_clause" "case") + (ruby-ts--align-keywords ruby-ts--parent-node) 0) + ((match "else" "case") + (ruby-ts--align-keywords ruby-ts--parent-node) 0) + ((match "end" "case") + (ruby-ts--align-keywords ruby-ts--parent-node) 0) + ((n-p-gp nil "then" "when") grand-parent ruby-indent-level) + ((n-p-gp nil "then" "in_clause") grand-parent ruby-indent-level) + ((n-p-gp nil "else" "case") parent ruby-indent-level) + + ;; The beauty of inconsistency :-) + ;; while / until have only "do" as a child. The "end" is a + ;; child of "do". + ((n-p-gp "end" "do" "while\\|until") + (ruby-ts--align-keywords ruby-ts--grand-parent-node) 0) + ((n-p-gp nil "do" "while\\|until") + (ruby-ts--align-keywords ruby-ts--grand-parent-node) ruby-indent-level) + ;; begin can have rescue, ensure, else, and end. + ;; statements are a child of begin. rescue, ensure, else, + ;; and end are also children of begin. rescue has a then + ;; as a child thus statements will be grand children of + ;; rescue. + ((n-p-gp nil "then" "rescue") + (ruby-ts--align-keywords ruby-ts--grand-parent-node) ruby-indent-level) + ((n-p-gp nil "ensure\\|else" "begin") + (ruby-ts--align-keywords ruby-ts--parent-node) ruby-indent-level) + ((match "rescue\\|ensure\\|else\\|end" "begin") + (ruby-ts--align-keywords ruby-ts--parent-node) 0) + ((parent-is "begin") ;last + (ruby-ts--align-keywords ruby-ts--parent-node) ruby-indent-level) + + ;; for ... I don't think I have ever used a for loop in + ;; Ruby. The "in" (not an in_clause) and "do" are + ;; children. The statements are children of the "do". + ;; And, of course, the "end" is a child of the "do". 
+ ((n-p-gp "end" "do" "for") + (ruby-ts--align-keywords ruby-ts--grand-parent-node) 0) + ((n-p-gp nil "do" "for") + (ruby-ts--align-keywords ruby-ts--grand-parent-node) ruby-indent-level) + + ;; method has a "body_statement" and the "end" as children. + ;; The body_statement can have rescue, ensure, and else as + ;; well as statements. Note that the first statement of a + ;; body_statement hits the node as "body_statement" and not + ;; as the assignment, etc. + ((match "end" ,ruby-ts--method-regex) + (ruby-ts--align-keywords ruby-ts--parent-node) 0) + ((n-p-gp "\\`\\(rescue\\|ensure\\|else\\)\\'" "body_statement" ,ruby-ts--method-regex) + (ruby-ts--align-keywords ruby-ts--grand-parent-node) 0) + ((n-p-gp nil "rescue\\|ensure\\|else" "body_statement") parent ruby-indent-level) + ((match "body_statement" ,ruby-ts--method-regex) ;first statement + (ruby-ts--align-keywords ruby-ts--parent-node) ruby-indent-level) + ((n-p-gp nil "body_statement" ,ruby-ts--method-regex) ;other statements + (ruby-ts--align-keywords ruby-ts--grand-parent-node) ruby-indent-level) + + ;; Chained calls: + ;; if `ruby-align-chained-calls' is true, the first query + ;; matches and the node is aligned under the first dot (.); + ;; else the second query aligns + ;; `ruby-indent-level' spaces in from the parent. + ((and ruby-ts--align-chain-p (match "\\." "call")) ruby-ts--align-chain 0) + ((match "\\." 
"call") parent ruby-indent-level) + + ;; ruby-indent-after-block-in-continued-expression + ((match "begin" "assignment") parent ruby-indent-level) + + ;; method parameters -- four styles: + ;; 1) With paren, first arg on same line: + ((and (query "(method_parameters \"(\" _ @indent)") + ruby-ts--same-line-params-p + (node-is ")")) + first-sibling 0) + ((and (query "(method_parameters \"(\" _ @indent)") + ruby-ts--same-line-params-p) + first-sibling 1) + ;; ;; 2) With paren, first arg on next line, ruby-method-params-indent eq t + ;; ;; 3) With paren, first arg on next line, ruby-method-params-indent neq t + ((and (query "(method_parameters \"(\" _ @indent)") (node-is ")")) ruby-ts--param-indent 0) + ((query "(method_parameters \"(\" _ @indent)") ruby-ts--param-indent ruby-indent-level) + ;; 4) No paren: + ((parent-is "method_parameters") first-sibling 0) + + ;; Argument lists: + ;; 1) With paren, 1st arg on same line + ((and (query "(argument_list \"(\" _ @indent)") + ruby-ts--same-line-args-p + (node-is ")")) + first-sibling 0) + ((and (query "(argument_list \"(\" _ @indent)") + ruby-ts--same-line-args-p) + first-sibling 1) + ;; 2) With paren, 1st arg on next line + ((and (query "(argument_list \"(\" _ @indent)") + (node-is ")")) + (ruby-ts--bol ruby-ts--grand-parent-node) 0) + ((query "(argument_list \"(\" _ @indent)") + (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level) + ;; 3) No paren, ruby-parenless-call-arguments-indent is t + ((and ruby-ts--parenless-call-arguments-indent-p (parent-is "argument_list")) + first-sibling 0) + ;; 4) No paren, ruby-parenless-call-arguments-indent is nil + ((parent-is "argument_list") (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level) + + ;; Old... probably too simple + ((parent-is "block_parameters") first-sibling 1) + + ((and (parent-is "binary") + (or ruby-ts--assignment-ancestor + ruby-ts--is-in-condition + ruby-ts--endless-method)) + first-sibling 0) + + ;; ruby-mode does not touch these... 
+ ((match "bare_string" "string_array") no-indent 0) + + ;; hash and array other than assignments. Note that the + ;; first sibling is the "{" or "[". There is a special + ;; case where the hash is an argument to a method. These + ;; need to be processed first. + + ((and ruby-ts--same-line-hash-array-p (match "}" "hash")) + first-sibling 0) + ((and ruby-ts--same-line-hash-array-p (parent-is "hash")) + (nth-sibling 0 ruby-ts--true) 0) + ((and ruby-ts--same-line-hash-array-p (match "]" "array")) + first-sibling 0) + ((and ruby-ts--same-line-hash-array-p (parent-is "array")) + (nth-sibling 0 ruby-ts--true) 0) + + ;; NOTE to folks trying to understand my insanity... + ;; I having trouble understanding the "logic" of why things + ;; are indented like they are so I am adding special cases + ;; hoping at some point I will be struck by lightning. + ((and (n-p-gp "}" "hash" "pair") + (not ruby-ts--same-line-hash-array-p)) + grand-parent 0) + ((and (n-p-gp "pair" "hash" "pair") + (not ruby-ts--same-line-hash-array-p)) + grand-parent ruby-indent-level) + ((and (n-p-gp "}" "hash" "method") + (not ruby-ts--same-line-hash-array-p)) + grand-parent 0) + ((and (n-p-gp "pair" "hash" "method") + (not ruby-ts--same-line-hash-array-p)) + grand-parent ruby-indent-level) + + ((n-p-gp "}" "hash" "assignment") (ruby-ts--bol ruby-ts--grand-parent-node) 0) + ((n-p-gp nil "hash" "assignment") (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level) + ((n-p-gp "]" "array" "assignment") (ruby-ts--bol ruby-ts--grand-parent-node) 0) + ((n-p-gp nil "array" "assignment") (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level) + + ((n-p-gp "}" "hash" "argument_list") first-sibling 0) + ((n-p-gp nil "hash" "argument_list") first-sibling ruby-indent-level) + ((n-p-gp "]" "array" "argument_list") first-sibling 0) + ((n-p-gp nil "array" "argument_list") first-sibling ruby-indent-level) + + ((match "}" "hash") first-sibling 0) + ((parent-is "hash") first-sibling ruby-indent-level) + ((match "]" 
"array") first-sibling 0)
+           ((parent-is "array") first-sibling ruby-indent-level)
+
+           ;; If the previous method isn't finished yet, this will get
+           ;; the next method indented properly.
+           ((n-p-gp ,ruby-ts--method-regex "body_statement" ,ruby-ts--class-or-module-regex)
+            (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level)
+
+           ;; Match the end of a class / module
+           ((match "end" ,ruby-ts--class-or-module-regex) parent 0)
+
+           ;; A "do_block" has a "body_statement" child which has the
+           ;; statements as children within it.  The problem is that
+           ;; the first statement starts at the same point as the
+           ;; body_statement and so treesit-simple-indent is called
+           ;; with node set to body_statement on the first statement
+           ;; but with node set to the statement and parent set to
+           ;; body_statement for all others. ... Fine.  Be that way.
+           ;; Ditto for "block" and "block_body"
+           ((node-is "body_statement") parent-bol ruby-indent-level)
+           ((parent-is "body_statement") (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level)
+           ((match "end" "do_block") parent-bol 0)
+           ((n-p-gp "block_body" "block" nil) parent-bol ruby-indent-level)
+           ((n-p-gp nil "block_body" "block") (ruby-ts--bol ruby-ts--grand-parent-node) ruby-indent-level)
+           ((match "}" "block") (ruby-ts--bol ruby-ts--grand-parent-node) 0)
+
+           ;; Chained strings
+           ((match "string" "chained_string") first-sibling 0)
+
+           ;; Try and indent two spaces when all else fails.
+           (catch-all parent-bol ruby-indent-level))))
+    `((ruby . ,common))))
+
+(defun ruby-ts--class-or-module-p (node)
+  "Return non-nil if NODE is a class or module node.
+NODE's type is matched against `ruby-ts--class-or-module-regex'."
+  (string-match-p ruby-ts--class-or-module-regex (treesit-node-type node)))
+
+(defun ruby-ts--get-name (node)
+  "Return the text of the `name' field of NODE."
+  (treesit-node-text (treesit-node-child-by-field-name node "name")))
+
+(defun ruby-ts--full-name (node)
+  "Return the fully qualified name of NODE.
+The names of the enclosing classes and modules are prepended,
+outermost first.  NODE's own name is joined to its innermost
+enclosing class or module with \"#\"; the enclosing names are
+joined to each other with \"::\"."
+  (let* ((name (ruby-ts--get-name node))
+         (delimiter "#"))
+    ;; Walk up through every enclosing class/module.  Only the first
+    ;; join uses "#"; all outer levels use "::".
+    (while (setq node (treesit-parent-until node #'ruby-ts--class-or-module-p))
+      (setq name (concat (ruby-ts--get-name node) delimiter name))
+      (setq delimiter "::"))
+    name))
+
+(defun ruby-ts--imenu-helper (node)
+  "Convert a treesit sparse tree NODE into an imenu list.
+Helper for `ruby-ts--imenu' which converts a treesit sparse
+NODE into a list of imenu ( name . pos ) nodes.
+
+NODE is a cons whose car is a tree-sitter node (or nil) and whose
+cdr is a list of sparse subtrees.  A node that has subtrees
+becomes a submenu whose first entry points at the node itself."
+  (let* ((ts-node (car node))
+         (subtrees (mapcan #'ruby-ts--imenu-helper (cdr node)))
+         (name (when ts-node
+                 (ruby-ts--full-name ts-node))))
+    (cond
+     ((or (null ts-node) (null name)) subtrees)
+     ;; Don't include the anonymous "class" and "module" nodes
+     ((string-match-p "(\"\\(class\\|module\\)\")"
+                      (treesit-node-string ts-node))
+      nil)
+     (t
+      ;; Create the marker only for entries that are actually
+      ;; returned, so that discarded nodes don't leave stray markers
+      ;; in the buffer.
+      (let ((marker (set-marker (make-marker)
+                                (treesit-node-start ts-node))))
+        (if subtrees
+            `((,name ,(cons name marker) ,@subtrees))
+          `((,name . ,marker))))))))
+
+;; For now, this is going to work like ruby-mode and return a list of
+;; class, modules, def (methods), and alias.  It is likely that this
+;; can be rigged to be easily extended.
+(defun ruby-ts--imenu ()
+  "Return Imenu alist for the current buffer."
+  (let* ((root (treesit-buffer-root-node))
+         (nodes (treesit-induce-sparse-tree root "^\\(method\\|alias\\|class\\|module\\)$")))
+    (ruby-ts--imenu-helper nodes)))
+
+(defun ruby-ts--arrow-up-start (arg)
+  "Move to the start ARG levels up or out.
+A level is only counted when the ancestor node starts at a
+different position than the node currently being considered.
+Signal an error if there is no node at point."
+  (interactive "p")
+  (setq arg (or arg 1))
+  (let* ((pnt (point))
+         (found (treesit-node-at pnt))
+         (pos (treesit-node-start found))
+         new-pos)
+    (while (and found pos (> arg 0))
+      (setq found (treesit-node-parent found)
+            new-pos (treesit-node-start found))
+      ;; Parents that start where their child starts would not move
+      ;; point, so they don't count as a level.
+      (when (and new-pos (not (= new-pos pos)))
+        (setq arg (1- arg)
+              pos new-pos)))
+    (if pos
+        (goto-char pos)
+      (error "Something didn't work"))))
+
+(defun ruby-ts--class-name (node)
+  "Return NODE's name as a one-element list of strings.
+For a \"singleton_class\" node the text of the `value' field is
+used; for any other node type the text of the `name' field is
+used.  The text is returned without text properties."
+  (list
+   (treesit-node-text
+    (treesit-node-child-by-field-name
+     node
+     (if (equal "singleton_class" (treesit-node-type node)) "value" "name"))
+    t)))
+
+(defun ruby-ts--method-name (node)
+  "Return the method name of NODE as a list of strings.
+Assumes NODE's type is method or singleton_method.
+For a \"method\" node the list holds just the method name.
+Otherwise it holds two strings: the text of the object the method
+is defined on, followed by the method name."
+  (if (equal "method" (treesit-node-type node))
+      (list (treesit-node-text (treesit-node-child-by-field-name node "name") t))
+    (let* ((children (treesit-node-children node))
+           ;; 0th is "def"
+           (first (nth 1 children))
+           (third (nth 3 children)))
+      (cond
+       ;; The object is a parenthesized expression: take the children
+       ;; at index 2 (the expression) and 5 (the method name).
+       ((equal "(" (treesit-node-type first))
+        (list (treesit-node-text (nth 2 children) t)
+              (treesit-node-text (nth 5 children) t)))
+       ;; ((equal "self" (treesit-node-type first))
+       ;;  (list (treesit-node-text third t)))
+       (t (mapcar (lambda (n)
+                    (treesit-node-text n t))
+                  (list first third)))))))
+
+(defun ruby-ts-add-log-current-function ()
+  "Return the current method name as a string.
+The hash (#) is for instance methods only which are methods
+\"defined on a class\" -- which is 99% of methods.  Otherwise, a
+dot (.) is used.  Double colon (::) is used between classes.  The
+leading double colon is not added."
+  (let* ((node (treesit-node-at (point)))
+         (method (treesit-parent-until node (ruby-ts--type-pred ruby-ts--method-regex)))
+         (class (or method node))
+         (result nil)
+         (sep "#")
+         (method-list nil)
+         (class-list nil)
+         (method-name nil))
+
+    (when method
+      (setq method-list (ruby-ts--method-name method))
+      ;; More than one element means the method is defined on an
+      ;; explicit object, so it is not an instance method.
+      (unless (= 1 (length method-list))
+        (setq sep ".")))
+    ;; Collect the enclosing classes/modules, outermost first.
+    (while (setq class (treesit-parent-until class
+                                             (ruby-ts--type-pred
+                                              ruby-ts--class-or-module-regex)))
+      (setq class-list (append (ruby-ts--class-name class) class-list)))
+    (setq method-name (car (last method-list))
+          method-list (butlast method-list))
+    ;; Don't repeat the class name when the method is defined on the
+    ;; innermost enclosing class itself.
+    (when (equal (car method-list) (car (last class-list)))
+      (setq method-list (cdr method-list)))
+    ;; The "[^A-Z]" test below must be case-sensitive; with the
+    ;; buffer's `case-fold-search' non-nil the range would also cover
+    ;; lowercase letters.
+    (let ((case-fold-search nil))
+      (dolist (ele (append class-list method-list))
+        (cond
+         ((equal "self" ele)
+          (setq sep "."))
+         ((string-match-p "\\`[^A-Z]" ele) ;not a class
+          (setq sep "."
+                result (if result
+                           (concat result "::" ele)
+                         ele)))
+         (t (setq result (if result
+                             (concat result "::" ele)
+                           ele))))))
+    (if method-name
+        (concat result sep method-name)
+      result)))
+
+(defvar-keymap ruby-ts-mode-map
+  :doc "Keymap used in `ruby-ts-mode'."
+  :parent prog-mode-map
+  ;; (when ruby-use-smie
+  ;;   (define-key map (kbd "M-C-d") 'smie-down-list))
+  ;; (define-key map (kbd "M-C-p") 'ruby-beginning-of-block)
+  ;; (define-key map (kbd "M-C-n") 'ruby-end-of-block)
+  "C-c {" #'ruby-toggle-block
+  "C-c '" #'ruby-toggle-string-quotes
+  "C-c C-f" #'ruby-find-library-file)
+
+;;;###autoload
+(define-derived-mode ruby-ts-mode prog-mode "Ruby"
+  "Major mode for editing Ruby, powered by tree-sitter."
+  :group 'ruby
+  :syntax-table ruby-mode-syntax-table
+
+  (setq indent-tabs-mode ruby-indent-tabs-mode)
+
+  (setq-local paragraph-start (concat "$\\|" page-delimiter))
+  (setq-local paragraph-separate paragraph-start)
+  (setq-local paragraph-ignore-fill-prefix t)
+
+  (setq-local comment-start "# ")
+  (setq-local comment-end "")
+  (setq-local comment-start-skip "#+ *")
+
+  (unless (treesit-ready-p 'ruby)
+    (error "Tree-sitter for Ruby isn't available"))
+
+  (treesit-parser-create 'ruby)
+
+  (setq-local add-log-current-defun-function #'ruby-ts-add-log-current-function)
+
+  ;; Navigation.
+  (setq-local treesit-defun-type-regexp ruby-ts--method-regex)
+
+  ;; AFAIK, Ruby can not nest methods
+  (setq-local treesit-defun-prefer-top-level nil)
+
+  ;; Imenu.
+  (setq-local imenu-create-index-function #'ruby-ts--imenu)
+
+  (setq-local treesit-simple-indent-rules (ruby-ts--indent-rules))
+
+  ;; Font-lock.
+  (setq-local treesit-font-lock-settings (ruby-ts--font-lock-settings 'ruby))
+  ;; Level 3 is the default.
+  (setq-local treesit-font-lock-feature-list
+              '(( comment method-definition )
+                ( keyword regexp string type)
+                ( builtin constant
+                  delimiter escape-sequence global
+                  instance
+                  interpolation literal symbol variable)
+                ( bracket error function operator punctuation)))
+
+  (treesit-major-mode-setup))
+
+(provide 'ruby-ts-mode)
+
+;;; ruby-ts-mode.el ends here
diff --git a/test/lisp/progmodes/ruby-ts-mode-tests.el b/test/lisp/progmodes/ruby-ts-mode-tests.el
new file mode 100644
index 00000000000..f48d0bf6330
--- /dev/null
+++ b/test/lisp/progmodes/ruby-ts-mode-tests.el
@@ -0,0 +1,254 @@
+;;; ruby-ts-mode-tests.el --- Test suite for ruby-ts-mode  -*- lexical-binding:t -*-
+
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;;; Code:
+
+(require 'ert)
+(require 'ert-x)
+(require 'ruby-ts-mode)
+
+(defmacro ruby-ts-with-temp-buffer (contents &rest body)
+  "Evaluate BODY in a temporary `ruby-ts-mode' buffer holding CONTENTS.
+CONTENTS is inserted before the mode is enabled, so point is at
+the end of the buffer when BODY starts."
+  (declare (indent 1) (debug t))
+  `(with-temp-buffer
+     (insert ,contents)
+     (ruby-ts-mode)
+     ,@body))
+
+(defun ruby-ts-should-indent-buffer (expected content)
+  "Assert that CONTENT turns into EXPECTED after the buffer is re-indented.
+
+The whitespace before and including \"|\" on each line is removed."
+  (ruby-ts-with-temp-buffer (ruby-ts-test-string content)
+    (indent-region (point-min) (point-max))
+    (should (string= (ruby-ts-test-string expected) (buffer-string)))))
+
+(defun ruby-ts-test-string (s &rest args)
+  "Return S with each line's margin removed, formatted with ARGS.
+The margin is any leading spaces or tabs up to and including the
+first \"|\" on a line.  The result is then passed to `format'
+together with ARGS."
+  (apply #'format (replace-regexp-in-string "^[ \t]*|" "" s) args))
+
+;; The tests below are skipped unless the Ruby grammar itself can be
+;; loaded: `ruby-ts-mode' signals an error without it, even when
+;; Emacs was built with tree-sitter support.
+
+(ert-deftest ruby-ts-indent-simple ()
+  (skip-unless (treesit-ready-p 'ruby t))
+  (ruby-ts-should-indent-buffer
+   "if foo
+   |  bar
+   |end
+   |zot
+   |"
+   "if foo
+   |bar
+   |  end
+   |    zot
+   |"))
+
+(ert-deftest ruby-ts-align-to-stmt-keywords-t ()
+  (skip-unless (treesit-ready-p 'ruby t))
+  (let ((ruby-align-to-stmt-keywords t))
+    (ruby-ts-should-indent-buffer
+     "foo = if bar?
+     |  1
+     |else
+     |  2
+     |end
+     |
+     |foo || begin
+     |  bar
+     |end
+     |
+     |foo ||
+     |  begin
+     |    bar
+     |  end
+     |"
+     "foo = if bar?
+     |       1
+     |else
+     |  2
+     | end
+     |
+     | foo || begin
+     |    bar
+     |end
+     |
+     |  foo ||
+     |   begin
+     |bar
+     |  end
+     |")))
+
+(ert-deftest ruby-ts-align-to-stmt-keywords-case ()
+  (skip-unless (treesit-ready-p 'ruby t))
+  (let ((ruby-align-to-stmt-keywords '(case)))
+    (ruby-ts-should-indent-buffer
+     "b = case a
+     |when 13
+     |  6
+     |else
+     |  42
+     |end"
+     "b = case a
+     |    when 13
+     |  6
+     |    else
+     |      42
+     |    end")))
+
+(ert-deftest ruby-ts-add-log-current-method-examples ()
+  (skip-unless (treesit-ready-p 'ruby t))
+  (let ((pairs '(("foo" . "#foo")
+                 ("C.foo" . ".foo")
+                 ("self.foo" . ".foo")
+                 ("<<" . "#<<"))))
+    (dolist (pair pairs)
+      (let ((name (car pair))
+            (value (cdr pair)))
+        (ruby-ts-with-temp-buffer (ruby-ts-test-string
+                                   "module M
+                                   |  class C
+                                   |    def %s
+                                   |      _
+                                   |    end
+                                   |  end
+                                   |end"
+                                   name)
+          (search-backward "_")
+          (forward-line)
+          (should (string= (ruby-ts-add-log-current-function)
+                           (format "M::C%s" value))))))))
+
+(ert-deftest ruby-ts-add-log-current-method-outside-of-method ()
+  (skip-unless (treesit-ready-p 'ruby t))
+  (ruby-ts-with-temp-buffer (ruby-ts-test-string
+                             "module M
+                             |  class C
+                             |    def foo
+                             |    end
+                             |    _
+                             |  end
+                             |end")
+    (search-backward "_")
+    (should (string= (ruby-ts-add-log-current-function) "M::C"))))
+
+(ert-deftest ruby-ts-add-log-current-method-in-singleton-class ()
+  (skip-unless (treesit-ready-p 'ruby t))
+  (ruby-ts-with-temp-buffer (ruby-ts-test-string
+                             "class C
+                             |  class << self
+                             |    def foo
+                             |      _
+                             |    end
+                             |  end
+                             |end")
+    (search-backward "_")
+    (should (string= (ruby-ts-add-log-current-function) "C.foo"))))
+
+(ert-deftest ruby-ts-add-log-current-method-namespace-shorthand ()
+  (skip-unless (treesit-ready-p 'ruby t))
+  (ruby-ts-with-temp-buffer (ruby-ts-test-string
+                             "class C::D
+                             |  def foo
+                             |    _
+                             |  end
+                             |end")
+    (search-backward "_")
+    (should (string= (ruby-ts-add-log-current-function) "C::D#foo"))))
+
+(ert-deftest ruby-ts-add-log-current-method-after-inner-class ()
+  (skip-unless (treesit-ready-p 'ruby t))
+  (ruby-ts-with-temp-buffer (ruby-ts-test-string
+                             "module M
+                             |  class C
+                             |    class D
+                             |    end
+                             |    def foo
+                             |      _
+                             |    end
+                             |  end
+                             |end")
+    (search-backward "_")
+    (should (string= (ruby-ts-add-log-current-function) "M::C#foo"))))
+
+(ert-deftest ruby-ts-add-log-current-method-after-inner-class-outside-methods ()
+  (skip-unless (treesit-ready-p 'ruby t))
+  (ruby-ts-with-temp-buffer (ruby-ts-test-string
+                             "module M
+                             |  class C
+                             |    class D
+                             |    end
+                             |
+                             |_
+                             |  end
+                             |end")
+    (search-backward "_")
+    (delete-char 1)
+    (should (string= (ruby-ts-add-log-current-function) "M::C"))))
+
+(ert-deftest ruby-ts-add-log-current-method-after-inner-class-outside-methods-with-text ()
+  (skip-unless (treesit-ready-p 'ruby t))
+  (ruby-ts-with-temp-buffer (ruby-ts-test-string
+                             "module M
+                             |  class C
+                             |    class D
+                             |    end
+                             |
+                             |    FOO = 5
+                             |  end
+                             |end")
+    (search-backward "FOO")
+    (should (string= (ruby-ts-add-log-current-function) "M::C"))))
+
+(ert-deftest ruby-ts-add-log-current-method-after-endless-method ()
+  (skip-unless (treesit-ready-p 'ruby t))
+  (ruby-ts-with-temp-buffer (ruby-ts-test-string
+                             "module M
+                             |  class C
+                             |    def foo =
+                             |      4_
+                             |  end
+                             |end")
+    (search-backward "_")
+    (delete-char 1)
+    (should (string= (ruby-ts-add-log-current-function) "M::C#foo"))))
+
+(defmacro ruby-ts-resource-file (file)
+  "Return the true name of FILE in the \"ruby-mode-resources\" directory.
+The directory is looked up next to the file this macro is expanded
+in, or next to the visited file when that is not available."
+  `(when-let ((testfile ,(or (macroexp-file-name)
+                             buffer-file-name)))
+     (let ((default-directory (file-name-directory testfile)))
+       (file-truename
+        (expand-file-name (format "ruby-mode-resources/%s" ,file))))))
+
+(defmacro ruby-ts-deftest-indent (file)
+  "Define a test checking that re-indenting resource FILE is a no-op.
+FILE is visited, switched to `ruby-ts-mode' and re-indented as a
+whole; the test passes when the buffer text is unchanged."
+  `(ert-deftest ,(intern (format "ruby-ts-indent-test/%s" file)) ()
+     ;; :tags '(:expensive-test)
+     (skip-unless (treesit-ready-p 'ruby t))
+     (let ((buf (find-file-noselect (ruby-ts-resource-file ,file))))
+       (unwind-protect
+           (with-current-buffer buf
+             ;; Visiting the file does not select `ruby-ts-mode' by
+             ;; itself, so enable it explicitly; otherwise the
+             ;; indentation of some other mode would be tested.
+             (ruby-ts-mode)
+             (let ((orig (buffer-string)))
+               ;; Indent and check that we get the original text.
+               (indent-region (point-min) (point-max))
+               (should (equal (buffer-string) orig))))
+         (kill-buffer buf)))))
+
+(ruby-ts-deftest-indent "ruby-method-params-indent.rb")
+
+(provide 'ruby-ts-mode-tests)
+
+;;; ruby-ts-mode-tests.el ends here
-- 
2.39.2