From 37d93975780f294fd7b3fefe6281a0a36638192f Mon Sep 17 00:00:00 2001 From: Theodor Thornhill Date: Sun, 8 Jan 2023 20:28:02 +0100 Subject: [PATCH] Add forward-sentence with tree sitter support (bug#60623) * etc/NEWS: Mention the new changes. * lisp/textmodes/paragraphs.el (forward-sentence-default-function): Move old implementation to its own function. (forward-sentence-function): New defvar defaulting to old behavior. (forward-sentence): Use the variable in this function unconditionally. * lisp/treesit.el (treesit-sentence-type-regexp): New defvar. (treesit-forward-sentence): New defun. (treesit-major-mode-setup): Conditionally set forward-sentence-function. * doc/emacs/programs.texi (Defuns): Add new subsection. (Moving by Sentences): Add some documentation with xrefs to the elisp manual and related nodes. * doc/lispref/positions.texi (List Motion): Mention treesit-sentence-type-regexp and describe how to enable this functionality. --- doc/emacs/programs.texi | 56 ++++++++++++++++++++++++++++++++++++ doc/emacs/text.texi | 4 +++ doc/lispref/positions.texi | 17 +++++++++++ etc/NEWS | 18 ++++++++++++ lisp/textmodes/paragraphs.el | 15 ++++++++-- lisp/treesit.el | 27 +++++++++++++++++ 6 files changed, 135 insertions(+), 2 deletions(-) diff --git a/doc/emacs/programs.texi b/doc/emacs/programs.texi index 44cad5a148e..a2cdf6c6eb9 100644 --- a/doc/emacs/programs.texi +++ b/doc/emacs/programs.texi @@ -163,6 +163,7 @@ Emacs we use it for all languages. * Left Margin Paren:: An open-paren or similar opening delimiter starts a defun if it is at the left margin. * Moving by Defuns:: Commands to move over or mark a major definition. +* Moving by Sentences:: Commands to move over certain code units. * Imenu:: Making buffer indexes as menus. * Which Function:: Which Function mode shows which function you are in. @end menu @@ -254,6 +255,61 @@ they do their standard jobs in a way better fitting a particular language. Other major modes may replace any or all of these key bindings for that purpose. +@node Moving by Sentences +@subsection Moving by Sentences +@cindex sentences, in programming languages + + These commands move point or set up the region based on units of +code, also called @dfn{sentences}. Even though sentences are usually +considered when writing human languages, Emacs can use the same +commands to move over certain constructs in programming languages +(@pxref{Sentences}, @pxref{Moving by Defuns}). In a programming +language a sentence is usually a complete language construct smaller +than defuns, but larger than sexps (@pxref{List Motion,,, elisp, The +Emacs Lisp Reference Manual}). What exactly a sentence is in a +non-human language is dependent on the target language, but usually it +is complete statements, such as a variable definition and +initialization, or a conditional statement. An example of a sentence +in the C language could be + +@example +int x = 5; +@end example + +or in the JavaScript language it could look like + +@example +const thing = () => console.log("Hi"); + +const foo = [1] == '1' + ? "No way" + : "..."; +@end example + +@table @kbd +@item M-a +Move to beginning of current or preceding sentence +(@code{backward-sentence}). +@item M-e +Move to end of current or following sentence (@code{forward-sentence}). +@end table + +@cindex move to beginning or end of sentence +@cindex sentence, move to beginning or end +@kindex M-a @r{(programming modes)} +@kindex M-e @r{(programming modes)} +@findex backward-sentence @r{(programming modes)} +@findex forward-sentence @r{(programming modes)} + The commands to move to the beginning and end of the current +sentence are @kbd{M-a} (@code{backward-sentence}) and @kbd{M-e} +(@code{forward-sentence}). If you repeat one of these commands, or +use a positive numeric argument, each repetition moves to the next +sentence in the direction of motion. + + @kbd{M-a} with a negative argument @minus{}@var{n} moves forward +@var{n} times to the next end of a sentence. Likewise, @kbd{M-e} with +a negative argument moves back to a start of a sentence. + @node Imenu @subsection Imenu @cindex index of buffer definitions diff --git a/doc/emacs/text.texi b/doc/emacs/text.texi index 8fbf731a4f7..6e16e743a52 100644 --- a/doc/emacs/text.texi +++ b/doc/emacs/text.texi @@ -253,6 +253,10 @@ value of @code{sentence-end-double-space}. of a sentence. Set the variable @code{sentence-end-without-period} to @code{t} in such cases. + Even though the above mentioned sentence movement commands are based +on human languages, other Emacs modes can set these command to get +similar functionality (@pxref{Moving by Sentences}). + @node Paragraphs @section Paragraphs @cindex paragraphs diff --git a/doc/lispref/positions.texi b/doc/lispref/positions.texi index f3824436246..8d95ecee7ab 100644 --- a/doc/lispref/positions.texi +++ b/doc/lispref/positions.texi @@ -858,6 +858,23 @@ top-level defuns, if the value is @code{nested}, navigation functions recognize nested defuns. @end defvar +@defvar treesit-sentence-type-regexp +The value of this variable is a regexp matching the node type of sentence +nodes. (For ``node'' and ``node type'', @pxref{Parsing Program Source}.) +@end defvar + +@findex treesit-forward-sentence +@findex forward-sentence +@findex backward-sentence +If Emacs is compiled with tree-sitter, it can use the tree-sitter +parser information to move across syntax constructs. Since what +exactly is considered a sentence varies between languages, a major +mode should set @code{treesit-sentence-type-regexp} to determine that. +Then the mode can get navigation-by-sentence functionality for free, +by using @code{forward-sentence} and +@code{backward-sentence}(@pxref{Moving by Sentences,,, emacs, The +extensible self-documenting text editor}). + @node Skipping Characters @subsection Skipping Characters @cindex skipping characters diff --git a/etc/NEWS b/etc/NEWS index 3aa8f2abb77..0c782eeaee8 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -66,6 +66,24 @@ treesit.el now unconditionally sets 'transpose-sexps-function' for all Tree-sitter modes. This functionality utilizes the new 'transpose-sexps-function'. +** Commands and variables to move by program statements + +*** New variable 'forward-sentence-function'. +Major modes now can set this variable to customize the behavior of the +'forward-sentence' function. + +*** New function 'forward-sentence-default-function'. +The previous implementation of 'forward-sentence' is moved into its +own function, to be bound by 'forward-sentence-function'. + +*** New defvar-local 'treesit-sentence-type-regexp. +Similarly to 'treesit-defun-type-regexp', this variable is used to +navigate sentences in Tree-sitter enabled modes. + +*** New function 'treesit-forward-sentence'. +treesit.el now conditionally sets 'forward-sentence-function' for all +Tree-sitter modes that sets 'treesit-sentence-type-regexp'. + * Changes in Specialized Modes and Packages in Emacs 30.1 --- diff --git a/lisp/textmodes/paragraphs.el b/lisp/textmodes/paragraphs.el index 73abb155aaa..bf249fdcdfb 100644 --- a/lisp/textmodes/paragraphs.el +++ b/lisp/textmodes/paragraphs.el @@ -441,13 +441,12 @@ the current paragraph with the one containing the mark." (if (< (point) (point-max)) (end-of-paragraph-text)))))) -(defun forward-sentence (&optional arg) +(defun forward-sentence-default-function (&optional arg) "Move forward to next end of sentence. With argument, repeat. When ARG is negative, move backward repeatedly to start of sentence. The variable `sentence-end' is a regular expression that matches ends of sentences. Also, every paragraph boundary terminates sentences as well." - (interactive "^p") (or arg (setq arg 1)) (let ((opoint (point)) (sentence-end (sentence-end))) @@ -480,6 +479,18 @@ sentences. Also, every paragraph boundary terminates sentences as well." (let ((npoint (constrain-to-field nil opoint t))) (not (= npoint opoint))))) +(defvar forward-sentence-function #'forward-sentence-default-function + "Function to be used to calculate sentence movements. +See `forward-sentence' for a description of its behavior.") + +(defun forward-sentence (&optional arg) + "Move forward to next end of sentence. With argument ARG, repeat. +If ARG is negative, move backward repeatedly to start of +sentence. Delegates its work to `forward-sentence-function'." + (interactive "^p") + (or arg (setq arg 1)) + (funcall forward-sentence-function arg)) + (defun count-sentences (start end) "Count sentences in current buffer from START to END." (let ((sentences 0) diff --git a/lisp/treesit.el b/lisp/treesit.el index 25b2c70ce0a..f2e1b4ac807 100644 --- a/lisp/treesit.el +++ b/lisp/treesit.el @@ -1795,6 +1795,31 @@ comments and multiline string literals. For example, \"text_block\" in the case of a string. This is used by `prog-fill-reindent-defun' and friends.") +(defvar-local treesit-sentence-type-regexp nil + "A regexp that matches the node type of sentence nodes. + +A sentence node is a node that is bigger than a sexp, and +delimits larger statements in the source code. It is, however, +smaller in scope than defuns. This is used by +`treesit-forward-sentence' and friends.") + +(defun treesit-forward-sentence (&optional arg) + "Tree-sitter `forward-sentence-function' function. + +ARG is the same as in `forward-sentence'. + +If inside comment or other nodes described in +`treesit-sentence-type-regexp', use +`forward-sentence-default-function', else move across nodes as +described by `treesit-sentence-type-regexp'." + (if (string-match-p + treesit-text-type-regexp + (treesit-node-type (treesit-node-at (point)))) + (funcall #'forward-sentence-default-function arg) + (funcall + (if (> arg 0) #'treesit-end-of-thing #'treesit-beginning-of-thing) + treesit-sentence-type-regexp (abs arg)))) + (defun treesit-default-defun-skipper () "Skips spaces after navigating a defun. This function tries to move to the beginning of a line, either by @@ -2259,6 +2284,8 @@ before calling this function." #'treesit-add-log-current-defun)) (setq-local transpose-sexps-function #'treesit-transpose-sexps) + (when treesit-sentence-type-regexp + (setq-local forward-sentence-function #'treesit-forward-sentence)) ;; Imenu. (when treesit-simple-imenu-settings -- 2.39.5