* configure.ac (HAVE_TREE_SITTER, TREE_SITTER_OBJ): New variables.
(DYNAMIC_LIB_SUFFIX): new variable, I copied code from MODULES_SUFFIX
so the diff looks this way.
* doc/lispref/elisp.texi (Top): Add tree-sitter manual.
* doc/lispref/modes.texi (Font Lock Mode): Mention tree-sitter.
(Parser-based Font Lock): New section.
(Auto-Indentation): Mention tree-sitter.
(Parser-based Indentation): New section.
* doc/lispref/parsing.texi (Parsing Program Source): New chapter.
* lisp/emacs-lisp/cl-preloaded.el (cl--typeof-types): Add
treesit-parser and treesit-node type.
* lisp/treesit.el: New file.
* src/Makefile.in (TREE_SITTER_LIBS, TREE_SITTER_FLAGS,
TREE_SITTER_OBJ): New variables.
* src/alloc.c:
(cleanup_vector): Add cleanup code for treesit-parser and
treesit-node.
* src/casefiddle.c (casify_region): Notify tree-sitter parser of
buffer change.
* src/data.c (Ftype_of): Add treesit-parser and treesit-node type.
(Qtreesit_parser, Qtreesit_node): New symbol.
* src/emacs.c (main): Add symbols in treesit.c.
* src/eval.c (define_error): Move the function to here.
* src/insdel.c (insert_1_both, insert_from_string_1, insert_from_gap,
insert_from_buffer_1, replace_range, del_range_2): Notify tree-sitter
parser of buffer change.
* src/json.c (define_error): Move this function out.
* src/lisp.h (DEFINE_GDB_SYMBOL_BEGIN): Add treesit-parser and
treesit-node.
* src/lread.c (Vdynamic_library_suffixes): New variable.
* src/print.c (print_vectorlike): Add code for printing
treesit-parser and treesit-node.
* src/treesit.c: New file.
* src/treesit.h: New file.
* test/src/treesit-tests.el: New file.
OPTION_DEFAULT_OFF([imagemagick],[compile with ImageMagick image support])
OPTION_DEFAULT_ON([native-image-api], [don't use native image APIs (GDI+ on Windows)])
OPTION_DEFAULT_IFAVAILABLE([json], [compile with native JSON support])
+OPTION_DEFAULT_IFAVAILABLE([tree-sitter], [compile with tree-sitter])
OPTION_DEFAULT_ON([xft],[don't use XFT for anti aliased fonts])
OPTION_DEFAULT_ON([harfbuzz],[don't use HarfBuzz for text shaping])
AC_SUBST(JSON_CFLAGS)
AC_SUBST(JSON_OBJ)
+HAVE_TREE_SITTER=no
+TREE_SITTER_OBJ=
+
+if test "${with_tree_sitter}" != "no"; then
+ dnl TODO: we should use tree-sitter >= 0.20.2, but right now all
+ dnl tree-sitter libraries distributed are versioned at 0.0, so for
+ dnl the ease of development we'll just leave the version
+ dnl requirement at 0.0 for now.
+ EMACS_CHECK_MODULES([TREE_SITTER], [tree-sitter >= 0.0],
+ [HAVE_TREE_SITTER=yes], [HAVE_TREE_SITTER=no])
+ if test "${HAVE_TREE_SITTER}" = yes; then
+ AC_DEFINE(HAVE_TREE_SITTER, 1, [Define if using tree-sitter.])
+ TREE_SITTER_LIBS=-ltree-sitter
+ TREE_SITTER_OBJ="treesit.o"
+ fi
+fi
+
+AC_SUBST(TREE_SITTER_LIBS)
+AC_SUBST(TREE_SITTER_CFLAGS)
+AC_SUBST(TREE_SITTER_OBJ)
+
NOTIFY_OBJ=
NOTIFY_SUMMARY=no
fi
AC_SUBST(LIBZ)
+### Dynamic library support
+case $opsys in
+ cygwin|mingw32) DYNAMIC_LIB_SUFFIX=".dll" ;;
+ darwin) DYNAMIC_LIB_SUFFIX=".dylib" ;;
+ *) DYNAMIC_LIB_SUFFIX=".so" ;;
+esac
+case "${opsys}" in
+ darwin) DYNAMIC_LIB_SECONDARY_SUFFIX='.so' ;;
+ *) DYNAMIC_LIB_SECONDARY_SUFFIX='' ;;
+esac
+AC_DEFINE_UNQUOTED(DYNAMIC_LIB_SUFFIX, "$DYNAMIC_LIB_SUFFIX",
+ [System extension for dynamic libraries])
+AC_DEFINE_UNQUOTED(DYNAMIC_LIB_SECONDARY_SUFFIX, "$DYNAMIC_LIB_SECONDARY_SUFFIX",
+ [Alternative system extension for dynamic libraries.])
+
+AC_SUBST(DYNAMIC_LIB_SUFFIX)
+AC_SUBST(DYNAMIC_LIB_SECONDARY_SUFFIX)
+
### Dynamic modules support
LIBMODULES=
HAVE_MODULES=no
MODULES_OBJ=
NEED_DYNLIB=no
-case $opsys in
- cygwin|mingw32) MODULES_SUFFIX=".dll" ;;
- darwin) MODULES_SUFFIX=".dylib" ;;
- *) MODULES_SUFFIX=".so" ;;
-esac
-case "${opsys}" in
- darwin) MODULES_SECONDARY_SUFFIX='.so' ;;
- *) MODULES_SECONDARY_SUFFIX='' ;;
-esac
+MODULES_SUFFIX="${DYNAMIC_LIB_SUFFIX}"
+MODULES_SECONDARY_SUFFIX="${DYNAMIC_LIB_SECONDARY_SUFFIX}"
if test "${with_modules}" != "no"; then
case $opsys in
gnu|gnu-linux)
NEED_DYNLIB=yes
AC_DEFINE(HAVE_MODULES, 1, [Define to 1 if dynamic modules are enabled])
AC_DEFINE_UNQUOTED(MODULES_SUFFIX, "$MODULES_SUFFIX",
- [System extension for dynamic libraries])
+ [System extension for dynamic modules])
if test -n "${MODULES_SECONDARY_SUFFIX}"; then
AC_DEFINE_UNQUOTED(MODULES_SECONDARY_SUFFIX, "$MODULES_SECONDARY_SUFFIX",
- [Alternative system extension for dynamic libraries.])
+ [Alternative system extension for dynamic modules.])
fi
fi
AC_SUBST(MODULES_OBJ)
*) MISSING="$MISSING json"
WITH_IFAVAILABLE="$WITH_IFAVAILABLE --with-json=ifavailable";;
esac
+case $with_tree_sitter,$HAVE_TREE_SITTER in
+ no,* | ifavailable,* | *,yes) ;;
+ *) MISSING="$MISSING tree-sitter"
+ WITH_IFAVAILABLE="$WITH_IFAVAILABLE --with-tree-sitter=ifavailable";;
+esac
+
if test "X${MISSING}" != X; then
# If we have a missing library, and we don't have pkg-config installed,
# the missing pkg-config may be the reason. Give the user a hint.
optsep=
emacs_config_features=
for opt in ACL BE_APP CAIRO DBUS FREETYPE GCONF GIF GLIB GMP GNUTLS GPM GSETTINGS \
- HARFBUZZ IMAGEMAGICK JPEG JSON LCMS2 LIBOTF LIBSELINUX LIBSYSTEMD LIBXML2 \
+ HARFBUZZ IMAGEMAGICK JPEG JSON TREE-SITTER LCMS2 LIBOTF LIBSELINUX LIBSYSTEMD LIBXML2 \
M17N_FLT MODULES NATIVE_COMP NOTIFY NS OLDXMENU PDUMPER PGTK PNG RSVG SECCOMP \
SOUND SQLITE3 THREADS TIFF TOOLKIT_SCROLL_BARS \
UNEXEC WEBP X11 XAW3D XDBE XFT XIM XINPUT2 XPM XWIDGETS X_TOOLKIT \
Does Emacs use -lxft? ${HAVE_XFT}
Does Emacs use -lsystemd? ${HAVE_LIBSYSTEMD}
Does Emacs use -ljansson? ${HAVE_JSON}
+ Does Emacs use -ltree-sitter? ${HAVE_TREE_SITTER}
Does Emacs use the GMP library? ${HAVE_GMP}
Does Emacs directly use zlib? ${HAVE_ZLIB}
Does Emacs have dynamic modules support? ${HAVE_MODULES}
* Non-ASCII Characters:: Non-ASCII text in buffers and strings.
* Searching and Matching:: Searching buffers for strings or regexps.
* Syntax Tables:: The syntax table controls word and list parsing.
+* Parsing Program Source:: Generate syntax tree for program sources.
* Abbrevs:: How Abbrev mode works, and its data structures.
* Threads:: Concurrency in Emacs Lisp.
* Syntax Table Internals:: How syntax table information is stored.
* Categories:: Another way of classifying character syntax.
+Parsing Program Source
+
+* Language Definitions:: Loading tree-sitter language definitions.
+* Using Parser:: Introduction to parsers.
+* Retrieving Node:: Retrieving node from syntax tree.
+* Accessing Node:: Accessing node information.
+* Pattern Matching:: Pattern matching with query patterns.
+* Multiple Languages:: Parse text written in multiple languages.
+* Tree-sitter C API:: Compare the C API and the ELisp API.
+
Syntax Descriptors
* Syntax Class Table:: Table of syntax classes.
@include searching.texi
@include syntax.texi
+@include parsing.texi
@include abbrevs.texi
@include threads.texi
@include processes.texi
in which contexts. This section explains how to customize Font Lock for
a particular major mode.
- Font Lock mode finds text to highlight in two ways: through
-syntactic parsing based on the syntax table, and through searching
-(usually for regular expressions). Syntactic fontification happens
-first; it finds comments and string constants and highlights them.
-Search-based fontification happens second.
+ Font Lock mode finds text to highlight in three ways: through
+syntactic parsing based on the syntax table, through searching
+(usually for regular expressions), and through parsing based on a
+full-blown parser. Syntactic fontification happens first; it finds
+comments and string constants and highlights them. Search-based
+fontification happens second. Parser-based fontification can be
+optionally enabled and it will precede the other two fontifications.
@menu
* Font Lock Basics:: Overview of customizing Font Lock.
* Syntactic Font Lock:: Fontification based on syntax tables.
* Multiline Font Lock:: How to coerce Font Lock into properly
highlighting multiline constructs.
+* Parser-based Font Lock:: Use a parser for fontification.
@end menu
@node Font Lock Basics
reasonably fast.
@end defvar
+@node Parser-based Font Lock
+@subsection Parser-based Font Lock
+
+@c This node is written when the only parser Emacs has is tree-sitter,
+@c if in the future more parsers are supported, feel free to reorganize
+@c and rewrite this node to describe multiple parsers in parallel.
+
+Besides simple syntactic font lock and search-based font lock, Emacs
+also provides complete syntactic font lock with the help of a parser,
+currently provided by the tree-sitter library (@pxref{Parsing Program
+Source}). Because it is an optional feature, parser-based font lock
+is less integrated with Emacs. Most variables introduced in previous
+sections only apply to search-based font lock, except for
+@var{font-lock-maximum-decoration}.
+
+@defun treesit-font-lock-enable
+This function enables parser-based font lock in the current buffer.
+@end defun
+
+Parser-based font lock and other font lock mechanisms are not mutually
+exclusive. By default, if enabled, parser-based font lock runs first,
+then the simple syntactic font lock (if enabled), then search-based
+font lock.
+
+Although parser-based font lock doesn't share the same customization
+variables with search-based font lock, parser-based font lock uses
+similar customization schemes. Just like @var{font-lock-keywords} and
+@var{font-lock-defaults}, parser-based font lock has
+@var{treesit-font-lock-settings} and
+@var{treesit-font-lock-defaults}.
+
+@defvar treesit-font-lock-settings
+A list of @var{setting}s for tree-sitter font lock.
+
+Each @var{setting} should look like
+
+@example
+(@var{language} @var{query})
+@end example
+
+Each @var{setting} controls one parser (often of different language).
+And @var{language} is the language symbol (@pxref{Language
+Definitions}); @var{query} is either a string query or a sexp query
+(@pxref{Pattern Matching}).
+
+Capture names in @var{query} should be face names like
+@code{font-lock-keyword-face}. The captured node will be fontified
+with that face. Capture names can also be function names, in which
+case the function is called with (@var{start} @var{end} @var{node}),
+where @var{start} and @var{end} are the start and end position of the
+node in buffer, and @var{node} is the tree-sitter node object. If a
+capture name is both a face and a function, face takes priority.
+
+Generally, major modes should set @var{treesit-font-lock-defaults},
+and let Emacs automatically populate this variable.
+@end defvar
+
+@defvar treesit-font-lock-defaults
+This variable stores defaults for tree-sitter font lock. It is a list
+of
+
+@example
+(@var{default} @var{:keyword} @var{value}...)
+@end example
+
+A @var{default} may be a symbol or a list of symbols (for different
+levels of fontification). The symbol(s) can be a variable or a
+function. If a symbol is both a variable and a function, it is used
+as a function. Different levels of fontification can be controlled by
+@var{font-lock-maximum-decoration}.
+
+The symbol(s) in @var{default} should contain or return a
+@var{setting} as described in @var{treesit-font-lock-settings}.
+
+The remaining @var{keyword}s and @var{value}s are additional settings that
+could be used to alter the fontification behavior. Currently there
+aren't any.
+@end defvar
+
+Multi-language major modes should provide range functions in
+@var{treesit-range-functions}, and Emacs will set the ranges
+accordingly before fontifying a region (@pxref{Multiple Languages}).
+
@node Auto-Indentation
@section Automatic Indentation of code
so if your language seems somewhat similar to one of those languages,
you might try to use that engine. @c FIXME: documentation?
Another one is SMIE which takes an approach in the spirit
-of Lisp sexps and adapts it to non-Lisp languages.
+of Lisp sexps and adapts it to non-Lisp languages. Yet another one is
+to rely on a full-blown parser, for example, the tree-sitter library.
@menu
* SMIE:: A simple minded indentation engine.
+* Parser-based Indentation:: Parser-based indentation engine.
@end menu
@node SMIE
@code{eval: (smie-config-local '(@var{rules}))}.
@end defun
+@node Parser-based Indentation
+@subsection Parser-based Indentation
+
+@c This node is written when the only parser Emacs has is tree-sitter,
+@c if in the future more parsers are supported, feel free to reorganize
+@c and rewrite this node to describe multiple parsers in parallel.
+
+When built with the tree-sitter library (@pxref{Parsing Program
+Source}), Emacs can parse program source and produce a syntax tree.
+And this syntax tree can be used for indentation. For maximum
+flexibility, we could write a custom indent function that queries the
+syntax tree and indents accordingly for each language, but that would
+be a lot of work. It is more convenient to use the simple indentation
+engine described below: we only need to write some indentation rules
+and the engine takes care of the rest.
+
+To enable the indentation engine, set the value of
+@var{indent-line-function} to @code{treesit-indent}.
+
+@defvar treesit-indent-function
+This variable stores the actual function called by
+@code{treesit-indent}. By default, its value is
+@code{treesit-simple-indent}. In the future we might add other
+more complex indentation engines, if @code{treesit-simple-indent}
+proves to be insufficient.
+@end defvar
+
+@heading Writing indentation rules
+
+@defvar treesit-simple-indent-rules
+This local variable stores indentation rules for every language. It is
+a list of
+
+@example
+(@var{language} . @var{rules})
+@end example
+
+where @var{language} is a language symbol, @var{rules} is a list of
+
+@example
+(@var{matcher} @var{anchor} @var{offset})
+@end example
+
+The @var{matcher} determines whether this rule applies, @var{anchor}
+and @var{offset} together determine which column to indent to.
+
+A @var{matcher} is a function that takes three arguments (@var{node}
+@var{parent} @var{bol}). Argument @var{bol} is the position at which we
+are indenting: the position of the first non-whitespace character from
+the beginning of line; @var{node} is the largest (highest-in-tree)
+node that starts at that point; @var{parent} is the parent of
+@var{node}.
+
+If @var{matcher} returns non-nil, meaning the rule matches, Emacs then
+uses @var{anchor} to find an anchor; @var{anchor} should be a function
+that takes the same arguments (@var{node} @var{parent} @var{bol}) and
+returns a point.
+
+Finally Emacs computes the column of that point returned by
+@var{anchor} and adds @var{offset} to it, and indents to that column.
+
+For @var{matcher} and @var{anchor}, Emacs provides some convenient
+presets to spare us from writing these functions ourselves. They are
+stored in @var{treesit-simple-indent-presets}, see below.
+@end defvar
+
+@defvar treesit-simple-indent-presets
+This is a list of presets for @var{matcher}s and @var{anchor}s in
+@var{treesit-simple-indent-rules}. Each of them represents a
+function that takes @var{node}, @var{parent} and @var{bol} as
+arguments.
+
+@example
+(match @var{node-type} @var{parent-type}
+ @var{node-field} @var{node-index-min} @var{node-index-max})
+@end example
+
+This matcher checks if @var{node}'s type is @var{node-type},
+@var{parent}'s type is @var{parent-type}, @var{node}'s field name in
+@var{parent} is @var{node-field}, and @var{node}'s index among its
+siblings is between @var{node-index-min} and @var{node-index-max}. If
+the value of a constraint is nil, this matcher doesn't check for that
+constraint. For example, to match the first child where parent is
+@code{argument_list}, use
+
+@example
+(match nil "argument_list" nil 0 0)
+@end example
+
+@example
+no-node
+@end example
+
+This matcher matches the case where @var{node} is nil, i.e., there is
+no node that starts at @var{bol}. This is the case when @var{bol} is
+at an empty line or inside a multi-line string, etc.
+
+@example
+(parent-is @var{type})
+@end example
+
+This matcher matches if @var{parent}'s type is @var{type}.
+
+@example
+(node-is @var{type})
+@end example
+
+This matcher matches if @var{node}'s type is @var{type}.
+
+@example
+(query @var{query})
+@end example
+
+This matcher matches if querying @var{parent} with @var{query}
+captures @var{node}. The capture name does not matter.
+
+@example
+first-sibling
+@end example
+
+This anchor returns the start of the first child of @var{parent}.
+
+@example
+parent
+@end example
+
+This anchor returns the start of @var{parent}.
+
+@example
+parent-bol
+@end example
+
+This anchor returns the position of the first non-space character on
+the line that @var{parent} is on.
+
+@example
+prev-sibling
+@end example
+
+This anchor returns the start of the previous sibling of @var{node}.
+
+@example
+no-indent
+@end example
+
+This anchor returns the start of @var{node}, i.e., do not indent.
+
+@example
+prev-line
+@end example
+
+This anchor returns the start of the first named node on the previous
+line. This can be used for indenting an empty line.
+@end defvar
+
+@heading Indentation utilities
+
+Here are some utility functions that can help writing indentation
+rules.
+
+@defun treesit-check-indent mode
+This function checks the current buffer's indentation against major mode
+@var{mode}. It indents the current line in @var{mode} and compares
+the indentation with the current indentation. Then it pops up a diff
+buffer showing the difference. Correct indentation (target) is in
+green, current indentation is in red.
+@end defun
+
+It is also helpful to use @code{treesit-inspect-mode} when writing
+indentation rules.
@node Desktop Save Mode
@section Desktop Save Mode
--- /dev/null
+@c -*- mode: texinfo; coding: utf-8 -*-
+@c This is part of the GNU Emacs Lisp Reference Manual.
+@c Copyright (C) 2021 Free Software Foundation, Inc.
+@c See the file elisp.texi for copying conditions.
+@node Parsing Program Source
+@chapter Parsing Program Source
+
+Emacs provides various ways to parse program source text and produce a
+@dfn{syntax tree}. In a syntax tree, text is no longer a
+one-dimensional stream but a structured tree of nodes, where each node
+represents a piece of text. Thus a syntax tree can enable
+interesting features like precise fontification, indentation,
+navigation, structured editing, etc.
+
+Emacs has a simple facility for parsing balanced expressions
+(@pxref{Parsing Expressions}). There is also the SMIE library for generic
+navigation and indentation (@pxref{SMIE}).
+
+Emacs also provides integration with the tree-sitter library
+(@uref{https://tree-sitter.github.io/tree-sitter}) if compiled with
+it. The tree-sitter library implements an incremental parser and has
+support from a wide range of programming languages.
+
+@defun treesit-available-p
+This function returns non-nil if tree-sitter features are available
+for this Emacs instance.
+@end defun
+
+To use tree-sitter features in font lock and indentation, see
+@ref{Parser-based Font Lock} and @ref{Parser-based Indentation}.
+
+To access the syntax tree of the text in a buffer, we need to first
+load a language definition and create a parser with it. Next, we can
+query the parser for specific nodes in the syntax tree. Then, we can
+access various information about the node, and we can pattern-match a
+node with a powerful syntax. Finally, we explain how to work with
+source files that mix multiple languages. The following sections
+explain how to do each of the tasks in detail.
+
+@menu
+* Language Definitions:: Loading tree-sitter language definitions.
+* Using Parser:: Introduction to parsers.
+* Retrieving Node:: Retrieving node from syntax tree.
+* Accessing Node:: Accessing node information.
+* Pattern Matching:: Pattern matching with query patterns.
+* Multiple Languages:: Parse text written in multiple languages.
+* Tree-sitter C API:: Compare the C API and the ELisp API.
+@end menu
+
+@node Language Definitions
+@section Tree-sitter Language Definitions
+
+@heading Loading a language definition
+
+Tree-sitter relies on language definitions to parse text in that
+language. In Emacs, a language definition is represented by a symbol.
+For example, the C language definition is represented as @code{c}, and
+@code{c} can be passed to tree-sitter functions as the @var{language}
+argument.
+
+@vindex treesit-extra-load-path
+@vindex treesit-load-language-error
+@vindex treesit-load-suffixes
+Tree-sitter language definitions are distributed as dynamic libraries.
+In order to use a language definition in Emacs, you need to make sure
+that the dynamic library is installed on the system. Emacs looks for
+language definitions under load paths in
+@var{treesit-extra-load-path}, @var{user-emacs-directory}/tree-sitter,
+and system default locations for dynamic libraries, in that order.
+Emacs tries each extension in @var{treesit-load-suffixes}. If Emacs
+cannot find the library or has a problem loading it, Emacs signals
+@var{treesit-load-language-error}. The signal data is a list of
+specific error messages.
+
+@defun treesit-language-available-p language
+This function checks whether the dynamic library for @var{language} is
+present on the system, and returns non-nil if it is.
+@end defun
+
+@vindex treesit-load-name-override-list
+By convention, the dynamic library for @var{language} is
+@code{libtree-sitter-@var{language}.@var{ext}}, where @var{ext} is the
+system-specific extension for dynamic libraries. Also by convention,
+the function provided by that library is named
+@code{tree_sitter_@var{language}}. If a language definition doesn't
+follow this convention, you should add an entry
+
+@example
+(@var{language} @var{library-base-name} @var{function-name})
+@end example
+
+to @var{treesit-load-name-override-list}, where
+@var{library-base-name} is the base filename for the dynamic library
+(conventionally @code{libtree-sitter-@var{language}}), and
+@var{function-name} is the function provided by the library
+(conventionally @code{tree_sitter_@var{language}}). For example,
+
+@example
+(cool-lang "libtree-sitter-coool" "tree_sitter_cooool")
+@end example
+
+for a language too cool to abide by the rules.
+
+@heading Concrete syntax tree
+
+A syntax tree is what a language definition defines (more or less) and
+what a parser generates. In a syntax tree, each node represents a
+piece of text, and is connected to each other by a parent-child
+relationship. For example, if the source text is
+
+@example
+1 + 2
+@end example
+
+@noindent
+its syntax tree could be
+
+@example
+@group
+ +--------------+
+ | root "1 + 2" |
+ +--------------+
+ |
+ +--------------------------------+
+ | expression "1 + 2" |
+ +--------------------------------+
+ | | |
++------------+ +--------------+ +------------+
+| number "1" | | operator "+" | | number "2" |
++------------+ +--------------+ +------------+
+@end group
+@end example
+
+We can also represent it as an s-expression:
+
+@example
+(root (expression (number) (operator) (number)))
+@end example
+
+@subheading Node types
+
+@cindex tree-sitter node type
+@anchor{tree-sitter node type}
+@cindex tree-sitter named node
+@anchor{tree-sitter named node}
+@cindex tree-sitter anonymous node
+Names like @code{root}, @code{expression}, @code{number},
+@code{operator} are nodes' @dfn{type}. However, not all nodes in a
+syntax tree have a type. Nodes that don't are @dfn{anonymous nodes},
+and nodes with a type are @dfn{named nodes}. Anonymous nodes are
+tokens with fixed spellings, including punctuation characters like
+bracket @samp{]}, and keywords like @code{return}.
+
+@subheading Field names
+
+@cindex tree-sitter node field name
+@anchor{tree-sitter node field name} To make the syntax tree easier to
+analyze, many language definitions assign @dfn{field names} to child
+nodes. For example, a @code{function_definition} node could have a
+@code{declarator} and a @code{body}:
+
+@example
+@group
+(function_definition
+ declarator: (declaration)
+ body: (compound_statement))
+@end group
+@end example
+
+@deffn Command treesit-inspect-mode
+This minor mode displays the node that @emph{starts} at point in
+mode-line. The mode-line will display
+
+@example
+@var{parent} @var{field-name}: (@var{child} (@var{grand-child} (...)))
+@end example
+
+@var{child}, @var{grand-child}, and @var{grand-grand-child}, etc, are
+nodes that have their beginning at point. And @var{parent} is the
+parent of @var{child}.
+
+If there is no node that starts at point, i.e., point is in the middle
+of a node, then the mode-line only displays the smallest node that
+spans point, and its immediate parent.
+
+This minor mode doesn't create parsers on its own. It simply uses the
+first parser in @var{treesit-parser-list} (@pxref{Using Parser}).
+@end deffn
+
+@heading Reading the grammar definition
+
+Authors of language definitions define the @dfn{grammar} of a
+language, and this grammar determines how a parser constructs a
+concrete syntax tree out of the text. In order to use the syntax
+tree effectively, we need to read the @dfn{grammar file}.
+
+The grammar file is usually @code{grammar.js} in a language
+definition's project repository. The link to a language definition's
+home page can be found on tree-sitter's homepage
+(@uref{https://tree-sitter.github.io/tree-sitter}).
+
+The grammar is written in JavaScript syntax. For example, the rule
+matching a @code{function_definition} node looks like
+
+@example
+@group
+function_definition: $ => seq(
+ $.declaration_specifiers,
+ field('declarator', $.declaration),
+ field('body', $.compound_statement)
+)
+@end group
+@end example
+
+The rule is represented by a function that takes a single argument
+@var{$}, representing the whole grammar. The function itself is
+constructed by other functions: the @code{seq} function puts together a
+sequence of children; the @code{field} function annotates a child with
+a field name. If we write the above definition in BNF syntax, it
+would look like
+
+@example
+@group
+function_definition :=
+ <declaration_specifiers> <declaration> <compound_statement>
+@end group
+@end example
+
+@noindent
+and the node returned by the parser would look like
+
+@example
+@group
+(function_definition
+ (declaration_specifiers)
+ declarator: (declaration)
+ body: (compound_statement))
+@end group
+@end example
+
+Below is a list of functions that one will see in a grammar
+definition. Each function takes other rules as arguments and returns
+a new rule.
+
+@itemize @bullet
+@item
+@code{seq(rule1, rule2, ...)} matches each rule one after another.
+
+@item
+@code{choice(rule1, rule2, ...)} matches one of the rules in its
+arguments.
+
+@item
+@code{repeat(rule)} matches @var{rule} for @emph{zero or more} times.
+This is like the @samp{*} operator in regular expressions.
+
+@item
+@code{repeat1(rule)} matches @var{rule} for @emph{one or more} times.
+This is like the @samp{+} operator in regular expressions.
+
+@item
+@code{optional(rule)} matches @var{rule} for @emph{zero or one} time.
+This is like the @samp{?} operator in regular expressions.
+
+@item
+@code{field(name, rule)} assigns field name @var{name} to the child
+node matched by @var{rule}.
+
+@item
+@code{alias(rule, alias)} makes nodes matched by @var{rule} appear as
+@var{alias} in the syntax tree generated by the parser. For example,
+
+@example
+alias(preprocessor_call_exp, call_expression)
+@end example
+
+makes any node matched by @code{preprocessor_call_exp} appear as
+@code{call_expression}.
+@end itemize
+
+Below are grammar functions less interesting for a reader of a
+language definition.
+
+@itemize
+@item
+@code{token(rule)} marks @var{rule} to produce a single leaf node.
+That is, instead of generating a parent node with individual child
+nodes under it, everything is combined into a single leaf node.
+
+@item
+Normally, grammar rules ignore preceding whitespace;
+@code{token.immediate(rule)} changes @var{rule} to match only when
+there is no preceding whitespace.
+
+@item
+@code{prec(n, rule)} gives @var{rule} a level @var{n} precedence.
+
+@item
+@code{prec.left([n,] rule)} marks @var{rule} as left-associative,
+optionally with level @var{n}.
+
+@item
+@code{prec.right([n,] rule)} marks @var{rule} as right-associative,
+optionally with level @var{n}.
+
+@item
+@code{prec.dynamic(n, rule)} is like @code{prec}, but the precedence
+is applied at runtime instead.
+@end itemize
+
+The tree-sitter project talks about writing a grammar in more detail:
+@uref{https://tree-sitter.github.io/tree-sitter/creating-parsers}.
+Read especially ``The Grammar DSL'' section.
+
+@node Using Parser
+@section Using Tree-sitter Parser
+@cindex Tree-sitter parser
+
+This section describes how to create and configure a tree-sitter
+parser. In Emacs, each tree-sitter parser is associated with a
+buffer. As we edit the buffer, the associated parser is automatically
+kept up-to-date.
+
+@defvar treesit-disabled-modes
+Before creating a parser, it is perhaps good to check whether we
+should use tree-sitter at all. Sometimes a user doesn't want to use
+tree-sitter features for a major mode. To turn off tree-sitter for a
+mode, they add that mode to this variable.
+@end defvar
+
+@defvar treesit-maximum-size
+If users want to turn off tree-sitter for buffers larger than a
+particular size (because tree-sitter consumes memory ~10 times the
+buffer size for storing the syntax tree), they set this variable to
+that size.
+@end defvar
+
+@defun treesit-should-enable-p &optional mode
+This function returns non-nil if @var{mode} (defaults to the current
+major mode) should activate tree-sitter features. The result depends
+on the value of @var{treesit-disabled-modes} and
+@var{treesit-maximum-size} described above. The result also
+depends on, of course, the result of @code{treesit-available-p}.
+
+Writers of major modes or other packages are responsible for calling
+this function and determining whether to activate tree-sitter features.
+@end defun
+
+
+@cindex Creating tree-sitter parsers
+To create a parser, we provide a buffer to parse and the language to
+use (@pxref{Language Definitions}). Emacs provides several creation
+functions for different use cases.
+
+@defun treesit-get-parser-create language
+This function is the most convenient one. It gives you a parser that
+recognizes @var{language} for the current buffer. The function
+checks if there already exists a parser suiting the need, and only
+creates a new one when it can't find one.
+
+@example
+@group
+;; Create a parser for C programming language.
+(treesit-get-parser-create 'c)
+ @c @result{} #<treesit-parser for c in *scratch*>
+@end group
+@end example
+@end defun
+
+@defun treesit-get-parser language
+This function is like @code{treesit-get-parser-create}, but it
+always creates a new parser.
+@end defun
+
+@defun treesit-parser-create buffer language
+This function is the most primitive, requiring both the buffer to
+associate to, and the language to use. If @var{buffer} is nil, the
+current buffer is used.
+@end defun
+
+Given a parser, we can query information about it:
+
+@defun treesit-parser-buffer parser
+Returns the buffer associated with @var{parser}.
+@end defun
+
+@defun treesit-parser-language parser
+Returns the language that @var{parser} uses.
+@end defun
+
+@defun treesit-parser-p object
+Checks if @var{object} is a tree-sitter parser. Return non-nil if it
+is, return nil otherwise.
+@end defun
+
+There is no need to explicitly parse a buffer, because parsing is done
+automatically and lazily. A parser only parses when we query for a
+node in its syntax tree. Therefore, when a parser is first created,
+it doesn't parse the buffer; instead, it waits until we query for a
+node for the first time. Similarly, when some change is made in the
+buffer, a parser doesn't re-parse immediately and only records some
+necessary information to later re-parse when necessary.
+
+@vindex treesit-buffer-too-large
+When a parser does parse, it checks the size of the buffer.
+Tree-sitter can only handle buffers no larger than about 4GB. If the
+size exceeds that, Emacs signals @var{treesit-buffer-too-large}
+with signal data being the buffer size.
+
+@vindex treesit-parser-list
+Once a parser is created, Emacs automatically adds it to the
+buffer-local variable @var{treesit-parser-list}. Every time a
+change is made to the buffer, Emacs updates parsers in this list so
+they can update their syntax tree incrementally. Therefore, one must
+not remove parsers from this list and put the parser back in: if any
+change is made when that parser is absent, the parser will be
+permanently out-of-sync with the buffer content, and shouldn't be used
+anymore.
+
+@cindex tree-sitter narrowing
+@anchor{tree-sitter narrowing} Normally, a parser ``sees'' the whole
+buffer, but when the buffer is narrowed (@pxref{Narrowing}), the
+parser will only see the visible region. As far as the parser can
+tell, the hidden region is deleted. And when the buffer is later
+widened, the parser thinks text is inserted in the beginning and in
+the end. Although parsers respect narrowing, narrowing shouldn't be
+the means to handle a multi-language buffer; instead, set the ranges
+in which a parser should operate. @xref{Multiple Languages}.
+
+Because a parser parses lazily, when we narrow the buffer, the parser
+doesn't act immediately; as long as we don't query for a node while
+the buffer is narrowed, narrowing does not affect the parser.
+
+@cindex tree-sitter parse string
+@defun treesit-parse-string string language
+Besides creating a parser for a buffer, we can also just parse a
+string. Unlike a buffer, parsing a string is a one-time deal, and
+there is no way to update the result.
+
+This function parses @var{string} with @var{language}, and returns the
+root node of the generated syntax tree.
+@end defun
+
+@node Retrieving Node
+@section Retrieving Node
+
+@cindex tree-sitter find node
+@cindex tree-sitter get node
+There are two ways to retrieve a node: directly from the syntax tree,
+or by traveling from other nodes. But before we continue, let's go
+over some conventions of tree-sitter functions.
+
+We talk about a node being ``smaller'' or ``larger'', and ``lower'' or
+``higher''. A smaller and lower node is lower in the syntax tree and
+therefore spans a smaller piece of text; a larger and higher node is
+higher up in the syntax tree, containing many smaller nodes as its
+children, and therefore spans a larger piece of text.
+
+When a function cannot find a node, it returns nil. For the
+convenience of function chaining, all the functions that take a node
+as argument and return a node also accept nil as the node; in that
+case, the function just returns nil.
+
+@vindex treesit-node-outdated
+Nodes are not automatically updated when the associated buffer is
+modified. In fact, there is no way to update a node once it is
+retrieved. It is best to use a node and throw it away and not save
+it. A node is @dfn{outdated} if the buffer has changed since the node
+was retrieved. Using an outdated node signals the
+@code{treesit-node-outdated} error.
+
+@heading Retrieving node from syntax tree
+
+@defun treesit-node-at beg &optional end parser-or-lang named
+This function returns the @emph{smallest} node that covers the span
+from @var{beg} to @var{end}. In other words, the start of the node
+@code{<=} @var{beg}, and the end of the node @code{>=} @var{end}. If
+@var{end} is omitted, it defaults to the value of @var{beg}.
+
+When @var{parser-or-lang} is nil, this function uses the first parser
+in @var{treesit-parser-list} in the current buffer. If
+@var{parser-or-lang} is a parser object, it uses that parser; if
+@var{parser-or-lang} is a language, it finds the first parser using
+that language in @var{treesit-parser-list} and uses that.
+
+If @var{named} is non-nil, this function looks for a named node
+instead (@pxref{tree-sitter named node, named node}).
+
+@example
+@group
+;; Find the node at point in a C parser's syntax tree.
+(treesit-node-at (point) (point) 'c)
+ @c @result{} #<treesit-node from 1 to 4 in *scratch*>
+@end group
+@end example
+@end defun
+
+@defun treesit-parser-root-node parser
+This function returns the root node of the syntax tree generated by
+@var{parser}.
+@end defun
+
+@defun treesit-buffer-root-node &optional language
+This function finds the first parser that uses @var{language} in
+@var{treesit-parser-list} in the current buffer, and returns the
+root node of that parser. If it cannot find an appropriate parser, it
+returns nil.
+@end defun
+
+Once we have a node, we can retrieve other nodes from it, or query for
+information about this node.
+
+@heading Retrieving node from other nodes
+
+@subheading By kinship
+
+@defun treesit-node-parent node
+This function returns the immediate parent of @var{node}.
+@end defun
+
+@defun treesit-node-child node n &optional named
+This function returns the @var{n}'th child of @var{node}. If
+@var{named} is non-nil, then it only counts named nodes
+(@pxref{tree-sitter named node, named node}). For example, in a node
+that represents a string: @code{"text"}, there are three children
+nodes: the opening quote @code{"}, the string content @code{text}, and
+the closing quote @code{"}. Among these nodes, the first child is
+the opening quote @code{"}, the first named child is the string
+content @code{text}.
+@end defun
+
+@defun treesit-node-children node &optional named
+This function returns all of @var{node}'s children in a list. If
+@var{named} is non-nil, then it only retrieves named nodes
+(@pxref{tree-sitter named node, named node}).
+@end defun
+
+@defun treesit-next-sibling node &optional named
+This function finds the next sibling of @var{node}. If @var{named} is
+non-nil, it finds the next named sibling (@pxref{tree-sitter named
+node, named node}).
+@end defun
+
+@defun treesit-prev-sibling node &optional named
+This function finds the previous sibling of @var{node}. If
+@var{named} is non-nil, it finds the previous named sibling
+(@pxref{tree-sitter named node, named node}).
+@end defun
+
+@subheading By field name
+
+To make the syntax tree easier to analyze, many language definitions
+assign @dfn{field names} to child nodes (@pxref{tree-sitter node field
+name, field name}). For example, a @code{function_definition} node
+could have a @code{declarator} and a @code{body}.
+
+@defun treesit-child-by-field-name node field-name
+This function finds the child of @var{node} that has @var{field-name}
+as its field name.
+
+@example
+@group
+;; Get the child that has "body" as its field name.
+(treesit-child-by-field-name node "body")
+ @c @result{} #<treesit-node from 3 to 11 in *scratch*>
+@end group
+@end example
+@end defun
+
+@subheading By position
+
+@defun treesit-first-child-for-pos node pos &optional named
+This function finds the first child of @var{node} that extends beyond
+@var{pos}. ``Extend beyond'' means the end of the child node
+@code{>=} @var{pos}. This function only looks for immediate children of
+@var{node}, and doesn't look in its grand children. If @var{named} is
+non-nil, it only looks for named children (@pxref{tree-sitter named
+node, named node}).
+@end defun
+
+@defun treesit-node-descendant-for-range node beg end &optional named
+This function finds the @emph{smallest} (grand)child of @var{node}
+that spans the range from @var{beg} to @var{end}. It is similar to
+@code{treesit-node-at}. If @var{named} is non-nil, it only looks
+for named nodes (@pxref{tree-sitter named node, named node}).
+@end defun
+
+@heading More convenient functions
+
+@defun treesit-filter-child node pred &optional named
+This function finds children of @var{node} that satisfy @var{pred}.
+
+Function @var{pred} takes the child node as the argument and should
+return non-nil to indicate keeping the child. If @var{named} is
+non-nil, this function only searches for named nodes.
+@end defun
+
+@defun treesit-parent-until node pred
+This function repeatedly finds the parent of @var{node}, and returns
+the parent if it satisfies @var{pred} (which takes the parent as the
+argument). If no parent satisfies @var{pred}, this function returns
+nil.
+@end defun
+
+@defun treesit-parent-while node pred
+This function repeatedly finds the parent of @var{node}, and keeps
+doing so as long as the parent satisfies @var{pred} (which takes the
+parent as the single argument). I.e., this function returns the
+farthest parent that still satisfies @var{pred}.
+@end defun
+
+@node Accessing Node
+@section Accessing Node Information
+
+Before going further, make sure you have read the basic conventions
+about tree-sitter nodes in the previous node.
+
+@heading Basic information
+
+Every node is associated with a parser, and that parser is associated
+with a buffer. The following functions let you retrieve them.
+
+@defun treesit-node-parser node
+This function returns @var{node}'s associated parser.
+@end defun
+
+@defun treesit-node-buffer node
+This function returns @var{node}'s parser's associated buffer.
+@end defun
+
+@defun treesit-node-language node
+This function returns @var{node}'s parser's associated language.
+@end defun
+
+Each node represents a piece of text in the buffer. The functions
+below find relevant information about that text.
+
+@defun treesit-node-start node
+Return the start position of @var{node}.
+@end defun
+
+@defun treesit-node-end node
+Return the end position of @var{node}.
+@end defun
+
+@defun treesit-node-text node &optional object
+Returns the buffer text that @var{node} represents. (If @var{node} is
+retrieved from parsing a string, it will be the text from that
+string.)
+@end defun
+
+Here are some basic checks on tree-sitter nodes.
+
+@defun treesit-node-p object
+Checks if @var{object} is a tree-sitter syntax node.
+@end defun
+
+@defun treesit-node-eq node1 node2
+Checks if @var{node1} and @var{node2} are the same node in a syntax
+tree.
+@end defun
+
+@heading Property information
+
+In general, nodes in a concrete syntax tree fall into two categories:
+@dfn{named nodes} and @dfn{anonymous nodes}. Whether a node is named
+or anonymous is determined by the language definition
+(@pxref{tree-sitter named node, named node}).
+
+@cindex tree-sitter missing node
+Apart from being named/anonymous, a node can have other properties. A
+node can be ``missing'': missing nodes are inserted by the parser in
+order to recover from certain kinds of syntax errors, i.e., something
+should probably be there according to the grammar, but is not there.
+
+@cindex tree-sitter extra node
+A node can be ``extra'': extra nodes represent things like comments,
+which can appear anywhere in the text.
+
+@cindex tree-sitter node that has changes
+A node ``has changes'' if the buffer has changed since the node was
+retrieved. In this case, the node's start and end positions would be
+off and we had better throw it away and retrieve a new one.
+
+@cindex tree-sitter node that has error
+A node ``has error'' if the text it spans contains a syntax error. It
+can be the node itself has an error, or one of its (grand)children has
+an error.
+
+@defun treesit-node-check node property
+This function checks if @var{node} has @var{property}. @var{property}
+can be @code{'named}, @code{'missing}, @code{'extra},
+@code{'has-changes}, or @code{'has-error}.
+@end defun
+
+Named nodes have ``types'' (@pxref{tree-sitter node type, node type}).
+For example, a named node can be a @code{string_literal} node, where
+@code{string_literal} is its type.
+
+@defun treesit-node-type node
+Return @var{node}'s type as a string.
+@end defun
+
+@heading Information as a child or parent
+
+@defun treesit-node-index node &optional named
+This function returns the index of @var{node} as a child node of its
+parent. If @var{named} is non-nil, it only counts named nodes
+(@pxref{tree-sitter named node, named node}).
+@end defun
+
+@defun treesit-node-field-name node
+A child of a parent node could have a field name (@pxref{tree-sitter
+node field name, field name}). This function returns the field name
+of @var{node} as a child of its parent.
+@end defun
+
+@defun treesit-node-field-name-for-child node n
+This is a more primitive function that returns the field name of the
+@var{n}'th child of @var{node}.
+@end defun
+
+@defun treesit-child-count node &optional named
+This function finds the number of children of @var{node}. If
+@var{named} is non-nil, it only counts named children
+(@pxref{tree-sitter named node, named node}).
+@end defun
+
+@node Pattern Matching
+@section Pattern Matching Tree-sitter Nodes
+
+Tree-sitter lets us pattern match with a small declarative language.
+Pattern matching consists of two steps: first tree-sitter matches a
+@dfn{pattern} against nodes in the syntax tree, then it @dfn{captures}
+specific nodes in that pattern and returns the captured nodes.
+
+We describe first how to write the most basic query pattern and how to
+capture nodes in a pattern, then the pattern-match function, finally
+more advanced pattern syntax.
+
+@heading Basic query syntax
+
+@cindex Tree-sitter query syntax
+@cindex Tree-sitter query pattern
+A @dfn{query} consists of multiple @dfn{patterns}. Each pattern is an
+s-expression that matches a certain node in the syntax tree. A
+pattern has the following shape:
+
+@example
+(@var{type} @var{child}...)
+@end example
+
+@noindent
+For example, a pattern that matches a @code{binary_expression} node that
+contains @code{number_literal} child nodes would look like
+
+@example
+(binary_expression (number_literal))
+@end example
+
+To @dfn{capture} a node in the query pattern above, append
+@code{@@capture-name} after the node pattern you want to capture. For
+example,
+
+@example
+(binary_expression (number_literal) @@number-in-exp)
+@end example
+
+@noindent
+captures @code{number_literal} nodes that are inside a
+@code{binary_expression} node with capture name @code{number-in-exp}.
+
+We can capture the @code{binary_expression} node too, with capture
+name @code{biexp}:
+
+@example
+(binary_expression
+ (number_literal) @@number-in-exp) @@biexp
+@end example
+
+@heading Query function
+
+Now we can introduce the query functions.
+
+@defun treesit-query-capture node query &optional beg end
+This function matches patterns in @var{query} in @var{node}.
+Argument @var{query} can be either a string or an s-expression. For
+now, we focus on the string syntax; s-expression syntax is described
+at the end of the section.
+
+The function returns all captured nodes in a list of
+@code{(@var{capture_name} . @var{node})}. If @var{beg} and @var{end}
+are both non-nil, it only pattern matches nodes in that range.
+
+@vindex treesit-query-error
+This function raises a @code{treesit-query-error} if @var{query} is
+malformed. The signal data contains a description of the specific
+error.
+@end defun
+
+@defun treesit-query-in source query &optional beg end
+This function matches patterns in @var{query} in @var{source}, and
+returns all captured nodes in a list of @code{(@var{capture_name}
+. @var{node})}. If @var{beg} and @var{end} are both non-nil, it only
+pattern matches nodes in that range.
+
+Argument @var{source} designates a node, it can be a language symbol,
+a parser, or simply a node. If a language symbol, @var{source}
+represents the root node of the first parser for that language in the
+current buffer; if a parser, @var{source} represents the root node of
+that parser.
+
+This function also raises @var{treesit-query-error}.
+@end defun
+
+For example, suppose @var{node}'s content is @code{1 + 2}, and
+@var{query} is
+
+@example
+@group
+(setq query
+ "(binary_expression
+ (number_literal) @@number-in-exp) @@biexp")
+@end group
+@end example
+
+@noindent
+Querying that query would return
+
+@example
+@group
+(treesit-query-capture node query)
+ @result{} ((biexp . @var{<node for "1 + 2">})
+ (number-in-exp . @var{<node for "1">})
+ (number-in-exp . @var{<node for "2">}))
+@end group
+@end example
+
+As we mentioned earlier, a @var{query} could contain multiple
+patterns. For example, it could have two top-level patterns:
+
+@example
+@group
+(setq query
+ "(binary_expression) @@biexp
+ (number_literal) @@number @@biexp")
+@end group
+@end example
+
+@defun treesit-query-string string query language
+This function parses @var{string} with @var{language}, pattern matches
+its root node with @var{query}, and returns the result.
+@end defun
+
+@heading More query syntax
+
+Besides node type and capture, tree-sitter's query syntax can express
+anonymous node, field name, wildcard, quantification, grouping,
+alternation, anchor, and predicate.
+
+@subheading Anonymous node
+
+An anonymous node is written verbatim, surrounded by quotes. A
+pattern matching (and capturing) keyword @code{return} would be
+
+@example
+"return" @@keyword
+@end example
+
+@subheading Wild card
+
+In a query pattern, @samp{(_)} matches any named node, and @samp{_}
+matches any named and anonymous node. For example, to capture any
+named child of a @code{binary_expression} node, the pattern would be
+
+@example
+(binary_expression (_) @@in_biexp)
+@end example
+
+@subheading Field name
+
+We can capture child nodes that have specific field names:
+
+@example
+@group
+(function_definition
+ declarator: (_) @@func-declarator
+ body: (_) @@func-body)
+@end group
+@end example
+
+We can also capture a node that doesn't have a certain field, say, a
+@code{function_definition} without a @code{body} field.
+
+@example
+(function_definition !body) @@func-no-body
+@end example
+
+@subheading Quantify node
+
+Tree-sitter recognizes quantification operators @samp{*}, @samp{+} and
+@samp{?}. Their meanings are the same as in regular expressions:
+@samp{*} matches the preceding pattern zero or more times, @samp{+}
+matches one or more times, and @samp{?} matches zero or one time.
+
+For example, this pattern matches @code{type_declaration} nodes
+that have @emph{zero or more} @code{long} keywords.
+
+@example
+(type_declaration "long"* @@long-in-type)
+@end example
+
+@noindent
+And this pattern matches a type declaration that has zero or one
+@code{long} keyword:
+
+@example
+(type_declaration "long"?) @@type-decl
+@end example
+
+@subheading Grouping
+
+Similar to groups in regular expression, we can bundle patterns into a
+group and apply quantification operators to it. For example, to
+express a comma separated list of identifiers, one could write
+
+@example
+(identifier) ("," (identifier))*
+@end example
+
+@subheading Alternation
+
+Again, similar to regular expressions, we can express ``match any one
+from this group of patterns'' in the query pattern. The syntax is a
+list of patterns enclosed in square brackets. For example, to capture
+some keywords in C, the query pattern would be
+
+@example
+@group
+[
+ "return"
+ "break"
+ "if"
+ "else"
+] @@keyword
+@end group
+@end example
+
+@subheading Anchor
+
+The anchor operator @samp{.} can be used to enforce juxtaposition,
+i.e., to enforce two things to be directly next to each other. The
+two ``things'' can be two nodes, or a child and the end of its parent.
+For example, to capture the first child, the last child, or two
+adjacent children:
+
+@example
+@group
+;; Anchor the child with the end of its parent.
+(compound_expression (_) @@last-child .)
+
+;; Anchor the child with the beginning of its parent.
+(compound_expression . (_) @@first-child)
+
+;; Anchor two adjacent children.
+(compound_expression
+ (_) @@prev-child
+ .
+ (_) @@next-child)
+@end group
+@end example
+
+Note that the enforcement of juxtaposition ignores any anonymous
+nodes.
+
+@subheading Predicate
+
+We can add predicate constraints to a pattern. For example, if we use
+the following query pattern
+
+@example
+@group
+(
+ (array . (_) @@first (_) @@last .)
+ (#equal @@first @@last)
+)
+@end group
+@end example
+
+Then tree-sitter only matches arrays where the first element equals
+the last element. To attach a predicate to a pattern, we need to
+group them together. A predicate always starts with a @samp{#}.
+Currently there are two predicates, @code{#equal} and @code{#match}.
+
+@deffn Predicate equal arg1 arg2
+Matches if @var{arg1} equals @var{arg2}. Arguments can be either a
+string or a capture name. Capture names represent the text that the
+captured node spans in the buffer.
+@end deffn
+
+@deffn Predicate match regexp capture-name
+Matches if the text that @var{capture-name}'s node spans in the buffer
+matches regular expression @var{regexp}. Matching is case-sensitive.
+@end deffn
+
+Note that a predicate can only refer to capture names that appear in
+the same pattern. Indeed, it makes little sense to refer to capture
+names in other patterns anyway.
+
+@heading S-expression patterns
+
+Besides strings, Emacs provides an s-expression based syntax for query
+patterns. It largely resembles the string-based syntax. For example,
+the following pattern
+
+@example
+@group
+(treesit-query-capture
+ node "(addition_expression
+ left: (_) @@left
+ \"+\" @@plus-sign
+ right: (_) @@right) @@addition
+
+ [\"return\" \"break\"] @@keyword")
+@end group
+@end example
+
+@noindent
+is equivalent to
+
+@example
+@group
+(treesit-query-capture
+ node '((addition_expression
+ left: (_) @@left
+ "+" @@plus-sign
+ right: (_) @@right) @@addition
+
+ ["return" "break"] @@keyword))
+@end group
+@end example
+
+Most pattern syntax can be written directly as strange but
+nevertheless valid s-expressions. Only a few of them need
+modification:
+
+@itemize
+@item
+Anchor @samp{.} is written as @code{:anchor}.
+@item
+@samp{?} is written as @samp{:?}.
+@item
+@samp{*} is written as @samp{:*}.
+@item
+@samp{+} is written as @samp{:+}.
+@item
+@code{#equal} is written as @code{:equal}. In general, predicates
+change their @samp{#} to @samp{:}.
+@end itemize
+
+For example,
+
+@example
+@group
+"(
+ (compound_expression . (_) @@first (_)* @@rest)
+ (#match \"love\" @@first)
+ )"
+@end group
+@end example
+
+is written in s-expression as
+
+@example
+@group
+'((
+ (compound_expression :anchor (_) @@first (_) :* @@rest)
+ (:match "love" @@first)
+ ))
+@end group
+@end example
+
+@defun treesit-expand-query query
+This function expands the s-expression @var{query} into a string
+query. It is usually a good idea to expand the s-expression patterns
+into strings for font-lock queries since they are called repeatedly.
+@end defun
+
+Tree-sitter project's documentation about pattern-matching can be
+found at
+@uref{https://tree-sitter.github.io/tree-sitter/using-parsers#pattern-matching-with-queries}.
+
+@node Multiple Languages
+@section Parsing Text in Multiple Languages
+
+Sometimes, the source of a programming language could contain sources
+of other languages, HTML + CSS + JavaScript is one example. In that
+case, we need to assign individual parsers to text segments written in
+different languages. Traditionally this is achieved by using
+narrowing. While tree-sitter works with narrowing (@pxref{tree-sitter
+narrowing, narrowing}), the recommended way is to set ranges in which
+a parser will operate.
+
+@defun treesit-parser-set-included-ranges parser ranges
+This function sets the ranges of @var{parser} to @var{ranges}. Then
+@var{parser} will only read the text covered by each range. The
+argument @var{ranges} is a list of cons cells of the form
+@code{(@var{beg} . @var{end})}.
+
+Each range in @var{ranges} must come in order and not overlap. That
+is, in pseudo code:
+
+@example
+@group
+(cl-loop for idx from 1 to (1- (length ranges))
+ for prev = (nth (1- idx) ranges)
+ for next = (nth idx ranges)
+ should (<= (car prev) (cdr prev)
+ (car next) (cdr next)))
+@end group
+@end example
+
+@vindex treesit-range-invalid
+If @var{ranges} violates this constraint, or something else went
+wrong, this function signals a @code{treesit-range-invalid} error.
+The signal data contains a specific error message and the ranges we
+are trying to set.
+
+This function can also be used for disabling ranges. If @var{ranges}
+is nil, the parser is set to parse the whole buffer.
+
+Example:
+
+@example
+@group
+(treesit-parser-set-included-ranges
+ parser '((1 . 9) (16 . 24) (24 . 25)))
+@end group
+@end example
+@end defun
+
+@defun treesit-parser-included-ranges parser
+This function returns the ranges set for @var{parser}. The return
+value is the same as the @var{ranges} argument of
+@code{treesit-parser-set-included-ranges}: a list of cons
+@code{(@var{beg} . @var{end})}. And if @var{parser} doesn't have any
+ranges, the return value is nil.
+
+@example
+@group
+(treesit-parser-included-ranges parser)
+ @result{} ((1 . 9) (16 . 24) (24 . 25))
+@end group
+@end example
+@end defun
+
+@defun treesit-set-ranges parser-or-lang ranges
+Like @code{treesit-parser-set-included-ranges}, this function sets
+the ranges of @var{parser-or-lang} to @var{ranges}. Conveniently,
+@var{parser-or-lang} could be either a parser or a language. If it is
+a language, this function looks for the first parser in
+@var{treesit-parser-list} for that language in the current buffer,
+and sets the ranges for it.
+@end defun
+
+@defun treesit-get-ranges parser-or-lang
+This function returns the ranges of @var{parser-or-lang}, like
+@code{treesit-parser-included-ranges}. And like
+@code{treesit-set-ranges}, @var{parser-or-lang} can be a parser or
+a language symbol.
+@end defun
+
+@defun treesit-query-range source pattern &optional beg end
+This function matches @var{source} with @var{pattern} and returns the
+ranges of captured nodes. The return value has the same shape as that
+of other range functions: a list of @code{(@var{beg} . @var{end})}.
+
+For convenience, @var{source} can be a language symbol, a parser, or a
+node. If a language symbol, this function matches in the root node of
+the first parser using that language; if a parser, this function
+matches in the root node of that parser; if a node, this function
+matches in that node.
+
+Parameter @var{pattern} is the query pattern used to capture nodes
+(@pxref{Pattern Matching}). The capture names don't matter. Parameters
+@var{beg} and @var{end}, if both non-nil, limit the range in which
+this function queries.
+
+Like other query functions, this function raises a
+@code{treesit-query-error} if @var{pattern} is malformed.
+@end defun
+
+@defun treesit-language-at point
+This function tries to figure out which language is responsible for
+the text at @var{point}. It goes over each parser in
+@var{treesit-parser-list} and sees if that parser's range covers
+@var{point}.
+@end defun
+
+@defvar treesit-range-functions
+A list of range functions. Font-locking and indenting code uses
+functions in this list to set correct ranges for a language parser
+before using it.
+
+The signature of each function should be
+
+@example
+(@var{start} @var{end} &rest @var{_})
+@end example
+
+where @var{start} and @var{end} mark the region that is about to be
+used. A range function only needs to (but is not limited to) update
+ranges in that region.
+
+Each function in the list is called in-order.
+@end defvar
+
+@defun treesit-update-ranges &optional start end
+This function is used by font-lock and indent to update ranges before
+using any parser. Each range function in
+@var{treesit-range-functions} is called in-order. Arguments
+@var{start} and @var{end} are passed to each range function.
+@end defun
+
+@heading An example
+
+Normally, in a set of languages that can be mixed together, there is a
+major language and several embedded languages. The major language
+parses the whole document, and skips the embedded languages. Then the
+parser for the major language knows the ranges of the embedded
+languages. So we first parse the whole document with the major
+language's parser, set ranges for the embedded languages, then parse
+the embedded languages.
+
+Suppose we want to parse a very simple document that mixes HTML, CSS
+and JavaScript:
+
+@example
+@group
+<html>
+ <script>1 + 2</script>
+ <style>body @{ color: "blue"; @}</style>
+</html>
+@end group
+@end example
+
+We first parse with HTML, then set ranges for CSS and JavaScript:
+
+@example
+@group
+;; Create parsers.
+(setq html (treesit-get-parser-create 'html))
+(setq css (treesit-get-parser-create 'css))
+(setq js (treesit-get-parser-create 'javascript))
+
+;; Set CSS ranges.
+(setq css-range
+ (treesit-query-range
+ 'html
+ "(style_element (raw_text) @@capture)"))
+(treesit-parser-set-included-ranges css css-range)
+
+;; Set JavaScript ranges.
+(setq js-range
+ (treesit-query-range
+ 'html
+ "(script_element (raw_text) @@capture)"))
+(treesit-parser-set-included-ranges js js-range)
+@end group
+@end example
+
+We use a query pattern @code{(style_element (raw_text) @@capture)} to
+find CSS nodes in the HTML parse tree. For how to write query
+patterns, @pxref{Pattern Matching}.
+
+@node Tree-sitter C API
+@section Tree-sitter C API Correspondence
+
+Emacs' tree-sitter integration doesn't expose every feature
+tree-sitter's C API provides. Missing features include:
+
+@itemize
+@item
+Creating a tree cursor and navigating the syntax tree with it.
+@item
+Setting timeout and cancellation flag for a parser.
+@item
+Setting the logger for a parser.
+@item
+Printing a DOT graph of the syntax tree to a file.
+@item
+Copying and modifying a syntax tree. (Emacs doesn't expose a tree
+object.)
+@item
+Using (row, column) coordinates as position.
+@item
+Updating a node with changes. (In Emacs, retrieve a new node instead
+of updating the existing one.)
+@item
+Querying statistics of a language definition.
+@end itemize
+
+In addition, Emacs makes some changes to the C API to make the API more
+convenient and idiomatic:
+
+@itemize
+@item
+Instead of using byte positions, the ELisp API uses character
+positions.
+@item
+Null nodes are converted to nil.
+@end itemize
+
+Below is the correspondence between all C API functions and their
+ELisp counterparts. Sometimes one ELisp function corresponds to
+multiple C functions, and many C functions don't have an ELisp
+counterpart.
+
+@example
+ts_parser_new treesit-parser-create
+ts_parser_delete
+ts_parser_set_language
+ts_parser_language treesit-parser-language
+ts_parser_set_included_ranges treesit-parser-set-included-ranges
+ts_parser_included_ranges treesit-parser-included-ranges
+ts_parser_parse
+ts_parser_parse_string treesit-parse-string
+ts_parser_parse_string_encoding
+ts_parser_reset
+ts_parser_set_timeout_micros
+ts_parser_timeout_micros
+ts_parser_set_cancellation_flag
+ts_parser_cancellation_flag
+ts_parser_set_logger
+ts_parser_logger
+ts_parser_print_dot_graphs
+ts_tree_copy
+ts_tree_delete
+ts_tree_root_node
+ts_tree_language
+ts_tree_edit
+ts_tree_get_changed_ranges
+ts_tree_print_dot_graph
+ts_node_type treesit-node-type
+ts_node_symbol
+ts_node_start_byte treesit-node-start
+ts_node_start_point
+ts_node_end_byte treesit-node-end
+ts_node_end_point
+ts_node_string treesit-node-string
+ts_node_is_null
+ts_node_is_named treesit-node-check
+ts_node_is_missing treesit-node-check
+ts_node_is_extra treesit-node-check
+ts_node_has_changes treesit-node-check
+ts_node_has_error treesit-node-check
+ts_node_parent treesit-node-parent
+ts_node_child treesit-node-child
+ts_node_field_name_for_child treesit-node-field-name-for-child
+ts_node_child_count treesit-node-child-count
+ts_node_named_child treesit-node-child
+ts_node_named_child_count treesit-node-child-count
+ts_node_child_by_field_name treesit-node-by-field-name
+ts_node_child_by_field_id
+ts_node_next_sibling treesit-next-sibling
+ts_node_prev_sibling treesit-prev-sibling
+ts_node_next_named_sibling treesit-next-sibling
+ts_node_prev_named_sibling treesit-prev-sibling
+ts_node_first_child_for_byte treesit-first-child-for-pos
+ts_node_first_named_child_for_byte treesit-first-child-for-pos
+ts_node_descendant_for_byte_range treesit-descendant-for-range
+ts_node_descendant_for_point_range
+ts_node_named_descendant_for_byte_range treesit-descendant-for-range
+ts_node_named_descendant_for_point_range
+ts_node_edit
+ts_node_eq treesit-node-eq
+ts_tree_cursor_new
+ts_tree_cursor_delete
+ts_tree_cursor_reset
+ts_tree_cursor_current_node
+ts_tree_cursor_current_field_name
+ts_tree_cursor_current_field_id
+ts_tree_cursor_goto_parent
+ts_tree_cursor_goto_next_sibling
+ts_tree_cursor_goto_first_child
+ts_tree_cursor_goto_first_child_for_byte
+ts_tree_cursor_goto_first_child_for_point
+ts_tree_cursor_copy
+ts_query_new
+ts_query_delete
+ts_query_pattern_count
+ts_query_capture_count
+ts_query_string_count
+ts_query_start_byte_for_pattern
+ts_query_predicates_for_pattern
+ts_query_step_is_definite
+ts_query_capture_name_for_id
+ts_query_string_value_for_id
+ts_query_disable_capture
+ts_query_disable_pattern
+ts_query_cursor_new
+ts_query_cursor_delete
+ts_query_cursor_exec treesit-query-capture
+ts_query_cursor_did_exceed_match_limit
+ts_query_cursor_match_limit
+ts_query_cursor_set_match_limit
+ts_query_cursor_set_byte_range
+ts_query_cursor_set_point_range
+ts_query_cursor_next_match
+ts_query_cursor_remove_match
+ts_query_cursor_next_capture
+ts_language_symbol_count
+ts_language_symbol_name
+ts_language_symbol_for_name
+ts_language_field_count
+ts_language_field_name_for_id
+ts_language_field_id_for_name
+ts_language_symbol_type
+ts_language_version
+@end example
(font-spec atom) (font-entity atom) (font-object atom)
(vector array sequence atom)
(user-ptr atom)
+ ;; These names must match what `type-of' returns for the new
+ ;; pseudovector types, i.e. `treesit-parser' and `treesit-node'.
+ (treesit-parser atom)
+ (treesit-node atom)
;; Plus, really hand made:
(null symbol list sequence atom))
"Alist of supertypes.
--- /dev/null
+;;; treesit.el --- tree-sitter utilities -*- lexical-binding: t -*-
+
+;; Copyright (C) 2021 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;;
+;; Note to self: we don't create parsers automatically in any provided
+;; functions.
+
+;;; Code:
+
+(eval-when-compile (require 'cl-lib))
+(require 'cl-seq)
+(require 'font-lock)
+
+;;; Activating tree-sitter
+
+;; Customization group that holds the user options of this library.
+(defgroup treesit nil
+  "Tree-sitter is an incremental parser."
+  :group 'tools)
+
+(defcustom treesit-disabled-modes nil
+  "A list of major-modes for which tree-sitter support is disabled.
+A mode derived from any mode in this list is treated as disabled
+too; see `treesit-should-enable-p'."
+  ;; `(repeat symbol)' describes a list of any number of symbols.
+  ;; `(list symbol)' would describe a list of exactly one symbol, which
+  ;; makes Customize reject the default value nil and any longer list.
+  :type '(repeat symbol)
+  :group 'treesit)
+
+(defcustom treesit-maximum-size (* 4 1024 1024)
+  "Buffer size at or above which tree-sitter parsing is not enabled.
+`treesit-should-enable-p' compares this against `buffer-size'."
+  :group 'treesit
+  :type 'integer)
+
+(defun treesit-available-p ()
+  "Return non-nil when the tree-sitter primitives are available.
+The test is whether `treesit-parser-create' is defined as a function."
+  (fboundp 'treesit-parser-create))
+
+(defun treesit-should-enable-p (&optional mode)
+  "Return non-nil if tree-sitter support should be activated for MODE.
+MODE defaults to the value of `major-mode'.  Support is activated
+only when tree-sitter is available, MODE is not derived from any
+mode listed in `treesit-disabled-modes', and the current buffer is
+smaller than `treesit-maximum-size'."
+  (let* ((target (or mode major-mode))
+         ;; Non-nil as soon as one disabled mode is an ancestor of TARGET.
+         (disabled (catch 'disabled
+                     (dolist (candidate treesit-disabled-modes)
+                       (when (provided-mode-derived-p target candidate)
+                         (throw 'disabled t))))))
+    (and (treesit-available-p)
+         (not disabled)
+         (< (buffer-size) treesit-maximum-size))))
+
+;;; Parser API supplement
+
+(defun treesit-get-parser (language)
+  "Return the first parser in `treesit-parser-list' that uses LANGUAGE.
+Return nil if no parser in the list uses LANGUAGE."
+  (cl-loop for parser in treesit-parser-list
+           when (eq language (treesit-parser-language parser))
+           return parser))
+
+(defun treesit-get-parser-create (language)
+  "Return a parser for LANGUAGE, creating one if necessary.
+Reuse the first parser for LANGUAGE in `treesit-parser-list' if
+there is one; otherwise create a new parser for the current buffer
+and return it."
+  (let ((existing (treesit-get-parser language)))
+    (if existing
+        existing
+      (treesit-parser-create (current-buffer) language))))
+
+(defun treesit-parse-string (string language)
+  "Parse STRING as LANGUAGE and return the root node of the result.
+STRING is inserted into a temporary buffer, and a parser for
+LANGUAGE is created on that buffer."
+  (with-temp-buffer
+    (insert string)
+    (let ((parser (treesit-parser-create (current-buffer) language)))
+      (treesit-parser-root-node parser))))
+
+(defun treesit-language-at (point)
+  "Return the language of the first parser that has a node at POINT.
+Parsers are tried in the order of `treesit-parser-list'.  Return
+nil if no parser yields a node at POINT."
+  (catch 'language
+    (dolist (parser treesit-parser-list)
+      (when (treesit-node-at point nil parser)
+        (throw 'language (treesit-parser-language parser))))))
+
+(defun treesit-set-ranges (parser-or-lang ranges)
+  "Set the included ranges of PARSER-OR-LANG to RANGES.
+PARSER-OR-LANG is either a parser or a language symbol; for a
+symbol, the first parser for that language is used.  Signal an
+error if no such parser exists, or if PARSER-OR-LANG is neither a
+symbol nor a parser."
+  (let ((parser
+         (pcase parser-or-lang
+           ((pred symbolp)
+            (or (treesit-get-parser parser-or-lang)
+                (error "Cannot find a parser for %s" parser-or-lang)))
+           ((pred treesit-parser-p) parser-or-lang)
+           (_ (error "Expecting a parser or language, but got %s"
+                     parser-or-lang)))))
+    (treesit-parser-set-included-ranges parser ranges)))
+
+(defun treesit-get-ranges (parser-or-lang)
+  "Return the included ranges of PARSER-OR-LANG.
+PARSER-OR-LANG is either a parser or a language symbol; for a
+symbol, the first parser for that language is used.  Signal an
+error if no such parser exists, or if PARSER-OR-LANG is neither a
+symbol nor a parser."
+  (let ((parser
+         (pcase parser-or-lang
+           ((pred symbolp)
+            (or (treesit-get-parser parser-or-lang)
+                (error "Cannot find a parser for %s" parser-or-lang)))
+           ((pred treesit-parser-p) parser-or-lang)
+           (_ (error "Expecting a parser or language, but got %s"
+                     parser-or-lang)))))
+    (treesit-parser-included-ranges parser)))
+
+;;; Node API supplement
+
+(defun treesit-node-buffer (node)
+  "Return the buffer of the parser that NODE belongs to."
+  (let ((parser (treesit-node-parser node)))
+    (treesit-parser-buffer parser)))
+
+(defun treesit-node-language (node)
+  "Return the language symbol of the parser that NODE belongs to."
+  (let ((parser (treesit-node-parser node)))
+    (treesit-parser-language parser)))
+
+(defun treesit-node-at (beg &optional end parser-or-lang named)
+  "Return the smallest node that covers the range from BEG to END.
+
+END defaults to BEG when omitted.  Return nil if no node is found.
+If NAMED is non-nil, only look for named nodes.
+
+PARSER-OR-LANG selects the syntax tree to search: if it is a
+parser, use that parser; if it is a language symbol, use the first
+parser for that language in the current buffer; if it is nil, use
+the first parser in `treesit-parser-list'."
+  (let* ((range-end (or end beg))
+         (root (if (treesit-parser-p parser-or-lang)
+                   (treesit-parser-root-node parser-or-lang)
+                 (treesit-buffer-root-node parser-or-lang))))
+    (treesit-node-descendant-for-range root beg range-end named)))
+
+(defun treesit-buffer-root-node (&optional language)
+  "Return the root node of a parser in the current buffer.
+If LANGUAGE is non-nil, use the first parser for LANGUAGE;
+otherwise use the first parser in `treesit-parser-list'.  Signal
+an error if no suitable parser exists."
+  (let ((parser
+         (cond (language
+                (or (treesit-get-parser language)
+                    (error "Cannot find a parser for %s" language)))
+               ;; A test-only clause returns the value of its test.
+               ((car treesit-parser-list))
+               (t (error "Buffer has no parser")))))
+    (treesit-parser-root-node parser)))
+
+(defun treesit-filter-child (node pred &optional named)
+  "Return the children of NODE that satisfy PRED, in order.
+PRED is a function of one argument, the child node.  If NAMED is
+non-nil, only named children are visited."
+  (cl-loop for child = (treesit-node-child node 0 named)
+           then (treesit-node-next-sibling child named)
+           while child
+           when (funcall pred child)
+           collect child))
+
+(defun treesit-node-text (node &optional no-property)
+  "Return the text that NODE spans in its buffer.
+The text is taken from the buffer of NODE's parser.  If
+NO-PROPERTY is non-nil, return it without text properties."
+  (with-current-buffer (treesit-node-buffer node)
+    (let ((beg (treesit-node-start node))
+          (end (treesit-node-end node)))
+      (if no-property
+          (buffer-substring-no-properties beg end)
+        (buffer-substring beg end)))))
+
+(defun treesit-parent-until (node pred)
+  "Return the closest ancestor of NODE that satisfies PRED.
+The search starts at NODE's parent; NODE itself is not tested.
+Return nil if no ancestor satisfies PRED.  PRED is a function of
+one argument, the ancestor being tested."
+  (cl-loop for parent = (treesit-node-parent node)
+           then (treesit-node-parent parent)
+           while parent
+           when (funcall pred parent) return parent))
+
+(defun treesit-parent-while (node pred)
+  "Return the furthest node, starting from NODE, that satisfies PRED.
+NODE itself is tested first, then its successive parents, for as
+long as PRED returns non-nil; the last node that passed is
+returned.  Return nil if NODE is nil or NODE itself fails PRED.
+PRED is a function of one argument, the node being tested."
+  (let ((current node)
+        (furthest nil))
+    (while (and current (funcall pred current))
+      (setq furthest current)
+      (setq current (treesit-node-parent current)))
+    furthest))
+
+(defun treesit-node-children (node &optional named)
+  "Return a list of the children of NODE, in order.
+If NAMED is non-nil, collect only the named children."
+  (cl-loop for idx from 0 below (treesit-node-child-count node named)
+           collect (treesit-node-child node idx named)))
+
+(defun treesit-node-index (node &optional named)
+  "Return the zero-based index of NODE among its siblings.
+The index is the number of previous siblings of NODE.  If NAMED
+is non-nil, only named siblings are counted."
+  (let ((index 0)
+        (sibling (treesit-node-prev-sibling node named)))
+    (while sibling
+      (setq index (1+ index)
+            sibling (treesit-node-prev-sibling sibling named)))
+    index))
+
+(defun treesit-node-field-name (node)
+  "Return the field name of NODE as a child of its parent.
+Return nil if NODE has no parent."
+  (let ((parent (treesit-node-parent node)))
+    (when parent
+      (treesit-node-field-name-for-child
+       parent (treesit-node-index node)))))
+
+;;; Query API supplement
+
+(defun treesit-query-in (source query &optional beg end)
+  "Run QUERY against SOURCE and return the result of the query.
+
+SOURCE selects the node to query.  It can be a language symbol,
+meaning the root node of the first parser for that language in the
+current buffer; a parser, meaning the root node of that parser; or
+a node, which is used as is.
+
+QUERY is either a string query or a sexp query.  See Info node
+`(elisp)Pattern Matching' for how to write a query pattern in either
+string or s-expression form.
+
+BEG and END, if _both_ non-nil, specify the range in which the
+query is executed.
+
+Raise a treesit-query-error if QUERY is malformed."
+  (let ((root (cond ((symbolp source)
+                     (treesit-buffer-root-node source))
+                    ((treesit-parser-p source)
+                     (treesit-parser-root-node source))
+                    ((treesit-node-p source)
+                     source))))
+    (treesit-query-capture root query beg end)))
+
+(defun treesit-query-string (string query language)
+  "Parse STRING as LANGUAGE and run QUERY on the resulting tree.
+STRING is inserted into a temporary buffer and parsed there; the
+query runs on the root node.  See `treesit-query-capture' for
+QUERY."
+  (with-temp-buffer
+    (insert string)
+    (treesit-query-capture
+     (treesit-parser-root-node
+      (treesit-parser-create (current-buffer) language))
+     query)))
+
+(defun treesit-query-range (source query &optional beg end)
+  "Run QUERY and return the ranges of the captured nodes.
+
+SOURCE, QUERY, BEG and END are the same as in `treesit-query-in'.
+The return value is a list of (START . END), where START and END
+specify the range of one captured node.  Capture names are
+ignored."
+  (mapcar (lambda (capture)
+            (let ((node (cdr capture)))
+              (cons (treesit-node-start node)
+                    (treesit-node-end node))))
+          (treesit-query-in source query beg end)))
+
+;;; Range API supplement
+
+(defvar-local treesit-range-functions nil
+  "List of functions that set parser ranges before a region is used.
+Font-locking and indentation code call these functions (through
+`treesit-update-ranges') to set the correct ranges for each
+language parser before using it.
+
+Each function is called with
+
+    (start end &rest _)
+
+where START and END delimit the region that is about to be used.
+A range function must update the ranges at least within that
+region, and may update more.
+
+The functions are called in the order they appear in the list.")
+
+(defun treesit-update-ranges (&optional start end)
+  "Call every function in `treesit-range-functions', in order.
+Each function receives START and END.  A nil START stands for
+`point-min' and a nil END for `point-max', both evaluated anew for
+every function."
+  (cl-loop for range-fn in treesit-range-functions
+           do (funcall range-fn
+                       (or start (point-min))
+                       (or end (point-max)))))
+
+;;; Font-lock
+
+(defvar-local treesit-font-lock-settings nil
+  "List of SETTINGs that drive tree-sitter based fontification.
+
+Each SETTING has the form
+
+    (LANGUAGE QUERY)
+
+and controls one parser (often each for a different language).
+LANGUAGE is the language symbol.  See Info node `(elisp)Language
+Definitions'.
+
+QUERY is either a string query or a sexp query.
+See Info node `(elisp)Pattern Matching' for writing queries.
+
+A capture name in QUERY should be a face name such as
+`font-lock-keyword-face', in which case the captured node is
+fontified with that face.  A capture name can also be a function
+name, in which case the function is called with (START END NODE),
+where START and END are the start and end positions of the node in
+the buffer and NODE is the tree-sitter node object.  If a capture
+name is both a face and a function, the face takes priority.
+
+Major modes should generally set `treesit-font-lock-defaults' and
+let `treesit-font-lock-enable' populate this variable.")
+
+(defvar-local treesit-font-lock-defaults nil
+  "Defaults for tree-sitter Font Lock specified by the major mode.
+
+The value should be a list of the form
+
+    (DEFAULT :KEYWORD VALUE...)
+
+DEFAULT is a symbol or a list of symbols (one per level of
+fontification).  Each symbol names a variable or a function; a
+symbol that is both a variable and a function is used as a
+function.  The level of fontification is selected by
+`font-lock-maximum-decoration'.
+
+The symbol(s) in DEFAULT should contain or return a SETTING as
+explained in `treesit-font-lock-settings', which looks like
+
+    (LANGUAGE QUERY)
+
+KEYWORD and VALUE are additional settings that could be used to
+alter fontification behavior.  Currently there aren't any.
+
+Multi-language major modes should provide a range function for
+each language they support in `treesit-range-functions'; the
+ranges are then set accordingly before a region is fontified.
+See Info node `(elisp)Multiple Languages' for what it means to
+set ranges for a parser.")
+
+(defun treesit-font-lock-fontify-region (start end &optional loudly)
+  "Fontify the region between START and END using tree-sitter.
+For every setting in `treesit-font-lock-settings', run its query
+over the region and apply each capture: a capture name that is a
+face is put on the captured text, and one that is a function is
+called with the node's start, its end, and the node.  Afterwards
+the regexp-based font-lock runs on the same region.  If LOUDLY is
+non-nil, message some debugging information."
+  (treesit-update-ranges start end)
+  (font-lock-unfontify-region start end)
+  (dolist (setting treesit-font-lock-settings)
+    (when-let* ((language (nth 0 setting))
+                (query (nth 1 setting))
+                (parser (treesit-get-parser-create language))
+                (top (treesit-node-at start end parser)))
+      ;; Restricting the query to START..END matters: TOP is usually
+      ;; the root node, and an unrestricted query would cover the
+      ;; whole file.
+      (let ((captures (treesit-query-capture top query start end)))
+        (with-silent-modifications
+          (dolist (capture captures)
+            (let* ((name (car capture))
+                   (captured (cdr capture))
+                   (node-beg (treesit-node-start captured))
+                   (node-end (treesit-node-end captured)))
+              (cond ((facep name)
+                     (put-text-property node-beg node-end 'face name))
+                    ((functionp name)
+                     (funcall name node-beg node-end captured))
+                    (t (error "Capture name %s is neither a face nor a function" name)))
+              (when loudly
+                (message "Fontifying text from %d to %d, Face: %s Language: %s"
+                         node-beg node-end name language))))))))
+  ;; Regexp font-lock runs after tree-sitter because it is usually
+  ;; used for custom fontification.  Unfontifying is suppressed so
+  ;; that it does not erase the faces applied above.
+  (let ((font-lock-unfontify-region-function #'ignore))
+    (font-lock-default-fontify-region start end loudly)))
+
+(defun treesit-font-lock-enable ()
+  "Enable tree-sitter font-locking for the current buffer.
+Compute `treesit-font-lock-settings' from
+`treesit-font-lock-defaults' and make
+`treesit-font-lock-fontify-region' the buffer's region fontifier."
+  ;; Only the DEFAULT element of `treesit-font-lock-defaults' is used;
+  ;; the keyword/value tail is currently ignored.
+  (let* ((default (car treesit-font-lock-defaults))
+         (level (font-lock-value-in-major-mode
+                 font-lock-maximum-decoration))
+         (chosen (font-lock-choose-keywords default level)))
+    (setq-local treesit-font-lock-settings
+                (font-lock-eval-keywords chosen)))
+  (setq-local font-lock-fontify-region-function
+              #'treesit-font-lock-fontify-region)
+  ;; Unless `font-lock-defaults' is non-nil, font-lock does not enable
+  ;; properly (`font-lock-mode-internal' does not run).  See
+  ;; `font-lock-add-keywords'.  So give it a placeholder value and
+  ;; restart the mode.
+  (when (and font-lock-mode
+             (null font-lock-keywords)
+             (null font-lock-defaults))
+    (font-lock-mode -1)
+    (setq-local font-lock-defaults '(nil t))
+    (font-lock-mode 1)))
+
+;;; Indent
+
+(defvar treesit--indent-verbose nil
+  "Non-nil means report indentation progress with `message'.")
+
+;; Unlike most major-mode settings, this variable is not made
+;; buffer-local: with tree-sitter a single buffer can contain more
+;; than one language, so the rules are keyed by language instead.
+(defvar treesit-simple-indent-rules nil
+  "A list of indent rule settings, one per language.
+Each setting has the form (LANGUAGE . RULES), where LANGUAGE is a
+language symbol and RULES is a list of
+
+    (MATCHER ANCHOR OFFSET).
+
+MATCHER decides whether the rule applies; ANCHOR and OFFSET
+together decide which column to indent to.
+
+A MATCHER is a function that takes three arguments (NODE PARENT
+BOL).  BOL is the position being indented: the beginning of the
+line's content, i.e., the position of the first non-whitespace
+character.  NODE is the largest (highest-in-tree) node that
+starts at that position.  PARENT is the parent of NODE.
+
+If MATCHER returns non-nil, the rule matches, and ANCHOR is used
+to find an anchor position.  ANCHOR is a function that takes the
+same arguments (NODE PARENT BOL) and returns a position.
+
+Finally, the column of the position returned by ANCHOR is
+computed, OFFSET is added to it, and the line is indented to the
+resulting column.
+
+Some convenient presets are provided for MATCHER and ANCHOR.
+See `treesit-simple-indent-presets'.")
+
+(defvar treesit-simple-indent-presets
+ ;; MATCHER presets.  `match', `parent-is', `node-is' and `query' are
+ ;; generators: they take configuration arguments and return the
+ ;; actual (NODE PARENT BOL) function.
+ '((match . (lambda
+ (&optional node-type parent-type node-field
+ node-index-min node-index-max)
+ `(lambda (node parent bol &rest _)
+ (and (or (null ,node-type)
+ (equal (treesit-node-type node)
+ ,node-type))
+ (or (null ,parent-type)
+ (equal (treesit-node-type parent)
+ ,parent-type))
+ (or (null ,node-field)
+ (equal (treesit-node-field-name node)
+ ,node-field))
+ (or (null ,node-index-min)
+ (>= (treesit-node-index node t)
+ ,node-index-min))
+ (or (null ,node-index-max)
+ (<= (treesit-node-index node t)
+ ,node-index-max))))))
+ (no-node . (lambda (node parent bol &rest _) (null node)))
+ (parent-is . (lambda (type)
+ `(lambda (node parent bol &rest _)
+ (equal ,type (treesit-node-type parent)))))
+
+ (node-is . (lambda (type)
+ `(lambda (node parent bol &rest _)
+ (equal ,type (treesit-node-type node)))))
+
+ (query . (lambda (pattern)
+ `(lambda (node parent bol &rest _)
+ (cl-loop for capture
+ in (treesit-query-capture
+ parent ,pattern)
+ if (treesit-node-eq node (cdr capture))
+ return t
+ finally return nil))))
+ ;; ANCHOR presets.  Each one returns a buffer position.
+ (first-sibling . (lambda (node parent bol &rest _)
+ (treesit-node-start
+ (treesit-node-child parent 0 t))))
+
+ (parent . (lambda (node parent bol &rest _)
+ (treesit-node-start parent)))
+ (parent-bol . (lambda (node parent bol &rest _)
+ (save-excursion
+ (goto-char (treesit-node-start parent))
+ (back-to-indentation)
+ (point))))
+ (prev-sibling . (lambda (node parent bol &rest _)
+ (treesit-node-start
+ (treesit-node-prev-sibling node))))
+ (no-indent . (lambda (node parent bol &rest _) bol))
+ (prev-line . (lambda (node parent bol &rest _)
+ (save-excursion
+ (goto-char bol)
+ (forward-line -1)
+ (skip-chars-forward " \t")
+ (treesit-node-start
+ (treesit-node-at (point) nil nil t))))))
+ "A list of presets.
+These presets can be used as MATCHER and ANCHOR in
+`treesit-simple-indent-rules'.
+
+MATCHER:
+
+\(match NODE-TYPE PARENT-TYPE NODE-FIELD NODE-INDEX-MIN NODE-INDEX-MAX)
+
+ NODE-TYPE checks for node's type, PARENT-TYPE checks for
+ parent's type, NODE-FIELD checks for the field name of node
+ in the parent, NODE-INDEX-MIN and NODE-INDEX-MAX check for
+ the node's index among the named children of the parent.  An
+ argument that is nil is not checked.  Therefore, to match
+ the first child where parent is \"argument_list\", use
+
+ (match nil \"argument_list\" nil 0 0).
+
+no-node
+
+ Matches the case where node is nil, i.e., there is no node
+ that starts at point. This is the case when indenting an
+ empty line.
+
+\(parent-is TYPE)
+
+ Checks that the parent has type TYPE.
+
+\(node-is TYPE)
+
+ Checks that the node has type TYPE.
+
+\(query QUERY)
+
+ Queries the parent node with QUERY, and checks if the node
+ is captured (by any capture name).
+
+ANCHOR:
+
+first-sibling
+
+ Find the first named child of the parent.
+
+parent
+
+ Find the parent.
+
+parent-bol
+
+ Find the beginning of non-space characters on the line where
+ the parent is on.
+
+prev-sibling
+
+ Find node's previous sibling.
+
+no-indent
+
+ Use the current beginning of line content as the anchor.
+
+prev-line
+
+ Find the named node on the previous line. This can be used when
+ indenting an empty line: just indent like the previous node.")
+
+(defun treesit--simple-apply (fn args)
+  "Call FN with the arguments in the list ARGS and return the result.
+
+FN can be a function, a symbol that is a key in
+`treesit-simple-indent-presets' (the corresponding value is used
+as the function), or a list (HEAD . HEAD-ARGS).  In the list form,
+HEAD is first applied to HEAD-ARGS by these same rules, and the
+function that this returns is then applied to ARGS.  Signal an
+error if FN is none of these."
+  (let ((preset (and (symbolp fn)
+                     (alist-get fn treesit-simple-indent-presets))))
+    (cond
+     ;; A cons that is itself a function (an uncompiled lambda) must
+     ;; not be treated as (HEAD . HEAD-ARGS), hence the explicit
+     ;; `functionp' exclusion.
+     ((and (consp fn) (not (functionp fn)))
+      (let ((generated (treesit--simple-apply (car fn) (cdr fn))))
+        ;; ARGS are passed as they are, not run through this function
+        ;; again: no composing, to keep things simple.
+        (apply generated args)))
+     (preset (apply preset args))
+     ((functionp fn) (apply fn args))
+     (t (error "Couldn't find the function corresponding to %s" fn)))))
+
+;; Why keep `treesit-indent' and `treesit-simple-indent' apart and
+;; connect them through this variable?  First, more powerful
+;; indentation engines may be added later, and such an engine can
+;; probably reuse `treesit-indent'.  Second (suggested by Stefan M),
+;; it is useful to have a function that only computes how much to
+;; indent without actually performing the indentation: one may want
+;; to know where a node would indent to if it were placed at some
+;; other location, and use that information to calculate the actual
+;; indentation.  `treesit-simple-indent' is that function.
+(defvar treesit-indent-function #'treesit-simple-indent
+  "Function that `treesit-indent' calls to compute the indentation.
+
+The function is called with
+
+    (NODE PARENT BOL &rest _)
+
+and should return
+
+    (ANCHOR . OFFSET).
+
+BOL is the position of the first non-whitespace character on the
+current line; NODE is the \"largest\" node that starts at BOL;
+PARENT is its parent.  ANCHOR is a position (not a node) and
+OFFSET is a number: the column of ANCHOR plus OFFSET is the final
+indentation of the current line.")
+
+(defun treesit-indent ()
+ "Indent according to the result of `treesit-indent-function'.
+Update the parser ranges, find the largest node that starts at the
+first non-whitespace position of the current line and that node's
+parent, and call `treesit-indent-function' with them.  The line is
+indented to the column of the returned anchor plus the returned
+offset.  Do nothing if no anchor is returned."
+ (treesit-update-ranges)
+ (let* ((orig-pos (point))
+ ;; BOL is the first non-whitespace position on the current line.
+ (bol (save-excursion
+ (forward-line 0)
+ (skip-chars-forward " \t")
+ (point)))
+ ;; The first non-nil node at BOL among the buffer's parsers.
+ (smallest-node
+ (cl-loop for parser in treesit-parser-list
+ for node = (treesit-node-at
+ bol nil parser)
+ if node return node))
+ ;; Climb from SMALLEST-NODE for as long as the node still starts
+ ;; at BOL; the result is nil if even SMALLEST-NODE does not.
+ (node (treesit-parent-while
+ smallest-node
+ (lambda (node)
+ (eq bol (treesit-node-start node))))))
+ (pcase-let*
+ ((parser (if smallest-node
+ (treesit-node-parser smallest-node)
+ nil))
+ ;; NODE would be nil if BOL is on a whitespace. In that case
+ ;; we set PARENT to the "node at point", which would
+ ;; encompass the whitespace.
+ (parent (cond ((and node parser)
+ (treesit-node-parent node))
+ (parser
+ (treesit-node-at bol nil parser))
+ (t nil)))
+ ;; Destructure the (ANCHOR . OFFSET) result; both are nil when
+ ;; the indent function returns nil.
+ (`(,anchor . ,offset)
+ (funcall treesit-indent-function node parent bol)))
+ (if (null anchor)
+ (when treesit--indent-verbose
+ (message "Failed to find the anchor"))
+ (let ((col (+ (save-excursion
+ (goto-char anchor)
+ (current-column))
+ offset)))
+ ;; When point was after BOL, re-indent inside `save-excursion'
+ ;; so that point is restored afterwards; otherwise call
+ ;; `indent-line-to' directly.
+ (if (< bol orig-pos)
+ (save-excursion
+ (indent-line-to col))
+ (indent-line-to col)))))))
+
+(defun treesit-simple-indent (node parent bol)
+  "Calculate indentation according to `treesit-simple-indent-rules'.
+
+BOL is the position of the first non-whitespace character on the
+current line.  NODE is the largest node that starts at BOL, and
+PARENT is NODE's parent.
+
+The rules for PARENT's language are tried in order, and the first
+rule whose matcher returns non-nil is used.  Return (ANCHOR . OFFSET),
+where ANCHOR is the value returned by that rule's anchor (a buffer
+position when the anchor follows the contract in
+`treesit-simple-indent-rules') and OFFSET is the rule's offset,
+meaning: indent to the column of ANCHOR plus OFFSET.  Return nil
+if no rule matches."
+  (if (null parent)
+      (when treesit--indent-verbose
+        (message "PARENT is nil, not indenting"))
+    (let ((rules (alist-get (treesit-node-language parent)
+                            treesit-simple-indent-rules)))
+      (catch 'matched
+        (dolist (rule rules)
+          (let ((matcher (nth 0 rule))
+                (anchor (nth 1 rule))
+                (offset (nth 2 rule)))
+            (when (treesit--simple-apply matcher (list node parent bol))
+              (when treesit--indent-verbose
+                (message "Matched rule: %S" rule))
+              (throw 'matched
+                     (cons (treesit--simple-apply
+                            anchor (list node parent bol))
+                           offset)))))))))
+
+(defun treesit-check-indent (mode)
+  "Compare the current buffer's indentation with that of major mode MODE.
+
+Copy the buffer text into a temporary buffer, enable MODE there,
+re-indent the whole copy, and pop up a diff between the current
+buffer and the copy.  Correct indentation (target) is in green,
+current indentation is in red."
+  (interactive "CTarget major mode: ")
+  (let ((original (current-buffer)))
+    (with-temp-buffer
+      (insert-buffer-substring original)
+      ;; MODE is a command symbol; calling it turns the mode on here.
+      (funcall mode)
+      (indent-region (point-min) (point-max))
+      (diff-buffers original (current-buffer)))))
+
+;;; Debugging
+
+(defvar-local treesit--inspect-name nil
+  "Node description that `treesit-inspect-mode' shows in the mode-line.
+Set by `treesit-inspect-node-at-point'.")
+
+(defun treesit-inspect-node-at-point (&optional arg)
+ "Show information of the node at point.
+Always set `treesit--inspect-name' (which appears in the
+mode-line if `treesit-inspect-mode' is enabled).  If ARG is
+non-nil, as it is when called interactively, also show the
+information in the echo area.  Uses the first parser in
+`treesit-parser-list'."
+ (interactive "p")
+ ;; NODE-LIST contains all the nodes that start at point, from the
+ ;; smallest node upwards.
+ (let* ((node-list
+ (cl-loop for node = (treesit-node-at (point))
+ then (treesit-node-parent node)
+ while node
+ if (eq (treesit-node-start node)
+ (point))
+ collect node))
+ ;; The last collected node is the highest one starting at point.
+ (largest-node (car (last node-list)))
+ ;; NOTE(review): LARGEST-NODE is nil when no node starts at
+ ;; point; this assumes `treesit-node-parent' accepts nil -- confirm.
+ (parent (treesit-node-parent largest-node))
+ ;; node-list-acending contains all the nodes bottom-up, then
+ ;; the parent.
+ (node-list-acending
+ (if (null largest-node)
+ ;; If there are no nodes that start at point, just show
+ ;; the node at point and its parent.
+ (list (treesit-node-at (point))
+ (treesit-node-parent
+ (treesit-node-at (point))))
+ (append node-list (list parent))))
+ (name ""))
+ ;; We draw nodes like (parent field-name: (node)) recursively,
+ ;; so it could be (node1 field-name: (node2 field-name: (node3))).
+ ;; Each iteration wraps the description built so far in the next
+ ;; node up: named nodes are drawn in parentheses, others in quotes.
+ (dolist (node node-list-acending)
+ (setq
+ name
+ (concat
+ (if (treesit-node-field-name node)
+ (format " %s: " (treesit-node-field-name node))
+ " ")
+ (if (treesit-node-check node 'named) "(" "\"")
+ (or (treesit-node-type node)
+ "N/A")
+ name
+ (if (treesit-node-check node 'named) ")" "\""))))
+ (setq treesit--inspect-name name)
+ (force-mode-line-update)
+ ;; Echo the result only when ARG is non-nil.
+ (when arg
+ (if node-list
+ (message "%s" treesit--inspect-name)
+ (message "No node at point")))))
+
+(define-minor-mode treesit-inspect-mode
+  "Show the node that _starts_ at point in the mode-line.
+
+The mode-line displays
+
+    PARENT FIELD-NAME: (CHILD (GRAND-CHILD (...)))
+
+CHILD, GRAND-CHILD, GRAND-GRAND-CHILD, etc., are the nodes that
+begin at point, and PARENT is the parent of CHILD.
+
+If no node starts at point, i.e., point is in the middle of a
+node, the smallest node that spans point and its immediate parent
+are displayed instead.
+
+This minor mode doesn't create parsers on its own.  It simply
+uses the first parser in `treesit-parser-list'."
+  :lighter nil
+  (cond
+   (treesit-inspect-mode
+    ;; Refresh the description after every command in this buffer.
+    (add-hook 'post-command-hook
+              #'treesit-inspect-node-at-point 0 t)
+    (add-to-list 'mode-line-misc-info
+                 '(:eval treesit--inspect-name)))
+   (t
+    (remove-hook 'post-command-hook
+                 #'treesit-inspect-node-at-point t)
+    (setq mode-line-misc-info
+          (remove '(:eval treesit--inspect-name)
+                  mode-line-misc-info)))))
+
+(defun treesit-check-query (query language)
+ "Check if QUERY is valid for LANGUAGE.
+If QUERY is valid, say so in the echo area.  If QUERY is invalid,
+display the query in a popup buffer, jump to the offending
+pattern and highlight the pattern."
+ (let ((buf (get-buffer-create "*tree-sitter check query*")))
+ ;; The query is run against a parser created on an empty temporary
+ ;; buffer, which is enough to trigger a query error.
+ (with-temp-buffer
+ (treesit-get-parser-create language)
+ (condition-case err
+ (progn (treesit-query-in language query)
+ (message "QUERY is valid"))
+ (treesit-query-error
+ (with-current-buffer buf
+ ;; The first two elements of the error data are used as the
+ ;; error message and the position of the error in the query.
+ (let* ((data (cdr err))
+ (message (nth 0 data))
+ (start (nth 1 data)))
+ (erase-buffer)
+ ;; NOTE(review): `insert' needs a string, so this presumably
+ ;; only works for string queries -- confirm for sexp queries.
+ (insert query)
+ (goto-char start)
+ ;; Highlight from START up to and including the next space.
+ (search-forward " " nil t)
+ (put-text-property start (point) 'face 'error)
+ (message "%s" (buffer-substring start (point)))
+ ;; Put a header line with the message and position at the top,
+ ;; then move point START characters past the header.
+ (goto-char (point-min))
+ (insert (format "%s: %d\n" message start))
+ (forward-char start)))
+ (pop-to-buffer buf))))))
+
+;;; Etc
+
+(declare-function find-library-name "find-func.el")
+(defun treesit--check-manual-covarage ()
+  "Print tree-sitter functions missing from the manual in message buffer.
+Collect the names of the functions defined with `defun' in the
+treesit library, skipping names that contain \"treesit--\", and
+the names documented with @defun in doc/lispref/parsing.texi and
+doc/lispref/modes.texi under `source-directory'.  Report the
+functions that are defined but not documented."
+  (interactive)
+  (require 'find-func)
+  (let ((functions-in-source
+         (with-temp-buffer
+           ;; This library is named "treesit" (see the `provide' form
+           ;; at the end of the file); looking up "tree-sitter" would
+           ;; not find it.
+           (insert-file-contents (find-library-name "treesit"))
+           (cl-remove-if
+            (lambda (name) (string-match "treesit--" name))
+            (cl-sort
+             (save-excursion
+               (goto-char (point-min))
+               (cl-loop while (re-search-forward
+                               "^(defun \\([^ ]+\\)" nil t)
+                        collect (match-string-no-properties 1)))
+             #'string<))))
+        (functions-in-manual
+         (with-temp-buffer
+           ;; Both manual chapters are searched together.
+           (insert-file-contents (expand-file-name
+                                  "doc/lispref/parsing.texi"
+                                  source-directory))
+           (insert-file-contents (expand-file-name
+                                  "doc/lispref/modes.texi"
+                                  source-directory))
+           (cl-sort
+            (save-excursion
+              (goto-char (point-min))
+              (cl-loop while (re-search-forward
+                              "^@defun \\([^ ]+\\)" nil t)
+                       collect (match-string-no-properties 1)))
+            #'string<))))
+    (message "Missing: %s"
+             (string-join
+              (cl-remove-if
+               (lambda (name) (member name functions-in-manual))
+               functions-in-source)
+              "\n"))))
+
+(provide 'treesit)
+
+;;; treesit.el ends here
JSON_CFLAGS = @JSON_CFLAGS@
JSON_OBJ = @JSON_OBJ@
+TREE_SITTER_LIBS = @TREE_SITTER_LIBS@
+## Must be named TREE_SITTER_CFLAGS: that is the name EMACS_CFLAGS
+## references.  With the name TREE_SITTER_FLAGS the reference expanded
+## to nothing and the tree-sitter compile flags were silently dropped.
+TREE_SITTER_CFLAGS = @TREE_SITTER_CFLAGS@
+TREE_SITTER_OBJ = @TREE_SITTER_OBJ@
+
INTERVALS_H = dispextern.h intervals.h composite.h
GETLOADAVG_LIBS = @GETLOADAVG_LIBS@
$(XINPUT_CFLAGS) $(WEBP_CFLAGS) $(WEBKIT_CFLAGS) $(LCMS2_CFLAGS) \
$(SETTINGS_CFLAGS) $(FREETYPE_CFLAGS) $(FONTCONFIG_CFLAGS) \
$(HARFBUZZ_CFLAGS) $(LIBOTF_CFLAGS) $(M17N_FLT_CFLAGS) $(DEPFLAGS) \
- $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) \
+ $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) $(TREE_SITTER_CFLAGS) \
$(LIBGNUTLS_CFLAGS) $(NOTIFY_CFLAGS) $(CAIRO_CFLAGS) \
$(WERROR_CFLAGS) $(HAIKU_CFLAGS)
ALL_CFLAGS = $(EMACS_CFLAGS) $(WARN_CFLAGS) $(CFLAGS)
$(if $(HYBRID_MALLOC),sheap.o) \
$(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ) \
$(W32_OBJ) $(WINDOW_SYSTEM_OBJ) $(XGSELOBJ) $(JSON_OBJ) \
- $(HAIKU_OBJ) $(PGTK_OBJ)
+ $(TREE_SITTER_OBJ) $(HAIKU_OBJ) $(PGTK_OBJ)
doc_obj = $(base_obj) $(NS_OBJC_OBJ)
obj = $(doc_obj) $(HAIKU_CXX_OBJ)
$(LIBGNUTLS_LIBS) $(LIB_PTHREAD) $(GETADDRINFO_A_LIBS) $(LCMS2_LIBS) \
$(NOTIFY_LIBS) $(LIB_MATH) $(LIBZ) $(LIBMODULES) $(LIBSYSTEMD_LIBS) \
$(JSON_LIBS) $(LIBGMP) $(LIBGCCJIT_LIBS) $(XINPUT_LIBS) $(HAIKU_LIBS) \
- $(SQLITE3_LIBS)
+ $(TREE_SITTER_LIBS) $(SQLITE3_LIBS)
## FORCE it so that admin/unidata can decide whether this file is
## up-to-date. Although since charprop depends on bootstrap-emacs,
#include TERM_HEADER
#endif /* HAVE_WINDOW_SYSTEM */
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
#include <flexmember.h>
#include <verify.h>
#include <execinfo.h> /* For backtrace. */
if (uptr->finalizer)
uptr->finalizer (uptr->p);
}
+#ifdef HAVE_TREE_SITTER
+ else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_TS_PARSER))
+ {
+ struct Lisp_TS_Parser *lisp_parser
+ = PSEUDOVEC_STRUCT (vector, Lisp_TS_Parser);
+ ts_tree_delete(lisp_parser->tree);
+ ts_parser_delete(lisp_parser->parser);
+ }
+#endif
#ifdef HAVE_MODULES
else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_MODULE_FUNCTION))
{
#include "composite.h"
#include "keymap.h"
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
/* State for casing individual characters. */
modify_text (start, end);
prepare_casing_context (&ctx, flag, true);
+#ifdef HAVE_TREE_SITTER
+ ptrdiff_t start_byte = CHAR_TO_BYTE (start);
+ ptrdiff_t old_end_byte = CHAR_TO_BYTE (end);
+#endif
+
ptrdiff_t orig_end = end;
record_delete (start, make_buffer_string (start, end, true), false);
if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
{
signal_after_change (start, end - start - added, end - start);
update_compositions (start, end, CHECK_ALL);
+#ifdef HAVE_TREE_SITTER
+ ts_record_change (start_byte, old_end_byte, CHAR_TO_BYTE (end));
+#endif
}
return orig_end + added;
return Qxwidget;
case PVEC_XWIDGET_VIEW:
return Qxwidget_view;
+ case PVEC_TS_PARSER:
+ return Qtreesit_parser;
+ case PVEC_TS_NODE:
+ return Qtreesit_node;
case PVEC_SQLITE:
return Qsqlite;
/* "Impossible" cases. */
DEFSYM (Qterminal, "terminal");
DEFSYM (Qxwidget, "xwidget");
DEFSYM (Qxwidget_view, "xwidget-view");
+ DEFSYM (Qtreesit_parser, "treesit-parser");
+ DEFSYM (Qtreesit_node, "treesit-node");
DEFSYM (Qdefun, "defun");
#include <sys/resource.h>
#endif
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
#include "pdumper.h"
#include "fingerprint.h"
#include "epaths.h"
syms_of_module ();
#endif
+#ifdef HAVE_TREE_SITTER
+ syms_of_treesit ();
+#endif
#ifdef HAVE_SOUND
syms_of_sound ();
#endif
xsignal (Qerror, Fcons (build_string (s), arg));
}
+/* Simplified version of 'define-error' that works with pure objects.
+   Give NAME the error message MESSAGE and a condition list made of
+   NAME consed onto the condition list of PARENT.  */
+void
+define_error (Lisp_Object name, const char *message, Lisp_Object parent)
+{
+  eassert (SYMBOLP (name));
+  eassert (SYMBOLP (parent));
+
+  Lisp_Object inherited = Fget (parent, Qerror_conditions);
+  eassert (CONSP (inherited));
+  /* PARENT must appear in its own condition list, and NAME must not
+     appear there yet.  */
+  eassert (!NILP (Fmemq (parent, inherited)));
+  eassert (NILP (Fmemq (name, inherited)));
+
+  Lisp_Object conditions = pure_cons (name, inherited);
+  Fput (name, Qerror_conditions, conditions);
+  Lisp_Object text = build_pure_c_string (message);
+  Fput (name, Qerror_message, text);
+}
+
/* Use this for arithmetic overflow, e.g., when an integer result is
too large even for a bignum. */
void
#include "region-cache.h"
#include "pdumper.h"
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
static void insert_from_string_1 (Lisp_Object, ptrdiff_t, ptrdiff_t, ptrdiff_t,
ptrdiff_t, bool, bool);
static void insert_from_buffer_1 (struct buffer *, ptrdiff_t, ptrdiff_t, bool);
set_text_properties (make_fixnum (PT), make_fixnum (PT + nchars),
Qnil, Qnil, Qnil);
+#ifdef HAVE_TREE_SITTER
+ eassert (nbytes >= 0);
+ eassert (PT_BYTE >= 0);
+ ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes);
+#endif
+
adjust_point (nchars, nbytes);
check_markers ();
graft_intervals_into_buffer (intervals, PT, nchars,
current_buffer, inherit);
+#ifdef HAVE_TREE_SITTER
+ eassert (nbytes >= 0);
+ eassert (PT_BYTE >= 0);
+ ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes);
+#endif
+
adjust_point (nchars, outgoing_nbytes);
check_markers ();
current_buffer, 0);
}
+#ifdef HAVE_TREE_SITTER
+ eassert (nbytes >= 0);
+ eassert (ins_bytepos >= 0);
+ ts_record_change (ins_bytepos, ins_bytepos, ins_bytepos + nbytes);
+#endif
+
if (ins_charpos < PT)
adjust_point (nchars, nbytes);
/* Insert those intervals. */
graft_intervals_into_buffer (intervals, PT, nchars, current_buffer, inherit);
+#ifdef HAVE_TREE_SITTER
+ eassert (outgoing_nbytes >= 0);
+ eassert (PT_BYTE >= 0);
+ ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + outgoing_nbytes);
+#endif
+
adjust_point (nchars, outgoing_nbytes);
}
\f
graft_intervals_into_buffer (intervals, from, inschars,
current_buffer, inherit);
+#ifdef HAVE_TREE_SITTER
+ eassert (to_byte >= from_byte);
+ eassert (outgoing_insbytes >= 0);
+ eassert (from_byte >= 0);
+ ts_record_change (from_byte, to_byte, from_byte + outgoing_insbytes);
+#endif
+
/* Relocate point as if it were a marker. */
if (from < PT)
adjust_point ((from + inschars - (PT < to ? PT : to)),
If MARKERS, relocate markers.
Unlike most functions at this level, never call
- prepare_to_modify_buffer and never call signal_after_change. */
+ prepare_to_modify_buffer and never call signal_after_change.
+ This is because this function is called in a loop, one character at
+ a time, and the caller of 'replace_range_2' calls these hooks once
+ for the entire region. Apart from signal_after_change, any caller
+ of this function should also call ts_record_change. */
void
replace_range_2 (ptrdiff_t from, ptrdiff_t from_byte,
evaporate_overlays (from);
+#ifdef HAVE_TREE_SITTER
+ eassert (from_byte <= to_byte);
+ eassert (from_byte >= 0);
+ ts_record_change (from_byte, to_byte, from_byte);
+#endif
+
return deletion;
}
return unbind_to (count, lisp);
}
-/* Simplified version of 'define-error' that works with pure
- objects. */
-
-static void
-define_error (Lisp_Object name, const char *message, Lisp_Object parent)
-{
- eassert (SYMBOLP (name));
- eassert (SYMBOLP (parent));
- Lisp_Object parent_conditions = Fget (parent, Qerror_conditions);
- eassert (CONSP (parent_conditions));
- eassert (!NILP (Fmemq (parent, parent_conditions)));
- eassert (NILP (Fmemq (name, parent_conditions)));
- Fput (name, Qerror_conditions, pure_cons (name, parent_conditions));
- Fput (name, Qerror_message, build_pure_c_string (message));
-}
-
void
syms_of_json (void)
{
your object -- this way, the same object could be used to represent
several disparate C structures.
+ In addition, you need to add switch branches in data.c for Ftype_of.
+
You also need to add the new type to the constant
`cl--typeof-types' in lisp/emacs-lisp/cl-preloaded.el. */
PVEC_CONDVAR,
PVEC_MODULE_FUNCTION,
PVEC_NATIVE_COMP_UNIT,
+ PVEC_TS_PARSER,
+ PVEC_TS_NODE,
PVEC_SQLITE,
/* These should be last, for internal_equal and sxhash_obj. */
maybe_garbage_collect ();
}
+/* Simplified version of 'define-error' that works with pure
+ objects. */
+void
+define_error (Lisp_Object name, const char *message, Lisp_Object parent);
+
INLINE_HEADER_END
#endif /* EMACS_LISP_H */
Fcons (build_pure_c_string (MODULES_SECONDARY_SUFFIX), Vload_suffixes);
#endif
+ DEFVAR_LISP ("dynamic-library-suffixes", Vdynamic_library_suffixes,
+ doc: /* A list of suffixes for loadable dynamic libraries. */);
+ Vdynamic_library_suffixes =
+ Fcons (build_pure_c_string (DYNAMIC_LIB_SECONDARY_SUFFIX), Qnil);
+ Vdynamic_library_suffixes =
+ Fcons (build_pure_c_string (DYNAMIC_LIB_SUFFIX),
+ Vdynamic_library_suffixes);
+
#endif
DEFVAR_LISP ("module-file-suffix", Vmodule_file_suffix,
doc: /* Suffix of loadable module file, or nil if modules are not supported. */);
# include <sys/socket.h> /* for F_DUPFD_CLOEXEC */
#endif
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
struct terminal;
/* Avoid actual stack overflow in print. */
}
break;
#endif
+
+#ifdef HAVE_TREE_SITTER
+ case PVEC_TS_PARSER:
+ print_c_string ("#<treesit-parser for ", printcharfun);
+ Lisp_Object language = XTS_PARSER (obj)->language_symbol;
+ print_string (Fsymbol_name (language), printcharfun);
+ print_c_string (" in ", printcharfun);
+ print_object (XTS_PARSER (obj)->buffer, printcharfun, escapeflag);
+ printchar ('>', printcharfun);
+ break;
+ case PVEC_TS_NODE:
+ print_c_string ("#<treesit-node from ", printcharfun);
+ print_object (Ftreesit_node_start (obj),
+ printcharfun, escapeflag);
+ print_c_string (" to ", printcharfun);
+ print_object (Ftreesit_node_end (obj),
+ printcharfun, escapeflag);
+ print_c_string (" in ", printcharfun);
+ print_object (XTS_PARSER (XTS_NODE (obj)->parser)->buffer,
+ printcharfun, escapeflag);
+ printchar ('>', printcharfun);
+ break;
+#endif
+
case PVEC_SQLITE:
{
print_c_string ("#<sqlite ", printcharfun);
--- /dev/null
+/* Tree-sitter integration for GNU Emacs.
+
+Copyright (C) 2021-2022 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include "lisp.h"
+#include "buffer.h"
+#include "treesit.h"
+
+/* Commentary
+
+ The Emacs wrapper of tree-sitter does not expose everything the C
+ API provides, most notably:
+
+ - It doesn't expose a syntax tree, we put the syntax tree in the
+ parser object, and updating the tree is handled in the C level.
+
+ - We don't expose tree cursor either. I think Lisp is slow enough
+ to nullify any performance advantage of using a cursor, though I
+ don't have evidence. Also I want to minimize the number of new
+ types we introduce, currently we only add parser and node type.
+
+ - Because updating the change is handled in the C level as each
+ change is made in the buffer, there is no way for Lisp to update
+ a node. But since we can just retrieve a new node, it shouldn't
+ be a limitation.
+
+ - I didn't expose setting timeout and cancellation flag for a
+ parser, mainly because I don't think they are really necessary
+ in Emacs' use cases.
+
+ - Many tree-sitter functions ask for a TSPoint, basically a (row,
+ column) location. Emacs uses a gap buffer and keeps no
+ information about row and column position. According to the
+ author of tree-sitter, tree-sitter only asks for (row, column)
+ position to carry it around and return back to the user later;
+ and the real position used is the byte position. He also said
+ that he _thinks_ that it will work to use byte position only.
+ That's why whenever a TSPoint is asked for, we pass a dummy one
+ to it. Judging by the nature of parsing algorithms, I think it is
+ safe to use only byte position, and I don't think this will
+ change in the future.
+
+ REF: https://github.com/tree-sitter/tree-sitter/issues/445
+
+ treesit.h has some commentary on the two main data structures
+ for the parser and node. ts_ensure_position_synced has some
+ commentary on how we make tree-sitter play well with narrowing
+ (tree-sitter parser only sees the visible region, so we need to
+ translate positions back and forth). Most action happens in
+ ts_ensure_parsed, ts_read_buffer and ts_record_change.
+
+ A complete correspondence list between tree-sitter functions and
+ exposed Lisp functions can be found in the manual (elisp)API
+ Correspondence.
+
+ Placement of CHECK_xxx functions: call CHECK_xxx before using any
+ unchecked Lisp values; these include argument of Lisp functions,
+ return value of Fsymbol_value, car of a cons.
+
+ Initializing tree-sitter: there are two entry points to tree-sitter
+ functions: 'treesit-parser-create' and
+ 'treesit-language-available-p'. Therefore we only need to call
+ initialization function in those two functions.
+
+ Tree-sitter offset (0-based) and buffer position (1-based):
+ tree-sitter offset + visible_beg = buffer byte position
+ buffer byte position - visible_beg = tree-sitter offset
+
+ Tree-sitter-related code in other files:
+ - src/alloc.c for gc for parser and node
+ - src/casefiddle.c & src/insdel.c for notifying tree-sitter
+ parser of buffer changes.
+ - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and
+ node type.
+ */
+
+/*** Initialization */
+
+bool ts_initialized = false;
+
+/* calloc-compatible allocator handed to tree-sitter: return zeroed
+   storage for N objects of SIZE bytes each.  The product is checked
+   for overflow first, so an oversized request is reported as memory
+   exhaustion instead of silently yielding a block that is too
+   short.  */
+static void *
+ts_calloc_wrapper (size_t n, size_t size)
+{
+  if (size != 0 && n > SIZE_MAX / size)
+    memory_full (SIZE_MAX);
+  return xzalloc (n * size);
+}
+
+/* Make tree-sitter allocate through Emacs's allocators.  Only the
+   first call does anything; later calls return immediately.  */
+void
+ts_initialize ()
+{
+  if (ts_initialized)
+    return;
+
+  ts_set_allocator (xmalloc, ts_calloc_wrapper, xrealloc, xfree);
+  ts_initialized = true;
+}
+
+/*** Loading language library */
+
+/* Translate a symbol name such as treesit-<lang> into the C
+   identifier treesit_<lang> by replacing, in place, every '-' in
+   SYMBOL_NAME with '_'.  SYMBOL_NAME must be NUL-terminated and
+   writable.  */
+void
+ts_symbol_to_c_name (char *symbol_name)
+{
+  /* Walk to the terminating NUL directly instead of calling strlen
+     on every iteration.  */
+  for (char *p = symbol_name; *p != '\0'; p++)
+    {
+      if (*p == '-')
+        *p = '_';
+    }
+}
+
+/* Look LANGUAGE_SYMBOL up in Vtreesit_load_name_override_list, whose
+   entries are read as lists (LANG NAME C-SYMBOL).  If an entry whose
+   LANG is LANGUAGE_SYMBOL is found, store its NAME in *NAME and its
+   C-SYMBOL in *C_SYMBOL (each is checked to be a string) and return
+   true.  Otherwise return false without touching *NAME and
+   *C_SYMBOL.  */
+bool
+ts_find_override_name
+(Lisp_Object language_symbol, Lisp_Object *name, Lisp_Object *c_symbol)
+{
+  for (Lisp_Object list = Vtreesit_load_name_override_list;
+       !NILP (list); list = XCDR (list))
+    {
+      /* The list comes from a Lisp variable, so check its shape
+         before taking any car or cdr.  */
+      CHECK_CONS (list);
+      Lisp_Object entry = XCAR (list);
+      CHECK_CONS (entry);
+      Lisp_Object lang = XCAR (entry);
+      CHECK_SYMBOL (lang);
+      if (EQ (lang, language_symbol))
+        {
+          *name = Fnth (make_fixnum (1), entry);
+          CHECK_STRING (*name);
+          *c_symbol = Fnth (make_fixnum (2), entry);
+          CHECK_STRING (*c_symbol);
+          return true;
+        }
+    }
+  return false;
+}
+
+/* For example, if Vdynamic_library_suffixes is (".so", ".dylib"),
+   this function pushes "lib_base_name.so" and "lib_base_name.dylib"
+   onto *PATH_CANDIDATES.  Obviously *PATH_CANDIDATES should be a
+   Lisp list of Lisp strings.  Because each name is pushed onto the
+   front, the names end up in the reverse order of the suffixes.  */
+void
+ts_load_language_push_for_each_suffix
+(Lisp_Object lib_base_name, Lisp_Object *path_candidates)
+{
+  for (Lisp_Object suffixes = Vdynamic_library_suffixes;
+       !NILP (suffixes); suffixes = XCDR (suffixes))
+    {
+      /* The suffix list comes from a Lisp variable, so check it
+         before using it.  */
+      CHECK_CONS (suffixes);
+      Lisp_Object suffix = XCAR (suffixes);
+      CHECK_STRING (suffix);
+      *path_candidates = Fcons (concat2 (lib_base_name, suffix),
+                                *path_candidates);
+    }
+}
+
+/* Load the dynamic library of LANGUAGE_SYMBOL and return the pointer
+   to the language definition.
+
+   By default the library base name is "libtree-sitter-<lang>" and
+   the C function looked up in it is "tree_sitter_<lang>"; both can
+   be replaced through ts_find_override_name.  Candidate file names
+   are built for every dynamic library suffix, first under the
+   directories in treesit-extra-load-path, then under the
+   "tree-sitter" subdirectory of user-emacs-directory, and finally as
+   bare file names left to dynlib_open.
+
+   If SIGNAL is true, signal Qtreesit_load_language_error when the
+   library cannot be opened (the error data carries the error message
+   from each attempt), when the C function cannot be found, or when
+   tree-sitter rejects the language's version.  If SIGNAL is false,
+   return NULL in those cases.  */
+TSLanguage *
+ts_load_language (Lisp_Object language_symbol, bool signal)
+{
+  Lisp_Object symbol_name = Fsymbol_name (language_symbol);
+
+  /* Figure out the library name and C name.  The C name is kept as a
+     Lisp string rather than a malloc'ed copy so that nothing has to
+     be freed on the several exits (including non-local ones) below.
+     concat2 of a non-empty prefix yields a new string, so replacing
+     '-' with '_' in place does not affect any other object.  */
+  Lisp_Object lib_base_name
+    = concat2 (build_pure_c_string ("libtree-sitter-"), symbol_name);
+  Lisp_Object c_name
+    = concat2 (build_pure_c_string ("tree-sitter-"), symbol_name);
+  ts_symbol_to_c_name (SSDATA (c_name));
+
+  /* Override the library name and C name, if appropriate.  */
+  Lisp_Object override_name;
+  Lisp_Object override_c_name;
+  bool found_override = ts_find_override_name
+    (language_symbol, &override_name, &override_c_name);
+  if (found_override)
+    {
+      lib_base_name = override_name;
+      c_name = override_c_name;
+    }
+
+  /* Now we generate a list of possible library paths.  Candidates
+     are pushed onto the front, so those pushed last are tried
+     first.  */
+  Lisp_Object path_candidates = Qnil;
+  /* First push just the filenames to the candidate list, which will
+     make dynlib_open look under standard system load paths.  */
+  ts_load_language_push_for_each_suffix
+    (lib_base_name, &path_candidates);
+  /* Then push ~/.emacs.d/tree-sitter paths.  */
+  ts_load_language_push_for_each_suffix
+    (Fexpand_file_name
+     (concat2 (build_string ("tree-sitter/"), lib_base_name),
+      Fsymbol_value (Quser_emacs_directory)),
+     &path_candidates);
+  /* Then push paths from treesit-extra-load-path.  */
+  for (Lisp_Object tail = Freverse (Vtreesit_extra_load_path);
+       !NILP (tail); tail = XCDR (tail))
+    {
+      ts_load_language_push_for_each_suffix
+        (Fexpand_file_name (lib_base_name, XCAR (tail)),
+         &path_candidates);
+    }
+
+  /* Try loading the dynamic library by each path candidate.  Stop
+     when succeed, record the error message and try the next one when
+     fail.  HANDLE and ERROR start out as NULL so that an empty
+     candidate list is treated as a failure instead of reading
+     uninitialized variables.  */
+  dynlib_handle_ptr handle = NULL;
+  char const *error = NULL;
+  Lisp_Object error_list = Qnil;
+  for (Lisp_Object tail = path_candidates;
+       !NILP (tail); tail = XCDR (tail))
+    {
+      char *library_name = SSDATA (XCAR (tail));
+      /* Clear any stale error before the call.  */
+      dynlib_error ();
+      handle = dynlib_open (library_name);
+      error = dynlib_error ();
+      if (error == NULL)
+        break;
+      else
+        error_list = Fcons (build_string (error), error_list);
+    }
+  if (handle == NULL || error != NULL)
+    {
+      if (signal)
+        xsignal2 (Qtreesit_load_language_error,
+                  symbol_name, Fnreverse (error_list));
+      else
+        return NULL;
+    }
+
+  /* Load TSLanguage.  The C name's data pointer is fetched only
+     here, right where it is used.  */
+  dynlib_error ();
+  TSLanguage *(*langfn) ();
+  langfn = dynlib_sym (handle, SSDATA (c_name));
+  error = dynlib_error ();
+  if (error != NULL)
+    {
+      if (signal)
+        xsignal1 (Qtreesit_load_language_error,
+                  build_string (error));
+      else
+        return NULL;
+    }
+  TSLanguage *lang = (*langfn) ();
+
+  /* Check if language version matches tree-sitter version.  A
+     throwaway parser is used because ts_parser_set_language is what
+     reports the mismatch.  */
+  TSParser *parser = ts_parser_new ();
+  bool success = ts_parser_set_language (parser, lang);
+  ts_parser_delete (parser);
+  if (!success)
+    {
+      if (signal)
+        xsignal2 (Qtreesit_load_language_error,
+                  build_pure_c_string ("Language version doesn't match tree-sitter version, language version:"),
+                  make_fixnum (ts_language_version (lang)));
+      else
+        return NULL;
+    }
+  return lang;
+}
+
+/* NOTE(review): the C function name below spells "langauge"; it is
+   left as is because other code may refer to it by that name.  */
+DEFUN ("treesit-language-available-p",
+       Ftreesit_langauge_available_p,
+       Streesit_language_available_p,
+       1, 1, 0,
+       doc: /* Return non-nil if LANGUAGE exists and is loadable. */)
+  (Lisp_Object language)
+{
+  CHECK_SYMBOL (language);
+  ts_initialize ();
+  /* Ask for a NULL return instead of a signal on failure.  */
+  TSLanguage *loaded = ts_load_language (language, false);
+  return loaded != NULL ? Qt : Qnil;
+}
+
+/*** Parsing functions */
+
+/* Tell TREE that the bytes from START_BYTE to OLD_END_BYTE were
+   replaced by the bytes from START_BYTE to NEW_END_BYTE.  All (row,
+   column) points of the edit are dummies.  Assumes TREE is not
+   NULL.  */
+static inline void
+ts_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte,
+                ptrdiff_t old_end_byte, ptrdiff_t new_end_byte)
+{
+  TSPoint origin = {0, 0};
+  TSInputEdit edit;
+  edit.start_byte = (uint32_t) start_byte;
+  edit.old_end_byte = (uint32_t) old_end_byte;
+  edit.new_end_byte = (uint32_t) new_end_byte;
+  edit.start_point = origin;
+  edit.old_end_point = origin;
+  edit.new_end_point = origin;
+  ts_tree_edit (tree, &edit);
+}
+
+/* Update each parser's tree after the user made an edit.  This
+   function does not parse the buffer and only updates the tree.  (So
+   it should be very fast.)
+
+   START_BYTE, OLD_END_BYTE and NEW_END_BYTE are buffer byte
+   positions: the text between START_BYTE and OLD_END_BYTE was
+   replaced by text that now ends at NEW_END_BYTE.  For every parser
+   in the current buffer's treesit-parser-list that already has a
+   tree, the edit is translated into offsets relative to the parser's
+   visible region, applied to the tree, and the parser's visible_beg
+   and visible_end are moved so that they keep denoting the same text
+   as before, as absolute byte positions.  */
+void
+ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
+                  ptrdiff_t new_end_byte)
+{
+  for (Lisp_Object parser_list = Fsymbol_value (Qtreesit_parser_list);
+       !NILP (parser_list);
+       parser_list = XCDR (parser_list))
+    {
+      CHECK_CONS (parser_list);
+      Lisp_Object lisp_parser = XCAR (parser_list);
+      CHECK_TS_PARSER (lisp_parser);
+      TSTree *tree = XTS_PARSER (lisp_parser)->tree;
+      /* A parser without a tree has nothing to update.  */
+      if (tree == NULL)
+        continue;
+
+      eassert (start_byte <= old_end_byte);
+      eassert (start_byte <= new_end_byte);
+
+      ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
+      ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end;
+      eassert (visible_beg <= visible_end);
+
+      /* Think the recorded change as a delete followed by an insert,
+         and think of them as moving unchanged text back and forth.
+         After all, the whole point of updating the tree is to update
+         the position of unchanged text.
+
+         Tree-sitter only knows the visible region, so clip the start
+         and the old end of the change into it and express them as
+         offsets from visible_beg.  The new end is not clipped under
+         visible_end, because otherwise text inserted at the end of
+         the visible region would not be recorded.  */
+      ptrdiff_t start_offset
+        = max (visible_beg, min (visible_end, start_byte)) - visible_beg;
+      ptrdiff_t old_end_offset
+        = max (visible_beg, min (visible_end, old_end_byte)) - visible_beg;
+      ptrdiff_t new_end_offset
+        = max (visible_beg, new_end_byte) - visible_beg;
+      eassert (start_offset <= old_end_offset);
+      eassert (start_offset <= new_end_offset);
+
+      ts_tree_edit_1 (tree, start_offset, old_end_offset,
+                      new_end_offset);
+      XTS_PARSER (lisp_parser)->need_reparse = true;
+      XTS_PARSER (lisp_parser)->timestamp++;
+
+      /* Text removed or added before the visible region moves the
+         region itself.  */
+      ptrdiff_t visible_beg_delta;
+      if (old_end_byte > new_end_byte)
+        /* The text shrank: the region moves backward by the amount
+           removed before visible_beg.  */
+        visible_beg_delta = (min (visible_beg, new_end_byte)
+                             - min (visible_beg, old_end_byte));
+      else
+        /* The text grew: the region moves forward only if the whole
+           change lies before visible_beg.  */
+        visible_beg_delta = (old_end_byte < visible_beg
+                             ? new_end_byte - old_end_byte : 0);
+
+      /* visible_beg and visible_end are absolute byte positions, so
+         adjust them by deltas rather than storing an offset.  */
+      XTS_PARSER (lisp_parser)->visible_beg
+        = visible_beg + visible_beg_delta;
+      XTS_PARSER (lisp_parser)->visible_end
+        = (visible_end + visible_beg_delta
+           + (new_end_offset - old_end_offset));
+      eassert (XTS_PARSER (lisp_parser)->visible_beg >= 0);
+      eassert (XTS_PARSER (lisp_parser)->visible_beg
+               <= XTS_PARSER (lisp_parser)->visible_end);
+    }
+}
+
+/* Make PARSER's visible_beg and visible_end equal to BUF_BEGV_BYTE
+   and BUF_ZV_BYTE of its buffer, and, if PARSER already has a tree,
+   inform tree-sitter of the change as insertions or deletions at
+   the edges of the text it sees.  */
+void
+ts_ensure_position_synced (Lisp_Object parser)
+{
+  TSTree *tree = XTS_PARSER (parser)->tree;
+  struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+
+  /* Without a tree there is nothing to tell tree-sitter, but the
+     recorded region must still be brought up to date: the first
+     parse reads exactly the text between visible_beg and
+     visible_end.  */
+  if (tree == NULL)
+    {
+      XTS_PARSER (parser)->visible_beg = BUF_BEGV_BYTE (buffer);
+      XTS_PARSER (parser)->visible_end = BUF_ZV_BYTE (buffer);
+      return;
+    }
+
+  ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg;
+  ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end;
+  /* Before we parse or set ranges, catch up with the narrowing
+     situation.  We change visible_beg and visible_end to match
+     BUF_BEGV_BYTE and BUF_ZV_BYTE, and inform tree-sitter of the
+     change.  We want to move the visible range of tree-sitter to
+     match the narrowed range.  For example,
+     from ________|xxxx|__
+     to   |xxxx|__________ */
+
+  /* 1. Make sure visible_beg <= BUF_BEGV_BYTE.  */
+  if (visible_beg > BUF_BEGV_BYTE (buffer))
+    {
+      /* Tree-sitter sees: insert at the beginning.  */
+      ts_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer));
+      visible_beg = BUF_BEGV_BYTE (buffer);
+    }
+  /* 2. Make sure visible_end = BUF_ZV_BYTE.  */
+  if (visible_end < BUF_ZV_BYTE (buffer))
+    {
+      /* Tree-sitter sees: insert at the end.  */
+      ts_tree_edit_1 (tree, visible_end - visible_beg,
+                      visible_end - visible_beg,
+                      BUF_ZV_BYTE (buffer) - visible_beg);
+      visible_end = BUF_ZV_BYTE (buffer);
+    }
+  else if (visible_end > BUF_ZV_BYTE (buffer))
+    {
+      /* Tree-sitter sees: delete at the end.  */
+      ts_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg,
+                      visible_end - visible_beg,
+                      BUF_ZV_BYTE (buffer) - visible_beg);
+      visible_end = BUF_ZV_BYTE (buffer);
+    }
+  /* 3. Make sure visible_beg = BUF_BEGV_BYTE.  */
+  if (visible_beg < BUF_BEGV_BYTE (buffer))
+    {
+      /* Tree-sitter sees: delete at the beginning.  */
+      ts_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0);
+      visible_beg = BUF_BEGV_BYTE (buffer);
+    }
+  eassert (0 <= visible_beg);
+  eassert (visible_beg <= visible_end);
+
+  XTS_PARSER (parser)->visible_beg = visible_beg;
+  XTS_PARSER (parser)->visible_end = visible_end;
+}
+
+/* Signal Qtreesit_buffer_too_large if BUFFER's text takes more bytes
+   than a uint32_t can count.  The size is measured in bytes, not
+   characters, because the offsets handed to tree-sitter are byte
+   offsets stored in uint32_t.  */
+void
+ts_check_buffer_size (struct buffer *buffer)
+{
+  ptrdiff_t buffer_size_bytes
+    = BUF_Z_BYTE (buffer) - BUF_BEG_BYTE (buffer);
+  if (buffer_size_bytes > UINT32_MAX)
+    xsignal2 (Qtreesit_buffer_too_large,
+              build_pure_c_string ("Buffer size too large, size:"),
+              make_fixnum (buffer_size_bytes));
+}
+
+/* Bring PARSER's tree up to date.  Parsing is lazy: nothing happens
+   unless PARSER is marked as needing a reparse.  Signals
+   Qtreesit_parse_error if tree-sitter returns no tree.  */
+void
+ts_ensure_parsed (Lisp_Object parser)
+{
+  if (!XTS_PARSER (parser)->need_reparse)
+    return;
+
+  TSTree *old_tree = XTS_PARSER (parser)->tree;
+  TSInput input = XTS_PARSER (parser)->input;
+  struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+  ts_check_buffer_size (buffer);
+
+  /* Before we parse, catch up with the narrowing situation.  */
+  ts_ensure_position_synced (parser);
+
+  TSTree *new_tree
+    = ts_parser_parse (XTS_PARSER (parser)->parser, old_tree, input);
+  /* This should be very rare (impossible, really): it only happens
+     when 1) language is not set (impossible in Emacs because the user
+     has to supply a language to create a parser), 2) parse canceled
+     due to timeout (impossible because we don't set a timeout), 3)
+     parse canceled due to cancellation flag (impossible because we
+     don't set the flag).  (See comments for ts_parser_parse in
+     tree_sitter/api.h.)  */
+  if (new_tree == NULL)
+    {
+      Lisp_Object buf;
+      XSETBUFFER (buf, buffer);
+      xsignal1 (Qtreesit_parse_error, buf);
+    }
+
+  /* Replace the old tree with the new one.  */
+  ts_tree_delete (old_tree);
+  XTS_PARSER (parser)->tree = new_tree;
+  XTS_PARSER (parser)->need_reparse = false;
+}
+
+/* This is the read function provided to tree-sitter to read from a
+   buffer.  It reads one character at a time and automatically skips
+   the gap.
+
+   PARSER is the struct Lisp_TS_Parser being parsed.  BYTE_INDEX is
+   tree-sitter's byte offset, counted from the parser's visible_beg.
+   POSITION is ignored.  Store in *BYTES_READ the byte length of the
+   character at that offset and return its address; when the buffer
+   is dead or the offset is at or past visible_end, store 0 and
+   return NULL.  */
+const char*
+ts_read_buffer (void *parser, uint32_t byte_index,
+                TSPoint position, uint32_t *bytes_read)
+{
+  struct Lisp_TS_Parser *lisp_parser = parser;
+  struct buffer *buffer = XBUFFER (lisp_parser->buffer);
+  ptrdiff_t visible_beg = lisp_parser->visible_beg;
+  ptrdiff_t visible_end = lisp_parser->visible_end;
+  ptrdiff_t byte_pos = byte_index + visible_beg;
+
+  /* Read one character.  Tree-sitter wants us to set bytes_read to 0
+     if it reads to the end of buffer.  It doesn't say what it wants
+     for the return value in that case, so we just give it NULL.  */
+  char *beg;
+  int len;
+  /* This function could run from a user command, so it is better to
+     do nothing instead of raising an error.  */
+  if (!BUFFER_LIVE_P (buffer))
+    {
+      beg = NULL;
+      len = 0;
+    }
+  else
+    {
+      /* The visible region is synced with the buffer's narrowing
+         before a re-parse (in ts_ensure_parsed), so byte_pos will
+         never be smaller than BUF_BEG_BYTE.  These must be
+         comparisons: an assignment here would make builds with and
+         without assertions read different regions.  */
+      eassert (visible_beg == BUF_BEGV_BYTE (buffer));
+      eassert (visible_end == BUF_ZV_BYTE (buffer));
+
+      if (byte_pos >= visible_end)
+        {
+          /* Reached visible end-of-buffer, tell tree-sitter to read
+             no more.  */
+          beg = NULL;
+          len = 0;
+        }
+      else
+        {
+          /* Normal case, read a character.  */
+          beg = (char *) BUF_BYTE_ADDRESS (buffer, byte_pos);
+          len = BYTES_BY_CHAR_HEAD ((int) *beg);
+        }
+    }
+  *bytes_read = (uint32_t) len;
+  return beg;
+}
+
+/*** Functions for parser and node object*/
+
+/* Wrap the parser in a Lisp_Object to be used in the Lisp machine.
+   BUFFER is the buffer to parse, PARSER the tree-sitter parser, TREE
+   its current tree (may be NULL) and LANGUAGE_SYMBOL the symbol the
+   language was requested by.  The new object is marked as needing a
+   parse, and its visible region is initialized to BUFFER's
+   accessible portion.  */
+Lisp_Object
+make_ts_parser (Lisp_Object buffer, TSParser *parser,
+                TSTree *tree, Lisp_Object language_symbol)
+{
+  struct Lisp_TS_Parser *lisp_parser
+    = ALLOCATE_PSEUDOVECTOR
+    (struct Lisp_TS_Parser, buffer, PVEC_TS_PARSER);
+
+  lisp_parser->language_symbol = language_symbol;
+  lisp_parser->buffer = buffer;
+  lisp_parser->parser = parser;
+  lisp_parser->tree = tree;
+  /* The payload handed back to ts_read_buffer is this object.  */
+  TSInput input = {lisp_parser, ts_read_buffer, TSInputEncodingUTF8};
+  lisp_parser->input = input;
+  lisp_parser->need_reparse = true;
+  /* Give the edit counter a defined starting value; nodes compare
+     their own copy against it.  */
+  lisp_parser->timestamp = 0;
+  /* visible_beg and visible_end are byte positions (they are
+     compared with BUF_BEGV_BYTE and BUF_ZV_BYTE elsewhere), so they
+     must not be initialized from character positions.  */
+  lisp_parser->visible_beg = BUF_BEGV_BYTE (XBUFFER (buffer));
+  lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer));
+  return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
+}
+
+/* Return a Lisp object wrapping NODE, which belongs to the Lisp
+   parser PARSER.  The object records PARSER's current timestamp.  */
+Lisp_Object
+make_ts_node (Lisp_Object parser, TSNode node)
+{
+  struct Lisp_TS_Node *wrapper
+    = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Node, parser, PVEC_TS_NODE);
+
+  wrapper->parser = parser;
+  wrapper->node = node;
+  wrapper->timestamp = XTS_PARSER (parser)->timestamp;
+
+  return make_lisp_ptr (wrapper, Lisp_Vectorlike);
+}
+
+DEFUN ("treesit-parser-p",
+       Ftreesit_parser_p, Streesit_parser_p, 1, 1, 0,
+       doc: /* Return t if OBJECT is a tree-sitter parser. */)
+  (Lisp_Object object)
+{
+  return TS_PARSERP (object) ? Qt : Qnil;
+}
+
+DEFUN ("treesit-node-p",
+       Ftreesit_node_p, Streesit_node_p, 1, 1, 0,
+       doc: /* Return t if OBJECT is a tree-sitter node. */)
+  (Lisp_Object object)
+{
+  return TS_NODEP (object) ? Qt : Qnil;
+}
+
+DEFUN ("treesit-node-parser",
+       Ftreesit_node_parser, Streesit_node_parser,
+       1, 1, 0,
+       doc: /* Return the parser to which NODE belongs. */)
+  (Lisp_Object node)
+{
+  CHECK_TS_NODE (node);
+  /* The owning parser is stored in the node object itself.  */
+  Lisp_Object owner = XTS_NODE (node)->parser;
+  return owner;
+}
+
+DEFUN ("treesit-parser-create",
+       Ftreesit_parser_create, Streesit_parser_create,
+       2, 2, 0,
+       doc: /* Create and return a parser in BUFFER for LANGUAGE.
+
+The parser is automatically added to BUFFER's
+`treesit-parser-list'. LANGUAGE should be the symbol of a
+function provided by a tree-sitter language dynamic module, e.g.,
+'treesit-json. If BUFFER is nil, use the current buffer. */)
+  (Lisp_Object buffer, Lisp_Object language)
+{
+  if (NILP (buffer))
+    buffer = Fcurrent_buffer ();
+
+  CHECK_BUFFER (buffer);
+  CHECK_SYMBOL (language);
+  ts_check_buffer_size (XBUFFER (buffer));
+
+  ts_initialize ();
+
+  /* Load the language before allocating the parser: loading signals
+     on failure, and a parser allocated earlier would then never be
+     deleted.  */
+  TSLanguage *lang = ts_load_language (language, true);
+  TSParser *parser = ts_parser_new ();
+  /* We check language version when loading a language, so this should
+     always succeed.  */
+  ts_parser_set_language (parser, lang);
+
+  Lisp_Object lisp_parser
+    = make_ts_parser (buffer, parser, NULL, language);
+
+  /* treesit-parser-list is read and set with BUFFER current, so that
+     the parser is added to BUFFER's list.  */
+  struct buffer *old_buffer = current_buffer;
+  set_buffer_internal (XBUFFER (buffer));
+
+  Fset (Qtreesit_parser_list,
+        Fcons (lisp_parser, Fsymbol_value (Qtreesit_parser_list)));
+
+  set_buffer_internal (old_buffer);
+  return lisp_parser;
+}
+
+DEFUN ("treesit-parser-buffer",
+       Ftreesit_parser_buffer, Streesit_parser_buffer,
+       1, 1, 0,
+       doc: /* Return the buffer of PARSER. */)
+  (Lisp_Object parser)
+{
+  CHECK_TS_PARSER (parser);
+  struct buffer *b = XBUFFER (XTS_PARSER (parser)->buffer);
+  Lisp_Object result;
+  XSETBUFFER (result, b);
+  return result;
+}
+
+DEFUN ("treesit-parser-language",
+       Ftreesit_parser_language, Streesit_parser_language,
+       1, 1, 0,
+       doc: /* Return parser's language symbol.
+This symbol is the one used to create the parser. */)
+  (Lisp_Object parser)
+{
+  CHECK_TS_PARSER (parser);
+  /* The symbol was stored in the parser object when it was made.  */
+  Lisp_Object symbol = XTS_PARSER (parser)->language_symbol;
+  return symbol;
+}
+
+/*** Parser API */
+
+DEFUN ("treesit-parser-root-node",
+       Ftreesit_parser_root_node, Streesit_parser_root_node,
+       1, 1, 0,
+       doc: /* Return the root node of PARSER. */)
+  (Lisp_Object parser)
+{
+  CHECK_TS_PARSER (parser);
+  /* Parse first, if needed, so that the tree is current.  */
+  ts_ensure_parsed (parser);
+  TSTree *tree = XTS_PARSER (parser)->tree;
+  return make_ts_node (parser, ts_tree_root_node (tree));
+}
+
+/* Check that RANGES, the argument of
+   treesit-parser-set-included-ranges, is a list of conses
+   (BEG . END) of fixnums in which every BEG is at least the previous
+   END (at least 1 for the first range) and no END is smaller than
+   its BEG.  Signal Qtreesit_range_invalid otherwise.  */
+void
+ts_check_range_argument (Lisp_Object ranges)
+{
+  EMACS_INT prev_end = 1;
+  Lisp_Object rest = ranges;
+  while (!NILP (rest))
+    {
+      CHECK_CONS (rest);
+      Lisp_Object pair = XCAR (rest);
+      CHECK_CONS (pair);
+      CHECK_FIXNUM (XCAR (pair));
+      CHECK_FIXNUM (XCDR (pair));
+      EMACS_INT range_beg = XFIXNUM (XCAR (pair));
+      EMACS_INT range_end = XFIXNUM (XCDR (pair));
+      /* TODO: Maybe we should check for point-min/max, too?  */
+      if (range_beg < prev_end || range_end < range_beg)
+        xsignal2 (Qtreesit_range_invalid,
+                  build_pure_c_string
+                  ("RANGE is either overlapping or out-of-order"),
+                  ranges);
+      prev_end = range_end;
+      rest = XCDR (rest);
+    }
+}
+
+DEFUN ("treesit-parser-set-included-ranges",
+       Ftreesit_parser_set_included_ranges,
+       Streesit_parser_set_included_ranges,
+       2, 2, 0,
+       doc: /* Limit PARSER to RANGES.
+
+RANGES is a list of (BEG . END), each (BEG . END) confines a range in
+which the parser should operate in. Each range must not overlap, and
+each range should come in order. Signal `treesit-set-range-error'
+if the argument is invalid, or something else went wrong. If RANGES
+is nil, set PARSER to parse the whole buffer. */)
+  (Lisp_Object parser, Lisp_Object ranges)
+{
+  CHECK_TS_PARSER (parser);
+  /* nil is a valid argument (it removes the restriction), so only a
+     non-nil RANGES is required to be a cons.  */
+  if (!NILP (ranges))
+    CHECK_CONS (ranges);
+  ts_check_range_argument (ranges);
+
+  /* Before we parse, catch up with narrowing/widening.  */
+  ts_ensure_position_synced (parser);
+
+  bool success;
+  if (NILP (ranges))
+    {
+      /* If RANGES is nil, make parser to parse the whole document.
+         To do that we give tree-sitter a 0 length, the range is a
+         dummy.  */
+      TSRange ts_range = {{0, 0}, {0, 0}, 0, 0};
+      success = ts_parser_set_included_ranges
+        (XTS_PARSER (parser)->parser, &ts_range, 0);
+    }
+  else
+    {
+      /* Set ranges for PARSER.  */
+      ptrdiff_t len = list_length (ranges);
+      /* xmalloc never returns NULL, unlike malloc.  */
+      TSRange *ts_ranges = xmalloc (sizeof (TSRange) * len);
+      struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+
+      /* Walk a separate tail so that RANGES itself stays available
+         for the error report below.  */
+      ptrdiff_t idx = 0;
+      for (Lisp_Object tail = ranges; !NILP (tail);
+           tail = XCDR (tail), idx++)
+        {
+          Lisp_Object range = XCAR (tail);
+
+          ptrdiff_t beg_byte = buf_charpos_to_bytepos
+            (buffer, XFIXNUM (XCAR (range)));
+          ptrdiff_t end_byte = buf_charpos_to_bytepos
+            (buffer, XFIXNUM (XCDR (range)));
+          /* We don't care about start and end points, put in dummy
+             value.  The byte offsets are counted from the beginning
+             of the accessible portion.  */
+          TSRange rg = {{0, 0}, {0, 0},
+                        (uint32_t) (beg_byte - BUF_BEGV_BYTE (buffer)),
+                        (uint32_t) (end_byte - BUF_BEGV_BYTE (buffer))};
+          ts_ranges[idx] = rg;
+        }
+      success = ts_parser_set_included_ranges
+        (XTS_PARSER (parser)->parser, ts_ranges, (uint32_t) len);
+      /* Although XFIXNUM could signal, it should be impossible
+         because we have checked the input by ts_check_range_argument.
+         So there is no need for unwind-protect.  */
+      xfree (ts_ranges);
+    }
+
+  if (!success)
+    xsignal2 (Qtreesit_range_invalid,
+              build_pure_c_string
+              ("Something went wrong when setting ranges"),
+              ranges);
+
+  XTS_PARSER (parser)->need_reparse = true;
+  return Qnil;
+}
+
+DEFUN ("treesit-parser-included-ranges",
+       Ftreesit_parser_included_ranges,
+       Streesit_parser_included_ranges,
+       1, 1, 0,
+       doc: /* Return the ranges set for PARSER.
+See `treesit-parser-set-ranges'. If no range is set, return
+nil. */)
+  (Lisp_Object parser)
+{
+  CHECK_TS_PARSER (parser);
+  uint32_t count;
+  const TSRange *ts_ranges
+    = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &count);
+  if (count == 0)
+    return Qnil;
+
+  struct buffer *buf = XBUFFER (XTS_PARSER (parser)->buffer);
+
+  /* Cons the ranges up from the last one to the first, which leaves
+     the resulting list in tree-sitter's order.  */
+  Lisp_Object result = Qnil;
+  for (uint32_t i = count; i-- > 0; )
+    {
+      /* Tree-sitter's byte offsets are translated back to buffer
+         byte positions and then to character positions.  */
+      uint32_t beg_byte = ts_ranges[i].start_byte + BUF_BEGV_BYTE (buf);
+      uint32_t end_byte = ts_ranges[i].end_byte + BUF_BEGV_BYTE (buf);
+      Lisp_Object beg = make_fixnum (buf_bytepos_to_charpos (buf, beg_byte));
+      Lisp_Object end = make_fixnum (buf_bytepos_to_charpos (buf, end_byte));
+      result = Fcons (Fcons (beg, end), result);
+    }
+  return result;
+}
+
+/*** Node API */
+
+/* Check that OBJ is a positive integer and signal an error if
+ otherwise. */
+static void
+ts_check_positive_integer (Lisp_Object obj)
+{
+ CHECK_INTEGER (obj);
+ if (XFIXNUM (obj) < 0)
+ xsignal1 (Qargs_out_of_range, obj);
+}
+
+static void
+ts_check_node (Lisp_Object obj)
+{
+ CHECK_TS_NODE (obj);
+ Lisp_Object lisp_parser = XTS_NODE (obj)->parser;
+ if (XTS_NODE (obj)->timestamp !=
+ XTS_PARSER (lisp_parser)->timestamp)
+ xsignal1 (Qtreesit_node_outdated, obj);
+}
+
+DEFUN ("treesit-node-type",
+       Ftreesit_node_type, Streesit_node_type, 1, 1, 0,
+       doc: /* Return the NODE's type as a string.
+If NODE is nil, return nil.  */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  /* Copy the type name reported by tree-sitter into a Lisp string.  */
+  const char *type_name = ts_node_type (XTS_NODE (node)->node);
+  return make_string (type_name, strlen (type_name));
+}
+
+DEFUN ("treesit-node-start",
+       Ftreesit_node_start, Streesit_node_start, 1, 1, 0,
+       doc: /* Return the NODE's start position.
+If NODE is nil, return nil.  */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  struct Lisp_TS_Parser *owner = XTS_PARSER (XTS_NODE (node)->parser);
+  ptrdiff_t visible_beg = owner->visible_beg;
+  uint32_t offset = ts_node_start_byte (XTS_NODE (node)->node);
+
+  /* The offset from tree-sitter is added to the parser's visible_beg
+     to form a buffer byte position, which is then converted to a
+     character position.  */
+  ptrdiff_t char_pos
+    = buf_bytepos_to_charpos (XBUFFER (owner->buffer),
+                              offset + visible_beg);
+  return make_fixnum (char_pos);
+}
+
+DEFUN ("treesit-node-end",
+       Ftreesit_node_end, Streesit_node_end, 1, 1, 0,
+       doc: /* Return the NODE's end position.
+If NODE is nil, return nil.  */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  struct Lisp_TS_Parser *owner = XTS_PARSER (XTS_NODE (node)->parser);
+  ptrdiff_t visible_beg = owner->visible_beg;
+  uint32_t offset = ts_node_end_byte (XTS_NODE (node)->node);
+
+  /* The offset from tree-sitter is added to the parser's visible_beg
+     to form a buffer byte position, which is then converted to a
+     character position.  */
+  ptrdiff_t char_pos
+    = buf_bytepos_to_charpos (XBUFFER (owner->buffer),
+                              offset + visible_beg);
+  return make_fixnum (char_pos);
+}
+
+DEFUN ("treesit-node-string",
+       Ftreesit_node_string, Streesit_node_string, 1, 1, 0,
+       doc: /* Return the string representation of NODE.
+If NODE is nil, return nil.  */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  TSNode ts_node = XTS_NODE (node)->node;
+  char *string = ts_node_string (ts_node);
+  Lisp_Object result = make_string (string, strlen (string));
+  /* ts_node_string returns malloc'd memory that the caller owns;
+     release it now that the text has been copied into a Lisp
+     string.  */
+  free (string);
+  return result;
+}
+
+DEFUN ("treesit-node-parent",
+       Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0,
+       doc: /* Return the immediate parent of NODE.
+Return nil if there isn't any.  If NODE is nil, return nil.  */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  TSNode parent = ts_node_parent (XTS_NODE (node)->node);
+  /* A null TSNode is reported to Lisp as nil.  */
+  return (ts_node_is_null (parent)
+          ? Qnil
+          : make_ts_node (XTS_NODE (node)->parser, parent));
+}
+
+DEFUN ("treesit-node-child",
+       Ftreesit_node_child, Streesit_node_child, 2, 3, 0,
+       doc: /* Return the Nth child of NODE.
+
+Return nil if there isn't any.  If NAMED is non-nil, look for named
+child only.  NAMED defaults to nil.  If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object n, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  ts_check_positive_integer (n);
+  /* Tree-sitter takes a 32-bit child index; reject anything larger.  */
+  EMACS_INT child_idx = XFIXNUM (n);
+  if (child_idx > UINT32_MAX)
+    xsignal1 (Qargs_out_of_range, n);
+
+  TSNode self = XTS_NODE (node)->node;
+  TSNode child = (NILP (named)
+                  ? ts_node_child (self, (uint32_t) child_idx)
+                  : ts_node_named_child (self, (uint32_t) child_idx));
+
+  return (ts_node_is_null (child)
+          ? Qnil
+          : make_ts_node (XTS_NODE (node)->parser, child));
+}
+
+DEFUN ("treesit-node-check",
+       Ftreesit_node_check, Streesit_node_check, 2, 2, 0,
+       doc: /* Return non-nil if NODE has PROPERTY, nil otherwise.
+
+PROPERTY could be 'named, 'missing, 'extra, 'has-changes, 'has-error.
+Named nodes correspond to named rules in the language definition,
+whereas "anonymous" nodes correspond to string literals in the
+language definition.
+
+Missing nodes are inserted by the parser in order to recover from
+certain kinds of syntax errors, i.e., should be there but not there.
+
+Extra nodes represent things like comments, which are not required by
+the language definition, but can appear anywhere.
+
+A node "has changes" if the buffer changed since the node is
+created.  (Don't forget the "s" at the end of 'has-changes.)
+
+A node "has error" if itself is a syntax error or contains any syntax
+errors.  */)
+  (Lisp_Object node, Lisp_Object property)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  CHECK_SYMBOL (property);
+  TSNode self = XTS_NODE (node)->node;
+
+  /* Dispatch on PROPERTY; each branch maps straight onto one
+     tree-sitter predicate.  */
+  if (EQ (property, Qnamed))
+    return ts_node_is_named (self) ? Qt : Qnil;
+  if (EQ (property, Qmissing))
+    return ts_node_is_missing (self) ? Qt : Qnil;
+  if (EQ (property, Qextra))
+    return ts_node_is_extra (self) ? Qt : Qnil;
+  if (EQ (property, Qhas_error))
+    return ts_node_has_error (self) ? Qt : Qnil;
+  if (EQ (property, Qhas_changes))
+    return ts_node_has_changes (self) ? Qt : Qnil;
+
+  /* Any other symbol is rejected.  */
+  signal_error ("Expecting 'named, 'missing, 'extra, 'has-changes or 'has-error, got",
+                property);
+}
+
+DEFUN ("treesit-node-field-name-for-child",
+       Ftreesit_node_field_name_for_child,
+       Streesit_node_field_name_for_child, 2, 2, 0,
+       doc: /* Return the field name of the Nth child of NODE.
+
+Return nil if there isn't any child or no field is found.
+If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object n)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  ts_check_positive_integer (n);
+  /* Tree-sitter takes a 32-bit child index; reject anything larger.  */
+  EMACS_INT child_idx = XFIXNUM (n);
+  if (child_idx > UINT32_MAX)
+    xsignal1 (Qargs_out_of_range, n);
+
+  const char *field
+    = ts_node_field_name_for_child (XTS_NODE (node)->node,
+                                    (uint32_t) child_idx);
+  /* A NULL name is reported to Lisp as nil.  */
+  return field == NULL ? Qnil : build_string (field);
+}
+
+DEFUN ("treesit-node-child-count",
+       Ftreesit_node_child_count,
+       Streesit_node_child_count, 1, 2, 0,
+       doc: /* Return the number of children of NODE.
+
+If NAMED is non-nil, count named child only.  NAMED defaults to
+nil.  If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  TSNode self = XTS_NODE (node)->node;
+  /* Pick the counting routine according to NAMED.  */
+  uint32_t n_children = (NILP (named)
+                         ? ts_node_child_count (self)
+                         : ts_node_named_child_count (self));
+  return make_fixnum (n_children);
+}
+
+DEFUN ("treesit-node-child-by-field-name",
+       Ftreesit_node_child_by_field_name,
+       Streesit_node_child_by_field_name, 2, 2, 0,
+       doc: /* Return the child of NODE with FIELD-NAME.
+Return nil if there isn't any.  If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object field_name)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  CHECK_STRING (field_name);
+
+  /* The field name is handed to tree-sitter as a C string whose
+     length is measured with strlen.  */
+  char *field = SSDATA (field_name);
+  TSNode child = ts_node_child_by_field_name (XTS_NODE (node)->node,
+                                              field, strlen (field));
+
+  return (ts_node_is_null (child)
+          ? Qnil
+          : make_ts_node (XTS_NODE (node)->parser, child));
+}
+
+DEFUN ("treesit-node-next-sibling",
+       Ftreesit_node_next_sibling,
+       Streesit_node_next_sibling, 1, 2, 0,
+       doc: /* Return the next sibling of NODE.
+
+Return nil if there isn't any.  If NAMED is non-nil, look for named
+sibling only.  NAMED defaults to nil.  If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  TSNode self = XTS_NODE (node)->node;
+  /* NAMED selects between the plain and the named-only lookup.  */
+  TSNode next = (NILP (named)
+                 ? ts_node_next_sibling (self)
+                 : ts_node_next_named_sibling (self));
+
+  return (ts_node_is_null (next)
+          ? Qnil
+          : make_ts_node (XTS_NODE (node)->parser, next));
+}
+
+DEFUN ("treesit-node-prev-sibling",
+       Ftreesit_node_prev_sibling,
+       Streesit_node_prev_sibling, 1, 2, 0,
+       doc: /* Return the previous sibling of NODE.
+
+Return nil if there isn't any.  If NAMED is non-nil, look for named
+sibling only.  NAMED defaults to nil.  If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  TSNode self = XTS_NODE (node)->node;
+  /* NAMED selects between the plain and the named-only lookup.  */
+  TSNode prev = (NILP (named)
+                 ? ts_node_prev_sibling (self)
+                 : ts_node_prev_named_sibling (self));
+
+  return (ts_node_is_null (prev)
+          ? Qnil
+          : make_ts_node (XTS_NODE (node)->parser, prev));
+}
+
+DEFUN ("treesit-node-first-child-for-pos",
+       Ftreesit_node_first_child_for_pos,
+       Streesit_node_first_child_for_pos, 2, 3, 0,
+       doc: /* Return the first child of NODE on POS.
+
+Specifically, return the first child that extends beyond POS.  POS is
+a position in the buffer.  Return nil if there isn't any.  If NAMED is
+non-nil, look for named child only.  NAMED defaults to nil.  Note that
+this function returns an immediate child, not the smallest
+(grand)child.  If NODE is nil, return nil.
+
+Signal `args-out-of-range' if POS is outside the accessible portion
+of the node's buffer.  */)
+  (Lisp_Object node, Lisp_Object pos, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  ts_check_positive_integer (pos);
+
+  struct buffer *buf =
+    XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+  ptrdiff_t visible_beg =
+    XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+
+  /* Validate the character position BEFORE converting it to a byte
+     position, so that buf_charpos_to_bytepos is never handed a
+     position that lies outside the buffer.  Character and byte
+     positions grow together, so this accepts exactly the positions
+     the byte-based comparison accepted.  */
+  EMACS_INT char_pos = XFIXNUM (pos);
+  if (char_pos < BUF_BEGV (buf) || char_pos > BUF_ZV (buf))
+    xsignal1 (Qargs_out_of_range, pos);
+  ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, char_pos);
+
+  TSNode ts_node = XTS_NODE (node)->node;
+  TSNode child;
+  /* Tree-sitter works with offsets relative to visible_beg.  */
+  if (NILP (named))
+    child = ts_node_first_child_for_byte
+      (ts_node, byte_pos - visible_beg);
+  else
+    child = ts_node_first_named_child_for_byte
+      (ts_node, byte_pos - visible_beg);
+
+  if (ts_node_is_null (child))
+    return Qnil;
+
+  return make_ts_node (XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-descendant-for-range",
+       Ftreesit_node_descendant_for_range,
+       Streesit_node_descendant_for_range, 3, 4, 0,
+       doc: /* Return the smallest node that covers BEG to END.
+
+The returned node is a descendant of NODE.  BEG and END are buffer
+positions.  Return nil if there isn't any.  If NAMED is non-nil, look
+for named child only.  NAMED defaults to nil.  If NODE is nil, return
+nil.
+
+Signal `args-out-of-range' unless BEG <= END and both lie within the
+accessible portion of the node's buffer.  */)
+  (Lisp_Object node, Lisp_Object beg, Lisp_Object end, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  CHECK_INTEGER (beg);
+  CHECK_INTEGER (end);
+  /* CHECK_INTEGER also accepts bignums, on which XFIXNUM must not be
+     used.  A bignum cannot be a valid buffer position anyway.  */
+  if (!FIXNUMP (beg) || !FIXNUMP (end))
+    xsignal2 (Qargs_out_of_range, beg, end);
+
+  struct buffer *buf =
+    XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+  ptrdiff_t visible_beg =
+    XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+
+  /* Checks for BUFFER_BEG <= BEG <= END <= BUFFER_END.  This is done
+     on character positions, before the conversion to byte positions,
+     so that buf_charpos_to_bytepos is never handed a position that
+     lies outside the buffer.  */
+  EMACS_INT char_beg = XFIXNUM (beg);
+  EMACS_INT char_end = XFIXNUM (end);
+  if (!(BUF_BEGV (buf) <= char_beg
+        && char_beg <= char_end
+        && char_end <= BUF_ZV (buf)))
+    xsignal2 (Qargs_out_of_range, beg, end);
+
+  ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, char_beg);
+  ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, char_end);
+
+  TSNode ts_node = XTS_NODE (node)->node;
+  TSNode child;
+  /* Tree-sitter works with offsets relative to visible_beg.  */
+  if (NILP (named))
+    child = ts_node_descendant_for_byte_range
+      (ts_node, byte_beg - visible_beg, byte_end - visible_beg);
+  else
+    child = ts_node_named_descendant_for_byte_range
+      (ts_node, byte_beg - visible_beg, byte_end - visible_beg);
+
+  if (ts_node_is_null (child))
+    return Qnil;
+
+  return make_ts_node (XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-eq",
+       Ftreesit_node_eq,
+       Streesit_node_eq, 2, 2, 0,
+       doc: /* Return non-nil if NODE1 and NODE2 are the same node.
+If any one of NODE1 and NODE2 is nil, return nil.  */)
+  (Lisp_Object node1, Lisp_Object node2)
+{
+  if (NILP (node1) || NILP (node2))
+    return Qnil;
+  /* Only the object types are checked here; the nodes' timestamps
+     are not compared against their parsers.  */
+  CHECK_TS_NODE (node1);
+  CHECK_TS_NODE (node2);
+
+  return (ts_node_eq (XTS_NODE (node1)->node, XTS_NODE (node2)->node)
+          ? Qt
+          : Qnil);
+}
+
+/*** Query functions */
+
+/* If we decide to pre-load treesit.el, maybe we can implement
+   this function in Lisp.  */
+DEFUN ("treesit-expand-pattern",
+       Ftreesit_expand_pattern,
+       Streesit_expand_pattern, 1, 1, 0,
+       doc: /* Expand PATTERN to its string form.
+
+PATTERN can be
+
+    :anchor
+    :?
+    :*
+    :+
+    :equal
+    :match
+    (TYPE PATTERN...)
+    [PATTERN...]
+    FIELD-NAME:
+    @CAPTURE-NAME
+    (_)
+    _
+    \"TYPE\"
+
+Consult Info node `(elisp)Pattern Matching' for detailed
+explanation.  */)
+  (Lisp_Object pattern)
+{
+  /* Keywords that expand to a fixed piece of query syntax, tried in
+     this order.  */
+  static const struct
+  {
+    const char *keyword;
+    const char *expansion;
+  } fixed_forms[] =
+    {
+      { ":anchor", "." },
+      { ":?", "?" },
+      { ":*", "*" },
+      { ":+", "+" },
+      { ":equal", "#equal" },
+      { ":match", "#match" },
+    };
+  int n_fixed = sizeof fixed_forms / sizeof fixed_forms[0];
+  for (int i = 0; i < n_fixed; i++)
+    if (EQ (pattern, intern_c_string (fixed_forms[i].keyword)))
+      return build_pure_c_string (fixed_forms[i].expansion);
+
+  /* Vectors are wrapped in brackets, everything else in parentheses.  */
+  bool is_vector = VECTORP (pattern);
+  Lisp_Object opener = build_pure_c_string (is_vector ? "[" : "(");
+  Lisp_Object closer = build_pure_c_string (is_vector ? "]" : ")");
+
+  /* Sequences are expanded element by element, joined with spaces.  */
+  if (is_vector || CONSP (pattern))
+    {
+      Lisp_Object expander = intern_c_string ("treesit-expand-pattern");
+      Lisp_Object inner
+        = Fmapconcat (expander, pattern, build_pure_c_string (" "));
+      return concat3 (opener, inner, closer);
+    }
+
+  /* Anything else is formatted with %S.  */
+  return CALLN (Fformat, build_pure_c_string ("%S"), pattern);
+}
+
+DEFUN ("treesit-expand-query",
+       Ftreesit_expand_query,
+       Streesit_expand_query, 1, 1, 0,
+       doc: /* Expand sexp QUERY to its string form.
+
+A PATTERN in QUERY can be
+
+    :anchor
+    :?
+    :*
+    :+
+    :equal
+    :match
+    (TYPE PATTERN...)
+    [PATTERN...]
+    FIELD-NAME:
+    @CAPTURE-NAME
+    (_)
+    _
+    \"TYPE\"
+
+Consult Info node `(elisp)Pattern Matching' for detailed
+explanation.  */)
+  (Lisp_Object query)
+{
+  /* Expand every element of QUERY with `treesit-expand-pattern' and
+     join the pieces with single spaces.  */
+  Lisp_Object expander = intern_c_string ("treesit-expand-pattern");
+  Lisp_Object separator = build_pure_c_string (" ");
+  return Fmapconcat (expander, query, separator);
+}
+
+char*
+ts_query_error_to_string (TSQueryError error)
+{
+ switch (error)
+ {
+ case TSQueryErrorNone:
+ return "None";
+ case TSQueryErrorSyntax:
+ return "Syntax error at";
+ case TSQueryErrorNodeType:
+ return "Node type error at";
+ case TSQueryErrorField:
+ return "Field error at";
+ case TSQueryErrorCapture:
+ return "Capture error at";
+ case TSQueryErrorStructure:
+ return "Structure error at";
+ default:
+ return "Unknown error";
+ }
+}
+
+/* Gather the predicates attached to pattern PATTERN_INDEX of QUERY
+   and return them as a list.  Each predicate is itself a list whose
+   elements are symbols (for capture steps) and strings (for string
+   steps), in the order the steps appear.  */
+Lisp_Object
+ts_predicates_for_pattern
+(TSQuery *query, uint32_t pattern_index)
+{
+  uint32_t step_count;
+  const TSQueryPredicateStep *steps
+    = ts_query_predicates_for_pattern (query, pattern_index, &step_count);
+  /* Both lists are built by pushing and reversed when complete.  */
+  Lisp_Object finished = Qnil;
+  Lisp_Object current = Qnil;
+  for (int i = 0; i < step_count; i++)
+    {
+      TSQueryPredicateStep step = steps[i];
+      uint32_t text_len;
+      if (step.type == TSQueryPredicateStepTypeCapture)
+        {
+          /* A capture step becomes a symbol named after the capture.  */
+          const char *text = ts_query_capture_name_for_id
+            (query, step.value_id, &text_len);
+          current = Fcons (intern_c_string_1 (text, text_len), current);
+        }
+      else if (step.type == TSQueryPredicateStepTypeString)
+        {
+          /* A string step becomes a Lisp string.  */
+          const char *text = ts_query_string_value_for_id
+            (query, step.value_id, &text_len);
+          current = Fcons (make_string (text, text_len), current);
+        }
+      else if (step.type == TSQueryPredicateStepTypeDone)
+        {
+          /* A done step closes the predicate being collected.  */
+          finished = Fcons (Fnreverse (current), finished);
+          current = Qnil;
+        }
+    }
+  return Fnreverse (finished);
+}
+
+/* Look up the capture NAME (a symbol) in the alist CAPTURES and
+   return the buffer text spanned by the node captured under it.
+   Signals treesit-query-error if no such node was captured.  */
+Lisp_Object
+ts_predicate_capture_name_to_text (Lisp_Object name, Lisp_Object captures)
+{
+  /* Find the first (NAME . NODE) entry whose car is NAME.  */
+  Lisp_Object node = Qnil;
+  Lisp_Object rest = captures;
+  while (!NILP (rest))
+    {
+      Lisp_Object entry = XCAR (rest);
+      if (EQ (XCAR (entry), name))
+        {
+          node = XCDR (entry);
+          break;
+        }
+      rest = XCDR (rest);
+    }
+
+  if (NILP (node))
+    xsignal3 (Qtreesit_query_error,
+              build_pure_c_string ("Cannot find captured node"),
+              name, build_pure_c_string ("A predicate can only refer to captured nodes in the same pattern"));
+
+  /* Fbuffer_substring works on the current buffer, so switch to the
+     buffer of the node's parser for the extraction and switch back
+     afterwards.  */
+  struct buffer *previous = current_buffer;
+  struct buffer *node_buffer
+    = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+  set_buffer_internal (node_buffer);
+  Lisp_Object start = Ftreesit_node_start (node);
+  Lisp_Object end = Ftreesit_node_end (node);
+  Lisp_Object text = Fbuffer_substring (start, end);
+  set_buffer_internal (previous);
+  return text;
+}
+
+/* Handles predicate (#equal A B).  Return true if A equals B; return
+   false otherwise.  A and B can be either string, or a capture name.
+   The capture name evaluates to the text its captured node spans in
+   the buffer.  ARGS is the list (A B); CAPTURES is the alist of
+   (NAME . NODE) captures of the current match.  Signals
+   treesit-query-error if ARGS does not have exactly two elements.  */
+bool
+ts_predicate_equal (Lisp_Object args, Lisp_Object captures)
+{
+  /* Compute the length once and reuse it for the error data.  */
+  Lisp_Object nargs = Flength (args);
+  if (XFIXNUM (nargs) != 2)
+    xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `equal' requires two arguments but only given"), nargs);
+
+  Lisp_Object arg1 = XCAR (args);
+  Lisp_Object arg2 = XCAR (XCDR (args));
+  /* Strings stand for themselves; anything else is treated as a
+     capture name and replaced by the captured node's text.  */
+  Lisp_Object text1 = STRINGP (arg1) ? arg1 :
+    ts_predicate_capture_name_to_text (arg1, captures);
+  Lisp_Object text2 = STRINGP (arg2) ? arg2 :
+    ts_predicate_capture_name_to_text (arg2, captures);
+
+  return !NILP (Fstring_equal (text1, text2));
+}
+
+/* Handles predicate (#match "regexp" @node).  Return true if "regexp"
+   matches the text spanned by @node; return false otherwise.  ARGS is
+   the list (REGEXP CAPTURE-NAME); CAPTURES is the alist of
+   (NAME . NODE) captures of the current match.  Signals
+   treesit-query-error if ARGS does not have exactly two elements, if
+   the arguments have the wrong types, or if CAPTURE-NAME was not
+   captured.  */
+bool
+ts_predicate_match (Lisp_Object args, Lisp_Object captures)
+{
+  Lisp_Object nargs = Flength (args);
+  if (XFIXNUM (nargs) != 2)
+    xsignal2 (Qtreesit_query_error, build_pure_c_string ("Predicate `match' requires two arguments but only given"), nargs);
+
+  Lisp_Object regexp = XCAR (args);
+  Lisp_Object capture_name = XCAR (XCDR (args));
+
+  /* It's probably common to get the argument order backwards.  Catch
+     this mistake early and show helpful explanation, because Emacs
+     loves you.  (We put the regexp first because that's what
+     string-match does.)  These checks must run before the capture
+     lookup below: with swapped arguments the lookup would fail first
+     and hide the explanation.  */
+  if (!STRINGP (regexp))
+    xsignal1 (Qtreesit_query_error, build_pure_c_string ("The first argument to `match' should be a regexp string, not a capture name"));
+  if (!SYMBOLP (capture_name))
+    xsignal1 (Qtreesit_query_error, build_pure_c_string ("The second argument to `match' should be a capture name, not a string"));
+
+  Lisp_Object text = ts_predicate_capture_name_to_text
+    (capture_name, captures);
+
+  /* fast_string_match returns a negative value when nothing matches.  */
+  return fast_string_match (regexp, text) >= 0;
+}
+
+/* About predicates: I decided to hard-code predicates in C instead of
+ implementing an extensible system where predicates are translated
+ to Lisp functions, and new predicates can be added by extending a
+ list of functions, because I really couldn't imagine any useful
+ predicates besides equal and match. If we later find out that
+ such a system is indeed useful and necessary, it can be easily
+ added. */
+
+/* If all predicates in PREDICATES pass, return true; otherwise
+   return false.  PREDICATES is a list of predicates, each a list
+   whose car names the predicate ("equal" or "match") and whose cdr
+   holds its arguments.  CAPTURES is the alist of (NAME . NODE)
+   captures the predicates may refer to.  Signals treesit-query-error
+   for a predicate other than equal or match.  */
+bool
+ts_eval_predicates (Lisp_Object captures, Lisp_Object predicates)
+{
+  bool pass = true;
+  /* Evaluate each predicate.  The individual results are AND-ed
+     together; plainly assigning to PASS would let the last predicate
+     alone decide the outcome.  Every predicate is still evaluated
+     (the call sits on the left of `&&'), so a malformed predicate
+     later in the list keeps signaling its error.  */
+  for (Lisp_Object tail = predicates;
+       !NILP (tail); tail = XCDR (tail))
+    {
+      Lisp_Object predicate = XCAR (tail);
+      Lisp_Object fn = XCAR (predicate);
+      Lisp_Object args = XCDR (predicate);
+      if (!NILP (Fstring_equal (fn, build_pure_c_string ("equal"))))
+        pass = ts_predicate_equal (args, captures) && pass;
+      else if (!NILP (Fstring_equal
+                      (fn, build_pure_c_string ("match"))))
+        pass = ts_predicate_match (args, captures) && pass;
+      else
+        xsignal3 (Qtreesit_query_error,
+                  build_pure_c_string ("Invalid predicate"),
+                  fn, build_pure_c_string ("Currently Emacs only supports equal and match predicate"));
+    }
+  return pass;
+}
+
+DEFUN ("treesit-query-capture",
+       Ftreesit_query_capture,
+       Streesit_query_capture, 2, 4, 0,
+       doc: /* Query NODE with patterns in QUERY.
+
+Return a list of (CAPTURE_NAME . NODE).  CAPTURE_NAME is the name
+assigned to the node in PATTERN.  NODE is the captured node.
+
+QUERY is either a string query or a sexp query.  See Info node
+`(elisp)Pattern Matching' for how to write a query in either string or
+s-expression form.
+
+BEG and END, if both non-nil, specify the range in which the query
+is executed.
+
+Raise a treesit-query-error if QUERY is malformed, or something
+else goes wrong.  */)
+  (Lisp_Object node, Lisp_Object query,
+   Lisp_Object beg, Lisp_Object end)
+{
+  ts_check_node (node);
+  if (!NILP (beg))
+    CHECK_INTEGER (beg);
+  if (!NILP (end))
+    CHECK_INTEGER (end);
+
+  /* A sexp query is first expanded into its string form.  */
+  if (CONSP (query))
+    query = Ftreesit_expand_query (query);
+  else
+    CHECK_STRING (query);
+
+  /* Extract C values from Lisp objects.  */
+  TSNode ts_node = XTS_NODE (node)->node;
+  Lisp_Object lisp_parser = XTS_NODE (node)->parser;
+  ptrdiff_t visible_beg =
+    XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+  const TSLanguage *lang = ts_parser_language
+    (XTS_PARSER (lisp_parser)->parser);
+  char *source = SSDATA (query);
+
+  /* Initialize query objects, and execute query.  */
+  uint32_t error_offset;
+  TSQueryError error_type;
+  /* TODO: We could cache the query object, so that repeatedly
+     querying with the same query can reuse the query object.  It also
+     saves us from expanding the sexp query into a string.  I don't
+     know how much time that could save though.  */
+  TSQuery *ts_query = ts_query_new (lang, source, strlen (source),
+                                    &error_offset, &error_type);
+
+  /* Report a malformed query before allocating the cursor, so that
+     the non-local exit does not leak a cursor.  The offset is
+     reported 1-based.  */
+  if (ts_query == NULL)
+    xsignal2 (Qtreesit_query_error,
+              build_string (ts_query_error_to_string (error_type)),
+              make_fixnum (error_offset + 1));
+
+  TSQueryCursor *cursor = ts_query_cursor_new ();
+
+  if (!NILP (beg) && !NILP (end))
+    {
+      /* NOTE(review): BEG and END are used as byte positions here
+         without a character-to-byte conversion, unlike the node
+         functions in this file -- confirm whether that is intended
+         for buffers containing multibyte text.  */
+      EMACS_INT beg_byte = XFIXNUM (beg);
+      EMACS_INT end_byte = XFIXNUM (end);
+      ts_query_cursor_set_byte_range
+        (cursor, (uint32_t) beg_byte - visible_beg,
+         (uint32_t) end_byte - visible_beg);
+    }
+
+  ts_query_cursor_exec (cursor, ts_query, ts_node);
+  TSQueryMatch match;
+
+  /* Go over each match, collect captures and predicates.  Include the
+     captures in the return list if all predicates in that match
+     pass.
+     NOTE(review): predicate evaluation can signal, which would skip
+     the two delete calls at the end of this function -- consider an
+     unwind-protect.  */
+  Lisp_Object result = Qnil;
+  while (ts_query_cursor_next_match (cursor, &match))
+    {
+      /* Get captured nodes.  */
+      Lisp_Object captures_lisp = Qnil;
+      const TSQueryCapture *captures = match.captures;
+      for (int idx = 0; idx < match.capture_count; idx++)
+        {
+          uint32_t capture_name_len;
+          TSQueryCapture capture = captures[idx];
+          Lisp_Object captured_node =
+            make_ts_node (lisp_parser, capture.node);
+          const char *capture_name = ts_query_capture_name_for_id
+            (ts_query, capture.index, &capture_name_len);
+          Lisp_Object cap =
+            Fcons (intern_c_string_1 (capture_name, capture_name_len),
+                   captured_node);
+          captures_lisp = Fcons (cap, captures_lisp);
+        }
+      /* Get predicates.  */
+      Lisp_Object predicates =
+        ts_predicates_for_pattern (ts_query, match.pattern_index);
+
+      /* Captures were pushed, so put them back in match order.  */
+      captures_lisp = Fnreverse (captures_lisp);
+      if (ts_eval_predicates (captures_lisp, predicates))
+        {
+          result = CALLN (Fnconc, result, captures_lisp);
+        }
+    }
+  ts_query_delete (ts_query);
+  ts_query_cursor_delete (cursor);
+  return result;
+}
+
+/*** Initialization */
+
+/* Initialize the tree-sitter routines: define the symbols, error
+ conditions, Lisp variables and primitives of this file. */
+void
+syms_of_treesit (void)
+{
+ /* Type predicate symbols, used by CHECK_TS_PARSER and
+ CHECK_TS_NODE. */
+ DEFSYM (Qtreesit_parser_p, "treesit-parser-p");
+ DEFSYM (Qtreesit_node_p, "treesit-node-p");
+ /* Property symbols accepted by treesit-node-check. */
+ DEFSYM (Qnamed, "named");
+ DEFSYM (Qmissing, "missing");
+ DEFSYM (Qextra, "extra");
+ DEFSYM (Qhas_changes, "has-changes");
+ DEFSYM (Qhas_error, "has-error");
+
+ /* Error symbols; their conditions are defined further below. */
+ DEFSYM (Qtreesit_error, "treesit-error");
+ DEFSYM (Qtreesit_query_error, "treesit-query-error");
+ DEFSYM (Qtreesit_parse_error, "treesit-parse-error");
+ DEFSYM (Qtreesit_range_invalid, "treesit-range-invalid");
+ DEFSYM (Qtreesit_buffer_too_large,
+ "treesit-buffer-too-large");
+ DEFSYM (Qtreesit_load_language_error,
+ "treesit-load-language-error");
+ DEFSYM (Qtreesit_node_outdated,
+ "treesit-node-outdated");
+ DEFSYM (Quser_emacs_directory,
+ "user-emacs-directory");
+
+ /* treesit-error is the parent of every other tree-sitter error, and
+ is itself a child of error. */
+ define_error (Qtreesit_error, "Generic tree-sitter error", Qerror);
+ define_error (Qtreesit_query_error, "Query pattern is malformed",
+ Qtreesit_error);
+ /* Should be impossible, no need to document this error. */
+ define_error (Qtreesit_parse_error, "Parse failed",
+ Qtreesit_error);
+ define_error (Qtreesit_range_invalid,
+ "RANGES are invalid, they have to be ordered and not overlapping",
+ Qtreesit_error);
+ define_error (Qtreesit_buffer_too_large, "Buffer too large (> 4GB)",
+ Qtreesit_error);
+ define_error (Qtreesit_load_language_error,
+ "Cannot load language definition",
+ Qtreesit_error);
+ define_error (Qtreesit_node_outdated,
+ "This node is outdated, please retrieve a new one",
+ Qtreesit_error);
+
+ /* The parser list starts out as nil and is made buffer-local right
+ after it is defined. */
+ DEFSYM (Qtreesit_parser_list, "treesit-parser-list");
+ DEFVAR_LISP ("treesit-parser-list", Vtreesit_parser_list,
+ doc: /* A list of tree-sitter parsers.
+
+If you removed a parser from this list, do not put it back in. Emacs
+keeps the parser in this list updated with any change in the buffer.
+If removed and put back in, there is no guarantee that the parser is in
+sync with the buffer's content. */);
+ Vtreesit_parser_list = Qnil;
+ Fmake_variable_buffer_local (Qtreesit_parser_list);
+
+ DEFVAR_LISP ("treesit-load-name-override-list",
+ Vtreesit_load_name_override_list,
+ doc:
+ /* An override list for unconventional tree-sitter libraries.
+
+By default, Emacs assumes the dynamic library for LANG is
+libtree-sitter-LANG.EXT, where EXT is the OS specific extension for
+dynamic libraries. Emacs also assumes that the name of the C function
+the library provides is tree_sitter_LANG. If that is not the case,
+add an entry
+
+ (LANG LIBRARY-BASE-NAME FUNCTION-NAME)
+
+to this list, where LIBRARY-BASE-NAME is the filename of the dynamic
+library without extension, FUNCTION-NAME is the function provided by
+the library. */);
+ Vtreesit_load_name_override_list = Qnil;
+
+ DEFVAR_LISP ("treesit-extra-load-path",
+ Vtreesit_extra_load_path,
+ doc:
+ /* Extra load paths of tree-sitter language definitions.
+When trying to load a tree-sitter language definition,
+Emacs looks at directories in this variable,
+`user-emacs-directory'/tree-sitter, and system default locations for
+dynamic libraries, in that order. */);
+ Vtreesit_extra_load_path = Qnil;
+
+ /* Register the Lisp primitives defined in this file. */
+ defsubr (&Streesit_language_available_p);
+
+ defsubr (&Streesit_parser_p);
+ defsubr (&Streesit_node_p);
+
+ defsubr (&Streesit_node_parser);
+
+ defsubr (&Streesit_parser_create);
+ defsubr (&Streesit_parser_buffer);
+ defsubr (&Streesit_parser_language);
+
+ defsubr (&Streesit_parser_root_node);
+ /* defsubr (&Streesit_parse_string); */
+
+ defsubr (&Streesit_parser_set_included_ranges);
+ defsubr (&Streesit_parser_included_ranges);
+
+ /* Node API. */
+ defsubr (&Streesit_node_type);
+ defsubr (&Streesit_node_start);
+ defsubr (&Streesit_node_end);
+ defsubr (&Streesit_node_string);
+ defsubr (&Streesit_node_parent);
+ defsubr (&Streesit_node_child);
+ defsubr (&Streesit_node_check);
+ defsubr (&Streesit_node_field_name_for_child);
+ defsubr (&Streesit_node_child_count);
+ defsubr (&Streesit_node_child_by_field_name);
+ defsubr (&Streesit_node_next_sibling);
+ defsubr (&Streesit_node_prev_sibling);
+ defsubr (&Streesit_node_first_child_for_pos);
+ defsubr (&Streesit_node_descendant_for_range);
+ defsubr (&Streesit_node_eq);
+
+ /* Query functions. */
+ defsubr (&Streesit_expand_pattern);
+ defsubr (&Streesit_expand_query);
+ defsubr (&Streesit_query_capture);
+}
--- /dev/null
+/* Header file for the tree-sitter integration.
+
+Copyright (C) 2021 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
+
+#ifndef EMACS_TREESIT_H
+#define EMACS_TREESIT_H
+
+#include <tree_sitter/api.h>
+#include "lisp.h"
+
+INLINE_HEADER_BEGIN
+
+/* A wrapper for a tree-sitter parser, but also contains a parse tree
+ and other goodies for convenience. Objects of this type are
+ recognized by TS_PARSERP and accessed through XTS_PARSER. */
+struct Lisp_TS_Parser
+{
+ /* Pseudovector header. */
+ union vectorlike_header header;
+ /* A symbol that represents the language this parser uses. See the
+ manual for more explanation. */
+ Lisp_Object language_symbol;
+ /* The buffer associated with this parser. */
+ Lisp_Object buffer;
+ /* The pointer to the tree-sitter parser. Never NULL. */
+ TSParser *parser;
+ /* Pointer to the syntax tree. Initially is NULL, so check for NULL
+ before use. */
+ TSTree *tree;
+ /* Teaches tree-sitter how to read an Emacs buffer. */
+ TSInput input;
+ /* Re-parsing an unchanged buffer is not free for tree-sitter, so we
+ only make it re-parse when need_reparse == true. That usually
+ means some change is made in the buffer. But others could set
+ this field to true to force tree-sitter to re-parse. */
+ bool need_reparse;
+ /* These two positions record the buffer byte position (1-based) of
+ the "visible region" that tree-sitter sees. Unlike markers,
+ these two positions do not change as the user inserts and deletes
+ text around them. Before re-parse, we move these positions to
+ match BUF_BEGV_BYTE and BUF_ZV_BYTE. Note that we don't need to
+ synchronize these positions when retrieving them in a function
+ that involves a node: if the node is not outdated, these
+ positions are synchronized. */
+ ptrdiff_t visible_beg;
+ ptrdiff_t visible_end;
+ /* This counter is incremented every time a change is made to the
+ buffer in ts_record_change. The node retrieved from this parser
+ inherits this timestamp. This way we can make sure the node is
+ not outdated when we access its information. */
+ ptrdiff_t timestamp;
+};
+
+/* A wrapper around a tree-sitter node. Objects of this type are
+ recognized by TS_NODEP and accessed through XTS_NODE. */
+struct Lisp_TS_Node
+{
+ /* Pseudovector header. */
+ union vectorlike_header header;
+ /* This prevents gc from collecting the tree before the node is done
+ with it. TSNode contains a pointer to the tree it belongs to,
+ and the parser object, when collected by gc, will free that
+ tree. */
+ Lisp_Object parser;
+ /* The tree-sitter node being wrapped. */
+ TSNode node;
+ /* A node inherits its parser's timestamp at creation time. The
+ parser's timestamp increments as the buffer changes. This way we
+ can make sure the node is not outdated when we access its
+ information. */
+ ptrdiff_t timestamp;
+};
+
+/* Return true if X is a pseudovector of type PVEC_TS_PARSER.  */
+INLINE bool
+TS_PARSERP (Lisp_Object x)
+{
+  bool is_parser = PSEUDOVECTORP (x, PVEC_TS_PARSER);
+  return is_parser;
+}
+
+/* Return the struct Lisp_TS_Parser that A refers to.  A must satisfy
+   TS_PARSERP; this is checked with eassert.  */
+INLINE struct Lisp_TS_Parser *
+XTS_PARSER (Lisp_Object a)
+{
+  eassert (TS_PARSERP (a));
+  struct Lisp_TS_Parser *parser
+    = XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Parser);
+  return parser;
+}
+
+/* Return true if X is a pseudovector of type PVEC_TS_NODE.  */
+INLINE bool
+TS_NODEP (Lisp_Object x)
+{
+  bool is_node = PSEUDOVECTORP (x, PVEC_TS_NODE);
+  return is_node;
+}
+
+/* Return the struct Lisp_TS_Node that A refers to.  A must satisfy
+   TS_NODEP; this is checked with eassert.  */
+INLINE struct Lisp_TS_Node *
+XTS_NODE (Lisp_Object a)
+{
+  eassert (TS_NODEP (a));
+  struct Lisp_TS_Node *node
+    = XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Node);
+  return node;
+}
+
+INLINE void
+CHECK_TS_PARSER (Lisp_Object parser)
+{
+ CHECK_TYPE (TS_PARSERP (parser), Qtreesit_parser_p, parser);
+}
+
+INLINE void
+CHECK_TS_NODE (Lisp_Object node)
+{
+ CHECK_TYPE (TS_NODEP (node), Qtreesit_node_p, node);
+}
+
+/* Record a buffer change for the tree-sitter parsers. According to
+ the comment on the timestamp field of struct Lisp_TS_Parser, this
+ is where a parser's timestamp is incremented. The arguments are
+ byte positions; presumably the start of the change and the end of
+ the changed text before and after the change -- TODO confirm
+ against the definition. */
+void
+ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
+ ptrdiff_t new_end_byte);
+
+/* Presumably wraps PARSER and TREE, together with BUFFER and
+ LANGUAGE_SYMBOL, in a new Lisp parser object -- TODO confirm
+ against the definition. */
+Lisp_Object
+make_ts_parser (Lisp_Object buffer, TSParser *parser,
+ TSTree *tree, Lisp_Object language_symbol);
+
+/* Make a Lisp node object for NODE that refers to the Lisp parser
+ object PARSER. */
+Lisp_Object
+make_ts_node (Lisp_Object parser, TSNode node);
+
+/* Defines the symbols, variables and primitives of treesit.c. */
+extern void syms_of_treesit (void);
+
+INLINE_HEADER_END
+
+#endif /* EMACS_TREESIT_H */
--- /dev/null
+;;; treesit-tests.el --- tests for src/treesit.c -*- lexical-binding: t; -*-
+
+;; Copyright (C) 2021 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Code:
+
+(require 'ert)
+(require 'treesit)
+
+(ert-deftest treesit-basic-parsing ()
+  "Check that a parser follows buffer edits when its root is re-read."
+  (with-temp-buffer
+    (let* ((json-parser (treesit-parser-create (current-buffer) 'json))
+           ;; Fetch the root node afresh on every call and render it
+           ;; as a string.
+           (root-string
+            (lambda ()
+              (treesit-node-string
+               (treesit-parser-root-node json-parser)))))
+      ;; The new parser is expected at the head of the parser list.
+      (should (eq json-parser (car treesit-parser-list)))
+      ;; Nothing has been inserted yet.
+      (should (equal (funcall root-string) "(ERROR)"))
+
+      (insert "[1,2,3]")
+      (should (equal (funcall root-string)
+                     "(document (array (number) (number) (number)))"))
+
+      ;; Add an object right after the first array element.
+      (goto-char (+ (point-min) 3))
+      (insert "{\"name\": \"Bob\"},")
+      (should (equal (funcall root-string)
+                     "(document (array (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number) (number)))")))))
+
+(ert-deftest treesit-node-api ()
+  "Exercise the primitive node accessors on a small JSON array."
+  (with-temp-buffer
+    (insert "[1,2,{\"name\": \"Bob\"},3]")
+    (let* ((json-parser (treesit-parser-create (current-buffer) 'json))
+           (root (treesit-parser-root-node json-parser)))
+      ;; `treesit-node-type'.
+      (should (equal "document" (treesit-node-type root)))
+      ;; `treesit-node-check'.
+      (should (eq t (treesit-node-check root 'named)))
+      (should (eq nil (treesit-node-check root 'missing)))
+      (should (eq nil (treesit-node-check root 'extra)))
+      (should (eq nil (treesit-node-check root 'has-error)))
+      ;; `treesit-node-child'.
+      (let ((array-node (treesit-node-child root 0)))
+        (should (equal "array" (treesit-node-type array-node)))
+        (should (equal (treesit-node-string array-node)
+                       "(array (number) (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number))"))
+        ;; `treesit-node-child-count': all children, then named only.
+        (should (eql 9 (treesit-node-child-count array-node)))
+        (should (eql 4 (treesit-node-child-count array-node t)))
+        (let* ((obj (treesit-node-child array-node 2 t))
+               (pair (treesit-node-child obj 0 t)))
+          ;; `treesit-node-field-name-for-child'.
+          (should (equal "object" (treesit-node-type obj)))
+          (should (equal "pair" (treesit-node-type pair)))
+          (should (equal "key"
+                         (treesit-node-field-name-for-child pair 0)))
+          ;; `treesit-node-child-by-field-name'.
+          (should (equal "(string (string_content))"
+                         (treesit-node-string
+                          (treesit-node-child-by-field-name pair "key"))))
+          ;; `treesit-node-next-sibling'.
+          (should (equal "(number)"
+                         (treesit-node-string
+                          (treesit-node-next-sibling obj t))))
+          (should (equal "(\",\")"
+                         (treesit-node-string
+                          (treesit-node-next-sibling obj))))
+          ;; `treesit-node-prev-sibling'.
+          (should (equal "(number)"
+                         (treesit-node-string
+                          (treesit-node-prev-sibling obj t))))
+          (should (equal "(\",\")"
+                         (treesit-node-string
+                          (treesit-node-prev-sibling obj)))))
+        ;; `treesit-node-first-child-for-pos'.
+        (should (equal "(number)"
+                       (treesit-node-string
+                        (treesit-node-first-child-for-pos array-node 3 t))))
+        (should (equal "(\",\")"
+                       (treesit-node-string
+                        (treesit-node-first-child-for-pos array-node 3))))
+        ;; `treesit-node-descendant-for-range'.
+        (should (equal "(\"{\")"
+                       (treesit-node-string
+                        (treesit-node-descendant-for-range root 6 7))))
+        (should (equal "(object (pair key: (string (string_content)) value: (string (string_content))))"
+                       (treesit-node-string
+                        (treesit-node-descendant-for-range root 6 7 t))))
+        ;; `treesit-node-eq'.
+        (should (treesit-node-eq root root))
+        (should (not (treesit-node-eq root array-node)))))))
+
+(ert-deftest treesit-query-api ()
+  "Tests for query API.
+Run the same query in string form and in sexp form and expect the
+same captures from both, then check `treesit-expand-query'."
+  (with-temp-buffer
+    (insert "[1,2,{\"name\": \"Bob\"},3]")
+    (let ((root-node (treesit-parser-root-node
+                      (treesit-parser-create
+                       (current-buffer) 'json))))
+      ;; First element: the query as a string.  Second element: the
+      ;; equivalent query as an s-expression.
+      (dolist (pattern
+               '("(string) @string
+(pair key: (_) @keyword)
+((_) @bob (#match \"^B.b$\" @bob))
+(number) @number
+((number) @n3 (#equal \"3\" @n3)) "
+                 ((string) @string
+                  (pair key: (_) @keyword)
+                  ((_) @bob (:match "^B.b$" @bob))
+                  (number) @number
+                  ((number) @n3 (:equal "3" @n3)))))
+        (should
+         (equal
+          '((number . "1") (number . "2")
+            (keyword . "\"name\"")
+            (string . "\"name\"")
+            (string . "\"Bob\"")
+            (bob . "Bob")
+            (number . "3")
+            (n3 . "3"))
+          ;; Replace each captured node by its text so the result can
+          ;; be compared with `equal'.
+          (mapcar (lambda (entry)
+                    (cons (car entry)
+                          (treesit-node-text
+                           (cdr entry))))
+                  (treesit-query-capture root-node pattern)))))
+      ;; This check does not use PATTERN, so it runs once, outside the
+      ;; loop.
+      (should
+       (equal
+        "(type field: (_) @capture .) ? * + \"return\""
+        (treesit-expand-query
+         '((type field: (_) @capture :anchor)
+           :? :* :+ "return")))))))
+
+(ert-deftest treesit-narrow ()
+  "Tests if narrowing works.
+The buffer is repeatedly widened, edited outside the previously
+visible region and narrowed again; after each step the root node is
+expected to describe only the visible JSON text."
+  (with-temp-buffer
+    (insert "xxx[1,{\"name\": \"Bob\"},2,3]xxx")
+    (narrow-to-region (+ (point-min) 3) (- (point-max) 3))
+    (let ((parser (treesit-parser-create
+                   (current-buffer) 'json)))
+      ;; This test is from the basic test.
+      (should
+       (equal
+        (treesit-node-string
+         (treesit-parser-root-node parser))
+        "(document (array (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number) (number)))"))
+
+      ;; Insert before the visible region, then delete the object.
+      (widen)
+      (goto-char (point-min))
+      (insert "ooo")
+      (should (equal "oooxxx[1,{\"name\": \"Bob\"},2,3]xxx"
+                     (buffer-string)))
+      (delete-region 10 26)
+      (should (equal "oooxxx[1,2,3]xxx"
+                     (buffer-string)))
+      (narrow-to-region (+ (point-min) 6) (- (point-max) 3))
+      ;; This test is also from the basic test.
+      (should
+       (equal (treesit-node-string
+               (treesit-parser-root-node parser))
+              "(document (array (number) (number) (number)))"))
+      ;; Append a second array and narrow to it.
+      (widen)
+      (goto-char (point-max))
+      (insert "[1,2]")
+      (should (equal "oooxxx[1,2,3]xxx[1,2]"
+                     (buffer-string)))
+      (narrow-to-region (- (point-max) 5) (point-max))
+      (should
+       (equal (treesit-node-string
+               (treesit-parser-root-node parser))
+              "(document (array (number) (number)))"))
+      ;; Prepend a third array and narrow to it.
+      (widen)
+      (goto-char (point-min))
+      (insert "[1]")
+      (should (equal "[1]oooxxx[1,2,3]xxx[1,2]"
+                     (buffer-string)))
+      (narrow-to-region (point-min) (+ (point-min) 3))
+      (should
+       (equal (treesit-node-string
+               (treesit-parser-root-node parser))
+              "(document (array (number)))")))))
+
+(ert-deftest treesit-range ()
+  "Tests if range works.
+Check that overlapping ranges are refused, and that a valid set of
+ranges is stored and restricts what the parser sees."
+  (with-temp-buffer
+    (insert "[[1],oooxxx[1,2,3],xxx[1,2]]")
+    (let ((parser (treesit-parser-create
+                   (current-buffer) 'json)))
+      ;; Query the root node once before touching the ranges, so that
+      ;; the range change below is applied to a parser that has
+      ;; already been used.  The returned node itself is not needed.
+      (treesit-parser-root-node parser)
+      ;; The two ranges overlap (5 lies inside 1..6), which is
+      ;; expected to signal an error.
+      (should-error
+       (treesit-parser-set-included-ranges
+        parser '((1 . 6) (5 . 20)))
+       :type '(treesit-range-invalid))
+
+      (treesit-parser-set-included-ranges
+       parser '((1 . 6) (12 . 20) (23 . 29)))
+      (should (equal '((1 . 6) (12 . 20) (23 . 29))
+                     (treesit-parser-included-ranges parser)))
+      (should (equal "(document (array (array (number)) (array (number) (number) (number)) (array (number) (number))))"
+                     (treesit-node-string
+                      (treesit-parser-root-node parser))))
+      ;; TODO: More tests.
+      )))
+
+(ert-deftest treesit-multi-lang ()
+  "Tests if parsing multiple languages works.
+An HTML document with embedded JavaScript and CSS is inserted; the
+embedded regions are located with queries against `html' and then
+handed to the JavaScript and CSS parsers as included ranges."
+  (with-temp-buffer
+    (insert "<html><script>1</script><style>body {}</style></html>")
+    ;; The HTML parser object is never used directly: the queries
+    ;; below refer to it through the language symbol `html'.
+    (treesit-get-parser-create 'html)
+    (let ((css (treesit-get-parser-create 'css))
+          (js (treesit-get-parser-create 'javascript)))
+      ;; JavaScript.
+      (let ((js-range
+             (treesit-query-range
+              'html
+              '((script_element (raw_text) @capture)))))
+        (should (equal '((15 . 16)) js-range))
+        (treesit-parser-set-included-ranges js js-range)
+        (should (equal "(program (expression_statement (number)))"
+                       (treesit-node-string
+                        (treesit-parser-root-node js)))))
+      ;; CSS.
+      (let ((css-range
+             (treesit-query-range
+              'html
+              '((style_element (raw_text) @capture)))))
+        (should (equal '((32 . 39)) css-range))
+        (treesit-parser-set-included-ranges css css-range)
+        (should
+         (equal "(stylesheet (rule_set (selectors (tag_name)) (block)))"
+                (treesit-node-string
+                 (treesit-parser-root-node css)))))
+      ;; TODO: More tests.
+      )))
+
+(ert-deftest treesit-parser-supplemental ()
+  "Supplemental parser functions."
+  ;; `treesit-get-parser': a fresh buffer is expected to have no
+  ;; parser.
+  (with-temp-buffer
+    (should (equal (treesit-get-parser 'json) nil)))
+  ;; `treesit-get-parser-create'.
+  (with-temp-buffer
+    (should (not (equal (treesit-get-parser-create 'json)
+                        nil))))
+  ;; `treesit-parse-string'.
+  (should (equal (treesit-node-string
+                  (treesit-parse-string
+                   "[1,2,{\"name\": \"Bob\"},3]"
+                   'json))
+                 "(document (array (number) (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number)))"))
+  (with-temp-buffer
+    (insert "[1,2,{\"name\": \"Bob\"},3]")
+    ;; Only the existence of the parser matters for the checks below,
+    ;; so its value is not bound.
+    (treesit-parser-create (current-buffer) 'json)
+    ;; `treesit-get-parser'.
+    (should (not (equal (treesit-get-parser 'json)
+                        nil)))
+    ;; `treesit-language-at'.
+    (should (equal (treesit-language-at (point))
+                   'json))
+    ;; `treesit-set-ranges', `treesit-get-ranges'.
+    (treesit-set-ranges 'json
+                        '((1 . 2)))
+    (should (equal (treesit-get-ranges 'json)
+                   '((1 . 2))))))
+
+(ert-deftest treesit-node-supplemental ()
+  "Supplemental node functions."
+  ;; Run in a temporary buffer: the test inserts text, and several
+  ;; assertions below only hold if that text is the entire buffer
+  ;; content.
+  (with-temp-buffer
+    (insert "[1,2,{\"name\": \"Bob\"},3]")
+    (let* ((parser (treesit-parser-create
+                    (current-buffer) 'json))
+           (root-node (treesit-parser-root-node
+                       parser))
+           (doc-node (treesit-node-child root-node 0)))
+      ;; `treesit-node-buffer'.
+      (should (equal (treesit-node-buffer root-node)
+                     (current-buffer)))
+      ;; `treesit-node-language'.
+      (should (eq (treesit-node-language root-node)
+                  'json))
+      ;; `treesit-node-at'.
+      (should (equal (treesit-node-string
+                      (treesit-node-at 1 2 'json))
+                     "(\"[\")"))
+      ;; `treesit-buffer-root-node'.
+      (should (treesit-node-eq
+               (treesit-buffer-root-node 'json)
+               root-node))
+      ;; `treesit-filter-child'.
+      (should (equal (mapcar
+                      (lambda (node)
+                        (treesit-node-type node))
+                      (treesit-filter-child
+                       doc-node (lambda (node)
+                                  (treesit-node-check node 'named))))
+                     '("number" "number" "object" "number")))
+      ;; `treesit-node-text'.
+      (should (equal (treesit-node-text doc-node)
+                     "[1,2,{\"name\": \"Bob\"},3]"))
+      ;; `treesit-node-index'.
+      (should (eq (treesit-node-index doc-node)
+                  0))
+      ;; TODO:
+      ;; `treesit-parent-until'
+      ;; `treesit-parent-while'
+      ;; `treesit-node-children'
+      ;; `treesit-node-field-name'
+      )))
+
+;; TODO
+;; - Functions in treesit.el
+;; - treesit-load-name-override-list
+
+(provide 'treesit-tests)
+;;; treesit-tests.el ends here