Add tree-sitter integration

author Yuan Fu <casouri@gmail.com>

Sun, 13 Mar 2022 06:10:06 +0000 (22:10 -0800)

committer Yuan Fu <casouri@gmail.com>

Sat, 7 May 2022 08:11:39 +0000 (01:11 -0700)
author Yuan Fu <casouri@gmail.com>
Sun, 13 Mar 2022 06:10:06 +0000 (22:10 -0800)
committer Yuan Fu <casouri@gmail.com>
Sat, 7 May 2022 08:11:39 +0000 (01:11 -0700)
diff --git a/configure.ac b/configure.ac

index a315eeb6bd2304ae661399488aa0b3e9bb31d332..0c174c6a7b5aea2aa508b2b1b2d0005d2499c8db 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -457,6 +457,7 @@ OPTION_DEFAULT_ON([xml2],[don't compile with XML parsing support])
  OPTION_DEFAULT_OFF([imagemagick],[compile with ImageMagick image support])
  OPTION_DEFAULT_ON([native-image-api], [don't use native image APIs (GDI+ on Windows)])
  OPTION_DEFAULT_IFAVAILABLE([json], [compile with native JSON support])
+OPTION_DEFAULT_IFAVAILABLE([tree-sitter], [compile with tree-sitter])
  
  OPTION_DEFAULT_ON([xft],[don't use XFT for anti aliased fonts])
  OPTION_DEFAULT_ON([harfbuzz],[don't use HarfBuzz for text shaping])
@@ -3087,6 +3088,27 @@ AC_SUBST(JSON_LIBS)
  AC_SUBST(JSON_CFLAGS)
  AC_SUBST(JSON_OBJ)
  
+HAVE_TREE_SITTER=no
+TREE_SITTER_OBJ=
+
+if test "${with_tree_sitter}" != "no"; then
+   dnl TODO: we should use tree-sitter >= 0.20.2, but right now all
+   dnl tree-sitter libraries distributed are versioned at 0.0, so for
+   dnl the ease of development we'll just leave the version
+   dnl requirement at 0.0 for now.
+  EMACS_CHECK_MODULES([TREE_SITTER], [tree-sitter >= 0.0],
+    [HAVE_TREE_SITTER=yes], [HAVE_TREE_SITTER=no])
+  if test "${HAVE_TREE_SITTER}" = yes; then
+    AC_DEFINE(HAVE_TREE_SITTER, 1, [Define if using tree-sitter.])
+    TREE_SITTER_LIBS=-ltree-sitter
+    TREE_SITTER_OBJ="treesit.o"
+  fi
+fi
+
+AC_SUBST(TREE_SITTER_LIBS)
+AC_SUBST(TREE_SITTER_CFLAGS)
+AC_SUBST(TREE_SITTER_OBJ)
+
  NOTIFY_OBJ=
  NOTIFY_SUMMARY=no
  
@@ -3926,20 +3948,31 @@ if test "${HAVE_ZLIB}" = "yes"; then
  fi
  AC_SUBST(LIBZ)
  
+### Dynamic library support
+case $opsys in
+  cygwin|mingw32) DYNAMIC_LIB_SUFFIX=".dll" ;;
+  darwin) DYNAMIC_LIB_SUFFIX=".dylib" ;;
+  *) DYNAMIC_LIB_SUFFIX=".so" ;;
+esac
+case "${opsys}" in
+  darwin) DYNAMIC_LIB_SECONDARY_SUFFIX='.so' ;;
+  *) DYNAMIC_LIB_SECONDARY_SUFFIX='' ;;
+esac
+AC_DEFINE_UNQUOTED(DYNAMIC_LIB_SUFFIX, "$DYNAMIC_LIB_SUFFIX",
+  [System extension for dynamic libraries])
+AC_DEFINE_UNQUOTED(DYNAMIC_LIB_SECONDARY_SUFFIX, "$DYNAMIC_LIB_SECONDARY_SUFFIX",
+  [Alternative system extension for dynamic libraries.])
+
+AC_SUBST(DYNAMIC_LIB_SUFFIX)
+AC_SUBST(DYNAMIC_LIB_SECONDARY_SUFFIX)
+
  ### Dynamic modules support
  LIBMODULES=
  HAVE_MODULES=no
  MODULES_OBJ=
  NEED_DYNLIB=no
-case $opsys in
-  cygwin|mingw32) MODULES_SUFFIX=".dll" ;;
-  darwin) MODULES_SUFFIX=".dylib" ;;
-  *) MODULES_SUFFIX=".so" ;;
-esac
-case "${opsys}" in
-  darwin) MODULES_SECONDARY_SUFFIX='.so' ;;
-  *) MODULES_SECONDARY_SUFFIX='' ;;
-esac
+MODULES_SUFFIX="${DYNAMIC_LIB_SUFFIX}"
+MODULES_SECONDARY_SUFFIX="${DYNAMIC_LIB_SECONDARY_SUFFIX}"
  if test "${with_modules}" != "no"; then
    case $opsys in
      gnu|gnu-linux)
@@ -3970,10 +4003,10 @@ if test "${HAVE_MODULES}" = yes; then
     NEED_DYNLIB=yes
     AC_DEFINE(HAVE_MODULES, 1, [Define to 1 if dynamic modules are enabled])
     AC_DEFINE_UNQUOTED(MODULES_SUFFIX, "$MODULES_SUFFIX",
-     [System extension for dynamic libraries])
+     [System extension for dynamic modules])
     if test -n "${MODULES_SECONDARY_SUFFIX}"; then
       AC_DEFINE_UNQUOTED(MODULES_SECONDARY_SUFFIX, "$MODULES_SECONDARY_SUFFIX",
-       [Alternative system extension for dynamic libraries.])
+       [Alternative system extension for dynamic modules.])
     fi
  fi
  AC_SUBST(MODULES_OBJ)
@@ -4333,6 +4366,12 @@ case $with_json,$HAVE_JSON in
    *) MISSING="$MISSING json"
       WITH_IFAVAILABLE="$WITH_IFAVAILABLE --with-json=ifavailable";;
  esac
+case $with_tree_sitter,$HAVE_TREE_SITTER in
+  no,* | ifavailable,* | *,yes) ;;
+  *) MISSING="$MISSING tree-sitter"
+     WITH_IFAVAILABLE="$WITH_IFAVAILABLE --with-tree-sitter=ifavailable";;
+esac
+
  if test "X${MISSING}" != X; then
    # If we have a missing library, and we don't have pkg-config installed,
    # the missing pkg-config may be the reason.  Give the user a hint.
@@ -6263,7 +6302,7 @@ Configured for '${canonical}'.
  optsep=
  emacs_config_features=
  for opt in ACL BE_APP CAIRO DBUS FREETYPE GCONF GIF GLIB GMP GNUTLS GPM GSETTINGS \
- HARFBUZZ IMAGEMAGICK JPEG JSON LCMS2 LIBOTF LIBSELINUX LIBSYSTEMD LIBXML2 \
+ HARFBUZZ IMAGEMAGICK JPEG JSON TREE_SITTER LCMS2 LIBOTF LIBSELINUX LIBSYSTEMD LIBXML2 \
   M17N_FLT MODULES NATIVE_COMP NOTIFY NS OLDXMENU PDUMPER PGTK PNG RSVG SECCOMP \
   SOUND SQLITE3 THREADS TIFF TOOLKIT_SCROLL_BARS \
   UNEXEC WEBP X11 XAW3D XDBE XFT XIM XINPUT2 XPM XWIDGETS X_TOOLKIT \
@@ -6334,6 +6373,7 @@ AS_ECHO(["  Does Emacs use -lXaw3d?                                 ${HAVE_XAW3D
    Does Emacs use -lxft?                                   ${HAVE_XFT}
    Does Emacs use -lsystemd?                               ${HAVE_LIBSYSTEMD}
    Does Emacs use -ljansson?                               ${HAVE_JSON}
+  Does Emacs use -ltree-sitter?                           ${HAVE_TREE_SITTER}
    Does Emacs use the GMP library?                         ${HAVE_GMP}
    Does Emacs directly use zlib?                           ${HAVE_ZLIB}
    Does Emacs have dynamic modules support?                ${HAVE_MODULES}
diff --git a/doc/lispref/elisp.texi b/doc/lispref/elisp.texi

index 426bb6d01760842ee6a32091456e9f87cde02668..739035201605e64f6ad34c1340066f64f37b8012 100644 (file)
--- a/doc/lispref/elisp.texi
+++ b/doc/lispref/elisp.texi
@@ -222,6 +222,7 @@ To view this manual in other formats, click
  * Non-ASCII Characters::    Non-ASCII text in buffers and strings.
  * Searching and Matching::  Searching buffers for strings or regexps.
  * Syntax Tables::           The syntax table controls word and list parsing.
+* Parsing Program Source::  Generate syntax tree for program sources.
  * Abbrevs::                 How Abbrev mode works, and its data structures.
  
  * Threads::                 Concurrency in Emacs Lisp.
@@ -1357,6 +1358,16 @@ Syntax Tables
  * Syntax Table Internals::  How syntax table information is stored.
  * Categories::              Another way of classifying character syntax.
  
+Parsing Program Source
+
+* Language Definitions::     Loading tree-sitter language definitions.
+* Using Parser::             Introduction to parsers.
+* Retrieving Node::          Retrieving node from syntax tree.
+* Accessing Node::           Accessing node information.
+* Pattern Matching::         Pattern matching with query patterns.
+* Multiple Languages::       Parse text written in multiple languages.
+* Tree-sitter C API::        Compare the C API and the ELisp API.
+
  Syntax Descriptors
  
  * Syntax Class Table::      Table of syntax classes.
@@ -1701,6 +1712,7 @@ Object Internals
  
  @include searching.texi
  @include syntax.texi
+@include parsing.texi
  @include abbrevs.texi
  @include threads.texi
  @include processes.texi
diff --git a/doc/lispref/modes.texi b/doc/lispref/modes.texi

index c29936d5caa1d0b0c013ad9246622290b97c745a..905b21c0d48db31b42acdacabc4110e3611e23af 100644 (file)
--- a/doc/lispref/modes.texi
+++ b/doc/lispref/modes.texi
@@ -2826,11 +2826,13 @@ mode; most major modes define syntactic criteria for which faces to use
  in which contexts.  This section explains how to customize Font Lock for
  a particular major mode.
  
-  Font Lock mode finds text to highlight in two ways: through
-syntactic parsing based on the syntax table, and through searching
-(usually for regular expressions).  Syntactic fontification happens
-first; it finds comments and string constants and highlights them.
-Search-based fontification happens second.
+  Font Lock mode finds text to highlight in three ways: through
+syntactic parsing based on the syntax table, through searching
+(usually for regular expressions), and through parsing based on a
+full-blown parser.  Syntactic fontification happens first; it finds
+comments and string constants and highlights them.  Search-based
+fontification happens second.  Parser-based fontification can be
+optionally enabled and it will precede the other two fontifications.
  
  @menu
  * Font Lock Basics::            Overview of customizing Font Lock.
@@ -2845,6 +2847,7 @@ Search-based fontification happens second.
  * Syntactic Font Lock::         Fontification based on syntax tables.
  * Multiline Font Lock::         How to coerce Font Lock into properly
                                    highlighting multiline constructs.
+* Parser-based Font Lock::      Use a parser for fontification.
  @end menu
  
  @node Font Lock Basics
@@ -3735,6 +3738,89 @@ Since this function is called after every buffer change, it should be
  reasonably fast.
  @end defvar
  
+@node Parser-based Font Lock
+@subsection Parser-based Font Lock
+
+@c This node is written when the only parser Emacs has is tree-sitter,
+@c if in the future more parsers are supported, feel free to reorganize
+@c and rewrite this node to describe multiple parsers in parallel.
+
+Besides simple syntactic font lock and search-based font lock, Emacs
+also provides complete syntactic font lock with the help of a parser,
+currently provided by the tree-sitter library (@pxref{Parsing Program
+Source}).  Because it is an optional feature, parser-based font lock
+is less integrated with Emacs.  Most variables introduced in previous
+sections only apply to search-based font lock, except for
+@var{font-lock-maximum-decoration}.
+
+@defun treesit-font-lock-enable
+This function enables parser-based font lock in the current buffer.
+@end defun
+
+Parser-based font lock and other font lock mechanisms are not mutually
+exclusive.  By default, if enabled, parser-based font lock runs first,
+then the simple syntactic font lock (if enabled), then search-based
+font lock.
+
+Although parser-based font lock doesn't share the same customization
+variables with search-based font lock, parser-based font lock uses
+similar customization schemes.  Just like @var{font-lock-keywords} and
+@var{font-lock-defaults}, parser-based font lock has
+@var{treesit-font-lock-settings} and
+@var{treesit-font-lock-defaults}.
+
+@defvar treesit-font-lock-settings
+A list of @var{setting}s for tree-sitter font lock.
+
+Each @var{setting} should look like
+
+@example
+(@var{language} @var{query})
+@end example
+
+Each @var{setting} controls one parser (often of different language).
+And @var{language} is the language symbol (@pxref{Language
+Definitions}); @var{query} is either a string query or a sexp query
+(@pxref{Pattern Matching}).
+
+Capture names in @var{query} should be face names like
+@code{font-lock-keyword-face}.  The captured node will be fontified
+with that face.  Capture names can also be function names, in which
+case the function is called with (@var{start} @var{end} @var{node}),
+where @var{start} and @var{end} are the start and end position of the
+node in buffer, and @var{node} is the tree-sitter node object.  If a
+capture name is both a face and a function, face takes priority.
+
+Generally, major modes should set @var{treesit-font-lock-defaults},
+and let Emacs automatically populate this variable.
+@end defvar
+
+@defvar treesit-font-lock-defaults
+This variable stores defaults for tree-sitter font lock.  It is a list
+of
+
+@example
+(@var{default} @var{:keyword} @var{value}...)
+@end example
+
+A @var{default} may be a symbol or a list of symbols (for different
+levels of fontification).  The symbol(s) can be a variable or a
+function.  If a symbol is both a variable and a function, it is used
+as a function.  Different levels of fontification can be controlled by
+@var{font-lock-maximum-decoration}.
+
+The symbol(s) in @var{default} should contain or return a
+@var{setting} as described in @var{treesit-font-lock-settings}.
+
+The remaining @var{keyword}s and @var{value}s are additional settings that
+could be used to alter the fontification behavior.  Currently there
+aren't any.
+@end defvar
+
+Multi-language major modes should provide range functions in
+@var{treesit-range-functions}, and Emacs will set the ranges
+accordingly before fontifying a region (@pxref{Multiple Languages}).
+
  @node Auto-Indentation
  @section Automatic Indentation of code
  
@@ -3791,10 +3877,12 @@ and a few other such modes) has been made more generic over the years,
  so if your language seems somewhat similar to one of those languages,
  you might try to use that engine.  @c FIXME: documentation?
  Another one is SMIE which takes an approach in the spirit
-of Lisp sexps and adapts it to non-Lisp languages.
+of Lisp sexps and adapts it to non-Lisp languages.  Yet another one is
+to rely on a full-blown parser, for example, the tree-sitter library.
  
  @menu
  * SMIE::                        A simple minded indentation engine.
+* Parser-based Indentation::    Parser-based indentation engine.
  @end menu
  
  @node SMIE
@@ -4454,6 +4542,176 @@ to the file's local variables of the form:
  @code{eval: (smie-config-local '(@var{rules}))}.
  @end defun
  
+@node Parser-based Indentation
+@subsection Parser-based Indentation
+
+@c This node is written when the only parser Emacs has is tree-sitter,
+@c if in the future more parsers are supported, feel free to reorganize
+@c and rewrite this node to describe multiple parsers in parallel.
+
+When built with the tree-sitter library (@pxref{Parsing Program
+Source}), Emacs can parse program source and produce a syntax tree.
+And this syntax tree can be used for indentation.  For maximum
+flexibility, we could write a custom indent function that queries the
+syntax tree and indents accordingly for each language, but that would
+be a lot of work.  It is more convenient to use the simple indentation
+engine described below: we only need to write some indentation rules
+and the engine takes care of the rest.
+
+To enable the indentation engine, set the value of
+@var{indent-line-function} to @code{treesit-indent}.
+
+@defvar treesit-indent-function
+This variable stores the actual function called by
+@code{treesit-indent}.  By default, its value is
+@code{treesit-simple-indent}.  In the future we might add other
+more complex indentation engines, if @code{treesit-simple-indent}
+proves to be insufficient.
+@end defvar
+
+@heading Writing indentation rules
+
+@defvar treesit-simple-indent-rules
+This local variable stores indentation rules for every language. It is
+a list of
+
+@example
+(@var{language} . @var{rules})
+@end example
+
+where @var{language} is a language symbol, @var{rules} is a list of
+
+@example
+(@var{matcher} @var{anchor} @var{offset})
+@end example
+
+The @var{matcher} determines whether this rule applies, @var{anchor}
+and @var{offset} together determine which column to indent to.
+
+A @var{matcher} is a function that takes three arguments (@var{node}
+@var{parent} @var{bol}).  Argument @var{bol} is the position where we
+are indenting: the position of the first non-whitespace character from
+the beginning of line; @var{node} is the largest (highest-in-tree)
+node that starts at that point; @var{parent} is the parent of
+@var{node}.
+
+If @var{matcher} returns non-nil, meaning the rule matches, Emacs then
+uses @var{anchor} to find an anchor; it should be a function that
+takes the same arguments (@var{node} @var{parent} @var{bol}) and
+returns a point.
+
+Finally Emacs computes the column of that point returned by
+@var{anchor} and adds @var{offset} to it, and indents to that column.
+
+For @var{matcher} and @var{anchor}, Emacs provides some convenient
+presets to spare us from writing these functions ourselves. They are
+stored in @var{treesit-simple-indent-presets}, see below.
+@end defvar
+
+@defvar treesit-simple-indent-presets
+This is a list of presets for @var{matcher}s and @var{anchor}s in
+@var{treesit-simple-indent-rules}.  Each of them represents a
+function that takes @var{node}, @var{parent} and @var{bol} as
+arguments.
+
+@example
+(match @var{node-type} @var{parent-type}
+       @var{node-field} @var{node-index-min} @var{node-index-max})
+@end example
+
+This matcher checks if @var{node}'s type is @var{node-type},
+@var{parent}'s type is @var{parent-type}, @var{node}'s field name in
+@var{parent} is @var{node-field}, and @var{node}'s index among its
+siblings is between @var{node-index-min} and @var{node-index-max}.  If
+the value of a constraint is nil, this matcher doesn't check for that
+constraint.  For example, to match the first child where parent is
+@code{argument_list}, use
+
+@example
+(match nil "argument_list" nil 0 0)
+@end example
+
+@example
+no-node
+@end example
+
+This matcher matches the case where @var{node} is nil, i.e., there is
+no node that starts at @var{bol}.  This is the case when @var{bol} is
+at an empty line or inside a multi-line string, etc.
+
+@example
+(parent-is @var{type})
+@end example
+
+This matcher matches if @var{parent}'s type is @var{type}.
+
+@example
+(node-is @var{type})
+@end example
+
+This matcher matches if @var{node}'s type is @var{type}.
+
+@example
+(query @var{query})
+@end example
+
+This matcher matches if querying @var{parent} with @var{query}
+captures @var{node}.  The capture name does not matter.
+
+@example
+first-sibling
+@end example
+
+This anchor returns the start of the first child of @var{parent}.
+
+@example
+parent
+@end example
+
+This anchor returns the start of @var{parent}.
+
+@example
+parent-bol
+@end example
+
+This anchor returns the beginning of non-space characters on the line
+where @var{parent} is.
+
+@example
+prev-sibling
+@end example
+
+This anchor returns the start of the previous sibling of @var{node}.
+
+@example
+no-indent
+@end example
+
+This anchor returns the start of @var{node}, i.e., do not indent.
+
+@example
+prev-line
+@end example
+
+This anchor returns the start of the first named node on the previous
+line.  This can be used for indenting an empty line.
+@end defvar
+
+@heading Indentation utilities
+
+Here are some utility functions that can help writing indentation
+rules.
+
+@defun treesit-check-indent mode
+This function checks the current buffer's indentation against major mode
+@var{mode}.  It indents the current line in @var{mode} and compares
+the indentation with the current indentation.  Then it pops up a diff
+buffer showing the difference.  Correct indentation (target) is in
+green, current indentation is in red.
+@end defun
+
+It is also helpful to use @code{treesit-inspect-mode} when writing
+indentation rules.
  
  @node Desktop Save Mode
  @section Desktop Save Mode
diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi

new file mode 100644 (file)

index 0000000..bbe70ff
--- /dev/null
+++ b/doc/lispref/parsing.texi
@@ -0,0 +1,1420 @@
+@c -*- mode: texinfo; coding: utf-8 -*-
+@c This is part of the GNU Emacs Lisp Reference Manual.
+@c Copyright (C) 2021 Free Software Foundation, Inc.
+@c See the file elisp.texi for copying conditions.
+@node Parsing Program Source
+@chapter Parsing Program Source
+
+Emacs provides various ways to parse program source text and produce a
+@dfn{syntax tree}.  In a syntax tree, text is no longer a
+one-dimensional stream but a structured tree of nodes, where each node
+represents a piece of text.  Thus a syntax tree can enable
+interesting features like precise fontification, indentation,
+navigation, structured editing, etc.
+
+Emacs has a simple facility for parsing balanced expressions
+(@pxref{Parsing Expressions}).  There is also the SMIE library for generic
+navigation and indentation (@pxref{SMIE}).
+
+Emacs also provides integration with the tree-sitter library
+(@uref{https://tree-sitter.github.io/tree-sitter}) if compiled with
+it.  The tree-sitter library implements an incremental parser and has
+support from a wide range of programming languages.
+
+@defun treesit-available-p
+This function returns non-nil if tree-sitter features are available
+for this Emacs instance.
+@end defun
+
+For using tree-sitter features in font-lock and indentation,
+@pxref{Parser-based Font Lock}, @pxref{Parser-based Indentation}.
+
+To access the syntax tree of the text in a buffer, we need to first
+load a language definition and create a parser with it.  Next, we can
+query the parser for specific nodes in the syntax tree.  Then, we can
+access various information about the node, and we can pattern-match a
+node with a powerful syntax.  Finally, we explain how to work with
+source files that mix multiple languages.  The following sections
+explain how to do each of the tasks in detail.
+
+@menu
+* Language Definitions::     Loading tree-sitter language definitions.
+* Using Parser::             Introduction to parsers.
+* Retrieving Node::          Retrieving node from syntax tree.
+* Accessing Node::           Accessing node information.
+* Pattern Matching::         Pattern matching with query patterns.
+* Multiple Languages::       Parse text written in multiple languages.
+* Tree-sitter C API::        Compare the C API and the ELisp API.
+@end menu
+
+@node Language Definitions
+@section Tree-sitter Language Definitions
+
+@heading Loading a language definition
+
+Tree-sitter relies on language definitions to parse text in that
+language.  In Emacs, a language definition is represented by a symbol.
+For example, C language definition is represented as @code{c}, and
+@code{c} can be passed to tree-sitter functions as the @var{language}
+argument.
+
+@vindex treesit-extra-load-path
+@vindex treesit-load-language-error
+@vindex treesit-load-suffixes
+Tree-sitter language definitions are distributed as dynamic libraries.
+In order to use a language definition in Emacs, you need to make sure
+that the dynamic library is installed on the system.  Emacs looks for
+language definitions under load paths in
+@var{treesit-extra-load-path}, @var{user-emacs-directory}/tree-sitter,
+and system default locations for dynamic libraries, in that order.
+Emacs tries each extension in @var{treesit-load-suffixes}.  If Emacs
+cannot find the library or has problems loading it, Emacs signals
+@var{treesit-load-language-error}.  The signal data is a list of
+specific error messages.
+
+@defun treesit-language-available-p language
+This function checks whether the dynamic library for @var{language} is
+present on the system, and returns non-nil if it is.
+@end defun
+
+@vindex treesit-load-name-override-list
+By convention, the dynamic library for @var{language} is
+@code{libtree-sitter-@var{language}.@var{ext}}, where @var{ext} is the
+system-specific extension for dynamic libraries. Also by convention,
+the function provided by that library is named
+@code{tree_sitter_@var{language}}.  If a language definition doesn't
+follow this convention, you should add an entry
+
+@example
+(@var{language} @var{library-base-name} @var{function-name})
+@end example
+
+to @var{treesit-load-name-override-list}, where
+@var{library-base-name} is the base filename for the dynamic library
+(conventionally @code{libtree-sitter-@var{language}}), and
+@var{function-name} is the function provided by the library
+(conventionally @code{tree_sitter_@var{language}}).  For example,
+
+@example
+(cool-lang "libtree-sitter-coool" "tree_sitter_cooool")
+@end example
+
+for a language too cool to abide by the rules.
+
+@heading Concrete syntax tree
+
+A syntax tree is what a language definition defines (more or less) and
+what a parser generates.  In a syntax tree, each node represents a
+piece of text, and is connected to each other by a parent-child
+relationship.  For example, if the source text is
+
+@example
+1 + 2
+@end example
+
+@noindent
+its syntax tree could be
+
+@example
+@group
+                  +--------------+
+                  | root "1 + 2" |
+                  +--------------+
+                         |
+        +--------------------------------+
+        |       expression "1 + 2"       |
+        +--------------------------------+
+           |             |            |
++------------+   +--------------+   +------------+
+| number "1" |   | operator "+" |   | number "2" |
++------------+   +--------------+   +------------+
+@end group
+@end example
+
+We can also represent it in s-expression:
+
+@example
+(root (expression (number) (operator) (number)))
+@end example
+
+@subheading Node types
+
+@cindex tree-sitter node type
+@anchor{tree-sitter node type}
+@cindex tree-sitter named node
+@anchor{tree-sitter named node}
+@cindex tree-sitter anonymous node
+Names like @code{root}, @code{expression}, @code{number},
+@code{operator} are nodes' @dfn{type}.  However, not all nodes in a
+syntax tree have a type.  Nodes that don't are @dfn{anonymous nodes},
+and nodes with a type are @dfn{named nodes}.  Anonymous nodes are
+tokens with fixed spellings, including punctuation characters like
+bracket @samp{]}, and keywords like @code{return}.
+
+@subheading Field names
+
+@cindex tree-sitter node field name
+@anchor{tree-sitter node field name} To make the syntax tree easier to
+analyze, many language definitions assign @dfn{field names} to child
+nodes.  For example, a @code{function_definition} node could have a
+@code{declarator} and a @code{body}:
+
+@example
+@group
+(function_definition
+ declarator: (declaration)
+ body: (compound_statement))
+@end group
+@end example
+
+@deffn Command treesit-inspect-mode
+This minor mode displays the node that @emph{starts} at point in
+mode-line.  The mode-line will display
+
+@example
+@var{parent} @var{field-name}: (@var{child} (@var{grand-child} (...)))
+@end example
+
+@var{child}, @var{grand-child}, and @var{grand-grand-child}, etc, are
+nodes that have their beginning at point.  And @var{parent} is the
+parent of @var{child}.
+
+If there is no node that starts at point, i.e., point is in the middle
+of a node, then the mode-line only displays the smallest node that
+spans point, and its immediate parent.
+
+This minor mode doesn't create parsers on its own.  It simply uses the
+first parser in @var{treesit-parser-list} (@pxref{Using Parser}).
+@end deffn
+
+@heading Reading the grammar definition
+
+Authors of language definitions define the @dfn{grammar} of a
+language, and this grammar determines how a parser constructs a
+concrete syntax tree out of the text.  In order to use the syntax
+tree effectively, we need to read the @dfn{grammar file}.
+
+The grammar file is usually @code{grammar.js} in a language
+definition’s project repository.  The link to a language definition’s
+home page can be found in tree-sitter’s homepage
+(@uref{https://tree-sitter.github.io/tree-sitter}).
+
+The grammar is written in JavaScript syntax.  For example, the rule
+matching a @code{function_definition} node looks like
+
+@example
+@group
+function_definition: $ => seq(
+  $.declaration_specifiers,
+  field('declarator', $.declaration),
+  field('body', $.compound_statement)
+)
+@end group
+@end example
+
+The rule is represented by a function that takes a single argument
+@var{$}, representing the whole grammar.  The function itself is
+constructed by other functions: the @code{seq} function puts together a
+sequence of children; the @code{field} function annotates a child with
+a field name.  If we write the above definition in BNF syntax, it
+would look like
+
+@example
+@group
+function_definition :=
+  <declaration_specifiers> <declaration> <compound_statement>
+@end group
+@end example
+
+@noindent
+and the node returned by the parser would look like
+
+@example
+@group
+(function_definition
+  (declaration_specifier)
+  declarator: (declaration)
+  body: (compound_statement))
+@end group
+@end example
+
+Below is a list of functions that one will see in a grammar
+definition.  Each function takes other rules as arguments and returns
+a new rule.
+
+@itemize @bullet
+@item
+@code{seq(rule1, rule2, ...)} matches each rule one after another.
+
+@item
+@code{choice(rule1, rule2, ...)} matches one of the rules in its
+arguments.
+
+@item
+@code{repeat(rule)} matches @var{rule} for @emph{zero or more} times.
+This is like the @samp{*} operator in regular expressions.
+
+@item
+@code{repeat1(rule)} matches @var{rule} for @emph{one or more} times.
+This is like the @samp{+} operator in regular expressions.
+
+@item
+@code{optional(rule)} matches @var{rule} for @emph{zero or one} time.
+This is like the @samp{?} operator in regular expressions.
+
+@item
+@code{field(name, rule)} assigns field name @var{name} to the child
+node matched by @var{rule}.
+
+@item
+@code{alias(rule, alias)} makes nodes matched by @var{rule} appear as
+@var{alias} in the syntax tree generated by the parser.  For example,
+
+@example
+alias(preprocessor_call_exp, call_expression)
+@end example
+
+makes any node matched by @code{preprocessor_call_exp} appear as
+@code{call_expression}.
+@end itemize
+
+Below are grammar functions less interesting for a reader of a
+language definition.
+
+@itemize
+@item
+@code{token(rule)} marks @var{rule} to produce a single leaf node.
+That is, instead of generating a parent node with individual child
+nodes under it, everything is combined into a single leaf node.
+
+@item
+Normally, grammar rules ignore preceding whitespace;
+@code{token.immediate(rule)} changes @var{rule} to match only when
+there is no preceding whitespace.
+
+@item
+@code{prec(n, rule)} gives @var{rule} a level @var{n} precedence.
+
+@item
+@code{prec.left([n,] rule)} marks @var{rule} as left-associative,
+optionally with level @var{n}.
+
+@item
+@code{prec.right([n,] rule)} marks @var{rule} as right-associative,
+optionally with level @var{n}.
+
+@item
+@code{prec.dynamic(n, rule)} is like @code{prec}, but the precedence
+is applied at runtime instead.
+@end itemize
+
+The tree-sitter project talks about writing a grammar in more detail:
+@uref{https://tree-sitter.github.io/tree-sitter/creating-parsers}.
+Read especially ``The Grammar DSL'' section.
+
+@node Using Parser
+@section Using Tree-sitter Parser
+@cindex Tree-sitter parser
+
+This section describes how to create and configure a tree-sitter
+parser.  In Emacs, each tree-sitter parser is associated with a
+buffer.  As we edit the buffer, the associated parser is automatically
+kept up-to-date.
+
+@defvar treesit-disabled-modes
+Before creating a parser, it is perhaps good to check whether we
+should use tree-sitter at all.  Sometimes a user doesn't want to use
+tree-sitter features for a major mode.  To turn off tree-sitter for a
+mode, they add that mode to this variable.
+@end defvar
+
+@defvar treesit-maximum-size
+If users want to turn off tree-sitter for buffers larger than a
+particular size (because tree-sitter consumes memory ~10 times the
+buffer size for storing the syntax tree), they set this variable to
+that size.
+@end defvar
+
+@defun treesit-should-enable-p &optional mode
+This function returns non-nil if @var{mode} (default to the current
+major mode) should activate tree-sitter features.  The result depends
+on the value of @var{treesit-disabled-modes} and
+@var{treesit-maximum-size} described above.  The result also
+depends on, of course, the result of @code{treesit-available-p}.
+
+Writers of major modes or other packages are responsible for calling
+this function to determine whether to activate tree-sitter features.
+@end defun
+
+
+@cindex Creating tree-sitter parsers
+To create a parser, we provide a buffer to parse and the language to
+use (@pxref{Language Definitions}).  Emacs provides several creation
+functions for different use cases.
+
+@defun treesit-get-parser-create language
+This function is the most convenient one.  It gives you a parser that
+recognizes @var{language} for the current buffer.  The function
+checks if there already exists a parser suiting the need, and only
+creates a new one when it can't find one.
+
+@example
+@group
+;; Create a parser for C programming language.
+(treesit-get-parser-create 'c)
+    @c @result{} #<treesit-parser for c in *scratch*>
+@end group
+@end example
+@end defun
+
+@defun treesit-get-parser language
+This function is like @code{treesit-get-parser-create}, but it
+always creates a new parser.
+@end defun
+
+@defun treesit-parser-create buffer language
+This function is the most primitive, requiring both the buffer to
+associate to, and the language to use.  If @var{buffer} is nil, the
+current buffer is used.
+@end defun
+
+Given a parser, we can query information about it:
+
+@defun treesit-parser-buffer parser
+Returns the buffer associated with @var{parser}.
+@end defun
+
+@defun treesit-parser-language parser
+Returns the language that @var{parser} uses.
+@end defun
+
+@defun treesit-parser-p object
+Checks if @var{object} is a tree-sitter parser.  Returns non-nil if it
+is, and nil otherwise.
+@end defun
+
+There is no need to explicitly parse a buffer, because parsing is done
+automatically and lazily.  A parser only parses when we query for a
+node in its syntax tree.  Therefore, when a parser is first created,
+it doesn't parse the buffer; instead, it waits until we query for a
+node for the first time.  Similarly, when some change is made in the
+buffer, a parser doesn't re-parse immediately and only records some
+necessary information to later re-parse when necessary.
+
+@vindex treesit-buffer-too-large
+When a parser does parse, it checks the size of the buffer.
+Tree-sitter can only handle buffers no larger than about 4GB.  If the
+size exceeds that, Emacs signals @var{treesit-buffer-too-large}
+with signal data being the buffer size.
+
+@vindex treesit-parser-list
+Once a parser is created, Emacs automatically adds it to the
+buffer-local variable @var{treesit-parser-list}.  Every time a
+change is made to the buffer, Emacs updates parsers in this list so
+they can update their syntax tree incrementally.  Therefore, one must
+not remove parsers from this list and put the parser back in: if any
+change is made when that parser is absent, the parser will be
+permanently out-of-sync with the buffer content, and shouldn't be used
+anymore.
+
+@cindex tree-sitter narrowing
+@anchor{tree-sitter narrowing} Normally, a parser ``sees'' the whole
+buffer, but when the buffer is narrowed (@pxref{Narrowing}), the
+parser will only see the visible region.  As far as the parser can
+tell, the hidden region is deleted.  And when the buffer is later
+widened, the parser thinks text is inserted in the beginning and in
+the end.  Although parsers respect narrowing, narrowing shouldn't be
+the means to handle a multi-language buffer; instead, set the ranges in
+which a parser should operate.  @xref{Multiple Languages}.
+
+Because a parser parses lazily, when we narrow the buffer, the parser
+doesn't act immediately; as long as we don't query for a node while
+the buffer is narrowed, narrowing does not affect the parser.
+
+@cindex tree-sitter parse string
+@defun treesit-parse-string string language
+Besides creating a parser for a buffer, we can also just parse a
+string.  Unlike a buffer, parsing a string is a one-time deal, and
+there is no way to update the result.
+
+This function parses @var{string} with @var{language}, and returns the
+root node of the generated syntax tree.
+@end defun
+
+@node Retrieving Node
+@section Retrieving Node
+
+@cindex tree-sitter find node
+@cindex tree-sitter get node
+There are two ways to retrieve a node: directly from the syntax tree,
+or by traveling from other nodes.  But before we continue, let's go
+over some conventions of tree-sitter functions.
+
+We talk about a node being ``smaller'' or ``larger'', and ``lower'' or
+``higher''.  A smaller and lower node is lower in the syntax tree and
+therefore spans a smaller piece of text; a larger and higher node is
+higher up in the syntax tree, containing many smaller nodes as its
+children, and therefore spans a larger piece of text.
+
+When a function cannot find a node, it returns nil.  And for the
+convenience of function chaining, all the functions that take a node
+as argument and return a node accept the node to be nil; in that
+case, the function just returns nil.
+
+@vindex treesit-node-outdated
+Nodes are not automatically updated when the associated buffer is
+modified.  In fact, there is no way to update a node once it is
+retrieved.  It is best to use a node and throw it away and not save
+it.  A node is @dfn{outdated} if the buffer has changed since the node
+was retrieved.  Using an outdated node signals a
+@var{treesit-node-outdated} error.
+
+@heading Retrieving node from syntax tree
+
+@defun treesit-node-at beg &optional end parser-or-lang named
+This function returns the @emph{smallest} node that covers the span
+from @var{beg} to @var{end}.  In other words, the start of the node
+@code{<=} @var{beg}, and the end of the node @code{>=} @var{end}.  If
+@var{end} is omitted, it defaults to the value of @var{beg}.
+
+When @var{parser-or-lang} is nil, this function uses the first parser
+in @var{treesit-parser-list} in the current buffer.  If
+@var{parser-or-lang} is a parser object, it uses that parser; if
+@var{parser-or-lang} is a language, it finds the first parser using
+that language in @var{treesit-parser-list} and uses that.
+
+If @var{named} is non-nil, this function looks for a named node
+instead (@pxref{tree-sitter named node, named node}).
+
+@example
+@group
+;; Find the node at point in a C parser's syntax tree.
+(treesit-node-at (point) (point) 'c)
+    @c @result{} #<treesit-node from 1 to 4 in *scratch*>
+@end group
+@end example
+@end defun
+
+@defun treesit-parser-root-node parser
+This function returns the root node of the syntax tree generated by
+@var{parser}.
+@end defun
+
+@defun treesit-buffer-root-node &optional language
+This function finds the first parser that uses @var{language} in
+@var{treesit-parser-list} in the current buffer, and returns the
+root node of that buffer.  If it cannot find an appropriate parser, it
+returns nil.
+@end defun
+
+Once we have a node, we can retrieve other nodes from it, or query for
+information about this node.
+
+@heading Retrieving node from other nodes
+
+@subheading By kinship
+
+@defun treesit-node-parent node
+This function returns the immediate parent of @var{node}.
+@end defun
+
+@defun treesit-node-child node n &optional named
+This function returns the @var{n}'th child of @var{node}.  If
+@var{named} is non-nil, then it only counts named nodes
+(@pxref{tree-sitter named node, named node}).  For example, in a node
+that represents a string: @code{"text"}, there are three children
+nodes: the opening quote @code{"}, the string content @code{text}, and
+the enclosing quote @code{"}.  Among these nodes, the first child is
+the opening quote @code{"}, the first named child is the string
+content @code{text}.
+@end defun
+
+@defun treesit-node-children node &optional named
+This function returns all of @var{node}'s children in a list.  If
+@var{named} is non-nil, then it only retrieves named nodes
+(@pxref{tree-sitter named node, named node}).
+@end defun
+
+@defun treesit-next-sibling node &optional named
+This function finds the next sibling of @var{node}.  If @var{named} is
+non-nil, it finds the next named sibling (@pxref{tree-sitter named
+node, named node}).
+@end defun
+
+@defun treesit-prev-sibling node &optional named
+This function finds the previous sibling of @var{node}.  If
+@var{named} is non-nil, it finds the previous named sibling
+(@pxref{tree-sitter named node, named node}).
+@end defun
+
+@subheading By field name
+
+To make the syntax tree easier to analyze, many language definitions
+assign @dfn{field names} to child nodes (@pxref{tree-sitter node field
+name, field name}).  For example, a @code{function_definition} node
+could have a @code{declarator} and a @code{body}.
+
+@defun treesit-child-by-field-name node field-name
+This function finds the child of @var{node} that has @var{field-name}
+as its field name.
+
+@example
+@group
+;; Get the child that has "body" as its field name.
+(treesit-child-by-field-name node "body")
+    @c @result{} #<treesit-node from 3 to 11 in *scratch*>
+@end group
+@end example
+@end defun
+
+@subheading By position
+
+@defun treesit-first-child-for-pos node pos &optional named
+This function finds the first child of @var{node} that extends beyond
+@var{pos}.  ``Extend beyond'' means the end of the child node
+@code{>=} @var{pos}.  This function only looks for immediate children of
+@var{node}, and doesn't look in its grandchildren.  If @var{named} is
+non-nil, it only looks for named children (@pxref{tree-sitter named node,
+named node}).
+@end defun
+
+@defun treesit-node-descendant-for-range node beg end &optional named
+This function finds the @emph{smallest} (grand)child of @var{node}
+that spans the range from @var{beg} to @var{end}.  It is similar to
+@code{treesit-node-at}.  If @var{named} is non-nil, it only looks
+for named children (@pxref{tree-sitter named node, named node}).
+@end defun
+
+@heading More convenient functions
+
+@defun treesit-filter-child node pred &optional named
+This function finds children of @var{node} that satisfy @var{pred}.
+
+Function @var{pred} takes the child node as the argument and should
+return non-nil to indicate keeping the child.  If @var{named} is
+non-nil, this function only searches for named nodes.
+@end defun
+
+@defun treesit-parent-until node pred
+This function repeatedly finds the parent of @var{node}, and returns
+the parent if it satisfies @var{pred} (which takes the parent as the
+argument).  If no parent satisfies @var{pred}, this function returns
+nil.
+@end defun
+
+@defun treesit-parent-while node pred
+This function repeatedly finds the parent of @var{node}, and keeps
+doing so as long as the parent satisfies @var{pred} (which takes the
+parent as the single argument).  I.e., this function returns the
+farthest parent that still satisfies @var{pred}.
+@end defun
+
+@node Accessing Node
+@section Accessing Node Information
+
+Before going further, make sure you have read the basic conventions
+about tree-sitter nodes in the previous node.
+
+@heading Basic information
+
+Every node is associated with a parser, and that parser is associated
+with a buffer.  The following functions let you retrieve them.
+
+@defun treesit-node-parser node
+This function returns @var{node}'s associated parser.
+@end defun
+
+@defun treesit-node-buffer node
+This function returns @var{node}'s parser's associated buffer.
+@end defun
+
+@defun treesit-node-language node
+This function returns @var{node}'s parser's associated language.
+@end defun
+
+Each node represents a piece of text in the buffer.  The functions below
+find relevant information about that text.
+
+@defun treesit-node-start node
+Return the start position of @var{node}.
+@end defun
+
+@defun treesit-node-end node
+Return the end position of @var{node}.
+@end defun
+
+@defun treesit-node-text node &optional object
+Returns the buffer text that @var{node} represents.  (If @var{node} is
+retrieved from parsing a string, it will be the text from that
+string.)
+@end defun
+
+Here are some basic checks on tree-sitter nodes.
+
+@defun treesit-node-p object
+Checks if @var{object} is a tree-sitter syntax node.
+@end defun
+
+@defun treesit-node-eq node1 node2
+Checks if @var{node1} and @var{node2} are the same node in a syntax
+tree.
+@end defun
+
+@heading Property information
+
+In general, nodes in a concrete syntax tree fall into two categories:
+@dfn{named nodes} and @dfn{anonymous nodes}.  Whether a node is named
+or anonymous is determined by the language definition
+(@pxref{tree-sitter named node, named node}).
+
+@cindex tree-sitter missing node
+Apart from being named/anonymous, a node can have other properties.  A
+node can be ``missing'': missing nodes are inserted by the parser in
+order to recover from certain kinds of syntax errors, i.e., something
+should probably be there according to the grammar, but is not there.
+
+@cindex tree-sitter extra node
+A node can be ``extra'': extra nodes represent things like comments,
+which can appear anywhere in the text.
+
+@cindex tree-sitter node that has changes
+A node ``has changes'' if the buffer has changed since the node was
+retrieved.  In this case, the node's start and end position would be
+off and we had better throw it away and retrieve a new one.
+
+@cindex tree-sitter node that has error
+A node ``has error'' if the text it spans contains a syntax error.  It
+can be the node itself has an error, or one of its (grand)children has
+an error.
+
+@defun treesit-node-check node property
+This function checks if @var{node} has @var{property}.  @var{property}
+can be @code{'named}, @code{'missing}, @code{'extra},
+@code{'has-changes}, or @code{'has-error}.
+@end defun
+
+Named nodes have ``types'' (@pxref{tree-sitter node type, node type}).
+For example, a named node can be a @code{string_literal} node, where
+@code{string_literal} is its type.
+
+@defun treesit-node-type node
+Return @var{node}'s type as a string.
+@end defun
+
+@heading Information as a child or parent
+
+@defun treesit-node-index node &optional named
+This function returns the index of @var{node} as a child node of its
+parent.  If @var{named} is non-nil, it only counts named nodes
+(@pxref{tree-sitter named node, named node}).
+@end defun
+
+@defun treesit-node-field-name node
+A child of a parent node could have a field name (@pxref{tree-sitter
+node field name, field name}).  This function returns the field name
+of @var{node} as a child of its parent.
+@end defun
+
+@defun treesit-node-field-name-for-child node n
+This is a more primitive function that returns the field name of the
+@var{n}'th child of @var{node}.
+@end defun
+
+@defun treesit-child-count node &optional named
+This function finds the number of children of @var{node}.  If
+@var{named} is non-nil, it only counts named children (@pxref{tree-sitter
+named node, named node}).
+@end defun
+
+@node Pattern Matching
+@section Pattern Matching Tree-sitter Nodes
+
+Tree-sitter lets us pattern match with a small declarative language.
+Pattern matching consists of two steps: first tree-sitter matches a
+@dfn{pattern} against nodes in the syntax tree, then it @dfn{captures}
+specific nodes in that pattern and returns the captured nodes.
+
+We describe first how to write the most basic query pattern and how to
+capture nodes in a pattern, then the pattern-match function, finally
+more advanced pattern syntax.
+
+@heading Basic query syntax
+
+@cindex Tree-sitter query syntax
+@cindex Tree-sitter query pattern
+A @dfn{query} consists of multiple @dfn{patterns}; each pattern is an
+s-expression that matches a certain node in the syntax tree.  A
+pattern has the following shape:
+
+@example
+(@var{type} @var{child}...)
+@end example
+
+@noindent
+For example, a pattern that matches a @code{binary_expression} node that
+contains @code{number_literal} child nodes would look like
+
+@example
+(binary_expression (number_literal))
+@end example
+
+To @dfn{capture} a node in the query pattern above, append
+@code{@@capture-name} after the node pattern you want to capture.  For
+example,
+
+@example
+(binary_expression (number_literal) @@number-in-exp)
+@end example
+
+@noindent
+captures @code{number_literal} nodes that are inside a
+@code{binary_expression} node with capture name @code{number-in-exp}.
+
+We can capture the @code{binary_expression} node too, with capture
+name @code{biexp}:
+
+@example
+(binary_expression
+ (number_literal) @@number-in-exp) @@biexp
+@end example
+
+@heading Query function
+
+Now we can introduce the query functions.
+
+@defun treesit-query-capture node query &optional beg end
+This function matches patterns in @var{query} in @var{node}.
+Argument @var{query} can be either a string or an s-expression.  For
+now, we focus on the string syntax; s-expression syntax is described
+at the end of the section.
+
+The function returns all captured nodes in a list of
+@code{(@var{capture_name} . @var{node})}.  If @var{beg} and @var{end}
+are both non-nil, it only pattern matches nodes in that range.
+
+@vindex treesit-query-error
+This function raises a @var{treesit-query-error} if @var{query} is
+malformed.  The signal data contains a description of the specific
+error.
+@end defun
+
+@defun treesit-query-in source query &optional beg end
+This function matches patterns in @var{query} in @var{source}, and
+returns all captured nodes in a list of @code{(@var{capture_name}
+. @var{node})}.  If @var{beg} and @var{end} are both non-nil, it only
+pattern matches nodes in that range.
+
+Argument @var{source} designates a node; it can be a language symbol,
+a parser, or simply a node.  If a language symbol, @var{source}
+represents the root node of the first parser for that language in the
+current buffer; if a parser, @var{source} represents the root node of
+that parser.
+
+This function also raises @var{treesit-query-error}.
+@end defun
+
+For example, suppose @var{node}'s content is @code{1 + 2}, and
+@var{query} is
+
+@example
+@group
+(setq query
+      "(binary_expression
+        (number_literal) @@number-in-exp) @@biexp")
+@end group
+@end example
+
+@noindent
+Querying that query would return
+
+@example
+@group
+(treesit-query-capture node query)
+    @result{} ((biexp . @var{<node for "1 + 2">})
+       (number-in-exp . @var{<node for "1">})
+       (number-in-exp . @var{<node for "2">}))
+@end group
+@end example
+
+As we mentioned earlier, a @var{query} could contain multiple
+patterns. For example, it could have two top-level patterns:
+
+@example
+@group
+(setq query
+      "(binary_expression) @@biexp
+       (number_literal)  @@number @@biexp")
+@end group
+@end example
+
+@defun treesit-query-string string query language
+This function parses @var{string} with @var{language}, pattern matches
+its root node with @var{query}, and returns the result.
+@end defun
+
+@heading More query syntax
+
+Besides node type and capture, tree-sitter's query syntax can express
+anonymous node, field name, wildcard, quantification, grouping,
+alternation, anchor, and predicate.
+
+@subheading Anonymous node
+
+An anonymous node is written verbatim, surrounded by quotes.  A
+pattern matching (and capturing) keyword @code{return} would be
+
+@example
+"return" @@keyword
+@end example
+
+@subheading Wild card
+
+In a query pattern, @samp{(_)} matches any named node, and @samp{_}
+matches any named and anonymous node.  For example, to capture any
+named child of a @code{binary_expression} node, the pattern would be
+
+@example
+(binary_expression (_) @@in_biexp)
+@end example
+
+@subheading Field name
+
+We can capture child nodes that have specific field names:
+
+@example
+@group
+(function_definition
+  declarator: (_) @@func-declarator
+  body: (_) @@func-body)
+@end group
+@end example
+
+We can also capture a node that doesn't have a certain field, say, a
+@code{function_definition} without a @code{body} field.
+
+@example
+(function_definition !body) @@func-no-body
+@end example
+
+@subheading Quantify node
+
+Tree-sitter recognizes quantification operators @samp{*}, @samp{+} and
+@samp{?}.  Their meanings are the same as in regular expressions:
+@samp{*} matches the preceding pattern zero or more times, @samp{+}
+matches one or more times, and @samp{?} matches zero or one time.
+
+For example, this pattern matches @code{type_declaration} nodes
+that have @emph{zero or more} @code{long} keywords.
+
+@example
+(type_declaration "long"* @@long-in-type)
+@end example
+
+@noindent
+And this pattern matches a type declaration that has zero or one
+@code{long} keyword:
+
+@example
+(type_declaration "long"?) @@type-decl
+@end example
+
+@subheading Grouping
+
+Similar to groups in regular expression, we can bundle patterns into a
+group and apply quantification operators to it.  For example, to
+express a comma separated list of identifiers, one could write
+
+@example
+(identifier) ("," (identifier))*
+@end example
+
+@subheading Alternation
+
+Again, similar to regular expressions, we can express ``match any one
+of this group of patterns'' in the query pattern.  The syntax is a
+list of patterns enclosed in square brackets.  For example, to capture
+some keywords in C, the query pattern would be
+
+@example
+@group
+[
+  "return"
+  "break"
+  "if"
+  "else"
+] @@keyword
+@end group
+@end example
+
+@subheading Anchor
+
+The anchor operator @samp{.} can be used to enforce juxtaposition,
+i.e., to enforce two things to be directly next to each other.  The
+two ``things'' can be two nodes, or a child and the end of its parent.
+For example, to capture the first child, the last child, or two
+adjacent children:
+
+@example
+@group
+;; Anchor the child with the end of its parent.
+(compound_expression (_) @@last-child .)
+
+;; Anchor the child with the beginning of its parent.
+(compound_expression . (_) @@first-child)
+
+;; Anchor two adjacent children.
+(compound_expression
+ (_) @@prev-child
+ .
+ (_) @@next-child)
+@end group
+@end example
+
+Note that the enforcement of juxtaposition ignores any anonymous
+nodes.
+
+@subheading Predicate
+
+We can add predicate constraints to a pattern.  For example, if we use
+the following query pattern
+
+@example
+@group
+(
+ (array . (_) @@first (_) @@last .)
+ (#equal @@first @@last)
+)
+@end group
+@end example
+
+Then tree-sitter only matches arrays where the first element equals
+the last element.  To attach a predicate to a pattern, we need to
+group them together.  A predicate always starts with a @samp{#}.
+Currently there are two predicates, @code{#equal} and @code{#match}.
+
+@deffn Predicate equal arg1 arg2
+Matches if @var{arg1} equals @var{arg2}.  Arguments can be either a
+string or a capture name.  Capture names represent the text that the
+captured node spans in the buffer.
+@end deffn
+
+@deffn Predicate match regexp capture-name
+Matches if the text that @var{capture-name}'s node spans in the buffer
+matches regular expression @var{regexp}.  Matching is case-sensitive.
+@end deffn
+
+Note that a predicate can only refer to capture names that appear in the
+same pattern.  Indeed, it makes little sense to refer to capture names
+in other patterns anyway.
+
+@heading S-expression patterns
+
+Besides strings, Emacs provides an s-expression based syntax for query
+patterns.  It largely resembles the string-based syntax.  For example,
+the following pattern
+
+@example
+@group
+(treesit-query-capture
+ node "(addition_expression
+        left: (_) @@left
+        \"+\" @@plus-sign
+        right: (_) @@right) @@addition
+
+        [\"return\" \"break\"] @@keyword")
+@end group
+@end example
+
+@noindent
+is equivalent to
+
+@example
+@group
+(treesit-query-capture
+ node '((addition_expression
+         left: (_) @@left
+         "+" @@plus-sign
+         right: (_) @@right) @@addition
+
+         ["return" "break"] @@keyword))
+@end group
+@end example
+
+Most pattern syntax can be written directly as strange but
+nevertheless valid s-expressions.  Only a few of them need
+modification:
+
+@itemize
+@item
+Anchor @samp{.} is written as @code{:anchor}.
+@item
+@samp{?} is written as @samp{:?}.
+@item
+@samp{*} is written as @samp{:*}.
+@item
+@samp{+} is written as @samp{:+}.
+@item
+@code{#equal} is written as @code{:equal}.  In general, predicates
+change their @samp{#} to @samp{:}.
+@end itemize
+
+For example,
+
+@example
+@group
+"(
+  (compound_expression . (_) @@first (_)* @@rest)
+  (#match \"love\" @@first)
+  )"
+@end group
+@end example
+
+is written in s-expression as
+
+@example
+@group
+'((
+   (compound_expression :anchor (_) @@first (_) :* @@rest)
+   (:match "love" @@first)
+   ))
+@end group
+@end example
+
+@defun treesit-expand-query query
+This function expands the s-expression @var{query} into a string
+query.  It is usually a good idea to expand the s-expression patterns
+into strings for font-lock queries since they are called repeatedly.
+@end defun
+
+Tree-sitter project's documentation about pattern-matching can be
+found at
+@uref{https://tree-sitter.github.io/tree-sitter/using-parsers#pattern-matching-with-queries}.
+
+@node Multiple Languages
+@section Parsing Text in Multiple Languages
+
+Sometimes, the source of a programming language could contain sources
+of other languages, HTML + CSS + JavaScript is one example.  In that
+case, we need to assign individual parsers to text segments written in
+different languages.  Traditionally this is achieved by using
+narrowing.  While tree-sitter works with narrowing (@pxref{tree-sitter
+narrowing, narrowing}), the recommended way is to set ranges in which
+a parser will operate.
+
+@defun treesit-parser-set-included-ranges parser ranges
+This function sets the ranges of @var{parser} to @var{ranges}.  Then
+@var{parser} will only read the text covered in each range.  Argument
+@var{ranges} is a list of cons cells @code{(@var{beg}
+. @var{end})}.
+
+Each range in @var{ranges} must come in order and not overlap.  That
+is, in pseudo code:
+
+@example
+@group
+(cl-loop for idx from 1 to (1- (length ranges))
+         for prev = (nth (1- idx) ranges)
+         for next = (nth idx ranges)
+         should (<= (car prev) (cdr prev)
+                    (car next) (cdr next)))
+@end group
+@end example
+
+@vindex treesit-range-invalid
+If @var{ranges} violates this constraint, or something else went
+wrong, this function signals a @var{treesit-range-invalid}.  The
+signal data contains a specific error message and the ranges we are
+trying to set.
+
+This function can also be used for disabling ranges.  If @var{ranges}
+is nil, the parser is set to parse the whole buffer.
+
+Example:
+
+@example
+@group
+(treesit-parser-set-included-ranges
+ parser '((1 . 9) (16 . 24) (24 . 25)))
+@end group
+@end example
+@end defun
+
+@defun treesit-parser-included-ranges parser
+This function returns the ranges set for @var{parser}.  The return
+value is the same as the @var{ranges} argument of
+@code{treesit-parser-set-included-ranges}: a list of cons
+@code{(@var{beg} . @var{end})}.  And if @var{parser} doesn't have any
+ranges, the return value is nil.
+
+@example
+@group
+(treesit-parser-included-ranges parser)
+    @result{} ((1 . 9) (16 . 24) (24 . 25))
+@end group
+@end example
+@end defun
+
+@defun treesit-set-ranges parser-or-lang ranges
+Like @code{treesit-parser-set-included-ranges}, this function sets
+the ranges of @var{parser-or-lang} to @var{ranges}.  Conveniently,
+@var{parser-or-lang} could be either a parser or a language.  If it is
+a language, this function looks for the first parser in
+@var{treesit-parser-list} for that language in the current buffer,
+and sets the ranges for it.
+@end defun
+
+@defun treesit-get-ranges parser-or-lang
+This function returns the ranges of @var{parser-or-lang}, like
+@code{treesit-parser-included-ranges}.  And like
+@code{treesit-set-ranges}, @var{parser-or-lang} can be a parser or
+a language symbol.
+@end defun
+
+@defun treesit-query-range source pattern &optional beg end
+This function matches @var{source} with @var{pattern} and returns the
+ranges of captured nodes.  The return value has the same shape as in
+other functions: a list of @code{(@var{beg} . @var{end})}.
+
+For convenience, @var{source} can be a language symbol, a parser, or a
+node.  If a language symbol, this function matches in the root node of
+the first parser using that language; if a parser, this function
+matches in the root node of that parser; if a node, this function
+matches in that node.
+
+Parameter @var{pattern} is the query pattern used to capture nodes
+(@pxref{Pattern Matching}).  The capture names don't matter.  Parameters
+@var{beg} and @var{end}, if both non-nil, limit the range in which
+this function queries.
+
+Like other query functions, this function raises a
+@var{treesit-query-error} if @var{pattern} is malformed.
+@end defun
+
+@defun treesit-language-at point
+This function tries to figure out which language is responsible for
+the text at @var{point}.  It goes over each parser in
+@var{treesit-parser-list} and sees if that parser's range covers
+@var{point}.
+@end defun
+
+@defvar treesit-range-functions
+A list of range functions.  Font-locking and indenting code uses
+functions in this list to set correct ranges for a language parser
+before using it.
+
+The signature of each function should be
+
+@example
+(@var{start} @var{end} &rest @var{_})
+@end example
+
+where @var{start} and @var{end} mark the region that is about to be
+used.  A range function only needs to (but is not limited to) update
+ranges in that region.
+
+Each function in the list is called in-order.
+@end defvar
+
+@defun treesit-update-ranges &optional start end
+This function is used by font-lock and indent to update ranges before
+using any parser.  Each range function in
+@var{treesit-range-functions} is called in-order.  Arguments
+@var{start} and @var{end} are passed to each range function.
+@end defun
+
+@heading An example
+
+Normally, in a set of languages that can be mixed together, there is a
+major language and several embedded languages. The major language
+parses the whole document, and skips the embedded languages. Then the
+parser for the major language knows the ranges of the embedded
+languages. So we first parse the whole document with the major
+language's parser, set ranges for the embedded languages, then parse
+the embedded languages.
+
+Suppose we want to parse a very simple document that mixes HTML, CSS
+and JavaScript:
+
+@example
+@group
+<html>
+  <script>1 + 2</script>
+  <style>body @{ color: "blue"; @}</style>
+</html>
+@end group
+@end example
+
+We first parse with HTML, then set ranges for CSS and JavaScript:
+
+@example
+@group
+;; Create parsers.
+(setq html (treesit-get-parser-create 'html))
+(setq css (treesit-get-parser-create 'css))
+(setq js (treesit-get-parser-create 'javascript))
+
+;; Set CSS ranges.
+(setq css-range
+      (treesit-query-range
+       'html
+       "(style_element (raw_text) @@capture)"))
+(treesit-parser-set-included-ranges css css-range)
+
+;; Set JavaScript ranges.
+(setq js-range
+      (treesit-query-range
+       'html
+       "(script_element (raw_text) @@capture)"))
+(treesit-parser-set-included-ranges js js-range)
+@end group
+@end example
+
+We use a query pattern @code{(style_element (raw_text) @@capture)} to
+find CSS nodes in the HTML parse tree.  @xref{Pattern Matching}, for
+how to write query patterns.
+
+@node Tree-sitter C API
+@section Tree-sitter C API Correspondence
+
+Emacs' tree-sitter integration doesn't expose every feature
+tree-sitter's C API provides.  Missing features include:
+
+@itemize
+@item
+Creating a tree cursor and navigating the syntax tree with it.
+@item
+Setting timeout and cancellation flag for a parser.
+@item
+Setting the logger for a parser.
+@item
+Printing a DOT graph of the syntax tree to a file.
+@item
+Copying and modifying a syntax tree.  (Emacs doesn't expose a tree
+object.)
+@item
+Using (row, column) coordinates as position.
+@item
+Updating a node with changes. (In Emacs, retrieve a new node instead
+of updating the existing one.)
+@item
+Querying statistics of a language definition.
+@end itemize
+
+In addition, Emacs makes some changes to the C API to make the API more
+convenient and idiomatic:
+
+@itemize
+@item
+Instead of using byte positions, the ELisp API uses character
+positions.
+@item
+Null nodes are converted to nil.
+@end itemize
+
+Below is the correspondence between all C API functions and their
+ELisp counterparts.  Sometimes one ELisp function corresponds to
+multiple C functions, and many C functions don't have an ELisp
+counterpart.
+
+@example
+ts_parser_new                           treesit-parser-create
+ts_parser_delete
+ts_parser_set_language
+ts_parser_language                      treesit-parser-language
+ts_parser_set_included_ranges           treesit-parser-set-included-ranges
+ts_parser_included_ranges               treesit-parser-included-ranges
+ts_parser_parse
+ts_parser_parse_string                  treesit-parse-string
+ts_parser_parse_string_encoding
+ts_parser_reset
+ts_parser_set_timeout_micros
+ts_parser_timeout_micros
+ts_parser_set_cancellation_flag
+ts_parser_cancellation_flag
+ts_parser_set_logger
+ts_parser_logger
+ts_parser_print_dot_graphs
+ts_tree_copy
+ts_tree_delete
+ts_tree_root_node
+ts_tree_language
+ts_tree_edit
+ts_tree_get_changed_ranges
+ts_tree_print_dot_graph
+ts_node_type                            treesit-node-type
+ts_node_symbol
+ts_node_start_byte                      treesit-node-start
+ts_node_start_point
+ts_node_end_byte                        treesit-node-end
+ts_node_end_point
+ts_node_string                          treesit-node-string
+ts_node_is_null
+ts_node_is_named                        treesit-node-check
+ts_node_is_missing                      treesit-node-check
+ts_node_is_extra                        treesit-node-check
+ts_node_has_changes                     treesit-node-check
+ts_node_has_error                       treesit-node-check
+ts_node_parent                          treesit-node-parent
+ts_node_child                           treesit-node-child
+ts_node_field_name_for_child            treesit-node-field-name-for-child
+ts_node_child_count                     treesit-node-child-count
+ts_node_named_child                     treesit-node-child
+ts_node_named_child_count               treesit-node-child-count
+ts_node_child_by_field_name             treesit-node-by-field-name
+ts_node_child_by_field_id
+ts_node_next_sibling                    treesit-next-sibling
+ts_node_prev_sibling                    treesit-prev-sibling
+ts_node_next_named_sibling              treesit-next-sibling
+ts_node_prev_named_sibling              treesit-prev-sibling
+ts_node_first_child_for_byte            treesit-first-child-for-pos
+ts_node_first_named_child_for_byte      treesit-first-child-for-pos
+ts_node_descendant_for_byte_range       treesit-descendant-for-range
+ts_node_descendant_for_point_range
+ts_node_named_descendant_for_byte_range treesit-descendant-for-range
+ts_node_named_descendant_for_point_range
+ts_node_edit
+ts_node_eq                              treesit-node-eq
+ts_tree_cursor_new
+ts_tree_cursor_delete
+ts_tree_cursor_reset
+ts_tree_cursor_current_node
+ts_tree_cursor_current_field_name
+ts_tree_cursor_current_field_id
+ts_tree_cursor_goto_parent
+ts_tree_cursor_goto_next_sibling
+ts_tree_cursor_goto_first_child
+ts_tree_cursor_goto_first_child_for_byte
+ts_tree_cursor_goto_first_child_for_point
+ts_tree_cursor_copy
+ts_query_new
+ts_query_delete
+ts_query_pattern_count
+ts_query_capture_count
+ts_query_string_count
+ts_query_start_byte_for_pattern
+ts_query_predicates_for_pattern
+ts_query_step_is_definite
+ts_query_capture_name_for_id
+ts_query_string_value_for_id
+ts_query_disable_capture
+ts_query_disable_pattern
+ts_query_cursor_new
+ts_query_cursor_delete
+ts_query_cursor_exec                    treesit-query-capture
+ts_query_cursor_did_exceed_match_limit
+ts_query_cursor_match_limit
+ts_query_cursor_set_match_limit
+ts_query_cursor_set_byte_range
+ts_query_cursor_set_point_range
+ts_query_cursor_next_match
+ts_query_cursor_remove_match
+ts_query_cursor_next_capture
+ts_language_symbol_count
+ts_language_symbol_name
+ts_language_symbol_for_name
+ts_language_field_count
+ts_language_field_name_for_id
+ts_language_field_id_for_name
+ts_language_symbol_type
+ts_language_version
+@end example
diff --git a/lisp/emacs-lisp/cl-preloaded.el b/lisp/emacs-lisp/cl-preloaded.el

index 6aa45526d845cd33259910a5086b226c23f307b6..b4be54bbd6319b35ec60d0aacf69bf9e32f47f5f 100644 (file)
--- a/lisp/emacs-lisp/cl-preloaded.el
+++ b/lisp/emacs-lisp/cl-preloaded.el
@@ -68,6 +68,8 @@
      (font-spec atom) (font-entity atom) (font-object atom)
      (vector array sequence atom)
      (user-ptr atom)
+    (tree-sitter-parser atom)
+    (tree-sitter-node atom)
      ;; Plus, really hand made:
      (null symbol list sequence atom))
    "Alist of supertypes.
diff --git a/lisp/treesit.el b/lisp/treesit.el

new file mode 100644 (file)

index 0000000..eaaa131
--- /dev/null
+++ b/lisp/treesit.el
@@ -0,0 +1,853 @@
+;;; treesit.el --- tree-sitter utilities -*- lexical-binding: t -*-
+
+;; Copyright (C) 2021 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;;
+;; Note to self: we don't create parsers automatically in any provided
+;; functions.
+
+;;; Code:
+
+(eval-when-compile (require 'cl-lib))
+(require 'cl-seq)
+(require 'font-lock)
+
+;;; Activating tree-sitter
+
+;; Customization group holding all user options defined in this file.
+(defgroup treesit nil
+  "Tree-sitter is an incremental parser."
+  :group 'tools)
+
+(defcustom treesit-disabled-modes nil
+  "A list of major-modes for which tree-sitter support is disabled."
+  ;; `repeat' accepts any number of symbols; `(list symbol)' would
+  ;; describe a list of exactly one symbol.
+  :type '(repeat symbol))
+
+(defcustom treesit-maximum-size (* 4 1024 1024)
+  "Maximum buffer size for enabling tree-sitter parsing.
+`treesit-should-enable-p' compares this against the value
+returned by `buffer-size'."
+  :type 'integer)
+
+(defun treesit-available-p ()
+  "Return non-nil if tree-sitter features are available.
+This checks whether `treesit-parser-create' is defined as a
+function."
+  (fboundp 'treesit-parser-create))
+
+(defun treesit-should-enable-p (&optional mode)
+  "Return non-nil if tree-sitter support should be activated for MODE.
+MODE defaults to the value of `major-mode'.  Tree-sitter must be
+available (see `treesit-available-p'), MODE must not derive from
+any mode listed in `treesit-disabled-modes', and the current
+buffer must be smaller than `treesit-maximum-size'."
+  (let* ((target-mode (or mode major-mode))
+         ;; Non-nil if TARGET-MODE derives from any disabled mode.
+         (blocked-p
+          (catch 'blocked
+            (dolist (disabled-mode treesit-disabled-modes)
+              (when (provided-mode-derived-p target-mode disabled-mode)
+                (throw 'blocked t))))))
+    (and (treesit-available-p)
+         (not blocked-p)
+         (< (buffer-size) treesit-maximum-size))))
+
+;;; Parser API supplement
+
+(defun treesit-get-parser (language)
+  "Return the first parser in `treesit-parser-list' that uses LANGUAGE.
+Return nil if there is no such parser."
+  (cl-loop for candidate in treesit-parser-list
+           when (eq language (treesit-parser-language candidate))
+           return candidate))
+
+(defun treesit-get-parser-create (language)
+  "Return a parser for LANGUAGE, creating one if necessary.
+Look for the first parser using LANGUAGE in `treesit-parser-list';
+if there is none, create a parser for LANGUAGE in the current
+buffer and return it."
+  (cond ((treesit-get-parser language))
+        (t (treesit-parser-create (current-buffer) language))))
+
+(defun treesit-parse-string (string language)
+  "Parse STRING with a fresh parser for LANGUAGE.
+STRING is inserted into a temporary buffer and parsed there.
+Return the root node of the resulting syntax tree."
+  (with-temp-buffer
+    (insert string)
+    (let ((parser (treesit-parser-create (current-buffer) language)))
+      (treesit-parser-root-node parser))))
+
+(defun treesit-language-at (point)
+  "Return the language used at POINT.
+This is the language of the first parser in `treesit-parser-list'
+that has a node at POINT, or nil if there is no such parser."
+  (catch 'language
+    (dolist (parser treesit-parser-list)
+      (when (treesit-node-at point nil parser)
+        (throw 'language (treesit-parser-language parser))))))
+
+(defun treesit-set-ranges (parser-or-lang ranges)
+  "Set the included ranges of PARSER-OR-LANG to RANGES.
+PARSER-OR-LANG is either a parser or a language symbol; for a
+symbol, the first parser found by `treesit-get-parser' is used.
+Signal an error if no parser can be determined."
+  (let ((target
+         (if (symbolp parser-or-lang)
+             (or (treesit-get-parser parser-or-lang)
+                 (error "Cannot find a parser for %s" parser-or-lang))
+           (if (treesit-parser-p parser-or-lang)
+               parser-or-lang
+             (error "Expecting a parser or language, but got %s"
+                    parser-or-lang)))))
+    (treesit-parser-set-included-ranges target ranges)))
+
+(defun treesit-get-ranges (parser-or-lang)
+  "Return the included ranges of PARSER-OR-LANG.
+PARSER-OR-LANG is either a parser or a language symbol; for a
+symbol, the first parser found by `treesit-get-parser' is used.
+Signal an error if no parser can be determined."
+  (let ((target
+         (if (symbolp parser-or-lang)
+             (or (treesit-get-parser parser-or-lang)
+                 (error "Cannot find a parser for %s" parser-or-lang))
+           (if (treesit-parser-p parser-or-lang)
+               parser-or-lang
+             (error "Expecting a parser or language, but got %s"
+                    parser-or-lang)))))
+    (treesit-parser-included-ranges target)))
+
+;;; Node API supplement
+
+(defun treesit-node-buffer (node)
+  "Return the buffer that NODE belongs to.
+This is the buffer of the parser that produced NODE."
+  (let ((parser (treesit-node-parser node)))
+    (treesit-parser-buffer parser)))
+
+(defun treesit-node-language (node)
+  "Return the language symbol used by the parser that produced NODE."
+  (let ((parser (treesit-node-parser node)))
+    (treesit-parser-language parser)))
+
+(defun treesit-node-at (beg &optional end parser-or-lang named)
+  "Return the smallest node covering BEG to END.
+
+END defaults to BEG when omitted.  Return nil if no node is
+found.  If NAMED is non-nil, only look for named nodes; NAMED
+defaults to nil.
+
+PARSER-OR-LANG selects the syntax tree to search: if it is a
+parser, use that parser; otherwise pass it to
+`treesit-buffer-root-node', which uses the first parser in
+`treesit-parser-list' when it is nil, or the first parser for
+that language when it is a language symbol."
+  (let ((root (cond ((treesit-parser-p parser-or-lang)
+                     (treesit-parser-root-node parser-or-lang))
+                    (t (treesit-buffer-root-node parser-or-lang))))
+        (range-end (or end beg)))
+    (treesit-node-descendant-for-range root beg range-end named)))
+
+(defun treesit-buffer-root-node (&optional language)
+  "Return the root node of the current buffer.
+With LANGUAGE nil, use the first parser in `treesit-parser-list';
+otherwise use the first parser for LANGUAGE.  Signal an error if
+no suitable parser exists."
+  (let ((parser
+         (cond (language
+                (or (treesit-get-parser language)
+                    (error "Cannot find a parser for %s" language)))
+               ((car treesit-parser-list))
+               (t (error "Buffer has no parser")))))
+    (treesit-parser-root-node parser)))
+
+(defun treesit-filter-child (node pred &optional named)
+  "Return the children of NODE that satisfy PRED, in order.
+PRED is a function called with one argument, a child node.  If
+NAMED is non-nil, only named children are considered."
+  (cl-loop for child = (treesit-node-child node 0 named)
+           then (treesit-node-next-sibling child named)
+           while child
+           if (funcall pred child)
+           collect child))
+
+(defun treesit-node-text (node &optional no-property)
+  "Return the text of NODE's buffer between NODE's start and end.
+If NO-PROPERTY is non-nil, return the text without text
+properties."
+  (with-current-buffer (treesit-node-buffer node)
+    (let ((extract (if no-property
+                       #'buffer-substring-no-properties
+                     #'buffer-substring)))
+      (funcall extract
+               (treesit-node-start node)
+               (treesit-node-end node)))))
+
+(defun treesit-parent-until (node pred)
+  "Return the closest ancestor of NODE that satisfies PRED.
+NODE itself is not tested.  PRED is a function called with one
+argument, an ancestor node.  Return nil if no ancestor satisfies
+PRED."
+  (cl-loop for ancestor = (treesit-node-parent node)
+           then (treesit-node-parent ancestor)
+           while ancestor
+           if (funcall pred ancestor)
+           return ancestor))
+
+(defun treesit-parent-while (node pred)
+  "Return the furthest node satisfying PRED, going up from NODE.
+The search starts with NODE itself and continues through its
+successive parents for as long as PRED returns non-nil; the last
+node that satisfied PRED is returned.  Return nil if NODE is nil
+or does not itself satisfy PRED.  PRED should be a function that
+takes one argument, the node being tested."
+  (let ((last nil))
+    ;; NODE is tested first, so the result can be NODE itself.
+    (while (and node (funcall pred node))
+      (setq last node
+            node (treesit-node-parent node)))
+    last))
+
+(defun treesit-node-children (node &optional named)
+  "Return a list of NODE's children, in order.
+If NAMED is non-nil, collect only the named children."
+  (let (children)
+    (dotimes (idx (treesit-node-child-count node named))
+      (push (treesit-node-child node idx named) children))
+    (nreverse children)))
+
+(defun treesit-node-index (node &optional named)
+  "Return the index of NODE among its siblings.
+The index is the number of siblings that precede NODE.  If NAMED
+is non-nil, count only named siblings."
+  (let ((index 0)
+        (sibling (treesit-node-prev-sibling node named)))
+    (while sibling
+      (setq index (1+ index)
+            sibling (treesit-node-prev-sibling sibling named)))
+    index))
+
+(defun treesit-node-field-name (node)
+  "Return the field name of NODE as a child of its parent.
+Return nil if NODE has no parent."
+  (let ((parent (treesit-node-parent node)))
+    (when parent
+      (treesit-node-field-name-for-child
+       parent (treesit-node-index node)))))
+
+;;; Query API supplement
+
+(defun treesit-query-in (source query &optional beg end)
+  "Query the current buffer with QUERY.
+
+SOURCE can be a language symbol, a parser, or a node.  For a
+language symbol, query the root node of the first parser for that
+language; for a parser, query its root node; for a node, query
+that node.
+
+QUERY is either a string query or a sexp query.  See Info node
+`(elisp)Pattern Matching' for how to write a query pattern in either
+string or s-expression form.
+
+BEG and END, if _both_ non-nil, specify the range in which the
+query is executed.
+
+Raise a treesit-query-error if QUERY is malformed."
+  (let ((root (cond ((symbolp source)
+                     (treesit-buffer-root-node source))
+                    ((treesit-parser-p source)
+                     (treesit-parser-root-node source))
+                    ((treesit-node-p source)
+                     source))))
+    (treesit-query-capture root query beg end)))
+
+(defun treesit-query-string (string query language)
+  "Query STRING with QUERY in LANGUAGE.
+STRING is parsed in a temporary buffer and the root node of the
+resulting tree is queried.  See `treesit-query-capture' for
+QUERY."
+  (with-temp-buffer
+    (insert string)
+    (treesit-query-capture
+     (treesit-parser-root-node
+      (treesit-parser-create (current-buffer) language))
+     query)))
+
+(defun treesit-query-range (source query &optional beg end)
+  "Query the current buffer and return ranges of captured nodes.
+
+SOURCE, QUERY, BEG and END are the same as in `treesit-query-in'.
+Return a list of (START . END), one per captured node, where
+START and END are the start and end of that node.  Capture names
+are ignored."
+  (mapcar (lambda (capture)
+            (let ((captured (cdr capture)))
+              (cons (treesit-node-start captured)
+                    (treesit-node-end captured))))
+          (treesit-query-in source query beg end)))
+
+;;; Range API supplement
+
+(defvar-local treesit-range-functions nil
+  "A list of range functions.
+Font-locking and indenting code uses functions in this list to
+set correct ranges for a language parser before using it.
+
+The signature of each function should be
+
+    (start end &rest _)
+
+where START and END mark the region that is about to be used.  A
+range function only needs to (but is not limited to) update
+ranges in that region.
+
+Each function in the list is called in order by
+`treesit-update-ranges'.")
+
+(defun treesit-update-ranges (&optional start end)
+  "Update the ranges for each language in the current buffer.
+Call every function in `treesit-range-functions', in order, with
+START and END as arguments.  START defaults to `point-min' and
+END to `point-max'."
+  (cl-loop for range-fn in treesit-range-functions
+           ;; The defaults are recomputed for every function, in
+           ;; case an earlier one changed the buffer boundaries.
+           do (funcall range-fn
+                       (or start (point-min))
+                       (or end (point-max)))))
+
+;;; Font-lock
+
+(defvar-local treesit-font-lock-settings nil
+  "A list of SETTINGs for treesit-based fontification.
+
+Each SETTING should look like
+
+    (LANGUAGE QUERY)
+
+Each SETTING controls one parser (often of a different language).
+LANGUAGE is the language symbol.  See Info node `(elisp)Language
+Definitions'.
+
+QUERY is either a string query or a sexp query.
+See Info node `(elisp)Pattern Matching' for writing queries.
+
+Capture names in QUERY should be face names like
+`font-lock-keyword-face'.  The captured node will be fontified
+with that face.  Capture names can also be function names, in
+which case the function is called with (START END NODE), where
+START and END are the start and end positions of the node in the
+buffer, and NODE is the tree-sitter node object.  If a capture
+name is both a face and a function, the face takes priority.
+
+Generally, major modes should set
+`treesit-font-lock-defaults', and let Emacs automatically
+populate this variable.")
+
+(defvar-local treesit-font-lock-defaults nil
+  "Defaults for tree-sitter Font Lock specified by the major mode.
+
+This variable should be a list of
+
+    (DEFAULT :KEYWORD VALUE...)
+
+A DEFAULT may be a symbol or a list of symbols (specifying
+different levels of fontification).  The symbol(s) can be of a
+variable or a function.  If a symbol is both a variable and a
+function, it is used as a function.  Different levels of
+fontification can be controlled by
+`font-lock-maximum-decoration'.
+
+The symbol(s) in DEFAULT should contain or return a SETTING as
+explained in `treesit-font-lock-settings', which looks like
+
+    (LANGUAGE QUERY)
+
+KEYWORD and VALUE are additional settings that could be used to
+alter fontification behavior.  Currently there aren't any.
+
+Multi-language major-modes should provide a range function for
+each language they support in `treesit-range-functions', and
+Emacs will set the ranges accordingly before fontifying a region.
+See Info node `(elisp)Multiple Languages' for what it means to
+set ranges for a parser.")
+
+(defun treesit-font-lock-fontify-region (start end &optional loudly)
+  "Fontify the region between START and END.
+If LOUDLY is non-nil, message some debugging information.
+
+First update parser ranges with `treesit-update-ranges' and
+unfontify the region.  Then, for each setting in
+`treesit-font-lock-settings', run its query between START and END
+and, for every capture, either apply the captured face or call
+the captured function.  Finally call
+`font-lock-default-fontify-region' with unfontification
+disabled.
+
+Signal an error if a capture name is neither a face nor a
+function."
+  (treesit-update-ranges start end)
+  (font-lock-unfontify-region start end)
+  (dolist (setting treesit-font-lock-settings)
+    ;; Each SETTING is (LANGUAGE QUERY).  A parser for LANGUAGE is
+    ;; created in the current buffer if there isn't one already.
+    (when-let* ((language (nth 0 setting))
+                (match-pattern (nth 1 setting))
+                (parser (treesit-get-parser-create language)))
+      (when-let ((node (treesit-node-at start end parser)))
+        (let ((captures (treesit-query-capture
+                         node match-pattern
+                         ;; Specifying the range is important. More
+                         ;; often than not, NODE will be the root
+                         ;; node, and if we don't specify the range,
+                         ;; we are basically querying the whole file.
+                         start end)))
+          (with-silent-modifications
+            (dolist (capture captures)
+              ;; Each CAPTURE is (NAME . NODE).  START and END are
+              ;; rebound here to the bounds of the captured node.
+              (let* ((face (car capture))
+                     (node (cdr capture))
+                     (start (treesit-node-start node))
+                     (end (treesit-node-end node)))
+                ;; The face check comes first, so a name that is both
+                ;; a face and a function is treated as a face.
+                (cond ((facep face)
+                       (put-text-property start end 'face face))
+                      ((functionp face)
+                       (funcall face start end node))
+                      (t (error "Capture name %s is neither a face nor a function" face)))
+                (when loudly
+                  (message "Fontifying text from %d to %d, Face: %s Language: %s"
+                           start end face language)))))))))
+  ;; Call regexp font-lock after tree-sitter, as it is usually used
+  ;; for custom fontification.  Unfontification is disabled so that
+  ;; the faces applied above are not removed.
+  (let ((font-lock-unfontify-region-function #'ignore))
+    (funcall #'font-lock-default-fontify-region start end loudly)))
+
+(defun treesit-font-lock-enable ()
+  "Enable tree-sitter font-locking for the current buffer.
+Compute `treesit-font-lock-settings' from the first element of
+`treesit-font-lock-defaults', choosing the decoration level
+according to `font-lock-maximum-decoration', and install
+`treesit-font-lock-fontify-region' as the buffer's
+`font-lock-fontify-region-function'."
+  (let ((default (car treesit-font-lock-defaults))
+        (attributes (cdr treesit-font-lock-defaults)))
+    ;; The :KEYWORD VALUE part is currently unused.
+    (ignore attributes)
+    (setq-local treesit-font-lock-settings
+                (font-lock-eval-keywords
+                 (font-lock-choose-keywords
+                  default
+                 (font-lock-value-in-major-mode
+                   font-lock-maximum-decoration)))))
+  (setq-local font-lock-fontify-region-function
+              #'treesit-font-lock-fontify-region)
+  ;; If we don't set `font-lock-defaults' to some non-nil value,
+  ;; font-lock doesn't enable properly (the font-lock-mode-internal
+  ;; doesn't run).  See `font-lock-add-keywords'.
+  (when (and font-lock-mode
+             (null font-lock-keywords)
+             (null font-lock-defaults))
+    ;; Toggle `font-lock-mode' off and on again so that it picks up
+    ;; the new `font-lock-defaults'.
+    (font-lock-mode -1)
+    (setq-local font-lock-defaults '(nil t))
+    (font-lock-mode 1)))
+
+;;; Indent
+
+(defvar treesit--indent-verbose nil
+  "If non-nil, log progress when indenting.
+The indentation functions in this file report with `message'
+which rule matched, or why no indentation was performed.")
+
+;; This is not bound locally like we normally do with major-mode
+;; stuff, because for tree-sitter, a buffer could contain more than
+;; one language.
+(defvar treesit-simple-indent-rules nil
+  "A list of indent rule settings.
+Each indent rule setting should be (LANGUAGE . RULES),
+where LANGUAGE is a language symbol, and RULES is a list of
+
+    (MATCHER ANCHOR OFFSET).
+
+MATCHER determines whether this rule applies; ANCHOR and OFFSET
+together determine which column to indent to.
+
+A MATCHER is a function that takes three arguments (NODE PARENT
+BOL).  BOL is the point where we are indenting: the beginning of
+line content, the position of the first non-whitespace character.
+NODE is the largest (highest-in-tree) node starting at that
+point.  PARENT is the parent of NODE.
+
+If MATCHER returns non-nil, meaning the rule matches, Emacs then
+uses ANCHOR to find an anchor.  ANCHOR should be a function that
+takes the same arguments (NODE PARENT BOL) and returns a point.
+
+Finally Emacs computes the column of the point returned by ANCHOR,
+adds OFFSET to it, and indents to that column.
+
+For MATCHER and ANCHOR, Emacs provides some convenient presets.
+See `treesit-simple-indent-presets'.")
+
+(defvar treesit-simple-indent-presets
+  ;; Matcher presets.  Presets that take arguments are functions
+  ;; returning the actual (NODE PARENT BOL) matcher.
+  '((match . (lambda
+               (&optional node-type parent-type node-field
+                          node-index-min node-index-max)
+               `(lambda (node parent bol &rest _)
+                  (and (or (null ,node-type)
+                           (equal (treesit-node-type node)
+                                  ,node-type))
+                       (or (null ,parent-type)
+                           (equal (treesit-node-type parent)
+                                  ,parent-type))
+                       (or (null ,node-field)
+                           (equal (treesit-node-field-name node)
+                                  ,node-field))
+                       (or (null ,node-index-min)
+                           (>= (treesit-node-index node t)
+                               ,node-index-min))
+                       (or (null ,node-index-max)
+                           (<= (treesit-node-index node t)
+                               ,node-index-max))))))
+    (no-node . (lambda (node parent bol &rest _) (null node)))
+    (parent-is . (lambda (type)
+                   `(lambda (node parent bol &rest _)
+                      (equal ,type (treesit-node-type parent)))))
+
+    (node-is . (lambda (type)
+                 `(lambda (node parent bol &rest _)
+                    (equal ,type (treesit-node-type node)))))
+
+    (query . (lambda (pattern)
+               `(lambda (node parent bol &rest _)
+                  (cl-loop for capture
+                           in (treesit-query-capture
+                               parent ,pattern)
+                           if (treesit-node-eq node (cdr capture))
+                           return t
+                           finally return nil))))
+    ;; Anchor presets.  Each returns a buffer position.
+    (first-sibling . (lambda (node parent bol &rest _)
+                       (treesit-node-start
+                        (treesit-node-child parent 0 t))))
+
+    (parent . (lambda (node parent bol &rest _)
+                (treesit-node-start parent)))
+    (parent-bol . (lambda (node parent bol &rest _)
+                    (save-excursion
+                      (goto-char (treesit-node-start parent))
+                      (back-to-indentation)
+                      (point))))
+    (prev-sibling . (lambda (node parent bol &rest _)
+                      (treesit-node-start
+                       (treesit-node-prev-sibling node))))
+    (no-indent . (lambda (node parent bol &rest _) bol))
+    (prev-line . (lambda (node parent bol &rest _)
+                   (save-excursion
+                     (goto-char bol)
+                     (forward-line -1)
+                     (skip-chars-forward " \t")
+                     (treesit-node-start
+                      (treesit-node-at (point) nil nil t))))))
+  "A list of presets.
+These presets can be used as MATCHER and ANCHOR in
+`treesit-simple-indent-rules'.
+
+MATCHER:
+
+\(match NODE-TYPE PARENT-TYPE NODE-FIELD NODE-INDEX-MIN NODE-INDEX-MAX)
+
+    NODE-TYPE checks for node's type, PARENT-TYPE checks for
+    parent's type, NODE-FIELD checks for the field name of node
+    in the parent, NODE-INDEX-MIN and NODE-INDEX-MAX check for
+    the node's index among the named children of the parent.  An
+    argument that is nil is not checked.  Therefore, to match the
+    first child where parent is \"argument_list\", use
+
+        (match nil \"argument_list\" nil 0 0).
+
+no-node
+
+    Matches the case where node is nil, i.e., there is no node
+    that starts at point.  This is the case when indenting an
+    empty line.
+
+\(parent-is TYPE)
+
+    Checks that the parent has type TYPE.
+
+\(node-is TYPE)
+
+    Checks that the node has type TYPE.
+
+\(query QUERY)
+
+    Queries the parent node with QUERY, and checks if the node
+    is captured (by any capture name).
+
+ANCHOR:
+
+first-sibling
+
+    Find the first named child of the parent.
+
+parent
+
+    Find the parent.
+
+parent-bol
+
+    Find the beginning of non-space characters on the line where
+    the parent is on.
+
+prev-sibling
+
+    Find node's previous sibling.
+
+no-indent
+
+    Do nothing: the anchor is BOL itself.
+
+prev-line
+
+    Find the named node on the previous line.  This can be used when
+    indenting an empty line: just indent like the previous node.")
+
+(defun treesit--simple-apply (fn args)
+  "Apply FN to ARGS.
+
+If FN is a key in `treesit-simple-indent-presets', use the
+corresponding value as the function.
+
+If FN is a list that is not itself a function, such as
+\(parent-is \"block\"), first apply its car to its cdr in the
+same manner to obtain a function, then apply that function to
+ARGS.
+
+Signal an error if FN cannot be resolved to a function."
+  ;; We don't want to match uncompiled lambdas, so make sure this cons
+  ;; is not a function.  We could move the condition functionp
+  ;; forward, but better be explicit.
+  (cond ((and (consp fn) (not (functionp fn)))
+         (apply (treesit--simple-apply (car fn) (cdr fn))
+                ;; We don't evaluate ARGS with `simple-apply', i.e.,
+                ;; no composing, better keep it simple.
+                args))
+        ;; A preset name takes precedence over a function of the
+        ;; same name.
+        ((and (symbolp fn)
+              (alist-get fn treesit-simple-indent-presets))
+         (apply (alist-get fn treesit-simple-indent-presets)
+                args))
+        ((functionp fn) (apply fn args))
+        (t (error "Couldn't find the function corresponding to %s" fn))))
+
+;; This variable might seem unnecessary: why split
+;; `treesit-indent' and `treesit-simple-indent' into two
+;; functions?  We add this variable in between because later we might
+;; add more powerful indentation engines, and a new engine can
+;; probably share `treesit-indent'.  It is also useful, as suggested
+;; by Stefan M, to have a function that figures out how much to indent
+;; but doesn't actually perform the indentation, because we might
+;; want to know where a node will indent to if we put it at some other
+;; location, and use that information to calculate the actual
+;; indentation.  And `treesit-simple-indent' is that function.  I
+;; forgot the example Stefan gave, but it makes a lot of sense.
+(defvar treesit-indent-function #'treesit-simple-indent
+  "Function used by `treesit-indent' to do some of the work.
+
+This function is called with
+
+    (NODE PARENT BOL &rest _)
+
+and returns
+
+    (ANCHOR . OFFSET).
+
+BOL is the position of the first non-whitespace character on the
+current line; NODE is the \"largest\" node that starts at BOL;
+PARENT is its parent; ANCHOR is a point (not a node), and OFFSET
+is a number.  Emacs finds the column of ANCHOR and adds OFFSET to
+it as the final indentation of the current line.  If ANCHOR is
+nil, the line is not indented.")
+
+(defun treesit-indent ()
+  "Indent according to the result of `treesit-indent-function'.
+Update parser ranges, find the largest node starting at the
+first non-whitespace character of the current line, and call
+`treesit-indent-function' with that node, its parent and that
+position.  If the function returns an anchor, indent the line to
+the anchor's column plus the returned offset."
+  (treesit-update-ranges)
+  (let* ((orig-pos (point))
+         ;; BOL is the position of the first non-whitespace character
+         ;; on the current line.
+         (bol (save-excursion
+                (forward-line 0)
+                (skip-chars-forward " \t")
+                (point)))
+         ;; Use the first parser that has a node at BOL.
+         (smallest-node
+          (cl-loop for parser in treesit-parser-list
+                   for node = (treesit-node-at
+                               bol nil parser)
+                   if node return node))
+         ;; Climb from SMALLEST-NODE as long as the node still starts
+         ;; at BOL; the result is nil if even SMALLEST-NODE doesn't.
+         (node (treesit-parent-while
+                smallest-node
+                (lambda (node)
+                  (eq bol (treesit-node-start node))))))
+    (pcase-let*
+        ((parser (if smallest-node
+                     (treesit-node-parser smallest-node)
+                   nil))
+         ;; NODE would be nil if BOL is on a whitespace.  In that case
+         ;; we set PARENT to the "node at point", which would
+         ;; encompass the whitespace.
+         (parent (cond ((and node parser)
+                        (treesit-node-parent node))
+                       (parser
+                        (treesit-node-at bol nil parser))
+                       (t nil)))
+         (`(,anchor . ,offset)
+          (funcall treesit-indent-function node parent bol)))
+      (if (null anchor)
+          (when treesit--indent-verbose
+            (message "Failed to find the anchor"))
+        (let ((col (+ (save-excursion
+                        (goto-char anchor)
+                        (current-column))
+                      offset)))
+          ;; If point was after the indentation, keep it on the same
+          ;; text; otherwise let it move with the indentation.
+          (if (< bol orig-pos)
+              (save-excursion
+                (indent-line-to col))
+            (indent-line-to col)))))))
+
+(defun treesit-simple-indent (node parent bol)
+  "Calculate indentation according to `treesit-simple-indent-rules'.
+
+BOL is the position of the first non-whitespace character on the
+current line.  NODE is the largest node that starts at BOL,
+PARENT is NODE's parent.
+
+Return (ANCHOR . OFFSET) where ANCHOR is a buffer position and
+OFFSET is the indentation offset, meaning indent to the column of
+ANCHOR plus OFFSET.  The rules tried are those registered for the
+language of PARENT; the first rule whose matcher returns non-nil
+wins.  Return nil if no rule matches.  If PARENT is nil, no
+indentation is computed."
+  (if (null parent)
+      (when treesit--indent-verbose
+        (message "PARENT is nil, not indenting"))
+    (let* ((language (treesit-node-language parent))
+           (rules (alist-get language
+                             treesit-simple-indent-rules)))
+      ;; Each RULE is (MATCHER ANCHOR OFFSET).
+      (cl-loop for rule in rules
+               for pred = (nth 0 rule)
+               for anchor = (nth 1 rule)
+               for offset = (nth 2 rule)
+               if (treesit--simple-apply
+                   pred (list node parent bol))
+               do (when treesit--indent-verbose
+                    (message "Matched rule: %S" rule))
+               and
+               return (cons (treesit--simple-apply
+                             anchor (list node parent bol))
+                            offset)))))
+
+(defun treesit-check-indent (mode)
+  "Compare the current buffer's indentation with that of major mode MODE.
+
+Copy the buffer's text into a temporary buffer, enable MODE
+there, re-indent everything, and pop up a diff buffer showing the
+difference.  Correct indentation (target) is in green, current
+indentation is in red."
+  (interactive "CTarget major mode: ")
+  (let ((original (current-buffer)))
+    (with-temp-buffer
+      (insert-buffer-substring original)
+      (funcall mode)
+      (let ((from (point-min))
+            (to (point-max)))
+        (indent-region from to))
+      (diff-buffers original (current-buffer)))))
+
+;;; Debugging
+
+(defvar-local treesit--inspect-name nil
+  "Node description shown in the mode-line by `treesit-inspect-mode'.
+Set by `treesit-inspect-node-at-point'.")
+
+(defun treesit-inspect-node-at-point (&optional arg)
+  "Show information of the node at point.
+Always set `treesit--inspect-name' (which will appear in the
+mode-line if `treesit-inspect-mode' is enabled).  If ARG is
+non-nil, as it is when called interactively, also show the
+information in the echo area.  Uses the first parser in
+`treesit-parser-list'."
+  (interactive "p")
+  ;; NODE-LIST contains all the nodes that start at point, from the
+  ;; smallest one up.
+  (let* ((node-list
+          (cl-loop for node = (treesit-node-at (point))
+                   then (treesit-node-parent node)
+                   while node
+                   if (eq (treesit-node-start node)
+                          (point))
+                   collect node))
+         (largest-node (car (last node-list)))
+         (parent (treesit-node-parent largest-node))
+         ;; node-list-acending contains all the nodes bottom-up, then
+         ;; the parent.
+         (node-list-acending
+          (if (null largest-node)
+              ;; If there are no nodes that start at point, just show
+              ;; the node at point and its parent.
+              (list (treesit-node-at (point))
+                    (treesit-node-parent
+                     (treesit-node-at (point))))
+            (append node-list (list parent))))
+         (name ""))
+    ;; We draw nodes like (parent field-name: (node)) recursively,
+    ;; so it could be (node1 field-name: (node2 field-name: (node3))).
+    ;; Each iteration wraps the description built so far in the next
+    ;; node up; named nodes are shown in parentheses, anonymous ones
+    ;; in double quotes.
+    (dolist (node node-list-acending)
+      (setq
+       name
+       (concat
+        (if (treesit-node-field-name node)
+            (format " %s: " (treesit-node-field-name node))
+          " ")
+        (if (treesit-node-check node 'named) "(" "\"")
+        (or (treesit-node-type node)
+            "N/A")
+        name
+        (if (treesit-node-check node 'named) ")" "\""))))
+    (setq treesit--inspect-name name)
+    (force-mode-line-update)
+    (when arg
+      (if node-list
+          (message "%s" treesit--inspect-name)
+        (message "No node at point")))))
+
+(define-minor-mode treesit-inspect-mode
+  "Shows the node that _starts_ at point in the mode-line.
+
+The mode-line displays
+
+    PARENT FIELD-NAME: (CHILD (GRAND-CHILD (...)))
+
+CHILD, GRAND-CHILD, and GRAND-GRAND-CHILD, etc, are nodes that
+have their beginning at point.  And PARENT is the parent of
+CHILD.
+
+If no node starts at point, i.e., point is in the middle of a
+node, then we just display the smallest node that spans point and
+its immediate parent.
+
+This minor mode doesn't create parsers on its own.  It simply
+uses the first parser in `treesit-parser-list'."
+  :lighter nil
+  (if treesit-inspect-mode
+      (progn
+        ;; Refresh the displayed node after every command in this
+        ;; buffer.
+        (add-hook 'post-command-hook
+                  #'treesit-inspect-node-at-point 0 t)
+        (add-to-list 'mode-line-misc-info
+                     '(:eval treesit--inspect-name)))
+    (remove-hook 'post-command-hook
+                 #'treesit-inspect-node-at-point t)
+    ;; NOTE(review): the hook above is buffer-local, but
+    ;; `mode-line-misc-info' is modified with plain `add-to-list' and
+    ;; `setq'; confirm whether disabling the mode in one buffer also
+    ;; removes the mode-line element for other buffers.
+    (setq mode-line-misc-info
+          (remove '(:eval treesit--inspect-name)
+                  mode-line-misc-info))))
+
+(defun treesit-check-query (query language)
+  "Check if QUERY is valid for LANGUAGE.
+If QUERY is valid, say so in the echo area.  If QUERY is invalid,
+display the query in a popup buffer, jump to the offending
+pattern and highlight the pattern.
+
+The query is run against an empty temporary buffer, using a
+parser for LANGUAGE created there."
+  (let ((buf (get-buffer-create "*tree-sitter check query*")))
+    (with-temp-buffer
+      (treesit-get-parser-create language)
+      (condition-case err
+          (progn (treesit-query-in language query)
+                 (message "QUERY is valid"))
+        (treesit-query-error
+         (with-current-buffer buf
+           ;; The error data is read as (MESSAGE START ...), where
+           ;; START is used as a position in the query text.
+           (let* ((data (cdr err))
+                  (message (nth 0 data))
+                  (start (nth 1 data)))
+             (erase-buffer)
+             ;; NOTE(review): `insert' needs a string here; confirm
+             ;; how sexp queries are meant to be handled.
+             (insert query)
+             (goto-char start)
+             ;; Highlight from START up to and including the next
+             ;; space, if there is one.
+             (search-forward " " nil t)
+             (put-text-property start (point) 'face 'error)
+             (message "%s" (buffer-substring start (point)))
+             ;; Put the error message on its own line at the top,
+             ;; then move point back toward the offending pattern.
+             (goto-char (point-min))
+             (insert (format "%s: %d\n" message start))
+             (forward-char start)))
+         (pop-to-buffer buf))))))
+
+;;; Etc
+
+(declare-function find-library-name "find-func.el")
+(defun treesit--check-manual-covarage ()
+  "Print tree-sitter functions missing from the manual in message buffer.
+Collect every function defined with `defun' in the treesit
+library, ignoring internal \"treesit--\" functions, and report
+those that have no `@defun' entry in doc/lispref/parsing.texi or
+doc/lispref/modes.texi under `source-directory'."
+  (interactive)
+  (require 'find-func)
+  (let ((functions-in-source
+         (with-temp-buffer
+           ;; This library is named "treesit"; there is no library
+           ;; called "tree-sitter" for `find-library-name' to find.
+           (insert-file-contents (find-library-name "treesit"))
+           (cl-remove-if
+            (lambda (name) (string-match "treesit--" name))
+            (cl-sort
+             (save-excursion
+               (goto-char (point-min))
+               (cl-loop while (re-search-forward
+                               "^(defun \\([^ ]+\\)" nil t)
+                        collect (match-string-no-properties 1)))
+             #'string<))))
+        (functions-in-manual
+         (with-temp-buffer
+           ;; Both manual files are inserted into the same buffer and
+           ;; scanned together.
+           (insert-file-contents (expand-file-name
+                                  "doc/lispref/parsing.texi"
+                                  source-directory))
+           (insert-file-contents (expand-file-name
+                                  "doc/lispref/modes.texi"
+                                  source-directory))
+           (cl-sort
+            (save-excursion
+              (goto-char (point-min))
+              (cl-loop while (re-search-forward
+                              "^@defun \\([^ ]+\\)" nil t)
+                       collect (match-string-no-properties 1)))
+            #'string<))))
+    (message "Missing: %s"
+             (string-join
+              (cl-remove-if
+               (lambda (name) (member name functions-in-manual))
+               functions-in-source)
+              "\n"))))
+
+(provide 'treesit)
+
+;;; treesit.el ends here
diff --git a/src/Makefile.in b/src/Makefile.in

index 2b7c4bb316c816ef02e173ff703274aac46cf448..6ae55b19e164be24be71634506e28d74ddf1f37a 100644 (file)
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -337,6 +337,10 @@ JSON_LIBS = @JSON_LIBS@
  JSON_CFLAGS = @JSON_CFLAGS@
  JSON_OBJ = @JSON_OBJ@
  
+## Tree-sitter settings substituted by configure.  The variable must
+## be named TREE_SITTER_CFLAGS: that is the name configure substitutes
+## and the name EMACS_CFLAGS refers to.
+TREE_SITTER_LIBS = @TREE_SITTER_LIBS@
+TREE_SITTER_CFLAGS = @TREE_SITTER_CFLAGS@
+TREE_SITTER_OBJ = @TREE_SITTER_OBJ@
+
  INTERVALS_H = dispextern.h intervals.h composite.h
  
  GETLOADAVG_LIBS = @GETLOADAVG_LIBS@
@@ -400,7 +404,7 @@ EMACS_CFLAGS=-Demacs $(MYCPPFLAGS) -I. -I$(srcdir) \
    $(XINPUT_CFLAGS) $(WEBP_CFLAGS) $(WEBKIT_CFLAGS) $(LCMS2_CFLAGS) \
    $(SETTINGS_CFLAGS) $(FREETYPE_CFLAGS) $(FONTCONFIG_CFLAGS) \
    $(HARFBUZZ_CFLAGS) $(LIBOTF_CFLAGS) $(M17N_FLT_CFLAGS) $(DEPFLAGS) \
-  $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) \
+  $(LIBSYSTEMD_CFLAGS) $(JSON_CFLAGS) $(XSYNC_CFLAGS) $(TREE_SITTER_CFLAGS) \
    $(LIBGNUTLS_CFLAGS) $(NOTIFY_CFLAGS) $(CAIRO_CFLAGS) \
    $(WERROR_CFLAGS) $(HAIKU_CFLAGS)
  ALL_CFLAGS = $(EMACS_CFLAGS) $(WARN_CFLAGS) $(CFLAGS)
@@ -439,7 +443,7 @@ base_obj = dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \
         $(if $(HYBRID_MALLOC),sheap.o) \
         $(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ) \
         $(W32_OBJ) $(WINDOW_SYSTEM_OBJ) $(XGSELOBJ) $(JSON_OBJ) \
-       $(HAIKU_OBJ) $(PGTK_OBJ)
+       $(TREE_SITTER_OBJ) $(HAIKU_OBJ) $(PGTK_OBJ)
  doc_obj = $(base_obj) $(NS_OBJC_OBJ)
  obj = $(doc_obj) $(HAIKU_CXX_OBJ)
  
@@ -559,7 +563,7 @@ LIBES = $(LIBS) $(W32_LIBS) $(LIBS_GNUSTEP) $(PGTK_LIBS) $(LIBX_BASE) $(LIBIMAGE
     $(LIBGNUTLS_LIBS) $(LIB_PTHREAD) $(GETADDRINFO_A_LIBS) $(LCMS2_LIBS) \
     $(NOTIFY_LIBS) $(LIB_MATH) $(LIBZ) $(LIBMODULES) $(LIBSYSTEMD_LIBS) \
     $(JSON_LIBS) $(LIBGMP) $(LIBGCCJIT_LIBS) $(XINPUT_LIBS) $(HAIKU_LIBS) \
-   $(SQLITE3_LIBS)
+   $(TREE_SITTER_LIBS) $(SQLITE3_LIBS)
  
  ## FORCE it so that admin/unidata can decide whether this file is
  ## up-to-date.  Although since charprop depends on bootstrap-emacs,
diff --git a/src/alloc.c b/src/alloc.c

index 9ed94dc8a1e16857e8c25740001dac55534b1f9f..e7603fac37a31f9348f35d6ca79b39cc8a7376d3 100644 (file)
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -50,6 +50,10 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
  #include TERM_HEADER
  #endif /* HAVE_WINDOW_SYSTEM */
  
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
  #include <flexmember.h>
  #include <verify.h>
  #include <execinfo.h>           /* For backtrace.  */
@@ -3177,6 +3181,15 @@ cleanup_vector (struct Lisp_Vector *vector)
        if (uptr->finalizer)
         uptr->finalizer (uptr->p);
      }
+#ifdef HAVE_TREE_SITTER
+  else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_TS_PARSER))
+    {
+      struct Lisp_TS_Parser *lisp_parser
+       = PSEUDOVEC_STRUCT (vector, Lisp_TS_Parser);
+      ts_tree_delete(lisp_parser->tree);
+      ts_parser_delete(lisp_parser->parser);
+    }
+#endif
  #ifdef HAVE_MODULES
    else if (PSEUDOVECTOR_TYPEP (&vector->header, PVEC_MODULE_FUNCTION))
      {
diff --git a/src/casefiddle.c b/src/casefiddle.c

index 2ea5f09b4c5ceb7bd62c34ac62a29772bfb8f71d..3022c5cc7d67ae88843565d151ba11adaf4a90b7 100644 (file)
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -30,6 +30,10 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
  #include "composite.h"
  #include "keymap.h"
  
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
  enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
  
  /* State for casing individual characters.  */
@@ -530,6 +534,11 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
    modify_text (start, end);
    prepare_casing_context (&ctx, flag, true);
  
+#ifdef HAVE_TREE_SITTER
+  ptrdiff_t start_byte = CHAR_TO_BYTE (start);
+  ptrdiff_t old_end_byte = CHAR_TO_BYTE (end);
+#endif
+
    ptrdiff_t orig_end = end;
    record_delete (start, make_buffer_string (start, end, true), false);
    if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
@@ -548,6 +557,9 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
      {
        signal_after_change (start, end - start - added, end - start);
        update_compositions (start, end, CHECK_ALL);
+#ifdef HAVE_TREE_SITTER
+      ts_record_change (start_byte, old_end_byte, CHAR_TO_BYTE (end));
+#endif
      }
  
    return orig_end + added;
diff --git a/src/data.c b/src/data.c

index 1526cc0c73748b7ee334ca7af985f53e4a4512c2..9c711d2021281c9bfb117e66188034fba1a528dc 100644 (file)
--- a/src/data.c
+++ b/src/data.c
@@ -260,6 +260,10 @@ for example, (type-of 1) returns `integer'.  */)
            return Qxwidget;
          case PVEC_XWIDGET_VIEW:
            return Qxwidget_view;
+       case PVEC_TS_PARSER:
+         return Qtreesit_parser;
+       case PVEC_TS_NODE:
+         return Qtreesit_node;
          case PVEC_SQLITE:
            return Qsqlite;
          /* "Impossible" cases.  */
@@ -4203,6 +4207,8 @@ syms_of_data (void)
    DEFSYM (Qterminal, "terminal");
    DEFSYM (Qxwidget, "xwidget");
    DEFSYM (Qxwidget_view, "xwidget-view");
+  DEFSYM (Qtreesit_parser, "treesit-parser");
+  DEFSYM (Qtreesit_node, "treesit-node");
  
    DEFSYM (Qdefun, "defun");
  
diff --git a/src/emacs.c b/src/emacs.c

index d1060bca0b37067bd5fbe0cd5fa139089aa512a0..9a14e1037599091f5463db084d8687f412308fef 100644 (file)
--- a/src/emacs.c
+++ b/src/emacs.c
@@ -136,6 +136,10 @@ extern char etext;
  #include <sys/resource.h>
  #endif
  
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
  #include "pdumper.h"
  #include "fingerprint.h"
  #include "epaths.h"
@@ -2181,6 +2185,9 @@ Using an Emacs configured with --with-x-toolkit=lucid does not have this problem
        syms_of_module ();
  #endif
  
+#ifdef HAVE_TREE_SITTER
+      syms_of_treesit ();
+#endif
  #ifdef HAVE_SOUND
        syms_of_sound ();
  #endif
diff --git a/src/eval.c b/src/eval.c

index 294d79e67a09e3e365cd30d8958ddf6037bb3bff..ecf57efb92db471e69d830da26abe31731b2c623 100644 (file)
--- a/src/eval.c
+++ b/src/eval.c
@@ -1915,6 +1915,19 @@ signal_error (const char *s, Lisp_Object arg)
    xsignal (Qerror, Fcons (build_string (s), arg));
  }
  
+void
+define_error (Lisp_Object name, const char *message, Lisp_Object parent)
+{
+  eassert (SYMBOLP (name));
+  eassert (SYMBOLP (parent));
+  Lisp_Object parent_conditions = Fget (parent, Qerror_conditions);
+  eassert (CONSP (parent_conditions));
+  eassert (!NILP (Fmemq (parent, parent_conditions)));
+  eassert (NILP (Fmemq (name, parent_conditions)));
+  Fput (name, Qerror_conditions, pure_cons (name, parent_conditions));
+  Fput (name, Qerror_message, build_pure_c_string (message));
+}
+
  /* Use this for arithmetic overflow, e.g., when an integer result is
     too large even for a bignum.  */
  void
diff --git a/src/insdel.c b/src/insdel.c

index 6f180ac5800e3cb0ecef9ffd50d9e6cb946cc689..4676330cb7912127781acd64e92d1544272dfbf0 100644 (file)
--- a/src/insdel.c
+++ b/src/insdel.c
@@ -31,6 +31,10 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
  #include "region-cache.h"
  #include "pdumper.h"
  
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
  static void insert_from_string_1 (Lisp_Object, ptrdiff_t, ptrdiff_t, ptrdiff_t,
                                   ptrdiff_t, bool, bool);
  static void insert_from_buffer_1 (struct buffer *, ptrdiff_t, ptrdiff_t, bool);
@@ -940,6 +944,12 @@ insert_1_both (const char *string,
      set_text_properties (make_fixnum (PT), make_fixnum (PT + nchars),
                          Qnil, Qnil, Qnil);
  
+#ifdef HAVE_TREE_SITTER
+  eassert (nbytes >= 0);
+  eassert (PT_BYTE >= 0);
+  ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes);
+#endif
+
    adjust_point (nchars, nbytes);
  
    check_markers ();
@@ -1071,6 +1081,12 @@ insert_from_string_1 (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
    graft_intervals_into_buffer (intervals, PT, nchars,
                                current_buffer, inherit);
  
+#ifdef HAVE_TREE_SITTER
+  eassert (nbytes >= 0);
+  eassert (PT_BYTE >= 0);
+  ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + nbytes);
+#endif
+
    adjust_point (nchars, outgoing_nbytes);
  
    check_markers ();
@@ -1137,6 +1153,12 @@ insert_from_gap (ptrdiff_t nchars, ptrdiff_t nbytes, bool text_at_gap_tail)
                                    current_buffer, 0);
      }
  
+#ifdef HAVE_TREE_SITTER
+  eassert (nbytes >= 0);
+  eassert (ins_bytepos >= 0);
+  ts_record_change (ins_bytepos, ins_bytepos, ins_bytepos + nbytes);
+#endif
+
    if (ins_charpos < PT)
      adjust_point (nchars, nbytes);
  
@@ -1287,6 +1309,12 @@ insert_from_buffer_1 (struct buffer *buf,
    /* Insert those intervals.  */
    graft_intervals_into_buffer (intervals, PT, nchars, current_buffer, inherit);
  
+#ifdef HAVE_TREE_SITTER
+  eassert (outgoing_nbytes >= 0);
+  eassert (PT_BYTE >= 0);
+  ts_record_change (PT_BYTE, PT_BYTE, PT_BYTE + outgoing_nbytes);
+#endif
+
    adjust_point (nchars, outgoing_nbytes);
  }
  \f
@@ -1535,6 +1563,13 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new,
    graft_intervals_into_buffer (intervals, from, inschars,
                                current_buffer, inherit);
  
+#ifdef HAVE_TREE_SITTER
+  eassert (to_byte >= from_byte);
+  eassert (outgoing_insbytes >= 0);
+  eassert (from_byte >= 0);
+  ts_record_change (from_byte, to_byte, from_byte + outgoing_insbytes);
+#endif
+
    /* Relocate point as if it were a marker.  */
    if (from < PT)
      adjust_point ((from + inschars - (PT < to ? PT : to)),
@@ -1569,7 +1604,11 @@ replace_range (ptrdiff_t from, ptrdiff_t to, Lisp_Object new,
     If MARKERS, relocate markers.
  
     Unlike most functions at this level, never call
-   prepare_to_modify_buffer and never call signal_after_change.  */
+   prepare_to_modify_buffer and never call signal_after_change.
+   This is because this function is called in a loop, one character
+   at a time; the caller of 'replace_range_2' calls these hooks once
+   for the entire region.  Apart from signal_after_change, any caller
+   of this function should also call ts_record_change.  */
  
  void
  replace_range_2 (ptrdiff_t from, ptrdiff_t from_byte,
@@ -1892,6 +1931,12 @@ del_range_2 (ptrdiff_t from, ptrdiff_t from_byte,
  
    evaporate_overlays (from);
  
+#ifdef HAVE_TREE_SITTER
+  eassert (from_byte <= to_byte);
+  eassert (from_byte >= 0);
+  ts_record_change (from_byte, to_byte, from_byte);
+#endif
+
    return deletion;
  }
  
diff --git a/src/json.c b/src/json.c

index db1be07f196f61b58f3f73e0a8356e932ef2e84b..957f91b46bb6e833a84e0ecaa79bd5bf779dfda4 100644 (file)
--- a/src/json.c
+++ b/src/json.c
@@ -1090,22 +1090,6 @@ usage: (json-parse-buffer &rest args) */)
    return unbind_to (count, lisp);
  }
  
-/* Simplified version of 'define-error' that works with pure
-   objects.  */
-
-static void
-define_error (Lisp_Object name, const char *message, Lisp_Object parent)
-{
-  eassert (SYMBOLP (name));
-  eassert (SYMBOLP (parent));
-  Lisp_Object parent_conditions = Fget (parent, Qerror_conditions);
-  eassert (CONSP (parent_conditions));
-  eassert (!NILP (Fmemq (parent, parent_conditions)));
-  eassert (NILP (Fmemq (name, parent_conditions)));
-  Fput (name, Qerror_conditions, pure_cons (name, parent_conditions));
-  Fput (name, Qerror_message, build_pure_c_string (message));
-}
-
  void
  syms_of_json (void)
  {
diff --git a/src/lisp.h b/src/lisp.h

index 778bd1bfa5a30c2cd3f4de03b101da765a5fc261..aecbfed7fae30331f895d9d846a5b628bb49a43f 100644 (file)
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -575,6 +575,8 @@ enum Lisp_Fwd_Type
     your object -- this way, the same object could be used to represent
     several disparate C structures.
  
+   In addition, you need to add switch branches in data.c for Ftype_of.
+
     You also need to add the new type to the constant
     `cl--typeof-types' in lisp/emacs-lisp/cl-preloaded.el.  */
  
@@ -1053,6 +1055,8 @@ enum pvec_type
    PVEC_CONDVAR,
    PVEC_MODULE_FUNCTION,
    PVEC_NATIVE_COMP_UNIT,
+  PVEC_TS_PARSER,
+  PVEC_TS_NODE,
    PVEC_SQLITE,
  
    /* These should be last, for internal_equal and sxhash_obj.  */
@@ -5407,6 +5411,11 @@ maybe_gc (void)
      maybe_garbage_collect ();
  }
  
+/* Simplified version of 'define-error' that works with pure
+   objects.  */
+void
+define_error (Lisp_Object name, const char *message, Lisp_Object parent);
+
  INLINE_HEADER_END
  
  #endif /* EMACS_LISP_H */
diff --git a/src/lread.c b/src/lread.c

index 0486a98883c3a15d511a0ed4939e62e11ee01170..8989e2d12ddbeadeeabc5e44888e8ad23e1faaf1 100644 (file)
--- a/src/lread.c
+++ b/src/lread.c
@@ -5196,6 +5196,14 @@ to the specified file name if a suffix is allowed or required.  */);
      Fcons (build_pure_c_string (MODULES_SECONDARY_SUFFIX), Vload_suffixes);
  #endif
  
+  DEFVAR_LISP ("dynamic-library-suffixes", Vdynamic_library_suffixes,
+              doc: /* A list of suffixes for loadable dynamic libraries.  */);
+  Vdynamic_library_suffixes =
+    Fcons (build_pure_c_string (DYNAMIC_LIB_SECONDARY_SUFFIX), Qnil);
+  Vdynamic_library_suffixes =
+    Fcons (build_pure_c_string (DYNAMIC_LIB_SUFFIX),
+          Vdynamic_library_suffixes);
+
  #endif
    DEFVAR_LISP ("module-file-suffix", Vmodule_file_suffix,
                doc: /* Suffix of loadable module file, or nil if modules are not supported.  */);
diff --git a/src/print.c b/src/print.c

index 8cce8a1ad837668e343c8b8348b92851b6520630..ab3047dee52ac91d33a8abe338adfe974ba3110e 100644 (file)
--- a/src/print.c
+++ b/src/print.c
@@ -48,6 +48,10 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
  # include <sys/socket.h> /* for F_DUPFD_CLOEXEC */
  #endif
  
+#ifdef HAVE_TREE_SITTER
+#include "treesit.h"
+#endif
+
  struct terminal;
  
  /* Avoid actual stack overflow in print.  */
@@ -1936,6 +1940,30 @@ print_vectorlike (Lisp_Object obj, Lisp_Object printcharfun, bool escapeflag,
        }
        break;
  #endif
+
+#ifdef HAVE_TREE_SITTER
+    case PVEC_TS_PARSER:
+      print_c_string ("#<treesit-parser for ", printcharfun);
+      Lisp_Object language = XTS_PARSER (obj)->language_symbol;
+      print_string (Fsymbol_name (language), printcharfun);
+      print_c_string (" in ", printcharfun);
+      print_object (XTS_PARSER (obj)->buffer, printcharfun, escapeflag);
+      printchar ('>', printcharfun);
+      break;
+    case PVEC_TS_NODE:
+      print_c_string ("#<treesit-node from ", printcharfun);
+      print_object (Ftreesit_node_start (obj),
+                   printcharfun, escapeflag);
+      print_c_string (" to ", printcharfun);
+      print_object (Ftreesit_node_end (obj),
+                   printcharfun, escapeflag);
+      print_c_string (" in ", printcharfun);
+      print_object (XTS_PARSER (XTS_NODE (obj)->parser)->buffer,
+                   printcharfun, escapeflag);
+      printchar ('>', printcharfun);
+      break;
+#endif
+
      case PVEC_SQLITE:
        {
         print_c_string ("#<sqlite ", printcharfun);
diff --git a/src/treesit.c b/src/treesit.c

new file mode 100644 (file)

index 0000000..e127fc2
--- /dev/null
+++ b/src/treesit.c
@@ -0,0 +1,1657 @@
+/* Tree-sitter integration for GNU Emacs.
+
+Copyright (C) 2021-2022 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+#include "lisp.h"
+#include "buffer.h"
+#include "treesit.h"
+
+/* Commentary
+
+   The Emacs wrapper of tree-sitter does not expose everything the C
+   API provides, most notably:
+
+   - It doesn't expose a syntax tree, we put the syntax tree in the
+     parser object, and updating the tree is handled in the C level.
+
+   - We don't expose tree cursor either.  I think Lisp is slow enough
+     to nullify any performance advantage of using a cursor, though I
+     don't have evidence.  Also I want to minimize the number of new
+     types we introduce, currently we only add parser and node type.
+
+   - Because updating the change is handled in the C level as each
+     change is made in the buffer, there is no way for Lisp to update
+     a node.  But since we can just retrieve a new node, it shouldn't
+     be a limitation.
+
+   - I didn't expose setting timeout and cancellation flag for a
+     parser, mainly because I don't think they are really necessary
+     in Emacs' use cases.
+
+   - Many tree-sitter functions ask for a TSPoint, basically a (row,
+     column) location.  Emacs uses a gap buffer and keeps no
+     information about row and column position.  According to the
+     author of tree-sitter, tree-sitter only asks for (row, column)
+     position to carry it around and return back to the user later;
+     and the real position used is the byte position.  He also said
+     that he _thinks_ that it will work to use byte position only.
+     That's why whenever a TSPoint is asked, we pass a dummy one to
+     it.  Judging by the nature of parsing algorithms, I think it is
+     safe to use only byte position, and I don't think this will
+     change in the future.
+
+     REF: https://github.com/tree-sitter/tree-sitter/issues/445
+
+   treesit.h has some commentary on the two main data structure
+   for the parser and node.  ts_ensure_position_synced has some
+   commentary on how do we make tree-sitter play well with narrowing
+   (tree-sitter parser only sees the visible region, so we need to
+   translate positions back and forth).  Most action happens in
+   ts_ensure_parsed, ts_read_buffer and ts_record_change.
+
+   A complete correspondence list between tree-sitter functions and
+   exposed Lisp functions can be found in the manual (elisp)API
+   Correspondence.
+
+   Placement of CHECK_xxx functions: call CHECK_xxx before using any
+   unchecked Lisp values; these include argument of Lisp functions,
+   return value of Fsymbol_value, car of a cons.
+
+   Initializing tree-sitter: there are two entry points to tree-sitter
+   functions: 'treesit-parser-create' and
+   'treesit-language-available-p'.  Therefore we only need to call
+   initialization function in those two functions.
+
+   Tree-sitter offset (0-based) and buffer byte position (1-based):
+     tree-sitter offset + visible_beg = buffer byte position
+     buffer byte position - visible_beg = tree-sitter offset
+
+   Tree-sitter-related code in other files:
+   - src/alloc.c for gc for parser and node
+   - src/casefiddle.c & src/insdel.c for notifying tree-sitter
+     parser of buffer changes.
+   - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and
+     node type.
+ */
+
+/*** Initialization */
+
+bool ts_initialized = false;
+
+/* calloc-style allocator handed to tree-sitter: return zeroed storage
+   for N objects of SIZE bytes each, obtained from xzalloc.  */
+static void *
+ts_calloc_wrapper (size_t n, size_t size)
+{
+  /* A calloc replacement must not return a short block when N * SIZE
+     wraps around; report such a request as memory exhaustion.  */
+  if (size != 0 && n > SIZE_MAX / size)
+    memory_full (SIZE_MAX);
+  return xzalloc (n * size);
+}
+
+/* Make tree-sitter allocate through Emacs's xmalloc family.  Only the
+   first call does anything; later calls return immediately.  */
+void
+ts_initialize (void)
+{
+  if (ts_initialized)
+    return;
+
+  ts_set_allocator (xmalloc, ts_calloc_wrapper, xrealloc, xfree);
+  ts_initialized = true;
+}
+
+/*** Loading language library */
+
+/* Turn the NUL-terminated string SYMBOL_NAME into a valid C
+   identifier, in place, by replacing every '-' with '_'.  For
+   example "tree-sitter-c-sharp" becomes "tree_sitter_c_sharp".  */
+void
+ts_symbol_to_c_name (char *symbol_name)
+{
+  /* Walk the string once instead of calling strlen on every
+     iteration.  */
+  for (char *p = symbol_name; *p != '\0'; p++)
+    {
+      if (*p == '-')
+        *p = '_';
+    }
+}
+
+/* Search Vtreesit_load_name_override_list for an entry whose first
+   element is LANGUAGE_SYMBOL.  If one is found, store its second
+   element (a string) in *NAME and its third element (a string) in
+   *C_SYMBOL and return true; otherwise return false and leave both
+   outputs untouched.  Signal a wrong-type-argument error if the list
+   or one of the inspected entries is malformed.  */
+bool
+ts_find_override_name
+(Lisp_Object language_symbol, Lisp_Object *name, Lisp_Object *c_symbol)
+{
+  for (Lisp_Object list = Vtreesit_load_name_override_list;
+       !NILP (list); list = XCDR (list))
+    {
+      /* The variable is user-settable, so validate before every
+         XCAR/XCDR.  */
+      CHECK_CONS (list);
+      Lisp_Object entry = XCAR (list);
+      CHECK_CONS (entry);
+      Lisp_Object lang = XCAR (entry);
+      CHECK_SYMBOL (lang);
+      if (EQ (lang, language_symbol))
+        {
+          *name = Fnth (make_fixnum (1), entry);
+          CHECK_STRING (*name);
+          *c_symbol = Fnth (make_fixnum (2), entry);
+          CHECK_STRING (*c_symbol);
+          return true;
+        }
+    }
+  return false;
+}
+
+/* For each suffix in Vdynamic_library_suffixes, push LIB_BASE_NAME
+   concatenated with that suffix onto the front of *PATH_CANDIDATES.
+   For example, if Vdynamic_library_suffixes is (".so", ".dylib"),
+   this function pushes "lib_base_name.so" and then
+   "lib_base_name.dylib".  *PATH_CANDIDATES should be a Lisp list of
+   Lisp strings.  */
+void
+ts_load_language_push_for_each_suffix
+(Lisp_Object lib_base_name, Lisp_Object *path_candidates)
+{
+  Lisp_Object rest = Vdynamic_library_suffixes;
+  while (!NILP (rest))
+    {
+      Lisp_Object candidate = concat2 (lib_base_name, XCAR (rest));
+      *path_candidates = Fcons (candidate, *path_candidates);
+      rest = XCDR (rest);
+    }
+}
+
+/* Load the dynamic library of LANGUAGE_SYMBOL and return the pointer
+   to the language definition.  Signals
+   Qtreesit_load_language_error if something goes wrong.
+   Qtreesit_load_language_error carries the error message from
+   trying to load the library with each extension.
+
+   The library base name defaults to "libtree-sitter-<lang>" and the
+   C function to "tree_sitter_<lang>"; both can be overridden through
+   Vtreesit_load_name_override_list.
+
+   If SIGNAL is true, signal an error when failed to load LANGUAGE; if
+   false, return NULL when failed.  */
+TSLanguage *
+ts_load_language (Lisp_Object language_symbol, bool signal)
+{
+  Lisp_Object symbol_name = Fsymbol_name (language_symbol);
+
+  /* Figure out the library name and C name.  The C name is kept as a
+     Lisp string (concat2 returns a fresh string, so rewriting it in
+     place is safe); this avoids a heap copy that would be leaked on
+     every return path.  */
+  Lisp_Object lib_base_name =
+    (concat2 (build_pure_c_string ("libtree-sitter-"), symbol_name));
+  Lisp_Object c_name =
+    (concat2 (build_pure_c_string ("tree-sitter-"), symbol_name));
+  ts_symbol_to_c_name (SSDATA (c_name));
+
+  /* Override the library name and C name, if appropriate.  */
+  Lisp_Object override_name;
+  Lisp_Object override_c_name;
+  bool found_override = ts_find_override_name
+    (language_symbol, &override_name, &override_c_name);
+  if (found_override)
+    {
+      lib_base_name = override_name;
+      c_name = override_c_name;
+    }
+
+  /* Now we generate a list of possible library paths.  */
+  Lisp_Object path_candidates = Qnil;
+  /* First push just the filenames to the candidate list, which will
+     make dynlib_open look under standard system load paths.  */
+  ts_load_language_push_for_each_suffix
+    (lib_base_name, &path_candidates);
+  /* Then push ~/.emacs.d/tree-sitter paths.  */
+  ts_load_language_push_for_each_suffix
+    (Fexpand_file_name
+     (concat2 (build_string ("tree-sitter/"), lib_base_name),
+      Fsymbol_value (Quser_emacs_directory)),
+     &path_candidates);
+  /* Then push paths from treesit-extra-load-path.  */
+  for (Lisp_Object tail = Freverse (Vtreesit_extra_load_path);
+       !NILP (tail); tail = XCDR (tail))
+    {
+      ts_load_language_push_for_each_suffix
+        (Fexpand_file_name (lib_base_name, XCAR (tail)),
+         &path_candidates);
+    }
+
+  /* Try loading the dynamic library by each path candidate.  Stop
+     when succeed, record the error message and try the next one when
+     fail.  HANDLE and ERROR start out NULL so that an empty candidate
+     list is reported as a failure rather than read uninitialized.  */
+  dynlib_handle_ptr handle = NULL;
+  char const *error = NULL;
+  Lisp_Object error_list = Qnil;
+  for (Lisp_Object tail = path_candidates;
+       !NILP (tail); tail = XCDR (tail))
+    {
+      char *library_name = SSDATA (XCAR (tail));
+      /* Clear any stale error before the call.  */
+      dynlib_error ();
+      handle = dynlib_open (library_name);
+      error = dynlib_error ();
+      if (error == NULL)
+        break;
+      else
+        error_list = Fcons (build_string (error), error_list);
+    }
+  if (error != NULL || handle == NULL)
+    {
+      if (signal)
+        xsignal2 (Qtreesit_load_language_error,
+                  symbol_name, Fnreverse (error_list));
+      else
+        return NULL;
+    }
+
+  /* Load TSLanguage.  The C string is fetched from the Lisp string
+     only here, right before it is used.  */
+  dynlib_error ();
+  TSLanguage *(*langfn) (void);
+  langfn = dynlib_sym (handle, SSDATA (c_name));
+  error = dynlib_error ();
+  if (error != NULL)
+    {
+      if (signal)
+        xsignal1 (Qtreesit_load_language_error,
+                  build_string (error));
+      else
+        return NULL;
+    }
+  TSLanguage *lang = (*langfn) ();
+
+  /* Check if language version matches tree-sitter version.  */
+  TSParser *parser = ts_parser_new ();
+  bool success = ts_parser_set_language (parser, lang);
+  ts_parser_delete (parser);
+  if (!success)
+    {
+      if (signal)
+        xsignal2 (Qtreesit_load_language_error,
+                  build_pure_c_string ("Language version doesn't match tree-sitter version, language version:"),
+                  make_fixnum (ts_language_version (lang)));
+      else
+        return NULL;
+    }
+  return lang;
+}
+
+DEFUN ("treesit-language-available-p",
+       Ftreesit_langauge_available_p,
+       Streesit_language_available_p,
+       1, 1, 0,
+       doc: /* Return non-nil if LANGUAGE exists and is loadable.  */)
+  (Lisp_Object language)
+{
+  CHECK_SYMBOL (language);
+  ts_initialize ();
+
+  /* Probe without signaling: a failed load just yields NULL.  */
+  TSLanguage *loaded = ts_load_language (language, false);
+  return loaded != NULL ? Qt : Qnil;
+}
+
+/*** Parsing functions */
+
+/* Tell TREE that the bytes from START_BYTE to OLD_END_BYTE were
+   replaced by text that now ends at NEW_END_BYTE.  All three are
+   0-based offsets and are truncated to uint32_t.  The row/column
+   points tree-sitter asks for are all passed as (0, 0).  TREE must
+   not be NULL.  */
+static inline void
+ts_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte,
+                ptrdiff_t old_end_byte, ptrdiff_t new_end_byte)
+{
+  TSPoint origin = {0, 0};
+  TSInputEdit edit =
+    {
+      .start_byte = (uint32_t) start_byte,
+      .old_end_byte = (uint32_t) old_end_byte,
+      .new_end_byte = (uint32_t) new_end_byte,
+      .start_point = origin,
+      .old_end_point = origin,
+      .new_end_point = origin,
+    };
+  ts_tree_edit (tree, &edit);
+}
+
+/* Update each parser's tree after the user made an edit.  This
+   function does not parse the buffer and only updates the tree.  (So
+   it should be very fast.)
+
+   The edit replaced the buffer bytes from START_BYTE to OLD_END_BYTE
+   with text that now ends at NEW_END_BYTE; all three are buffer byte
+   positions.  Each parser with a tree covers the buffer bytes from
+   its visible_beg to its visible_end, and tree-sitter offsets are
+   relative to visible_beg, so the edit is clipped to that range and
+   translated before it is passed on.  visible_beg and visible_end
+   are then moved so that they keep delimiting the same text.  */
+void
+ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
+                  ptrdiff_t new_end_byte)
+{
+  for (Lisp_Object parser_list =
+         Fsymbol_value (Qtreesit_parser_list);
+       !NILP (parser_list);
+       parser_list = XCDR (parser_list))
+    {
+      CHECK_CONS (parser_list);
+      Lisp_Object lisp_parser = XCAR (parser_list);
+      CHECK_TS_PARSER (lisp_parser);
+      TSTree *tree = XTS_PARSER (lisp_parser)->tree;
+      if (tree != NULL)
+        {
+          eassert (start_byte <= old_end_byte);
+          eassert (start_byte <= new_end_byte);
+          /* Think the recorded change as a delete followed by an
+             insert, and think of them as moving unchanged text back
+             and forth.  After all, the whole point of updating the
+             tree is to update the position of unchanged text.  */
+          ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
+          ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end;
+          ptrdiff_t visible_len = visible_end - visible_beg;
+
+          /* Offsets of the replaced text in the old tree: clip the
+             buffer positions into [visible_beg, visible_end], then
+             make them relative to visible_beg.  */
+          ptrdiff_t start_offset =
+            min (visible_end, max (visible_beg, start_byte)) - visible_beg;
+          ptrdiff_t old_end_offset =
+            min (visible_end, max (visible_beg, old_end_byte)) - visible_beg;
+
+          /* Where the beginning of the visible text ends up.  */
+          ptrdiff_t new_visible_beg;
+          if (visible_beg <= start_byte)
+            /* The edit starts at or after visible_beg: no movement.  */
+            new_visible_beg = visible_beg;
+          else if (old_end_byte <= visible_beg)
+            /* The edit lies entirely before the visible text, which
+               is shifted by the change in size.  */
+            new_visible_beg = visible_beg + (new_end_byte - old_end_byte);
+          else
+            /* The edit straddles visible_beg.  */
+            new_visible_beg = min (visible_beg, new_end_byte);
+
+          /* Offset of the end of the new text, relative to the new
+             beginning.  An edit entirely past visible_end does not
+             touch the tree at all.  */
+          ptrdiff_t new_end_offset;
+          if (start_byte > visible_end)
+            new_end_offset = visible_len;
+          else
+            new_end_offset =
+              max (new_visible_beg, new_end_byte) - new_visible_beg;
+          eassert (start_offset <= old_end_offset);
+          eassert (start_offset <= new_end_offset);
+
+          ts_tree_edit_1 (tree, start_offset, old_end_offset,
+                          new_end_offset);
+
+          /* visible_beg and visible_end are buffer byte positions,
+             not offsets: the tree now spans the old length minus
+             what was removed plus what was added.  */
+          XTS_PARSER (lisp_parser)->visible_beg = new_visible_beg;
+          XTS_PARSER (lisp_parser)->visible_end =
+            new_visible_beg + visible_len - old_end_offset + new_end_offset;
+          XTS_PARSER (lisp_parser)->need_reparse = true;
+          XTS_PARSER (lisp_parser)->timestamp++;
+        }
+    }
+}
+
+/* Make PARSER's visible_beg and visible_end equal to the accessible
+   byte range (BUF_BEGV_BYTE, BUF_ZV_BYTE) of its buffer.  If PARSER
+   already has a tree, also tell tree-sitter about the text that
+   entered or left the visible range, as insertions and deletions at
+   the beginning and end of the tree.  */
+void
+ts_ensure_position_synced (Lisp_Object parser)
+{
+  TSTree *tree = XTS_PARSER (parser)->tree;
+  struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+
+  /* With no tree there is nothing to edit, but the range must still
+     be refreshed: otherwise the first parse would read the range
+     recorded when the parser was created, which may be stale.  */
+  if (tree == NULL)
+    {
+      XTS_PARSER (parser)->visible_beg = BUF_BEGV_BYTE (buffer);
+      XTS_PARSER (parser)->visible_end = BUF_ZV_BYTE (buffer);
+      return;
+    }
+
+  ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg;
+  ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end;
+  /* Before we parse or set ranges, catch up with the narrowing
+     situation.  We change visible_beg and visible_end to match
+     BUF_BEGV_BYTE and BUF_ZV_BYTE, and inform tree-sitter of the
+     change.  We want to move the visible range of tree-sitter to
+     match the narrowed range. For example,
+     from ________|xxxx|__
+     to   |xxxx|__________ */
+
+  /* 1. Make sure visible_beg <= BUF_BEGV_BYTE.  */
+  if (visible_beg > BUF_BEGV_BYTE (buffer))
+    {
+      /* Tree-sitter sees: insert at the beginning. */
+      ts_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer));
+      visible_beg = BUF_BEGV_BYTE (buffer);
+    }
+  /* 2. Make sure visible_end = BUF_ZV_BYTE.  */
+  if (visible_end < BUF_ZV_BYTE (buffer))
+    {
+      /* Tree-sitter sees: insert at the end.  */
+      ts_tree_edit_1 (tree, visible_end - visible_beg,
+                      visible_end - visible_beg,
+                      BUF_ZV_BYTE (buffer) - visible_beg);
+      visible_end = BUF_ZV_BYTE (buffer);
+    }
+  else if (visible_end > BUF_ZV_BYTE (buffer))
+    {
+      /* Tree-sitter sees: delete at the end.  */
+      ts_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg,
+                      visible_end - visible_beg,
+                      BUF_ZV_BYTE (buffer) - visible_beg);
+      visible_end = BUF_ZV_BYTE (buffer);
+    }
+  /* 3. Make sure visible_beg = BUF_BEGV_BYTE.  */
+  if (visible_beg < BUF_BEGV_BYTE (buffer))
+    {
+      /* Tree-sitter sees: delete at the beginning.  */
+      ts_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0);
+      visible_beg = BUF_BEGV_BYTE (buffer);
+    }
+  eassert (0 <= visible_beg);
+  eassert (visible_beg <= visible_end);
+
+  XTS_PARSER (parser)->visible_beg = visible_beg;
+  XTS_PARSER (parser)->visible_end = visible_end;
+}
+
+/* Signal Qtreesit_buffer_too_large if BUFFER holds more bytes than
+   fit in a uint32_t.  The size is measured in bytes, not characters,
+   because the positions handed to tree-sitter are byte offsets cast
+   to uint32_t.  */
+void
+ts_check_buffer_size (struct buffer *buffer)
+{
+  ptrdiff_t buffer_size_bytes =
+    (BUF_Z_BYTE (buffer) - BUF_BEG_BYTE (buffer));
+  if (buffer_size_bytes > UINT32_MAX)
+    xsignal2 (Qtreesit_buffer_too_large,
+              build_pure_c_string ("Buffer size too large, size:"),
+              make_fixnum (buffer_size_bytes));
+}
+
+/* Re-parse PARSER's buffer if PARSER is marked as needing it, and
+   store the resulting tree in PARSER.  Parsing is deferred until a
+   caller actually needs an up-to-date tree; this is the function
+   such callers use.  Signals Qtreesit_parse_error, with the buffer
+   as data, if tree-sitter returns no tree.  */
+void
+ts_ensure_parsed (Lisp_Object parser)
+{
+  struct Lisp_TS_Parser *lisp_parser = XTS_PARSER (parser);
+  if (!lisp_parser->need_reparse)
+    return;
+
+  TSTree *old_tree = lisp_parser->tree;
+  struct buffer *buffer = XBUFFER (lisp_parser->buffer);
+  ts_check_buffer_size (buffer);
+
+  /* Catch up with the narrowing situation before parsing.  */
+  ts_ensure_position_synced (parser);
+
+  TSTree *new_tree
+    = ts_parser_parse (lisp_parser->parser, old_tree, lisp_parser->input);
+  /* A NULL result should be very rare (impossible, really).  It only
+     happens when 1) no language is set (impossible in Emacs because
+     the user has to supply a language to create a parser), 2) the
+     parse was canceled by a timeout (impossible because we don't set
+     one), or 3) the parse was canceled by the cancellation flag
+     (impossible because we don't set it).  (See comments for
+     ts_parser_parse in tree_sitter/api.h.)  */
+  if (new_tree == NULL)
+    {
+      Lisp_Object buf;
+      XSETBUFFER (buf, buffer);
+      xsignal1 (Qtreesit_parse_error, buf);
+    }
+
+  /* The old tree is replaced only once the new one exists.  */
+  ts_tree_delete (old_tree);
+  lisp_parser->tree = new_tree;
+  lisp_parser->need_reparse = false;
+}
+
+/* This is the read function provided to tree-sitter to read from a
+   buffer.  PARSER is the struct Lisp_TS_Parser whose buffer is read
+   and BYTE_INDEX is a 0-based offset from that parser's visible_beg;
+   POSITION is ignored.  Return a pointer to the character at that
+   offset and store its length in bytes in *BYTES_READ, i.e. read one
+   character per call.  At or past the end of the readable range, or
+   if the buffer is dead, store 0 in *BYTES_READ and return NULL.  */
+const char*
+ts_read_buffer (void *parser, uint32_t byte_index,
+                TSPoint position, uint32_t *bytes_read)
+{
+  struct Lisp_TS_Parser *lisp_parser = parser;
+  struct buffer *buffer = XBUFFER (lisp_parser->buffer);
+
+  /* This function could run from a user command, so it is better to
+     do nothing instead of raising an error.  Check liveness before
+     looking at any of the buffer's positions.  */
+  if (!BUFFER_LIVE_P (buffer))
+    {
+      *bytes_read = 0;
+      return NULL;
+    }
+
+  ptrdiff_t visible_beg = lisp_parser->visible_beg;
+  /* Never read past the end of the buffer text, whatever range the
+     parser has recorded.  */
+  ptrdiff_t limit = min (lisp_parser->visible_end, BUF_Z_BYTE (buffer));
+  ptrdiff_t byte_pos = byte_index + visible_beg;
+
+  /* Reached the end of the readable range: tree-sitter wants
+     *BYTES_READ set to 0 in that case.  */
+  if (byte_pos >= limit)
+    {
+      *bytes_read = 0;
+      return NULL;
+    }
+
+  /* Normal case, read a character.  */
+  char *beg = (char *) BUF_BYTE_ADDRESS (buffer, byte_pos);
+  int len = BYTES_BY_CHAR_HEAD ((int) *beg);
+  /* Do not report bytes beyond the limit.  */
+  if (len > limit - byte_pos)
+    len = limit - byte_pos;
+  *bytes_read = (uint32_t) len;
+  return beg;
+}
+
+/*** Functions for parser and node object*/
+
+/* Wrap the parser in a Lisp_Object to be used in the Lisp machine.
+   BUFFER is the buffer the parser reads from, PARSER and TREE are
+   the tree-sitter objects to wrap (TREE may be NULL if nothing has
+   been parsed yet), and LANGUAGE_SYMBOL is the symbol that was used
+   to create the parser.  The returned parser is marked as needing a
+   reparse.  */
+Lisp_Object
+make_ts_parser (Lisp_Object buffer, TSParser *parser,
+               TSTree *tree, Lisp_Object language_symbol)
+{
+  struct Lisp_TS_Parser *lisp_parser
+    = ALLOCATE_PSEUDOVECTOR
+    (struct Lisp_TS_Parser, buffer, PVEC_TS_PARSER);
+
+  lisp_parser->language_symbol = language_symbol;
+  lisp_parser->buffer = buffer;
+  lisp_parser->parser = parser;
+  lisp_parser->tree = tree;
+  TSInput input = {lisp_parser, ts_read_buffer, TSInputEncodingUTF8};
+  lisp_parser->input = input;
+  lisp_parser->need_reparse = true;
+  /* Nodes copy this value and compare against it later, so give it a
+     defined starting value instead of whatever the allocator left in
+     the non-Lisp part of the object.  */
+  lisp_parser->timestamp = 0;
+  /* visible_beg and visible_end are byte positions (they are added
+     to tree-sitter byte offsets and compared against BUF_BEGV_BYTE
+     and BUF_ZV_BYTE), so initialize them from the byte positions of
+     the accessible region, not the character positions.  */
+  lisp_parser->visible_beg = BUF_BEGV_BYTE (XBUFFER (buffer));
+  lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer));
+  return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
+}
+
+/* Wrap NODE, which belongs to PARSER, in a Lisp_Object to be used in
+   the Lisp machine.  The new node records PARSER's current timestamp
+   so that it can later be compared with the parser's timestamp.  */
+Lisp_Object
+make_ts_node (Lisp_Object parser, TSNode node)
+{
+  struct Lisp_TS_Node *wrapper
+    = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Node, parser, PVEC_TS_NODE);
+
+  wrapper->timestamp = XTS_PARSER (parser)->timestamp;
+  wrapper->node = node;
+  wrapper->parser = parser;
+
+  return make_lisp_ptr (wrapper, Lisp_Vectorlike);
+}
+
+DEFUN ("treesit-parser-p",
+       Ftreesit_parser_p, Streesit_parser_p, 1, 1, 0,
+       doc: /* Return t if OBJECT is a tree-sitter parser.  */)
+  (Lisp_Object object)
+{
+  /* Any object is accepted; non-parsers simply yield nil.  */
+  return TS_PARSERP (object) ? Qt : Qnil;
+}
+
+DEFUN ("treesit-node-p",
+       Ftreesit_node_p, Streesit_node_p, 1, 1, 0,
+       doc: /* Return t if OBJECT is a tree-sitter node.  */)
+  (Lisp_Object object)
+{
+  /* Any object is accepted; non-nodes simply yield nil.  */
+  return TS_NODEP (object) ? Qt : Qnil;
+}
+
+DEFUN ("treesit-node-parser",
+       Ftreesit_node_parser, Streesit_node_parser,
+       1, 1, 0,
+       doc: /* Return the parser to which NODE belongs.  */)
+  (Lisp_Object node)
+{
+  /* Only the type of NODE is checked here.  */
+  CHECK_TS_NODE (node);
+  Lisp_Object owner = XTS_NODE (node)->parser;
+  return owner;
+}
+
+DEFUN ("treesit-parser-create",
+       Ftreesit_parser_create, Streesit_parser_create,
+       2, 2, 0,
+       doc: /* Create and return a parser in BUFFER for LANGUAGE.
+
+The parser is automatically added to BUFFER's
+`treesit-parser-list'.  LANGUAGE should be the symbol of a
+function provided by a tree-sitter language dynamic module, e.g.,
+'treesit-json.  If BUFFER is nil, use the current buffer.  */)
+  (Lisp_Object buffer, Lisp_Object language)
+{
+  if (NILP (buffer))
+    buffer = Fcurrent_buffer ();
+
+  CHECK_BUFFER (buffer);
+  CHECK_SYMBOL (language);
+  ts_check_buffer_size (XBUFFER (buffer));
+
+  ts_initialize ();
+
+  /* Load the language before allocating the tree-sitter parser:
+     ts_load_language is asked to signal on failure, and a parser
+     created beforehand would be leaked by the non-local exit.  */
+  TSLanguage *lang = ts_load_language (language, true);
+  TSParser *parser = ts_parser_new ();
+  /* We check language version when loading a language, so this should
+     always succeed.  */
+  ts_parser_set_language (parser, lang);
+
+  Lisp_Object lisp_parser
+    = make_ts_parser (buffer, parser, NULL, language);
+
+  /* treesit-parser-list is set with BUFFER temporarily current, so
+     that the value set is the one seen in BUFFER.  */
+  struct buffer *old_buffer = current_buffer;
+  set_buffer_internal (XBUFFER (buffer));
+
+  Fset (Qtreesit_parser_list,
+       Fcons (lisp_parser, Fsymbol_value (Qtreesit_parser_list)));
+
+  set_buffer_internal (old_buffer);
+  return lisp_parser;
+}
+
+DEFUN ("treesit-parser-buffer",
+       Ftreesit_parser_buffer, Streesit_parser_buffer,
+       1, 1, 0,
+       doc: /* Return the buffer of PARSER.  */)
+  (Lisp_Object parser)
+{
+  CHECK_TS_PARSER (parser);
+  /* Round-trip through the C buffer pointer to build the result.  */
+  struct buffer *parser_buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+  Lisp_Object result;
+  XSETBUFFER (result, parser_buffer);
+  return result;
+}
+
+DEFUN ("treesit-parser-language",
+       Ftreesit_parser_language, Streesit_parser_language,
+       1, 1, 0,
+       doc: /* Return parser's language symbol.
+This symbol is the one used to create the parser.  */)
+  (Lisp_Object parser)
+{
+  CHECK_TS_PARSER (parser);
+  /* The symbol was stored in the parser object when it was made.  */
+  Lisp_Object symbol = XTS_PARSER (parser)->language_symbol;
+  return symbol;
+}
+
+/*** Parser API */
+
+DEFUN ("treesit-parser-root-node",
+       Ftreesit_parser_root_node, Streesit_parser_root_node,
+       1, 1, 0,
+       doc: /* Return the root node of PARSER.  */)
+  (Lisp_Object parser)
+{
+  CHECK_TS_PARSER (parser);
+  /* Re-parse first if the parser is marked as needing it, so that
+     the root node comes from the current tree.  */
+  ts_ensure_parsed (parser);
+  TSTree *tree = XTS_PARSER (parser)->tree;
+  return make_ts_node (parser, ts_tree_root_node (tree));
+}
+
+/* Check that the RANGES argument of
+   treesit-parser-set-included-ranges is valid: a list of conses
+   (BEG . END) of fixnums, where each range starts no earlier than
+   the end of the previous one (or position 1 for the first range)
+   and BEG <= END.  Signal an error otherwise.  */
+void
+ts_check_range_argument (Lisp_Object ranges)
+{
+  /* End of the previous range; the first range may start at 1.  */
+  EMACS_INT prev_end = 1;
+  Lisp_Object rest = ranges;
+  while (!NILP (rest))
+    {
+      CHECK_CONS (rest);
+      Lisp_Object pair = XCAR (rest);
+      CHECK_CONS (pair);
+      CHECK_FIXNUM (XCAR (pair));
+      CHECK_FIXNUM (XCDR (pair));
+      EMACS_INT range_beg = XFIXNUM (XCAR (pair));
+      EMACS_INT range_end = XFIXNUM (XCDR (pair));
+      /* TODO: Maybe we should check for point-min/max, too?  */
+      if (range_beg < prev_end || range_end < range_beg)
+       xsignal2 (Qtreesit_range_invalid,
+                 build_pure_c_string
+                 ("RANGE is either overlapping or out-of-order"),
+                 ranges);
+      prev_end = range_end;
+      rest = XCDR (rest);
+    }
+}
+
+DEFUN ("treesit-parser-set-included-ranges",
+       Ftreesit_parser_set_included_ranges,
+       Streesit_parser_set_included_ranges,
+       2, 2, 0,
+       doc: /* Limit PARSER to RANGES.
+
+RANGES is a list of (BEG . END), each (BEG . END) confines a range in
+which the parser should operate in.  Each range must not overlap, and
+each range should come in order.  Signal `treesit-set-range-error'
+if the argument is invalid, or something else went wrong.  If RANGES
+is nil, set PARSER to parse the whole buffer.  */)
+  (Lisp_Object parser, Lisp_Object ranges)
+{
+  CHECK_TS_PARSER (parser);
+  /* nil is a documented value (parse the whole buffer), so only
+     insist on a cons when RANGES is non-nil.  */
+  if (!NILP (ranges))
+    CHECK_CONS (ranges);
+  ts_check_range_argument (ranges);
+
+  /* Before we parse, catch up with narrowing/widening.  */
+  ts_ensure_position_synced (parser);
+
+  bool success;
+  if (NILP (ranges))
+    {
+      /* If RANGES is nil, make parser to parse the whole document.
+        To do that we give tree-sitter a 0 length, the range is a
+        dummy.  */
+      TSRange ts_range = {{0, 0}, {0, 0}, 0, 0};
+      success = ts_parser_set_included_ranges
+       (XTS_PARSER (parser)->parser, &ts_range, 0);
+    }
+  else
+    {
+      /* Set ranges for PARSER.  xmalloc signals on memory exhaustion
+        instead of returning NULL.  */
+      ptrdiff_t len = list_length (ranges);
+      TSRange *ts_ranges = xmalloc (sizeof *ts_ranges * len);
+      struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+      ptrdiff_t begv_byte = BUF_BEGV_BYTE (buffer);
+
+      /* Walk a separate tail so that RANGES is still intact if we
+        need to report it in an error below.  */
+      ptrdiff_t idx = 0;
+      for (Lisp_Object tail = ranges; !NILP (tail);
+          tail = XCDR (tail), idx++)
+       {
+         Lisp_Object range = XCAR (tail);
+
+         ptrdiff_t beg_byte = buf_charpos_to_bytepos
+           (buffer, XFIXNUM (XCAR (range)));
+         ptrdiff_t end_byte = buf_charpos_to_bytepos
+           (buffer, XFIXNUM (XCDR (range)));
+         /* We don't care about start and end points, put in dummy
+            value.  The byte offsets handed to tree-sitter are
+            relative to the beginning of the accessible region.  */
+         TSRange rg = {{0, 0}, {0, 0},
+                       (uint32_t) (beg_byte - begv_byte),
+                       (uint32_t) (end_byte - begv_byte)};
+         ts_ranges[idx] = rg;
+       }
+      success = ts_parser_set_included_ranges
+       (XTS_PARSER (parser)->parser, ts_ranges, (uint32_t) len);
+      /* Although XFIXNUM could signal, it should be impossible
+        because we have checked the input by ts_check_range_argument.
+        So there is no need for unwind-protect.  */
+      xfree (ts_ranges);
+    }
+
+  if (!success)
+    xsignal2 (Qtreesit_range_invalid,
+             build_string
+             ("Something went wrong when setting ranges"),
+             ranges);
+
+  XTS_PARSER (parser)->need_reparse = true;
+  return Qnil;
+}
+
+DEFUN ("treesit-parser-included-ranges",
+       Ftreesit_parser_included_ranges,
+       Streesit_parser_included_ranges,
+       1, 1, 0,
+       doc: /* Return the ranges set for PARSER.
+See `treesit-parser-set-included-ranges'.  If no range is set, return
+nil.  */)
+  (Lisp_Object parser)
+{
+  CHECK_TS_PARSER (parser);
+  uint32_t len;
+  const TSRange *ranges = ts_parser_included_ranges
+    (XTS_PARSER (parser)->parser, &len);
+  if (len == 0)
+    return Qnil;
+  struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+  ptrdiff_t begv_byte = BUF_BEGV_BYTE (buffer);
+
+  /* Build the list in reverse, then reverse it at the end.  */
+  Lisp_Object list = Qnil;
+  for (uint32_t idx = 0; idx < len; idx++)
+    {
+      /* Tree-sitter's offsets are 32-bit, but once the beginning of
+        the accessible region is added they are buffer byte
+        positions, so keep them in ptrdiff_t like every other
+        buffer position.  */
+      ptrdiff_t beg_byte = ranges[idx].start_byte + begv_byte;
+      ptrdiff_t end_byte = ranges[idx].end_byte + begv_byte;
+
+      Lisp_Object lisp_range =
+       Fcons (make_fixnum (buf_bytepos_to_charpos (buffer, beg_byte)),
+              make_fixnum (buf_bytepos_to_charpos (buffer, end_byte)));
+      list = Fcons (lisp_range, list);
+    }
+  return Fnreverse (list);
+}
+
+/*** Node API  */
+
+/* Check that OBJ is a positive integer and signal an error if
+   otherwise. */
+static void
+ts_check_positive_integer (Lisp_Object obj)
+{
+  CHECK_INTEGER (obj);
+  if (XFIXNUM (obj) < 0)
+    xsignal1 (Qargs_out_of_range, obj);
+}
+
+/* Check that OBJ is a tree-sitter node whose timestamp still matches
+   the timestamp of its parser.  Signal Qtreesit_node_outdated if the
+   timestamps differ.  */
+static void
+ts_check_node (Lisp_Object obj)
+{
+  CHECK_TS_NODE (obj);
+  struct Lisp_TS_Node *lisp_node = XTS_NODE (obj);
+  struct Lisp_TS_Parser *owner = XTS_PARSER (lisp_node->parser);
+  if (lisp_node->timestamp != owner->timestamp)
+    xsignal1 (Qtreesit_node_outdated, obj);
+}
+
+DEFUN ("treesit-node-type",
+       Ftreesit_node_type, Streesit_node_type, 1, 1, 0,
+       doc: /* Return the NODE's type as a string.
+If NODE is nil, return nil.  */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  /* Copy the C string returned by tree-sitter into a Lisp string.  */
+  return build_string (ts_node_type (XTS_NODE (node)->node));
+}
+
+DEFUN ("treesit-node-start",
+       Ftreesit_node_start, Streesit_node_start, 1, 1, 0,
+       doc: /* Return the NODE's start position.
+If NODE is nil, return nil.  */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  Lisp_Object lisp_parser = XTS_NODE (node)->parser;
+  ptrdiff_t region_beg = XTS_PARSER (lisp_parser)->visible_beg;
+  uint32_t offset = ts_node_start_byte (XTS_NODE (node)->node);
+  struct buffer *buf = XBUFFER (XTS_PARSER (lisp_parser)->buffer);
+
+  /* Tree-sitter's byte offset is relative to the parser's visible
+     region; add its beginning, then convert to a character
+     position.  */
+  return make_fixnum (buf_bytepos_to_charpos (buf, offset + region_beg));
+}
+
+DEFUN ("treesit-node-end",
+       Ftreesit_node_end, Streesit_node_end, 1, 1, 0,
+       doc: /* Return the NODE's end position.
+If NODE is nil, return nil.  */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  Lisp_Object lisp_parser = XTS_NODE (node)->parser;
+  ptrdiff_t region_beg = XTS_PARSER (lisp_parser)->visible_beg;
+  uint32_t offset = ts_node_end_byte (XTS_NODE (node)->node);
+  struct buffer *buf = XBUFFER (XTS_PARSER (lisp_parser)->buffer);
+
+  /* Tree-sitter's byte offset is relative to the parser's visible
+     region; add its beginning, then convert to a character
+     position.  */
+  return make_fixnum (buf_bytepos_to_charpos (buf, offset + region_beg));
+}
+
+DEFUN ("treesit-node-string",
+       Ftreesit_node_string, Streesit_node_string, 1, 1, 0,
+       doc: /* Return the string representation of NODE.
+If NODE is nil, return nil.  */)
+  (Lisp_Object node)
+{
+  if (NILP (node)) return Qnil;
+  ts_check_node (node);
+  TSNode ts_node = XTS_NODE (node)->node;
+  /* ts_node_string returns a string allocated with malloc that the
+     caller owns, so copy it into a Lisp string and release it;
+     otherwise every call leaks the S-expression.  */
+  char *string = ts_node_string (ts_node);
+  Lisp_Object lisp_string = make_string (string, strlen (string));
+  free (string);
+  return lisp_string;
+}
+
+DEFUN ("treesit-node-parent",
+       Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0,
+       doc: /* Return the immediate parent of NODE.
+Return nil if there isn't any.  If NODE is nil, return nil.  */)
+  (Lisp_Object node)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  TSNode parent = ts_node_parent (XTS_NODE (node)->node);
+  /* A null node from tree-sitter is reported to Lisp as nil.  */
+  return (ts_node_is_null (parent)
+         ? Qnil
+         : make_ts_node (XTS_NODE (node)->parser, parent));
+}
+
+DEFUN ("treesit-node-child",
+       Ftreesit_node_child, Streesit_node_child, 2, 3, 0,
+       doc: /* Return the Nth child of NODE.
+
+Return nil if there isn't any.  If NAMED is non-nil, look for named
+child only.  NAMED defaults to nil.  If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object n, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  ts_check_positive_integer (n);
+
+  /* Tree-sitter takes a 32-bit index; reject anything larger.  */
+  EMACS_INT index = XFIXNUM (n);
+  if (index > UINT32_MAX)
+    xsignal1 (Qargs_out_of_range, n);
+
+  TSNode parent = XTS_NODE (node)->node;
+  TSNode child = (NILP (named)
+                 ? ts_node_child (parent, (uint32_t) index)
+                 : ts_node_named_child (parent, (uint32_t) index));
+
+  return (ts_node_is_null (child)
+         ? Qnil
+         : make_ts_node (XTS_NODE (node)->parser, child));
+}
+
+DEFUN ("treesit-node-check",
+       Ftreesit_node_check, Streesit_node_check, 2, 2, 0,
+       doc: /* Return non-nil if NODE has PROPERTY, nil otherwise.
+
+PROPERTY could be 'named, 'missing, 'extra, 'has-changes, 'has-error.
+Named nodes correspond to named rules in the language definition,
+whereas "anonymous" nodes correspond to string literals in the
+language definition.
+
+Missing nodes are inserted by the parser in order to recover from
+certain kinds of syntax errors, i.e., should be there but not there.
+
+Extra nodes represent things like comments, which are not required the
+language definition, but can appear anywhere.
+
+A node "has changes" if the buffer changed since the node is
+created. (Don't forget the "s" at the end of 'has-changes.)
+
+A node "has error" if itself is a syntax error or contains any syntax
+errors.  */)
+  (Lisp_Object node, Lisp_Object property)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  CHECK_SYMBOL (property);
+
+  /* Map PROPERTY to the tree-sitter function that tests it.  All of
+     them take a node and return a boolean.  */
+  bool (*check) (TSNode);
+  if (EQ (property, Qnamed))
+    check = ts_node_is_named;
+  else if (EQ (property, Qmissing))
+    check = ts_node_is_missing;
+  else if (EQ (property, Qextra))
+    check = ts_node_is_extra;
+  else if (EQ (property, Qhas_error))
+    check = ts_node_has_error;
+  else if (EQ (property, Qhas_changes))
+    check = ts_node_has_changes;
+  else
+    signal_error ("Expecting 'named, 'missing, 'extra, 'has-changes or 'has-error, got",
+                 property);
+
+  return check (XTS_NODE (node)->node) ? Qt : Qnil;
+}
+
+DEFUN ("treesit-node-field-name-for-child",
+       Ftreesit_node_field_name_for_child,
+       Streesit_node_field_name_for_child, 2, 2, 0,
+       doc: /* Return the field name of the Nth child of NODE.
+
+Return nil if there isn't any child or no field is found.
+If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object n)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  ts_check_positive_integer (n);
+
+  /* Tree-sitter takes a 32-bit index; reject anything larger.  */
+  EMACS_INT index = XFIXNUM (n);
+  if (index > UINT32_MAX)
+    xsignal1 (Qargs_out_of_range, n);
+
+  const char *field
+    = ts_node_field_name_for_child (XTS_NODE (node)->node,
+                                   (uint32_t) index);
+
+  /* NULL from tree-sitter is reported to Lisp as nil.  */
+  return field ? make_string (field, strlen (field)) : Qnil;
+}
+
+DEFUN ("treesit-node-child-count",
+       Ftreesit_node_child_count,
+       Streesit_node_child_count, 1, 2, 0,
+       doc: /* Return the number of children of NODE.
+
+If NAMED is non-nil, count named child only.  NAMED defaults to
+nil.  If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  TSNode parent = XTS_NODE (node)->node;
+  /* NAMED selects between counting all children and named ones.  */
+  uint32_t total = (NILP (named)
+                   ? ts_node_child_count (parent)
+                   : ts_node_named_child_count (parent));
+  return make_fixnum (total);
+}
+
+DEFUN ("treesit-node-child-by-field-name",
+       Ftreesit_node_child_by_field_name,
+       Streesit_node_child_by_field_name, 2, 2, 0,
+       doc: /* Return the child of NODE with FIELD-NAME.
+Return nil if there isn't any.  If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object field_name)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+  CHECK_STRING (field_name);
+
+  /* The length passed to tree-sitter is taken with strlen.  */
+  char *field = SSDATA (field_name);
+  TSNode child = ts_node_child_by_field_name (XTS_NODE (node)->node,
+                                             field, strlen (field));
+
+  return (ts_node_is_null (child)
+         ? Qnil
+         : make_ts_node (XTS_NODE (node)->parser, child));
+}
+
+DEFUN ("treesit-node-next-sibling",
+       Ftreesit_node_next_sibling,
+       Streesit_node_next_sibling, 1, 2, 0,
+       doc: /* Return the next sibling of NODE.
+
+Return nil if there isn't any.  If NAMED is non-nil, look for named
+child only.  NAMED defaults to nil.  If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  TSNode self = XTS_NODE (node)->node;
+  /* NAMED selects between any sibling and named siblings only.  */
+  TSNode next = (NILP (named)
+                ? ts_node_next_sibling (self)
+                : ts_node_next_named_sibling (self));
+
+  return (ts_node_is_null (next)
+         ? Qnil
+         : make_ts_node (XTS_NODE (node)->parser, next));
+}
+
+DEFUN ("treesit-node-prev-sibling",
+       Ftreesit_node_prev_sibling,
+       Streesit_node_prev_sibling, 1, 2, 0,
+       doc: /* Return the previous sibling of NODE.
+
+Return nil if there isn't any.  If NAMED is non-nil, look for named
+child only.  NAMED defaults to nil.  If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object named)
+{
+  if (NILP (node))
+    return Qnil;
+  ts_check_node (node);
+
+  TSNode self = XTS_NODE (node)->node;
+  /* NAMED selects between any sibling and named siblings only.  */
+  TSNode previous = (NILP (named)
+                    ? ts_node_prev_sibling (self)
+                    : ts_node_prev_named_sibling (self));
+
+  return (ts_node_is_null (previous)
+         ? Qnil
+         : make_ts_node (XTS_NODE (node)->parser, previous));
+}
+
+DEFUN ("treesit-node-first-child-for-pos",
+       Ftreesit_node_first_child_for_pos,
+       Streesit_node_first_child_for_pos, 2, 3, 0,
+       doc: /* Return the first child of NODE on POS.
+
+Specifically, return the first child that extends beyond POS.  POS is
+a position in the buffer.  Return nil if there isn't any.  If NAMED is
+non-nil, look for named child only.  NAMED defaults to nil.  Note that
+this function returns an immediate child, not the smallest
+(grand)child.  If NODE is nil, return nil.  */)
+  (Lisp_Object node, Lisp_Object pos, Lisp_Object named)
+{
+  if (NILP (node)) return Qnil;
+  ts_check_node (node);
+  /* POS must be a fixnum before XFIXNUM may be applied to it.  */
+  CHECK_FIXNUM (pos);
+
+  struct buffer *buf =
+    XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+  ptrdiff_t visible_beg =
+    XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+
+  /* Validate the character position against the accessible region
+     before converting it: the char-to-byte conversion is only
+     meaningful for positions inside the buffer.  */
+  if (XFIXNUM (pos) < BUF_BEGV (buf) || XFIXNUM (pos) > BUF_ZV (buf))
+    xsignal1 (Qargs_out_of_range, pos);
+
+  ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos));
+
+  /* Tree-sitter works with byte offsets relative to the beginning of
+     the parser's visible region.  */
+  TSNode ts_node = XTS_NODE (node)->node;
+  TSNode child;
+  if (NILP (named))
+    child = ts_node_first_child_for_byte
+      (ts_node, byte_pos - visible_beg);
+  else
+    child = ts_node_first_named_child_for_byte
+      (ts_node, byte_pos - visible_beg);
+
+  if (ts_node_is_null (child))
+    return Qnil;
+
+  return make_ts_node (XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-descendant-for-range",
+       Ftreesit_node_descendant_for_range,
+       Streesit_node_descendant_for_range, 3, 4, 0,
+       doc: /* Return the smallest node that covers BEG to END.
+
+The returned node is a descendant of NODE.  BEG and END are buffer
+positions.  Return nil if there isn't any.  If NAMED is non-nil, look
+for named child only.  NAMED defaults to nil.  If NODE is nil, return
+nil.  */)
+  (Lisp_Object node, Lisp_Object beg, Lisp_Object end, Lisp_Object named)
+{
+  if (NILP (node)) return Qnil;
+  ts_check_node (node);
+  /* BEG and END must be fixnums before XFIXNUM may be applied.  */
+  CHECK_FIXNUM (beg);
+  CHECK_FIXNUM (end);
+
+  struct buffer *buf =
+    XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
+  ptrdiff_t visible_beg =
+    XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
+
+  /* Checks for BUFFER_BEG <= BEG <= END <= BUFFER_END.  This is done
+     on character positions, before the conversion to byte positions,
+     because the conversion is only meaningful for positions inside
+     the buffer.  */
+  if (!(BUF_BEGV (buf) <= XFIXNUM (beg)
+       && XFIXNUM (beg) <= XFIXNUM (end)
+       && XFIXNUM (end) <= BUF_ZV (buf)))
+    xsignal2 (Qargs_out_of_range, beg, end);
+
+  ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg));
+  ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end));
+
+  /* Tree-sitter works with byte offsets relative to the beginning of
+     the parser's visible region.  */
+  TSNode ts_node = XTS_NODE (node)->node;
+  TSNode child;
+  if (NILP (named))
+    child = ts_node_descendant_for_byte_range
+      (ts_node, byte_beg - visible_beg, byte_end - visible_beg);
+  else
+    child = ts_node_named_descendant_for_byte_range
+      (ts_node, byte_beg - visible_beg, byte_end - visible_beg);
+
+  if (ts_node_is_null (child))
+    return Qnil;
+
+  return make_ts_node (XTS_NODE (node)->parser, child);
+}
+
+DEFUN ("treesit-node-eq",
+       Ftreesit_node_eq,
+       Streesit_node_eq, 2, 2, 0,
+       doc: /* Return non-nil if NODE1 and NODE2 are the same node.
+If any one of NODE1 and NODE2 is nil, return nil.  */)
+  (Lisp_Object node1, Lisp_Object node2)
+{
+  /* nil never equals anything, not even another nil.  */
+  if (NILP (node1))
+    return Qnil;
+  if (NILP (node2))
+    return Qnil;
+  CHECK_TS_NODE (node1);
+  CHECK_TS_NODE (node2);
+
+  /* Let tree-sitter compare the underlying nodes.  */
+  return (ts_node_eq (XTS_NODE (node1)->node, XTS_NODE (node2)->node)
+         ? Qt : Qnil);
+}
+
+/*** Query functions */
+
+/* If we decide to pre-load tree-sitter.el, maybe we can implement
+   this function in Lisp.
+
+   The strings returned here are built with build_string rather than
+   build_pure_c_string: this function runs at arbitrary times after
+   startup, and each pure string allocates a header in pure storage
+   that is never reclaimed.  */
+DEFUN ("treesit-expand-pattern",
+       Ftreesit_expand_pattern,
+       Streesit_expand_pattern, 1, 1, 0,
+       doc: /* Expand PATTERN to its string form.
+
+PATTERN can be
+
+    :anchor
+    :?
+    :*
+    :+
+    :equal
+    :match
+    (TYPE PATTERN...)
+    [PATTERN...]
+    FIELD-NAME:
+    @CAPTURE-NAME
+    (_)
+    _
+    \"TYPE\"
+
+Consult Info node `(elisp)Pattern Matching' for a detailed
+explanation.  */)
+  (Lisp_Object pattern)
+{
+  /* Keywords map to fixed pieces of tree-sitter query syntax.  */
+  if (EQ (pattern, intern_c_string (":anchor")))
+    return build_string (".");
+  if (EQ (pattern, intern_c_string (":?")))
+    return build_string ("?");
+  if (EQ (pattern, intern_c_string (":*")))
+    return build_string ("*");
+  if (EQ (pattern, intern_c_string (":+")))
+    return build_string ("+");
+  if (EQ (pattern, intern_c_string (":equal")))
+    return build_string ("#equal");
+  if (EQ (pattern, intern_c_string (":match")))
+    return build_string ("#match");
+
+  /* Vectors and lists expand recursively: each element is expanded,
+     the results are joined with spaces and wrapped in brackets
+     (vector) or parentheses (list).  */
+  if (VECTORP (pattern) || CONSP (pattern))
+    {
+      bool is_vector = VECTORP (pattern);
+      Lisp_Object opening_delimiter
+       = build_string (is_vector ? "[" : "(");
+      Lisp_Object closing_delimiter
+       = build_string (is_vector ? "]" : ")");
+      Lisp_Object body
+       = Fmapconcat (intern_c_string ("treesit-expand-pattern"),
+                     pattern, build_string (" "));
+      return concat3 (opening_delimiter, body, closing_delimiter);
+    }
+
+  /* Anything else is printed with %S.  */
+  return CALLN (Fformat, build_string ("%S"), pattern);
+}
+
+DEFUN ("treesit-expand-query",
+       Ftreesit_expand_query,
+       Streesit_expand_query, 1, 1, 0,
+       doc: /* Expand sexp QUERY to its string form.
+
+A PATTERN in QUERY can be
+
+    :anchor
+    :?
+    :*
+    :+
+    :equal
+    :match
+    (TYPE PATTERN...)
+    [PATTERN...]
+    FIELD-NAME:
+    @CAPTURE-NAME
+    (_)
+    _
+    \"TYPE\"
+
+Consult Info node `(elisp)Pattern Matching' for a detailed
+explanation.  */)
+  (Lisp_Object query)
+{
+  /* Expand every pattern in QUERY and join the results with spaces.
+     The separator is an ordinary Lisp string: a pure string would
+     allocate never-reclaimed pure storage on every call.  */
+  return Fmapconcat (intern_c_string ("treesit-expand-pattern"),
+                    query, build_string (" "));
+}
+
+/* Return a human-readable description of the tree-sitter query
+   error ERROR.  The returned string is a literal and must not be
+   modified or freed.  */
+char*
+ts_query_error_to_string (TSQueryError error)
+{
+  char *description;
+  switch (error)
+    {
+    case TSQueryErrorNone:
+      description = "None";
+      break;
+    case TSQueryErrorSyntax:
+      description = "Syntax error at";
+      break;
+    case TSQueryErrorNodeType:
+      description = "Node type error at";
+      break;
+    case TSQueryErrorField:
+      description = "Field error at";
+      break;
+    case TSQueryErrorCapture:
+      description = "Capture error at";
+      break;
+    case TSQueryErrorStructure:
+      description = "Structure error at";
+      break;
+    default:
+      description = "Unknown error";
+      break;
+    }
+  return description;
+}
+
+/* Collect the predicates of the pattern PATTERN_INDEX in QUERY and
+   return them in a list, in order.  Each predicate is itself a list
+   whose elements are symbols (for capture names) and strings (for
+   string values), in the order tree-sitter reports them.  */
+Lisp_Object
+ts_predicates_for_pattern
+(TSQuery *query, uint32_t pattern_index)
+{
+  uint32_t step_count;
+  const TSQueryPredicateStep *steps =
+    ts_query_predicates_for_pattern (query, pattern_index, &step_count);
+
+  /* Both lists are accumulated in reverse and reversed when
+     complete.  */
+  Lisp_Object all_predicates = Qnil;
+  Lisp_Object current = Qnil;
+  for (uint32_t i = 0; i < step_count; i++)
+    {
+      TSQueryPredicateStep step = steps[i];
+      if (step.type == TSQueryPredicateStepTypeCapture)
+       {
+         /* A capture name becomes a symbol.  */
+         uint32_t name_len;
+         const char *name = ts_query_capture_name_for_id
+           (query, step.value_id, &name_len);
+         current = Fcons (intern_c_string_1 (name, name_len), current);
+       }
+      else if (step.type == TSQueryPredicateStepTypeString)
+       {
+         /* A string value becomes a Lisp string.  */
+         uint32_t value_len;
+         const char *value = ts_query_string_value_for_id
+           (query, step.value_id, &value_len);
+         current = Fcons (make_string (value, value_len), current);
+       }
+      else if (step.type == TSQueryPredicateStepTypeDone)
+       {
+         /* End of one predicate: store it and start a new one.  */
+         all_predicates = Fcons (Fnreverse (current), all_predicates);
+         current = Qnil;
+       }
+    }
+  return Fnreverse (all_predicates);
+}
+
+/* Translate a capture NAME (symbol) to the text of the captured
+   node.  CAPTURES is an alist of (NAME . NODE).  Signal
+   Qtreesit_query_error if no node was captured under NAME.  */
+Lisp_Object
+ts_predicate_capture_name_to_text (Lisp_Object name, Lisp_Object captures)
+{
+  /* Find the first capture whose name is NAME.  */
+  Lisp_Object found = Qnil;
+  Lisp_Object rest = captures;
+  while (!NILP (rest))
+    {
+      Lisp_Object capture = XCAR (rest);
+      if (EQ (XCAR (capture), name))
+       {
+         found = XCDR (capture);
+         break;
+       }
+      rest = XCDR (rest);
+    }
+
+  if (NILP (found))
+    xsignal3 (Qtreesit_query_error,
+             build_pure_c_string ("Cannot find captured node"),
+             name, build_pure_c_string ("A predicate can only refer to captured nodes in the same pattern"));
+
+  /* Extract the text with the node's buffer temporarily current,
+     because Fbuffer_substring operates on the current buffer.  */
+  struct buffer *saved_buffer = current_buffer;
+  struct buffer *node_buffer
+    = XBUFFER (XTS_PARSER (XTS_NODE (found)->parser)->buffer);
+  set_buffer_internal (node_buffer);
+  Lisp_Object text = Fbuffer_substring
+    (Ftreesit_node_start (found), Ftreesit_node_end (found));
+  set_buffer_internal (saved_buffer);
+  return text;
+}
+
+/* Handles predicate (#equal A B).  Return true if A equals B; return
+   false otherwise.  A and B can be either string, or a capture name.
+   The capture name evaluates to the text its captured node spans in
+   the buffer.  ARGS is the list (A B); CAPTURES is the alist of
+   (NAME . NODE) for the current match.  Signal Qtreesit_query_error
+   if ARGS does not have exactly two elements.  */
+bool
+ts_predicate_equal (Lisp_Object args, Lisp_Object captures)
+{
+  /* Error strings are ordinary Lisp strings: this runs long after
+     startup, where pure strings would allocate never-reclaimed pure
+     storage.  */
+  if (XFIXNUM (Flength (args)) != 2)
+    xsignal2 (Qtreesit_query_error, build_string ("Predicate `equal' requires two arguments but only given"), Flength (args));
+
+  Lisp_Object arg1 = XCAR (args);
+  Lisp_Object arg2 = XCAR (XCDR (args));
+  /* Strings stand for themselves; anything else is looked up as a
+     capture name.  */
+  Lisp_Object text1 = STRINGP (arg1) ? arg1 :
+    ts_predicate_capture_name_to_text (arg1, captures);
+  Lisp_Object text2 = STRINGP (arg2) ? arg2 :
+    ts_predicate_capture_name_to_text (arg2, captures);
+
+  return !NILP (Fstring_equal (text1, text2));
+}
+
+/* Handles predicate (#match "regexp" @node).  Return true if "regexp"
+   matches the text spanned by @node; return false otherwise.  Matching
+   is case-sensitive.  ARGS is the list (REGEXP CAPTURE-NAME);
+   CAPTURES is the alist of (NAME . NODE) for the current match.
+   Signal Qtreesit_query_error if ARGS does not have exactly two
+   elements or if they have the wrong types.  */
+bool
+ts_predicate_match (Lisp_Object args, Lisp_Object captures)
+{
+  if (XFIXNUM (Flength (args)) != 2)
+    xsignal2 (Qtreesit_query_error, build_string ("Predicate `match' requires two arguments but only given"), Flength (args));
+
+  Lisp_Object regexp = XCAR (args);
+  Lisp_Object capture_name = XCAR (XCDR (args));
+
+  /* It's probably common to get the argument order backwards.  Catch
+     this mistake early and show helpful explanation, because Emacs
+     loves you.  (We put the regexp first because that's what
+     string-match does.)  These checks must come before the capture
+     lookup below; otherwise a swapped argument list fails inside the
+     lookup with a less helpful error.  */
+  if (!STRINGP (regexp))
+    xsignal1 (Qtreesit_query_error, build_string ("The first argument to `match' should be a regexp string, not a capture name"));
+  if (!SYMBOLP (capture_name))
+    xsignal1 (Qtreesit_query_error, build_string ("The second argument to `match' should be a capture name, not a string"));
+
+  Lisp_Object text = ts_predicate_capture_name_to_text
+    (capture_name, captures);
+
+  return fast_string_match (regexp, text) >= 0;
+}
+
+/* About predicates: I decided to hard-code predicates in C instead
+   of implementing an extensible system where predicates are
+   translated to Lisp functions and new predicates can be added by
+   extending a list of functions, because I really couldn't imagine
+   any useful predicates besides equal and match.  If we later find
+   out that such a system is indeed useful and necessary, it can
+   easily be added.  */
+
+/* If all predicates in PREDICATES pass, return true; otherwise
+   return false.  PREDICATES is a list of predicates, each a list
+   whose first element names the predicate ("equal" or "match") and
+   whose remaining elements are its arguments.  CAPTURES is the alist
+   of (NAME . NODE) for the current match.  Signal
+   Qtreesit_query_error if a predicate is not supported.  */
+bool
+ts_eval_predicates (Lisp_Object captures, Lisp_Object predicates)
+{
+  bool pass = true;
+  /* Evaluate each predicate.  Every predicate is evaluated, so that
+     malformed ones are reported even after an earlier one failed,
+     but a single failure makes the overall result false; the result
+     of a later predicate must not overwrite an earlier failure.  */
+  for (Lisp_Object tail = predicates;
+       !NILP (tail); tail = XCDR (tail))
+    {
+      Lisp_Object predicate = XCAR (tail);
+      Lisp_Object fn = XCAR (predicate);
+      Lisp_Object args = XCDR (predicate);
+      if (!NILP (Fstring_equal (fn, build_string ("equal"))))
+       {
+         if (!ts_predicate_equal (args, captures))
+           pass = false;
+       }
+      else if (!NILP (Fstring_equal (fn, build_string ("match"))))
+       {
+         if (!ts_predicate_match (args, captures))
+           pass = false;
+       }
+      else
+       xsignal3 (Qtreesit_query_error,
+                 build_string ("Invalid predicate"),
+                 fn, build_string ("Currently Emacs only supports equal and match predicate"));
+    }
+  return pass;
+}
+
+DEFUN ("treesit-query-capture",
+       Ftreesit_query_capture,
+       Streesit_query_capture, 2, 4, 0,
+       doc: /* Query NODE with patterns in QUERY.
+
+Return a list of (CAPTURE_NAME . NODE).  CAPTURE_NAME is the name
+assigned to the node in QUERY.  NODE is the captured node.
+
+QUERY is either a string query or a sexp query.  See Info node
+`(elisp)Pattern Matching' for how to write a query in either string or
+s-expression form.
+
+BEG and END, if both non-nil, specify the range in which the query
+is executed.
+
+Raise a treesit-query-error if QUERY is malformed, or something
+else goes wrong.  */)
+  (Lisp_Object node, Lisp_Object query,
+   Lisp_Object beg, Lisp_Object end)
+{
+  ts_check_node (node);
+  /* BEG and END are read with XFIXNUM below, so insist on fixnums
+     rather than arbitrary integers, which would admit bignums.  */
+  if (!NILP (beg))
+    CHECK_FIXNUM (beg);
+  if (!NILP (end))
+    CHECK_FIXNUM (end);
+
+  /* A sexp query is first expanded into its string form.  */
+  if (CONSP (query))
+    query = Ftreesit_expand_query (query);
+  else
+    CHECK_STRING (query);
+
+  /* Extract C values from Lisp objects.  */
+  TSNode ts_node = XTS_NODE (node)->node;
+  Lisp_Object lisp_parser = XTS_NODE (node)->parser;
+  ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
+  const TSLanguage *lang = ts_parser_language
+    (XTS_PARSER (lisp_parser)->parser);
+  char *source = SSDATA (query);
+
+  /* Compile the query.  */
+  uint32_t error_offset;
+  TSQueryError error_type;
+  /* TODO: We could cache the query object, so that repeatedly
+     querying with the same query can reuse the query object.  It also
+     saves us from expanding the sexp query into a string.  I don't
+     know how much time that could save though.  */
+  TSQuery *ts_query = ts_query_new (lang, source, strlen (source),
+                                    &error_offset, &error_type);
+  if (ts_query == NULL)
+    xsignal2 (Qtreesit_query_error,
+              build_string (ts_query_error_to_string (error_type)),
+              make_fixnum (error_offset + 1));
+
+  /* Allocate the cursor only after the query is known to be valid,
+     so that the error path above has nothing to release.  */
+  TSQueryCursor *cursor = ts_query_cursor_new ();
+
+  /* The range handed to tree-sitter is relative to the parser's
+     visible_beg.  */
+  if (!NILP (beg) && !NILP (end))
+    ts_query_cursor_set_byte_range
+      (cursor, (uint32_t) (XFIXNUM (beg) - visible_beg),
+       (uint32_t) (XFIXNUM (end) - visible_beg));
+
+  ts_query_cursor_exec (cursor, ts_query, ts_node);
+  TSQueryMatch match;
+
+  /* NOTE(review): ts_eval_predicates can signal
+     treesit-query-error from inside the loop below; in that case
+     the ts_query_delete / ts_query_cursor_delete calls at the end
+     are presumably never reached.  Consider registering both with
+     an unwind-protect handler.  */
+
+  /* Go over each match, collect captures and predicates.  Include the
+     captures in the return list if all predicates in that match
+     pass.  */
+  Lisp_Object result = Qnil;
+  while (ts_query_cursor_next_match (cursor, &match))
+    {
+      /* Get captured nodes as a list of (CAPTURE_NAME . NODE).  */
+      Lisp_Object captures_lisp = Qnil;
+      const TSQueryCapture *captures = match.captures;
+      for (int idx = 0; idx < match.capture_count; idx++)
+        {
+          uint32_t capture_name_len;
+          TSQueryCapture capture = captures[idx];
+          Lisp_Object captured_node =
+            make_ts_node (lisp_parser, capture.node);
+          const char *capture_name = ts_query_capture_name_for_id
+            (ts_query, capture.index, &capture_name_len);
+          Lisp_Object cap =
+            Fcons (intern_c_string_1 (capture_name, capture_name_len),
+                   captured_node);
+          captures_lisp = Fcons (cap, captures_lisp);
+        }
+      /* Get predicates.  */
+      Lisp_Object predicates =
+        ts_predicates_for_pattern (ts_query, match.pattern_index);
+
+      /* The list was built back to front; restore capture order
+         before evaluating predicates and appending to RESULT.  */
+      captures_lisp = Fnreverse (captures_lisp);
+      if (ts_eval_predicates (captures_lisp, predicates))
+        result = CALLN (Fnconc, result, captures_lisp);
+    }
+  ts_query_delete (ts_query);
+  ts_query_cursor_delete (cursor);
+  return result;
+}
+
+/*** Initialization */
+
+/* Initialize the tree-sitter routines: define the symbols, error
+   conditions, Lisp variables and primitives provided by this
+   file.  */
+void
+syms_of_treesit (void)
+{
+  /* Type predicate symbols, used by the CHECK_TS_* helpers.  */
+  DEFSYM (Qtreesit_parser_p, "treesit-parser-p");
+  DEFSYM (Qtreesit_node_p, "treesit-node-p");
+  /* Node property names.  */
+  DEFSYM (Qnamed, "named");
+  DEFSYM (Qmissing, "missing");
+  DEFSYM (Qextra, "extra");
+  DEFSYM (Qhas_changes, "has-changes");
+  DEFSYM (Qhas_error, "has-error");
+
+  /* Error condition symbols; their messages and parents are set by
+     the define_error calls below.  */
+  DEFSYM (Qtreesit_error, "treesit-error");
+  DEFSYM (Qtreesit_query_error, "treesit-query-error");
+  DEFSYM (Qtreesit_parse_error, "treesit-parse-error");
+  DEFSYM (Qtreesit_range_invalid, "treesit-range-invalid");
+  DEFSYM (Qtreesit_buffer_too_large,
+         "treesit-buffer-too-large");
+  DEFSYM (Qtreesit_load_language_error,
+         "treesit-load-language-error");
+  DEFSYM (Qtreesit_node_outdated,
+         "treesit-node-outdated");
+  DEFSYM (Quser_emacs_directory,
+         "user-emacs-directory");
+
+  /* treesit-error is the parent of every other tree-sitter error
+     and is itself a child of error.  */
+  define_error (Qtreesit_error, "Generic tree-sitter error", Qerror);
+  define_error (Qtreesit_query_error, "Query pattern is malformed",
+               Qtreesit_error);
+  /* Should be impossible, no need to document this error.  */
+  define_error (Qtreesit_parse_error, "Parse failed",
+               Qtreesit_error);
+  define_error (Qtreesit_range_invalid,
+               "RANGES are invalid, they have to be ordered and not overlapping",
+               Qtreesit_error);
+  define_error (Qtreesit_buffer_too_large, "Buffer too large (> 4GB)",
+               Qtreesit_error);
+  define_error (Qtreesit_load_language_error,
+               "Cannot load language definition",
+               Qtreesit_error);
+  define_error (Qtreesit_node_outdated,
+               "This node is outdated, please retrieve a new one",
+               Qtreesit_error);
+
+  /* The parser list starts out empty and is made buffer-local.  */
+  DEFSYM (Qtreesit_parser_list, "treesit-parser-list");
+  DEFVAR_LISP ("treesit-parser-list", Vtreesit_parser_list,
+              doc: /* A list of tree-sitter parsers.
+
+If you removed a parser from this list, do not put it back in.  Emacs
+keeps the parser in this list updated with any change in the buffer.
+If removed and put back in, there is no guarantee that the parser is in
+sync with the buffer's content.  */);
+  Vtreesit_parser_list = Qnil;
+  Fmake_variable_buffer_local (Qtreesit_parser_list);
+
+  DEFVAR_LISP ("treesit-load-name-override-list",
+              Vtreesit_load_name_override_list,
+              doc:
+              /* An override list for unconventional tree-sitter libraries.
+
+By default, Emacs assumes the dynamic library for LANG is
+libtree-sitter-LANG.EXT, where EXT is the OS specific extension for
+dynamic libraries.  Emacs also assumes that the name of the C function
+the library provides is tree_sitter_LANG.  If that is not the case,
+add an entry
+
+    (LANG LIBRARY-BASE-NAME FUNCTION-NAME)
+
+to this list, where LIBRARY-BASE-NAME is the filename of the dynamic
+library without extension, FUNCTION-NAME is the function provided by
+the library.  */);
+  Vtreesit_load_name_override_list = Qnil;
+
+  DEFVAR_LISP ("treesit-extra-load-path",
+              Vtreesit_extra_load_path,
+              doc:
+              /* Extra load paths of tree-sitter language definitions.
+When trying to load a tree-sitter language definition,
+Emacs looks at directories in this variable,
+`user-emacs-directory'/tree-sitter, and system default locations for
+dynamic libraries, in that order.  */);
+  Vtreesit_extra_load_path = Qnil;
+
+  /* Register the Lisp primitives.  */
+  defsubr (&Streesit_language_available_p);
+
+  defsubr (&Streesit_parser_p);
+  defsubr (&Streesit_node_p);
+
+  defsubr (&Streesit_node_parser);
+
+  defsubr (&Streesit_parser_create);
+  defsubr (&Streesit_parser_buffer);
+  defsubr (&Streesit_parser_language);
+
+  defsubr (&Streesit_parser_root_node);
+  /* defsubr (&Streesit_parse_string); */
+
+  defsubr (&Streesit_parser_set_included_ranges);
+  defsubr (&Streesit_parser_included_ranges);
+
+  defsubr (&Streesit_node_type);
+  defsubr (&Streesit_node_start);
+  defsubr (&Streesit_node_end);
+  defsubr (&Streesit_node_string);
+  defsubr (&Streesit_node_parent);
+  defsubr (&Streesit_node_child);
+  defsubr (&Streesit_node_check);
+  defsubr (&Streesit_node_field_name_for_child);
+  defsubr (&Streesit_node_child_count);
+  defsubr (&Streesit_node_child_by_field_name);
+  defsubr (&Streesit_node_next_sibling);
+  defsubr (&Streesit_node_prev_sibling);
+  defsubr (&Streesit_node_first_child_for_pos);
+  defsubr (&Streesit_node_descendant_for_range);
+  defsubr (&Streesit_node_eq);
+
+  defsubr (&Streesit_expand_pattern);
+  defsubr (&Streesit_expand_query);
+  defsubr (&Streesit_query_capture);
+}
diff --git a/src/treesit.h b/src/treesit.h

new file mode 100644 (file)

index 0000000..639c4ee
--- /dev/null
+++ b/src/treesit.h
@@ -0,0 +1,137 @@
+/* Header file for the tree-sitter integration.
+
+Copyright (C) 2021 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#ifndef EMACS_TREESIT_H
+#define EMACS_TREESIT_H
+
+#include <tree_sitter/api.h>
+#include "lisp.h"
+
+INLINE_HEADER_BEGIN
+
+/* A wrapper for a tree-sitter parser, which also contains a parse
+   tree and other goodies for convenience.  */
+struct Lisp_TS_Parser
+{
+  union vectorlike_header header;
+  /* A symbol representing the language this parser uses.  See the
+     manual for more explanation.  */
+  Lisp_Object language_symbol;
+  /* The buffer associated with this parser.  */
+  Lisp_Object buffer;
+  /* The pointer to the tree-sitter parser.  Never NULL.  */
+  TSParser *parser;
+  /* Pointer to the syntax tree.  Initially NULL, so check for NULL
+     before use.  */
+  TSTree *tree;
+  /* Teaches tree-sitter how to read an Emacs buffer.  */
+  TSInput input;
+  /* Re-parsing an unchanged buffer is not free for tree-sitter, so we
+     only make it re-parse when need_reparse == true.  That usually
+     means some change is made in the buffer.  But others could set
+     this field to true to force tree-sitter to re-parse.  */
+  bool need_reparse;
+  /* These two positions record the buffer byte position (1-based) of
+     the "visible region" that tree-sitter sees.  Unlike markers,
+     these two positions do not change as the user inserts and deletes
+     text around them.  Before re-parse, we move these positions to
+     match BUF_BEGV_BYTE and BUF_ZV_BYTE.  Note that we don't need to
+     synchronize these positions when retrieving them in a function
+     that involves a node: if the node is not outdated, these
+     positions are synchronized.  */
+  ptrdiff_t visible_beg;
+  ptrdiff_t visible_end;
+  /* This counter is incremented every time a change is made to the
+     buffer in ts_record_change.  The node retrieved from this parser
+     inherits this timestamp.  This way we can make sure the node is
+     not outdated when we access its information.  */
+  ptrdiff_t timestamp;
+};
+
+/* A wrapper around a tree-sitter node.  */
+struct Lisp_TS_Node
+{
+  union vectorlike_header header;
+  /* The parser this node was retrieved from.  Holding it here
+     prevents gc from collecting the tree before the node is done
+     with it: TSNode contains a pointer to the tree it belongs to,
+     and the parser object, when collected by gc, will free that
+     tree.  */
+  Lisp_Object parser;
+  /* The wrapped tree-sitter node.  */
+  TSNode node;
+  /* A node inherits its parser's timestamp at creation time.  The
+     parser's timestamp increments as the buffer changes.  This way we
+     can make sure the node is not outdated when we access its
+     information.  */
+  ptrdiff_t timestamp;
+};
+
+/* Return true if X is a pseudovector of type PVEC_TS_PARSER, i.e. a
+   tree-sitter parser object.  */
+INLINE bool
+TS_PARSERP (Lisp_Object x)
+{
+  return PSEUDOVECTORP (x, PVEC_TS_PARSER);
+}
+
+/* Return the struct Lisp_TS_Parser that A refers to.  A must satisfy
+   TS_PARSERP; this is only verified by an eassert.  */
+INLINE struct Lisp_TS_Parser *
+XTS_PARSER (Lisp_Object a)
+{
+  eassert (TS_PARSERP (a));
+  return XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Parser);
+}
+
+/* Return true if X is a pseudovector of type PVEC_TS_NODE, i.e. a
+   tree-sitter node object.  */
+INLINE bool
+TS_NODEP (Lisp_Object x)
+{
+  return PSEUDOVECTORP (x, PVEC_TS_NODE);
+}
+
+/* Return the struct Lisp_TS_Node that A refers to.  A must satisfy
+   TS_NODEP; this is only verified by an eassert.  */
+INLINE struct Lisp_TS_Node *
+XTS_NODE (Lisp_Object a)
+{
+  eassert (TS_NODEP (a));
+  return XUNTAG (a, Lisp_Vectorlike, struct Lisp_TS_Node);
+}
+
+/* Type-check PARSER with CHECK_TYPE, using TS_PARSERP as the test
+   and Qtreesit_parser_p as the expected-predicate symbol.  */
+INLINE void
+CHECK_TS_PARSER (Lisp_Object parser)
+{
+  CHECK_TYPE (TS_PARSERP (parser), Qtreesit_parser_p, parser);
+}
+
+/* Type-check NODE with CHECK_TYPE, using TS_NODEP as the test and
+   Qtreesit_node_p as the expected-predicate symbol.  */
+INLINE void
+CHECK_TS_NODE (Lisp_Object node)
+{
+  CHECK_TYPE (TS_NODEP (node), Qtreesit_node_p, node);
+}
+
+/* Record a buffer change for the tree-sitter parsers.  The parser's
+   timestamp counter is incremented here (see struct Lisp_TS_Parser).
+   NOTE(review): the three arguments are presumably buffer byte
+   positions describing the edit -- confirm against callers.  */
+void
+ts_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
+                 ptrdiff_t new_end_byte);
+
+/* Return a Lisp object for PARSER.  NOTE(review): BUFFER, TREE and
+   LANGUAGE_SYMBOL presumably initialize the same-named fields of
+   struct Lisp_TS_Parser -- confirm in treesit.c.  */
+Lisp_Object
+make_ts_parser (Lisp_Object buffer, TSParser *parser,
+               TSTree *tree, Lisp_Object language_symbol);
+
+/* Return a Lisp object for NODE, associated with the Lisp parser
+   object PARSER.  */
+Lisp_Object
+make_ts_node (Lisp_Object parser, TSNode node);
+
+/* Define the symbols, variables and primitives of the tree-sitter
+   integration.  */
+extern void syms_of_treesit (void);
+
+INLINE_HEADER_END
+
+#endif /* EMACS_TREESIT_H */
diff --git a/test/src/treesit-tests.el b/test/src/treesit-tests.el

new file mode 100644 (file)

index 0000000..eb6e85c
--- /dev/null
+++ b/test/src/treesit-tests.el
@@ -0,0 +1,366 @@
+;;; treesit-tests.el --- tests for src/treesit.c         -*- lexical-binding: t; -*-
+
+;; Copyright (C) 2021 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.
+
+;;; Code:
+
+(require 'ert)
+(require 'treesit)
+
+(ert-deftest treesit-basic-parsing ()
+  "Test basic parsing routines."
+  (with-temp-buffer
+    (let* ((parser (treesit-parser-create (current-buffer) 'json))
+           ;; Returns the sexp string of PARSER's current root node.
+           (root-sexp (lambda ()
+                        (treesit-node-string
+                         (treesit-parser-root-node parser)))))
+      ;; The new parser sits at the head of the parser list.
+      (should (eq parser (car treesit-parser-list)))
+      ;; Nothing has been inserted yet.
+      (should (equal (funcall root-sexp) "(ERROR)"))
+
+      (insert "[1,2,3]")
+      (should (equal (funcall root-sexp)
+                     "(document (array (number) (number) (number)))"))
+
+      ;; Insert an object after the first element and check that the
+      ;; tree reflects the edit.
+      (goto-char (+ (point-min) 3))
+      (insert "{\"name\": \"Bob\"},")
+      (should
+       (equal
+        (funcall root-sexp)
+        "(document (array (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number) (number)))")))))
+
+(ert-deftest treesit-node-api ()
+  "Tests for node API."
+  (with-temp-buffer
+    (insert "[1,2,{\"name\": \"Bob\"},3]")
+    (let* ((parser (treesit-parser-create (current-buffer) 'json))
+           (root-node (treesit-parser-root-node parser))
+           doc-node object-node pair-node)
+      ;; Node type.
+      (should (equal "document" (treesit-node-type root-node)))
+      ;; Node property checks.
+      (should (eq t (treesit-node-check root-node 'named)))
+      (should-not (treesit-node-check root-node 'missing))
+      (should-not (treesit-node-check root-node 'extra))
+      (should-not (treesit-node-check root-node 'has-error))
+      ;; Child access by index.
+      (setq doc-node (treesit-node-child root-node 0))
+      (should (equal "array" (treesit-node-type doc-node)))
+      (should
+       (equal
+        (treesit-node-string doc-node)
+        "(array (number) (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number))"))
+      ;; Child count, all children and named children only.
+      (should (eql 9 (treesit-node-child-count doc-node)))
+      (should (eql 4 (treesit-node-child-count doc-node t)))
+      ;; Field name of a child.
+      (setq object-node (treesit-node-child doc-node 2 t)
+            pair-node (treesit-node-child object-node 0 t))
+      (should (equal "object" (treesit-node-type object-node)))
+      (should (equal "pair" (treesit-node-type pair-node)))
+      (should
+       (equal "key" (treesit-node-field-name-for-child pair-node 0)))
+      ;; Child lookup by field name.
+      (should
+       (equal "(string (string_content))"
+              (treesit-node-string
+               (treesit-node-child-by-field-name pair-node "key"))))
+      ;; Next sibling, named and anonymous.
+      (should
+       (equal "(number)"
+              (treesit-node-string
+               (treesit-node-next-sibling object-node t))))
+      (should
+       (equal "(\",\")"
+              (treesit-node-string
+               (treesit-node-next-sibling object-node))))
+      ;; Previous sibling, named and anonymous.
+      (should
+       (equal "(number)"
+              (treesit-node-string
+               (treesit-node-prev-sibling object-node t))))
+      (should
+       (equal "(\",\")"
+              (treesit-node-string
+               (treesit-node-prev-sibling object-node))))
+      ;; First child for a position.
+      (should
+       (equal "(number)"
+              (treesit-node-string
+               (treesit-node-first-child-for-pos doc-node 3 t))))
+      (should
+       (equal "(\",\")"
+              (treesit-node-string
+               (treesit-node-first-child-for-pos doc-node 3))))
+      ;; Descendant for a range.
+      (should
+       (equal "(\"{\")"
+              (treesit-node-string
+               (treesit-node-descendant-for-range root-node 6 7))))
+      (should
+       (equal
+        "(object (pair key: (string (string_content)) value: (string (string_content))))"
+        (treesit-node-string
+         (treesit-node-descendant-for-range root-node 6 7 t))))
+      ;; Node equality.
+      (should (treesit-node-eq root-node root-node))
+      (should-not (treesit-node-eq root-node doc-node)))))
+
+(ert-deftest treesit-query-api ()
+  "Tests for query API."
+  (with-temp-buffer
+    (insert "[1,2,{\"name\": \"Bob\"},3]")
+    (let* ((parser (treesit-parser-create (current-buffer) 'json))
+           (root-node (treesit-parser-root-node parser)))
+      ;; The same query, once in string form and once in sexp form,
+      ;; must produce the same captures.
+      (dolist (pattern
+               '("(string) @string
+(pair key: (_) @keyword)
+((_) @bob (#match \"^B.b$\" @bob))
+(number) @number
+((number) @n3 (#equal \"3\" @n3)) "
+                 ((string) @string
+                  (pair key: (_) @keyword)
+                  ((_) @bob (:match "^B.b$" @bob))
+                  (number) @number
+                  ((number) @n3 (:equal "3" @n3)))))
+        (should
+         (equal
+          '((number . "1") (number . "2")
+            (keyword . "\"name\"")
+            (string . "\"name\"")
+            (string . "\"Bob\"")
+            (bob . "Bob")
+            (number . "3")
+            (n3 . "3"))
+          (mapcar (lambda (entry)
+                    (cons (car entry)
+                          (treesit-node-text
+                           (cdr entry))))
+                  (treesit-query-capture root-node pattern)))))
+      ;; Sexp-to-string expansion does not depend on the pattern
+      ;; being iterated over, so check it once, outside the loop.
+      (should
+       (equal
+        "(type field: (_) @capture .) ? * + \"return\""
+        (treesit-expand-query
+         '((type field: (_) @capture :anchor)
+           :? :* :+ "return")))))))
+
+(ert-deftest treesit-narrow ()
+  "Tests if narrowing works."
+  (with-temp-buffer
+    (insert "xxx[1,{\"name\": \"Bob\"},2,3]xxx")
+    (narrow-to-region (+ (point-min) 3) (- (point-max) 3))
+    (let ((parser (treesit-parser-create (current-buffer) 'json)))
+      ;; Request the root node once before the assertions; the value
+      ;; itself is not needed here.
+      (treesit-parser-root-node parser)
+      ;; This test is from the basic test.
+      (should
+       (equal
+        (treesit-node-string
+         (treesit-parser-root-node parser))
+        "(document (array (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number) (number)))"))
+
+      ;; Edit outside and inside the previously visible region, then
+      ;; narrow to the array again.
+      (widen)
+      (goto-char (point-min))
+      (insert "ooo")
+      (should (equal "oooxxx[1,{\"name\": \"Bob\"},2,3]xxx"
+                     (buffer-string)))
+      (delete-region 10 26)
+      (should (equal "oooxxx[1,2,3]xxx"
+                     (buffer-string)))
+      (narrow-to-region (+ (point-min) 6) (- (point-max) 3))
+      ;; This test is also from the basic test.
+      (should
+       (equal (treesit-node-string
+               (treesit-parser-root-node parser))
+              "(document (array (number) (number) (number)))"))
+      ;; Append a second array and narrow to it.
+      (widen)
+      (goto-char (point-max))
+      (insert "[1,2]")
+      (should (equal "oooxxx[1,2,3]xxx[1,2]"
+                     (buffer-string)))
+      (narrow-to-region (- (point-max) 5) (point-max))
+      (should
+       (equal (treesit-node-string
+               (treesit-parser-root-node parser))
+              "(document (array (number) (number)))"))
+      ;; Prepend a third array and narrow to it.
+      (widen)
+      (goto-char (point-min))
+      (insert "[1]")
+      (should (equal "[1]oooxxx[1,2,3]xxx[1,2]"
+                     (buffer-string)))
+      (narrow-to-region (point-min) (+ (point-min) 3))
+      (should
+       (equal (treesit-node-string
+               (treesit-parser-root-node parser))
+              "(document (array (number)))")))))
+
+(ert-deftest treesit-range ()
+  "Tests if range works."
+  (with-temp-buffer
+    (insert "[[1],oooxxx[1,2,3],xxx[1,2]]")
+    (let ((parser (treesit-parser-create (current-buffer) 'json)))
+      ;; Request the root node once before any range is set; the
+      ;; value itself is not needed here.
+      (treesit-parser-root-node parser)
+      ;; Overlapping ranges must be rejected.
+      (should-error
+       (treesit-parser-set-included-ranges
+        parser '((1 . 6) (5 . 20)))
+       :type '(treesit-range-invalid))
+
+      ;; Ordered, non-overlapping ranges are accepted, reported back
+      ;; unchanged, and only their text is parsed.
+      (treesit-parser-set-included-ranges
+       parser '((1 . 6) (12 . 20) (23 . 29)))
+      (should (equal '((1 . 6) (12 . 20) (23 . 29))
+                     (treesit-parser-included-ranges parser)))
+      (should (equal "(document (array (array (number)) (array (number) (number) (number)) (array (number) (number))))"
+                     (treesit-node-string
+                      (treesit-parser-root-node parser))))
+      ;; TODO: More tests.
+      )))
+
+(ert-deftest treesit-multi-lang ()
+  "Tests if parsing multiple language works."
+  (with-temp-buffer
+    (insert "<html><script>1</script><style>body {}</style></html>")
+    ;; The HTML parser object is never referenced directly; it is
+    ;; presumably what `treesit-query-range' uses below when it is
+    ;; given the language `html'.
+    (treesit-get-parser-create 'html)
+    (let ((css (treesit-get-parser-create 'css))
+          (js (treesit-get-parser-create 'javascript))
+          css-range js-range)
+      ;; JavaScript.
+      (setq js-range
+            (treesit-query-range
+             'html
+             '((script_element (raw_text) @capture))))
+      (should (equal '((15 . 16)) js-range))
+      (treesit-parser-set-included-ranges js js-range)
+      (should (equal "(program (expression_statement (number)))"
+                     (treesit-node-string
+                      (treesit-parser-root-node js))))
+      ;; CSS.
+      (setq css-range
+            (treesit-query-range
+             'html
+             '((style_element (raw_text) @capture))))
+      (should (equal '((32 . 39)) css-range))
+      (treesit-parser-set-included-ranges css css-range)
+      (should
+       (equal "(stylesheet (rule_set (selectors (tag_name)) (block)))"
+              (treesit-node-string
+               (treesit-parser-root-node css))))
+      ;; TODO: More tests.
+      )))
+
+(ert-deftest treesit-parser-supplemental ()
+  "Supplemental parser functions."
+  ;; `treesit-get-parser'.
+  (with-temp-buffer
+    (should (equal (treesit-get-parser 'json) nil)))
+  ;; `treesit-get-parser-create'.
+  (with-temp-buffer
+    (should (not (equal (treesit-get-parser-create 'json)
+                        nil))))
+  ;; `treesit-parse-string'.
+  (should (equal (treesit-node-string
+                  (treesit-parse-string
+                   "[1,2,{\"name\": \"Bob\"},3]"
+                   'json))
+                 "(document (array (number) (number) (object (pair key: (string (string_content)) value: (string (string_content)))) (number)))"))
+  (with-temp-buffer
+    (insert "[1,2,{\"name\": \"Bob\"},3]")
+    (let ((parser (treesit-parser-create (current-buffer) 'json)))
+      ;; Request the root node once before the assertions; the value
+      ;; itself is not needed here.
+      (treesit-parser-root-node parser)
+      ;; `treesit-get-parser'.
+      (should (not (equal (treesit-get-parser 'json)
+                          nil)))
+      ;; `treesit-language-at'.
+      (should (equal (treesit-language-at (point))
+                     'json))
+      ;; `treesit-set-ranges', `treesit-get-ranges'.
+      (treesit-set-ranges 'json
+                          '((1 . 2)))
+      (should (equal (treesit-get-ranges 'json)
+                     '((1 . 2)))))))
+
+(ert-deftest treesit-node-supplemental ()
+  "Supplemental node functions."
+  ;; Run in a temporary buffer so that the inserted text and the
+  ;; parser do not end up in whatever buffer happens to be current,
+  ;; and so that the buffer positions used below refer to the JSON
+  ;; text only.
+  (with-temp-buffer
+    (insert "[1,2,{\"name\": \"Bob\"},3]")
+    (let* ((parser (treesit-parser-create (current-buffer) 'json))
+           (root-node (treesit-parser-root-node parser))
+           (doc-node (treesit-node-child root-node 0)))
+      ;; `treesit-node-buffer'.
+      (should (equal (treesit-node-buffer root-node)
+                     (current-buffer)))
+      ;; `treesit-node-language'.
+      (should (eq (treesit-node-language root-node)
+                  'json))
+      ;; `treesit-node-at'.
+      (should (equal (treesit-node-string
+                      (treesit-node-at 1 2 'json))
+                     "(\"[\")"))
+      ;; `treesit-buffer-root-node'.
+      (should (treesit-node-eq
+               (treesit-buffer-root-node 'json)
+               root-node))
+      ;; `treesit-filter-child'.
+      (should (equal (mapcar
+                      (lambda (node)
+                        (treesit-node-type node))
+                      (treesit-filter-child
+                       doc-node (lambda (node)
+                                  (treesit-node-check node 'named))))
+                     '("number" "number" "object" "number")))
+      ;; `treesit-node-text'.
+      (should (equal (treesit-node-text doc-node)
+                     "[1,2,{\"name\": \"Bob\"},3]"))
+      ;; `treesit-node-index'.
+      (should (eq (treesit-node-index doc-node)
+                  0))
+      ;; TODO:
+      ;; `treesit-parent-until'
+      ;; `treesit-parent-while'
+      ;; `treesit-node-children'
+      ;; `treesit-node-field-name'
+      )))
+
+;; TODO
+;; - Functions in treesit.el
+;; - treesit-load-name-override-list
+
+(provide 'treesit-tests)
+;;; treesit-tests.el ends here
author	Yuan Fu <casouri@gmail.com>
	Sun, 13 Mar 2022 06:10:06 +0000 (22:10 -0800)
committer	Yuan Fu <casouri@gmail.com>
	Sat, 7 May 2022 08:11:39 +0000 (01:11 -0700)
configure.ac		patch \| blob \| history
doc/lispref/elisp.texi		patch \| blob \| history
doc/lispref/modes.texi		patch \| blob \| history
doc/lispref/parsing.texi	[new file with mode: 0644]	patch \| blob
lisp/emacs-lisp/cl-preloaded.el		patch \| blob \| history
lisp/treesit.el	[new file with mode: 0644]	patch \| blob
src/Makefile.in		patch \| blob \| history
src/alloc.c		patch \| blob \| history
src/casefiddle.c		patch \| blob \| history
src/data.c		patch \| blob \| history
src/emacs.c		patch \| blob \| history
src/eval.c		patch \| blob \| history
src/insdel.c		patch \| blob \| history
src/json.c		patch \| blob \| history
src/lisp.h		patch \| blob \| history
src/lread.c		patch \| blob \| history
src/print.c		patch \| blob \| history
src/treesit.c	[new file with mode: 0644]	patch \| blob
src/treesit.h	[new file with mode: 0644]	patch \| blob
test/src/treesit-tests.el	[new file with mode: 0644]	patch \| blob