SSDATA (SYMBOL_NAME (Vthis_command)),
SSDATA (SYMBOL_NAME (XTS_PARSER (parser)->language_symbol)),
buf_name, BUF_BEG (buf),
- BUF_BEGV (buf), BUF_Z (buf), BUF_ZV (buf));
+ BUF_BEGV (buf), BUF_ZV (buf), BUF_Z (buf));
Lisp_Object tail = BVAR (buf, ts_parser_list);
FOR_EACH_TAIL (tail)
}
}
+static TSRange *treesit_make_ts_ranges (Lisp_Object, Lisp_Object,
+ uint32_t *);
+
/* Comment (ref:visible-beg-null) The purpose of visible_beg/end is to
keep track of "which part of the buffer does the tree-sitter tree
see", in order to update the tree correctly. Visible_beg/end have
XTS_PARSER (parser)->visible_beg = visible_beg;
XTS_PARSER (parser)->visible_end = visible_end;
+
+ /* Fix the ranges so that they stay within the visible region
+ (between visible_beg and visible_end). Here we try to do minimal
+ work, just enough that the ranges are minimally correct and there's
+ no OOB error. Usually treesit-update-ranges should update the parser
+ with semantically correct ranges.
+
+ We start with the charpos ranges, because for bytepos ranges, after
+ user edits, a range's start/end might end up inside a multibyte
+ char! See (ref:bytepos-range-pitfall) below. */
+  Lisp_Object lisp_ranges = XTS_PARSER (parser)->last_set_ranges;
+  if (NILP (lisp_ranges)) return;
+
+  /* NEW_RANGES_HEAD is the first cons of the list we end up keeping.
+     PREV_CONS is the most recent cons whose range we decided to keep,
+     or nil if no range has been kept so far.  */
+  Lisp_Object new_ranges_head = lisp_ranges;
+  Lisp_Object prev_cons = Qnil;
+
+  FOR_EACH_TAIL_SAFE (lisp_ranges)
+    {
+      Lisp_Object range = XCAR (lisp_ranges);
+      ptrdiff_t beg = XFIXNUM (XCAR (range));
+      ptrdiff_t end = XFIXNUM (XCDR (range));
+
+      if (end <= visible_beg)
+        /* Even the end is before visible_beg, discard this range.  */
+        new_ranges_head = XCDR (new_ranges_head);
+      else if (beg >= visible_end)
+        {
+          /* Even the beg is after visible_end, discard this range and
+             all the ranges after it.  We must cut the list right
+             after the last kept cons: setting the cdr of RANGE itself
+             would only turn (BEG . END) into (BEG . nil) and leave
+             that broken pair in the list.  */
+          if (NILP (prev_cons))
+            new_ranges_head = Qnil;
+          else
+            XSETCDR (prev_cons, Qnil);
+          break;
+        }
+      else
+        {
+          /* At this point, the range overlaps with the visible portion
+             of the buffer in some way (in front / in back / completely
+             encased / completely encases).  Clip it to the visible
+             portion.  */
+          if (beg < visible_beg)
+            XSETCAR (range, make_fixnum (visible_beg));
+          if (end > visible_end)
+            XSETCDR (range, make_fixnum (visible_end));
+          prev_cons = lisp_ranges;
+        }
+    }
+
+  XTS_PARSER (parser)->last_set_ranges = new_ranges_head;
+
+ if (NILP (new_ranges_head))
+ {
+ bool success;
+ success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
+ NULL, 0);
+ eassert (success);
+ }
+ else
+ {
+ uint32_t len = 0;
+ TSRange *ts_ranges = treesit_make_ts_ranges (new_ranges_head, parser,
+ &len);
+ bool success;
+ success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
+ ts_ranges, len);
+ xfree (ts_ranges);
+ eassert (success);
+ }
}
+/* (ref:bytepos-range-pitfall) Suppose we have the following buffer
+   content ([ ] is a unibyte char, [     ] is a multibyte char):
+
+       [a][b][c][d][e][  f  ]
+
+   and the following ranges (denoted by braces):
+
+       [a][b][c][d][e][  f  ]
+       {       }{    }
+
+   So far so good, now user deletes a unibyte char at the beginning:
+
+       [b][c][d][e][  f  ]
+       {       }{    }
+
+   Oops, now our second range cuts into the multibyte char, bad!  */
+
static void
treesit_check_buffer_size (struct buffer *buffer)
{
make_fixnum (buffer_size_bytes));
}
-static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t, struct buffer *);
+static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t,
+ Lisp_Object, struct buffer *);
static void
treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
{
uint32_t len;
TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len);
- lisp_ranges = treesit_make_ranges (ranges, len, buf);
+ lisp_ranges = treesit_make_ranges (ranges, len, parser, buf);
xfree (ranges);
}
else
static void
treesit_ensure_parsed (Lisp_Object parser)
{
+ if (XTS_PARSER (parser)->within_reparse) return;
+ XTS_PARSER (parser)->within_reparse = true;
+
struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
/* Before we parse, catch up with the narrowing situation. */
because it might set the flag to true. */
treesit_sync_visible_region (parser);
- /* Make sure this comes before everything else, see comment
- (ref:notifier-inside-ensure-parsed) for more detail. */
if (!XTS_PARSER (parser)->need_reparse)
- return;
+ {
+ XTS_PARSER (parser)->within_reparse = false;
+ return;
+ }
TSParser *treesit_parser = XTS_PARSER (parser)->parser;
TSTree *tree = XTS_PARSER (parser)->tree;
XTS_PARSER (parser)->need_reparse = false;
XTS_PARSER (parser)->timestamp++;
- /* After-change functions should run at the very end, most crucially
- after need_reparse is set to false, this way if the function
- calls some tree-sitter function which invokes
- treesit_ensure_parsed again, it returns early and do not
- recursively call the after change functions again.
- (ref:notifier-inside-ensure-parsed) */
treesit_call_after_change_functions (tree, new_tree, parser);
ts_tree_delete (tree);
+
+ XTS_PARSER (parser)->within_reparse = false;
}
/* This is the read function provided to tree-sitter to read from a
beg = NULL;
len = 0;
}
- /* Normal case, read a character. */
+ /* Normal case, read a character. We can't give tree-sitter the
+ whole buffer range because we move the gap around, realloc the
+ buffer, etc; and there's no way to invalidate the previously
+ given range in tree-sitter. Moreover, benchmarks show there's
+ very little difference between passing a whole chunk vs passing a
+ single char at once. The only cost is funcall I guess. */
else
{
beg = (char *) BUF_BYTE_ADDRESS (buffer, byte_pos);
lisp_parser->timestamp = 0;
lisp_parser->deleted = false;
lisp_parser->need_to_gc_buffer = false;
+ lisp_parser->within_reparse = false;
eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
}
convert between tree-sitter buffer offset and buffer position. */
static Lisp_Object
treesit_make_ranges (const TSRange *ranges, uint32_t len,
- struct buffer *buffer)
+ Lisp_Object parser, struct buffer *buffer)
{
Lisp_Object list = Qnil;
for (int idx = 0; idx < len; idx++)
{
TSRange range = ranges[idx];
- uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer);
- uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer);
+ uint32_t beg_byte = range.start_byte + XTS_PARSER (parser)->visible_beg;
+ uint32_t end_byte = range.end_byte + XTS_PARSER (parser)->visible_beg;
eassert (BUF_BEGV_BYTE (buffer) <= beg_byte);
eassert (beg_byte <= end_byte);
eassert (end_byte <= BUF_ZV_BYTE (buffer));
return Fnreverse (list);
}
+/* Convert the Lisp range list RANGES into a freshly allocated array of
+   tree-sitter ranges, and set *LEN to the number of elements.
+
+   RANGES must be a valid range list (each element a cons
+   (BEG . END) of fixnum character positions, no overlap, etc).
+   PARSER must be a parser.  This function doesn't check for types.
+   Each position is converted to a byte position in PARSER's buffer
+   and stored as an offset from PARSER's visible_beg.
+
+   Signal args-out-of-range if RANGES has more than UINT32_MAX
+   elements.  The caller must free the returned array with xfree.  */
+static TSRange *
+treesit_make_ts_ranges (Lisp_Object ranges, Lisp_Object parser, uint32_t *len)
+{
+  ptrdiff_t ranges_len = list_length (ranges);
+  if (ranges_len > UINT32_MAX)
+    xsignal (Qargs_out_of_range, list2 (ranges, Flength (ranges)));
+
+  *len = (uint32_t) ranges_len;
+  TSRange *treesit_ranges = xmalloc (sizeof (TSRange) * ranges_len);
+
+  struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
+
+  /* IDX has the same type as RANGES_LEN: the length may be as large
+     as UINT32_MAX, which an int index cannot reach.  */
+  for (ptrdiff_t idx = 0; idx < ranges_len; idx++, ranges = XCDR (ranges))
+    {
+      Lisp_Object range = XCAR (ranges);
+      ptrdiff_t beg_byte = buf_charpos_to_bytepos (buffer,
+                                                   XFIXNUM (XCAR (range)));
+      ptrdiff_t end_byte = buf_charpos_to_bytepos (buffer,
+                                                   XFIXNUM (XCDR (range)));
+
+      /* These rely on the buffer size fitting in uint32_t.
+         NOTE(review): this function does not check the buffer size
+         itself; confirm every caller does so before calling.  */
+      eassert (beg_byte - BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
+      eassert (end_byte - BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
+
+      /* We don't care about points, put in dummy values.  Compute the
+         offset first and narrow the result, rather than narrowing
+         the byte position before subtracting.  */
+      TSRange rg =
+        {
+          {0, 0}, {0, 0},
+          (uint32_t) (beg_byte - XTS_PARSER (parser)->visible_beg),
+          (uint32_t) (end_byte - XTS_PARSER (parser)->visible_beg)
+        };
+      treesit_ranges[idx] = rg;
+    }
+
+  return treesit_ranges;
+}
+
DEFUN ("treesit-parser-set-included-ranges",
Ftreesit_parser_set_included_ranges,
Streesit_parser_set_included_ranges,
if (NILP (ranges))
{
/* If RANGES is nil, make parser to parse the whole document.
- To do that we give tree-sitter a 0 length, the range is a
- dummy. */
- TSRange treesit_range = {{0, 0}, {0, 0}, 0, 0};
+ To do that we give tree-sitter a 0 length. */
success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
- &treesit_range , 0);
+ NULL , 0);
}
else
{
- /* Set ranges for PARSER. */
- if (list_length (ranges) > UINT32_MAX)
- xsignal (Qargs_out_of_range, list2 (ranges, Flength (ranges)));
- uint32_t len = (uint32_t) list_length (ranges);
- TSRange *treesit_ranges = xmalloc (sizeof (TSRange) * len);
- struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
-
- /* We can use XFIXNUM, XCAR, XCDR freely because we have checked
- the input by treesit_check_range_argument. */
-
- for (int idx = 0; !NILP (ranges); idx++, ranges = XCDR (ranges))
- {
- Lisp_Object range = XCAR (ranges);
- ptrdiff_t beg_byte = buf_charpos_to_bytepos (buffer,
- XFIXNUM (XCAR (range)));
- ptrdiff_t end_byte = buf_charpos_to_bytepos (buffer,
- XFIXNUM (XCDR (range)));
- /* Shouldn't violate assertion since we just checked for
- buffer size at the beginning of this function. */
- eassert (beg_byte - BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
- eassert (end_byte - BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
- /* We don't care about start and end points, put in dummy
- values. */
- TSRange rg = {{0, 0}, {0, 0},
- (uint32_t) beg_byte - BUF_BEGV_BYTE (buffer),
- (uint32_t) end_byte - BUF_BEGV_BYTE (buffer)};
- treesit_ranges[idx] = rg;
- }
+ uint32_t len = 0;
+ TSRange *treesit_ranges = treesit_make_ts_ranges (ranges, parser, &len);
success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
treesit_ranges, len);
xfree (treesit_ranges);
treesit_check_parser (parser);
treesit_initialize ();
- /* When the parser doesn't have a range set and we call
- ts_parser_included_ranges on it, it doesn't return an empty list,
- but rather return DEFAULT_RANGE. (A single range where start_byte
- = 0, end_byte = UINT32_MAX). So we need to track whether the
- parser is ranged ourselves. */
- if (NILP (XTS_PARSER (parser)->last_set_ranges))
- return Qnil;
-
- uint32_t len;
- const TSRange *ranges
- = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len);
-
- /* Our return value depends on the buffer state (BUF_BEGV_BYTE,
- etc), so we need to sync up. */
- treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
treesit_sync_visible_region (parser);
- struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
- return treesit_make_ranges (ranges, len, buffer);
+ return XTS_PARSER (parser)->last_set_ranges;
}
DEFUN ("treesit-parser-notifiers", Ftreesit_parser_notifiers,