From 31182c1d17f6f7bc946d5e4576e025c9b975e0b5 Mon Sep 17 00:00:00 2001 From: Alan Mackenzie Date: Fri, 1 Mar 2019 17:35:12 +0000 Subject: [PATCH] Maintain interval ->position fields correctly in update_interval Also fix some anomalies in the handling of byte positions in regex-emacs.c This fixes bug #34525. * src/intervals.c (SET_PARENT_POSITION): New macro. (update_interval): When moving to an interval's parent, set that parent's ->position field, to maintain the consistency of the tree. * src/intervals.h (struct interval): Amend the comment describing when ->position is valid. * src/pdumper.c: Update the hash associated with struct interval. * src/regex-emacs.c (re_match_2_internal): Only invoke PTR_TO_OFFSET on a known character boundary. Only perform arithmetic on character positions, not on byte positions. Correct the argument to an invocation of UPDATE_SYNTAX_TABLE_FORWARD by adding 1 to it (in case wordend:). * src/syntax.c (update_syntax_table): Remove the now redundant code that set the ->position field of all parents of the interval found by update_interval. --- src/intervals.c | 30 +++++++++++++++++++++++------- src/intervals.h | 14 +++++++++----- src/pdumper.c | 2 +- src/regex-emacs.c | 14 +++++++------- src/syntax.c | 14 -------------- 5 files changed, 40 insertions(+), 34 deletions(-) diff --git a/src/intervals.c b/src/intervals.c index 524bb944e51..8f39c45762f 100644 --- a/src/intervals.c +++ b/src/intervals.c @@ -713,11 +713,21 @@ previous_interval (register INTERVAL interval) return NULL; } -/* Find the interval containing POS given some non-NULL INTERVAL - in the same tree. Note that we need to update interval->position - if we go down the tree. - To speed up the process, we assume that the ->position of - I and all its parents is already uptodate. */ +/* Set the ->position field of I's parent, based on I->position. 
*/ +#define SET_PARENT_POSITION(i) \ + if (AM_LEFT_CHILD (i)) \ + INTERVAL_PARENT (i)->position = \ + i->position + TOTAL_LENGTH (i) - LEFT_TOTAL_LENGTH (i); \ + else \ + INTERVAL_PARENT (i)->position = \ + i->position - LEFT_TOTAL_LENGTH (i) \ + - LENGTH (INTERVAL_PARENT (i)) + +/* Find the interval containing POS, given some non-NULL INTERVAL in + the same tree. Note that we update interval->position in each + interval we traverse, assuming it is already correctly set for the + argument I. We don't assume that any other interval already has a + correctly set ->position. */ INTERVAL update_interval (register INTERVAL i, ptrdiff_t pos) { @@ -738,7 +748,10 @@ update_interval (register INTERVAL i, ptrdiff_t pos) else if (NULL_PARENT (i)) error ("Point before start of properties"); else - i = INTERVAL_PARENT (i); + { + SET_PARENT_POSITION (i); + i = INTERVAL_PARENT (i); + } continue; } else if (pos >= INTERVAL_LAST_POS (i)) @@ -753,7 +766,10 @@ update_interval (register INTERVAL i, ptrdiff_t pos) else if (NULL_PARENT (i)) error ("Point %"pD"d after end of properties", pos); else - i = INTERVAL_PARENT (i); + { + SET_PARENT_POSITION (i); + i = INTERVAL_PARENT (i); + } continue; } else diff --git a/src/intervals.h b/src/intervals.h index 9c5adf33a14..e9166946d9a 100644 --- a/src/intervals.h +++ b/src/intervals.h @@ -31,11 +31,15 @@ struct interval /* The first group of entries deal with the tree structure. */ ptrdiff_t total_length; /* Length of myself and both children. */ ptrdiff_t position; /* Cache of interval's character position. */ - /* This field is usually updated - simultaneously with an interval - traversal, there is no guarantee - that it is valid for a random - interval. */ + /* This field is valid in the final + target interval returned by + find_interval, next_interval, + previous_interval and + update_interval. It cannot be + depended upon in any intermediate + intervals traversed by these + functions, or any other + interval. 
*/ struct interval *left; /* Intervals which precede me. */ struct interval *right; /* Intervals which succeed me. */ diff --git a/src/pdumper.c b/src/pdumper.c index 4d35fd1233f..bba43370a14 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -2064,7 +2064,7 @@ dump_interval_tree (struct dump_context *ctx, INTERVAL tree, dump_off parent_offset) { -#if CHECK_STRUCTS && !defined (HASH_interval_9110163DA0) +#if CHECK_STRUCTS && !defined (HASH_interval_1B38941C37) # error "interval changed. See CHECK_STRUCTS comment." #endif // TODO: output tree breadth-first? diff --git a/src/regex-emacs.c b/src/regex-emacs.c index b667a43a37f..45b4f8107c7 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c @@ -4732,8 +4732,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, int c1, c2; int s1, s2; int dummy; - ptrdiff_t offset = PTR_TO_OFFSET (d - 1); - ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ptrdiff_t offset = PTR_TO_OFFSET (d); + ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1; UPDATE_SYNTAX_TABLE (charpos); GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); s1 = SYNTAX (c1); @@ -4811,8 +4811,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, int c1, c2; int s1, s2; int dummy; - ptrdiff_t offset = PTR_TO_OFFSET (d) - 1; - ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ptrdiff_t offset = PTR_TO_OFFSET (d); + ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1; UPDATE_SYNTAX_TABLE (charpos); GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); s1 = SYNTAX (c1); @@ -4826,7 +4826,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, { PREFETCH_NOLIMIT (); GET_CHAR_AFTER (c2, d, dummy); - UPDATE_SYNTAX_TABLE_FORWARD (charpos); + UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1); s2 = SYNTAX (c2); /* ... 
and S2 is Sword, and WORD_BOUNDARY_P (C1, C2) @@ -4890,8 +4890,8 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, is the character at D, and S2 is the syntax of C2. */ int c1, c2; int s1, s2; - ptrdiff_t offset = PTR_TO_OFFSET (d) - 1; - ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); + ptrdiff_t offset = PTR_TO_OFFSET (d); + ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1; UPDATE_SYNTAX_TABLE (charpos); GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); s1 = SYNTAX (c1); diff --git a/src/syntax.c b/src/syntax.c index dd2f56f2cfa..fe1e2d236b9 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -340,20 +340,6 @@ update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init, invalidate = false; if (!i) return; - /* interval_of updates only ->position of the return value, so - update the parents manually to speed up update_interval. */ - while (!NULL_PARENT (i)) - { - if (AM_RIGHT_CHILD (i)) - INTERVAL_PARENT (i)->position = i->position - - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */ - - TOTAL_LENGTH (INTERVAL_PARENT (i)) - + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i)); - else - INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i) - + TOTAL_LENGTH (i); - i = INTERVAL_PARENT (i); - } i = gl_state.forward_i; gl_state.b_property = i->position - gl_state.offset; gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset; -- 2.39.2