From d53ff9fe28b63dcd9bab2479cb7a517ba7233016 Mon Sep 17 00:00:00 2001 From: Stefan Monnier Date: Wed, 12 Apr 2023 15:44:58 -0400 Subject: [PATCH] src/regex-emacs.c (POS_AS_IN_BUFFER): Delete macro That macro added 1 to buffer positions because: Strings are 0-indexed, buffers are 1-indexed but the reality is that this 1 was added to the regexp engine's "byte offsets" which are not 1-based byte positions as used throughout the rest of Emacs, but they are BEGV_BYTE-relative offsets, so the two did not cancel out. * src/regex-emacs.c (PTR_TO_OFFSET, POS_AS_IN_BUFFER): Delete macros; use `POINTER_TO_OFFSET` instead. (re_search_2, re_match_2, re_match_2_internal): Adjust accordingly. * src/syntax.h (SYNTAX_TABLE_BYTE_TO_CHAR): Don't remove 1 from buffer byteoffsets now that `POS_AS_IN_BUFFER` doesn't add it any more. --- src/regex-emacs.c | 23 ++++++++--------------- src/syntax.h | 12 ++++++------ 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/src/regex-emacs.c b/src/regex-emacs.c index 2571812cb39..969f2ff9464 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c @@ -47,13 +47,6 @@ /* Make syntax table lookup grant data in gl_state. */ #define SYNTAX(c) syntax_property (c, 1) -/* Convert the pointer to the char to BEG-based offset from the start. */ -#define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d)) -/* Strings are 0-indexed, buffers are 1-indexed; pun on the boolean - result to get the right base index. */ -#define POS_AS_IN_BUFFER(p) \ - ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object))) - #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte) #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte) #define RE_STRING_CHAR(p, multibyte) \ @@ -3260,7 +3253,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ptrdiff_t size1, gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ { - ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos)); + ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (startpos); SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); } @@ -3873,7 +3866,7 @@ re_match_2 (struct re_pattern_buffer *bufp, ptrdiff_t charpos; gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */ - charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos)); + charpos = SYNTAX_TABLE_BYTE_TO_CHAR (pos); SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1); result = re_match_2_internal (bufp, (re_char *) string1, size1, @@ -4806,7 +4799,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, int c1, c2; int s1, s2; int dummy; - ptrdiff_t offset = PTR_TO_OFFSET (d); + ptrdiff_t offset = POINTER_TO_OFFSET (d); ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1; UPDATE_SYNTAX_TABLE (charpos); GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); @@ -4846,7 +4839,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, int c1, c2; int s1, s2; int dummy; - ptrdiff_t offset = PTR_TO_OFFSET (d); + ptrdiff_t offset = POINTER_TO_OFFSET (d); ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (charpos); PREFETCH (); @@ -4889,7 +4882,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, int c1, c2; int s1, s2; int dummy; - ptrdiff_t offset = PTR_TO_OFFSET (d); + ptrdiff_t offset = POINTER_TO_OFFSET (d); ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1; UPDATE_SYNTAX_TABLE (charpos); GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); @@ -4931,7 +4924,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, is the character at D, and S2 is the syntax of C2. */ int c1, c2; int s1, s2; - ptrdiff_t offset = PTR_TO_OFFSET (d); + ptrdiff_t offset = POINTER_TO_OFFSET (d); ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (charpos); PREFETCH (); @@ -4972,7 +4965,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, is the character at D, and S2 is the syntax of C2. */ int c1, c2; int s1, s2; - ptrdiff_t offset = PTR_TO_OFFSET (d); + ptrdiff_t offset = POINTER_TO_OFFSET (d); ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1; UPDATE_SYNTAX_TABLE (charpos); GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); @@ -5008,7 +5001,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, mcnt); PREFETCH (); { - ptrdiff_t offset = PTR_TO_OFFSET (d); + ptrdiff_t offset = POINTER_TO_OFFSET (d); ptrdiff_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset); UPDATE_SYNTAX_TABLE (pos1); } diff --git a/src/syntax.h b/src/syntax.h index aefe4dafa42..05d58eff05f 100644 --- a/src/syntax.h +++ b/src/syntax.h @@ -145,7 +145,7 @@ extern bool syntax_prefix_flag_p (int c); extern unsigned char const syntax_spec_code[0400]; -/* Convert the byte offset BYTEPOS into a character position, +/* Convert the regexp BYTEOFFSET into a character position, for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT. The value is meant for use in code that does nothing when @@ -153,19 +153,19 @@ extern unsigned char const syntax_spec_code[0400]; for speed. */ INLINE ptrdiff_t -SYNTAX_TABLE_BYTE_TO_CHAR (ptrdiff_t bytepos) +SYNTAX_TABLE_BYTE_TO_CHAR (ptrdiff_t byteoffset) { return (! parse_sexp_lookup_properties ? 0 : STRINGP (gl_state.object) - ? string_byte_to_char (gl_state.object, bytepos) + ? string_byte_to_char (gl_state.object, byteoffset) : BUFFERP (gl_state.object) ? ((buf_bytepos_to_charpos (XBUFFER (gl_state.object), - (bytepos + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1)))) + (byteoffset + BUF_BEGV_BYTE (XBUFFER (gl_state.object)))))) : NILP (gl_state.object) - ? BYTE_TO_CHAR (bytepos + BEGV_BYTE - 1) - : bytepos); + ? BYTE_TO_CHAR (byteoffset + BEGV_BYTE) + : byteoffset); } /* Make syntax table state (gl_state) good for CHARPOS, assuming it is -- 2.39.2