char fastmap[0400];
/* True means regexp was compiled to do full POSIX backtracking. */
bool posix;
+ /* True means we're inside a buffer match. */
+ bool busy;
};
/* The instances of that struct. */
ptrdiff_t, ptrdiff_t, EMACS_INT, int,
Lisp_Object, Lisp_Object, bool);
+Lisp_Object re_match_object;
+
static _Noreturn void
matcher_overflow (void)
{
#endif
}
-static void
-thaw_buffer_relocation (void)
-{
-#ifdef REL_ALLOC
- unbind_to (SPECPDL_INDEX () - 1, Qnil);
-#endif
-}
-
/* Compile a regexp and signal a Lisp error if anything goes wrong.
PATTERN is the pattern to compile.
CP is the place to put the result.
const char *whitespace_regexp;
char *val;
+ eassert (!cp->busy);
cp->regexp = Qnil;
cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
cp->posix = posix;
struct regexp_cache *cp;
for (cp = searchbuf_head; cp != 0; cp = cp->next)
- {
- cp->buf.allocated = cp->buf.used;
- cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
- }
+ if (!cp->busy)
+ {
+ cp->buf.allocated = cp->buf.used;
+ cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
+ }
}
/* Clear the regexp cache w.r.t. a particular syntax table,
/* It's tempting to compare with the syntax-table we've actually changed,
but it's not sufficient because char-table inheritance means that
modifying one syntax-table can change others at the same time. */
- if (!EQ (searchbufs[i].syntax_table, Qt))
+ if (!searchbufs[i].busy && !EQ (searchbufs[i].syntax_table, Qt))
searchbufs[i].regexp = Qnil;
}
+static void
+unfreeze_pattern (void *arg)
+{
+ struct regexp_cache *searchbuf = arg;
+ searchbuf->busy = false;
+}
+
+static void
+freeze_pattern (struct regexp_cache *searchbuf)
+{
+ eassert (!searchbuf->busy);
+ record_unwind_protect_ptr (unfreeze_pattern, searchbuf);
+ searchbuf->busy = true;
+}
+
/* Compile a regexp if necessary, but first check to see if there's one in
the cache.
PATTERN is the pattern to compile.
POSIX is true if we want full backtracking (POSIX style) for this pattern.
False means backtrack only enough to get a valid match. */
-struct re_pattern_buffer *
+static struct regexp_cache *
compile_pattern (Lisp_Object pattern, struct re_registers *regp,
Lisp_Object translate, bool posix, bool multibyte)
{
if (NILP (cp->regexp))
goto compile_it;
if (SCHARS (cp->regexp) == SCHARS (pattern)
+ && !cp->busy
&& STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
&& !NILP (Fstring_equal (cp->regexp, pattern))
&& EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
string value. */
if (cp->next == 0)
{
+ if (cp->busy)
+ error ("Too much matching reentrancy");
compile_it:
+ eassert (!cp->busy);
compile_pattern_1 (cp, pattern, translate, posix);
break;
}
/* The compiled pattern can be used both for multibyte and unibyte
target. But, we have to tell which the pattern is used for. */
cp->buf.target_multibyte = multibyte;
-
- return &cp->buf;
+ return cp;
}
\f
unsigned char *p1, *p2;
ptrdiff_t s1, s2;
register ptrdiff_t i;
- struct re_pattern_buffer *bufp;
if (running_asynch_code)
save_search_regs ();
BVAR (current_buffer, case_eqv_table));
CHECK_STRING (string);
- bufp = compile_pattern (string,
- (NILP (Vinhibit_changing_match_data)
- ? &search_regs : NULL),
- (!NILP (BVAR (current_buffer, case_fold_search))
- ? BVAR (current_buffer, case_canon_table) : Qnil),
- posix,
- !NILP (BVAR (current_buffer, enable_multibyte_characters)));
+
+ /* Snapshot in case Lisp changes the value. */
+ bool preserve_match_data = NILP (Vinhibit_changing_match_data);
+
+ struct regexp_cache *cache_entry = compile_pattern (
+ string,
+ preserve_match_data ? &search_regs : NULL,
+ (!NILP (BVAR (current_buffer, case_fold_search))
+ ? BVAR (current_buffer, case_canon_table) : Qnil),
+ posix,
+ !NILP (BVAR (current_buffer, enable_multibyte_characters)));
/* Do a pending quit right away, to avoid paradoxical behavior */
maybe_quit ();
s2 = 0;
}
- re_match_object = Qnil;
-
+ ptrdiff_t count = SPECPDL_INDEX ();
freeze_buffer_relocation ();
- i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
+ freeze_pattern (cache_entry);
+ re_match_object = Qnil;
+ i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
PT_BYTE - BEGV_BYTE,
- (NILP (Vinhibit_changing_match_data)
- ? &search_regs : NULL),
+ preserve_match_data ? &search_regs : NULL,
ZV_BYTE - BEGV_BYTE);
- thaw_buffer_relocation ();
if (i == -2)
matcher_overflow ();
val = (i >= 0 ? Qt : Qnil);
- if (NILP (Vinhibit_changing_match_data) && i >= 0)
+ if (preserve_match_data && i >= 0)
{
for (i = 0; i < search_regs.num_regs; i++)
if (search_regs.start[i] >= 0)
XSETBUFFER (last_thing_searched, current_buffer);
}
- return val;
+ return unbind_to (count, val);
}
DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
BVAR (current_buffer, case_eqv_table));
- bufp = compile_pattern (regexp,
- (NILP (Vinhibit_changing_match_data)
- ? &search_regs : NULL),
- (!NILP (BVAR (current_buffer, case_fold_search))
- ? BVAR (current_buffer, case_canon_table) : Qnil),
- posix,
- STRING_MULTIBYTE (string));
+ bufp = &compile_pattern (regexp,
+ (NILP (Vinhibit_changing_match_data)
+ ? &search_regs : NULL),
+ (!NILP (BVAR (current_buffer, case_fold_search))
+ ? BVAR (current_buffer, case_canon_table) : Qnil),
+ posix,
+ STRING_MULTIBYTE (string))->buf;
re_match_object = string;
-
val = re_search (bufp, SSDATA (string),
SBYTES (string), pos_byte,
SBYTES (string) - pos_byte,
ptrdiff_t val;
struct re_pattern_buffer *bufp;
- bufp = compile_pattern (regexp, 0, table,
- 0, STRING_MULTIBYTE (string));
+ bufp = &compile_pattern (regexp, 0, table,
+ 0, STRING_MULTIBYTE (string))->buf;
re_match_object = string;
-
val = re_search (bufp, SSDATA (string),
SBYTES (string), 0,
SBYTES (string), 0);
struct re_pattern_buffer *bufp;
regexp = string_make_unibyte (regexp);
+ bufp = &compile_pattern (regexp, 0,
+ Vascii_canon_table, 0,
+ 0)->buf;
re_match_object = Qt;
- bufp = compile_pattern (regexp, 0,
- Vascii_canon_table, 0,
- 0);
val = re_search (bufp, string, len, 0, len, 0);
return val;
}
ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
{
bool multibyte;
- struct re_pattern_buffer *buf;
unsigned char *p1, *p2;
ptrdiff_t s1, s2;
ptrdiff_t len;
s1 = 0;
p2 = SDATA (string);
s2 = SBYTES (string);
- re_match_object = string;
multibyte = STRING_MULTIBYTE (string);
}
else
s1 = ZV_BYTE - BEGV_BYTE;
s2 = 0;
}
- re_match_object = Qnil;
multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
}
- buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
+ struct regexp_cache *cache_entry =
+ compile_pattern (regexp, 0, Qnil, 0, multibyte);
+ ptrdiff_t count = SPECPDL_INDEX ();
freeze_buffer_relocation ();
- len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
+ freeze_pattern (cache_entry);
+ re_match_object = STRINGP (string) ? string : Qnil;
+ len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
pos_byte, NULL, limit_byte);
- thaw_buffer_relocation ();
+ unbind_to (count, Qnil);
return len;
}
static struct re_registers search_regs_1;
static EMACS_INT
-search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
- ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
- int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
+search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
+ ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
+ Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
{
- ptrdiff_t len = SCHARS (string);
- ptrdiff_t len_byte = SBYTES (string);
- register ptrdiff_t i;
+ unsigned char *p1, *p2;
+ ptrdiff_t s1, s2;
- if (running_asynch_code)
- save_search_regs ();
+ /* Snapshot in case Lisp changes the value. */
+ bool preserve_match_data = NILP (Vinhibit_changing_match_data);
- /* Searching 0 times means don't move. */
- /* Null string is found at starting position. */
- if (len == 0 || n == 0)
+ struct regexp_cache *cache_entry =
+ compile_pattern (string,
+ preserve_match_data ? &search_regs : &search_regs_1,
+ trt, posix,
+ !NILP (BVAR (current_buffer, enable_multibyte_characters)));
+ struct re_pattern_buffer *bufp = &cache_entry->buf;
+
+ maybe_quit (); /* Do a pending quit right away,
+ to avoid paradoxical behavior */
+ /* Get pointers and sizes of the two strings
+ that make up the visible portion of the buffer. */
+
+ p1 = BEGV_ADDR;
+ s1 = GPT_BYTE - BEGV_BYTE;
+ p2 = GAP_END_ADDR;
+ s2 = ZV_BYTE - GPT_BYTE;
+ if (s1 < 0)
{
- set_search_regs (pos_byte, 0);
- return pos;
+ p2 = p1;
+ s2 = ZV_BYTE - BEGV_BYTE;
+ s1 = 0;
}
-
- if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
+ if (s2 < 0)
{
- unsigned char *p1, *p2;
- ptrdiff_t s1, s2;
- struct re_pattern_buffer *bufp;
+ s1 = ZV_BYTE - BEGV_BYTE;
+ s2 = 0;
+ }
- bufp = compile_pattern (string,
- (NILP (Vinhibit_changing_match_data)
- ? &search_regs : &search_regs_1),
- trt, posix,
- !NILP (BVAR (current_buffer, enable_multibyte_characters)));
+ ptrdiff_t count = SPECPDL_INDEX ();
+ freeze_buffer_relocation ();
+ freeze_pattern (cache_entry);
- maybe_quit (); /* Do a pending quit right away,
- to avoid paradoxical behavior */
- /* Get pointers and sizes of the two strings
- that make up the visible portion of the buffer. */
+ while (n < 0)
+ {
+ ptrdiff_t val;
- p1 = BEGV_ADDR;
- s1 = GPT_BYTE - BEGV_BYTE;
- p2 = GAP_END_ADDR;
- s2 = ZV_BYTE - GPT_BYTE;
- if (s1 < 0)
- {
- p2 = p1;
- s2 = ZV_BYTE - BEGV_BYTE;
- s1 = 0;
- }
- if (s2 < 0)
- {
- s1 = ZV_BYTE - BEGV_BYTE;
- s2 = 0;
- }
re_match_object = Qnil;
+ val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
+ pos_byte - BEGV_BYTE, lim_byte - pos_byte,
+ preserve_match_data ? &search_regs : &search_regs_1,
+ /* Don't allow match past current point */
+ pos_byte - BEGV_BYTE);
+ if (val == -2)
+ {
+ matcher_overflow ();
+ }
+ if (val >= 0)
+ {
+ if (preserve_match_data)
+ {
+ pos_byte = search_regs.start[0] + BEGV_BYTE;
+ for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
+ if (search_regs.start[i] >= 0)
+ {
+ search_regs.start[i]
+ = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
+ search_regs.end[i]
+ = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
+ }
+ XSETBUFFER (last_thing_searched, current_buffer);
+ /* Set pos to the new position. */
+ pos = search_regs.start[0];
+ }
+ else
+ {
+ pos_byte = search_regs_1.start[0] + BEGV_BYTE;
+ /* Set pos to the new position. */
+ pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
+ }
+ }
+ else
+ {
+ unbind_to (count, Qnil);
+ return (n);
+ }
+ n++;
+ maybe_quit ();
+ }
+ while (n > 0)
+ {
+ ptrdiff_t val;
- freeze_buffer_relocation ();
+ re_match_object = Qnil;
+ val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
+ pos_byte - BEGV_BYTE, lim_byte - pos_byte,
+ preserve_match_data ? &search_regs : &search_regs_1,
+ lim_byte - BEGV_BYTE);
+ if (val == -2)
+ {
+ matcher_overflow ();
+ }
+ if (val >= 0)
+ {
+ if (preserve_match_data)
+ {
+ pos_byte = search_regs.end[0] + BEGV_BYTE;
+ for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
+ if (search_regs.start[i] >= 0)
+ {
+ search_regs.start[i]
+ = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
+ search_regs.end[i]
+ = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
+ }
+ XSETBUFFER (last_thing_searched, current_buffer);
+ pos = search_regs.end[0];
+ }
+ else
+ {
+ pos_byte = search_regs_1.end[0] + BEGV_BYTE;
+ pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
+ }
+ }
+ else
+ {
+ unbind_to (count, Qnil);
+ return (0 - n);
+ }
+ n--;
+ maybe_quit ();
+ }
+ unbind_to (count, Qnil);
+ return (pos);
+}
- while (n < 0)
- {
- ptrdiff_t val;
-
- val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
- pos_byte - BEGV_BYTE, lim_byte - pos_byte,
- (NILP (Vinhibit_changing_match_data)
- ? &search_regs : &search_regs_1),
- /* Don't allow match past current point */
- pos_byte - BEGV_BYTE);
- if (val == -2)
- {
- matcher_overflow ();
- }
- if (val >= 0)
- {
- if (NILP (Vinhibit_changing_match_data))
- {
- pos_byte = search_regs.start[0] + BEGV_BYTE;
- for (i = 0; i < search_regs.num_regs; i++)
- if (search_regs.start[i] >= 0)
- {
- search_regs.start[i]
- = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
- search_regs.end[i]
- = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
- }
- XSETBUFFER (last_thing_searched, current_buffer);
- /* Set pos to the new position. */
- pos = search_regs.start[0];
- }
- else
- {
- pos_byte = search_regs_1.start[0] + BEGV_BYTE;
- /* Set pos to the new position. */
- pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
- }
- }
- else
- {
- thaw_buffer_relocation ();
- return (n);
- }
- n++;
- maybe_quit ();
- }
- while (n > 0)
- {
- ptrdiff_t val;
-
- val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
- pos_byte - BEGV_BYTE, lim_byte - pos_byte,
- (NILP (Vinhibit_changing_match_data)
- ? &search_regs : &search_regs_1),
- lim_byte - BEGV_BYTE);
- if (val == -2)
- {
- matcher_overflow ();
- }
- if (val >= 0)
- {
- if (NILP (Vinhibit_changing_match_data))
- {
- pos_byte = search_regs.end[0] + BEGV_BYTE;
- for (i = 0; i < search_regs.num_regs; i++)
- if (search_regs.start[i] >= 0)
- {
- search_regs.start[i]
- = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
- search_regs.end[i]
- = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
- }
- XSETBUFFER (last_thing_searched, current_buffer);
- pos = search_regs.end[0];
- }
- else
- {
- pos_byte = search_regs_1.end[0] + BEGV_BYTE;
- pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
- }
- }
- else
- {
- thaw_buffer_relocation ();
- return (0 - n);
- }
- n--;
- maybe_quit ();
- }
- thaw_buffer_relocation ();
- return (pos);
+static EMACS_INT
+search_buffer_non_re (Lisp_Object string, ptrdiff_t pos,
+ ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte,
+ EMACS_INT n, int RE, Lisp_Object trt, Lisp_Object inverse_trt,
+ bool posix)
+{
+ unsigned char *raw_pattern, *pat;
+ ptrdiff_t raw_pattern_size;
+ ptrdiff_t raw_pattern_size_byte;
+ unsigned char *patbuf;
+ bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
+ unsigned char *base_pat;
+ /* Set to positive if we find a non-ASCII char that need
+ translation. Otherwise set to zero later. */
+ int char_base = -1;
+ bool boyer_moore_ok = 1;
+ USE_SAFE_ALLOCA;
+
+ /* MULTIBYTE says whether the text to be searched is multibyte.
+ We must convert PATTERN to match that, or we will not really
+ find things right. */
+
+ if (multibyte == STRING_MULTIBYTE (string))
+ {
+ raw_pattern = SDATA (string);
+ raw_pattern_size = SCHARS (string);
+ raw_pattern_size_byte = SBYTES (string);
}
- else /* non-RE case */
+ else if (multibyte)
{
- unsigned char *raw_pattern, *pat;
- ptrdiff_t raw_pattern_size;
- ptrdiff_t raw_pattern_size_byte;
- unsigned char *patbuf;
- bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
- unsigned char *base_pat;
- /* Set to positive if we find a non-ASCII char that need
- translation. Otherwise set to zero later. */
- int char_base = -1;
- bool boyer_moore_ok = 1;
- USE_SAFE_ALLOCA;
-
- /* MULTIBYTE says whether the text to be searched is multibyte.
- We must convert PATTERN to match that, or we will not really
- find things right. */
-
- if (multibyte == STRING_MULTIBYTE (string))
- {
- raw_pattern = SDATA (string);
- raw_pattern_size = SCHARS (string);
- raw_pattern_size_byte = SBYTES (string);
- }
- else if (multibyte)
- {
- raw_pattern_size = SCHARS (string);
- raw_pattern_size_byte
- = count_size_as_multibyte (SDATA (string),
- raw_pattern_size);
- raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
- copy_text (SDATA (string), raw_pattern,
- SCHARS (string), 0, 1);
- }
- else
- {
- /* Converting multibyte to single-byte.
-
- ??? Perhaps this conversion should be done in a special way
- by subtracting nonascii-insert-offset from each non-ASCII char,
- so that only the multibyte chars which really correspond to
- the chosen single-byte character set can possibly match. */
- raw_pattern_size = SCHARS (string);
- raw_pattern_size_byte = SCHARS (string);
- raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
- copy_text (SDATA (string), raw_pattern,
- SBYTES (string), 1, 0);
- }
+ raw_pattern_size = SCHARS (string);
+ raw_pattern_size_byte
+ = count_size_as_multibyte (SDATA (string),
+ raw_pattern_size);
+ raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
+ copy_text (SDATA (string), raw_pattern,
+ SCHARS (string), 0, 1);
+ }
+ else
+ {
+ /* Converting multibyte to single-byte.
+
+ ??? Perhaps this conversion should be done in a special way
+ by subtracting nonascii-insert-offset from each non-ASCII char,
+ so that only the multibyte chars which really correspond to
+ the chosen single-byte character set can possibly match. */
+ raw_pattern_size = SCHARS (string);
+ raw_pattern_size_byte = SCHARS (string);
+ raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
+ copy_text (SDATA (string), raw_pattern,
+ SBYTES (string), 1, 0);
+ }
- /* Copy and optionally translate the pattern. */
- len = raw_pattern_size;
- len_byte = raw_pattern_size_byte;
- SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
- pat = patbuf;
- base_pat = raw_pattern;
- if (multibyte)
- {
- /* Fill patbuf by translated characters in STRING while
- checking if we can use boyer-moore search. If TRT is
- non-nil, we can use boyer-moore search only if TRT can be
- represented by the byte array of 256 elements. For that,
- all non-ASCII case-equivalents of all case-sensitive
- characters in STRING must belong to the same character
- group (two characters belong to the same group iff their
- multibyte forms are the same except for the last byte;
- i.e. every 64 characters form a group; U+0000..U+003F,
- U+0040..U+007F, U+0080..U+00BF, ...). */
-
- while (--len >= 0)
- {
- unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
- int c, translated, inverse;
- int in_charlen, charlen;
-
- /* If we got here and the RE flag is set, it's because we're
- dealing with a regexp known to be trivial, so the backslash
- just quotes the next character. */
- if (RE && *base_pat == '\\')
- {
- len--;
- raw_pattern_size--;
- len_byte--;
- base_pat++;
- }
+ /* Copy and optionally translate the pattern. */
+ ptrdiff_t len = raw_pattern_size;
+ ptrdiff_t len_byte = raw_pattern_size_byte;
+ SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
+ pat = patbuf;
+ base_pat = raw_pattern;
+ if (multibyte)
+ {
+ /* Fill patbuf by translated characters in STRING while
+ checking if we can use boyer-moore search. If TRT is
+ non-nil, we can use boyer-moore search only if TRT can be
+ represented by the byte array of 256 elements. For that,
+ all non-ASCII case-equivalents of all case-sensitive
+ characters in STRING must belong to the same character
+ group (two characters belong to the same group iff their
+ multibyte forms are the same except for the last byte;
+ i.e. every 64 characters form a group; U+0000..U+003F,
+ U+0040..U+007F, U+0080..U+00BF, ...). */
+
+ while (--len >= 0)
+ {
+ unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
+ int c, translated, inverse;
+ int in_charlen, charlen;
+
+ /* If we got here and the RE flag is set, it's because we're
+ dealing with a regexp known to be trivial, so the backslash
+ just quotes the next character. */
+ if (RE && *base_pat == '\\')
+ {
+ len--;
+ raw_pattern_size--;
+ len_byte--;
+ base_pat++;
+ }
- c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
+ c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
- if (NILP (trt))
- {
- str = base_pat;
- charlen = in_charlen;
- }
- else
- {
- /* Translate the character. */
- TRANSLATE (translated, trt, c);
- charlen = CHAR_STRING (translated, str_base);
- str = str_base;
-
- /* Check if C has any other case-equivalents. */
- TRANSLATE (inverse, inverse_trt, c);
- /* If so, check if we can use boyer-moore. */
- if (c != inverse && boyer_moore_ok)
- {
- /* Check if all equivalents belong to the same
- group of characters. Note that the check of C
- itself is done by the last iteration. */
- int this_char_base = -1;
+ if (NILP (trt))
+ {
+ str = base_pat;
+ charlen = in_charlen;
+ }
+ else
+ {
+ /* Translate the character. */
+ TRANSLATE (translated, trt, c);
+ charlen = CHAR_STRING (translated, str_base);
+ str = str_base;
+
+ /* Check if C has any other case-equivalents. */
+ TRANSLATE (inverse, inverse_trt, c);
+ /* If so, check if we can use boyer-moore. */
+ if (c != inverse && boyer_moore_ok)
+ {
+ /* Check if all equivalents belong to the same
+ group of characters. Note that the check of C
+ itself is done by the last iteration. */
+ int this_char_base = -1;
+
+ while (boyer_moore_ok)
+ {
+ if (ASCII_CHAR_P (inverse))
+ {
+ if (this_char_base > 0)
+ boyer_moore_ok = 0;
+ else
+ this_char_base = 0;
+ }
+ else if (CHAR_BYTE8_P (inverse))
+ /* Boyer-moore search can't handle a
+ translation of an eight-bit
+ character. */
+ boyer_moore_ok = 0;
+ else if (this_char_base < 0)
+ {
+ this_char_base = inverse & ~0x3F;
+ if (char_base < 0)
+ char_base = this_char_base;
+ else if (this_char_base != char_base)
+ boyer_moore_ok = 0;
+ }
+ else if ((inverse & ~0x3F) != this_char_base)
+ boyer_moore_ok = 0;
+ if (c == inverse)
+ break;
+ TRANSLATE (inverse, inverse_trt, inverse);
+ }
+ }
+ }
- while (boyer_moore_ok)
- {
- if (ASCII_CHAR_P (inverse))
- {
- if (this_char_base > 0)
- boyer_moore_ok = 0;
- else
- this_char_base = 0;
- }
- else if (CHAR_BYTE8_P (inverse))
- /* Boyer-moore search can't handle a
- translation of an eight-bit
- character. */
- boyer_moore_ok = 0;
- else if (this_char_base < 0)
- {
- this_char_base = inverse & ~0x3F;
- if (char_base < 0)
- char_base = this_char_base;
- else if (this_char_base != char_base)
- boyer_moore_ok = 0;
- }
- else if ((inverse & ~0x3F) != this_char_base)
- boyer_moore_ok = 0;
- if (c == inverse)
- break;
- TRANSLATE (inverse, inverse_trt, inverse);
- }
- }
- }
+ /* Store this character into the translated pattern. */
+ memcpy (pat, str, charlen);
+ pat += charlen;
+ base_pat += in_charlen;
+ len_byte -= in_charlen;
+ }
- /* Store this character into the translated pattern. */
- memcpy (pat, str, charlen);
- pat += charlen;
- base_pat += in_charlen;
- len_byte -= in_charlen;
- }
+ /* If char_base is still negative we didn't find any translated
+ non-ASCII characters. */
+ if (char_base < 0)
+ char_base = 0;
+ }
+ else
+ {
+ /* Unibyte buffer. */
+ char_base = 0;
+ while (--len >= 0)
+ {
+ int c, translated, inverse;
- /* If char_base is still negative we didn't find any translated
- non-ASCII characters. */
- if (char_base < 0)
- char_base = 0;
- }
- else
- {
- /* Unibyte buffer. */
- char_base = 0;
- while (--len >= 0)
- {
- int c, translated, inverse;
+ /* If we got here and the RE flag is set, it's because we're
+ dealing with a regexp known to be trivial, so the backslash
+ just quotes the next character. */
+ if (RE && *base_pat == '\\')
+ {
+ len--;
+ raw_pattern_size--;
+ base_pat++;
+ }
+ c = *base_pat++;
+ TRANSLATE (translated, trt, c);
+ *pat++ = translated;
+ /* Check that none of C's equivalents violates the
+ assumptions of boyer_moore. */
+ TRANSLATE (inverse, inverse_trt, c);
+ while (1)
+ {
+ if (inverse >= 0200)
+ {
+ boyer_moore_ok = 0;
+ break;
+ }
+ if (c == inverse)
+ break;
+ TRANSLATE (inverse, inverse_trt, inverse);
+ }
+ }
+ }
- /* If we got here and the RE flag is set, it's because we're
- dealing with a regexp known to be trivial, so the backslash
- just quotes the next character. */
- if (RE && *base_pat == '\\')
- {
- len--;
- raw_pattern_size--;
- base_pat++;
- }
- c = *base_pat++;
- TRANSLATE (translated, trt, c);
- *pat++ = translated;
- /* Check that none of C's equivalents violates the
- assumptions of boyer_moore. */
- TRANSLATE (inverse, inverse_trt, c);
- while (1)
- {
- if (inverse >= 0200)
- {
- boyer_moore_ok = 0;
- break;
- }
- if (c == inverse)
- break;
- TRANSLATE (inverse, inverse_trt, inverse);
- }
- }
- }
+ len_byte = pat - patbuf;
+ pat = base_pat = patbuf;
+
+ EMACS_INT result
+ = (boyer_moore_ok
+ ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
+ pos_byte, lim_byte,
+ char_base)
+ : simple_search (n, pat, raw_pattern_size, len_byte, trt,
+ pos, pos_byte, lim, lim_byte));
+ SAFE_FREE ();
+ return result;
+}
+
+static EMACS_INT
+search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
+ ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
+ int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
+{
+ if (running_asynch_code)
+ save_search_regs ();
- len_byte = pat - patbuf;
- pat = base_pat = patbuf;
-
- EMACS_INT result
- = (boyer_moore_ok
- ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
- pos_byte, lim_byte,
- char_base)
- : simple_search (n, pat, raw_pattern_size, len_byte, trt,
- pos, pos_byte, lim, lim_byte));
- SAFE_FREE ();
- return result;
+ /* Searching 0 times means don't move. */
+ /* Null string is found at starting position. */
+ if (n == 0 || SCHARS (string) == 0)
+ {
+ set_search_regs (pos_byte, 0);
+ return pos;
}
+
+ if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
+ pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte,
+ n, trt, inverse_trt, posix);
+ else
+ pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte,
+ n, RE, trt, inverse_trt, posix);
+
+ return pos;
}
\f
/* Do a simple string search N times for the string PAT,
return val;
}
\f
+
void
syms_of_search (void)
{
searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
searchbufs[i].regexp = Qnil;
searchbufs[i].f_whitespace_regexp = Qnil;
+ searchbufs[i].busy = false;
searchbufs[i].syntax_table = Qnil;
staticpro (&searchbufs[i].regexp);
staticpro (&searchbufs[i].f_whitespace_regexp);
saved_last_thing_searched = Qnil;
staticpro (&saved_last_thing_searched);
+ re_match_object = Qnil;
+ staticpro (&re_match_object);
+
DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
doc: /* Regexp to substitute for bunches of spaces in regexp search.
Some commands use this for user-specified regexps.