for this pattern. 0 means backtrack only enough to get a valid match. */
struct re_pattern_buffer *
-compile_pattern (pattern, regp, translate, posix)
+compile_pattern (pattern, regp, translate, posix, multibyte)
Lisp_Object pattern;
struct re_registers *regp;
Lisp_Object *translate;
- int posix;
+ int posix, multibyte;
{
struct regexp_cache *cp, **cpp;
- /* Should we check it here, or add an argument `multibyte' to this
- function? */
- int multibyte = !NILP (current_buffer->enable_multibyte_characters);
for (cpp = &searchbuf_head; ; cpp = &cp->next)
{
bufp = compile_pattern (string, &search_regs,
(!NILP (current_buffer->case_fold_search)
? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
- posix);
+ posix,
+ !NILP (current_buffer->enable_multibyte_characters));
immediate_quit = 1;
QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
int posix;
{
int val;
- int s;
struct re_pattern_buffer *bufp;
+ int pos, pos_byte;
+ int i;
if (running_asynch_code)
save_search_regs ();
CHECK_STRING (string, 1);
if (NILP (start))
- s = 0;
+ pos = 0, pos_byte = 0;
else
{
int len = XSTRING (string)->size;
CHECK_NUMBER (start, 2);
- s = XINT (start);
- if (s < 0 && -s <= len)
- s = len + s;
- else if (0 > s || s > len)
+ pos = XINT (start);
+ if (pos < 0 && -pos <= len)
+ pos = len + pos;
+ else if (0 > pos || pos > len)
args_out_of_range (string, start);
+ pos_byte = string_char_to_byte (string, pos);
}
bufp = compile_pattern (regexp, &search_regs,
(!NILP (current_buffer->case_fold_search)
? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
- posix);
+ posix,
+ STRING_MULTIBYTE (string));
immediate_quit = 1;
re_match_object = string;
val = re_search (bufp, (char *) XSTRING (string)->data,
- XSTRING (string)->size, s, XSTRING (string)->size - s,
+ XSTRING (string)->size_byte, pos_byte,
+ XSTRING (string)->size_byte - pos_byte,
&search_regs);
immediate_quit = 0;
last_thing_searched = Qt;
if (val == -2)
matcher_overflow ();
if (val < 0) return Qnil;
- return make_number (val);
+
+ for (i = 0; i < search_regs.num_regs; i++)
+ if (search_regs.start[i] >= 0)
+ {
+ search_regs.start[i]
+ = string_byte_to_char (string, search_regs.start[i]);
+ search_regs.end[i]
+ = string_byte_to_char (string, search_regs.end[i]);
+ }
+
+ return make_number (string_byte_to_char (string, val));
}
DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
int val;
struct re_pattern_buffer *bufp;
- bufp = compile_pattern (regexp, 0, 0, 0);
+ bufp = compile_pattern (regexp, 0, 0, 0, STRING_MULTIBYTE (string));
immediate_quit = 1;
re_match_object = string;
/* Match REGEXP against STRING, searching all of STRING ignoring case,
and return the index of the match, or negative on failure.
- This does not clobber the match data. */
+ This does not clobber the match data.
+ We assume that STRING contains single-byte characters. */
extern Lisp_Object Vascii_downcase_table;
struct re_pattern_buffer *bufp;
int len = strlen (string);
+ regexp = string_make_unibyte (regexp);
re_match_object = Qt;
bufp = compile_pattern (regexp, 0,
- XCHAR_TABLE (Vascii_downcase_table)->contents, 0);
+ XCHAR_TABLE (Vascii_downcase_table)->contents, 0,
+ 1);
immediate_quit = 1;
val = re_search (bufp, string, len, 0, len, 0);
immediate_quit = 0;
int posix;
{
int len = XSTRING (string)->size;
+ int len_byte = XSTRING (string)->size_byte;
unsigned char *base_pat = XSTRING (string)->data;
register int *BM_tab;
int *BM_tab_base;
{
struct re_pattern_buffer *bufp;
- bufp = compile_pattern (string, &search_regs, trt, posix);
+ bufp = compile_pattern (string, &search_regs, trt, posix,
+ !NILP (current_buffer->enable_multibyte_characters));
immediate_quit = 1; /* Quit immediately if user types ^G,
because letting this function finish
BM_tab = (int *) alloca (0400 * sizeof (int));
#endif
{
- unsigned char *patbuf = (unsigned char *) alloca (len);
+ unsigned char *patbuf = (unsigned char *) alloca (len_byte);
pat = patbuf;
- while (--len >= 0)
+ while (--len_byte >= 0)
{
/* If we got here and the RE flag is set, it's because we're
dealing with a regexp known to be trivial, so the backslash
just quotes the next character. */
if (RE && *base_pat == '\\')
{
- len--;
+ len_byte--;
base_pat++;
}
*pat++ = (trt ? XINT (trt[*base_pat++]) : *base_pat++);
}
- len = pat - patbuf;
+ len_byte = pat - patbuf;
pat = base_pat = patbuf;
}
/* The general approach is that we are going to maintain that we know */
/* instance of a character that matches it (per trt), or misses */
/* it entirely if there is none. */
- dirlen = len * direction;
- infinity = dirlen - (lim_byte + pos_byte + len + len) * direction;
+ dirlen = len_byte * direction;
+ infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
if (direction < 0)
- pat = (base_pat += len - 1);
+ pat = (base_pat += len_byte - 1);
BM_tab_base = BM_tab;
BM_tab += 0400;
j = dirlen; /* to get it in a register */
/* to throw you well beyond the end of the search. It can also */
/* happen if you fail to match within the permitted region and would */
/* otherwise try a character beyond that region */
- if ((cursor - p_limit) * direction <= len)
+ if ((cursor - p_limit) * direction <= len_byte)
break; /* a small overrun is genuine */
cursor -= infinity; /* large overrun = hit */
i = dirlen - direction;
cursor += dirlen - i - direction; /* fix cursor */
if (i + direction == 0)
{
+ int position;
+
cursor -= direction;
- set_search_regs (pos_byte + cursor - p2 + ((direction > 0)
- ? 1 - len : 0),
- len);
+ position = pos_byte + cursor - p2 + ((direction > 0)
+ ? 1 - len_byte : 0);
+ set_search_regs (position, len_byte);
if ((n -= direction) != 0)
cursor += dirlen; /* to resume search */
? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
: BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
limit = ((direction > 0)
- ? min (limit + len, lim_byte - 1)
- : max (limit - len, lim_byte));
+ ? min (limit + len_byte, lim_byte - 1)
+ : max (limit - len_byte, lim_byte));
/* LIMIT is now the last value POS_BYTE can have
and still be valid for a possible match. */
while (1)
pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
/* now run the same tests to distinguish going off the */
/* end, a match or a phony match. */
- if ((pos_byte - limit) * direction <= len)
+ if ((pos_byte - limit) * direction <= len_byte)
break; /* ran off the end */
/* Found what might be a match.
Set POS_BYTE back to last (first if reverse) pos. */
pos_byte += dirlen - i- direction;
if (i + direction == 0)
{
+ int position;
pos_byte -= direction;
- set_search_regs (pos_byte + ((direction > 0) ? 1 - len : 0),
- len);
+ position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
+
+ set_search_regs (position, len_byte);
if ((n -= direction) != 0)
pos_byte += dirlen; /* to resume search */
Lisp_Object string;
{
register unsigned char *p, *o;
- register int i, len, punct_count = 0, word_count = 0;
+ register int i, i_byte, len, punct_count = 0, word_count = 0;
Lisp_Object val;
+ int prev_c = 0;
+ int adjust;
CHECK_STRING (string, 0);
p = XSTRING (string)->data;
len = XSTRING (string)->size;
- for (i = 0; i < len; i++)
- if (SYNTAX (p[i]) != Sword)
- {
- punct_count++;
- if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
- }
- if (SYNTAX (p[len-1]) == Sword) word_count++;
- if (!word_count) return build_string ("");
+ for (i = 0, i_byte = 0; i < len; )
+ {
+ int c;
+
+ if (STRING_MULTIBYTE (string))
+ FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
+ else
+ c = XSTRING (string)->data[i++];
+
+ if (SYNTAX (c) != Sword)
+ {
+ punct_count++;
+ if (i > 0 && SYNTAX (prev_c) == Sword)
+ word_count++;
+ }
- val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
+ prev_c = c;
+ }
+
+ if (SYNTAX (prev_c) == Sword)
+ word_count++;
+ if (!word_count)
+ return build_string ("");
+
+ adjust = - punct_count + 5 * (word_count - 1) + 4;
+ val = make_uninit_multibyte_string (len + adjust,
+ XSTRING (string)->size_byte + adjust);
o = XSTRING (val)->data;
*o++ = '\\';
*o++ = 'b';
- for (i = 0; i < len; i++)
+ for (i = 0; i < XSTRING (val)->size_byte; i++)
if (SYNTAX (p[i]) == Sword)
*o++ = p[i];
else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
if (NILP (literal))
{
int lastpos = -1;
+ int lastpos_byte = -1;
/* We build up the substituted string in ACCUM. */
Lisp_Object accum;
Lisp_Object middle;
+ int pos_byte;
accum = Qnil;
- for (pos = 0; pos < XSTRING (newtext)->size; pos++)
+ for (pos_byte = 0, pos = 0; pos_byte < XSTRING (newtext)->size_byte;)
{
int substart = -1;
int subend;
int delbackslash = 0;
- c = XSTRING (newtext)->data[pos];
+ FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
+
if (c == '\\')
{
- c = XSTRING (newtext)->data[++pos];
+ FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
if (c == '&')
{
substart = search_regs.start[sub];
if (substart >= 0)
{
if (pos - 1 != lastpos + 1)
- middle = Fsubstring (newtext,
- make_number (lastpos + 1),
- make_number (pos - 1));
+ middle = substring_both (newtext, lastpos + 1,
+ lastpos_byte + 1,
+ pos - 1, pos_byte - 1);
else
middle = Qnil;
accum = concat3 (accum, middle,
- Fsubstring (string, make_number (substart),
+ Fsubstring (string,
+ make_number (substart),
make_number (subend)));
lastpos = pos;
+ lastpos_byte = pos_byte;
}
else if (delbackslash)
{
- middle = Fsubstring (newtext, make_number (lastpos + 1),
- make_number (pos));
+ middle = substring_both (newtext, lastpos + 1,
+ lastpos_byte + 1,
+ pos, pos_byte);
+
accum = concat2 (accum, middle);
lastpos = pos;
+ lastpos_byte = pos_byte;
}
}
if (pos != lastpos + 1)
- middle = Fsubstring (newtext, make_number (lastpos + 1),
- make_number (pos));
+ middle = substring_both (newtext, lastpos + 1,
+ lastpos_byte + 1,
+ pos, pos_byte);
else
middle = Qnil;
{
register unsigned char *in, *out, *end;
register unsigned char *temp;
+ int backslashes_added = 0;
CHECK_STRING (string, 0);
- temp = (unsigned char *) alloca (XSTRING (string)->size * 2);
+ temp = (unsigned char *) alloca (XSTRING (string)->size_byte * 2);
/* Now copy the data into the new string, inserting escapes. */
in = XSTRING (string)->data;
- end = in + XSTRING (string)->size;
+ end = in + XSTRING (string)->size_byte;
out = temp;
for (; in != end; in++)
|| *in == '*' || *in == '.' || *in == '\\'
|| *in == '?' || *in == '+'
|| *in == '^' || *in == '$')
- *out++ = '\\';
+ *out++ = '\\', backslashes_added++;
*out++ = *in;
}
- return make_string (temp, out - temp);
+ return make_multibyte_string (temp,
+ XSTRING (string)->size + backslashes_added,
+ out - temp);
}
\f
syms_of_search ()