+2006-09-22 Stefan Monnier <monnier@iro.umontreal.ca>
+
+ * regex.c (analyse_first): For eight-bit-control chars, mark both the
+ char's value and its leading byte in the fastmap.
+ (re_search_2): When fast-scanning without translation, be careful to
+ check that we only match the leading byte of a multibyte char.
+
+ * charset.h (PREV_CHAR_BOUNDARY): Make it work from within a char's
+ byte sequence.
+ (AT_CHAR_BOUNDARY_P): New macro.
+
2006-09-22 Kenichi Handa <handa@m17n.org>
* fns.c (optimize_sub_char_table): Don't optimize a sub-char-table
2006-08-27 Martin Rudalics <rudalics@gmx.at>
- * xdisp.c (mouse_autoselect_window): Removed.
+ * xdisp.c (mouse_autoselect_window): Remove.
(Vmouse_autoselect_window): New variable. DEFVAR_LISP it.
* dispextern.h (mouse_autoselect_window): Remove extern.
if (fastmap)
{
int c = RE_STRING_CHAR (p + 1, pend - p);
-
+ /* When fast-scanning, the fastmap can be indexed either with
+ a char (smaller than 256) or with the first byte of
+ a char's byte sequence. So we have to conservatively add
+ both to the table. */
if (SINGLE_BYTE_CHAR_P (c))
fastmap[c] = 1;
- else
- fastmap[p[1]] = 1;
+ fastmap[p[1]] = 1;
}
break;
So any that are not listed in the charset
are possible matches, even in multibyte buffers. */
if (!fastmap) break;
+ /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially
+ because it will automatically be set when needed by virtue of
+ being larger than the highest char of its charset (0xbf) but
+ smaller than (1<<BYTEWIDTH). */
for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
j < (1 << BYTEWIDTH); j++)
fastmap[j] = 1;
for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
j >= 0; j--)
if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
- fastmap[j] = 1;
+ {
+ fastmap[j] = 1;
+#ifdef emacs
+ if (j >= 0x80 && j < 0xa0)
+ fastmap[LEADING_CODE_8_BIT_CONTROL] = 1;
+#endif
+ }
if ((not && multibyte)
/* Any character set can possibly contain a character
}
}
else
- while (range > lim && !fastmap[*d])
+ do
{
- d++;
- range--;
- }
+ re_char *d_start = d;
+ while (range > lim && !fastmap[*d])
+ {
+ d++;
+ range--;
+ }
+#ifdef emacs
+ if (multibyte && range > lim)
+ {
+ /* Check that we are at the beginning of a char. */
+ int at_boundary;
+ AT_CHAR_BOUNDARY_P (at_boundary, d, d_start);
+ if (at_boundary)
+ break;
+ else
+ { /* We have matched an internal byte of a char
+ rather than the leading byte, so it's a false
+ positive: we should keep scanning. */
+ d++; range--;
+ }
+ }
+ else
+#endif
+ break;
+ } while (1);
startpos += irange - range;
}