+2002-08-23 Stefan Monnier <monnier@cs.yale.edu>
+
+ * regex.c (PATFETCH): Remove the translating fetch.
+ (PATFETCH_RAW): Rename to PATFETCH.
+ (set_image_of_range): New fun.
+ (SET_RANGE_TABLE_WORK_AREA): Use it.
+ (regex_compile): Don't translate the pattern chars so eagerly.
+ Only do it when inserting an `exactn' bytecode or when handling
+ a char-range.
+ (mutually_exclusive_p): Avoid empty statement.
+
2002-08-22 Kim F. Storm <storm@cua.dk>
* xdisp.c (redisplay_window): Do not `goto try_to_scroll' when we
(parse_solitary_modifier, Fexecute_extended_command): Likewise.
* textprop.c (validate_interval_range, interval_of): Likewise.
- * fontset.c (Fset_fontset_font): Use SDATA instead of
- XSTRING()->data.
+ * fontset.c (Fset_fontset_font): Use SDATA instead of XSTRING()->data.
- * charset.h (FETCH_STRING_CHAR_ADVANCE,
- FETCH_STRING_CHAR_ADVANCE_NO_CHECK): Use SBYTES instead of
+ * charset.h (FETCH_STRING_CHAR_ADVANCE)
+ (FETCH_STRING_CHAR_ADVANCE_NO_CHECK): Use SBYTES instead of
XSTRING()->size_byte.
* lisp.h (SDATA, SREF): Produce rvalue.
* buffer.c (Fother_buffer): Use SREF when retrieving a byte from
a string.
* casefiddle.c (casify_object): Use SSET.
- * charset.h (FETCH_STRING_CHAR_ADVANCE,
- FETCH_STRING_CHAR_ADVANCE_NO_CHECK): Use SDATA when getting
+ * charset.h (FETCH_STRING_CHAR_ADVANCE)
+ (FETCH_STRING_CHAR_ADVANCE_NO_CHECK): Use SDATA when getting
address of string contents.
* data.c (Faref): Use SDATA.
(Faset): Use SDATA, SSET.
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA. */
-/* TODO:
+/* BUGS:
+ - (x?)*y\1z should match both xxxxyxz and xxxyz.
+ TODO:
- structure the opcode space into opcode+flag.
- merge with glibc's regex.[ch].
- replace (succeed_n + jump_n + set_number_at) with something that doesn't
static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
char *fastmap, const int multibyte));
-/* Fetch the next character in the uncompiled pattern---translating it
- if necessary. */
-#define PATFETCH(c) \
- do { \
- PATFETCH_RAW (c); \
- c = TRANSLATE (c); \
- } while (0)
-
/* Fetch the next character in the uncompiled pattern, with no
translation. */
-#define PATFETCH_RAW(c) \
+#define PATFETCH(c) \
do { \
int len; \
if (p == pend) return REG_EEND; \
#define BIT_UPPER 0x10
#define BIT_MULTIBYTE 0x20
-/* Set a range (RANGE_START, RANGE_END) to WORK_AREA. */
-#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \
- do { \
- EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2); \
- (work_area).table[(work_area).used++] = (range_start); \
- (work_area).table[(work_area).used++] = (range_end); \
+/* Append the range START..END to WORK_AREA.
+ The range is passed through TRANSLATE, so START and END
+ should be untranslated.
+ EXTEND_RANGE_TABLE_WORK_AREA is invoked with a count of 2 first
+ (presumably to reserve room for the two table entries), and
+ set_image_of_range then stores the bounds of the translated range.
+ The expansion refers to a variable named `translate', which must
+ be in scope wherever this macro is used. */
+#define SET_RANGE_TABLE_WORK_AREA(work_area, start, end) \
+ do { \
+ EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2); \
+ set_image_of_range (&work_area, start, end, translate); \
} while (0)
/* Free allocated memory for WORK_AREA. */
}
#endif
+
+
+/* We need to find the image of the range start..end when passed through
+ TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end)
+ and is not even necessarily contiguous.
+ We approximate it with the smallest contiguous range that contains
+ all the chars we need.
+
+ The bounds start out as TRANSLATE(start) and TRANSLATE(end). When
+ RE_TRANSLATE_P (translate) holds, every character in start..end is
+ translated in turn and the bounds are widened to cover each image,
+ so the cost is linear in the size of the range.
+
+ The lower bound and then the upper bound are stored at
+ WORK_AREA->table[WORK_AREA->used], advancing `used' by two. No
+ bounds check or growth happens here: the caller is expected to have
+ made room beforehand (SET_RANGE_TABLE_WORK_AREA invokes
+ EXTEND_RANGE_TABLE_WORK_AREA before calling this function). */
+static void
+set_image_of_range (work_area, start, end, translate)
+ RE_TRANSLATE_TYPE translate;
+ struct range_table_work_area *work_area;
+ re_wchar_t start, end;
+{
+ re_wchar_t cmin = TRANSLATE (start), cmax = TRANSLATE (end); /* Initial guess: images of the endpoints. */
+ if (RE_TRANSLATE_P (translate)) /* Scan the range only when this test succeeds. */
+ for (; start <= end; start++)
+ {
+ re_wchar_t c = TRANSLATE (start);
+ cmin = MIN (cmin, c); /* Widen downwards if this image is lower. */
+ cmax = MAX (cmax, c); /* Widen upwards if this image is higher. */
+ }
+ work_area->table[work_area->used++] = (cmin); /* Range start. */
+ work_area->table[work_area->used++] = (cmax); /* Range end. */
+}
+
/* Explicit quit checking is only used on NTemacs. */
#if defined WINDOWSNT && defined emacs && defined QUIT
extern int immediate_quit;
if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
+ /* Don't translate yet. The range TRANSLATE(X..Y) cannot
+ always be determined from TRANSLATE(X) and TRANSLATE(Y).
+ So the translation is done later in a loop. Example:
+ (let ((case-fold-search t)) (string-match "[A-_]" "A")) */
PATFETCH (c);
/* \ might escape characters inside [...] and [^...]. */
them). */
if (c == ':' && *p == ']')
{
- int ch;
+ re_wchar_t ch;
re_wctype_t cc;
cc = re_wctype (str);
starting at the smallest character in
the charset of C1 and ending at C1. */
int charset = CHAR_CHARSET (c1);
- int c2 = MAKE_CHAR (charset, 0, 0);
-
+ re_wchar_t c2 = MAKE_CHAR (charset, 0, 0);
+
SET_RANGE_TABLE_WORK_AREA (range_table_work,
c2, c1);
c1 = 0377;
/* ... into bitmap. */
{
re_wchar_t this_char;
- int range_start = c, range_end = c1;
+ re_wchar_t range_start = c, range_end = c1;
/* If the start is after the end, the range is empty. */
if (range_start > range_end)
/* Do not translate the character after the \, so that we can
distinguish, e.g., \B from \b, even if we normally would
translate, e.g., B to b. */
- PATFETCH_RAW (c);
+ PATFETCH (c);
switch (c)
{
case 'c':
laststart = b;
- PATFETCH_RAW (c);
+ PATFETCH (c);
BUF_PUSH_2 (categoryspec, c);
break;
case 'C':
laststart = b;
- PATFETCH_RAW (c);
+ PATFETCH (c);
BUF_PUSH_2 (notcategoryspec, c);
break;
#endif /* emacs */
/* You might think it would be useful for \ to mean
not to translate; but if we don't translate it
it will never match anything. */
- c = TRANSLATE (c);
goto normal_char;
}
break;
default:
/* Expects the character in `c'. */
normal_char:
- /* If no exactn currently being built. */
+ /* If no exactn currently being built. */
if (!pending_exact
/* If last exactn not at current position. */
{
int len;
+ c = TRANSLATE (c);
if (multibyte)
len = CHAR_STRING (c, b);
else
they don't overlap. The union of the two sets of excluded
chars should cover all possible chars, which, as a matter of
fact, is virtually impossible in multibyte buffers. */
- ;
+ break;
}
break;