From: Kenichi Handa Date: Tue, 15 Dec 1998 04:35:38 +0000 (+0000) Subject: (search_buffer): Don't use Boyer-Moore X-Git-Tag: emacs-20.4~1073 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=040272ceb2552a84171db18185f53fccbb9fb12d;p=emacs.git (search_buffer): Don't use Boyer-Moore to search for an invalid multibyte code. In the unibyte case, there is no need to check whether there are translations in more than one charset; just set charset_base to 0. --- diff --git a/src/search.c b/src/search.c index e4877b5f498..fee6b835bd3 100644 --- a/src/search.c +++ b/src/search.c @@ -1142,7 +1142,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, int multibyte = !NILP (current_buffer->enable_multibyte_characters); unsigned char *base_pat = XSTRING (string)->data; int charset_base = -1; - int simple = 1; + int boyer_moore_ok = 1; /* MULTIBYTE says whether the text to be searched is multibyte. We must convert PATTERN to match that, or we will not really @@ -1204,6 +1204,12 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, } c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen); + + /* If we are searching for something strange, + an invalid multibyte code, don't use boyer-moore. */ + if (! ASCII_BYTE_P (c)) + boyer_moore_ok = 0; + /* Translate the character, if requested. */ TRANSLATE (translated, trt, c); /* If translation changed the byte-length, go back @@ -1229,8 +1235,8 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, else if (charset_base != charset_base_code) /* If two different rows appear, needing translation, then we cannot use boyer_moore search. */ - simple = 0; - /* ??? Handa: this must do simple = 0 + boyer_moore_ok = 0; + /* ??? Handa: this must do boyer_moore_ok = 0 if c is a composite character. */ } @@ -1243,9 +1249,11 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, } else { + /* Unibyte buffer. 
*/ + charset_base = 0; while (--len >= 0) { - int c, translated, inverse; + int c, translated; /* If we got here and the RE flag is set, it's because we're dealing with a regexp known to be trivial, so the backslash @@ -1257,22 +1265,6 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, } c = *base_pat++; TRANSLATE (translated, trt, c); - TRANSLATE (inverse, inverse_trt, c); - - /* Did this char actually get translated? - Would any other char get translated into it? */ - if (translated != c || inverse != c) - { - /* Keep track of which character set row - contains the characters that need translation. */ - int charset_base_code = c & ~0xff; - if (charset_base == -1) - charset_base = charset_base_code; - else if (charset_base != charset_base_code) - /* If two different rows appear, needing translation, - then we cannot use boyer_moore search. */ - simple = 0; - } *pat++ = translated; } } @@ -1281,7 +1273,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, len = raw_pattern_size; pat = base_pat = patbuf; - if (simple) + if (boyer_moore_ok) return boyer_moore (n, pat, len, len_byte, trt, inverse_trt, pos, pos_byte, lim, lim_byte, charset_base);