From: Glenn Morris Date: Wed, 22 May 2019 17:18:51 +0000 (-0700) Subject: Merge from origin/emacs-26 X-Git-Tag: emacs-27.0.90~2805^2~13 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=0d1ff4cbe232e9ee398dc30058495c29996c0b33;p=emacs.git Merge from origin/emacs-26 5d24af8 Remove from docs references to obsolete MULE variables 2bdc419 Do potentially destructive operations in prepare-commit-msg # Conflicts: # src/search.c --- 0d1ff4cbe232e9ee398dc30058495c29996c0b33 diff --cc src/search.c index dfbae5c9628,db7fecd9bab..8a0f707b723 --- a/src/search.c +++ b/src/search.c @@@ -1147,374 -1158,350 +1147,369 @@@ while (0 static struct re_registers search_regs_1; static EMACS_INT -search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, - ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n, - int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix) +search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, + ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n, + Lisp_Object trt, Lisp_Object inverse_trt, bool posix) { - ptrdiff_t len = SCHARS (string); - ptrdiff_t len_byte = SBYTES (string); - register ptrdiff_t i; + unsigned char *p1, *p2; + ptrdiff_t s1, s2; - if (running_asynch_code) - save_search_regs (); + /* Snapshot in case Lisp changes the value. */ + bool preserve_match_data = NILP (Vinhibit_changing_match_data); - /* Searching 0 times means don't move. */ - /* Null string is found at starting position. */ - if (len == 0 || n == 0) + struct regexp_cache *cache_entry = + compile_pattern (string, + preserve_match_data ? &search_regs : &search_regs_1, + trt, posix, + !NILP (BVAR (current_buffer, enable_multibyte_characters))); + struct re_pattern_buffer *bufp = &cache_entry->buf; + + maybe_quit (); /* Do a pending quit right away, + to avoid paradoxical behavior */ + /* Get pointers and sizes of the two strings + that make up the visible portion of the buffer. */ + + p1 = BEGV_ADDR; + s1 = GPT_BYTE - BEGV_BYTE; + p2 = GAP_END_ADDR; + s2 = ZV_BYTE - GPT_BYTE; + if (s1 < 0) { - set_search_regs (pos_byte, 0); - return pos; + p2 = p1; + s2 = ZV_BYTE - BEGV_BYTE; + s1 = 0; } - - if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp))) + if (s2 < 0) { - unsigned char *p1, *p2; - ptrdiff_t s1, s2; - struct re_pattern_buffer *bufp; + s1 = ZV_BYTE - BEGV_BYTE; + s2 = 0; + } - bufp = compile_pattern (string, - (NILP (Vinhibit_changing_match_data) - ? &search_regs : &search_regs_1), - trt, posix, - !NILP (BVAR (current_buffer, enable_multibyte_characters))); + ptrdiff_t count = SPECPDL_INDEX (); + freeze_buffer_relocation (); + freeze_pattern (cache_entry); - maybe_quit (); /* Do a pending quit right away, - to avoid paradoxical behavior */ - /* Get pointers and sizes of the two strings - that make up the visible portion of the buffer. */ + while (n < 0) + { + ptrdiff_t val; - p1 = BEGV_ADDR; - s1 = GPT_BYTE - BEGV_BYTE; - p2 = GAP_END_ADDR; - s2 = ZV_BYTE - GPT_BYTE; - if (s1 < 0) - { - p2 = p1; - s2 = ZV_BYTE - BEGV_BYTE; - s1 = 0; - } - if (s2 < 0) - { - s1 = ZV_BYTE - BEGV_BYTE; - s2 = 0; - } re_match_object = Qnil; + val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, + pos_byte - BEGV_BYTE, lim_byte - pos_byte, + preserve_match_data ? &search_regs : &search_regs_1, + /* Don't allow match past current point */ + pos_byte - BEGV_BYTE); + if (val == -2) + { + unbind_to (count, Qnil); + matcher_overflow (); + } + if (val >= 0) + { + if (preserve_match_data) + { + pos_byte = search_regs.start[0] + BEGV_BYTE; + for (ptrdiff_t i = 0; i < search_regs.num_regs; i++) + if (search_regs.start[i] >= 0) + { + search_regs.start[i] + = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE); + search_regs.end[i] + = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); + } + XSETBUFFER (last_thing_searched, current_buffer); + /* Set pos to the new position. */ + pos = search_regs.start[0]; + } + else + { + pos_byte = search_regs_1.start[0] + BEGV_BYTE; + /* Set pos to the new position. */ + pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE); + } + } + else + { + unbind_to (count, Qnil); + return (n); + } + n++; + maybe_quit (); + } + while (n > 0) + { + ptrdiff_t val; - freeze_buffer_relocation (); + re_match_object = Qnil; + val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, + pos_byte - BEGV_BYTE, lim_byte - pos_byte, + preserve_match_data ? &search_regs : &search_regs_1, + lim_byte - BEGV_BYTE); + if (val == -2) + { + unbind_to (count, Qnil); + matcher_overflow (); + } + if (val >= 0) + { + if (preserve_match_data) + { + pos_byte = search_regs.end[0] + BEGV_BYTE; + for (ptrdiff_t i = 0; i < search_regs.num_regs; i++) + if (search_regs.start[i] >= 0) + { + search_regs.start[i] + = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE); + search_regs.end[i] + = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); + } + XSETBUFFER (last_thing_searched, current_buffer); + pos = search_regs.end[0]; + } + else + { + pos_byte = search_regs_1.end[0] + BEGV_BYTE; + pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE); + } + } + else + { + unbind_to (count, Qnil); + return (0 - n); + } + n--; + maybe_quit (); + } + unbind_to (count, Qnil); + return (pos); +} - while (n < 0) - { - ptrdiff_t val; - - val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, - pos_byte - BEGV_BYTE, lim_byte - pos_byte, - (NILP (Vinhibit_changing_match_data) - ? &search_regs : &search_regs_1), - /* Don't allow match past current point */ - pos_byte - BEGV_BYTE); - if (val == -2) - { - matcher_overflow (); - } - if (val >= 0) - { - if (NILP (Vinhibit_changing_match_data)) - { - pos_byte = search_regs.start[0] + BEGV_BYTE; - for (i = 0; i < search_regs.num_regs; i++) - if (search_regs.start[i] >= 0) - { - search_regs.start[i] - = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE); - search_regs.end[i] - = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); - } - XSETBUFFER (last_thing_searched, current_buffer); - /* Set pos to the new position. */ - pos = search_regs.start[0]; - } - else - { - pos_byte = search_regs_1.start[0] + BEGV_BYTE; - /* Set pos to the new position. */ - pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE); - } - } - else - { - thaw_buffer_relocation (); - return (n); - } - n++; - maybe_quit (); - } - while (n > 0) - { - ptrdiff_t val; - - val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2, - pos_byte - BEGV_BYTE, lim_byte - pos_byte, - (NILP (Vinhibit_changing_match_data) - ? &search_regs : &search_regs_1), - lim_byte - BEGV_BYTE); - if (val == -2) - { - matcher_overflow (); - } - if (val >= 0) - { - if (NILP (Vinhibit_changing_match_data)) - { - pos_byte = search_regs.end[0] + BEGV_BYTE; - for (i = 0; i < search_regs.num_regs; i++) - if (search_regs.start[i] >= 0) - { - search_regs.start[i] - = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE); - search_regs.end[i] - = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); - } - XSETBUFFER (last_thing_searched, current_buffer); - pos = search_regs.end[0]; - } - else - { - pos_byte = search_regs_1.end[0] + BEGV_BYTE; - pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE); - } - } - else - { - thaw_buffer_relocation (); - return (0 - n); - } - n--; - maybe_quit (); - } - thaw_buffer_relocation (); - return (pos); +static EMACS_INT +search_buffer_non_re (Lisp_Object string, ptrdiff_t pos, + ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte, + EMACS_INT n, int RE, Lisp_Object trt, Lisp_Object inverse_trt, + bool posix) +{ + unsigned char *raw_pattern, *pat; + ptrdiff_t raw_pattern_size; + ptrdiff_t raw_pattern_size_byte; + unsigned char *patbuf; + bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters)); + unsigned char *base_pat; + /* Set to positive if we find a non-ASCII char that need + translation. Otherwise set to zero later. */ + int char_base = -1; + bool boyer_moore_ok = 1; + USE_SAFE_ALLOCA; + + /* MULTIBYTE says whether the text to be searched is multibyte. + We must convert PATTERN to match that, or we will not really + find things right. */ + + if (multibyte == STRING_MULTIBYTE (string)) + { + raw_pattern = SDATA (string); + raw_pattern_size = SCHARS (string); + raw_pattern_size_byte = SBYTES (string); } - else /* non-RE case */ + else if (multibyte) { - unsigned char *raw_pattern, *pat; - ptrdiff_t raw_pattern_size; - ptrdiff_t raw_pattern_size_byte; - unsigned char *patbuf; - bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters)); - unsigned char *base_pat; - /* Set to positive if we find a non-ASCII char that need - translation. Otherwise set to zero later. */ - int char_base = -1; - bool boyer_moore_ok = 1; - USE_SAFE_ALLOCA; - - /* MULTIBYTE says whether the text to be searched is multibyte. - We must convert PATTERN to match that, or we will not really - find things right. */ - - if (multibyte == STRING_MULTIBYTE (string)) - { - raw_pattern = SDATA (string); - raw_pattern_size = SCHARS (string); - raw_pattern_size_byte = SBYTES (string); - } - else if (multibyte) - { - raw_pattern_size = SCHARS (string); - raw_pattern_size_byte - = count_size_as_multibyte (SDATA (string), - raw_pattern_size); - raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1); - copy_text (SDATA (string), raw_pattern, - SCHARS (string), 0, 1); - } - else - { - /* Converting multibyte to single-byte. */ - raw_pattern_size = SCHARS (string); - raw_pattern_size_byte = SCHARS (string); - raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1); - copy_text (SDATA (string), raw_pattern, - SBYTES (string), 1, 0); - } + raw_pattern_size = SCHARS (string); + raw_pattern_size_byte + = count_size_as_multibyte (SDATA (string), + raw_pattern_size); + raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1); + copy_text (SDATA (string), raw_pattern, + SCHARS (string), 0, 1); + } + else + { - /* Converting multibyte to single-byte. - - ??? Perhaps this conversion should be done in a special way - by subtracting nonascii-insert-offset from each non-ASCII char, - so that only the multibyte chars which really correspond to - the chosen single-byte character set can possibly match. */ ++ /* Converting multibyte to single-byte. */ + raw_pattern_size = SCHARS (string); + raw_pattern_size_byte = SCHARS (string); + raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1); + copy_text (SDATA (string), raw_pattern, + SBYTES (string), 1, 0); + } - /* Copy and optionally translate the pattern. */ - len = raw_pattern_size; - len_byte = raw_pattern_size_byte; - SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len); - pat = patbuf; - base_pat = raw_pattern; - if (multibyte) - { - /* Fill patbuf by translated characters in STRING while - checking if we can use boyer-moore search. If TRT is - non-nil, we can use boyer-moore search only if TRT can be - represented by the byte array of 256 elements. For that, - all non-ASCII case-equivalents of all case-sensitive - characters in STRING must belong to the same character - group (two characters belong to the same group iff their - multibyte forms are the same except for the last byte; - i.e. every 64 characters form a group; U+0000..U+003F, - U+0040..U+007F, U+0080..U+00BF, ...). */ - - while (--len >= 0) - { - unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str; - int c, translated, inverse; - int in_charlen, charlen; - - /* If we got here and the RE flag is set, it's because we're - dealing with a regexp known to be trivial, so the backslash - just quotes the next character. */ - if (RE && *base_pat == '\\') - { - len--; - raw_pattern_size--; - len_byte--; - base_pat++; - } + /* Copy and optionally translate the pattern. */ + ptrdiff_t len = raw_pattern_size; + ptrdiff_t len_byte = raw_pattern_size_byte; + SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len); + pat = patbuf; + base_pat = raw_pattern; + if (multibyte) + { + /* Fill patbuf by translated characters in STRING while + checking if we can use boyer-moore search. If TRT is + non-nil, we can use boyer-moore search only if TRT can be + represented by the byte array of 256 elements. For that, + all non-ASCII case-equivalents of all case-sensitive + characters in STRING must belong to the same character + group (two characters belong to the same group iff their + multibyte forms are the same except for the last byte; + i.e. every 64 characters form a group; U+0000..U+003F, + U+0040..U+007F, U+0080..U+00BF, ...). */ + + while (--len >= 0) + { + unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str; + int c, translated, inverse; + int in_charlen, charlen; + + /* If we got here and the RE flag is set, it's because we're + dealing with a regexp known to be trivial, so the backslash + just quotes the next character. */ + if (RE && *base_pat == '\\') + { + len--; + raw_pattern_size--; + len_byte--; + base_pat++; + } - c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen); + c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen); - if (NILP (trt)) - { - str = base_pat; - charlen = in_charlen; - } - else - { - /* Translate the character. */ - TRANSLATE (translated, trt, c); - charlen = CHAR_STRING (translated, str_base); - str = str_base; - - /* Check if C has any other case-equivalents. */ - TRANSLATE (inverse, inverse_trt, c); - /* If so, check if we can use boyer-moore. */ - if (c != inverse && boyer_moore_ok) - { - /* Check if all equivalents belong to the same - group of characters. Note that the check of C - itself is done by the last iteration. */ - int this_char_base = -1; + if (NILP (trt)) + { + str = base_pat; + charlen = in_charlen; + } + else + { + /* Translate the character. */ + TRANSLATE (translated, trt, c); + charlen = CHAR_STRING (translated, str_base); + str = str_base; + + /* Check if C has any other case-equivalents. */ + TRANSLATE (inverse, inverse_trt, c); + /* If so, check if we can use boyer-moore. */ + if (c != inverse && boyer_moore_ok) + { + /* Check if all equivalents belong to the same + group of characters. Note that the check of C + itself is done by the last iteration. */ + int this_char_base = -1; + + while (boyer_moore_ok) + { + if (ASCII_CHAR_P (inverse)) + { + if (this_char_base > 0) + boyer_moore_ok = 0; + else + this_char_base = 0; + } + else if (CHAR_BYTE8_P (inverse)) + /* Boyer-moore search can't handle a + translation of an eight-bit + character. */ + boyer_moore_ok = 0; + else if (this_char_base < 0) + { + this_char_base = inverse & ~0x3F; + if (char_base < 0) + char_base = this_char_base; + else if (this_char_base != char_base) + boyer_moore_ok = 0; + } + else if ((inverse & ~0x3F) != this_char_base) + boyer_moore_ok = 0; + if (c == inverse) + break; + TRANSLATE (inverse, inverse_trt, inverse); + } + } + } - while (boyer_moore_ok) - { - if (ASCII_CHAR_P (inverse)) - { - if (this_char_base > 0) - boyer_moore_ok = 0; - else - this_char_base = 0; - } - else if (CHAR_BYTE8_P (inverse)) - /* Boyer-moore search can't handle a - translation of an eight-bit - character. */ - boyer_moore_ok = 0; - else if (this_char_base < 0) - { - this_char_base = inverse & ~0x3F; - if (char_base < 0) - char_base = this_char_base; - else if (this_char_base != char_base) - boyer_moore_ok = 0; - } - else if ((inverse & ~0x3F) != this_char_base) - boyer_moore_ok = 0; - if (c == inverse) - break; - TRANSLATE (inverse, inverse_trt, inverse); - } - } - } + /* Store this character into the translated pattern. */ + memcpy (pat, str, charlen); + pat += charlen; + base_pat += in_charlen; + len_byte -= in_charlen; + } - /* Store this character into the translated pattern. */ - memcpy (pat, str, charlen); - pat += charlen; - base_pat += in_charlen; - len_byte -= in_charlen; - } + /* If char_base is still negative we didn't find any translated + non-ASCII characters. */ + if (char_base < 0) + char_base = 0; + } + else + { + /* Unibyte buffer. */ + char_base = 0; + while (--len >= 0) + { + int c, translated, inverse; - /* If char_base is still negative we didn't find any translated - non-ASCII characters. */ - if (char_base < 0) - char_base = 0; - } - else - { - /* Unibyte buffer. */ - char_base = 0; - while (--len >= 0) - { - int c, translated, inverse; + /* If we got here and the RE flag is set, it's because we're + dealing with a regexp known to be trivial, so the backslash + just quotes the next character. */ + if (RE && *base_pat == '\\') + { + len--; + raw_pattern_size--; + base_pat++; + } + c = *base_pat++; + TRANSLATE (translated, trt, c); + *pat++ = translated; + /* Check that none of C's equivalents violates the + assumptions of boyer_moore. */ + TRANSLATE (inverse, inverse_trt, c); + while (1) + { + if (inverse >= 0200) + { + boyer_moore_ok = 0; + break; + } + if (c == inverse) + break; + TRANSLATE (inverse, inverse_trt, inverse); + } + } + } - /* If we got here and the RE flag is set, it's because we're - dealing with a regexp known to be trivial, so the backslash - just quotes the next character. */ - if (RE && *base_pat == '\\') - { - len--; - raw_pattern_size--; - base_pat++; - } - c = *base_pat++; - TRANSLATE (translated, trt, c); - *pat++ = translated; - /* Check that none of C's equivalents violates the - assumptions of boyer_moore. */ - TRANSLATE (inverse, inverse_trt, c); - while (1) - { - if (inverse >= 0200) - { - boyer_moore_ok = 0; - break; - } - if (c == inverse) - break; - TRANSLATE (inverse, inverse_trt, inverse); - } - } - } + len_byte = pat - patbuf; + pat = base_pat = patbuf; + + EMACS_INT result + = (boyer_moore_ok + ? boyer_moore (n, pat, len_byte, trt, inverse_trt, + pos_byte, lim_byte, + char_base) + : simple_search (n, pat, raw_pattern_size, len_byte, trt, + pos, pos_byte, lim, lim_byte)); + SAFE_FREE (); + return result; +} - len_byte = pat - patbuf; - pat = base_pat = patbuf; - - EMACS_INT result - = (boyer_moore_ok - ? boyer_moore (n, pat, len_byte, trt, inverse_trt, - pos_byte, lim_byte, - char_base) - : simple_search (n, pat, raw_pattern_size, len_byte, trt, - pos, pos_byte, lim, lim_byte)); - SAFE_FREE (); - return result; +static EMACS_INT +search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, + ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n, + int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix) +{ + if (running_asynch_code) + save_search_regs (); + + /* Searching 0 times means don't move. */ + /* Null string is found at starting position. */ + if (n == 0 || SCHARS (string) == 0) + { + set_search_regs (pos_byte, 0); + return pos; } + + if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp))) + pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte, + n, trt, inverse_trt, posix); + else + pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte, + n, RE, trt, inverse_trt, posix); + + return pos; } /* Do a simple string search N times for the string PAT,