Make regex matching reentrant; update syntax during match

author Daniel Colascione <dancol@dancol.org>
Sat, 16 Jun 2018 20:46:10 +0000 (13:46 -0700)
committer Daniel Colascione <dancol@dancol.org>
Sat, 16 Jun 2018 20:46:38 +0000 (13:46 -0700)
diff --git a/src/lisp.h b/src/lisp.h
index ff708ebf60ea1e39b04b75c7eec2d8dfb35dced2..d0c52d8567222ffe8645da466a5b2d35f7e0a6dd 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -4029,10 +4029,6 @@ extern void restore_search_regs (void);
  extern void update_search_regs (ptrdiff_t oldstart,
                                  ptrdiff_t oldend, ptrdiff_t newend);
  extern void record_unwind_save_match_data (void);
-struct re_registers;
-extern struct re_pattern_buffer *compile_pattern (Lisp_Object,
-                                                 struct re_registers *,
-                                                 Lisp_Object, bool, bool);
  extern ptrdiff_t fast_string_match_internal (Lisp_Object, Lisp_Object,
                                              Lisp_Object);
  
diff --git a/src/regex.c b/src/regex.c
index 85e63feea10cc34087c9406b4d3042aa800eb546..b8c6f3f19b26cff59acc8ec6ad1a7d4684f5150d 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -155,7 +155,8 @@
  # define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
  /* Strings are 0-indexed, buffers are 1-indexed; we pun on the boolean
     result to get the right base index.  */
-# define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
+# define POS_AS_IN_BUFFER(p)                                    \
+  ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object)))
  
  # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
  # define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
@@ -1233,6 +1234,15 @@ static const char *re_error_msgid[] =
  # undef MATCH_MAY_ALLOCATE
  #endif
  
+/* While regex matching of a single compiled pattern isn't reentrant
+   (because we compile regexes to bytecode programs, and the bytecode
+   programs are self-modifying), the regex machinery must nevertheless
+   be reentrant with respect to _different_ patterns, and we do that
+   by avoiding global variables and using MATCH_MAY_ALLOCATE.  */
+#if !defined MATCH_MAY_ALLOCATE && defined emacs
+# error "Emacs requires MATCH_MAY_ALLOCATE"
+#endif
+
  \f
  /* Failure stack declarations and macros; both re_compile_fastmap and
     re_match_2 use a failure stack.  These have to be macros because of
@@ -5895,12 +5905,12 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
  #ifdef emacs
                 ssize_t offset = PTR_TO_OFFSET (d - 1);
                 ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
-               UPDATE_SYNTAX_TABLE_FAST (charpos);
+               UPDATE_SYNTAX_TABLE (charpos);
  #endif
                 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
                 s1 = SYNTAX (c1);
  #ifdef emacs
-               UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1);
+               UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
  #endif
                 PREFETCH_NOLIMIT ();
                 GET_CHAR_AFTER (c2, d, dummy);
@@ -5937,7 +5947,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
  #ifdef emacs
               ssize_t offset = PTR_TO_OFFSET (d);
               ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
-             UPDATE_SYNTAX_TABLE_FAST (charpos);
+             UPDATE_SYNTAX_TABLE (charpos);
  #endif
               PREFETCH ();
               GET_CHAR_AFTER (c2, d, dummy);
@@ -5982,7 +5992,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
  #ifdef emacs
               ssize_t offset = PTR_TO_OFFSET (d) - 1;
               ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
-             UPDATE_SYNTAX_TABLE_FAST (charpos);
+             UPDATE_SYNTAX_TABLE (charpos);
  #endif
               GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
               s1 = SYNTAX (c1);
@@ -5997,7 +6007,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
                   PREFETCH_NOLIMIT ();
                   GET_CHAR_AFTER (c2, d, dummy);
  #ifdef emacs
-                 UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos);
+                 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
  #endif
                   s2 = SYNTAX (c2);
  
@@ -6026,7 +6036,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
  #ifdef emacs
               ssize_t offset = PTR_TO_OFFSET (d);
               ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
-             UPDATE_SYNTAX_TABLE_FAST (charpos);
+             UPDATE_SYNTAX_TABLE (charpos);
  #endif
               PREFETCH ();
               c2 = RE_STRING_CHAR (d, target_multibyte);
@@ -6069,7 +6079,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
  #ifdef emacs
               ssize_t offset = PTR_TO_OFFSET (d) - 1;
               ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
-             UPDATE_SYNTAX_TABLE_FAST (charpos);
+             UPDATE_SYNTAX_TABLE (charpos);
  #endif
               GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
               s1 = SYNTAX (c1);
@@ -6084,7 +6094,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
                   PREFETCH_NOLIMIT ();
                   c2 = RE_STRING_CHAR (d, target_multibyte);
  #ifdef emacs
-                 UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1);
+                 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
  #endif
                   s2 = SYNTAX (c2);
  
@@ -6107,7 +6117,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1,
             {
               ssize_t offset = PTR_TO_OFFSET (d);
               ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
-             UPDATE_SYNTAX_TABLE_FAST (pos1);
+             UPDATE_SYNTAX_TABLE (pos1);
             }
  #endif
             {
diff --git a/src/regex.h b/src/regex.h
index 082f7e010d8af26b7f6a6a382f534934c6df6122..3a2d74d86a1227a962d789b9f807ef5f2860198b 100644
--- a/src/regex.h
+++ b/src/regex.h
@@ -181,8 +181,15 @@ typedef unsigned long reg_syntax_t;
     string; if it's nil, we are matching text in the current buffer; if
     it's t, we are matching text in a C string.
  
-   This is defined as a macro in thread.h, which see.  */
-/* extern Lisp_Object re_match_object; */
+   This value is effectively another parameter to re_search_2 and
+   re_match_2.  No calls into Lisp or thread switches are allowed
+   before setting re_match_object and calling into the regex search
+   and match functions.  These functions capture the current value of
+   re_match_object into gl_state on entry.
+
+   TODO: once we get rid of the !emacs case in this code, turn into an
+   actual function parameter.  */
+extern Lisp_Object re_match_object;
  #endif
  
  /* Roughly the maximum number of failure points on the stack.  */
diff --git a/src/search.c b/src/search.c
index a21c01ca4b4f83d1f54d4dd4ec70b79359f052b5..ccdb659776da9656c57e2e192835dd22478986eb 100644
--- a/src/search.c
+++ b/src/search.c
@@ -48,6 +48,8 @@ struct regexp_cache
    char fastmap[0400];
    /* True means regexp was compiled to do full POSIX backtracking.  */
    bool posix;
+  /* True means we're inside a buffer match.  */
+  bool busy;
  };
  
  /* The instances of that struct.  */
@@ -93,6 +95,8 @@ static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
                                  ptrdiff_t, ptrdiff_t, EMACS_INT, int,
                                  Lisp_Object, Lisp_Object, bool);
  
+Lisp_Object re_match_object;
+
  static _Noreturn void
  matcher_overflow (void)
  {
@@ -110,14 +114,6 @@ freeze_buffer_relocation (void)
  #endif
  }
  
-static void
-thaw_buffer_relocation (void)
-{
-#ifdef REL_ALLOC
-  unbind_to (SPECPDL_INDEX () - 1, Qnil);
-#endif
-}
-
  /* Compile a regexp and signal a Lisp error if anything goes wrong.
     PATTERN is the pattern to compile.
     CP is the place to put the result.
@@ -134,6 +130,7 @@ compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
    const char *whitespace_regexp;
    char *val;
  
+  eassert (!cp->busy);
    cp->regexp = Qnil;
    cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
    cp->posix = posix;
@@ -170,10 +167,11 @@ shrink_regexp_cache (void)
    struct regexp_cache *cp;
  
    for (cp = searchbuf_head; cp != 0; cp = cp->next)
-    {
-      cp->buf.allocated = cp->buf.used;
-      cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
-    }
+    if (!cp->busy)
+      {
+        cp->buf.allocated = cp->buf.used;
+        cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
+      }
  }
  
  /* Clear the regexp cache w.r.t. a particular syntax table,
@@ -190,10 +188,25 @@ clear_regexp_cache (void)
      /* It's tempting to compare with the syntax-table we've actually changed,
         but it's not sufficient because char-table inheritance means that
         modifying one syntax-table can change others at the same time.  */
-    if (!EQ (searchbufs[i].syntax_table, Qt))
+    if (!searchbufs[i].busy && !EQ (searchbufs[i].syntax_table, Qt))
        searchbufs[i].regexp = Qnil;
  }
  
+static void
+unfreeze_pattern (void *arg)
+{
+  struct regexp_cache *searchbuf = arg;
+  searchbuf->busy = false;
+}
+
+static void
+freeze_pattern (struct regexp_cache *searchbuf)
+{
+  eassert (!searchbuf->busy);
+  record_unwind_protect_ptr (unfreeze_pattern, searchbuf);
+  searchbuf->busy = true;
+}
+
  /* Compile a regexp if necessary, but first check to see if there's one in
     the cache.
     PATTERN is the pattern to compile.
@@ -205,7 +218,7 @@ clear_regexp_cache (void)
     POSIX is true if we want full backtracking (POSIX style) for this pattern.
     False means backtrack only enough to get a valid match.  */
  
-struct re_pattern_buffer *
+static struct regexp_cache *
  compile_pattern (Lisp_Object pattern, struct re_registers *regp,
                  Lisp_Object translate, bool posix, bool multibyte)
  {
@@ -222,6 +235,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
        if (NILP (cp->regexp))
         goto compile_it;
        if (SCHARS (cp->regexp) == SCHARS (pattern)
+          && !cp->busy
           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
           && !NILP (Fstring_equal (cp->regexp, pattern))
           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
@@ -237,7 +251,10 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
          string value.  */
        if (cp->next == 0)
         {
+          if (cp->busy)
+            error ("Too much matching reentrancy");
         compile_it:
+          eassert (!cp->busy);
           compile_pattern_1 (cp, pattern, translate, posix);
           break;
         }
@@ -258,8 +275,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp,
    /* The compiled pattern can be used both for multibyte and unibyte
       target.  But, we have to tell which the pattern is used for. */
    cp->buf.target_multibyte = multibyte;
-
-  return &cp->buf;
+  return cp;
  }
  
  \f
@@ -270,7 +286,6 @@ looking_at_1 (Lisp_Object string, bool posix)
    unsigned char *p1, *p2;
    ptrdiff_t s1, s2;
    register ptrdiff_t i;
-  struct re_pattern_buffer *bufp;
  
    if (running_asynch_code)
      save_search_regs ();
@@ -280,13 +295,17 @@ looking_at_1 (Lisp_Object string, bool posix)
                          BVAR (current_buffer, case_eqv_table));
  
    CHECK_STRING (string);
-  bufp = compile_pattern (string,
-                         (NILP (Vinhibit_changing_match_data)
-                          ? &search_regs : NULL),
-                         (!NILP (BVAR (current_buffer, case_fold_search))
-                          ? BVAR (current_buffer, case_canon_table) : Qnil),
-                         posix,
-                         !NILP (BVAR (current_buffer, enable_multibyte_characters)));
+
+  /* Snapshot in case Lisp changes the value.  */
+  bool preserve_match_data = NILP (Vinhibit_changing_match_data);
+
+  struct regexp_cache *cache_entry = compile_pattern (
+    string,
+    preserve_match_data ? &search_regs : NULL,
+    (!NILP (BVAR (current_buffer, case_fold_search))
+     ? BVAR (current_buffer, case_canon_table) : Qnil),
+    posix,
+    !NILP (BVAR (current_buffer, enable_multibyte_characters)));
  
    /* Do a pending quit right away, to avoid paradoxical behavior */
    maybe_quit ();
@@ -310,21 +329,20 @@ looking_at_1 (Lisp_Object string, bool posix)
        s2 = 0;
      }
  
-  re_match_object = Qnil;
-
+  ptrdiff_t count = SPECPDL_INDEX ();
    freeze_buffer_relocation ();
-  i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
+  freeze_pattern (cache_entry);
+  re_match_object = Qnil;
+  i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
                   PT_BYTE - BEGV_BYTE,
-                 (NILP (Vinhibit_changing_match_data)
-                  ? &search_regs : NULL),
+                 preserve_match_data ? &search_regs : NULL,
                   ZV_BYTE - BEGV_BYTE);
-  thaw_buffer_relocation ();
  
    if (i == -2)
      matcher_overflow ();
  
    val = (i >= 0 ? Qt : Qnil);
-  if (NILP (Vinhibit_changing_match_data) && i >= 0)
+  if (preserve_match_data && i >= 0)
    {
      for (i = 0; i < search_regs.num_regs; i++)
        if (search_regs.start[i] >= 0)
@@ -338,7 +356,7 @@ looking_at_1 (Lisp_Object string, bool posix)
      XSETBUFFER (last_thing_searched, current_buffer);
    }
  
-  return val;
+  return unbind_to (count, val);
  }
  
  DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
@@ -396,15 +414,14 @@ string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
    set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
                          BVAR (current_buffer, case_eqv_table));
  
-  bufp = compile_pattern (regexp,
-                         (NILP (Vinhibit_changing_match_data)
-                          ? &search_regs : NULL),
-                         (!NILP (BVAR (current_buffer, case_fold_search))
-                          ? BVAR (current_buffer, case_canon_table) : Qnil),
-                         posix,
-                         STRING_MULTIBYTE (string));
+  bufp = &compile_pattern (regexp,
+                           (NILP (Vinhibit_changing_match_data)
+                            ? &search_regs : NULL),
+                           (!NILP (BVAR (current_buffer, case_fold_search))
+                            ? BVAR (current_buffer, case_canon_table) : Qnil),
+                           posix,
+                           STRING_MULTIBYTE (string))->buf;
    re_match_object = string;
-
    val = re_search (bufp, SSDATA (string),
                    SBYTES (string), pos_byte,
                    SBYTES (string) - pos_byte,
@@ -471,10 +488,9 @@ fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
    ptrdiff_t val;
    struct re_pattern_buffer *bufp;
  
-  bufp = compile_pattern (regexp, 0, table,
-                         0, STRING_MULTIBYTE (string));
+  bufp = &compile_pattern (regexp, 0, table,
+                           0, STRING_MULTIBYTE (string))->buf;
    re_match_object = string;
-
    val = re_search (bufp, SSDATA (string),
                    SBYTES (string), 0,
                    SBYTES (string), 0);
@@ -494,10 +510,10 @@ fast_c_string_match_ignore_case (Lisp_Object regexp,
    struct re_pattern_buffer *bufp;
  
    regexp = string_make_unibyte (regexp);
+  bufp = &compile_pattern (regexp, 0,
+                           Vascii_canon_table, 0,
+                           0)->buf;
    re_match_object = Qt;
-  bufp = compile_pattern (regexp, 0,
-                         Vascii_canon_table, 0,
-                         0);
    val = re_search (bufp, string, len, 0, len, 0);
    return val;
  }
@@ -513,7 +529,6 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
                  ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
  {
    bool multibyte;
-  struct re_pattern_buffer *buf;
    unsigned char *p1, *p2;
    ptrdiff_t s1, s2;
    ptrdiff_t len;
@@ -528,7 +543,6 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
        s1 = 0;
        p2 = SDATA (string);
        s2 = SBYTES (string);
-      re_match_object = string;
        multibyte = STRING_MULTIBYTE (string);
      }
    else
@@ -554,16 +568,19 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
           s1 = ZV_BYTE - BEGV_BYTE;
           s2 = 0;
         }
-      re_match_object = Qnil;
        multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
      }
  
-  buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
+  struct regexp_cache *cache_entry =
+    compile_pattern (regexp, 0, Qnil, 0, multibyte);
+  ptrdiff_t count = SPECPDL_INDEX ();
    freeze_buffer_relocation ();
-  len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
+  freeze_pattern (cache_entry);
+  re_match_object = STRINGP (string) ? string : Qnil;
+  len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
                     pos_byte, NULL, limit_byte);
-  thaw_buffer_relocation ();
  
+  unbind_to (count, Qnil);
    return len;
  }
  
@@ -1151,355 +1168,372 @@ while (0)
  static struct re_registers search_regs_1;
  
  static EMACS_INT
-search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
-              ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
-              int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
+search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
+                  ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
+                  Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
  {
-  ptrdiff_t len = SCHARS (string);
-  ptrdiff_t len_byte = SBYTES (string);
-  register ptrdiff_t i;
+  unsigned char *p1, *p2;
+  ptrdiff_t s1, s2;
  
-  if (running_asynch_code)
-    save_search_regs ();
+  /* Snapshot in case Lisp changes the value.  */
+  bool preserve_match_data = NILP (Vinhibit_changing_match_data);
  
-  /* Searching 0 times means don't move.  */
-  /* Null string is found at starting position.  */
-  if (len == 0 || n == 0)
+  struct regexp_cache *cache_entry =
+    compile_pattern (string,
+                     preserve_match_data ? &search_regs : &search_regs_1,
+                     trt, posix,
+                     !NILP (BVAR (current_buffer, enable_multibyte_characters)));
+  struct re_pattern_buffer *bufp = &cache_entry->buf;
+
+  maybe_quit ();               /* Do a pending quit right away,
+                                  to avoid paradoxical behavior */
+  /* Get pointers and sizes of the two strings
+     that make up the visible portion of the buffer. */
+
+  p1 = BEGV_ADDR;
+  s1 = GPT_BYTE - BEGV_BYTE;
+  p2 = GAP_END_ADDR;
+  s2 = ZV_BYTE - GPT_BYTE;
+  if (s1 < 0)
      {
-      set_search_regs (pos_byte, 0);
-      return pos;
+      p2 = p1;
+      s2 = ZV_BYTE - BEGV_BYTE;
+      s1 = 0;
      }
-
-  if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
+  if (s2 < 0)
      {
-      unsigned char *p1, *p2;
-      ptrdiff_t s1, s2;
-      struct re_pattern_buffer *bufp;
+      s1 = ZV_BYTE - BEGV_BYTE;
+      s2 = 0;
+    }
  
-      bufp = compile_pattern (string,
-                             (NILP (Vinhibit_changing_match_data)
-                              ? &search_regs : &search_regs_1),
-                             trt, posix,
-                             !NILP (BVAR (current_buffer, enable_multibyte_characters)));
+  ptrdiff_t count = SPECPDL_INDEX ();
+  freeze_buffer_relocation ();
+  freeze_pattern (cache_entry);
  
-      maybe_quit ();           /* Do a pending quit right away,
-                                  to avoid paradoxical behavior */
-      /* Get pointers and sizes of the two strings
-        that make up the visible portion of the buffer. */
+  while (n < 0)
+    {
+      ptrdiff_t val;
  
-      p1 = BEGV_ADDR;
-      s1 = GPT_BYTE - BEGV_BYTE;
-      p2 = GAP_END_ADDR;
-      s2 = ZV_BYTE - GPT_BYTE;
-      if (s1 < 0)
-       {
-         p2 = p1;
-         s2 = ZV_BYTE - BEGV_BYTE;
-         s1 = 0;
-       }
-      if (s2 < 0)
-       {
-         s1 = ZV_BYTE - BEGV_BYTE;
-         s2 = 0;
-       }
        re_match_object = Qnil;
+      val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
+                         pos_byte - BEGV_BYTE, lim_byte - pos_byte,
+                         preserve_match_data ? &search_regs : &search_regs_1,
+                         /* Don't allow match past current point */
+                         pos_byte - BEGV_BYTE);
+      if (val == -2)
+        {
+          matcher_overflow ();
+        }
+      if (val >= 0)
+        {
+          if (preserve_match_data)
+            {
+              pos_byte = search_regs.start[0] + BEGV_BYTE;
+              for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
+                if (search_regs.start[i] >= 0)
+                  {
+                    search_regs.start[i]
+                      = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
+                    search_regs.end[i]
+                      = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
+                  }
+              XSETBUFFER (last_thing_searched, current_buffer);
+              /* Set pos to the new position. */
+              pos = search_regs.start[0];
+            }
+          else
+            {
+              pos_byte = search_regs_1.start[0] + BEGV_BYTE;
+              /* Set pos to the new position.  */
+              pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
+            }
+        }
+      else
+        {
+          unbind_to (count, Qnil);
+          return (n);
+        }
+      n++;
+      maybe_quit ();
+    }
+  while (n > 0)
+    {
+      ptrdiff_t val;
  
-      freeze_buffer_relocation ();
+      re_match_object = Qnil;
+      val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
+                         pos_byte - BEGV_BYTE, lim_byte - pos_byte,
+                         preserve_match_data ? &search_regs : &search_regs_1,
+                         lim_byte - BEGV_BYTE);
+      if (val == -2)
+        {
+          matcher_overflow ();
+        }
+      if (val >= 0)
+        {
+          if (preserve_match_data)
+            {
+              pos_byte = search_regs.end[0] + BEGV_BYTE;
+              for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
+                if (search_regs.start[i] >= 0)
+                  {
+                    search_regs.start[i]
+                      = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
+                    search_regs.end[i]
+                      = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
+                  }
+              XSETBUFFER (last_thing_searched, current_buffer);
+              pos = search_regs.end[0];
+            }
+          else
+            {
+              pos_byte = search_regs_1.end[0] + BEGV_BYTE;
+              pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
+            }
+        }
+      else
+        {
+          unbind_to (count, Qnil);
+          return (0 - n);
+        }
+      n--;
+      maybe_quit ();
+    }
+  unbind_to (count, Qnil);
+  return (pos);
+}
  
-      while (n < 0)
-       {
-         ptrdiff_t val;
-
-         val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
-                            pos_byte - BEGV_BYTE, lim_byte - pos_byte,
-                            (NILP (Vinhibit_changing_match_data)
-                             ? &search_regs : &search_regs_1),
-                            /* Don't allow match past current point */
-                            pos_byte - BEGV_BYTE);
-         if (val == -2)
-           {
-             matcher_overflow ();
-           }
-         if (val >= 0)
-           {
-             if (NILP (Vinhibit_changing_match_data))
-               {
-                 pos_byte = search_regs.start[0] + BEGV_BYTE;
-                 for (i = 0; i < search_regs.num_regs; i++)
-                   if (search_regs.start[i] >= 0)
-                     {
-                       search_regs.start[i]
-                         = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
-                       search_regs.end[i]
-                         = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
-                     }
-                 XSETBUFFER (last_thing_searched, current_buffer);
-                 /* Set pos to the new position. */
-                 pos = search_regs.start[0];
-               }
-             else
-               {
-                 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
-                 /* Set pos to the new position.  */
-                 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
-               }
-           }
-         else
-           {
-             thaw_buffer_relocation ();
-             return (n);
-           }
-         n++;
-         maybe_quit ();
-       }
-      while (n > 0)
-       {
-         ptrdiff_t val;
-
-         val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
-                            pos_byte - BEGV_BYTE, lim_byte - pos_byte,
-                            (NILP (Vinhibit_changing_match_data)
-                             ? &search_regs : &search_regs_1),
-                            lim_byte - BEGV_BYTE);
-         if (val == -2)
-           {
-             matcher_overflow ();
-           }
-         if (val >= 0)
-           {
-             if (NILP (Vinhibit_changing_match_data))
-               {
-                 pos_byte = search_regs.end[0] + BEGV_BYTE;
-                 for (i = 0; i < search_regs.num_regs; i++)
-                   if (search_regs.start[i] >= 0)
-                     {
-                       search_regs.start[i]
-                         = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
-                       search_regs.end[i]
-                         = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
-                     }
-                 XSETBUFFER (last_thing_searched, current_buffer);
-                 pos = search_regs.end[0];
-               }
-             else
-               {
-                 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
-                 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
-               }
-           }
-         else
-           {
-             thaw_buffer_relocation ();
-             return (0 - n);
-           }
-         n--;
-         maybe_quit ();
-       }
-      thaw_buffer_relocation ();
-      return (pos);
+static EMACS_INT
+search_buffer_non_re (Lisp_Object string, ptrdiff_t pos,
+                      ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte,
+                      EMACS_INT n, int RE, Lisp_Object trt, Lisp_Object inverse_trt,
+                      bool posix)
+{
+  unsigned char *raw_pattern, *pat;
+  ptrdiff_t raw_pattern_size;
+  ptrdiff_t raw_pattern_size_byte;
+  unsigned char *patbuf;
+  bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
+  unsigned char *base_pat;
+  /* Set to positive if we find a non-ASCII char that need
+     translation.  Otherwise set to zero later.  */
+  int char_base = -1;
+  bool boyer_moore_ok = 1;
+  USE_SAFE_ALLOCA;
+
+  /* MULTIBYTE says whether the text to be searched is multibyte.
+     We must convert PATTERN to match that, or we will not really
+     find things right.  */
+
+  if (multibyte == STRING_MULTIBYTE (string))
+    {
+      raw_pattern = SDATA (string);
+      raw_pattern_size = SCHARS (string);
+      raw_pattern_size_byte = SBYTES (string);
      }
-  else                         /* non-RE case */
+  else if (multibyte)
      {
-      unsigned char *raw_pattern, *pat;
-      ptrdiff_t raw_pattern_size;
-      ptrdiff_t raw_pattern_size_byte;
-      unsigned char *patbuf;
-      bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
-      unsigned char *base_pat;
-      /* Set to positive if we find a non-ASCII char that need
-        translation.  Otherwise set to zero later.  */
-      int char_base = -1;
-      bool boyer_moore_ok = 1;
-      USE_SAFE_ALLOCA;
-
-      /* MULTIBYTE says whether the text to be searched is multibyte.
-        We must convert PATTERN to match that, or we will not really
-        find things right.  */
-
-      if (multibyte == STRING_MULTIBYTE (string))
-       {
-         raw_pattern = SDATA (string);
-         raw_pattern_size = SCHARS (string);
-         raw_pattern_size_byte = SBYTES (string);
-       }
-      else if (multibyte)
-       {
-         raw_pattern_size = SCHARS (string);
-         raw_pattern_size_byte
-           = count_size_as_multibyte (SDATA (string),
-                                      raw_pattern_size);
-         raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
-         copy_text (SDATA (string), raw_pattern,
-                    SCHARS (string), 0, 1);
-       }
-      else
-       {
-         /* Converting multibyte to single-byte.
-
-            ??? Perhaps this conversion should be done in a special way
-            by subtracting nonascii-insert-offset from each non-ASCII char,
-            so that only the multibyte chars which really correspond to
-            the chosen single-byte character set can possibly match.  */
-         raw_pattern_size = SCHARS (string);
-         raw_pattern_size_byte = SCHARS (string);
-         raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
-         copy_text (SDATA (string), raw_pattern,
-                    SBYTES (string), 1, 0);
-       }
+      raw_pattern_size = SCHARS (string);
+      raw_pattern_size_byte
+        = count_size_as_multibyte (SDATA (string),
+                                   raw_pattern_size);
+      raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
+      copy_text (SDATA (string), raw_pattern,
+                 SCHARS (string), 0, 1);
+    }
+  else
+    {
+      /* Converting multibyte to single-byte.
+
+         ??? Perhaps this conversion should be done in a special way
+         by subtracting nonascii-insert-offset from each non-ASCII char,
+         so that only the multibyte chars which really correspond to
+         the chosen single-byte character set can possibly match.  */
+      raw_pattern_size = SCHARS (string);
+      raw_pattern_size_byte = SCHARS (string);
+      raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
+      copy_text (SDATA (string), raw_pattern,
+                 SBYTES (string), 1, 0);
+    }
  
-      /* Copy and optionally translate the pattern.  */
-      len = raw_pattern_size;
-      len_byte = raw_pattern_size_byte;
-      SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
-      pat = patbuf;
-      base_pat = raw_pattern;
-      if (multibyte)
-       {
-         /* Fill patbuf by translated characters in STRING while
-            checking if we can use boyer-moore search.  If TRT is
-            non-nil, we can use boyer-moore search only if TRT can be
-            represented by the byte array of 256 elements.  For that,
-            all non-ASCII case-equivalents of all case-sensitive
-            characters in STRING must belong to the same character
-            group (two characters belong to the same group iff their
-            multibyte forms are the same except for the last byte;
-            i.e. every 64 characters form a group; U+0000..U+003F,
-            U+0040..U+007F, U+0080..U+00BF, ...).  */
-
-         while (--len >= 0)
-           {
-             unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
-             int c, translated, inverse;
-             int in_charlen, charlen;
-
-             /* If we got here and the RE flag is set, it's because we're
-                dealing with a regexp known to be trivial, so the backslash
-                just quotes the next character.  */
-             if (RE && *base_pat == '\\')
-               {
-                 len--;
-                 raw_pattern_size--;
-                 len_byte--;
-                 base_pat++;
-               }
+  /* Copy and optionally translate the pattern.  */
+  ptrdiff_t len = raw_pattern_size;
+  ptrdiff_t len_byte = raw_pattern_size_byte;
+  SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
+  pat = patbuf;
+  base_pat = raw_pattern;
+  if (multibyte)
+    {
+      /* Fill patbuf by translated characters in STRING while
+         checking if we can use boyer-moore search.  If TRT is
+         non-nil, we can use boyer-moore search only if TRT can be
+         represented by the byte array of 256 elements.  For that,
+         all non-ASCII case-equivalents of all case-sensitive
+         characters in STRING must belong to the same character
+         group (two characters belong to the same group iff their
+         multibyte forms are the same except for the last byte;
+         i.e. every 64 characters form a group; U+0000..U+003F,
+         U+0040..U+007F, U+0080..U+00BF, ...).  */
+
+      while (--len >= 0)
+        {
+          unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
+          int c, translated, inverse;
+          int in_charlen, charlen;
+
+          /* If we got here and the RE flag is set, it's because we're
+             dealing with a regexp known to be trivial, so the backslash
+             just quotes the next character.  */
+          if (RE && *base_pat == '\\')
+            {
+              len--;
+              raw_pattern_size--;
+              len_byte--;
+              base_pat++;
+            }
  
-             c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
+          c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
  
-             if (NILP (trt))
-               {
-                 str = base_pat;
-                 charlen = in_charlen;
-               }
-             else
-               {
-                 /* Translate the character.  */
-                 TRANSLATE (translated, trt, c);
-                 charlen = CHAR_STRING (translated, str_base);
-                 str = str_base;
-
-                 /* Check if C has any other case-equivalents.  */
-                 TRANSLATE (inverse, inverse_trt, c);
-                 /* If so, check if we can use boyer-moore.  */
-                 if (c != inverse && boyer_moore_ok)
-                   {
-                     /* Check if all equivalents belong to the same
-                        group of characters.  Note that the check of C
-                        itself is done by the last iteration.  */
-                     int this_char_base = -1;
+          if (NILP (trt))
+            {
+              str = base_pat;
+              charlen = in_charlen;
+            }
+          else
+            {
+              /* Translate the character.  */
+              TRANSLATE (translated, trt, c);
+              charlen = CHAR_STRING (translated, str_base);
+              str = str_base;
+
+              /* Check if C has any other case-equivalents.  */
+              TRANSLATE (inverse, inverse_trt, c);
+              /* If so, check if we can use boyer-moore.  */
+              if (c != inverse && boyer_moore_ok)
+                {
+                  /* Check if all equivalents belong to the same
+                     group of characters.  Note that the check of C
+                     itself is done by the last iteration.  */
+                  int this_char_base = -1;
+
+                  while (boyer_moore_ok)
+                    {
+                      if (ASCII_CHAR_P (inverse))
+                        {
+                          if (this_char_base > 0)
+                            boyer_moore_ok = 0;
+                          else
+                            this_char_base = 0;
+                        }
+                      else if (CHAR_BYTE8_P (inverse))
+                        /* Boyer-moore search can't handle a
+                           translation of an eight-bit
+                           character.  */
+                        boyer_moore_ok = 0;
+                      else if (this_char_base < 0)
+                        {
+                          this_char_base = inverse & ~0x3F;
+                          if (char_base < 0)
+                            char_base = this_char_base;
+                          else if (this_char_base != char_base)
+                            boyer_moore_ok = 0;
+                        }
+                      else if ((inverse & ~0x3F) != this_char_base)
+                        boyer_moore_ok = 0;
+                      if (c == inverse)
+                        break;
+                      TRANSLATE (inverse, inverse_trt, inverse);
+                    }
+                }
+            }
  
-                     while (boyer_moore_ok)
-                       {
-                         if (ASCII_CHAR_P (inverse))
-                           {
-                             if (this_char_base > 0)
-                               boyer_moore_ok = 0;
-                             else
-                               this_char_base = 0;
-                           }
-                         else if (CHAR_BYTE8_P (inverse))
-                           /* Boyer-moore search can't handle a
-                              translation of an eight-bit
-                              character.  */
-                           boyer_moore_ok = 0;
-                         else if (this_char_base < 0)
-                           {
-                             this_char_base = inverse & ~0x3F;
-                             if (char_base < 0)
-                               char_base = this_char_base;
-                             else if (this_char_base != char_base)
-                               boyer_moore_ok = 0;
-                           }
-                         else if ((inverse & ~0x3F) != this_char_base)
-                           boyer_moore_ok = 0;
-                         if (c == inverse)
-                           break;
-                         TRANSLATE (inverse, inverse_trt, inverse);
-                       }
-                   }
-               }
+          /* Store this character into the translated pattern.  */
+          memcpy (pat, str, charlen);
+          pat += charlen;
+          base_pat += in_charlen;
+          len_byte -= in_charlen;
+        }
  
-             /* Store this character into the translated pattern.  */
-             memcpy (pat, str, charlen);
-             pat += charlen;
-             base_pat += in_charlen;
-             len_byte -= in_charlen;
-           }
+      /* If char_base is still negative we didn't find any translated
+         non-ASCII characters.  */
+      if (char_base < 0)
+        char_base = 0;
+    }
+  else
+    {
+      /* Unibyte buffer.  */
+      char_base = 0;
+      while (--len >= 0)
+        {
+          int c, translated, inverse;
  
-         /* If char_base is still negative we didn't find any translated
-            non-ASCII characters.  */
-         if (char_base < 0)
-           char_base = 0;
-       }
-      else
-       {
-         /* Unibyte buffer.  */
-         char_base = 0;
-         while (--len >= 0)
-           {
-             int c, translated, inverse;
+          /* If we got here and the RE flag is set, it's because we're
+             dealing with a regexp known to be trivial, so the backslash
+             just quotes the next character.  */
+          if (RE && *base_pat == '\\')
+            {
+              len--;
+              raw_pattern_size--;
+              base_pat++;
+            }
+          c = *base_pat++;
+          TRANSLATE (translated, trt, c);
+          *pat++ = translated;
+          /* Check that none of C's equivalents violates the
+             assumptions of boyer_moore.  */
+          TRANSLATE (inverse, inverse_trt, c);
+          while (1)
+            {
+              if (inverse >= 0200)
+                {
+                  boyer_moore_ok = 0;
+                  break;
+                }
+              if (c == inverse)
+                break;
+              TRANSLATE (inverse, inverse_trt, inverse);
+            }
+        }
+    }
  
-             /* If we got here and the RE flag is set, it's because we're
-                dealing with a regexp known to be trivial, so the backslash
-                just quotes the next character.  */
-             if (RE && *base_pat == '\\')
-               {
-                 len--;
-                 raw_pattern_size--;
-                 base_pat++;
-               }
-             c = *base_pat++;
-             TRANSLATE (translated, trt, c);
-             *pat++ = translated;
-             /* Check that none of C's equivalents violates the
-                assumptions of boyer_moore.  */
-             TRANSLATE (inverse, inverse_trt, c);
-             while (1)
-               {
-                 if (inverse >= 0200)
-                   {
-                     boyer_moore_ok = 0;
-                     break;
-                   }
-                 if (c == inverse)
-                   break;
-                 TRANSLATE (inverse, inverse_trt, inverse);
-               }
-           }
-       }
+  len_byte = pat - patbuf;
+  pat = base_pat = patbuf;
+
+  EMACS_INT result
+    = (boyer_moore_ok
+       ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
+                      pos_byte, lim_byte,
+                      char_base)
+       : simple_search (n, pat, raw_pattern_size, len_byte, trt,
+                        pos, pos_byte, lim, lim_byte));
+  SAFE_FREE ();
+  return result;
+}
+
+static EMACS_INT
+search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
+              ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
+              int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
+{
+  if (running_asynch_code)
+    save_search_regs ();
  
-      len_byte = pat - patbuf;
-      pat = base_pat = patbuf;
-
-      EMACS_INT result
-       = (boyer_moore_ok
-          ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
-                         pos_byte, lim_byte,
-                         char_base)
-          : simple_search (n, pat, raw_pattern_size, len_byte, trt,
-                           pos, pos_byte, lim, lim_byte));
-      SAFE_FREE ();
-      return result;
+  /* Searching 0 times means don't move, and the null string is found
+     at the starting position; in both cases return POS unchanged.  */
+  if (n == 0 || SCHARS (string) == 0)
+    {
+      set_search_regs (pos_byte, 0);
+      return pos;
      }
+
+  if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
+    pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte,
+                            n, trt, inverse_trt, posix);
+  else  /* Not a regexp, or a trivial one with no search-spaces-regexp.  */
+    pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte,
+                                n, RE, trt, inverse_trt, posix);
+
+  return pos;
  }
  \f
  /* Do a simple string search N times for the string PAT,
@@ -3353,6 +3387,7 @@ the buffer.  If the buffer doesn't have a cache, the value is nil.  */)
    return val;
  }
  \f
+
  void
  syms_of_search (void)
  {
@@ -3365,6 +3400,7 @@ syms_of_search (void)
        searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
        searchbufs[i].regexp = Qnil;
        searchbufs[i].f_whitespace_regexp = Qnil;
+      searchbufs[i].busy = false;
        searchbufs[i].syntax_table = Qnil;
        staticpro (&searchbufs[i].regexp);
        staticpro (&searchbufs[i].f_whitespace_regexp);
@@ -3405,6 +3441,9 @@ syms_of_search (void)
    saved_last_thing_searched = Qnil;
    staticpro (&saved_last_thing_searched);
  
+  re_match_object = Qnil;
+  staticpro (&re_match_object);
+
    DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
        doc: /* Regexp to substitute for bunches of spaces in regexp search.
  Some commands use this for user-specified regexps.
diff --git a/src/syntax.h b/src/syntax.h

index 2171cbbba45a3273eeadb41403f2f7966ef501fb..f02a17ce8d69233eec438ed1387b95232822bbd1 100644 (file)
--- a/src/syntax.h
+++ b/src/syntax.h
@@ -186,13 +186,6 @@ UPDATE_SYNTAX_TABLE_FORWARD (ptrdiff_t charpos)
                                  false, gl_state.object);
  }
  
-INLINE void
-UPDATE_SYNTAX_TABLE_FORWARD_FAST (ptrdiff_t charpos)
-{
-  if (parse_sexp_lookup_properties && charpos >= gl_state.e_property)
-    update_syntax_table (charpos + gl_state.offset, 1, false, gl_state.object);
-}
-
  /* Make syntax table state (gl_state) good for CHARPOS, assuming it is
     currently good for a position after CHARPOS.  */
  
@@ -212,13 +205,6 @@ UPDATE_SYNTAX_TABLE (ptrdiff_t charpos)
    UPDATE_SYNTAX_TABLE_FORWARD (charpos);
  }
  
-INLINE void
-UPDATE_SYNTAX_TABLE_FAST (ptrdiff_t charpos)
-{
-  UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
-  UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos);
-}
-
  /* Set up the buffer-global syntax table.  */
  
  INLINE void
diff --git a/src/thread.h b/src/thread.h

index 2c8914e1b28ef50ccbc876a3fcd9d6bcc359aa60..c10e5ecb75871c9dfc0dda44edca885c3068d710 100644 (file)
--- a/src/thread.h
+++ b/src/thread.h
@@ -137,15 +137,6 @@ struct thread_state
    struct re_registers m_saved_search_regs;
  #define saved_search_regs (current_thread->m_saved_search_regs)
  
-  /* This is the string or buffer in which we
-     are matching.  It is used for looking up syntax properties.
-
-     If the value is a Lisp string object, we are matching text in that
-     string; if it's nil, we are matching text in the current buffer; if
-     it's t, we are matching text in a C string.  */
-  Lisp_Object m_re_match_object;
-#define re_match_object (current_thread->m_re_match_object)
-
    /* This member is different from waiting_for_input.
       It is used to communicate to a lisp process-filter/sentinel (via the
       function Fwaiting_for_user_input_p) whether Emacs was waiting
author	Daniel Colascione <dancol@dancol.org>
	Sat, 16 Jun 2018 20:46:10 +0000 (13:46 -0700)
committer	Daniel Colascione <dancol@dancol.org>
	Sat, 16 Jun 2018 20:46:38 +0000 (13:46 -0700)
src/lisp.h		patch \| blob \| history
src/regex.c		patch \| blob \| history
src/regex.h		patch \| blob \| history
src/search.c		patch \| blob \| history
src/syntax.h		patch \| blob \| history
src/thread.h		patch \| blob \| history