Add support for new '\_<' and '\_>' regexp operators, matching the beginning and ends of symbols.

author Stefan Monnier <monnier@iro.umontreal.ca>

Wed, 19 May 2004 16:38:34 +0000 (16:38 +0000)

committer Stefan Monnier <monnier@iro.umontreal.ca>

Wed, 19 May 2004 16:38:34 +0000 (16:38 +0000)
author Stefan Monnier <monnier@iro.umontreal.ca>
Wed, 19 May 2004 16:38:34 +0000 (16:38 +0000)
committer Stefan Monnier <monnier@iro.umontreal.ca>
Wed, 19 May 2004 16:38:34 +0000 (16:38 +0000)
diff --git a/src/ChangeLog b/src/ChangeLog

index 6fcd3fa477c5d95d1757486073e1c027c7ffcaa4..c1f0706b9280697a58d615dc09284f6d99d96b9b 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,7 +1,33 @@
+2004-05-19  Jim Blandy  <jimb@redhat.com>
+
+       Add support for new '\_<' and '\_>' regexp operators, matching the
+       beginning and ends of symbols.
+       * regex.c (enum syntaxcode): Add Ssymbol.
+       (init_syntax_once): Set the syntax for '_' to Ssymbol, not Sword.
+       (re_opcode_t): New opcodes `symbeg' and `symend'.
+       (print_partial_compiled_pattern): Print the new opcodes properly.
+       (regex_compile): Parse the new operators.
+       (analyse_first): Skip symbeg and symend (they match only the empty string).
+       (mutually_exclusive_p): `symend' is mutually exclusive with \s_ and
+       \sw; `symbeg' is mutually exclusive with \S_ and \Sw.
+       (re_match_2_internal): Match symbeg and symend.
+
+       * search.c (trivial_regexp_p): \_ is no longer a trivial regexp.
+
  2004-05-19  Kim F. Storm  <storm@cua.dk>
  
         * .gdbinit (xsymbol): Fix last change.
  
+2004-05-18  Stefan Monnier  <monnier@iro.umontreal.ca>
+
+       * .gdbinit (xprintstr): New fun.
+       (xstring, xprintsym): Use it.
+
+       * w32proc.c (create_child): Use INTMASK.
+
+       * alloc.c (Fgarbage_collect): Do all the marking before flushing
+       unmarked elements of the undo list.
+
  2004-05-18  David Ponce  <david@dponce.com>
  
         * print.c (print): Reset print_depth before to call print_object.
diff --git a/src/regex.c b/src/regex.c

index a518ef81a0c865eb8c04babd8028382473de0a1d..0c1343bf58431143d8f1e3e8df7a1abaa794d858 100644 (file)
--- a/src/regex.c
+++ b/src/regex.c
@@ -2,7 +2,7 @@
     0.12.  (Implements POSIX draft P1003.2/D11.2, except for some of the
     internationalization features.)
  
-   Copyright (C) 1993,94,95,96,97,98,99,2000 Free Software Foundation, Inc.
+   Copyright (C) 1993,94,95,96,97,98,99,2000,04  Free Software Foundation, Inc.
  
     This program is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -217,7 +217,7 @@ char *realloc ();
  /* Define the syntax stuff for \<, \>, etc.  */
  
  /* Sword must be nonzero for the wordchar pattern commands in re_match_2.  */
-enum syntaxcode { Swhitespace = 0, Sword = 1 };
+enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
  
  # ifdef SWITCH_ENUM_BUG
  #  define SWITCH_ENUM_CAST(x) ((int)(x))
@@ -398,7 +398,7 @@ init_syntax_once ()
       if (ISALNUM (c))
         re_syntax_table[c] = Sword;
  
-   re_syntax_table['_'] = Sword;
+   re_syntax_table['_'] = Ssymbol;
  
     done = 1;
  }
@@ -655,6 +655,9 @@ typedef enum
    wordbound,   /* Succeeds if at a word boundary.  */
    notwordbound,        /* Succeeds if not at a word boundary.  */
  
+  symbeg,       /* Succeeds if at symbol beginning.  */
+  symend,       /* Succeeds if at symbol end.  */
+
         /* Matches any character whose syntax is specified.  Followed by
            a byte which contains a syntax code, e.g., Sword.  */
    syntaxspec,
@@ -1094,6 +1097,14 @@ print_partial_compiled_pattern (start, end)
         case wordend:
           fprintf (stderr, "/wordend");
  
+       case symbeg:
+         printf ("/symbeg");
+         break;
+
+       case symend:
+         printf ("/symend");
+         break;
+
         case syntaxspec:
           fprintf (stderr, "/syntaxspec");
           mcnt = *p++;
@@ -3398,6 +3409,19 @@ regex_compile (pattern, size, syntax, bufp)
               BUF_PUSH (wordend);
               break;
  
+           case '_':
+             if (syntax & RE_NO_GNU_OPS)
+               goto normal_char;
+              laststart = b;
+              PATFETCH (c);
+              if (c == '<')
+                BUF_PUSH (symbeg);
+              else if (c == '>')
+                BUF_PUSH (symend);
+              else
+                FREE_STACK_RETURN (REG_BADPAT);
+              break;
+
             case 'b':
               if (syntax & RE_NO_GNU_OPS)
                 goto normal_char;
@@ -3890,6 +3914,8 @@ analyse_first (p, pend, fastmap, multibyte)
         case notwordbound:
         case wordbeg:
         case wordend:
+       case symbeg:
+       case symend:
           continue;
  
  
@@ -4654,14 +4680,20 @@ mutually_exclusive_p (bufp, p1, p2)
        break;
  
      case wordend:
-    case notsyntaxspec:
+      return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
+    case symend:
        return ((re_opcode_t) *p1 == syntaxspec
-             && p1[1] == (op2 == wordend ? Sword : p2[1]));
+              && (p1[1] == Ssymbol || p1[1] == Sword));
+    case notsyntaxspec:
+      return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
  
      case wordbeg:
-    case syntaxspec:
+      return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
+    case symbeg:
        return ((re_opcode_t) *p1 == notsyntaxspec
-             && p1[1] == (op2 == wordbeg ? Sword : p2[1]));
+              && (p1[1] == Ssymbol || p1[1] == Sword));
+    case syntaxspec:
+      return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
  
      case wordbound:
        return (((re_opcode_t) *p1 == notsyntaxspec
@@ -5803,6 +5835,92 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
             }
           break;
  
+       case symbeg:
+         DEBUG_PRINT1 ("EXECUTING symbeg.\n");
+
+         /* We FAIL in one of the following cases: */
+
+         /* Case 1: D is at the end of string.  */
+         if (AT_STRINGS_END (d))
+           goto fail;
+         else
+           {
+             /* C1 is the character before D, S1 is the syntax of C1, C2
+                is the character at D, and S2 is the syntax of C2.  */
+             re_wchar_t c1, c2;
+             int s1, s2;
+#ifdef emacs
+             int offset = PTR_TO_OFFSET (d);
+             int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+             UPDATE_SYNTAX_TABLE (charpos);
+#endif
+             PREFETCH ();
+             c2 = RE_STRING_CHAR (d, dend - d);
+             s2 = SYNTAX (c2);
+       
+             /* Case 2: S2 is neither Sword nor Ssymbol. */
+             if (s2 != Sword && s2 != Ssymbol)
+               goto fail;
+
+             /* Case 3: D is not at the beginning of string ... */
+             if (!AT_STRINGS_BEG (d))
+               {
+                 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+                 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
+#endif
+                 s1 = SYNTAX (c1);
+
+                 /* ... and S1 is Sword or Ssymbol.  */
+                 if (s1 == Sword || s1 == Ssymbol)
+                   goto fail;
+               }
+           }
+         break;
+
+       case symend:
+         DEBUG_PRINT1 ("EXECUTING symend.\n");
+
+         /* We FAIL in one of the following cases: */
+
+         /* Case 1: D is at the beginning of string.  */
+         if (AT_STRINGS_BEG (d))
+           goto fail;
+         else
+           {
+             /* C1 is the character before D, S1 is the syntax of C1, C2
+                is the character at D, and S2 is the syntax of C2.  */
+             re_wchar_t c1, c2;
+             int s1, s2;
+#ifdef emacs
+             int offset = PTR_TO_OFFSET (d) - 1;
+             int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+             UPDATE_SYNTAX_TABLE (charpos);
+#endif
+             GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+             s1 = SYNTAX (c1);
+
+             /* Case 2: S1 is neither Ssymbol nor Sword.  */
+             if (s1 != Sword && s1 != Ssymbol)
+               goto fail;
+
+             /* Case 3: D is not at the end of string ... */
+             if (!AT_STRINGS_END (d))
+               {
+                 PREFETCH_NOLIMIT ();
+                 c2 = RE_STRING_CHAR (d, dend - d);
+#ifdef emacs
+                 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+#endif
+                 s2 = SYNTAX (c2);
+
+                 /* ... and S2 is Sword or Ssymbol.  */
+                 if (s2 == Sword || s2 == Ssymbol)
+                    goto fail;
+               }
+           }
+         break;
+
         case syntaxspec:
         case notsyntaxspec:
           not = (re_opcode_t) *(p - 1) == notsyntaxspec;
author	Stefan Monnier <monnier@iro.umontreal.ca>
	Wed, 19 May 2004 16:38:34 +0000 (16:38 +0000)
committer	Stefan Monnier <monnier@iro.umontreal.ca>
	Wed, 19 May 2004 16:38:34 +0000 (16:38 +0000)
src/ChangeLog		patch \| blob \| history
src/regex.c		patch \| blob \| history