git.eshelyaron.com Git - emacs.git/commitdiff
(regex_compile): Remove the `regnum' counter.
author Stefan Monnier <monnier@iro.umontreal.ca>
Tue, 12 Jun 2007 18:40:18 +0000 (18:40 +0000)
committer Stefan Monnier <monnier@iro.umontreal.ca>
Tue, 12 Jun 2007 18:40:18 +0000 (18:40 +0000)
Use bufp->re_nsub instead.  Add support for \(?N:RE\).

etc/NEWS
lispref/searching.texi
src/ChangeLog
src/regex.c

index 83e7477463dcf36d81c3f17c30cd084e9a4992f0..2c740fe0ef8e4187ce2c6b38412ad4b30dc62e7e 100644 (file)
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -70,6 +70,9 @@ considered for update.
 \f
 * Lisp Changes in Emacs 23.1
 
++++
+** The regexp form \(?<num>:<regexp>\) specifies the group number explicitly.
+
 \f
 * New Packages for Lisp Programming in Emacs 23.1
 
index 10e8c672962e00089cda4858ba2c10356a01e597..f923a58f1125f1cd26cd1a4abaad62d5ea689635 100644 (file)
@@ -650,6 +650,15 @@ Shy groups are particularly useful for mechanically-constructed regular
 expressions because they can be added automatically without altering the
 numbering of any ordinary, non-shy groups.
 
+@item \(?@var{num}: @dots{} \)
+is the @dfn{explicitly numbered group} construct.  Normal groups get
+their number implicitly, based on their position, which can be
+inconvenient.  This construct allows you to force a particular group
+number.  There is no particular restriction on the numbering,
+e.g.@: you can have several groups with the same number.
+An implicitly numbered group always gets the smallest integer larger
+than the number of any previous group.
+
 @item \@var{digit}
 matches the same text that matched the @var{digit}th occurrence of a
 grouping (@samp{\( @dots{} \)}) construct.
index 26b07eb45e809916ca7ddd4d7cd3cf2c64d4a920..1ce80e41fc05a77d944a9267f90b53e145874139 100644 (file)
@@ -1,3 +1,8 @@
+2007-06-12  Stefan Monnier  <monnier@iro.umontreal.ca>
+
+       * regex.c (regex_compile): Remove the `regnum' counter.
+       Use bufp->re_nsub instead.  Add support for \(?N:RE\).
+
 2007-06-12  Glenn Morris  <rgm@gnu.org>
 
        * config.in (HAVE_GIF): Doc fix.
index 7784a3ae616d485f8efc3f884bdb2e9a3b27a0f2..1e80b9bbeef285c1467a49d25990d5fd300e267f 100644 (file)
@@ -2482,11 +2482,6 @@ regex_compile (pattern, size, syntax, bufp)
      last -- ends with a forward jump of this sort.  */
   unsigned char *fixup_alt_jump = 0;
 
-  /* Counts open-groups as they are encountered.  Remembered for the
-     matching close-group on the compile stack, so the same register
-     number is put in the stop_memory as the start_memory.  */
-  regnum_t regnum = 0;
-
   /* Work area for range table of charset.  */
   struct range_table_work_area range_table_work;
 
@@ -3123,28 +3118,54 @@ regex_compile (pattern, size, syntax, bufp)
            handle_open:
              {
                int shy = 0;
+               regnum_t regnum = 0;
                if (p+1 < pend)
                  {
                    /* Look for a special (?...) construct */
                    if ((syntax & RE_SHY_GROUPS) && *p == '?')
                      {
                        PATFETCH (c); /* Gobble up the '?'.  */
-                       PATFETCH (c);
-                       switch (c)
+                       while (!shy)
                          {
-                         case ':': shy = 1; break;
-                         default:
-                           /* Only (?:...) is supported right now. */
-                           FREE_STACK_RETURN (REG_BADPAT);
+                           PATFETCH (c);
+                           switch (c)
+                             {
+                             case ':': shy = 1; break;
+                             case '0':
+                               /* An explicitly specified regnum must start
+                                  with non-0. */
+                               if (regnum == 0)
+                                 FREE_STACK_RETURN (REG_BADPAT);
+                             case '1': case '2': case '3': case '4':
+                             case '5': case '6': case '7': case '8': case '9':
+                               regnum = 10*regnum + (c - '0'); break;
+                             default:
+                               /* Only (?:...) and (?N:...) are supported right now. */
+                               FREE_STACK_RETURN (REG_BADPAT);
+                             }
                          }
                      }
                  }
 
                if (!shy)
-                 {
-                   bufp->re_nsub++;
-                   regnum++;
+                 regnum = ++bufp->re_nsub;
+               else if (regnum)
+                 { /* It's actually not shy, but explicitly numbered.  */
+                   shy = 0;
+                   if (regnum > bufp->re_nsub)
+                     bufp->re_nsub = regnum;
+                   else if (regnum > bufp->re_nsub
+                            /* Ideally, we'd want to check that the specified
+                               group can't have matched (i.e. all subgroups
+                               using the same regnum are in other branches of
+                               OR patterns), but we don't currently keep track
+                               of enough info to do that easily.  */
+                            || group_in_compile_stack (compile_stack, regnum))
+                     FREE_STACK_RETURN (REG_BADPAT);
                  }
+               else
+                 /* It's really shy.  */
+                 regnum = - bufp->re_nsub;
 
                if (COMPILE_STACK_FULL)
                  {
@@ -3163,12 +3184,11 @@ regex_compile (pattern, size, syntax, bufp)
                COMPILE_STACK_TOP.fixup_alt_jump
                  = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
                COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
-               COMPILE_STACK_TOP.regnum = shy ? -regnum : regnum;
+               COMPILE_STACK_TOP.regnum = regnum;
 
-               /* Do not push a
-                  start_memory for groups beyond the last one we can
-                  represent in the compiled pattern.  */
-               if (regnum <= MAX_REGNUM && !shy)
+               /* Do not push a start_memory for groups beyond the last one
+                  we can represent in the compiled pattern.  */
+               if (regnum <= MAX_REGNUM && regnum > 0)
                  BUF_PUSH_2 (start_memory, regnum);
 
                compile_stack.avail++;
@@ -3213,7 +3233,7 @@ regex_compile (pattern, size, syntax, bufp)
                /* We don't just want to restore into `regnum', because
                   later groups should continue to be numbered higher,
                   as in `(ab)c(de)' -- the second group is #2.  */
-               regnum_t this_group_regnum;
+               regnum_t regnum;
 
                compile_stack.avail--;
                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
@@ -3222,7 +3242,7 @@ regex_compile (pattern, size, syntax, bufp)
                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
                    : 0;
                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
-               this_group_regnum = COMPILE_STACK_TOP.regnum;
+               regnum = COMPILE_STACK_TOP.regnum;
                /* If we've reached MAX_REGNUM groups, then this open
                   won't actually generate any code, so we'll have to
                   clear pending_exact explicitly.  */
@@ -3230,8 +3250,8 @@ regex_compile (pattern, size, syntax, bufp)
 
                /* We're at the end of the group, so now we know how many
                   groups were inside this one.  */
-               if (this_group_regnum <= MAX_REGNUM && this_group_regnum > 0)
-                 BUF_PUSH_2 (stop_memory, this_group_regnum);
+               if (regnum <= MAX_REGNUM && regnum > 0)
+                 BUF_PUSH_2 (stop_memory, regnum);
              }
              break;
 
@@ -3557,8 +3577,9 @@ regex_compile (pattern, size, syntax, bufp)
 
                reg = c - '0';
 
-               /* Can't back reference to a subexpression before its end.  */
-               if (reg > regnum || group_in_compile_stack (compile_stack, reg))
+               if (reg > bufp->re_nsub || reg < 1
+                   /* Can't back reference to a subexp before its end.  */
+                   || group_in_compile_stack (compile_stack, reg))
                  FREE_STACK_RETURN (REG_ESUBREG);
 
                laststart = b;