* lisp/emacs-lisp/rx.el: Make it a superset of sregex.

author Stefan Monnier <monnier@iro.umontreal.ca>

Sun, 26 Dec 2010 23:17:09 +0000 (18:17 -0500)

committer Stefan Monnier <monnier@iro.umontreal.ca>

Sun, 26 Dec 2010 23:17:09 +0000 (18:17 -0500)
author Stefan Monnier <monnier@iro.umontreal.ca>
Sun, 26 Dec 2010 23:17:09 +0000 (18:17 -0500)
committer Stefan Monnier <monnier@iro.umontreal.ca>
Sun, 26 Dec 2010 23:17:09 +0000 (18:17 -0500)
diff --git a/etc/NEWS b/etc/NEWS

index f7288de8b13413e99c6412687985b25c4276558e..f21028adc8c58efb0a1401f5bbb13f6c50313efd 100644 (file)
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -541,6 +541,8 @@ listing object name completions when being sent text via
  
  *** An API for manipulating SQL product definitions has been added.
  
+** sregex.el is now obsolete, since rx.el is a strict superset.
+
  ** s-region.el is now declared obsolete, superceded by shift-select-mode
  enabled by default in 23.1.
  
diff --git a/lisp/ChangeLog b/lisp/ChangeLog

index ccf5b5c40eae2b7cb949f0c3d5a0708f97708f86..21d90eee903a1c866898c6610a1e743ca47a3714 100644 (file)
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,17 @@
+2010-12-26  Stefan Monnier  <monnier@iro.umontreal.ca>
+
+       * emacs-lisp/rx.el: Make it a superset of sregex.
+       (rx-constituents): Add `any => "."', mark `repeat' as taking any number
+       of args, add `regex' alias.
+       (rx-info): Add arg to distinguish head and standalone forms.
+       (rx-check, rx-form): Pass the corresponding arg.
+       (rx-**): Simplify.
+       (rx-repeat): Make it work for any number of args.
+       (rx-syntax): Make it accept syntax chars as is.
+       * obsolete/sregex.el: Move from emacs-lisp/.
+       * emacs-lisp/re-builder.el: Remove sregex support.
+       * emacs-lisp/edebug.el (sregexq, rx): Remove redundant defs.
+
  2010-12-25  Eli Zaretskii  <eliz@gnu.org>
  
         * mouse.el (mouse-yank-primary): On MS-Windows, try the (emulated)
@@ -19,8 +33,8 @@
  2010-12-21  Daiki Ueno  <ueno@unixuser.org>
  
         * obsolete/pgg-parse.el, obsolete/pgg-pgp5.el, obsolete/pgg-pgp.el,
-       * obsolete/pgg-gpg.el, obsolete/pgg-def.el, obsolete/pgg.el: Move
-       from lisp/.
+       * obsolete/pgg-gpg.el, obsolete/pgg-def.el, obsolete/pgg.el:
+       Move from lisp/.
  
  2010-12-20  Leo  <sdl.web@gmail.com>
  
diff --git a/lisp/emacs-lisp/edebug.el b/lisp/emacs-lisp/edebug.el

index 77953b37021773e4af8c81e951dc10ae6c6286f2..d4af24aaaff802a7e0778b7a98d5174bb62b5586 100644 (file)
--- a/lisp/emacs-lisp/edebug.el
+++ b/lisp/emacs-lisp/edebug.el
@@ -2131,8 +2131,6 @@ expressions; a `progn' form will be returned enclosing these forms."
  
  (def-edebug-spec with-custom-print body)
  
-(def-edebug-spec sregexq (&rest sexp))
-(def-edebug-spec rx (&rest sexp))
  
  ;;; The debugger itself
  
diff --git a/lisp/emacs-lisp/re-builder.el b/lisp/emacs-lisp/re-builder.el

index 1845effd5bb11833e9578a11369c019008ec6f9e..eacabf72c959fdcdea3b6ebbe7e856f372eea82f 100644 (file)
--- a/lisp/emacs-lisp/re-builder.el
+++ b/lisp/emacs-lisp/re-builder.el
@@ -60,8 +60,8 @@
  ;; even the auto updates go all the way.  Forcing an update overrides
  ;; this limit allowing an easy way to see all matches.
  
-;; Currently `re-builder' understands five different forms of input,
-;; namely `read', `string', `rx', and `sregex' syntax.  Read
+;; Currently `re-builder' understands three different forms of input,
+;; namely `read', `string', and `rx' syntax.  Read
  ;; syntax and string syntax are both delimited by `"'s and behave
  ;; according to their name.  With the `string' syntax there's no need
  ;; to escape the backslashes and double quotes simplifying the editing
@@ -75,7 +75,7 @@
  ;; When editing a symbolic regular expression, only the first
  ;; expression in the RE Builder buffer is considered, which helps
  ;; limiting the extent of the expression like the `"'s do for the text
-;; modes.  For the `sregex' syntax the function `sregex' is applied to
+;; modes.  For the `rx' syntax the function `rx-to-string' is applied to
  ;; the evaluated expression read.  So you can use quoted arguments
  ;; with something like '("findme") or you can construct arguments to
  ;; your hearts delight with a valid ELisp expression.  (The compiled
@@ -126,11 +126,10 @@
  
  (defcustom reb-re-syntax 'read
    "Syntax for the REs in the RE Builder.
-Can either be `read', `string', `sregex', or `rx'."
+Can either be `read', `string', or `rx'."
    :group 're-builder
    :type '(choice (const :tag "Read syntax" read)
                  (const :tag "String syntax" string)
-                (const :tag "`sregex' syntax" sregex)
                  (const :tag "`rx' syntax" rx)))
  
  (defcustom reb-auto-match-limit 200
@@ -279,10 +278,8 @@ Except for Lisp syntax this is the same as `reb-regexp'.")
    emacs-lisp-mode "RE Builder Lisp"
    "Major mode for interactively building symbolic Regular Expressions."
    ;; Pull in packages as needed
-  (cond        ((eq reb-re-syntax 'sregex)     ; sregex is not autoloaded
-        (require 'sregex))             ; right now..
-       ((eq reb-re-syntax 'rx)         ; rx-to-string is autoloaded
-        (require 'rx)))                ; require rx anyway
+  (cond        ((memq reb-re-syntax '(sregex rx)) ; rx-to-string is autoloaded
+        (require 'rx)))                   ; require rx anyway
    (reb-mode-common))
  
  ;; Use the same "\C-c" keymap as `reb-mode' and use font-locking from
@@ -612,9 +609,7 @@ optional fourth argument FORCE is non-nil."
  
  (defun reb-cook-regexp (re)
    "Return RE after processing it according to `reb-re-syntax'."
-  (cond ((eq reb-re-syntax 'sregex)
-        (apply 'sregex (eval (car (read-from-string re)))))
-       ((eq reb-re-syntax 'rx)
+  (cond ((memq reb-re-syntax '(sregex rx))
          (rx-to-string (eval (car (read-from-string re)))))
         (t re)))
  
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el

index 522d452c2dc7c05a6699781a6c15a4ba463d534c..b3b88c3ce4fe5c989ef037bc162ba4b46130f160 100644 (file)
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -120,19 +120,17 @@
      (nonl              . not-newline)  ; SRE
      (anything          . (rx-anything 0 nil))
      (any               . (rx-any 1 nil rx-check-any)) ; inconsistent with SRE
+    (any               . ".")          ; sregex
      (in                        . any)
      (char              . any)          ; sregex
      (not-char          . (rx-not-char 1 nil rx-check-any)) ; sregex
      (not               . (rx-not 1 1 rx-check-not))
-    ;; Partially consistent with sregex, whose `repeat' is like our
-    ;; `**'.  (`repeat' with optional max arg and multiple sexp forms
-    ;; is ambiguous.)
-    (repeat            . (rx-repeat 2 3))
+    (repeat            . (rx-repeat 2 nil))
      (=                 . (rx-= 2 nil))    ; SRE
      (>=                        . (rx->= 2 nil))   ; SRE
      (**                        . (rx-** 2 nil))   ; SRE
      (submatch          . (rx-submatch 1 nil)) ; SRE
-    (group             . submatch)
+    (group             . submatch)     ; sregex
      (zero-or-more      . (rx-kleene 1 nil))
      (one-or-more       . (rx-kleene 1 nil))
      (zero-or-one       . (rx-kleene 1 nil))
@@ -175,6 +173,7 @@
      (category          . (rx-category 1 1 rx-check-category))
      (eval              . (rx-eval 1 1))
      (regexp            . (rx-regexp 1 1 stringp))
+    (regex             . regexp)       ; sregex
      (digit             . "[[:digit:]]")
      (numeric           . digit)        ; SRE
      (num               . digit)        ; SRE
@@ -295,15 +294,27 @@ regular expression strings.")
  `zero-or-more', and `one-or-more'.  Dynamically bound.")
  
  
-(defun rx-info (op)
+(defun rx-info (op head)
    "Return parsing/code generation info for OP.
  If OP is the space character ASCII 32, return info for the symbol `?'.
  If OP is the character `?', return info for the symbol `??'.
-See also `rx-constituents'."
+See also `rx-constituents'.
+If HEAD is non-nil, then OP is the head of a sexp, otherwise it's
+a standalone symbol."
    (cond ((eq op ? ) (setq op '\?))
         ((eq op ??) (setq op '\??)))
-  (while (and (not (null op)) (symbolp op))
-    (setq op (cdr (assq op rx-constituents))))
+  (let (old-op)
+    (while (and (not (null op)) (symbolp op))
+      (setq old-op op)
+      (setq op (cdr (assq op rx-constituents)))
+      (when (if head (stringp op) (consp op))
+        ;; We found something but of the wrong kind.  Let's look for an
+        ;; alternate definition for the other case.
+        (let ((new-op
+               (cdr (assq old-op (cdr (memq (assq old-op rx-constituents)
+                                            rx-constituents))))))
+          (if (and new-op (not (if head (stringp new-op) (consp new-op))))
+              (setq op new-op))))))
    op)
  
  
@@ -311,7 +322,7 @@ See also `rx-constituents'."
    "Check FORM according to its car's parsing info."
    (unless (listp form)
      (error "rx `%s' needs argument(s)" form))
-  (let* ((rx (rx-info (car form)))
+  (let* ((rx (rx-info (car form) 'head))
          (nargs (1- (length form)))
          (min-args (nth 1 rx))
          (max-args (nth 2 rx))
@@ -643,14 +654,17 @@ If SKIP is non-nil, allow that number of items after the head, i.e.
  (defun rx-** (form)
    "Parse and produce code from FORM `(** N M ...)'."
    (rx-check form)
-  (setq form (cons 'repeat (cdr (rx-trans-forms form 2))))
-  (rx-form form '*))
+  (rx-form (cons 'repeat (cdr (rx-trans-forms form 2))) '*))
  
  
  (defun rx-repeat (form)
    "Parse and produce code from FORM.
-FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'."
+FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'."
    (rx-check form)
+  (if (> (length form) 4)
+      (setq form (rx-trans-forms form 2)))
+  (if (null (nth 2 form))
+      (setq form (append (list (nth 0 form) (nth 1 form)) (nthcdr 3 form))))
    (cond ((= (length form) 3)
           (unless (and (integerp (nth 1 form))
                        (> (nth 1 form) 0))
@@ -749,15 +763,18 @@ of all atomic regexps."
  "Parse and produce code from FORM, which is `(syntax SYMBOL)'."
    (rx-check form)
    (let* ((sym (cadr form))
-        (syntax (assq sym rx-syntax)))
+        (syntax (cdr (assq sym rx-syntax))))
      (unless syntax
        ;; Try sregex compatibility.
-      (let ((name (symbol-name sym)))
-       (if (= 1 (length name))
-           (setq syntax (rassq (aref name 0) rx-syntax))))
+      (cond
+       ((characterp sym) (setq syntax sym))
+       ((symbolp sym)
+        (let ((name (symbol-name sym)))
+          (if (= 1 (length name))
+              (setq syntax (aref name 0))))))
        (unless syntax
-       (error "Unknown rx syntax `%s'" (cadr form))))
-    (format "\\s%c" (cdr syntax))))
+       (error "Unknown rx syntax `%s'" sym)))
+    (format "\\s%c" syntax)))
  
  
  (defun rx-check-category (form)
@@ -811,7 +828,7 @@ shy groups around the result and some more in other functions."
      (cond ((integerp form)
            (regexp-quote (char-to-string form)))
           ((symbolp form)
-          (let ((info (rx-info form)))
+          (let ((info (rx-info form nil)))
              (cond ((stringp info)
                     info)
                    ((null info)
@@ -819,7 +836,7 @@ shy groups around the result and some more in other functions."
                    (t
                     (funcall (nth 0 info) form)))))
           ((consp form)
-          (let ((info (rx-info (car form))))
+          (let ((info (rx-info (car form) 'head)))
              (unless (consp info)
                (error "Unknown rx form `%s'" (car form)))
              (funcall (nth 0 info) form)))
diff --git a/lisp/emacs-lisp/sregex.el b/lisp/emacs-lisp/sregex.el

deleted file mode 100644 (file)

index f5e3aac..0000000
--- a/lisp/emacs-lisp/sregex.el
+++ /dev/null
@@ -1,608 +0,0 @@
-;;; sregex.el --- symbolic regular expressions
-
-;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004,
-;;   2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
-
-;; Author: Bob Glickstein <bobg+sregex@zanshin.com>
-;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com>
-;; Keywords: extensions
-
-;; This file is part of GNU Emacs.
-
-;; GNU Emacs is free software: you can redistribute it and/or modify
-;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation, either version 3 of the License, or
-;; (at your option) any later version.
-
-;; GNU Emacs is distributed in the hope that it will be useful,
-;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-;; GNU General Public License for more details.
-
-;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
-
-;;; Commentary:
-
-;; This package allows you to write regular expressions using a
-;; totally new, Lisp-like syntax.
-
-;; A "symbolic regular expression" (sregex for short) is a Lisp form
-;; that, when evaluated, produces the string form of the specified
-;; regular expression.  Here's a simple example:
-
-;;   (sregexq (or "Bob" "Robert"))  =>  "Bob\\|Robert"
-
-;; As you can see, an sregex is specified by placing one or more
-;; special clauses in a call to `sregexq'.  The clause in this case is
-;; the `or' of two strings (not to be confused with the Lisp function
-;; `or').  The list of allowable clauses appears below.
-
-;; With sregex, it is never necessary to "escape" magic characters
-;; that are meant to be taken literally; that happens automatically.
-;; For example:
-
-;;   (sregexq "M*A*S*H")  =>  "M\\*A\\*S\\*H"
-
-;; It is also unnecessary to "group" parts of the expression together
-;; to overcome operator precedence; that also happens automatically.
-;; For example:
-
-;;   (sregexq (opt (or "Bob" "Robert")))  =>  "\\(?:Bob\\|Robert\\)?"
-
-;; It *is* possible to group parts of the expression in order to refer
-;; to them with numbered backreferences:
-
-;;   (sregexq (group (or "Go" "Run"))
-;;            ", Spot, "
-;;            (backref 1))             =>  "\\(Go\\|Run\\), Spot, \\1"
-
-;; `sregexq' is a macro.  Each time it is used, it constructs a simple
-;; Lisp expression that then invokes a moderately complex engine to
-;; interpret the sregex and render the string form.  Because of this,
-;; I don't recommend sprinkling calls to `sregexq' throughout your
-;; code, the way one normally does with string regexes (which are
-;; cheap to evaluate).  Instead, it's wiser to precompute the regexes
-;; you need wherever possible instead of repeatedly constructing the
-;; same ones over and over.  Example:
-
-;;    (let ((field-regex (sregexq (opt "resent-")
-;;                                (or "to" "cc" "bcc"))))
-;;      ...
-;;      (while ...
-;;        ...
-;;        (re-search-forward field-regex ...)
-;;        ...))
-
-;; The arguments to `sregexq' are automatically quoted, but the
-;; flipside of this is that it is not straightforward to include
-;; computed (i.e., non-constant) values in `sregexq' expressions.  So
-;; `sregex' is a function that is like `sregexq' but which does not
-;; automatically quote its values.  Literal sregex clauses must be
-;; explicitly quoted like so:
-
-;;   (sregex '(or "Bob" "Robert"))  =>  "Bob\\|Robert"
-
-;; but computed clauses can be included easily, allowing for the reuse
-;; of common clauses:
-
-;;  (let ((dotstar '(0+ any))
-;;        (whitespace '(1+ (syntax ?-)))
-;;        (digits '(1+ (char (?0 . ?9)))))
-;;    (sregex 'bol dotstar ":" whitespace digits))  =>  "^.*:\\s-+[0-9]+"
-
-;; To use this package in a Lisp program, simply (require 'sregex).
-
-;; Here are the clauses allowed in an `sregex' or `sregexq'
-;; expression:
-
-;; - a string
-;;   This stands for the literal string.  If it contains
-;;   metacharacters, they will be escaped in the resulting regex
-;;   (using `regexp-quote').
-
-;; - the symbol `any'
-;;   This stands for ".", a regex matching any character except
-;;   newline.
-
-;; - the symbol `bol'
-;;   Stands for "^", matching the empty string at the beginning of a line
-
-;; - the symbol `eol'
-;;   Stands for "$", matching the empty string at the end of a line
-
-;; - (group CLAUSE ...)
-;;   Groups the given CLAUSEs using "\\(" and "\\)".
-
-;; - (sequence CLAUSE ...)
-
-;;   Groups the given CLAUSEs; may or may not use "\\(?:" and "\\)".
-;;   Clauses grouped by `sequence' do not count for purposes of
-;;   numbering backreferences.  Use `sequence' in situations like
-;;   this:
-
-;;     (sregexq (or "dog" "cat"
-;;                  (sequence (opt "sea ") "monkey")))
-;;                                  =>  "dog\\|cat\\|\\(?:sea \\)?monkey"
-
-;;   where a single `or' alternate needs to contain multiple
-;;   subclauses.
-
-;; - (backref N)
-;;   Matches the same string previously matched by the Nth "group" in
-;;   the same sregex.  N is a positive integer.
-
-;; - (or CLAUSE ...)
-;;   Matches any one of the CLAUSEs by separating them with "\\|".
-
-;; - (0+ CLAUSE ...)
-;;   Concatenates the given CLAUSEs and matches zero or more
-;;   occurrences by appending "*".
-
-;; - (1+ CLAUSE ...)
-;;   Concatenates the given CLAUSEs and matches one or more
-;;   occurrences by appending "+".
-
-;; - (opt CLAUSE ...)
-;;   Concatenates the given CLAUSEs and matches zero or one occurrence
-;;   by appending "?".
-
-;; - (repeat MIN MAX CLAUSE ...)
-;;   Concatenates the given CLAUSEs and constructs a regex matching at
-;;   least MIN occurrences and at most MAX occurrences.  MIN must be a
-;;   non-negative integer.  MAX must be a non-negative integer greater
-;;   than or equal to MIN; or MAX can be nil to mean "infinity."
-
-;; - (char CHAR-CLAUSE ...)
-;;   Creates a "character class" matching one character from the given
-;;   set.  See below for how to construct a CHAR-CLAUSE.
-
-;; - (not-char CHAR-CLAUSE ...)
-;;   Creates a "character class" matching any one character not in the
-;;   given set.  See below for how to construct a CHAR-CLAUSE.
-
-;; - the symbol `bot'
-;;   Stands for "\\`", matching the empty string at the beginning of
-;;   text (beginning of a string or of a buffer).
-
-;; - the symbol `eot'
-;;   Stands for "\\'", matching the empty string at the end of text.
-
-;; - the symbol `point'
-;;   Stands for "\\=", matching the empty string at point.
-
-;; - the symbol `word-boundary'
-;;   Stands for "\\b", matching the empty string at the beginning or
-;;   end of a word.
-
-;; - the symbol `not-word-boundary'
-;;   Stands for "\\B", matching the empty string not at the beginning
-;;   or end of a word.
-
-;; - the symbol `bow'
-;;   Stands for "\\<", matching the empty string at the beginning of a
-;;   word.
-
-;; - the symbol `eow'
-;;   Stands for "\\>", matching the empty string at the end of a word.
-
-;; - the symbol `wordchar'
-;;   Stands for the regex "\\w", matching a word-constituent character
-;;   (as determined by the current syntax table)
-
-;; - the symbol `not-wordchar'
-;;   Stands for the regex "\\W", matching a non-word-constituent
-;;   character.
-
-;; - (syntax CODE)
-;;   Stands for the regex "\\sCODE", where CODE is a syntax table code
-;;   (a single character).  Matches any character with the requested
-;;   syntax.
-
-;; - (not-syntax CODE)
-;;   Stands for the regex "\\SCODE", where CODE is a syntax table code
-;;   (a single character).  Matches any character without the
-;;   requested syntax.
-
-;; - (regex REGEX)
-;;   This is a "trapdoor" for including ordinary regular expression
-;;   strings in the result.  Some regular expressions are clearer when
-;;   written the old way: "[a-z]" vs. (sregexq (char (?a . ?z))), for
-;;   instance.  However, see the note under "Bugs," below.
-
-;; Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
-;; has one of the following forms:
-
-;; - a character
-;;   Adds that character to the set.
-
-;; - a string
-;;   Adds all the characters in the string to the set.
-
-;; - A pair (MIN . MAX)
-;;   Where MIN and MAX are characters, adds the range of characters
-;;   from MIN through MAX to the set.
-
-;;; To do:
-
-;; An earlier version of this package could optionally translate the
-;; symbolic regex into other languages' syntaxes, e.g. Perl.  For
-;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would
-;; yield "ab|cd" instead of "ab\\|cd".  It might be useful to restore
-;; such a facility.
-
-;; - handle multibyte chars in sregex--char-aux
-;; - add support for character classes ([:blank:], ...)
-;; - add support for non-greedy operators *? and +?
-;; - bug: (sregexq (opt (opt ?a))) returns "a??" which is a non-greedy "a?"
-
-;;; Bugs:
-
-;;; Code:
-
-(eval-when-compile (require 'cl))
-
-;; Compatibility code for when we didn't have shy-groups
-(defvar sregex--current-sregex nil)
-(defun sregex-info () nil)
-(defmacro sregex-save-match-data (&rest forms) (cons 'save-match-data forms))
-(defun sregex-replace-match (r &optional f l str subexp x)
-  (replace-match r f l str subexp))
-(defun sregex-match-string (c &optional i x) (match-string c i))
-(defun sregex-match-string-no-properties (count &optional in-string sregex)
-  (match-string-no-properties count in-string))
-(defun sregex-match-beginning (count &optional sregex) (match-beginning count))
-(defun sregex-match-end (count &optional sregex) (match-end count))
-(defun sregex-match-data (&optional sregex) (match-data))
-(defun sregex-backref-num (n &optional sregex) n)
-
-
-(defun sregex (&rest exps)
-  "Symbolic regular expression interpreter.
-This is exactly like `sregexq' (q.v.) except that it evaluates all its
-arguments, so literal sregex clauses must be quoted.  For example:
-
-  (sregex '(or \"Bob\" \"Robert\"))  =>  \"Bob\\\\|Robert\"
-
-An argument-evaluating sregex interpreter lets you reuse sregex
-subexpressions:
-
-  (let ((dotstar '(0+ any))
-        (whitespace '(1+ (syntax ?-)))
-        (digits '(1+ (char (?0 . ?9)))))
-    (sregex 'bol dotstar \":\" whitespace digits))  =>  \"^.*:\\\\s-+[0-9]+\""
-  (sregex--sequence exps nil))
-
-(defmacro sregexq (&rest exps)
-  "Symbolic regular expression interpreter.
-This macro allows you to specify a regular expression (regexp) in
-symbolic form, and converts it into the string form required by Emacs's
-regex functions such as `re-search-forward' and `looking-at'.  Here is
-a simple example:
-
-  (sregexq (or \"Bob\" \"Robert\"))  =>  \"Bob\\\\|Robert\"
-
-As you can see, an sregex is specified by placing one or more special
-clauses in a call to `sregexq'.  The clause in this case is the `or'
-of two strings (not to be confused with the Lisp function `or').  The
-list of allowable clauses appears below.
-
-With `sregex', it is never necessary to \"escape\" magic characters
-that are meant to be taken literally; that happens automatically.
-For example:
-
-  (sregexq \"M*A*S*H\")  =>  \"M\\\\*A\\\\*S\\\\*H\"
-
-It is also unnecessary to \"group\" parts of the expression together
-to overcome operator precedence; that also happens automatically.
-For example:
-
-  (sregexq (opt (or \"Bob\" \"Robert\")))  =>  \"\\\\(Bob\\\\|Robert\\\\)?\"
-
-It *is* possible to group parts of the expression in order to refer
-to them with numbered backreferences:
-
-  (sregexq (group (or \"Go\" \"Run\"))
-           \", Spot, \"
-           (backref 1))             =>  \"\\\\(Go\\\\|Run\\\\), Spot, \\\\1\"
-
-If `sregexq' needs to introduce its own grouping parentheses, it will
-automatically renumber your backreferences:
-
-  (sregexq (opt \"resent-\")
-           (group (or \"to\" \"cc\" \"bcc\"))
-           \": \"
-           (backref 1))  =>  \"\\\\(resent-\\\\)?\\\\(to\\\\|cc\\\\|bcc\\\\): \\\\2\"
-
-`sregexq' is a macro.  Each time it is used, it constructs a simple
-Lisp expression that then invokes a moderately complex engine to
-interpret the sregex and render the string form.  Because of this, I
-don't recommend sprinkling calls to `sregexq' throughout your code,
-the way one normally does with string regexes (which are cheap to
-evaluate).  Instead, it's wiser to precompute the regexes you need
-wherever possible instead of repeatedly constructing the same ones
-over and over.  Example:
-
-   (let ((field-regex (sregexq (opt \"resent-\")
-                               (or \"to\" \"cc\" \"bcc\"))))
-     ...
-     (while ...
-       ...
-       (re-search-forward field-regex ...)
-       ...))
-
-The arguments to `sregexq' are automatically quoted, but the
-flipside of this is that it is not straightforward to include
-computed (i.e., non-constant) values in `sregexq' expressions.  So
-`sregex' is a function that is like `sregexq' but which does not
-automatically quote its values.  Literal sregex clauses must be
-explicitly quoted like so:
-
-  (sregex '(or \"Bob\" \"Robert\"))  =>  \"Bob\\\\|Robert\"
-
-but computed clauses can be included easily, allowing for the reuse
-of common clauses:
-
-  (let ((dotstar '(0+ any))
-        (whitespace '(1+ (syntax ?-)))
-        (digits '(1+ (char (?0 . ?9)))))
-    (sregex 'bol dotstar \":\" whitespace digits))  =>  \"^.*:\\\\s-+[0-9]+\"
-
-Here are the clauses allowed in an `sregex' or `sregexq' expression:
-
-- a string
-  This stands for the literal string.  If it contains
-  metacharacters, they will be escaped in the resulting regex
-  (using `regexp-quote').
-
-- the symbol `any'
-  This stands for \".\", a regex matching any character except
-  newline.
-
-- the symbol `bol'
-  Stands for \"^\", matching the empty string at the beginning of a line
-
-- the symbol `eol'
-  Stands for \"$\", matching the empty string at the end of a line
-
-- (group CLAUSE ...)
-  Groups the given CLAUSEs using \"\\\\(\" and \"\\\\)\".
-
-- (sequence CLAUSE ...)
-
-  Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\".
-  Clauses grouped by `sequence' do not count for purposes of
-  numbering backreferences.  Use `sequence' in situations like
-  this:
-
-    (sregexq (or \"dog\" \"cat\"
-                 (sequence (opt \"sea \") \"monkey\")))
-                                 =>  \"dog\\\\|cat\\\\|\\\\(?:sea \\\\)?monkey\"
-
-  where a single `or' alternate needs to contain multiple
-  subclauses.
-
-- (backref N)
-  Matches the same string previously matched by the Nth \"group\" in
-  the same sregex.  N is a positive integer.
-
-- (or CLAUSE ...)
-  Matches any one of the CLAUSEs by separating them with \"\\\\|\".
-
-- (0+ CLAUSE ...)
-  Concatenates the given CLAUSEs and matches zero or more
-  occurrences by appending \"*\".
-
-- (1+ CLAUSE ...)
-  Concatenates the given CLAUSEs and matches one or more
-  occurrences by appending \"+\".
-
-- (opt CLAUSE ...)
-  Concatenates the given CLAUSEs and matches zero or one occurrence
-  by appending \"?\".
-
-- (repeat MIN MAX CLAUSE ...)
-  Concatenates the given CLAUSEs and constructs a regex matching at
-  least MIN occurrences and at most MAX occurrences.  MIN must be a
-  non-negative integer.  MAX must be a non-negative integer greater
-  than or equal to MIN; or MAX can be nil to mean \"infinity.\"
-
-- (char CHAR-CLAUSE ...)
-  Creates a \"character class\" matching one character from the given
-  set.  See below for how to construct a CHAR-CLAUSE.
-
-- (not-char CHAR-CLAUSE ...)
-  Creates a \"character class\" matching any one character not in the
-  given set.  See below for how to construct a CHAR-CLAUSE.
-
-- the symbol `bot'
-  Stands for \"\\\\`\", matching the empty string at the beginning of
-  text (beginning of a string or of a buffer).
-
-- the symbol `eot'
-  Stands for \"\\\\'\", matching the empty string at the end of text.
-
-- the symbol `point'
-  Stands for \"\\\\=\\=\", matching the empty string at point.
-
-- the symbol `word-boundary'
-  Stands for \"\\\\b\", matching the empty string at the beginning or
-  end of a word.
-
-- the symbol `not-word-boundary'
-  Stands for \"\\\\B\", matching the empty string not at the beginning
-  or end of a word.
-
-- the symbol `bow'
-  Stands for \"\\\\=\\<\", matching the empty string at the beginning of a
-  word.
-
-- the symbol `eow'
-  Stands for \"\\\\=\\>\", matching the empty string at the end of a word.
-
-- the symbol `wordchar'
-  Stands for the regex \"\\\\w\", matching a word-constituent character
-  (as determined by the current syntax table)
-
-- the symbol `not-wordchar'
-  Stands for the regex \"\\\\W\", matching a non-word-constituent
-  character.
-
-- (syntax CODE)
-  Stands for the regex \"\\\\sCODE\", where CODE is a syntax table code
-  (a single character).  Matches any character with the requested
-  syntax.
-
-- (not-syntax CODE)
-  Stands for the regex \"\\\\SCODE\", where CODE is a syntax table code
-  (a single character).  Matches any character without the
-  requested syntax.
-
-- (regex REGEX)
-  This is a \"trapdoor\" for including ordinary regular expression
-  strings in the result.  Some regular expressions are clearer when
-  written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for
-  instance.
-
-Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
-has one of the following forms:
-
-- a character
-  Adds that character to the set.
-
-- a string
-  Adds all the characters in the string to the set.
-
-- A pair (MIN . MAX)
-  Where MIN and MAX are characters, adds the range of characters
-  from MIN through MAX to the set."
-  `(apply 'sregex ',exps))
-
-(defun sregex--engine (exp combine)
-  (cond
-   ((stringp exp)
-    (if (and combine
-            (eq combine 'suffix)
-            (/= (length exp) 1))
-       (concat "\\(?:" (regexp-quote exp) "\\)")
-      (regexp-quote exp)))
-   ((symbolp exp)
-    (ecase exp
-      (any ".")
-      (bol "^")
-      (eol "$")
-      (wordchar "\\w")
-      (not-wordchar "\\W")
-      (bot "\\`")
-      (eot "\\'")
-      (point "\\=")
-      (word-boundary "\\b")
-      (not-word-boundary "\\B")
-      (bow "\\<")
-      (eow "\\>")))
-   ((consp exp)
-    (funcall (intern (concat "sregex--"
-                            (symbol-name (car exp))))
-            (cdr exp)
-            combine))
-   (t (error "Invalid expression: %s" exp))))
-
-(defun sregex--sequence (exps combine)
-  (if (= (length exps) 1) (sregex--engine (car exps) combine)
-    (let ((re (mapconcat
-              (lambda (e) (sregex--engine e 'concat))
-              exps "")))
-      (if (eq combine 'suffix)
-          (concat "\\(?:" re "\\)")
-        re))))
-
-(defun sregex--or (exps combine)
-  (if (= (length exps) 1) (sregex--engine (car exps) combine)
-    (let ((re (mapconcat
-              (lambda (e) (sregex--engine e 'or))
-              exps "\\|")))
-      (if (not (eq combine 'or))
-          (concat "\\(?:" re "\\)")
-        re))))
-
-(defun sregex--group (exps combine) (concat "\\(" (sregex--sequence exps nil) "\\)"))
-
-(defun sregex--backref (exps combine) (concat "\\" (int-to-string (car exps))))
-(defun sregex--opt (exps combine) (concat (sregex--sequence exps 'suffix) "?"))
-(defun sregex--0+ (exps combine) (concat (sregex--sequence exps 'suffix) "*"))
-(defun sregex--1+ (exps combine) (concat (sregex--sequence exps 'suffix) "+"))
-
-(defun sregex--char (exps combine) (sregex--char-aux nil exps))
-(defun sregex--not-char (exps combine) (sregex--char-aux t exps))
-
-(defun sregex--syntax (exps combine) (format "\\s%c" (car exps)))
-(defun sregex--not-syntax (exps combine) (format "\\S%c" (car exps)))
-
-(defun sregex--regex (exps combine)
-  (if combine (concat "\\(?:" (car exps) "\\)") (car exps)))
-
-(defun sregex--repeat (exps combine)
-  (let* ((min (or (pop exps) 0))
-        (minstr (number-to-string min))
-        (max (pop exps)))
-    (concat (sregex--sequence exps 'suffix)
-           (concat "\\{" minstr ","
-                   (when max (number-to-string max)) "\\}"))))
-
-(defun sregex--char-range (start end)
-  (let ((startc (char-to-string start))
-       (endc (char-to-string end)))
-    (cond
-     ((> end (+ start 2)) (concat startc "-" endc))
-     ((> end (+ start 1)) (concat startc (char-to-string (1+ start)) endc))
-     ((> end start) (concat startc endc))
-     (t startc))))
-
-(defun sregex--char-aux (complement args)
-  ;; regex-opt does the same, we should join effort.
-  (let ((chars (make-bool-vector 256 nil))) ; Yeah, right!
-    (dolist (arg args)
-      (cond ((integerp arg) (aset chars arg t))
-           ((stringp arg) (mapc (lambda (c) (aset chars c t)) arg))
-           ((consp arg)
-            (let ((start (car arg))
-                  (end (cdr arg)))
-              (when (> start end)
-                (let ((tmp start)) (setq start end) (setq end tmp)))
-              ;; now start <= end
-              (let ((i start))
-                (while (<= i end)
-                  (aset chars i t)
-                  (setq i (1+ i))))))))
-    ;; now chars is a map of the characters in the class
-    (let ((caret (aref chars ?^))
-         (dash (aref chars ?-))
-         (class (if (aref chars ?\]) "]" "")))
-      (aset chars ?^ nil)
-      (aset chars ?- nil)
-      (aset chars ?\] nil)
-
-      (let (start end)
-       (dotimes (i 256)
-         (if (aref chars i)
-             (progn
-               (unless start (setq start i))
-               (setq end i)
-               (aset chars i nil))
-           (when start
-             (setq class (concat class (sregex--char-range start end)))
-             (setq start nil))))
-       (if start
-           (setq class (concat class (sregex--char-range start end)))))
-
-      (if (> (length class) 0)
-         (setq class (concat class (if caret "^") (if dash "-")))
-       (setq class (concat class (if dash "-") (if caret "^"))))
-      (if (and (not complement) (= (length class) 1))
-         (regexp-quote class)
-       (concat "[" (if complement "^") class "]")))))
-
-(provide 'sregex)
-
-;; arch-tag: 460c1f5a-eb6e-42ec-a451-ffac78bdf492
-;;; sregex.el ends here
diff --git a/lisp/obsolete/sregex.el b/lisp/obsolete/sregex.el

new file mode 100644 (file)

index 0000000..ef4700c
--- /dev/null
+++ b/lisp/obsolete/sregex.el
@@ -0,0 +1,609 @@
+;;; sregex.el --- symbolic regular expressions
+
+;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004,
+;;   2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+;; Author: Bob Glickstein <bobg+sregex@zanshin.com>
+;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com>
+;; Keywords: extensions
+;; Obsolete-since: 24.1
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This package allows you to write regular expressions using a
+;; totally new, Lisp-like syntax.
+
+;; A "symbolic regular expression" (sregex for short) is a Lisp form
+;; that, when evaluated, produces the string form of the specified
+;; regular expression.  Here's a simple example:
+
+;;   (sregexq (or "Bob" "Robert"))  =>  "Bob\\|Robert"
+
+;; As you can see, an sregex is specified by placing one or more
+;; special clauses in a call to `sregexq'.  The clause in this case is
+;; the `or' of two strings (not to be confused with the Lisp function
+;; `or').  The list of allowable clauses appears below.
+
+;; With sregex, it is never necessary to "escape" magic characters
+;; that are meant to be taken literally; that happens automatically.
+;; For example:
+
+;;   (sregexq "M*A*S*H")  =>  "M\\*A\\*S\\*H"
+
+;; It is also unnecessary to "group" parts of the expression together
+;; to overcome operator precedence; that also happens automatically.
+;; For example:
+
+;;   (sregexq (opt (or "Bob" "Robert")))  =>  "\\(?:Bob\\|Robert\\)?"
+
+;; It *is* possible to group parts of the expression in order to refer
+;; to them with numbered backreferences:
+
+;;   (sregexq (group (or "Go" "Run"))
+;;            ", Spot, "
+;;            (backref 1))             =>  "\\(Go\\|Run\\), Spot, \\1"
+
+;; `sregexq' is a macro.  Each time it is used, it constructs a simple
+;; Lisp expression that then invokes a moderately complex engine to
+;; interpret the sregex and render the string form.  Because of this,
+;; I don't recommend sprinkling calls to `sregexq' throughout your
+;; code, the way one normally does with string regexes (which are
+;; cheap to evaluate).  Instead, it's wiser to precompute the regexes
+;; you need wherever possible instead of repeatedly constructing the
+;; same ones over and over.  Example:
+
+;;    (let ((field-regex (sregexq (opt "resent-")
+;;                                (or "to" "cc" "bcc"))))
+;;      ...
+;;      (while ...
+;;        ...
+;;        (re-search-forward field-regex ...)
+;;        ...))
+
+;; The arguments to `sregexq' are automatically quoted, but the
+;; flipside of this is that it is not straightforward to include
+;; computed (i.e., non-constant) values in `sregexq' expressions.  So
+;; `sregex' is a function that is like `sregexq' but which does not
+;; automatically quote its values.  Literal sregex clauses must be
+;; explicitly quoted like so:
+
+;;   (sregex '(or "Bob" "Robert"))  =>  "Bob\\|Robert"
+
+;; but computed clauses can be included easily, allowing for the reuse
+;; of common clauses:
+
+;;  (let ((dotstar '(0+ any))
+;;        (whitespace '(1+ (syntax ?-)))
+;;        (digits '(1+ (char (?0 . ?9)))))
+;;    (sregex 'bol dotstar ":" whitespace digits))  =>  "^.*:\\s-+[0-9]+"
+
+;; To use this package in a Lisp program, simply (require 'sregex).
+
+;; Here are the clauses allowed in an `sregex' or `sregexq'
+;; expression:
+
+;; - a string
+;;   This stands for the literal string.  If it contains
+;;   metacharacters, they will be escaped in the resulting regex
+;;   (using `regexp-quote').
+
+;; - the symbol `any'
+;;   This stands for ".", a regex matching any character except
+;;   newline.
+
+;; - the symbol `bol'
+;;   Stands for "^", matching the empty string at the beginning of a line
+
+;; - the symbol `eol'
+;;   Stands for "$", matching the empty string at the end of a line
+
+;; - (group CLAUSE ...)
+;;   Groups the given CLAUSEs using "\\(" and "\\)".
+
+;; - (sequence CLAUSE ...)
+
+;;   Groups the given CLAUSEs; may or may not use "\\(?:" and "\\)".
+;;   Clauses grouped by `sequence' do not count for purposes of
+;;   numbering backreferences.  Use `sequence' in situations like
+;;   this:
+
+;;     (sregexq (or "dog" "cat"
+;;                  (sequence (opt "sea ") "monkey")))
+;;                                  =>  "dog\\|cat\\|\\(?:sea \\)?monkey"
+
+;;   where a single `or' alternate needs to contain multiple
+;;   subclauses.
+
+;; - (backref N)
+;;   Matches the same string previously matched by the Nth "group" in
+;;   the same sregex.  N is a positive integer.
+
+;; - (or CLAUSE ...)
+;;   Matches any one of the CLAUSEs by separating them with "\\|".
+
+;; - (0+ CLAUSE ...)
+;;   Concatenates the given CLAUSEs and matches zero or more
+;;   occurrences by appending "*".
+
+;; - (1+ CLAUSE ...)
+;;   Concatenates the given CLAUSEs and matches one or more
+;;   occurrences by appending "+".
+
+;; - (opt CLAUSE ...)
+;;   Concatenates the given CLAUSEs and matches zero or one occurrence
+;;   by appending "?".
+
+;; - (repeat MIN MAX CLAUSE ...)
+;;   Concatenates the given CLAUSEs and constructs a regex matching at
+;;   least MIN occurrences and at most MAX occurrences.  MIN must be a
+;;   non-negative integer.  MAX must be a non-negative integer greater
+;;   than or equal to MIN; or MAX can be nil to mean "infinity."
+
+;; - (char CHAR-CLAUSE ...)
+;;   Creates a "character class" matching one character from the given
+;;   set.  See below for how to construct a CHAR-CLAUSE.
+
+;; - (not-char CHAR-CLAUSE ...)
+;;   Creates a "character class" matching any one character not in the
+;;   given set.  See below for how to construct a CHAR-CLAUSE.
+
+;; - the symbol `bot'
+;;   Stands for "\\`", matching the empty string at the beginning of
+;;   text (beginning of a string or of a buffer).
+
+;; - the symbol `eot'
+;;   Stands for "\\'", matching the empty string at the end of text.
+
+;; - the symbol `point'
+;;   Stands for "\\=", matching the empty string at point.
+
+;; - the symbol `word-boundary'
+;;   Stands for "\\b", matching the empty string at the beginning or
+;;   end of a word.
+
+;; - the symbol `not-word-boundary'
+;;   Stands for "\\B", matching the empty string not at the beginning
+;;   or end of a word.
+
+;; - the symbol `bow'
+;;   Stands for "\\<", matching the empty string at the beginning of a
+;;   word.
+
+;; - the symbol `eow'
+;;   Stands for "\\>", matching the empty string at the end of a word.
+
+;; - the symbol `wordchar'
+;;   Stands for the regex "\\w", matching a word-constituent character
+;;   (as determined by the current syntax table)
+
+;; - the symbol `not-wordchar'
+;;   Stands for the regex "\\W", matching a non-word-constituent
+;;   character.
+
+;; - (syntax CODE)
+;;   Stands for the regex "\\sCODE", where CODE is a syntax table code
+;;   (a single character).  Matches any character with the requested
+;;   syntax.
+
+;; - (not-syntax CODE)
+;;   Stands for the regex "\\SCODE", where CODE is a syntax table code
+;;   (a single character).  Matches any character without the
+;;   requested syntax.
+
+;; - (regex REGEX)
+;;   This is a "trapdoor" for including ordinary regular expression
+;;   strings in the result.  Some regular expressions are clearer when
+;;   written the old way: "[a-z]" vs. (sregexq (char (?a . ?z))), for
+;;   instance.
+
+;; Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
+;; has one of the following forms:
+
+;; - a character
+;;   Adds that character to the set.
+
+;; - a string
+;;   Adds all the characters in the string to the set.
+
+;; - A pair (MIN . MAX)
+;;   Where MIN and MAX are characters, adds the range of characters
+;;   from MIN through MAX to the set.
+
+;;; To do:
+
+;; An earlier version of this package could optionally translate the
+;; symbolic regex into other languages' syntaxes, e.g. Perl.  For
+;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would
+;; yield "ab|cd" instead of "ab\\|cd".  It might be useful to restore
+;; such a facility.
+
+;; - handle multibyte chars in sregex--char-aux
+;; - add support for character classes ([:blank:], ...)
+;; - add support for non-greedy operators *? and +?
+;; - bug: (sregexq (opt (opt "a"))) returns "a??" which is a non-greedy "a?"
+
+;;; Bugs:
+
+;;; Code:
+
+(eval-when-compile (require 'cl))
+
+;; Compatibility code for when we didn't have shy-groups
+(defvar sregex--current-sregex nil)
+(defun sregex-info () nil)
+(defmacro sregex-save-match-data (&rest forms) (cons 'save-match-data forms))
+(defun sregex-replace-match (r &optional f l str subexp x)
+  (replace-match r f l str subexp))
+(defun sregex-match-string (c &optional i x) (match-string c i))
+(defun sregex-match-string-no-properties (count &optional in-string sregex)
+  (match-string-no-properties count in-string))
+(defun sregex-match-beginning (count &optional sregex) (match-beginning count))
+(defun sregex-match-end (count &optional sregex) (match-end count))
+(defun sregex-match-data (&optional sregex) (match-data))
+(defun sregex-backref-num (n &optional sregex) n)
+
+
+(defun sregex (&rest exps)
+  "Symbolic regular expression interpreter.
+This is exactly like `sregexq' (q.v.) except that it evaluates all its
+arguments, so literal sregex clauses must be quoted.  For example:
+
+  (sregex '(or \"Bob\" \"Robert\"))  =>  \"Bob\\\\|Robert\"
+
+An argument-evaluating sregex interpreter lets you reuse sregex
+subexpressions:
+
+  (let ((dotstar '(0+ any))
+        (whitespace '(1+ (syntax ?-)))
+        (digits '(1+ (char (?0 . ?9)))))
+    (sregex 'bol dotstar \":\" whitespace digits))  =>  \"^.*:\\\\s-+[0-9]+\""
+  (sregex--sequence exps nil))
+
+(defmacro sregexq (&rest exps)
+  "Symbolic regular expression interpreter.
+This macro allows you to specify a regular expression (regexp) in
+symbolic form, and converts it into the string form required by Emacs's
+regex functions such as `re-search-forward' and `looking-at'.  Here is
+a simple example:
+
+  (sregexq (or \"Bob\" \"Robert\"))  =>  \"Bob\\\\|Robert\"
+
+As you can see, an sregex is specified by placing one or more special
+clauses in a call to `sregexq'.  The clause in this case is the `or'
+of two strings (not to be confused with the Lisp function `or').  The
+list of allowable clauses appears below.
+
+With `sregex', it is never necessary to \"escape\" magic characters
+that are meant to be taken literally; that happens automatically.
+For example:
+
+  (sregexq \"M*A*S*H\")  =>  \"M\\\\*A\\\\*S\\\\*H\"
+
+It is also unnecessary to \"group\" parts of the expression together
+to overcome operator precedence; that also happens automatically.
+For example:
+
+  (sregexq (opt (or \"Bob\" \"Robert\")))  =>  \"\\\\(?:Bob\\\\|Robert\\\\)?\"
+
+It *is* possible to group parts of the expression in order to refer
+to them with numbered backreferences:
+
+  (sregexq (group (or \"Go\" \"Run\"))
+           \", Spot, \"
+           (backref 1))             =>  \"\\\\(Go\\\\|Run\\\\), Spot, \\\\1\"
+
+Grouping parentheses that `sregexq' introduces on its own are shy
+groups, so they never change the numbering of your backreferences:
+
+  (sregexq (opt \"resent-\")
+           (group (or \"to\" \"cc\" \"bcc\"))
+           \": \"
+           (backref 1))  =>  \"\\\\(?:resent-\\\\)?\\\\(to\\\\|cc\\\\|bcc\\\\): \\\\1\"
+
+`sregexq' is a macro.  Each time it is used, it constructs a simple
+Lisp expression that then invokes a moderately complex engine to
+interpret the sregex and render the string form.  Because of this, I
+don't recommend sprinkling calls to `sregexq' throughout your code,
+the way one normally does with string regexes (which are cheap to
+evaluate).  Instead, it's wiser to precompute the regexes you need
+wherever possible instead of repeatedly constructing the same ones
+over and over.  Example:
+
+   (let ((field-regex (sregexq (opt \"resent-\")
+                               (or \"to\" \"cc\" \"bcc\"))))
+     ...
+     (while ...
+       ...
+       (re-search-forward field-regex ...)
+       ...))
+
+The arguments to `sregexq' are automatically quoted, but the
+flipside of this is that it is not straightforward to include
+computed (i.e., non-constant) values in `sregexq' expressions.  So
+`sregex' is a function that is like `sregexq' but which does not
+automatically quote its values.  Literal sregex clauses must be
+explicitly quoted like so:
+
+  (sregex '(or \"Bob\" \"Robert\"))  =>  \"Bob\\\\|Robert\"
+
+but computed clauses can be included easily, allowing for the reuse
+of common clauses:
+
+  (let ((dotstar '(0+ any))
+        (whitespace '(1+ (syntax ?-)))
+        (digits '(1+ (char (?0 . ?9)))))
+    (sregex 'bol dotstar \":\" whitespace digits))  =>  \"^.*:\\\\s-+[0-9]+\"
+
+Here are the clauses allowed in an `sregex' or `sregexq' expression:
+
+- a string
+  This stands for the literal string.  If it contains
+  metacharacters, they will be escaped in the resulting regex
+  (using `regexp-quote').
+
+- the symbol `any'
+  This stands for \".\", a regex matching any character except
+  newline.
+
+- the symbol `bol'
+  Stands for \"^\", matching the empty string at the beginning of a line
+
+- the symbol `eol'
+  Stands for \"$\", matching the empty string at the end of a line
+
+- (group CLAUSE ...)
+  Groups the given CLAUSEs using \"\\\\(\" and \"\\\\)\".
+
+- (sequence CLAUSE ...)
+
+  Groups the given CLAUSEs; may or may not use \"\\\\(?:\" and \"\\\\)\".
+  Clauses grouped by `sequence' do not count for purposes of
+  numbering backreferences.  Use `sequence' in situations like
+  this:
+
+    (sregexq (or \"dog\" \"cat\"
+                 (sequence (opt \"sea \") \"monkey\")))
+                                 =>  \"dog\\\\|cat\\\\|\\\\(?:sea \\\\)?monkey\"
+
+  where a single `or' alternate needs to contain multiple
+  subclauses.
+
+- (backref N)
+  Matches the same string previously matched by the Nth \"group\" in
+  the same sregex.  N is a positive integer.
+
+- (or CLAUSE ...)
+  Matches any one of the CLAUSEs by separating them with \"\\\\|\".
+
+- (0+ CLAUSE ...)
+  Concatenates the given CLAUSEs and matches zero or more
+  occurrences by appending \"*\".
+
+- (1+ CLAUSE ...)
+  Concatenates the given CLAUSEs and matches one or more
+  occurrences by appending \"+\".
+
+- (opt CLAUSE ...)
+  Concatenates the given CLAUSEs and matches zero or one occurrence
+  by appending \"?\".
+
+- (repeat MIN MAX CLAUSE ...)
+  Concatenates the given CLAUSEs and constructs a regex matching at
+  least MIN occurrences and at most MAX occurrences.  MIN must be a
+  non-negative integer.  MAX must be a non-negative integer greater
+  than or equal to MIN; or MAX can be nil to mean \"infinity.\"
+
+- (char CHAR-CLAUSE ...)
+  Creates a \"character class\" matching one character from the given
+  set.  See below for how to construct a CHAR-CLAUSE.
+
+- (not-char CHAR-CLAUSE ...)
+  Creates a \"character class\" matching any one character not in the
+  given set.  See below for how to construct a CHAR-CLAUSE.
+
+- the symbol `bot'
+  Stands for \"\\\\`\", matching the empty string at the beginning of
+  text (beginning of a string or of a buffer).
+
+- the symbol `eot'
+  Stands for \"\\\\'\", matching the empty string at the end of text.
+
+- the symbol `point'
+  Stands for \"\\\\=\\=\", matching the empty string at point.
+
+- the symbol `word-boundary'
+  Stands for \"\\\\b\", matching the empty string at the beginning or
+  end of a word.
+
+- the symbol `not-word-boundary'
+  Stands for \"\\\\B\", matching the empty string not at the beginning
+  or end of a word.
+
+- the symbol `bow'
+  Stands for \"\\\\=\\<\", matching the empty string at the beginning of a
+  word.
+
+- the symbol `eow'
+  Stands for \"\\\\=\\>\", matching the empty string at the end of a word.
+
+- the symbol `wordchar'
+  Stands for the regex \"\\\\w\", matching a word-constituent character
+  (as determined by the current syntax table)
+
+- the symbol `not-wordchar'
+  Stands for the regex \"\\\\W\", matching a non-word-constituent
+  character.
+
+- (syntax CODE)
+  Stands for the regex \"\\\\sCODE\", where CODE is a syntax table code
+  (a single character).  Matches any character with the requested
+  syntax.
+
+- (not-syntax CODE)
+  Stands for the regex \"\\\\SCODE\", where CODE is a syntax table code
+  (a single character).  Matches any character without the
+  requested syntax.
+
+- (regex REGEX)
+  This is a \"trapdoor\" for including ordinary regular expression
+  strings in the result.  Some regular expressions are clearer when
+  written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for
+  instance.
+
+Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
+has one of the following forms:
+
+- a character
+  Adds that character to the set.
+
+- a string
+  Adds all the characters in the string to the set.
+
+- A pair (MIN . MAX)
+  Where MIN and MAX are characters, adds the range of characters
+  from MIN through MAX to the set."
+  `(apply 'sregex ',exps))
+
+(defun sregex--engine (exp combine)
+  (cond
+   ((stringp exp)
+    (if (and combine
+            (eq combine 'suffix)
+            (/= (length exp) 1))
+       (concat "\\(?:" (regexp-quote exp) "\\)")
+      (regexp-quote exp)))
+   ((symbolp exp)
+    (ecase exp
+      (any ".")
+      (bol "^")
+      (eol "$")
+      (wordchar "\\w")
+      (not-wordchar "\\W")
+      (bot "\\`")
+      (eot "\\'")
+      (point "\\=")
+      (word-boundary "\\b")
+      (not-word-boundary "\\B")
+      (bow "\\<")
+      (eow "\\>")))
+   ((consp exp)
+    (funcall (intern (concat "sregex--"
+                            (symbol-name (car exp))))
+            (cdr exp)
+            combine))
+   (t (error "Invalid expression: %s" exp))))
+
+(defun sregex--sequence (exps combine)
+  (if (= (length exps) 1) (sregex--engine (car exps) combine)
+    (let ((re (mapconcat
+              (lambda (e) (sregex--engine e 'concat))
+              exps "")))
+      (if (eq combine 'suffix)
+          (concat "\\(?:" re "\\)")
+        re))))
+
+(defun sregex--or (exps combine)
+  (if (= (length exps) 1) (sregex--engine (car exps) combine)
+    (let ((re (mapconcat
+              (lambda (e) (sregex--engine e 'or))
+              exps "\\|")))
+      (if (not (eq combine 'or))
+          (concat "\\(?:" re "\\)")
+        re))))
+
+(defun sregex--group (exps combine) (concat "\\(" (sregex--sequence exps nil) "\\)"))
+
+(defun sregex--backref (exps combine) (concat "\\" (int-to-string (car exps))))
+(defun sregex--opt (exps combine) (concat (sregex--sequence exps 'suffix) "?"))
+(defun sregex--0+ (exps combine) (concat (sregex--sequence exps 'suffix) "*"))
+(defun sregex--1+ (exps combine) (concat (sregex--sequence exps 'suffix) "+"))
+
+(defun sregex--char (exps combine) (sregex--char-aux nil exps))
+(defun sregex--not-char (exps combine) (sregex--char-aux t exps))
+
+(defun sregex--syntax (exps combine) (format "\\s%c" (car exps)))
+(defun sregex--not-syntax (exps combine) (format "\\S%c" (car exps)))
+
+(defun sregex--regex (exps combine)
+  (if combine (concat "\\(?:" (car exps) "\\)") (car exps)))
+
+(defun sregex--repeat (exps combine)
+  (let* ((min (or (pop exps) 0))
+        (minstr (number-to-string min))
+        (max (pop exps)))
+    (concat (sregex--sequence exps 'suffix)
+           (concat "\\{" minstr ","
+                   (when max (number-to-string max)) "\\}"))))
+
+(defun sregex--char-range (start end)
+  (let ((startc (char-to-string start))
+       (endc (char-to-string end)))
+    (cond
+     ((> end (+ start 2)) (concat startc "-" endc))
+     ((> end (+ start 1)) (concat startc (char-to-string (1+ start)) endc))
+     ((> end start) (concat startc endc))
+     (t startc))))
+
+(defun sregex--char-aux (complement args)
+  ;; regexp-opt does the same, we should join effort.
+  (let ((chars (make-bool-vector 256 nil))) ; Yeah, right!
+    (dolist (arg args)
+      (cond ((integerp arg) (aset chars arg t))
+           ((stringp arg) (mapc (lambda (c) (aset chars c t)) arg))
+           ((consp arg)
+            (let ((start (car arg))
+                  (end (cdr arg)))
+              (when (> start end)
+                (let ((tmp start)) (setq start end) (setq end tmp)))
+              ;; now start <= end
+              (let ((i start))
+                (while (<= i end)
+                  (aset chars i t)
+                  (setq i (1+ i))))))))
+    ;; now chars is a map of the characters in the class
+    (let ((caret (aref chars ?^))
+         (dash (aref chars ?-))
+         (class (if (aref chars ?\]) "]" "")))
+      (aset chars ?^ nil)
+      (aset chars ?- nil)
+      (aset chars ?\] nil)
+
+      (let (start end)
+       (dotimes (i 256)
+         (if (aref chars i)
+             (progn
+               (unless start (setq start i))
+               (setq end i)
+               (aset chars i nil))
+           (when start
+             (setq class (concat class (sregex--char-range start end)))
+             (setq start nil))))
+       (if start
+           (setq class (concat class (sregex--char-range start end)))))
+
+      (if (> (length class) 0)
+         (setq class (concat class (if caret "^") (if dash "-")))
+       (setq class (concat class (if dash "-") (if caret "^"))))
+      (if (and (not complement) (= (length class) 1))
+         (regexp-quote class)
+       (concat "[" (if complement "^") class "]")))))
+
+(provide 'sregex)
+
+;; arch-tag: 460c1f5a-eb6e-42ec-a451-ffac78bdf492
+;;; sregex.el ends here
author	Stefan Monnier <monnier@iro.umontreal.ca>
	Sun, 26 Dec 2010 23:17:09 +0000 (18:17 -0500)
committer	Stefan Monnier <monnier@iro.umontreal.ca>
	Sun, 26 Dec 2010 23:17:09 +0000 (18:17 -0500)
etc/NEWS		patch \| blob \| history
lisp/ChangeLog		patch \| blob \| history
lisp/emacs-lisp/edebug.el		patch \| blob \| history
lisp/emacs-lisp/re-builder.el		patch \| blob \| history
lisp/emacs-lisp/rx.el		patch \| blob \| history
lisp/emacs-lisp/sregex.el	[deleted file]	patch \| blob \| history
lisp/obsolete/sregex.el	[new file with mode: 0644]	patch \| blob