*** An API for manipulating SQL product definitions has been added.
+** sregex.el is now obsolete, since rx.el is a strict superset.
+
** s-region.el is now declared obsolete, superceded by shift-select-mode
enabled by default in 23.1.
+2010-12-26 Stefan Monnier <monnier@iro.umontreal.ca>
+
+ * emacs-lisp/rx.el: Make it a superset of sregex.
+ (rx-constituents): Add `any => "."', mark `repeat' as taking any number
+ of args, add `regex' alias.
+ (rx-info): Add arg to distinguish head and standalone forms.
+ (rx-check, rx-form): Pass the corresponding arg.
+ (rx-**): Simplify.
+ (rx-repeat): Make it work for any number of args.
+ (rx-syntax): Make it accept syntax chars as is.
+ * obsolete/sregex.el: Move from emacs-lisp/.
+ * emacs-lisp/re-builder.el: Remove sregex support.
+ * emacs-lisp/edebug.el (sregexq, rx): Remove redundant defs.
+
2010-12-25 Eli Zaretskii <eliz@gnu.org>
* mouse.el (mouse-yank-primary): On MS-Windows, try the (emulated)
2010-12-21 Daiki Ueno <ueno@unixuser.org>
* obsolete/pgg-parse.el, obsolete/pgg-pgp5.el, obsolete/pgg-pgp.el,
- * obsolete/pgg-gpg.el, obsolete/pgg-def.el, obsolete/pgg.el: Move
- from lisp/.
+ * obsolete/pgg-gpg.el, obsolete/pgg-def.el, obsolete/pgg.el:
+ Move from lisp/.
2010-12-20 Leo <sdl.web@gmail.com>
(def-edebug-spec with-custom-print body)
-(def-edebug-spec sregexq (&rest sexp))
-(def-edebug-spec rx (&rest sexp))
;;; The debugger itself
;; even the auto updates go all the way. Forcing an update overrides
;; this limit allowing an easy way to see all matches.
-;; Currently `re-builder' understands five different forms of input,
-;; namely `read', `string', `rx', and `sregex' syntax. Read
+;; Currently `re-builder' understands three different forms of input,
+;; namely `read', `string', and `rx' syntax. Read
;; syntax and string syntax are both delimited by `"'s and behave
;; according to their name. With the `string' syntax there's no need
;; to escape the backslashes and double quotes simplifying the editing
;; When editing a symbolic regular expression, only the first
;; expression in the RE Builder buffer is considered, which helps
;; limiting the extent of the expression like the `"'s do for the text
-;; modes. For the `sregex' syntax the function `sregex' is applied to
+;; modes. For the `rx' syntax the function `rx-to-string' is applied to
;; the evaluated expression read. So you can use quoted arguments
;; with something like '("findme") or you can construct arguments to
;; your hearts delight with a valid ELisp expression. (The compiled
(defcustom reb-re-syntax 'read
"Syntax for the REs in the RE Builder.
-Can either be `read', `string', `sregex', or `rx'."
+Can either be `read', `string', or `rx'."
:group 're-builder
:type '(choice (const :tag "Read syntax" read)
(const :tag "String syntax" string)
- (const :tag "`sregex' syntax" sregex)
(const :tag "`rx' syntax" rx)))
(defcustom reb-auto-match-limit 200
emacs-lisp-mode "RE Builder Lisp"
"Major mode for interactively building symbolic Regular Expressions."
;; Pull in packages as needed
- (cond ((eq reb-re-syntax 'sregex) ; sregex is not autoloaded
- (require 'sregex)) ; right now..
- ((eq reb-re-syntax 'rx) ; rx-to-string is autoloaded
- (require 'rx))) ; require rx anyway
+ (cond ((memq reb-re-syntax '(sregex rx)) ; rx-to-string is autoloaded
+ (require 'rx))) ; require rx anyway
(reb-mode-common))
;; Use the same "\C-c" keymap as `reb-mode' and use font-locking from
(defun reb-cook-regexp (re)
"Return RE after processing it according to `reb-re-syntax'."
- (cond ((eq reb-re-syntax 'sregex)
- (apply 'sregex (eval (car (read-from-string re)))))
- ((eq reb-re-syntax 'rx)
+ (cond ((memq reb-re-syntax '(sregex rx))
(rx-to-string (eval (car (read-from-string re)))))
(t re)))
(nonl . not-newline) ; SRE
(anything . (rx-anything 0 nil))
(any . (rx-any 1 nil rx-check-any)) ; inconsistent with SRE
+ (any . ".") ; sregex
(in . any)
(char . any) ; sregex
(not-char . (rx-not-char 1 nil rx-check-any)) ; sregex
(not . (rx-not 1 1 rx-check-not))
- ;; Partially consistent with sregex, whose `repeat' is like our
- ;; `**'. (`repeat' with optional max arg and multiple sexp forms
- ;; is ambiguous.)
- (repeat . (rx-repeat 2 3))
+ (repeat . (rx-repeat 2 nil))
(= . (rx-= 2 nil)) ; SRE
(>= . (rx->= 2 nil)) ; SRE
(** . (rx-** 2 nil)) ; SRE
(submatch . (rx-submatch 1 nil)) ; SRE
- (group . submatch)
+ (group . submatch) ; sregex
(zero-or-more . (rx-kleene 1 nil))
(one-or-more . (rx-kleene 1 nil))
(zero-or-one . (rx-kleene 1 nil))
(category . (rx-category 1 1 rx-check-category))
(eval . (rx-eval 1 1))
(regexp . (rx-regexp 1 1 stringp))
+ (regex . regexp) ; sregex
(digit . "[[:digit:]]")
(numeric . digit) ; SRE
(num . digit) ; SRE
`zero-or-more', and `one-or-more'. Dynamically bound.")
-(defun rx-info (op)
+(defun rx-info (op head)
"Return parsing/code generation info for OP.
If OP is the space character ASCII 32, return info for the symbol `?'.
If OP is the character `?', return info for the symbol `??'.
-See also `rx-constituents'."
+See also `rx-constituents'.
+If HEAD is non-nil, then OP is the head of a sexp, otherwise it's
+a standalone symbol."
(cond ((eq op ? ) (setq op '\?))
((eq op ??) (setq op '\??)))
- (while (and (not (null op)) (symbolp op))
- (setq op (cdr (assq op rx-constituents))))
+ (let (old-op)
+ (while (and (not (null op)) (symbolp op))
+ (setq old-op op)
+ (setq op (cdr (assq op rx-constituents)))
+ (when (if head (stringp op) (consp op))
+ ;; We found something but of the wrong kind. Let's look for an
+ ;; alternate definition for the other case.
+ (let ((new-op
+ (cdr (assq old-op (cdr (memq (assq old-op rx-constituents)
+ rx-constituents))))))
+ (if (and new-op (not (if head (stringp new-op) (consp new-op))))
+ (setq op new-op))))))
op)
"Check FORM according to its car's parsing info."
(unless (listp form)
(error "rx `%s' needs argument(s)" form))
- (let* ((rx (rx-info (car form)))
+ (let* ((rx (rx-info (car form) 'head))
(nargs (1- (length form)))
(min-args (nth 1 rx))
(max-args (nth 2 rx))
(defun rx-** (form)
"Parse and produce code from FORM `(** N M ...)'."
(rx-check form)
- (setq form (cons 'repeat (cdr (rx-trans-forms form 2))))
- (rx-form form '*))
+ (rx-form (cons 'repeat (cdr (rx-trans-forms form 2))) '*))
(defun rx-repeat (form)
"Parse and produce code from FORM.
-FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'."
+FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'."
(rx-check form)
+ (if (> (length form) 4)
+ (setq form (rx-trans-forms form 2)))
+ (if (null (nth 2 form))
+ (setq form (list* (nth 0 form) (nth 1 form) (nthcdr 3 form))))
(cond ((= (length form) 3)
(unless (and (integerp (nth 1 form))
(> (nth 1 form) 0))
"Parse and produce code from FORM, which is `(syntax SYMBOL)'."
(rx-check form)
(let* ((sym (cadr form))
- (syntax (assq sym rx-syntax)))
+ (syntax (cdr (assq sym rx-syntax))))
(unless syntax
;; Try sregex compatibility.
- (let ((name (symbol-name sym)))
- (if (= 1 (length name))
- (setq syntax (rassq (aref name 0) rx-syntax))))
+ (cond
+ ((characterp sym) (setq syntax sym)) ; raw syntax char, e.g. (syntax ?-)
+ ((symbolp sym) ; one-letter symbol, e.g. (syntax w)
+ (let ((name (symbol-name sym)))
+ (if (= 1 (length name))
+ (setq syntax (aref name 0))))))
(unless syntax
- (error "Unknown rx syntax `%s'" (cadr form))))
- (format "\\s%c" (cdr syntax))))
+ (error "Unknown rx syntax `%s'" sym)))
+ (format "\\s%c" syntax)))
(defun rx-check-category (form)
(cond ((integerp form)
(regexp-quote (char-to-string form)))
((symbolp form)
- (let ((info (rx-info form)))
+ (let ((info (rx-info form nil)))
(cond ((stringp info)
info)
((null info)
(t
(funcall (nth 0 info) form)))))
((consp form)
- (let ((info (rx-info (car form))))
+ (let ((info (rx-info (car form) 'head)))
(unless (consp info)
(error "Unknown rx form `%s'" (car form)))
(funcall (nth 0 info) form)))
+++ /dev/null
-;;; sregex.el --- symbolic regular expressions
-
-;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004,
-;; 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
-
-;; Author: Bob Glickstein <bobg+sregex@zanshin.com>
-;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com>
-;; Keywords: extensions
-
-;; This file is part of GNU Emacs.
-
-;; GNU Emacs is free software: you can redistribute it and/or modify
-;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation, either version 3 of the License, or
-;; (at your option) any later version.
-
-;; GNU Emacs is distributed in the hope that it will be useful,
-;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-;; GNU General Public License for more details.
-
-;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
-
-;;; Commentary:
-
-;; This package allows you to write regular expressions using a
-;; totally new, Lisp-like syntax.
-
-;; A "symbolic regular expression" (sregex for short) is a Lisp form
-;; that, when evaluated, produces the string form of the specified
-;; regular expression. Here's a simple example:
-
-;; (sregexq (or "Bob" "Robert")) => "Bob\\|Robert"
-
-;; As you can see, an sregex is specified by placing one or more
-;; special clauses in a call to `sregexq'. The clause in this case is
-;; the `or' of two strings (not to be confused with the Lisp function
-;; `or'). The list of allowable clauses appears below.
-
-;; With sregex, it is never necessary to "escape" magic characters
-;; that are meant to be taken literally; that happens automatically.
-;; For example:
-
-;; (sregexq "M*A*S*H") => "M\\*A\\*S\\*H"
-
-;; It is also unnecessary to "group" parts of the expression together
-;; to overcome operator precedence; that also happens automatically.
-;; For example:
-
-;; (sregexq (opt (or "Bob" "Robert"))) => "\\(?:Bob\\|Robert\\)?"
-
-;; It *is* possible to group parts of the expression in order to refer
-;; to them with numbered backreferences:
-
-;; (sregexq (group (or "Go" "Run"))
-;; ", Spot, "
-;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1"
-
-;; `sregexq' is a macro. Each time it is used, it constructs a simple
-;; Lisp expression that then invokes a moderately complex engine to
-;; interpret the sregex and render the string form. Because of this,
-;; I don't recommend sprinkling calls to `sregexq' throughout your
-;; code, the way one normally does with string regexes (which are
-;; cheap to evaluate). Instead, it's wiser to precompute the regexes
-;; you need wherever possible instead of repeatedly constructing the
-;; same ones over and over. Example:
-
-;; (let ((field-regex (sregexq (opt "resent-")
-;; (or "to" "cc" "bcc"))))
-;; ...
-;; (while ...
-;; ...
-;; (re-search-forward field-regex ...)
-;; ...))
-
-;; The arguments to `sregexq' are automatically quoted, but the
-;; flipside of this is that it is not straightforward to include
-;; computed (i.e., non-constant) values in `sregexq' expressions. So
-;; `sregex' is a function that is like `sregexq' but which does not
-;; automatically quote its values. Literal sregex clauses must be
-;; explicitly quoted like so:
-
-;; (sregex '(or "Bob" "Robert")) => "Bob\\|Robert"
-
-;; but computed clauses can be included easily, allowing for the reuse
-;; of common clauses:
-
-;; (let ((dotstar '(0+ any))
-;; (whitespace '(1+ (syntax ?-)))
-;; (digits '(1+ (char (?0 . ?9)))))
-;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+"
-
-;; To use this package in a Lisp program, simply (require 'sregex).
-
-;; Here are the clauses allowed in an `sregex' or `sregexq'
-;; expression:
-
-;; - a string
-;; This stands for the literal string. If it contains
-;; metacharacters, they will be escaped in the resulting regex
-;; (using `regexp-quote').
-
-;; - the symbol `any'
-;; This stands for ".", a regex matching any character except
-;; newline.
-
-;; - the symbol `bol'
-;; Stands for "^", matching the empty string at the beginning of a line
-
-;; - the symbol `eol'
-;; Stands for "$", matching the empty string at the end of a line
-
-;; - (group CLAUSE ...)
-;; Groups the given CLAUSEs using "\\(" and "\\)".
-
-;; - (sequence CLAUSE ...)
-
-;; Groups the given CLAUSEs; may or may not use "\\(?:" and "\\)".
-;; Clauses grouped by `sequence' do not count for purposes of
-;; numbering backreferences. Use `sequence' in situations like
-;; this:
-
-;; (sregexq (or "dog" "cat"
-;; (sequence (opt "sea ") "monkey")))
-;; => "dog\\|cat\\|\\(?:sea \\)?monkey"
-
-;; where a single `or' alternate needs to contain multiple
-;; subclauses.
-
-;; - (backref N)
-;; Matches the same string previously matched by the Nth "group" in
-;; the same sregex. N is a positive integer.
-
-;; - (or CLAUSE ...)
-;; Matches any one of the CLAUSEs by separating them with "\\|".
-
-;; - (0+ CLAUSE ...)
-;; Concatenates the given CLAUSEs and matches zero or more
-;; occurrences by appending "*".
-
-;; - (1+ CLAUSE ...)
-;; Concatenates the given CLAUSEs and matches one or more
-;; occurrences by appending "+".
-
-;; - (opt CLAUSE ...)
-;; Concatenates the given CLAUSEs and matches zero or one occurrence
-;; by appending "?".
-
-;; - (repeat MIN MAX CLAUSE ...)
-;; Concatenates the given CLAUSEs and constructs a regex matching at
-;; least MIN occurrences and at most MAX occurrences. MIN must be a
-;; non-negative integer. MAX must be a non-negative integer greater
-;; than or equal to MIN; or MAX can be nil to mean "infinity."
-
-;; - (char CHAR-CLAUSE ...)
-;; Creates a "character class" matching one character from the given
-;; set. See below for how to construct a CHAR-CLAUSE.
-
-;; - (not-char CHAR-CLAUSE ...)
-;; Creates a "character class" matching any one character not in the
-;; given set. See below for how to construct a CHAR-CLAUSE.
-
-;; - the symbol `bot'
-;; Stands for "\\`", matching the empty string at the beginning of
-;; text (beginning of a string or of a buffer).
-
-;; - the symbol `eot'
-;; Stands for "\\'", matching the empty string at the end of text.
-
-;; - the symbol `point'
-;; Stands for "\\=", matching the empty string at point.
-
-;; - the symbol `word-boundary'
-;; Stands for "\\b", matching the empty string at the beginning or
-;; end of a word.
-
-;; - the symbol `not-word-boundary'
-;; Stands for "\\B", matching the empty string not at the beginning
-;; or end of a word.
-
-;; - the symbol `bow'
-;; Stands for "\\<", matching the empty string at the beginning of a
-;; word.
-
-;; - the symbol `eow'
-;; Stands for "\\>", matching the empty string at the end of a word.
-
-;; - the symbol `wordchar'
-;; Stands for the regex "\\w", matching a word-constituent character
-;; (as determined by the current syntax table)
-
-;; - the symbol `not-wordchar'
-;; Stands for the regex "\\W", matching a non-word-constituent
-;; character.
-
-;; - (syntax CODE)
-;; Stands for the regex "\\sCODE", where CODE is a syntax table code
-;; (a single character). Matches any character with the requested
-;; syntax.
-
-;; - (not-syntax CODE)
-;; Stands for the regex "\\SCODE", where CODE is a syntax table code
-;; (a single character). Matches any character without the
-;; requested syntax.
-
-;; - (regex REGEX)
-;; This is a "trapdoor" for including ordinary regular expression
-;; strings in the result. Some regular expressions are clearer when
-;; written the old way: "[a-z]" vs. (sregexq (char (?a . ?z))), for
-;; instance. However, see the note under "Bugs," below.
-
-;; Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
-;; has one of the following forms:
-
-;; - a character
-;; Adds that character to the set.
-
-;; - a string
-;; Adds all the characters in the string to the set.
-
-;; - A pair (MIN . MAX)
-;; Where MIN and MAX are characters, adds the range of characters
-;; from MIN through MAX to the set.
-
-;;; To do:
-
-;; An earlier version of this package could optionally translate the
-;; symbolic regex into other languages' syntaxes, e.g. Perl. For
-;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would
-;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore
-;; such a facility.
-
-;; - handle multibyte chars in sregex--char-aux
-;; - add support for character classes ([:blank:], ...)
-;; - add support for non-greedy operators *? and +?
-;; - bug: (sregexq (opt (opt ?a))) returns "a??" which is a non-greedy "a?"
-
-;;; Bugs:
-
-;;; Code:
-
-(eval-when-compile (require 'cl))
-
-;; Compatibility code for when we didn't have shy-groups
-(defvar sregex--current-sregex nil)
-(defun sregex-info () nil)
-(defmacro sregex-save-match-data (&rest forms) (cons 'save-match-data forms))
-(defun sregex-replace-match (r &optional f l str subexp x)
- (replace-match r f l str subexp))
-(defun sregex-match-string (c &optional i x) (match-string c i))
-(defun sregex-match-string-no-properties (count &optional in-string sregex)
- (match-string-no-properties count in-string))
-(defun sregex-match-beginning (count &optional sregex) (match-beginning count))
-(defun sregex-match-end (count &optional sregex) (match-end count))
-(defun sregex-match-data (&optional sregex) (match-data))
-(defun sregex-backref-num (n &optional sregex) n)
-
-
-(defun sregex (&rest exps)
- "Symbolic regular expression interpreter.
-This is exactly like `sregexq' (q.v.) except that it evaluates all its
-arguments, so literal sregex clauses must be quoted. For example:
-
- (sregex '(or \"Bob\" \"Robert\")) => \"Bob\\\\|Robert\"
-
-An argument-evaluating sregex interpreter lets you reuse sregex
-subexpressions:
-
- (let ((dotstar '(0+ any))
- (whitespace '(1+ (syntax ?-)))
- (digits '(1+ (char (?0 . ?9)))))
- (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\""
- (sregex--sequence exps nil))
-
-(defmacro sregexq (&rest exps)
- "Symbolic regular expression interpreter.
-This macro allows you to specify a regular expression (regexp) in
-symbolic form, and converts it into the string form required by Emacs's
-regex functions such as `re-search-forward' and `looking-at'. Here is
-a simple example:
-
- (sregexq (or \"Bob\" \"Robert\")) => \"Bob\\\\|Robert\"
-
-As you can see, an sregex is specified by placing one or more special
-clauses in a call to `sregexq'. The clause in this case is the `or'
-of two strings (not to be confused with the Lisp function `or'). The
-list of allowable clauses appears below.
-
-With `sregex', it is never necessary to \"escape\" magic characters
-that are meant to be taken literally; that happens automatically.
-For example:
-
- (sregexq \"M*A*S*H\") => \"M\\\\*A\\\\*S\\\\*H\"
-
-It is also unnecessary to \"group\" parts of the expression together
-to overcome operator precedence; that also happens automatically.
-For example:
-
- (sregexq (opt (or \"Bob\" \"Robert\"))) => \"\\\\(Bob\\\\|Robert\\\\)?\"
-
-It *is* possible to group parts of the expression in order to refer
-to them with numbered backreferences:
-
- (sregexq (group (or \"Go\" \"Run\"))
- \", Spot, \"
- (backref 1)) => \"\\\\(Go\\\\|Run\\\\), Spot, \\\\1\"
-
-If `sregexq' needs to introduce its own grouping parentheses, it will
-automatically renumber your backreferences:
-
- (sregexq (opt \"resent-\")
- (group (or \"to\" \"cc\" \"bcc\"))
- \": \"
- (backref 1)) => \"\\\\(resent-\\\\)?\\\\(to\\\\|cc\\\\|bcc\\\\): \\\\2\"
-
-`sregexq' is a macro. Each time it is used, it constructs a simple
-Lisp expression that then invokes a moderately complex engine to
-interpret the sregex and render the string form. Because of this, I
-don't recommend sprinkling calls to `sregexq' throughout your code,
-the way one normally does with string regexes (which are cheap to
-evaluate). Instead, it's wiser to precompute the regexes you need
-wherever possible instead of repeatedly constructing the same ones
-over and over. Example:
-
- (let ((field-regex (sregexq (opt \"resent-\")
- (or \"to\" \"cc\" \"bcc\"))))
- ...
- (while ...
- ...
- (re-search-forward field-regex ...)
- ...))
-
-The arguments to `sregexq' are automatically quoted, but the
-flipside of this is that it is not straightforward to include
-computed (i.e., non-constant) values in `sregexq' expressions. So
-`sregex' is a function that is like `sregexq' but which does not
-automatically quote its values. Literal sregex clauses must be
-explicitly quoted like so:
-
- (sregex '(or \"Bob\" \"Robert\")) => \"Bob\\\\|Robert\"
-
-but computed clauses can be included easily, allowing for the reuse
-of common clauses:
-
- (let ((dotstar '(0+ any))
- (whitespace '(1+ (syntax ?-)))
- (digits '(1+ (char (?0 . ?9)))))
- (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\"
-
-Here are the clauses allowed in an `sregex' or `sregexq' expression:
-
-- a string
- This stands for the literal string. If it contains
- metacharacters, they will be escaped in the resulting regex
- (using `regexp-quote').
-
-- the symbol `any'
- This stands for \".\", a regex matching any character except
- newline.
-
-- the symbol `bol'
- Stands for \"^\", matching the empty string at the beginning of a line
-
-- the symbol `eol'
- Stands for \"$\", matching the empty string at the end of a line
-
-- (group CLAUSE ...)
- Groups the given CLAUSEs using \"\\\\(\" and \"\\\\)\".
-
-- (sequence CLAUSE ...)
-
- Groups the given CLAUSEs; may or may not use \"\\\\(\" and \"\\\\)\".
- Clauses grouped by `sequence' do not count for purposes of
- numbering backreferences. Use `sequence' in situations like
- this:
-
- (sregexq (or \"dog\" \"cat\"
- (sequence (opt \"sea \") \"monkey\")))
- => \"dog\\\\|cat\\\\|\\\\(?:sea \\\\)?monkey\"
-
- where a single `or' alternate needs to contain multiple
- subclauses.
-
-- (backref N)
- Matches the same string previously matched by the Nth \"group\" in
- the same sregex. N is a positive integer.
-
-- (or CLAUSE ...)
- Matches any one of the CLAUSEs by separating them with \"\\\\|\".
-
-- (0+ CLAUSE ...)
- Concatenates the given CLAUSEs and matches zero or more
- occurrences by appending \"*\".
-
-- (1+ CLAUSE ...)
- Concatenates the given CLAUSEs and matches one or more
- occurrences by appending \"+\".
-
-- (opt CLAUSE ...)
- Concatenates the given CLAUSEs and matches zero or one occurrence
- by appending \"?\".
-
-- (repeat MIN MAX CLAUSE ...)
- Concatenates the given CLAUSEs and constructs a regex matching at
- least MIN occurrences and at most MAX occurrences. MIN must be a
- non-negative integer. MAX must be a non-negative integer greater
- than or equal to MIN; or MAX can be nil to mean \"infinity.\"
-
-- (char CHAR-CLAUSE ...)
- Creates a \"character class\" matching one character from the given
- set. See below for how to construct a CHAR-CLAUSE.
-
-- (not-char CHAR-CLAUSE ...)
- Creates a \"character class\" matching any one character not in the
- given set. See below for how to construct a CHAR-CLAUSE.
-
-- the symbol `bot'
- Stands for \"\\\\`\", matching the empty string at the beginning of
- text (beginning of a string or of a buffer).
-
-- the symbol `eot'
- Stands for \"\\\\'\", matching the empty string at the end of text.
-
-- the symbol `point'
- Stands for \"\\\\=\\=\", matching the empty string at point.
-
-- the symbol `word-boundary'
- Stands for \"\\\\b\", matching the empty string at the beginning or
- end of a word.
-
-- the symbol `not-word-boundary'
- Stands for \"\\\\B\", matching the empty string not at the beginning
- or end of a word.
-
-- the symbol `bow'
- Stands for \"\\\\=\\<\", matching the empty string at the beginning of a
- word.
-
-- the symbol `eow'
- Stands for \"\\\\=\\>\", matching the empty string at the end of a word.
-
-- the symbol `wordchar'
- Stands for the regex \"\\\\w\", matching a word-constituent character
- (as determined by the current syntax table)
-
-- the symbol `not-wordchar'
- Stands for the regex \"\\\\W\", matching a non-word-constituent
- character.
-
-- (syntax CODE)
- Stands for the regex \"\\\\sCODE\", where CODE is a syntax table code
- (a single character). Matches any character with the requested
- syntax.
-
-- (not-syntax CODE)
- Stands for the regex \"\\\\SCODE\", where CODE is a syntax table code
- (a single character). Matches any character without the
- requested syntax.
-
-- (regex REGEX)
- This is a \"trapdoor\" for including ordinary regular expression
- strings in the result. Some regular expressions are clearer when
- written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for
- instance.
-
-Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
-has one of the following forms:
-
-- a character
- Adds that character to the set.
-
-- a string
- Adds all the characters in the string to the set.
-
-- A pair (MIN . MAX)
- Where MIN and MAX are characters, adds the range of characters
- from MIN through MAX to the set."
- `(apply 'sregex ',exps))
-
-(defun sregex--engine (exp combine)
- (cond
- ((stringp exp)
- (if (and combine
- (eq combine 'suffix)
- (/= (length exp) 1))
- (concat "\\(?:" (regexp-quote exp) "\\)")
- (regexp-quote exp)))
- ((symbolp exp)
- (ecase exp
- (any ".")
- (bol "^")
- (eol "$")
- (wordchar "\\w")
- (not-wordchar "\\W")
- (bot "\\`")
- (eot "\\'")
- (point "\\=")
- (word-boundary "\\b")
- (not-word-boundary "\\B")
- (bow "\\<")
- (eow "\\>")))
- ((consp exp)
- (funcall (intern (concat "sregex--"
- (symbol-name (car exp))))
- (cdr exp)
- combine))
- (t (error "Invalid expression: %s" exp))))
-
-(defun sregex--sequence (exps combine)
- (if (= (length exps) 1) (sregex--engine (car exps) combine)
- (let ((re (mapconcat
- (lambda (e) (sregex--engine e 'concat))
- exps "")))
- (if (eq combine 'suffix)
- (concat "\\(?:" re "\\)")
- re))))
-
-(defun sregex--or (exps combine)
- (if (= (length exps) 1) (sregex--engine (car exps) combine)
- (let ((re (mapconcat
- (lambda (e) (sregex--engine e 'or))
- exps "\\|")))
- (if (not (eq combine 'or))
- (concat "\\(?:" re "\\)")
- re))))
-
-(defun sregex--group (exps combine) (concat "\\(" (sregex--sequence exps nil) "\\)"))
-
-(defun sregex--backref (exps combine) (concat "\\" (int-to-string (car exps))))
-(defun sregex--opt (exps combine) (concat (sregex--sequence exps 'suffix) "?"))
-(defun sregex--0+ (exps combine) (concat (sregex--sequence exps 'suffix) "*"))
-(defun sregex--1+ (exps combine) (concat (sregex--sequence exps 'suffix) "+"))
-
-(defun sregex--char (exps combine) (sregex--char-aux nil exps))
-(defun sregex--not-char (exps combine) (sregex--char-aux t exps))
-
-(defun sregex--syntax (exps combine) (format "\\s%c" (car exps)))
-(defun sregex--not-syntax (exps combine) (format "\\S%c" (car exps)))
-
-(defun sregex--regex (exps combine)
- (if combine (concat "\\(?:" (car exps) "\\)") (car exps)))
-
-(defun sregex--repeat (exps combine)
- (let* ((min (or (pop exps) 0))
- (minstr (number-to-string min))
- (max (pop exps)))
- (concat (sregex--sequence exps 'suffix)
- (concat "\\{" minstr ","
- (when max (number-to-string max)) "\\}"))))
-
-(defun sregex--char-range (start end)
- (let ((startc (char-to-string start))
- (endc (char-to-string end)))
- (cond
- ((> end (+ start 2)) (concat startc "-" endc))
- ((> end (+ start 1)) (concat startc (char-to-string (1+ start)) endc))
- ((> end start) (concat startc endc))
- (t startc))))
-
-(defun sregex--char-aux (complement args)
- ;; regex-opt does the same, we should join effort.
- (let ((chars (make-bool-vector 256 nil))) ; Yeah, right!
- (dolist (arg args)
- (cond ((integerp arg) (aset chars arg t))
- ((stringp arg) (mapc (lambda (c) (aset chars c t)) arg))
- ((consp arg)
- (let ((start (car arg))
- (end (cdr arg)))
- (when (> start end)
- (let ((tmp start)) (setq start end) (setq end tmp)))
- ;; now start <= end
- (let ((i start))
- (while (<= i end)
- (aset chars i t)
- (setq i (1+ i))))))))
- ;; now chars is a map of the characters in the class
- (let ((caret (aref chars ?^))
- (dash (aref chars ?-))
- (class (if (aref chars ?\]) "]" "")))
- (aset chars ?^ nil)
- (aset chars ?- nil)
- (aset chars ?\] nil)
-
- (let (start end)
- (dotimes (i 256)
- (if (aref chars i)
- (progn
- (unless start (setq start i))
- (setq end i)
- (aset chars i nil))
- (when start
- (setq class (concat class (sregex--char-range start end)))
- (setq start nil))))
- (if start
- (setq class (concat class (sregex--char-range start end)))))
-
- (if (> (length class) 0)
- (setq class (concat class (if caret "^") (if dash "-")))
- (setq class (concat class (if dash "-") (if caret "^"))))
- (if (and (not complement) (= (length class) 1))
- (regexp-quote class)
- (concat "[" (if complement "^") class "]")))))
-
-(provide 'sregex)
-
-;; arch-tag: 460c1f5a-eb6e-42ec-a451-ffac78bdf492
-;;; sregex.el ends here
--- /dev/null
+;;; sregex.el --- symbolic regular expressions
+
+;; Copyright (C) 1997, 1998, 2000, 2001, 2002, 2003, 2004,
+;; 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+
+;; Author: Bob Glickstein <bobg+sregex@zanshin.com>
+;; Maintainer: Bob Glickstein <bobg+sregex@zanshin.com>
+;; Keywords: extensions
+;; Obsolete-since: 24.1
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This package allows you to write regular expressions using a
+;; totally new, Lisp-like syntax.
+
+;; A "symbolic regular expression" (sregex for short) is a Lisp form
+;; that, when evaluated, produces the string form of the specified
+;; regular expression. Here's a simple example:
+
+;; (sregexq (or "Bob" "Robert")) => "Bob\\|Robert"
+
+;; As you can see, an sregex is specified by placing one or more
+;; special clauses in a call to `sregexq'. The clause in this case is
+;; the `or' of two strings (not to be confused with the Lisp function
+;; `or'). The list of allowable clauses appears below.
+
+;; With sregex, it is never necessary to "escape" magic characters
+;; that are meant to be taken literally; that happens automatically.
+;; For example:
+
+;; (sregexq "M*A*S*H") => "M\\*A\\*S\\*H"
+
+;; It is also unnecessary to "group" parts of the expression together
+;; to overcome operator precedence; that also happens automatically.
+;; For example:
+
+;; (sregexq (opt (or "Bob" "Robert"))) => "\\(?:Bob\\|Robert\\)?"
+
+;; It *is* possible to group parts of the expression in order to refer
+;; to them with numbered backreferences:
+
+;; (sregexq (group (or "Go" "Run"))
+;; ", Spot, "
+;; (backref 1)) => "\\(Go\\|Run\\), Spot, \\1"
+
+;; `sregexq' is a macro. Each time it is used, it constructs a simple
+;; Lisp expression that then invokes a moderately complex engine to
+;; interpret the sregex and render the string form. Because of this,
+;; I don't recommend sprinkling calls to `sregexq' throughout your
+;; code, the way one normally does with string regexes (which are
+;; cheap to evaluate). Instead, it's wiser to precompute the regexes
+;; you need wherever possible instead of repeatedly constructing the
+;; same ones over and over. Example:
+
+;; (let ((field-regex (sregexq (opt "resent-")
+;; (or "to" "cc" "bcc"))))
+;; ...
+;; (while ...
+;; ...
+;; (re-search-forward field-regex ...)
+;; ...))
+
+;; The arguments to `sregexq' are automatically quoted, but the
+;; flipside of this is that it is not straightforward to include
+;; computed (i.e., non-constant) values in `sregexq' expressions. So
+;; `sregex' is a function that is like `sregexq' but which does not
+;; automatically quote its values. Literal sregex clauses must be
+;; explicitly quoted like so:
+
+;; (sregex '(or "Bob" "Robert")) => "Bob\\|Robert"
+
+;; but computed clauses can be included easily, allowing for the reuse
+;; of common clauses:
+
+;; (let ((dotstar '(0+ any))
+;; (whitespace '(1+ (syntax ?-)))
+;; (digits '(1+ (char (?0 . ?9)))))
+;; (sregex 'bol dotstar ":" whitespace digits)) => "^.*:\\s-+[0-9]+"
+
+;; To use this package in a Lisp program, simply (require 'sregex).
+
+;; Here are the clauses allowed in an `sregex' or `sregexq'
+;; expression:
+
+;; - a string
+;; This stands for the literal string. If it contains
+;; metacharacters, they will be escaped in the resulting regex
+;; (using `regexp-quote').
+
+;; - the symbol `any'
+;; This stands for ".", a regex matching any character except
+;; newline.
+
+;; - the symbol `bol'
+;; Stands for "^", matching the empty string at the beginning of a line
+
+;; - the symbol `eol'
+;; Stands for "$", matching the empty string at the end of a line
+
+;; - (group CLAUSE ...)
+;; Groups the given CLAUSEs using "\\(" and "\\)".
+
+;; - (sequence CLAUSE ...)
+;; Groups the given CLAUSEs; may or may not use "\\(?:" and "\\)".
+;; Clauses grouped by `sequence' do not count for purposes of
+;; numbering backreferences. Use `sequence' in situations like
+;; this:
+
+;; (sregexq (or "dog" "cat"
+;; (sequence (opt "sea ") "monkey")))
+;; => "\\(?:dog\\|cat\\|\\(?:sea \\)?monkey\\)"
+
+;; where a single `or' alternate needs to contain multiple
+;; subclauses.
+
+;; - (backref N)
+;; Matches the same string previously matched by the Nth "group" in
+;; the same sregex. N is a positive integer.
+
+;; - (or CLAUSE ...)
+;; Matches any one of the CLAUSEs by separating them with "\\|".
+
+;; - (0+ CLAUSE ...)
+;; Concatenates the given CLAUSEs and matches zero or more
+;; occurrences by appending "*".
+
+;; - (1+ CLAUSE ...)
+;; Concatenates the given CLAUSEs and matches one or more
+;; occurrences by appending "+".
+
+;; - (opt CLAUSE ...)
+;; Concatenates the given CLAUSEs and matches zero or one occurrence
+;; by appending "?".
+
+;; - (repeat MIN MAX CLAUSE ...)
+;; Concatenates the given CLAUSEs and constructs a regex matching at
+;; least MIN occurrences and at most MAX occurrences. MIN must be a
+;; non-negative integer. MAX must be a non-negative integer greater
+;; than or equal to MIN; or MAX can be nil to mean "infinity."
+
+;; - (char CHAR-CLAUSE ...)
+;; Creates a "character class" matching one character from the given
+;; set. See below for how to construct a CHAR-CLAUSE.
+
+;; - (not-char CHAR-CLAUSE ...)
+;; Creates a "character class" matching any one character not in the
+;; given set. See below for how to construct a CHAR-CLAUSE.
+
+;; - the symbol `bot'
+;; Stands for "\\`", matching the empty string at the beginning of
+;; text (beginning of a string or of a buffer).
+
+;; - the symbol `eot'
+;; Stands for "\\'", matching the empty string at the end of text.
+
+;; - the symbol `point'
+;; Stands for "\\=", matching the empty string at point.
+
+;; - the symbol `word-boundary'
+;; Stands for "\\b", matching the empty string at the beginning or
+;; end of a word.
+
+;; - the symbol `not-word-boundary'
+;; Stands for "\\B", matching the empty string not at the beginning
+;; or end of a word.
+
+;; - the symbol `bow'
+;; Stands for "\\<", matching the empty string at the beginning of a
+;; word.
+
+;; - the symbol `eow'
+;; Stands for "\\>", matching the empty string at the end of a word.
+
+;; - the symbol `wordchar'
+;; Stands for the regex "\\w", matching a word-constituent character
+;; (as determined by the current syntax table)
+
+;; - the symbol `not-wordchar'
+;; Stands for the regex "\\W", matching a non-word-constituent
+;; character.
+
+;; - (syntax CODE)
+;; Stands for the regex "\\sCODE", where CODE is a syntax table code
+;; (a single character). Matches any character with the requested
+;; syntax.
+
+;; - (not-syntax CODE)
+;; Stands for the regex "\\SCODE", where CODE is a syntax table code
+;; (a single character). Matches any character without the
+;; requested syntax.
+
+;; - (regex REGEX)
+;; This is a "trapdoor" for including ordinary regular expression
+;; strings in the result. Some regular expressions are clearer when
+;; written the old way: "[a-z]" vs. (sregexq (char (?a . ?z))), for
+;; instance.
+
+;; Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
+;; has one of the following forms:
+
+;; - a character
+;; Adds that character to the set.
+
+;; - a string
+;; Adds all the characters in the string to the set.
+
+;; - A pair (MIN . MAX)
+;; Where MIN and MAX are characters, adds the range of characters
+;; from MIN through MAX to the set.
+
+;;; To do:
+
+;; An earlier version of this package could optionally translate the
+;; symbolic regex into other languages' syntaxes, e.g. Perl. For
+;; instance, with Perl syntax selected, (sregexq (or "ab" "cd")) would
+;; yield "ab|cd" instead of "ab\\|cd". It might be useful to restore
+;; such a facility.
+
+;; - handle multibyte chars in sregex--char-aux
+;; - add support for character classes ([:blank:], ...)
+;; - add support for non-greedy operators *? and +?
+;; - bug: (sregexq (opt (opt ?a))) returns "a??" which is a non-greedy "a?"
+
+;;; Bugs:
+
+;;; Code:
+
+(eval-when-compile (require 'cl))
+
+;; Compatibility code for when we didn't have shy-groups
+;; The definitions below are thin wrappers: each one accepts an extra
+;; trailing argument (where one is declared) and ignores it.
+;; Initialized to nil; none of the functions defined below reads it.
+(defvar sregex--current-sregex nil)
+;; Takes no arguments and always returns nil.
+(defun sregex-info () nil)
+;; Expands to (save-match-data FORMS...).
+(defmacro sregex-save-match-data (&rest forms) (cons 'save-match-data forms))
+;; Calls `replace-match' with R, F, L, STR and SUBEXP; X is ignored.
+(defun sregex-replace-match (r &optional f l str subexp x)
+ (replace-match r f l str subexp))
+;; Calls `match-string' with C and I; X is ignored.
+(defun sregex-match-string (c &optional i x) (match-string c i))
+;; Calls `match-string-no-properties' with COUNT and IN-STRING; SREGEX
+;; is ignored.
+(defun sregex-match-string-no-properties (count &optional in-string sregex)
+ (match-string-no-properties count in-string))
+;; Calls `match-beginning' with COUNT; SREGEX is ignored.
+(defun sregex-match-beginning (count &optional sregex) (match-beginning count))
+;; Calls `match-end' with COUNT; SREGEX is ignored.
+(defun sregex-match-end (count &optional sregex) (match-end count))
+;; Calls `match-data' with no arguments; SREGEX is ignored.
+(defun sregex-match-data (&optional sregex) (match-data))
+;; Returns N unchanged; SREGEX is ignored.
+(defun sregex-backref-num (n &optional sregex) n)
+
+
+(defun sregex (&rest exps)
+ "Symbolic regular expression interpreter.
+This is exactly like `sregexq' (q.v.) except that it evaluates all its
+arguments, so literal sregex clauses must be quoted. For example:
+
+ (sregex '(or \"Bob\" \"Robert\")) => \"\\\\(?:Bob\\\\|Robert\\\\)\"
+
+An argument-evaluating sregex interpreter lets you reuse sregex
+subexpressions:
+
+ (let ((dotstar '(0+ any))
+ (whitespace '(1+ (syntax ?-)))
+ (digits '(1+ (char (?0 . ?9)))))
+ (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\""
+ (sregex--sequence exps nil))
+
+(defmacro sregexq (&rest exps)
+ "Symbolic regular expression interpreter.
+This macro allows you to specify a regular expression (regexp) in
+symbolic form, and converts it into the string form required by Emacs's
+regex functions such as `re-search-forward' and `looking-at'. Here is
+a simple example:
+
+ (sregexq (or \"Bob\" \"Robert\")) => \"\\\\(?:Bob\\\\|Robert\\\\)\"
+
+As you can see, an sregex is specified by placing one or more special
+clauses in a call to `sregexq'. The clause in this case is the `or'
+of two strings (not to be confused with the Lisp function `or'). The
+list of allowable clauses appears below.
+
+With `sregex', it is never necessary to \"escape\" magic characters
+that are meant to be taken literally; that happens automatically.
+For example:
+
+ (sregexq \"M*A*S*H\") => \"M\\\\*A\\\\*S\\\\*H\"
+
+It is also unnecessary to \"group\" parts of the expression together
+to overcome operator precedence; that also happens automatically.
+For example:
+
+ (sregexq (opt (or \"Bob\" \"Robert\"))) => \"\\\\(?:Bob\\\\|Robert\\\\)?\"
+
+It *is* possible to group parts of the expression in order to refer
+to them with numbered backreferences:
+
+ (sregexq (group (or \"Go\" \"Run\"))
+ \", Spot, \"
+ (backref 1)) => \"\\\\(\\\\(?:Go\\\\|Run\\\\)\\\\), Spot, \\\\1\"
+
+When `sregexq' needs to introduce its own grouping parentheses, it
+uses shy groups, so your backreferences keep their numbers:
+
+ (sregexq (opt \"resent-\")
+ (group (or \"to\" \"cc\" \"bcc\"))
+ \": \"
+ (backref 1))
+ => \"\\\\(?:resent-\\\\)?\\\\(\\\\(?:to\\\\|cc\\\\|bcc\\\\)\\\\): \\\\1\"
+
+`sregexq' is a macro. Each time it is used, it constructs a simple
+Lisp expression that then invokes a moderately complex engine to
+interpret the sregex and render the string form. Because of this, I
+don't recommend sprinkling calls to `sregexq' throughout your code,
+the way one normally does with string regexes (which are cheap to
+evaluate). Instead, it's wiser to precompute the regexes you need
+wherever possible instead of repeatedly constructing the same ones
+over and over. Example:
+
+ (let ((field-regex (sregexq (opt \"resent-\")
+ (or \"to\" \"cc\" \"bcc\"))))
+ ...
+ (while ...
+ ...
+ (re-search-forward field-regex ...)
+ ...))
+
+The arguments to `sregexq' are automatically quoted, but the
+flipside of this is that it is not straightforward to include
+computed (i.e., non-constant) values in `sregexq' expressions. So
+`sregex' is a function that is like `sregexq' but which does not
+automatically quote its values. Literal sregex clauses must be
+explicitly quoted like so:
+
+ (sregex '(or \"Bob\" \"Robert\")) => \"\\\\(?:Bob\\\\|Robert\\\\)\"
+
+but computed clauses can be included easily, allowing for the reuse
+of common clauses:
+
+ (let ((dotstar '(0+ any))
+ (whitespace '(1+ (syntax ?-)))
+ (digits '(1+ (char (?0 . ?9)))))
+ (sregex 'bol dotstar \":\" whitespace digits)) => \"^.*:\\\\s-+[0-9]+\"
+
+Here are the clauses allowed in an `sregex' or `sregexq' expression:
+
+- a string
+ This stands for the literal string. If it contains
+ metacharacters, they will be escaped in the resulting regex
+ (using `regexp-quote').
+
+- the symbol `any'
+ This stands for \".\", a regex matching any character except
+ newline.
+
+- the symbol `bol'
+ Stands for \"^\", matching the empty string at the beginning of a line
+
+- the symbol `eol'
+ Stands for \"$\", matching the empty string at the end of a line
+
+- (group CLAUSE ...)
+ Groups the given CLAUSEs using \"\\\\(\" and \"\\\\)\".
+
+- (sequence CLAUSE ...)
+ Groups the given CLAUSEs; may or may not use \"\\\\(?:\" and \"\\\\)\".
+ Clauses grouped by `sequence' do not count for purposes of
+ numbering backreferences. Use `sequence' in situations like
+ this:
+
+ (sregexq (or \"dog\" \"cat\"
+ (sequence (opt \"sea \") \"monkey\")))
+ => \"\\\\(?:dog\\\\|cat\\\\|\\\\(?:sea \\\\)?monkey\\\\)\"
+
+ where a single `or' alternate needs to contain multiple
+ subclauses.
+
+- (backref N)
+ Matches the same string previously matched by the Nth \"group\" in
+ the same sregex. N is a positive integer.
+
+- (or CLAUSE ...)
+ Matches any one of the CLAUSEs by separating them with \"\\\\|\".
+
+- (0+ CLAUSE ...)
+ Concatenates the given CLAUSEs and matches zero or more
+ occurrences by appending \"*\".
+
+- (1+ CLAUSE ...)
+ Concatenates the given CLAUSEs and matches one or more
+ occurrences by appending \"+\".
+
+- (opt CLAUSE ...)
+ Concatenates the given CLAUSEs and matches zero or one occurrence
+ by appending \"?\".
+
+- (repeat MIN MAX CLAUSE ...)
+ Concatenates the given CLAUSEs and constructs a regex matching at
+ least MIN occurrences and at most MAX occurrences. MIN must be a
+ non-negative integer. MAX must be a non-negative integer greater
+ than or equal to MIN; or MAX can be nil to mean \"infinity.\"
+
+- (char CHAR-CLAUSE ...)
+ Creates a \"character class\" matching one character from the given
+ set. See below for how to construct a CHAR-CLAUSE.
+
+- (not-char CHAR-CLAUSE ...)
+ Creates a \"character class\" matching any one character not in the
+ given set. See below for how to construct a CHAR-CLAUSE.
+
+- the symbol `bot'
+ Stands for \"\\\\`\", matching the empty string at the beginning of
+ text (beginning of a string or of a buffer).
+
+- the symbol `eot'
+ Stands for \"\\\\'\", matching the empty string at the end of text.
+
+- the symbol `point'
+ Stands for \"\\\\=\\=\", matching the empty string at point.
+
+- the symbol `word-boundary'
+ Stands for \"\\\\b\", matching the empty string at the beginning or
+ end of a word.
+
+- the symbol `not-word-boundary'
+ Stands for \"\\\\B\", matching the empty string not at the beginning
+ or end of a word.
+
+- the symbol `bow'
+ Stands for \"\\\\=\\<\", matching the empty string at the beginning of a
+ word.
+
+- the symbol `eow'
+ Stands for \"\\\\=\\>\", matching the empty string at the end of a word.
+
+- the symbol `wordchar'
+ Stands for the regex \"\\\\w\", matching a word-constituent character
+ (as determined by the current syntax table)
+
+- the symbol `not-wordchar'
+ Stands for the regex \"\\\\W\", matching a non-word-constituent
+ character.
+
+- (syntax CODE)
+ Stands for the regex \"\\\\sCODE\", where CODE is a syntax table code
+ (a single character). Matches any character with the requested
+ syntax.
+
+- (not-syntax CODE)
+ Stands for the regex \"\\\\SCODE\", where CODE is a syntax table code
+ (a single character). Matches any character without the
+ requested syntax.
+
+- (regex REGEX)
+ This is a \"trapdoor\" for including ordinary regular expression
+ strings in the result. Some regular expressions are clearer when
+ written the old way: \"[a-z]\" vs. (sregexq (char (?a . ?z))), for
+ instance.
+
+Each CHAR-CLAUSE that is passed to (char ...) and (not-char ...)
+has one of the following forms:
+
+- a character
+ Adds that character to the set.
+
+- a string
+ Adds all the characters in the string to the set.
+
+- A pair (MIN . MAX)
+ Where MIN and MAX are characters, adds the range of characters
+ from MIN through MAX to the set."
+ `(apply 'sregex ',exps))
+
+(defun sregex--engine (exp combine)
+  "Translate the single sregex clause EXP into a regexp string.
+EXP is a string (taken literally), one of the keyword symbols listed
+below, or a list (NAME ARG...) which is handed to the function named
+`sregex--NAME' together with COMBINE.
+COMBINE tells how the caller will use the result; when it is `suffix'
+a literal string longer or shorter than one character is enclosed in
+a shy group.  Signal an error for any other kind of EXP."
+  (cond
+   ((stringp exp)
+    (let ((quoted (regexp-quote exp)))
+      ;; A postfix operator only binds to the last character, so only
+      ;; a one-character literal may be left ungrouped before it.
+      (if (or (not (eq combine 'suffix))
+              (= (length exp) 1))
+          quoted
+        (concat "\\(?:" quoted "\\)"))))
+   ((symbolp exp)
+    (ecase exp
+      (any ".")
+      (bol "^")
+      (eol "$")
+      (wordchar "\\w")
+      (not-wordchar "\\W")
+      (bot "\\`")
+      (eot "\\'")
+      (point "\\=")
+      (word-boundary "\\b")
+      (not-word-boundary "\\B")
+      (bow "\\<")
+      (eow "\\>")))
+   ((consp exp)
+    ;; Dispatch on the clause name: (NAME ARG...) => (sregex--NAME ARGS COMBINE).
+    (let* ((handler-name (concat "sregex--" (symbol-name (car exp))))
+           (handler (intern handler-name)))
+      (funcall handler (cdr exp) combine)))
+   (t
+    (error "Invalid expression: %s" exp))))
+
+(defun sregex--sequence (exps combine)
+  "Return the regexp for the clauses EXPS matched one after another.
+A single clause is translated directly, with COMBINE passed through.
+Otherwise each clause is translated in `concat' context and the
+pieces are joined; the result is enclosed in a shy group only when
+COMBINE is `suffix'."
+  (cond
+   ((= (length exps) 1)
+    (sregex--engine (car exps) combine))
+   (t
+    (let ((joined (mapconcat (lambda (clause)
+                               (sregex--engine clause 'concat))
+                             exps
+                             "")))
+      (if (eq combine 'suffix)
+          (concat "\\(?:" joined "\\)")
+        joined)))))
+
+(defun sregex--or (exps combine)
+  "Return the regexp matching any one of the clauses EXPS.
+A single clause is translated directly, with COMBINE passed through.
+Otherwise each clause is translated in `or' context and the pieces
+are joined with the alternation operator; the result is enclosed in
+a shy group unless COMBINE is `or'."
+  (cond
+   ((= (length exps) 1)
+    (sregex--engine (car exps) combine))
+   (t
+    (let ((alternatives (mapconcat (lambda (clause)
+                                     (sregex--engine clause 'or))
+                                   exps
+                                   "\\|")))
+      ;; Directly inside another `or' the alternatives can be spliced
+      ;; in as is; everywhere else they are grouped.
+      (if (eq combine 'or)
+          alternatives
+        (concat "\\(?:" alternatives "\\)"))))))
+
+(defun sregex--group (exps combine)
+  "Return the clauses EXPS, as a sequence, inside a capturing group.
+COMBINE is not used."
+  (let ((inner (sregex--sequence exps nil)))
+    (concat "\\(" inner "\\)")))
+
+(defun sregex--backref (exps combine)
+  "Return a backreference to the group whose number is the first of EXPS.
+COMBINE is not used."
+  (let ((group-number (car exps)))
+    (concat "\\" (number-to-string group-number))))
+;; Postfix operators.  Each one renders its clauses as a single unit
+;; (`suffix' context) and appends its operator character.  When the
+;; result is itself about to receive another postfix operator (COMBINE
+;; is `suffix') it is enclosed in a shy group; otherwise a nested form
+;; such as (opt (opt "a")) would yield "a??", which Emacs reads as the
+;; non-greedy variant of "a?" instead of an optional "a?".
+(defun sregex--opt (exps combine)
+  "Return a regexp matching zero or one occurrence of the clauses EXPS.
+If COMBINE is `suffix' the result is enclosed in a shy group so that a
+following postfix operator applies to the whole of it."
+  (let ((re (concat (sregex--sequence exps 'suffix) "?")))
+    (if (eq combine 'suffix)
+        (concat "\\(?:" re "\\)")
+      re)))
+(defun sregex--0+ (exps combine)
+  "Return a regexp matching zero or more occurrences of the clauses EXPS.
+If COMBINE is `suffix' the result is enclosed in a shy group so that a
+following postfix operator applies to the whole of it."
+  (let ((re (concat (sregex--sequence exps 'suffix) "*")))
+    (if (eq combine 'suffix)
+        (concat "\\(?:" re "\\)")
+      re)))
+(defun sregex--1+ (exps combine)
+  "Return a regexp matching one or more occurrences of the clauses EXPS.
+If COMBINE is `suffix' the result is enclosed in a shy group so that a
+following postfix operator applies to the whole of it."
+  (let ((re (concat (sregex--sequence exps 'suffix) "+")))
+    (if (eq combine 'suffix)
+        (concat "\\(?:" re "\\)")
+      re)))
+
+(defun sregex--char (exps combine)
+  "Return a regexp matching one character from the set described by EXPS.
+COMBINE is not used."
+  (let ((negated nil))
+    (sregex--char-aux negated exps)))
+(defun sregex--not-char (exps combine)
+  "Return a regexp matching one character not in the set described by EXPS.
+COMBINE is not used."
+  (let ((negated t))
+    (sregex--char-aux negated exps)))
+
+(defun sregex--syntax (exps combine)
+  "Return a regexp matching a character whose syntax code is the first of EXPS.
+The code is a character.  COMBINE is not used."
+  (let ((code (car exps)))
+    (format "\\s%c" code)))
+(defun sregex--not-syntax (exps combine)
+  "Return a regexp matching a character whose syntax code is not the first of EXPS.
+The code is a character.  COMBINE is not used."
+  (let ((code (car exps)))
+    (format "\\S%c" code)))
+
+(defun sregex--regex (exps combine)
+  "Return the first of EXPS, a regexp string, for inclusion as is.
+When COMBINE is nil the string is returned unchanged; in any other
+context it is enclosed in a shy group."
+  (let ((raw (car exps)))
+    (if (null combine)
+        raw
+      (concat "\\(?:" raw "\\)"))))
+
+(defun sregex--repeat (exps combine)
+  "Return a regexp repeating a sequence of clauses a bounded number of times.
+EXPS has the form (MIN MAX CLAUSE...).  A nil MIN counts as 0; a nil
+MAX leaves the upper bound of the interval empty.  COMBINE is not used."
+  (let* ((lower (or (car exps) 0))
+         (lower-text (number-to-string lower))
+         (upper (cadr exps))
+         ;; The clauses are rendered before the upper bound is formatted.
+         (body (sregex--sequence (cddr exps) 'suffix))
+         (upper-text (if upper (number-to-string upper))))
+    (concat body "\\{" lower-text "," upper-text "\\}")))
+
+(defun sregex--char-range (start end)
+  "Return bracket-expression text for the characters START through END.
+A span of four or more characters is written as \"START-END\"; shorter
+spans are spelled out character by character.  If END is not greater
+than START, only START is returned."
+  (let ((lo-text (char-to-string start))
+        (hi-text (char-to-string end))
+        (span (- end start)))
+    (cond
+     ((<= span 0) lo-text)
+     ((= span 1) (concat lo-text hi-text))
+     ((= span 2) (concat lo-text (char-to-string (1+ start)) hi-text))
+     (t (concat lo-text "-" hi-text)))))
+
+;; Build the regexp for a character class.  ARGS is a list whose
+;; elements are characters, strings (every character of the string is
+;; added) or pairs (MIN . MAX) (every character in the range is added).
+;; A non-nil COMPLEMENT produces a negated class.
+(defun sregex--char-aux (complement args)
+ ;; regex-opt does the same, we should join effort.
+ ;; Membership is recorded in a 256-element bool-vector, so character
+ ;; codes of 256 and above are outside the vector.
+ (let ((chars (make-bool-vector 256 nil))) ; Yeah, right!
+ (dolist (arg args)
+ (cond ((integerp arg) (aset chars arg t))
+ ((stringp arg) (mapc (lambda (c) (aset chars c t)) arg))
+ ((consp arg)
+ (let ((start (car arg))
+ (end (cdr arg)))
+ ;; Accept the bounds in either order.
+ (when (> start end)
+ (let ((tmp start)) (setq start end) (setq end tmp)))
+ ;; now start <= end
+ (let ((i start))
+ (while (<= i end)
+ (aset chars i t)
+ (setq i (1+ i))))))))
+ ;; now chars is a map of the characters in the class
+ ;; "^", "-" and "]" are remembered separately and removed from the
+ ;; map so that the range-building loop never emits them; "]", when
+ ;; present, becomes the first character of the class text.
+ (let ((caret (aref chars ?^))
+ (dash (aref chars ?-))
+ (class (if (aref chars ?\]) "]" "")))
+ (aset chars ?^ nil)
+ (aset chars ?- nil)
+ (aset chars ?\] nil)
+
+ ;; Walk the map in code order, collecting each run of consecutive
+ ;; members and appending its text from `sregex--char-range'.
+ (let (start end)
+ (dotimes (i 256)
+ (if (aref chars i)
+ (progn
+ (unless start (setq start i))
+ (setq end i)
+ (aset chars i nil))
+ (when start
+ (setq class (concat class (sregex--char-range start end)))
+ (setq start nil))))
+ ;; Flush a run that extends to the last index.
+ (if start
+ (setq class (concat class (sregex--char-range start end)))))
+
+ ;; Append "^" and "-" at the end.  When nothing else is in the class
+ ;; the "-" goes first, so that "^" is never the first character.
+ (if (> (length class) 0)
+ (setq class (concat class (if caret "^") (if dash "-")))
+ (setq class (concat class (if dash "-") (if caret "^"))))
+ ;; A non-negated class holding exactly one character is returned as
+ ;; a quoted literal rather than as a bracket expression.
+ (if (and (not complement) (= (length class) 1))
+ (regexp-quote class)
+ (concat "[" (if complement "^") class "]")))))
+
+(provide 'sregex)
+
+;; arch-tag: 460c1f5a-eb6e-42ec-a451-ffac78bdf492
+;;; sregex.el ends here