;; Olin Shivers's SRE, with concessions to Emacs regexp peculiarities,
;; and the older Emacs package Sregex.
+;;; Legacy syntax still accepted by rx:
+;;
+;; These are constructs from earlier rx and sregex implementations
+;; that were mistakes, accidents or just not very good ideas in hindsight.
+
+;; Obsolete: accepted but not documented
+;;
+;; Obsolete                      Preferred
+;; --------------------------------------------------------
+;; (not word-boundary)           not-word-boundary
+;; (not-syntax X)                (not (syntax X))
+;; not-wordchar                  (not wordchar)
+;; (not-char ...)                (not (any ...))
+;; any                           nonl, not-newline
+;; (repeat N FORM)               (= N FORM)
+;; (syntax CHARACTER)            (syntax NAME)
+;; (syntax CHAR-SYM)      [1]    (syntax NAME)
+;; (category chinse-two-byte)    (category chinese-two-byte)
+;; unibyte                       ascii
+;; multibyte                     nonascii
+;; --------------------------------------------------------
+;; [1]  where CHAR-SYM is a symbol with single-character name
+
+;; Obsolescent: accepted and documented but discouraged
+;;
+;; Obsolescent                   Preferred
+;; --------------------------------------------------------
+;; (and ...)                     (seq ...), (: ...), (sequence ...)
+;; anything                      anychar
+;; minimal-match, maximal-match  lazy ops: ??, *?, +?
+
+;; FIXME: Prepare a phase-out by emitting compile-time warnings about
+;; at least some of the legacy constructs above.
+
;;; Code:
+
;; The `rx--translate...' functions below return (REGEXP . PRECEDENCE),
;; where REGEXP is a list of string expressions that will be
;; concatenated into a regexp, and PRECEDENCE is one of
('not-word-boundary (cons (list "\\B") t))
('symbol-start (cons (list "\\_<") t))
('symbol-end (cons (list "\\_>") t))
- ('not-wordchar (cons (list "\\W") t))
+ ('not-wordchar (rx--translate '(not wordchar)))
(_
(cond
((let ((class (cdr (assq sym rx--char-classes))))
(setq syntax char)))))))
(unless syntax
(error "Unknown rx syntax name `%s'" sym)))
- (cons (list (string ?\\ (if negated ?S ?s) syntax))
+ ;; Produce \w and \W instead of \sw and \Sw, for smaller size.
+ (cons (list (if (eq syntax ?w)
+ (string ?\\ (if negated ?W ?w))
+ (string ?\\ (if negated ?S ?s) syntax)))
t)))
(defconst rx--categories
"^\\`\\'\\`\\'\\`\\'\\`\\'$"))
(should (equal (rx point word-start word-end bow eow symbol-start symbol-end
word-boundary not-word-boundary not-wordchar)
- "\\=\\<\\>\\<\\>\\_<\\_>\\b\\B\\W"))
+ "\\=\\<\\>\\<\\>\\_<\\_>\\b\\B[^[:word:]]"))
(should (equal (rx digit numeric num control cntrl)
"[[:digit:]][[:digit:]][[:digit:]][[:cntrl:]][[:cntrl:]]"))
(should (equal (rx hex-digit hex xdigit blank)
(should (equal (rx (syntax whitespace) (syntax punctuation)
(syntax word) (syntax symbol)
(syntax open-parenthesis) (syntax close-parenthesis))
- "\\s-\\s.\\sw\\s_\\s(\\s)"))
+ "\\s-\\s.\\w\\s_\\s(\\s)"))
(should (equal (rx (syntax string-quote) (syntax paired-delimiter)
(syntax escape) (syntax character-quote)
(syntax comment-start) (syntax comment-end)
"\\B"))
(should (equal (rx (not ascii) (not lower-case) (not wordchar))
"[^[:ascii:]][^[:lower:]][^[:word:]]"))
- (should (equal (rx (not (syntax punctuation)) (not (syntax escape)))
- "\\S.\\S\\"))
+ (should (equal (rx (not (syntax punctuation)) (not (syntax escape))
+ (not (syntax word)))
+ "\\S.\\S\\\\W"))
(should (equal (rx (not (category tone-mark)) (not (category lao)))
"\\C4\\Co"))
(should (equal (rx (not (not ascii)) (not (not (not (any "a-z")))))