@item @code{(not @var{charspec})}
@cindex @code{not} in rx
Match a character not included in @var{charspec}. @var{charspec} can
-be an @code{any}, @code{not}, @code{or}, @code{intersection},
-@code{syntax} or @code{category} form, or a character class.
+be a character, a single-character string, an @code{any}, @code{not},
+@code{or}, @code{intersection}, @code{syntax} or @code{category} form,
+or a character class.
If @var{charspec} is an @code{or} form, its arguments have the same
restrictions as those of @code{intersection}; see below.@*
Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}},
@item @code{(intersection @var{charset}@dots{})}
@cindex @code{intersection} in rx
Match a character included in all of the @var{charset}s.
-Each @var{charset} can be an @code{any} form without character
-classes, or an @code{intersection}, @code{or} or @code{not} form whose
-arguments are also @var{charset}s.
+Each @var{charset} can be a character, a single-character string, an
+@code{any} form without character classes, or an @code{intersection},
+@code{or} or @code{not} form whose arguments are also @var{charset}s.
@item @code{not-newline}, @code{nonl}
@cindex @code{not-newline} in rx
(rx--every (lambda (x) (not (symbolp x))) (cdr form)))
(and (memq (car form) '(not or | intersection))
(rx--every #'rx--charset-p (cdr form)))))
+ (characterp form)
+ (and (stringp form) (= (length form) 1))
(and (or (symbolp form) (consp form))
(let ((expanded (rx--expand-def form)))
(and expanded
((eq arg 'word-boundary)
(rx--translate-symbol
(if negated 'word-boundary 'not-word-boundary)))
+ ((characterp arg)
+ (rx--generate-alt (not negated) (list (cons arg arg)) nil))
+ ((and (stringp arg) (= (length arg) 1))
+ (let ((char (string-to-char arg)))
+ (rx--generate-alt (not negated) (list (cons char char)) nil)))
((let ((expanded (rx--expand-def arg)))
(and expanded
(rx--translate-not negated (list expanded)))))
(defun rx--charset-intervals (charset)
"Return a sorted list of non-adjacent disjoint intervals from CHARSET.
CHARSET is any expression allowed in a character set expression:
-either `any' (no classes permitted), or `not', `or' or `intersection'
-forms whose arguments are charsets."
+characters, single-char strings, `any' forms (no classes permitted),
+or `not', `or' or `intersection' forms whose arguments are charsets."
(pcase charset
(`(,(or 'any 'in 'char) . ,body)
(let ((parsed (rx--parse-any body)))
(`(not ,x) (rx--complement-intervals (rx--charset-intervals x)))
(`(,(or 'or '|) . ,body) (rx--charset-union body))
(`(intersection . ,body) (rx--charset-intersection body))
+ ((pred characterp)
+ (list (cons charset charset)))
+ ((guard (and (stringp charset) (= (length charset) 1)))
+ (let ((char (string-to-char charset)))
+ (list (cons char char))))
(_ (let ((expanded (rx--expand-def charset)))
(if expanded
(rx--charset-intervals expanded)
character, a string, a range as string \"A-Z\" or cons
(?A . ?Z), or a character class (see below). Alias: in, char.
(not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC
- can be (any ...), (or ...), (intersection ...),
- (syntax ...), (category ...), or a character class.
-(intersection CHARSET...) Intersection of CHARSETs.
- CHARSET is (any...), (not...), (or...) or (intersection...).
+ can be a character, single-char string, (any ...), (not ...),
+ (or ...), (intersection ...), (syntax ...), (category ...),
+ or a character class.
+(intersection CHARSET...) Match a character included in all CHARSETs.
+ CHARSET is (any...), (not...), (or...) or (intersection...),
+ a character or a single-char string.
not-newline Match any character except a newline. Alias: nonl.
anychar Match any character. Alias: anything.
unmatchable Never match anything at all.
(should (equal (rx (not (category tone-mark)) (not (category lao)))
"\\C4\\Co"))
(should (equal (rx (not (not ascii)) (not (not (not (any "a-z")))))
- "[[:ascii:]][^a-z]")))
+ "[[:ascii:]][^a-z]"))
+ (should (equal (rx (not ?a) (not "b") (not (not "c")) (not (not ?d)))
+ "[^a][^b]cd")))
(ert-deftest rx-charset-or ()
(should (equal (rx (or))
"[a-ru-z]"))
(should (equal (rx (or (intersection (any "c-z") (any "a-g"))
(not (any "a-k"))))
- "[^abh-k]")))
+ "[^abh-k]"))
+ (should (equal (rx (or ?f (any "b-e") "a") (not (or ?x "y" (any "s-w"))))
+ "[a-f][^s-y]")))
(ert-deftest rx-def-in-charset-or ()
(rx-let ((a (any "badc"))
- (b (| a (any "def"))))
- (should (equal (rx (or b (any "q")))
- "[a-fq]")))
+ (b (| a (any "def")))
+ (c ?a)
+ (d "b"))
+ (should (equal (rx (or b (any "q")) (or c d))
+ "[a-fq][ab]")))
(rx-let ((diff-| (a b) (not (or (not a) b))))
(should (equal (rx (diff-| (any "a-z") (any "gr")))
"[a-fh-qs-z]"))))
"[e-m]"))
(should (equal (rx (intersection (or (any "a-f") (any "f-t"))
(any "e-w")))
- "[e-t]")))
+ "[e-t]"))
+ (should (equal (rx (intersection ?m (any "a-z") "m"))
+ "m")))
(ert-deftest rx-def-in-intersection ()
(rx-let ((a (any "a-g"))