From: Mattias Engdegård Date: Fri, 13 Dec 2019 12:10:58 +0000 (+0100) Subject: Allow characters and single-char strings in rx charsets X-Git-Tag: emacs-27.0.90~361 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=82b4e48c590cf2c0448a751e641b0ee7a6a02438;p=emacs.git Allow characters and single-char strings in rx charsets The `not' and `intersection' forms, and `or' inside these forms, now accept characters and single-character strings as arguments. Previously, they had to be wrapped in `any' forms. This does not add expressive power but is a convenience and is easily understood. * doc/lispref/searching.texi (Rx Constructs): Amend the documentation. * etc/NEWS: Announce the change. * lisp/emacs-lisp/rx.el (rx--charset-p, rx--translate-not) (rx--charset-intervals, rx): Accept characters and 1-char strings in more places. * test/lisp/emacs-lisp/rx-tests.el (rx-not, rx-charset-or) (rx-def-in-charset-or, rx-intersection): Test the change. --- diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index 0c6c7cc68b5..700880c2289 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi @@ -1214,8 +1214,9 @@ Corresponding string regexp: @samp{[@dots{}]} @item @code{(not @var{charspec})} @cindex @code{not} in rx Match a character not included in @var{charspec}. @var{charspec} can -be an @code{any}, @code{not}, @code{or}, @code{intersection}, -@code{syntax} or @code{category} form, or a character class. +be a character, a single-character string, an @code{any}, @code{not}, +@code{or}, @code{intersection}, @code{syntax} or @code{category} form, +or a character class. 
If @var{charspec} is an @code{or} form, its arguments have the same restrictions as those of @code{intersection}; see below.@* Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}}, @@ -1224,9 +1225,9 @@ Corresponding string regexp: @samp{[^@dots{}]}, @samp{\S@var{code}}, @item @code{(intersection @var{charset}@dots{})} @cindex @code{intersection} in rx Match a character included in all of the @var{charset}s. -Each @var{charset} can be an @code{any} form without character -classes, or an @code{intersection}, @code{or} or @code{not} form whose -arguments are also @var{charset}s. +Each @var{charset} can be a character, a single-character string, an +@code{any} form without character classes, or an @code{intersection}, +@code{or} or @code{not} form whose arguments are also @var{charset}s. @item @code{not-newline}, @code{nonl} @cindex @code{not-newline} in rx diff --git a/etc/NEWS b/etc/NEWS index 1e0422c761f..a7f3c3d2fe8 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -2124,6 +2124,9 @@ Both match any single character; 'anychar' is more descriptive. With 'or' and 'not', it can be used to compose character-matching expressions from simpler parts. ++++ +*** 'not' argument can now be a character or single-char string. + ** Frames +++ diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index a5cab1db888..43f7a4e2752 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el @@ -309,6 +309,8 @@ and set operations." (rx--every (lambda (x) (not (symbolp x))) (cdr form))) (and (memq (car form) '(not or | intersection)) (rx--every #'rx--charset-p (cdr form))))) + (characterp form) + (and (stringp form) (= (length form) 1)) (and (or (symbolp form) (consp form)) (let ((expanded (rx--expand-def form))) (and expanded @@ -521,6 +523,11 @@ If NEGATED, negate the sense (thus making it positive)." 
((eq arg 'word-boundary) (rx--translate-symbol (if negated 'word-boundary 'not-word-boundary))) + ((characterp arg) + (rx--generate-alt (not negated) (list (cons arg arg)) nil)) + ((and (stringp arg) (= (length arg) 1)) + (let ((char (string-to-char arg))) + (rx--generate-alt (not negated) (list (cons char char)) nil))) ((let ((expanded (rx--expand-def arg))) (and expanded (rx--translate-not negated (list expanded))))) @@ -571,8 +578,8 @@ If NEGATED, negate the sense (thus making it positive)." (defun rx--charset-intervals (charset) "Return a sorted list of non-adjacent disjoint intervals from CHARSET. CHARSET is any expression allowed in a character set expression: -either `any' (no classes permitted), or `not', `or' or `intersection' -forms whose arguments are charsets." +characters, single-char strings, `any' forms (no classes permitted), +or `not', `or' or `intersection' forms whose arguments are charsets." (pcase charset (`(,(or 'any 'in 'char) . ,body) (let ((parsed (rx--parse-any body))) @@ -584,6 +591,11 @@ forms whose arguments are charsets." (`(not ,x) (rx--complement-intervals (rx--charset-intervals x))) (`(,(or 'or '|) . ,body) (rx--charset-union body)) (`(intersection . ,body) (rx--charset-intersection body)) + ((pred characterp) + (list (cons charset charset))) + ((guard (and (stringp charset) (= (length charset) 1))) + (let ((char (string-to-char charset))) + (list (cons char char)))) (_ (let ((expanded (rx--expand-def charset))) (if expanded (rx--charset-intervals expanded) @@ -1161,10 +1173,12 @@ CHAR Match a literal character. character, a string, a range as string \"A-Z\" or cons (?A . ?Z), or a character class (see below). Alias: in, char. (not CHARSPEC) Match one character not matched by CHARSPEC. CHARSPEC - can be (any ...), (or ...), (intersection ...), - (syntax ...), (category ...), or a character class. -(intersection CHARSET...) Intersection of CHARSETs. - CHARSET is (any...), (not...), (or...) or (intersection...). 
+ can be a character, single-char string, (any ...), (or ...), + (intersection ...), (syntax ...), (category ...), + or a character class. +(intersection CHARSET...) Match all CHARSETs. + CHARSET is (any...), (not...), (or...) or (intersection...), + a character or a single-char string. not-newline Match any character except a newline. Alias: nonl. anychar Match any character. Alias: anything. unmatchable Never match anything at all. diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el index 344f46764c8..a82f1f83645 100644 --- a/test/lisp/emacs-lisp/rx-tests.el +++ b/test/lisp/emacs-lisp/rx-tests.el @@ -272,7 +272,9 @@ (should (equal (rx (not (category tone-mark)) (not (category lao))) "\\C4\\Co")) (should (equal (rx (not (not ascii)) (not (not (not (any "a-z"))))) - "[[:ascii:]][^a-z]"))) + "[[:ascii:]][^a-z]")) + (should (equal (rx (not ?a) (not "b") (not (not "c")) (not (not ?d))) + "[^a][^b]cd"))) (ert-deftest rx-charset-or () (should (equal (rx (or)) @@ -294,13 +296,17 @@ "[a-ru-z]")) (should (equal (rx (or (intersection (any "c-z") (any "a-g")) (not (any "a-k")))) - "[^abh-k]"))) + "[^abh-k]")) + (should (equal (rx (or ?f (any "b-e") "a") (not (or ?x "y" (any "s-w")))) + "[a-f][^s-y]"))) (ert-deftest rx-def-in-charset-or () (rx-let ((a (any "badc")) - (b (| a (any "def")))) - (should (equal (rx (or b (any "q"))) - "[a-fq]"))) + (b (| a (any "def"))) + (c ?a) + (d "b")) + (should (equal (rx (or b (any "q")) (or c d)) + "[a-fq][ab]"))) (rx-let ((diff-| (a b) (not (or (not a) b)))) (should (equal (rx (diff-| (any "a-z") (any "gr"))) "[a-fh-qs-z]")))) @@ -326,7 +332,9 @@ "[e-m]")) (should (equal (rx (intersection (or (any "a-f") (any "f-t")) (any "e-w"))) - "[e-t]"))) + "[e-t]")) + (should (equal (rx (intersection ?m (any "a-z") "m")) + "m"))) (ert-deftest rx-def-in-intersection () (rx-let ((a (any "a-g"))