From b71d4ce056ca291594682b2a8536a4a768a97330 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Sat, 29 Dec 2018 11:09:27 +0100 Subject: [PATCH] Handle raw bytes, and LF in ranges, in rx `any' argument strings * lisp/emacs-lisp/rx.el (rx-check-any-string): Rewrite to handle raw bytes in unibyte strings and accept LF as range endpoints (Bug#33205). * test/lisp/emacs-lisp/rx-tests.el: Add tests for the above. --- lisp/emacs-lisp/rx.el | 51 ++++++++++++++++++-------------- test/lisp/emacs-lisp/rx-tests.el | 22 ++++++++++++++ 2 files changed, 51 insertions(+), 22 deletions(-) diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index 1230df4f15d..1cae22f870a 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el @@ -449,28 +449,35 @@ Only both edges of each range is checked." (defun rx-check-any-string (str) - "Check string argument STR for Rx `any'." - (let ((i 0) - c1 c2 l) - (if (= 0 (length str)) - (error "String arg for Rx `any' must not be empty")) - (while (string-match ".-." str i) - ;; string before range: convert it to characters - (if (< i (match-beginning 0)) - (setq l (nconc - l - (append (substring str i (match-beginning 0)) nil)))) - ;; range - (setq i (match-end 0) - c1 (aref str (match-beginning 0)) - c2 (aref str (1- i))) - (cond - ((< c1 c2) (setq l (nconc l (list (cons c1 c2))))) - ((= c1 c2) (setq l (nconc l (list c1)))))) - ;; rest? - (if (< i (length str)) - (setq l (nconc l (append (substring str i) nil)))) - l)) + "Turn the `any' argument string STR into a list of characters. +The original order is not preserved. Ranges, \"A-Z\", become pairs, (?A . ?Z)." + (let ((decode-char + ;; Make sure raw bytes are decoded as such, to avoid confusion with + ;; U+0080..U+00FF. + (if (multibyte-string-p str) + #'identity + (lambda (c) (if (<= #x80 c #xff) + (+ c #x3fff00) + c)))) + (len (length str)) + (i 0) + (ret nil)) + (if (= 0 len) + (error "String arg for Rx `any' must not be empty")) + (while (< i len) + (cond ((and (< i (- len 2)) + (= (aref str (+ i 1)) ?-)) + ;; Range. + (let ((start (funcall decode-char (aref str i))) + (end (funcall decode-char (aref str (+ i 2))))) + (cond ((< start end) (push (cons start end) ret)) + ((= start end) (push start ret))) + (setq i (+ i 3)))) + (t + ;; Single character. + (push (funcall decode-char (aref str i)) ret) + (setq i (+ i 1))))) + ret)) (defun rx-check-any (arg) diff --git a/test/lisp/emacs-lisp/rx-tests.el b/test/lisp/emacs-lisp/rx-tests.el index d15e3d77199..8b3ce6cb01f 100644 --- a/test/lisp/emacs-lisp/rx-tests.el +++ b/test/lisp/emacs-lisp/rx-tests.el @@ -33,6 +33,28 @@ (number-sequence ?< ?\]) (number-sequence ?- ?:)))))) +(ert-deftest rx-char-any-range-nl () + "Test character alternatives with LF as a range endpoint." + (should (equal (rx (any "\n-\r")) + "[\n-\r]")) + (should (equal (rx (any "\a-\n")) + "[\a-\n]"))) + +(ert-deftest rx-char-any-raw-byte () + "Test raw bytes in character alternatives." + ;; Separate raw characters. + (should (equal (string-match-p (rx (any "\326A\333B")) + "X\326\333") + 1)) + ;; Range of raw characters, unibyte. + (should (equal (string-match-p (rx (any "\200-\377")) + "ÿA\310B") + 2)) + ;; Range of raw characters, multibyte. + (should (equal (string-match-p (rx (any "Å\211\326-\377\177")) + "XY\355\177\327") + 2))) + (ert-deftest rx-pcase () (should (equal (pcase "a 1 2 3 1 1 b" ((rx (let u (+ digit)) space -- 2.39.5