If NEGATED is non-nil, negate the result; INTERVALS is a sorted
list of disjoint intervals and CLASSES a list of named character
classes."
- (let ((items (append intervals classes)))
- ;; Move lone ] and range ]-x to the start.
- (let ((rbrac-l (assq ?\] items)))
- (when rbrac-l
- (setq items (cons rbrac-l (delq rbrac-l items)))))
-
- ;; Split x-] and move the lone ] to the start.
- (let ((rbrac-r (rassq ?\] items)))
- (when (and rbrac-r (not (eq (car rbrac-r) ?\])))
- (setcdr rbrac-r ?\\)
- (setq items (cons '(?\] . ?\]) items))))
-
- ;; Split ,-- (which would end up as ,- otherwise).
- (let ((dash-r (rassq ?- items)))
- (when (eq (car dash-r) ?,)
- (setcdr dash-r ?,)
- (setq items (nconc items '((?- . ?-))))))
-
- ;; Remove - (lone or at start of interval)
- (let ((dash-l (assq ?- items)))
- (when dash-l
- (if (eq (cdr dash-l) ?-)
- (setq items (delq dash-l items)) ; Remove lone -
- (setcar dash-l ?.)) ; Reduce --x to .-x
- (setq items (nconc items '((?- . ?-))))))
-
- ;; Deal with leading ^ and range ^-x in non-negated set.
- (when (and (eq (car-safe (car items)) ?^)
- (not negated))
- (if (eq (cdar items) ?^)
- ;; single leading ^
- (when (cdr items)
- ;; Move the ^ to second place.
- (setq items (cons (cadr items)
- (cons (car items) (cddr items)))))
- ;; Split ^-x to _-x^
- (setq items (cons (cons ?_ (cdar items))
- (cons '(?^ . ?^)
- (cdr items))))))
-
- (cond
- ;; Empty set: if negated, any char, otherwise match-nothing.
- ((null items)
+ ;; No, this is not pretty code. You try doing it in a way that is both
+ ;; elegant and efficient. Or just one of the two. I dare you.
+ (cond
+ ;; Single character.
+ ((and intervals (eq (caar intervals) (cdar intervals))
+ (null (cdr intervals))
+ (null classes))
+ (let ((ch (caar intervals)))
(if negated
- (rx--translate-symbol 'anything)
- (rx--empty)))
- ;; Single non-negated character.
- ((and (null (cdr items))
- (consp (car items))
- (eq (caar items) (cdar items))
- (not negated))
- (cons (list (regexp-quote (char-to-string (caar items))))
- t))
- ;; Negated newline.
- ((and (equal items '((?\n . ?\n)))
- negated)
- (rx--translate-symbol 'nonl))
- ;; At least one character or class, possibly negated.
- (t
+ (if (eq ch ?\n)
+ ;; Single negated newline.
+ (rx--translate-symbol 'nonl)
+ ;; Single negated character (other than newline).
+ (cons (list (string ?\[ ?^ ch ?\])) t))
+ ;; Single literal character.
+ (cons (list (regexp-quote (char-to-string ch))) t))))
+
+ ;; Empty set (or any char).
+ ((and (null intervals) (null classes))
+ (if negated
+ (rx--translate-symbol 'anything)
+ (rx--empty)))
+
+ ;; More than one character, or at least one class.
+ (t
+ (let ((dash nil) (caret nil))
+ ;; Move ] and range ]-x to the start.
+ (let ((rbrac-l (assq ?\] intervals)))
+ (when rbrac-l
+ (setq intervals (cons rbrac-l (remq rbrac-l intervals)))))
+
+ ;; Split x-] and move the lone ] to the start.
+ (let ((rbrac-r (rassq ?\] intervals)))
+ (when (and rbrac-r (not (eq (car rbrac-r) ?\])))
+ (setcdr rbrac-r ?\\)
+ (setq intervals (cons '(?\] . ?\]) intervals))))
+
+ ;; Split ,-- (which would end up as ,- otherwise).
+ (let ((dash-r (rassq ?- intervals)))
+ (when (eq (car dash-r) ?,)
+ (setcdr dash-r ?,)
+ (setq dash "-")))
+
+ ;; Remove - (lone or at start of interval)
+ (let ((dash-l (assq ?- intervals)))
+ (when dash-l
+ (if (eq (cdr dash-l) ?-)
+ (setq intervals (remq dash-l intervals)) ; Remove lone -
+ (setcar dash-l ?.)) ; Reduce --x to .-x
+ (setq dash "-")))
+
+ ;; Deal with leading ^ and range ^-x in non-negated set.
+ (when (and (eq (caar intervals) ?^)
+ (not negated))
+ (if (eq (cdar intervals) ?^)
+ ;; single leading ^
+ (if (or (cdr intervals) classes)
+ ;; something else to put before the ^
+ (progn
+ (setq intervals (cdr intervals)) ; remove lone ^
+ (setq caret "^")) ; put ^ (almost) last
+ ;; nothing else but a lone -
+ (setq intervals (cons '(?- . ?-) intervals)) ; move - first
+ (setq dash nil))
+ ;; split ^-x to _-x^
+ (setq intervals `((?_ . ,(cdar intervals)) (?^ . ?^)
+ . ,(cdr intervals)))))
+
(cons
(list
(concat
"["
(and negated "^")
- (mapconcat (lambda (item)
- (cond ((symbolp item)
- (format "[:%s:]" item))
- ((eq (car item) (cdr item))
- (char-to-string (car item)))
- ((eq (1+ (car item)) (cdr item))
- (string (car item) (cdr item)))
+ (mapconcat (lambda (iv)
+ (cond ((eq (car iv) (cdr iv))
+ (char-to-string (car iv)))
+ ((eq (1+ (car iv)) (cdr iv))
+ (string (car iv) (cdr iv)))
;; Ranges that go between normal chars and raw bytes
;; must be split to avoid being mutilated
;; by Emacs's regexp parser.
- ((<= (car item) #x3fff7f (cdr item))
- (string (car item) ?- #x3fff7f
- #x3fff80 ?- (cdr item)))
+ ((<= (car iv) #x3fff7f (cdr iv))
+ (string (car iv) ?- #x3fff7f
+ #x3fff80 ?- (cdr iv)))
(t
- (string (car item) ?- (cdr item)))))
- items nil)
+ (string (car iv) ?- (cdr iv)))))
+ intervals)
+ (mapconcat (lambda (cls) (format "[:%s:]" cls)) classes)
+ caret ; ^ or nothing
+ dash ; - or nothing
"]"))
t)))))
(defun rx--union-intervals (ivs-a ivs-b)
"Union of the interval lists IVS-A and IVS-B."
- (rx--complement-intervals
- (rx--intersect-intervals
- (rx--complement-intervals ivs-a)
- (rx--complement-intervals ivs-b))))
+ (let ((union nil))
+ (while (and ivs-a ivs-b)
+ (let ((a (car ivs-a))
+ (b (car ivs-b)))
+ (cond
+ ((< (1+ (cdr a)) (car b)) ; a before b, not adacent
+ (push a union)
+ (setq ivs-a (cdr ivs-a)))
+ ((< (1+ (cdr b)) (car a)) ; b before a, not adacent
+ (push b union)
+ (setq ivs-b (cdr ivs-b)))
+ (t ; a and b adjacent or overlap
+ (setq ivs-a (cdr ivs-a))
+ (setq ivs-b (cdr ivs-b))
+ (if (< (cdr a) (cdr b))
+ (push (cons (min (car a) (car b))
+ (cdr b))
+ ivs-b)
+ (push (cons (min (car a) (car b))
+ (cdr a))
+ ivs-a))))))
+ (nconc (nreverse union) (or ivs-a ivs-b))))
(defun rx--charset-intervals (charset)
"Return a sorted list of non-adjacent disjoint intervals from CHARSET.
(should (equal (rx (any "-]^" ascii) (not (any "-]^" ascii)))
"[]^[:ascii:]-][^]^[:ascii:]-]"))
(should (equal (rx (any "^" lower upper) (not (any "^" lower upper)))
- "[[:lower:]^[:upper:]][^^[:lower:][:upper:]]"))
+ "[[:lower:][:upper:]^][^^[:lower:][:upper:]]"))
(should (equal (rx (any "-" lower upper) (not (any "-" lower upper)))
"[[:lower:][:upper:]-][^[:lower:][:upper:]-]"))
(should (equal (rx (any "]" lower upper) (not (any "]" lower upper)))
(rx-submatch-n '(group-n 3 (+ nonl) eol)))
"\\(?3:.+$\\)")))
+;;; unit tests for internal functions
+
+(ert-deftest rx--complement-intervals ()
+ (should (equal (rx--complement-intervals '())
+ '((0 . #x3fffff))))
+ (should (equal (rx--complement-intervals '((10 . 20) (30 . 40)))
+ '((0 . 9) (21 . 29) (41 . #x3fffff))))
+ (should (equal (rx--complement-intervals '((0 . #x3fffff)))
+ '()))
+ (should (equal (rx--complement-intervals
+ '((0 . 10) (20 . 20) (30 . #x3fffff)))
+ '((11 . 19) (21 . 29)))))
+
+(ert-deftest rx--union-intervals ()
+ (should (equal (rx--union-intervals '() '()) '()))
+ (should (equal (rx--union-intervals '() '((10 . 20) (30 . 40)))
+ '((10 . 20) (30 . 40))))
+ (should (equal (rx--union-intervals '((10 . 20) (30 . 40)) '())
+ '((10 . 20) (30 . 40))))
+ (should (equal (rx--union-intervals '((5 . 15) (18 . 24) (32 . 40))
+ '((10 . 20) (30 . 40) (50 . 60)))
+ '((5 . 24) (30 . 40) (50 . 60))))
+ (should (equal (rx--union-intervals '((10 . 20) (30 . 40) (50 . 60))
+ '((0 . 9) (21 . 29) (41 . 50)))
+ '((0 . 60))))
+ (should (equal (rx--union-intervals '((10 . 20) (30 . 40))
+ '((12 . 18) (28 . 42)))
+ '((10 . 20) (28 . 42))))
+ (should (equal (rx--union-intervals '((10 . 20) (30 . 40))
+ '((0 . #x3fffff)))
+ '((0 . #x3fffff)))))
+
+(ert-deftest rx--intersect-intervals ()
+ (should (equal (rx--intersect-intervals '() '()) '()))
+ (should (equal (rx--intersect-intervals '() '((10 . 20) (30 . 40)))
+ '()))
+ (should (equal (rx--intersect-intervals '((10 . 20) (30 . 40)) '())
+ '()))
+ (should (equal (rx--intersect-intervals '((5 . 15) (18 . 24) (32 . 40))
+ '((10 . 20) (30 . 40) (50 . 60)))
+ '((10 . 15) (18 . 20) (32 . 40))))
+ (should (equal (rx--intersect-intervals '((10 . 20) (30 . 40) (50 . 60))
+ '((0 . 9) (21 . 29) (41 . 50)))
+ '((50 . 50))))
+ (should (equal (rx--intersect-intervals '((10 . 20) (30 . 40))
+ '((12 . 18) (28 . 42)))
+ '((12 . 18) (30 . 40))))
+ (should (equal (rx--intersect-intervals '((10 . 20) (30 . 40))
+ '((0 . #x3fffff)))
+ '((10 . 20) (30 . 40)))))
+
(provide 'rx-tests)
;;; rx-tests.el ends here