(completion-ignore-case nil)
(completion-regexp-list nil)
(open (cond ((stringp paren) paren) (paren "\\(")))
- (sorted-strings (delete-dups
- (sort (copy-sequence strings) 'string-lessp)))
(re
(cond
;; No strings: return an unmatchable regexp.
((null strings)
(concat (or open "\\(?:") regexp-unmatchable "\\)"))
- ;; If we cannot reorder, give up all attempts at
- ;; optimisation. There is room for improvement (Bug#34641).
- ((and keep-order (regexp-opt--contains-prefix sorted-strings))
- (concat (or open "\\(?:")
- (mapconcat #'regexp-quote strings "\\|")
- "\\)"))
+
+ ;; The algorithm will generate a pattern that matches
+ ;; longer strings in the list before shorter. If the
+ ;; list order matters, then no string must come after a
+ ;; proper prefix of that string. To check this, verify
+ ;; that a straight or-pattern matches each string
+ ;; entirely.
+ ((and keep-order
+ (let* ((case-fold-search nil)
+ (alts (mapconcat #'regexp-quote strings "\\|")))
+ (and (let ((s strings))
+ (while (and s
+ (string-match alts (car s))
+ (= (match-end 0) (length (car s))))
+ (setq s (cdr s)))
+ ;; If we exited early, we found evidence that
+ ;; regexp-opt-group cannot be used.
+ s)
+ (concat (or open "\\(?:") alts "\\)")))))
(t
- (regexp-opt-group sorted-strings (or open t) (not open))))))
+ (regexp-opt-group
+ (delete-dups (sort (copy-sequence strings) 'string-lessp))
+ (or open t) (not open))))))
(cond ((eq paren 'words)
(concat "\\<" re "\\>"))
((eq paren 'symbols)
(concat "[" all "]")))))))
-(defun regexp-opt--contains-prefix (strings)
- "Whether STRINGS contains a proper prefix of one of its other elements.
-STRINGS must be a list of sorted strings without duplicates."
- (let ((s strings))
- ;; In a lexicographically sorted list, a string always immediately
- ;; succeeds one of its prefixes.
- (while (and (cdr s)
- (not (string-equal
- (car s)
- (substring (cadr s) 0 (min (length (car s))
- (length (cadr s)))))))
- (setq s (cdr s)))
- (cdr s)))
-
-
(provide 'regexp-opt)
;;; regexp-opt.el ends here
-;;; regexp-opt-tests.el --- Tests for regexp-opt.el
+;;; regexp-opt-tests.el --- Tests for regexp-opt.el -*- lexical-binding: t -*-
;; Copyright (C) 2013-2019 Free Software Foundation, Inc.
(require 'regexp-opt)
+(defun regexp-opt-test--permutation (n list)
+ "The Nth permutation of LIST, 0 ≤ N < (length LIST)!."
+ (let ((len (length list))
+ (perm-list nil))
+ (dotimes (i len)
+ (let* ((d (- len i))
+ (k (mod n d)))
+ (push (nth k list) perm-list)
+ (setq list (append (butlast list (- (length list) k))
+ (nthcdr (1+ k) list)))
+ (setq n (/ n d))))
+ (nreverse perm-list)))
+
+(defun regexp-opt-test--factorial (n)
+ "N!"
+ (apply #'* (number-sequence 1 n)))
+
+(defun regexp-opt-test--permutations (list)
+ "All permutations of LIST."
+ (mapcar (lambda (i) (regexp-opt-test--permutation i list))
+ (number-sequence 0 (1- (regexp-opt-test--factorial (length list))))))
+
+(defun regexp-opt-test--match-all (words re)
+ (mapcar (lambda (w) (and (string-match re w)
+ (match-string 0 w)))
+ words))
+
+(defun regexp-opt-test--check-perm (perm)
+ (let* ((ref-re (mapconcat #'regexp-quote perm "\\|"))
+ (opt-re (regexp-opt perm nil t))
+ (ref (regexp-opt-test--match-all perm ref-re))
+ (opt (regexp-opt-test--match-all perm opt-re)))
+ (equal opt ref)))
+
+(defun regexp-opt-test--explain-perm (perm)
+ (let* ((ref-re (mapconcat #'regexp-quote perm "\\|"))
+ (opt-re (regexp-opt perm nil t))
+ (ref (regexp-opt-test--match-all perm ref-re))
+ (opt (regexp-opt-test--match-all perm opt-re)))
+ (concat "\n"
+ (format "Naïve regexp: %s\n" ref-re)
+ (format "Optimised regexp: %s\n" opt-re)
+ (format "Got: %s\n" opt)
+ (format "Expected: %s\n" ref))))
+
+(put 'regexp-opt-test--check-perm 'ert-explainer 'regexp-opt-test--explain-perm)
+
+(ert-deftest regexp-opt-keep-order ()
+ "Check that KEEP-ORDER works."
+ (dolist (perm (regexp-opt-test--permutations '("abc" "bca" "cab")))
+ (should (regexp-opt-test--check-perm perm)))
+ (dolist (perm (regexp-opt-test--permutations '("abc" "ab" "bca" "bc")))
+ (should (regexp-opt-test--check-perm perm)))
+ (dolist (perm (regexp-opt-test--permutations '("abxy" "cdxy")))
+ (should (regexp-opt-test--check-perm perm)))
+ (dolist (perm (regexp-opt-test--permutations '("afgx" "bfgx" "afgy" "bfgy")))
+ (should (regexp-opt-test--check-perm perm)))
+ (dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc")))
+ (should (regexp-opt-test--check-perm perm))))
+
(ert-deftest regexp-opt-charset ()
(should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]"))
(should (equal (regexp-opt-charset '(?D ?d ?B ?a ?b ?C ?7 ?a ?c ?A))