From 30f3432e9519f61882faa303e7851e761d2d18ea Mon Sep 17 00:00:00 2001 From: Artur Malabarba Date: Fri, 4 Dec 2015 15:12:10 +0000 Subject: [PATCH] * lisp/character-fold.el: Remove special case-folding support MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit (character-fold-to-regexp): Remove special code for case-folding. Char-fold search still respects the `case-fold-search' variable (i.e., f matches F). This only removes the code that was added to ensure that f also matched all chars that F matched. For instance, after this commit, f no longer matches 𝔽. This was necessary because the logic created a regexp with 2^(length of the string) redundant paths. So, when a very long string "almost" matched, Emacs took a very long time to figure out that it didn't. This became particularly relevant because isearch's lazy-highlight does a search bounded by (1- match-end) (which, in most circumstances, is a search that almost matches). A recipe for this can be found in bug#22090. --- lisp/character-fold.el | 22 ++++------------------ test/automated/character-fold-tests.el | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/lisp/character-fold.el b/lisp/character-fold.el index fb28bae7281..1e49fe2f0e5 100644 --- a/lisp/character-fold.el +++ b/lisp/character-fold.el @@ -157,8 +157,6 @@ FROM is for internal use. It specifies an index in the STRING from which to start." (let* ((spaces 0) (multi-char-table (char-table-extra-slot character-fold-table 0)) - (lower-case-table (current-case-table)) - (upper-case-table (char-table-extra-slot lower-case-table 0)) (i (or from 0)) (end (length string)) (out nil)) @@ -178,21 +176,9 @@ from which to start." (setq spaces 0)) (let ((regexp (or (aref character-fold-table c) (regexp-quote (string c)))) - (alist nil)) - ;; Long string. The regexp would probably be too long. - (unless (> end 50) - (setq alist (aref multi-char-table c)) - (when case-fold-search - (let ((other-c (aref lower-case-table c))) - (when (or (not other-c) - (eq other-c c)) - (setq other-c (aref upper-case-table c))) - (when other-c - (setq alist (append alist (aref multi-char-table other-c))) - (setq regexp (concat "\\(?:" regexp "\\|" - (or (aref character-fold-table other-c) - (regexp-quote (string other-c))) - "\\)")))))) + ;; Long string. The regexp would probably be too long. + (alist (unless (> end 50) + (aref multi-char-table c)))) (push (let ((matched-entries nil) (max-length 0)) (dolist (entry alist) @@ -229,7 +215,7 @@ from which to start." (push (character-fold--make-space-string spaces) out)) (let ((regexp (apply #'concat (nreverse out)))) ;; Limited by `MAX_BUF_SIZE' in `regex.c'. - (if (> (length regexp) 10000) + (if (> (length regexp) 5000) (regexp-quote string) regexp)))) diff --git a/test/automated/character-fold-tests.el b/test/automated/character-fold-tests.el index 40735e5df7f..4e8761e6f7b 100644 --- a/test/automated/character-fold-tests.el +++ b/test/automated/character-fold-tests.el @@ -98,6 +98,27 @@ ;; (character-fold--test-match-exactly "a12" "xxyy") )) +(ert-deftest character-fold--speed-test () + (dolist (string (append '("tty-set-up-initial-frame-face" + "tty-set-up-initial-frame-face-frame-faceframe-faceframe-faceframe-face") + (mapcar #'character-fold--random-word '(10 50 100 + 50 100)))) + (message "Testing %s" string) + ;; Make sure we didn't just fallback on the trivial search. + (should-not (string= (regexp-quote string) + (character-fold-to-regexp string))) + (with-temp-buffer + (save-excursion (insert string)) + (let ((time (time-to-seconds (current-time)))) + ;; Our initial implementation of case-folding in char-folding + ;; created a lot of redundant paths in the regexp. Because of + ;; that, if a really long string "almost" matches, the regexp + ;; engine took a long time to realise that it doesn't match. + (should-not (character-fold-search-forward (concat string "c") nil 'noerror)) + ;; Ensure it took less than a second. + (should (< (- (time-to-seconds (current-time)) + time) + 1)))))) (provide 'character-fold-tests) ;;; character-fold-tests.el ends here -- 2.39.5