From: Juri Linkov Date: Thu, 4 Jul 2019 20:49:33 +0000 (+0300) Subject: * lisp/char-fold.el (char-fold-to-regexp): Implement arg LAX (bug#36398). X-Git-Tag: emacs-27.0.90~2102 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=19b1cefa3ba00ea383bd0910523c6e972fedbe02;p=emacs.git * lisp/char-fold.el (char-fold-to-regexp): Implement arg LAX (bug#36398). * test/lisp/char-fold-tests.el (char-fold--test-multi-lax): New test. --- diff --git a/lisp/char-fold.el b/lisp/char-fold.el index 7223ecf738c..9d3ea17b413 100644 --- a/lisp/char-fold.el +++ b/lisp/char-fold.el @@ -148,12 +148,18 @@ Exceptionally for the space character (32), ALIST is ignored.") (make-list n (or (aref char-fold-table ?\s) " "))))) ;;;###autoload -(defun char-fold-to-regexp (string &optional _lax from) +(defun char-fold-to-regexp (string &optional lax from) "Return a regexp matching anything that char-folds into STRING. Any character in STRING that has an entry in `char-fold-table' is replaced with that entry (which is a regexp) and other characters are `regexp-quote'd. +When LAX is non-nil, then the final character also matches ligatures +partially, for instance, the search string \"f\" will match \"fi\", +so when typing the search string in isearch while the cursor is on +a ligature, the search won't try to immediately advance to the next +complete match, but will stay on the partially matched ligature. + If the resulting regexp would be too long for Emacs to handle, just return the result of calling `regexp-quote' on STRING. @@ -183,36 +189,40 @@ from which to start." ;; Long string. The regexp would probably be too long. (alist (unless (> end 50) (aref multi-char-table c)))) - (push (let ((matched-entries nil) - (max-length 0)) - (dolist (entry alist) - (let* ((suffix (car entry)) - (len-suf (length suffix))) - (when (eq (compare-strings suffix 0 nil - string (1+ i) (+ i 1 len-suf) - nil) - t) - (push (cons len-suf (cdr entry)) matched-entries) - (setq max-length (max max-length len-suf))))) - ;; If no suffixes matched, just go on. - (if (not matched-entries) - regexp + (push (if (and lax alist (= (1+ i) end)) + (concat "\\(?:" regexp "\\|" + (mapconcat (lambda (entry) + (cdr entry)) alist "\\|") "\\)") + (let ((matched-entries nil) + (max-length 0)) + (dolist (entry alist) + (let* ((suffix (car entry)) + (len-suf (length suffix))) + (when (eq (compare-strings suffix 0 nil + string (1+ i) (+ i 1 len-suf) + nil) + t) + (push (cons len-suf (cdr entry)) matched-entries) + (setq max-length (max max-length len-suf))))) + ;; If no suffixes matched, just go on. + (if (not matched-entries) + regexp ;;; If N suffixes match, we "branch" out into N+1 executions for the ;;; length of the longest match. This means "fix" will match "fix" but ;;; not "fⅸ", but it's necessary to keep the regexp size from scaling ;;; exponentially. See https://lists.gnu.org/r/emacs-devel/2015-11/msg02562.html - (let ((subs (substring string (1+ i) (+ i 1 max-length)))) - ;; `i' is still going to inc by 1 below. - (setq i (+ i max-length)) - (concat - "\\(?:" - (mapconcat (lambda (entry) - (let ((length (car entry)) - (suffix-regexp (cdr entry))) - (concat suffix-regexp - (char-fold-to-regexp subs nil length)))) - `((0 . ,regexp) . ,matched-entries) "\\|") - "\\)")))) + (let ((subs (substring string (1+ i) (+ i 1 max-length)))) + ;; `i' is still going to inc by 1 below. + (setq i (+ i max-length)) + (concat + "\\(?:" + (mapconcat (lambda (entry) + (let ((length (car entry)) + (suffix-regexp (cdr entry))) + (concat suffix-regexp + (char-fold-to-regexp subs nil length)))) + `((0 . ,regexp) . ,matched-entries) "\\|") + "\\)"))))) out)))) (setq i (1+ i))) (when (> spaces 0) diff --git a/test/lisp/char-fold-tests.el b/test/lisp/char-fold-tests.el index 3fde312a133..e9dfd2b7336 100644 --- a/test/lisp/char-fold-tests.el +++ b/test/lisp/char-fold-tests.el @@ -82,6 +82,14 @@ (set-char-table-extra-slot char-fold-table 0 multi) (char-fold--test-match-exactly (car it) (cdr it))))) +(ert-deftest char-fold--test-multi-lax () + (dolist (it '(("f" . "fi") ("f" . "ff"))) + (with-temp-buffer + (insert (cdr it)) + (goto-char (point-min)) + (should (search-forward-regexp + (char-fold-to-regexp (car it) 'lax) nil 'noerror))))) + (ert-deftest char-fold--test-fold-to-regexp () (let ((char-fold-table (make-char-table 'char-fold-table)) (multi (make-char-table 'char-fold-table)))