From: Juri Linkov Date: Tue, 23 Jul 2019 20:27:28 +0000 (+0300) Subject: Customizable char-fold with char-fold-symmetric, char-fold-include (bug#35689) X-Git-Tag: emacs-27.0.90~1817^2~151 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=376f5df3cca0dbf186823e5b329d76b52019473d;p=emacs.git Customizable char-fold with char-fold-symmetric, char-fold-include (bug#35689) * doc/emacs/search.texi (Lax Search): Document char-fold-symmetric, char-fold-include, char-fold-exclude. * lisp/char-fold.el (char-fold--default-include) (char-fold--default-exclude, char-fold--default-symmetric) (char-fold--previous): New defconsts. (char-fold-include, char-fold-exclude, char-fold-symmetric): New defcustoms. (char-fold-make-table): Use them. (char-fold-update-table): New function called at top-level. * test/lisp/char-fold-tests.el (char-fold--test-no-match-exactly) (char-fold--permutation): New functions. (char-fold--test-without-customization) (char-fold--test-with-customization): New tests. --- diff --git a/doc/emacs/search.texi b/doc/emacs/search.texi index b47d51a2b66..66af5d40162 100644 --- a/doc/emacs/search.texi +++ b/doc/emacs/search.texi @@ -1354,10 +1354,21 @@ folding, but only for that search. (Replace commands have a different default, controlled by a separate option; see @ref{Replacement and Lax Matches}.) - Like with case folding, typing an explicit variant of a character, -such as @code{@"a}, as part of the search string disables character -folding for that search. If you delete such a character from the -search string, this effect ceases. +@vindex char-fold-symmetric + By default, typing an explicit variant of a character, such as +@code{@"a}, as part of the search string doesn't match its base +character, such as @code{a}. But if you customize the variable +@code{char-fold-symmetric} to @code{t}, then search commands treat +equivalent characters the same and use of any of a set of equivalent +characters in a search string finds any of them in the text being +searched, so typing an accented character @code{@"a} matches the +letter @code{a} as well as all the other variants like @code{@'a}. + +@vindex char-fold-include +@vindex char-fold-exclude + You can add new foldings using the customizable variable +@code{char-fold-include}, or remove the existing ones using the +customizable variable @code{char-fold-exclude}. @node Replace @section Replacement Commands diff --git a/etc/NEWS b/etc/NEWS index 6a02c386960..5313270411c 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1175,6 +1175,15 @@ rather than stopping after one level, such that searching for e.g. GREEK SMALL LETTER IOTA will now also find GREEK SMALL LETTER IOTA WITH OXIA. ++++ +*** New char-folding options: 'char-fold-include' lets you add ad hoc +foldings, 'char-fold-exclude' to remove foldings from default decomposition, +and 'char-fold-symmetric' to search for any of an equivalence class of +characters. For example, with a 'nil' value of 'char-fold-symmetric' +you can search for "e" to find "é", but not vice versa. With a non-nil +value you can search for either, for example, you can search for "é" +to find "e". + ** Debugger +++ diff --git a/lisp/char-fold.el b/lisp/char-fold.el index a5c4e5e411b..f379229e6c4 100644 --- a/lisp/char-fold.el +++ b/lisp/char-fold.el @@ -22,7 +22,18 @@ ;;; Code: -(eval-and-compile (put 'char-fold-table 'char-table-extra-slots 1)) +(eval-and-compile + (put 'char-fold-table 'char-table-extra-slots 1) + (defconst char-fold--default-include + '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»") + (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›") + (?` "❛" "‘" "‛" "󠀢" "❮" "‹"))) + (defconst char-fold--default-exclude nil) + (defconst char-fold--default-symmetric nil) + (defconst char-fold--previous (list char-fold--default-include + char-fold--default-exclude + char-fold--default-symmetric))) + (eval-and-compile (defun char-fold-make-table () @@ -116,21 +127,70 @@ (aref equiv (car simpler-decomp))))))))))) table) - ;; Add some manual entries. - (dolist (it '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»") - (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›") - (?` "❛" "‘" "‛" "󠀢" "❮" "‹"))) + ;; Add some entries to default decomposition + (dolist (it (or (bound-and-true-p char-fold-include) + char-fold--default-include)) (let ((idx (car it)) (chars (cdr it))) (aset equiv idx (append chars (aref equiv idx))))) + ;; Remove some entries from default decomposition + (dolist (it (or (bound-and-true-p char-fold-exclude) + char-fold--default-exclude)) + (let ((idx (car it)) + (chars (cdr it))) + (when (aref equiv idx) + (dolist (char chars) + (aset equiv idx (remove char (aref equiv idx))))))) + + ;; Add symmetric entries + (when (or (bound-and-true-p char-fold-symmetric) + char-fold--default-symmetric) + (let ((symmetric (make-hash-table :test 'eq))) + ;; Initialize hashes + (map-char-table + (lambda (char decomp-list) + (puthash char (make-hash-table :test 'equal) symmetric) + (dolist (decomp decomp-list) + (puthash (string-to-char decomp) (make-hash-table :test 'equal) symmetric))) + equiv) + + (map-char-table + (lambda (char decomp-list) + (dolist (decomp decomp-list) + (if (< (length decomp) 2) + ;; Add single-char symmetric pairs to hash + (let ((decomp-list (cons (char-to-string char) decomp-list)) + (decomp-hash (gethash (string-to-char decomp) symmetric))) + (dolist (decomp2 decomp-list) + (unless (equal decomp decomp2) + (puthash decomp2 t decomp-hash) + (puthash decomp t (gethash (string-to-char decomp2) symmetric))))) + ;; Add multi-char symmetric pairs to equiv-multi char-table + (let ((decomp-list (cons (char-to-string char) decomp-list)) + (prefix (string-to-char decomp)) + (suffix (substring decomp 1))) + (puthash decomp t (gethash char symmetric)) + (dolist (decomp2 decomp-list) + (if (< (length decomp2) 2) + (aset equiv-multi prefix + (cons (cons suffix (regexp-quote decomp2)) + (aref equiv-multi prefix))))))))) + equiv) + + ;; Update equiv char-table from hash + (maphash + (lambda (char decomp-hash) + (let (schars) + (maphash (lambda (schar _) (push schar schars)) decomp-hash) + (aset equiv char schars))) + symmetric))) + ;; Convert the lists of characters we compiled into regexps. (map-char-table (lambda (char decomp-list) (let ((re (regexp-opt (cons (char-to-string char) decomp-list)))) - (if (consp char) ; FIXME: char never is consp? - (set-char-table-range equiv char re) - (aset equiv char re)))) + (aset equiv char re))) equiv) equiv))) @@ -159,6 +219,61 @@ For instance, the default alist for ?f includes: Exceptionally for the space character (32), ALIST is ignored.") + +(defun char-fold-update-table () + (let ((new (list (or (bound-and-true-p char-fold-include) + char-fold--default-include) + (or (bound-and-true-p char-fold-exclude) + char-fold--default-exclude) + (or (bound-and-true-p char-fold-symmetric) + char-fold--default-symmetric)))) + (unless (equal char-fold--previous new) + (setq char-fold-table (char-fold-make-table) + char-fold--previous new)))) + +(defcustom char-fold-include char-fold--default-include + "Additional character foldings to include. +Each entry is a list of a character and the strings that fold into it." + :type '(alist :key-type (character :tag "Fold to character") + :value-type (repeat (string :tag "Fold from string"))) + :initialize #'custom-initialize-default + :set (lambda (sym val) + (custom-set-default sym val) + (char-fold-update-table)) + :group 'isearch + :version "27.1") + +(defcustom char-fold-exclude char-fold--default-exclude + "Character foldings to remove from default decompisitions. +Each entry is a list of a character and the strings to remove from folding." + :type '(alist :key-type (character :tag "Fold to character") + :value-type (repeat (string :tag "Fold from string"))) + :initialize #'custom-initialize-default + :set (lambda (sym val) + (custom-set-default sym val) + (char-fold-update-table)) + :group 'isearch + :version "27.1") + +(defcustom char-fold-symmetric char-fold--default-symmetric + "Non-nil means char-fold searching treats equivalent chars the same. +That is, use of any of a set of char-fold equivalent chars in a search +string finds any of them in the text being searched. + +If nil then only the \"base\" or \"canonical\" char of the set matches +any of them. The others match only themselves, even when char-folding +is turned on." + :type 'boolean + :initialize #'custom-initialize-default + :set (lambda (sym val) + (custom-set-default sym val) + (char-fold-update-table)) + :group 'isearch + :version "27.1") + +(char-fold-update-table) + + (defun char-fold--make-space-string (n) "Return a string that matches N spaces." (format "\\(?:%s\\|%s\\)" diff --git a/test/lisp/char-fold-tests.el b/test/lisp/char-fold-tests.el index e9dfd2b7336..e519435ef05 100644 --- a/test/lisp/char-fold-tests.el +++ b/test/lisp/char-fold-tests.el @@ -44,6 +44,16 @@ (should (string-match (char-fold--ascii-upcase re) (downcase it))) (should (string-match (char-fold--ascii-downcase re) (upcase it))))))) +(defun char-fold--test-no-match-exactly (string &rest strings-to-match) + (let ((re (concat "\\`" (char-fold-to-regexp string) "\\'"))) + (dolist (it strings-to-match) + (should-not (string-match re it))) + ;; Case folding + (let ((case-fold-search t)) + (dolist (it strings-to-match) + (should-not (string-match (char-fold--ascii-upcase re) (downcase it))) + (should-not (string-match (char-fold--ascii-downcase re) (upcase it))))))) + (defun char-fold--test-search-with-contents (contents string) (with-temp-buffer (insert contents) @@ -53,6 +63,11 @@ (should (char-fold-search-forward string nil 'noerror)) (should (char-fold-search-backward string nil 'noerror)))) +(defun char-fold--permutation (strings) + (mapcar (lambda (string) + (cons string (remove string strings))) + strings)) + (ert-deftest char-fold--test-consistency () (dotimes (n 30) @@ -132,5 +147,65 @@ ;; Ensure it took less than a second. (should (< (- (time-to-seconds) time) 1)))))) +(ert-deftest char-fold--test-without-customization () + (let* ((matches + '( + ("e" "ℯ" "ḗ" "ë" "ë") + ("ι" + "ί" ;; 1 level decomposition + "ί" ;; 2 level decomposition + ;; FIXME: + ;; "ΐ" ;; 3 level decomposition + ) + ))) + (dolist (strings matches) + (apply 'char-fold--test-match-exactly strings)))) + +(ert-deftest char-fold--test-with-customization () + :tags '(:expensive-test) + (let* ((char-fold-include + '( + (?ß "ss") ;; de + (?o "ø") ;; da no nb nn + (?l "ł") ;; pl + )) + ;; FIXME: move language-specific settings to defaults + (char-fold-exclude + '( + (?a "å") ;; sv da no nb nn + (?a "ä") ;; sv fi et + (?o "ö") ;; sv fi et + (?n "ñ") ;; es + (?и "й") ;; ru + )) + (char-fold-symmetric t) + (char-fold-table (char-fold-make-table)) + (matches + '( + ("e" "ℯ" "ḗ" "ë" "ë") + ("е" "ё" "ё") + ("ι" "ί" "ί" + ;; FIXME: "ΐ" + ) + ("ß" "ss") + ("o" "ø") + ("l" "ł") + + )) + (no-matches + '( + ("a" "å") + ("a" "ä") + ("o" "ö") + ("n" "ñ") + ("и" "й") + ))) + (dolist (strings matches) + (dolist (permutation (char-fold--permutation strings)) + (apply 'char-fold--test-match-exactly permutation))) + (dolist (strings no-matches) + (dolist (permutation (char-fold--permutation strings)) + (apply 'char-fold--test-no-match-exactly permutation))))) + (provide 'char-fold-tests) ;;; char-fold-tests.el ends here