default, controlled by a separate option; see @ref{Replacement and Lax
Matches}.)
- Like with case folding, typing an explicit variant of a character,
-such as @code{@"a}, as part of the search string disables character
-folding for that search. If you delete such a character from the
-search string, this effect ceases.
+@vindex char-fold-symmetric
+ By default, typing an explicit variant of a character, such as
+@code{@"a}, as part of the search string doesn't match its base
+character, such as @code{a}. But if you customize the variable
+@code{char-fold-symmetric} to @code{t}, then search commands treat
+equivalent characters the same and use of any of a set of equivalent
+characters in a search string finds any of them in the text being
+searched, so typing an accented character @code{@"a} matches the
+letter @code{a} as well as all the other variants like @code{@'a}.
+
+@vindex char-fold-include
+@vindex char-fold-exclude
+ You can add new foldings using the customizable variable
+@code{char-fold-include}, or remove the existing ones using the
+customizable variable @code{char-fold-exclude}.
@node Replace
@section Replacement Commands
;;; Code:
-(eval-and-compile (put 'char-fold-table 'char-table-extra-slots 1))
+(eval-and-compile
+ (put 'char-fold-table 'char-table-extra-slots 1)
+ (defconst char-fold--default-include
+ '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
+ (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "" "❮" "❯" "‹" "›")
+ (?` "❛" "‘" "‛" "" "❮" "‹")))
+ (defconst char-fold--default-exclude nil)
+ (defconst char-fold--default-symmetric nil)
+ (defconst char-fold--previous (list char-fold--default-include
+ char-fold--default-exclude
+ char-fold--default-symmetric)))
+
\f
(eval-and-compile
(defun char-fold-make-table ()
(aref equiv (car simpler-decomp)))))))))))
table)
- ;; Add some manual entries.
- (dolist (it '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
- (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "" "❮" "❯" "‹" "›")
- (?` "❛" "‘" "‛" "" "❮" "‹")))
+ ;; Add some entries to default decomposition
+ (dolist (it (or (bound-and-true-p char-fold-include)
+ char-fold--default-include))
(let ((idx (car it))
(chars (cdr it)))
(aset equiv idx (append chars (aref equiv idx)))))
+ ;; Remove some entries from default decomposition
+ (dolist (it (or (bound-and-true-p char-fold-exclude)
+ char-fold--default-exclude))
+ (let ((idx (car it))
+ (chars (cdr it)))
+ (when (aref equiv idx)
+ (dolist (char chars)
+ (aset equiv idx (remove char (aref equiv idx)))))))
+
+ ;; Add symmetric entries
+ (when (or (bound-and-true-p char-fold-symmetric)
+ char-fold--default-symmetric)
+ (let ((symmetric (make-hash-table :test 'eq)))
+ ;; Initialize hashes
+ (map-char-table
+ (lambda (char decomp-list)
+ (puthash char (make-hash-table :test 'equal) symmetric)
+ (dolist (decomp decomp-list)
+ (puthash (string-to-char decomp) (make-hash-table :test 'equal) symmetric)))
+ equiv)
+
+ (map-char-table
+ (lambda (char decomp-list)
+ (dolist (decomp decomp-list)
+ (if (< (length decomp) 2)
+ ;; Add single-char symmetric pairs to hash
+ (let ((decomp-list (cons (char-to-string char) decomp-list))
+ (decomp-hash (gethash (string-to-char decomp) symmetric)))
+ (dolist (decomp2 decomp-list)
+ (unless (equal decomp decomp2)
+ (puthash decomp2 t decomp-hash)
+ (puthash decomp t (gethash (string-to-char decomp2) symmetric)))))
+ ;; Add multi-char symmetric pairs to equiv-multi char-table
+ (let ((decomp-list (cons (char-to-string char) decomp-list))
+ (prefix (string-to-char decomp))
+ (suffix (substring decomp 1)))
+ (puthash decomp t (gethash char symmetric))
+ (dolist (decomp2 decomp-list)
+ (if (< (length decomp2) 2)
+ (aset equiv-multi prefix
+ (cons (cons suffix (regexp-quote decomp2))
+ (aref equiv-multi prefix)))))))))
+ equiv)
+
+ ;; Update equiv char-table from hash
+ (maphash
+ (lambda (char decomp-hash)
+ (let (schars)
+ (maphash (lambda (schar _) (push schar schars)) decomp-hash)
+ (aset equiv char schars)))
+ symmetric)))
+
;; Convert the lists of characters we compiled into regexps.
(map-char-table
(lambda (char decomp-list)
(let ((re (regexp-opt (cons (char-to-string char) decomp-list))))
- (if (consp char) ; FIXME: char never is consp?
- (set-char-table-range equiv char re)
- (aset equiv char re))))
+ (aset equiv char re)))
equiv)
equiv)))
Exceptionally for the space character (32), ALIST is ignored.")
+\f
+(defun char-fold-update-table ()
+ (let ((new (list (or (bound-and-true-p char-fold-include)
+ char-fold--default-include)
+ (or (bound-and-true-p char-fold-exclude)
+ char-fold--default-exclude)
+ (or (bound-and-true-p char-fold-symmetric)
+ char-fold--default-symmetric))))
+ (unless (equal char-fold--previous new)
+ (setq char-fold-table (char-fold-make-table)
+ char-fold--previous new))))
+
+(defcustom char-fold-include char-fold--default-include
+ "Additional character foldings to include.
+Each entry is a list of a character and the strings that fold into it."
+ :type '(alist :key-type (character :tag "Fold to character")
+ :value-type (repeat (string :tag "Fold from string")))
+ :initialize #'custom-initialize-default
+ :set (lambda (sym val)
+ (custom-set-default sym val)
+ (char-fold-update-table))
+ :group 'isearch
+ :version "27.1")
+
+(defcustom char-fold-exclude char-fold--default-exclude
+ "Character foldings to remove from default decompisitions.
+Each entry is a list of a character and the strings to remove from folding."
+ :type '(alist :key-type (character :tag "Fold to character")
+ :value-type (repeat (string :tag "Fold from string")))
+ :initialize #'custom-initialize-default
+ :set (lambda (sym val)
+ (custom-set-default sym val)
+ (char-fold-update-table))
+ :group 'isearch
+ :version "27.1")
+
+(defcustom char-fold-symmetric char-fold--default-symmetric
+ "Non-nil means char-fold searching treats equivalent chars the same.
+That is, use of any of a set of char-fold equivalent chars in a search
+string finds any of them in the text being searched.
+
+If nil then only the \"base\" or \"canonical\" char of the set matches
+any of them. The others match only themselves, even when char-folding
+is turned on."
+ :type 'boolean
+ :initialize #'custom-initialize-default
+ :set (lambda (sym val)
+ (custom-set-default sym val)
+ (char-fold-update-table))
+ :group 'isearch
+ :version "27.1")
+
+(char-fold-update-table)
+
+\f
(defun char-fold--make-space-string (n)
"Return a string that matches N spaces."
(format "\\(?:%s\\|%s\\)"
(should (string-match (char-fold--ascii-upcase re) (downcase it)))
(should (string-match (char-fold--ascii-downcase re) (upcase it)))))))
+(defun char-fold--test-no-match-exactly (string &rest strings-to-match)
+ (let ((re (concat "\\`" (char-fold-to-regexp string) "\\'")))
+ (dolist (it strings-to-match)
+ (should-not (string-match re it)))
+ ;; Case folding
+ (let ((case-fold-search t))
+ (dolist (it strings-to-match)
+ (should-not (string-match (char-fold--ascii-upcase re) (downcase it)))
+ (should-not (string-match (char-fold--ascii-downcase re) (upcase it)))))))
+
(defun char-fold--test-search-with-contents (contents string)
(with-temp-buffer
(insert contents)
(should (char-fold-search-forward string nil 'noerror))
(should (char-fold-search-backward string nil 'noerror))))
+(defun char-fold--permutation (strings)
+ (mapcar (lambda (string)
+ (cons string (remove string strings)))
+ strings))
+
\f
(ert-deftest char-fold--test-consistency ()
(dotimes (n 30)
;; Ensure it took less than a second.
(should (< (- (time-to-seconds) time) 1))))))
+(ert-deftest char-fold--test-without-customization ()
+ (let* ((matches
+ '(
+ ("e" "ℯ" "ḗ" "ë" "ë")
+ ("ι"
+ "ί" ;; 1 level decomposition
+ "ί" ;; 2 level decomposition
+ ;; FIXME:
+ ;; "ΐ" ;; 3 level decomposition
+ )
+ )))
+ (dolist (strings matches)
+ (apply 'char-fold--test-match-exactly strings))))
+
+(ert-deftest char-fold--test-with-customization ()
+ :tags '(:expensive-test)
+ (let* ((char-fold-include
+ '(
+ (?ß "ss") ;; de
+ (?o "ø") ;; da no nb nn
+ (?l "ł") ;; pl
+ ))
+ ;; FIXME: move language-specific settings to defaults
+ (char-fold-exclude
+ '(
+ (?a "å") ;; sv da no nb nn
+ (?a "ä") ;; sv fi et
+ (?o "ö") ;; sv fi et
+ (?n "ñ") ;; es
+ (?и "й") ;; ru
+ ))
+ (char-fold-symmetric t)
+ (char-fold-table (char-fold-make-table))
+ (matches
+ '(
+ ("e" "ℯ" "ḗ" "ë" "ë")
+ ("е" "ё" "ё")
+ ("ι" "ί" "ί"
+ ;; FIXME: "ΐ"
+ )
+ ("ß" "ss")
+ ("o" "ø")
+ ("l" "ł")
+
+ ))
+ (no-matches
+ '(
+ ("a" "å")
+ ("a" "ä")
+ ("o" "ö")
+ ("n" "ñ")
+ ("и" "й")
+ )))
+ (dolist (strings matches)
+ (dolist (permutation (char-fold--permutation strings))
+ (apply 'char-fold--test-match-exactly permutation)))
+ (dolist (strings no-matches)
+ (dolist (permutation (char-fold--permutation strings))
+ (apply 'char-fold--test-no-match-exactly permutation)))))
+
(provide 'char-fold-tests)
;;; char-fold-tests.el ends here