From f9337bc36d17a8819c0d05be8d3a1edcc34c6c79 Mon Sep 17 00:00:00 2001 From: Robert Pluim Date: Mon, 22 Jul 2019 20:27:59 +0200 Subject: [PATCH] Follow decomposition chains when constructing char-fold-table * lisp/char-fold.el (char-fold-make-table): Decompose the decomposition of each character, adding equivalences to the original character, until no more decompositions are left. --- etc/NEWS | 8 ++++++++ lisp/char-fold.el | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/etc/NEWS b/etc/NEWS index 7fd22145821..6a02c386960 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1167,6 +1167,14 @@ and case-sensitivity together with search strings in the search ring. +++ *** 'flush-lines' prints and returns the number of deleted matching lines. +--- +*** 'char-fold-to-regexp' now matches more variants of a base character. +The table used to check for equivalence of characters is now built +using the complete chain of Unicode decompositions of a character, +rather than stopping after one level, such that searching for +e.g. GREEK SMALL LETTER IOTA will now also find GREEK SMALL LETTER +IOTA WITH OXIA. + ** Debugger +++ diff --git a/lisp/char-fold.el b/lisp/char-fold.el index 9d3ea17b413..a5c4e5e411b 100644 --- a/lisp/char-fold.el +++ b/lisp/char-fold.el @@ -78,6 +78,25 @@ (cons (char-to-string char) (aref equiv (car decomp)))))))) (funcall make-decomp-match-char decomp char) + ;; Check to see if the first char of the decomposition + ;; has a further decomposition. If so, add a mapping + ;; back from that second decomposition to the original + ;; character. This allows e.g. 'ι' (GREEK SMALL LETTER + ;; IOTA) to match both the Basic Greek block and + ;; Extended Greek block variants of IOTA + + ;; diacritical(s). Repeat until there are no more + ;; decompositions. 
+ (let ((dec decomp) + next-decomp) + (while dec + (setq next-decomp (char-table-range table (car dec))) + (when (consp next-decomp) + (when (symbolp (car next-decomp)) + (setq next-decomp (cdr next-decomp))) + (if (not (eq (car dec) + (car next-decomp))) + (funcall make-decomp-match-char (list (car next-decomp)) char))) + (setq dec next-decomp))) ;; Do it again, without the non-spacing characters. ;; This allows 'a' to match 'ä'. (let ((simpler-decomp nil) -- 2.39.2