From c7a19e0c80ec6134ab6fb1950d3e1ac59a7b986f Mon Sep 17 00:00:00 2001 From: Artur Malabarba
Date: Tue, 27 Jan 2015 14:08:01 -0200 Subject: [PATCH] * lisp/isearch.el: Fold many unicode characters to ASCII (isearch-character-fold-search, isearch--character-fold-extras) (isearch--character-fold-table): New variable. (isearch--character-folded-regexp): New function. (isearch-search-fun-default): Use them. * lisp/replace.el (replace-character-fold): New variable. (replace-search): Use it. * etc/NEWS: Document it. --- etc/NEWS | 14 +++++++++ lisp/isearch.el | 78 +++++++++++++++++++++++++++++++++++++++++++++++++ lisp/replace.el | 9 ++++++ 3 files changed, 101 insertions(+) diff --git a/etc/NEWS b/etc/NEWS index 31055ac5303..e4cf2d65e09 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -84,6 +84,20 @@ command line when `initial-buffer-choice' is non-nil. * Changes in Emacs 25.1 +** `isearch' and `query-replace' now perform character folding in matches. +This is analogous to case-folding, but applies between Unicode +characters and their ASCII counterparts. This means many characters +will match entire groups of characters. + +For instance, the " will match all variants of unicode double quotes +(like “ and ”), and the letter a will match all of its accented +cousins, even those composed of multiple characters, as well as many +other symbols like ℀, ℁, ⒜, and ⓐ. + +** New function `isearch--character-folded-regexp' can be used +by searching commands to produce a regexp matching anything that +character-folds into STRING. + ** New command `checkdoc-package-keywords' checks if the current package keywords are recognized. Set the new option `checkdoc-package-keywords-flag' to non-nil to make diff --git a/lisp/isearch.el b/lisp/isearch.el index d1b92bd6a9d..44ce9023d8a 100644 --- a/lisp/isearch.el +++ b/lisp/isearch.el @@ -272,6 +272,79 @@ Default value, nil, means edit the string instead." :version "23.1" :group 'isearch) +(defvar isearch-character-fold-search t + "Non-nil if isearch should fold similar characters. 
+This means some characters will match entire groups of characters. +For instance, \" will match all variants of double quotes, and +the letter a will match all of its accented versions (and then +some).") + +(defconst isearch--character-fold-extras + '((?\" "＂" "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»") + (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›") + (?` "❛" "‘" "‛" "󠀢" "❮" "‹") + ;; `isearch-character-fold-search' doesn't interact with + ;; `isearch-lax-whitespace' yet. So we need to add this here. + (?\s " " "\r" "\n")) + "Extra entries to add to `isearch--character-fold-table'. +Used to specify character folding not covered by unicode +decomposition. Each car is a character and each cdr is a list of +strings that it should match (itself excluded).") + +(defvar isearch--character-fold-table + (eval-when-compile + (require 'subr-x) + (let ((equiv (make-char-table 'character-fold-table))) + ;; Compile a list of all complex characters that each simple + ;; character should match. + (dotimes (i (length equiv)) + (let ((dd (get-char-code-property i 'decomposition)) + d k found) + ;; Skip trivial cases (?a decomposes to (?a)). + (unless (and (eq i (car dd))) + ;; Discard a possible formatting tag. + (when (symbolp (car-safe dd)) + (setq dd (cdr dd))) + ;; Is k a number or letter, per unicode standard? + (setq d dd) + (while (and d (not found)) + (setq k (pop d)) + (setq found (and (characterp k) + (memq (get-char-code-property k 'general-category) + '(Lu Ll Lt Lm Lo Nd Nl No))))) + ;; If there's no number or letter on the + ;; decomposition, find the first character in it. + (setq d dd) + (while (and d (not found)) + (setq k (pop d)) + (setq found (characterp k))) + ;; Add i to the list of characters that k can + ;; represent. 
Also add its decomposition, so we can + ;; match multi-char representations like (format "a%c" 769) + (when (and found (not (eq i k))) + (aset equiv k (cons (apply #'string dd) + (cons (char-to-string i) + (aref equiv k)))))))) + (dotimes (i (length equiv)) + (when-let ((chars (append (cdr (assq i isearch--character-fold-extras)) + (aref equiv i)))) + (aset equiv i (regexp-opt (cons (char-to-string i) chars))))) + equiv)) + "Used for folding characters of the same group during search.") + +(defun isearch--character-folded-regexp (string) + "Return a regexp matching anything that character-folds into STRING. +If `isearch-character-fold-search' is nil, `regexp-quote' string. +Otherwise, any character in STRING that has an entry in +`isearch--character-fold-table' is replaced with that entry +\(which is a regexp) and other characters are `regexp-quote'd." + (if isearch-character-fold-search + (apply #'concat + (mapcar (lambda (c) (or (aref isearch--character-fold-table c) + (regexp-quote (string c)))) + string)) + (regexp-quote string))) + (defcustom isearch-lazy-highlight t "Controls the lazy-highlighting during incremental search. When non-nil, all text in the buffer matching the current search @@ -2607,6 +2680,11 @@ Can be changed via `isearch-search-fun-function' for special needs." 
're-search-backward-lax-whitespace)) (isearch-regexp (if isearch-forward 're-search-forward 're-search-backward)) + (isearch-character-fold-search + (lambda (string &optional bound noerror count) + (funcall (if isearch-forward #'re-search-forward #'re-search-backward) + (isearch--character-folded-regexp string) + bound noerror count))) ((and isearch-lax-whitespace search-whitespace-regexp) (if isearch-forward 'search-forward-lax-whitespace diff --git a/lisp/replace.el b/lisp/replace.el index 74909efa380..5e3ddc551fb 100644 --- a/lisp/replace.el +++ b/lisp/replace.el @@ -33,6 +33,14 @@ :type 'boolean :group 'matching) +(defcustom replace-character-fold t + "Non-nil means `query-replace' should do character folding in matches. +This means, for instance, that ' will match a large variety of +unicode quotes." + :type 'boolean + :group 'matching + :version "25.1") + (defcustom replace-lax-whitespace nil "Non-nil means `query-replace' matches a sequence of whitespace chars. When you enter a space or spaces in the strings to be replaced, @@ -2005,6 +2013,7 @@ It is called with three arguments, as if it were ;; used after `recursive-edit' might override them. (let* ((isearch-regexp regexp-flag) (isearch-word delimited-flag) + (isearch-character-fold-search replace-character-fold) (isearch-lax-whitespace replace-lax-whitespace) (isearch-regexp-lax-whitespace -- 2.39.2