From 795c9189165881c84a8ca4570510ab2db0775d07 Mon Sep 17 00:00:00 2001 From: Artur Malabarba Date: Wed, 24 Jun 2015 20:01:10 +0100 Subject: [PATCH] * lisp/character-fold.el: New file (Bug#20887) (character-fold-to-regexp): New function. * lisp/replace.el (replace-search): Check value of `character-fold-search'. * lisp/isearch.el: Move character-folding code to character-fold.el (isearch-toggle-character-fold): New command. (isearch-mode-map): Bind it to "\M-sf". (isearch-mode): Check value of `character-fold-search'. --- lisp/character-fold.el | 109 +++++++++++++++++++++++++++++++++++++++++ lisp/isearch.el | 94 ++++++----------------------------- lisp/replace.el | 5 +- 3 files changed, 127 insertions(+), 81 deletions(-) create mode 100644 lisp/character-fold.el diff --git a/lisp/character-fold.el b/lisp/character-fold.el new file mode 100644 index 00000000000..b716e593c0c --- /dev/null +++ b/lisp/character-fold.el @@ -0,0 +1,109 @@ +;;; chracter-fold.el --- matching unicode characters to their ascii similars -*- lexical-binding: t; -*- + +;; Copyright (C) 2015 Free Software Foundation, Inc. + +;; Maintainer: emacs-devel@gnu.org +;; Keywords: matching + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see . + +;;; Code: + + +;;;###autoload +(defvar character-fold-search t + "Non-nil if searches should fold similar characters. +This means some characters will match entire groups of charactes. +For instance, \" will match all variants of double quotes, and +the letter a will match all of its accented versions (and then +some).") + +(defconst character-fold-table + (eval-when-compile + (let ((equiv (make-char-table 'character-fold-table))) + ;; Compile a list of all complex characters that each simple + ;; character should match. + (map-char-table + (lambda (i dec) + (when (consp dec) + ;; Discard a possible formatting tag. + (when (symbolp (car dec)) + (setq dec (cdr dec))) + ;; Skip trivial cases lika ?a decomposing to (?a). + (unless (or (and (eq i (car dec)) + (not (cdr dec)))) + (let ((d dec) k found multiletter) + (while (and d (not found)) + (setq k (pop d)) + ;; Is k a number or letter, per unicode standard? + (setq found (memq (get-char-code-property k 'general-category) + '(Lu Ll Lt Lm Lo Nd Nl No)))) + (if found + ;; Check if the decomposition has more than one letter, + ;; because then we don't want the first letter to match + ;; the decomposition. + (dolist (k d) + (when (memq (get-char-code-property k 'general-category) + '(Lu Ll Lt Lm Lo Nd Nl No)) + (setq multiletter t))) + ;; If there's no number or letter on the + ;; decomposition, take the first character in it. + (setq found (car-safe dec))) + ;; Add i to the list of characters that k can + ;; represent. Also possibly add its decomposition, so we can + ;; match multi-char representations like (format "a%c" 769) + (when (and found (not (eq i k))) + (let ((chars (cons (char-to-string i) (aref equiv k)))) + (aset equiv k + (if multiletter chars + (cons (apply #'string dec) chars))))))))) + (unicode-property-table-internal 'decomposition)) + (dolist (it '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»") + (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›") + (?` "❛" "‘" "‛" "󠀢" "❮" "‹") + (?\s "\t" "\r" "\n"))) + (let ((idx (car it)) + (chars (cdr it))) + (aset equiv idx (append chars (aref equiv idx))))) + (map-char-table + (lambda (i v) (let ((re (regexp-opt (cons (char-to-string i) v)))) + (if (consp i) + (set-char-table-range equiv i re) + (aset equiv i re)))) + equiv) + equiv)) + "Used for folding characters of the same group during search.") + +;;;###autoload +(defun character-fold-to-regexp (string &optional lax) + "Return a regexp matching anything that character-folds into STRING. +If `character-fold-search' is nil, `regexp-quote' string. +Otherwise, any character in STRING that has an entry in +`character-fold-table' is replaced with that entry (which is a +regexp) and other characters are `regexp-quote'd. +If LAX is non-nil, any single whitespace character is allowed to +match any number of times." + (if character-fold-search + (apply #'concat + (mapcar (lambda (c) (let ((out (or (aref character-fold-table c) + (regexp-quote (string c))))) + (if (and lax (memq c '(?\s ?\t ?\r ?\n ))) + (concat out "+") + out))) + string)) + (regexp-quote string))) + +;;; chracter-fold.el ends here diff --git a/lisp/isearch.el b/lisp/isearch.el index 44ce9023d8a..9ecbbdf11ed 100644 --- a/lisp/isearch.el +++ b/lisp/isearch.el @@ -272,79 +272,6 @@ Default value, nil, means edit the string instead." :version "23.1" :group 'isearch) -(defvar isearch-character-fold-search t - "Non-nil if isearch should fold similar characters. -This means some characters will match entire groups of charactes. -For instance, \" will match all variants of double quotes, and -the letter a will match all of its accented versions (and then -some).") - -(defconst isearch--character-fold-extras - '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»") - (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›") - (?` "❛" "‘" "‛" "󠀢" "❮" "‹") - ;; `isearch-character-fold-search' doesn't interact with - ;; `isearch-lax-whitespace' yet. So we need to add this here. - (?\s " " "\r" "\n")) - "Extra entries to add to `isearch--character-fold-table'. -Used to specify character folding not covered by unicode -decomposition. Each car is a character and each cdr is a list of -strings that it should match (itself excluded).") - -(defvar isearch--character-fold-table - (eval-when-compile - (require 'subr-x) - (let ((equiv (make-char-table 'character-fold-table))) - ;; Compile a list of all complex characters that each simple - ;; character should match. - (dotimes (i (length equiv)) - (let ((dd (get-char-code-property i 'decomposition)) - d k found) - ;; Skip trivial cases (?a decomposes to (?a)). - (unless (and (eq i (car dd))) - ;; Discard a possible formatting tag. - (when (symbolp (car-safe dd)) - (setq dd (cdr dd))) - ;; Is k a number or letter, per unicode standard? - (setq d dd) - (while (and d (not found)) - (setq k (pop d)) - (setq found (and (characterp k) - (memq (get-char-code-property k 'general-category) - '(Lu Ll Lt Lm Lo Nd Nl No))))) - ;; If there's no number or letter on the - ;; decomposition, find the first character in it. - (setq d dd) - (while (and d (not found)) - (setq k (pop d)) - (setq found (characterp k))) - ;; Add i to the list of characters that k can - ;; represent. Also add its decomposition, so we can - ;; match multi-char representations like (format "a%c" 769) - (when (and found (not (eq i k))) - (aset equiv k (cons (apply #'string dd) - (cons (char-to-string i) - (aref equiv k)))))))) - (dotimes (i (length equiv)) - (when-let ((chars (append (cdr (assq i isearch--character-fold-extras)) - (aref equiv i)))) - (aset equiv i (regexp-opt (cons (char-to-string i) chars))))) - equiv)) - "Used for folding characters of the same group during search.") - -(defun isearch--character-folded-regexp (string) - "Return a regexp matching anything that character-folds into STRING. -If `isearch-character-fold-search' is nil, `regexp-quote' string. -Otherwise, any character in STRING that has an entry in -`isearch--character-fold-table' is replaced with that entry -\(which is a regexp) and other characters are `regexp-quote'd." - (if isearch-character-fold-search - (apply #'concat - (mapcar (lambda (c) (or (aref isearch--character-fold-table c) - (regexp-quote (string c)))) - string)) - (regexp-quote string))) - (defcustom isearch-lazy-highlight t "Controls the lazy-highlighting during incremental search. When non-nil, all text in the buffer matching the current search @@ -592,6 +519,7 @@ This is like `describe-bindings', but displays only Isearch keys." (define-key map "\M-sw" 'isearch-toggle-word) (define-key map "\M-s_" 'isearch-toggle-symbol) (define-key map "\M-s " 'isearch-toggle-lax-whitespace) + (define-key map "\M-s'" #'isearch-toggle-character-fold) (define-key map [?\M-%] 'isearch-query-replace) (define-key map [?\C-\M-%] 'isearch-query-replace-regexp) @@ -907,6 +835,9 @@ See the command `isearch-forward-symbol' for more information." ;; isearch-forward-regexp isearch-backward-regexp) ;; "List of commands for which isearch-mode does not recursive-edit.") +(autoload 'character-fold-to-regexp "character-fold") +(put 'character-fold-to-regexp 'isearch-message-prefix "char-fold ") +(defvar character-fold-search) (defun isearch-mode (forward &optional regexp op-fun recursive-edit word) "Start Isearch minor mode. @@ -931,7 +862,8 @@ convert the search string to a regexp used by regexp search functions." ;; Initialize global vars. (setq isearch-forward forward isearch-regexp regexp - isearch-word word + isearch-word (or word (and character-fold-search + 'character-fold-to-regexp)) isearch-op-fun op-fun isearch-last-case-fold-search isearch-case-fold-search isearch-case-fold-search case-fold-search @@ -1581,6 +1513,15 @@ Use `isearch-exit' to quit without signaling." (setq isearch-success t isearch-adjusted t) (isearch-update)) +(defun isearch-toggle-character-fold () + "Toggle character folding in searching on or off." + (interactive) + (setq isearch-word (unless (eq isearch-word #'character-fold-to-regexp) + #'character-fold-to-regexp)) + (if isearch-word (setq isearch-regexp nil)) + (setq isearch-success t isearch-adjusted t) + (isearch-update)) + (defun isearch-toggle-lax-whitespace () "Toggle whitespace matching in searching on or off. In ordinary search, toggles the value of the variable @@ -2680,11 +2621,6 @@ Can be changed via `isearch-search-fun-function' for special needs." 're-search-backward-lax-whitespace)) (isearch-regexp (if isearch-forward 're-search-forward 're-search-backward)) - (isearch-character-fold-search - (lambda (string &optional bound noerror count) - (funcall (if isearch-forward #'re-search-forward #'re-search-backward) - (isearch--character-folded-regexp string) - bound noerror count))) ((and isearch-lax-whitespace search-whitespace-regexp) (if isearch-forward 'search-forward-lax-whitespace diff --git a/lisp/replace.el b/lisp/replace.el index 5e3ddc551fb..65a2b41f1ea 100644 --- a/lisp/replace.el +++ b/lisp/replace.el @@ -2012,8 +2012,9 @@ It is called with three arguments, as if it were ;; outside of this function because then another I-search ;; used after `recursive-edit' might override them. (let* ((isearch-regexp regexp-flag) - (isearch-word delimited-flag) - (isearch-character-fold-search replace-character-fold) + (isearch-word (or delimited-flag + (and replace-character-fold + #'character-fold-to-regexp))) (isearch-lax-whitespace replace-lax-whitespace) (isearch-regexp-lax-whitespace -- 2.39.2