From: Juri Linkov Date: Tue, 17 May 2016 20:55:38 +0000 (+0300) Subject: * lisp/char-fold.el: Rename from character-fold.el. X-Git-Tag: emacs-25.0.95~73 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=1fe1e0a;p=emacs.git * lisp/char-fold.el: Rename from character-fold.el. * lisp/replace.el (replace-char-fold): Rename from replace-character-fold. * test/automated/char-fold-tests.el: Rename from character-fold-tests.el. http://lists.gnu.org/archive/html/emacs-devel/2015-12/msg00529.html --- diff --git a/doc/emacs/search.texi b/doc/emacs/search.texi index 7958a4a2bfa..d841934c855 100644 --- a/doc/emacs/search.texi +++ b/doc/emacs/search.texi @@ -1264,13 +1264,13 @@ but match under character folding are known as @dfn{equivalent character sequences}. @kindex M-s ' @r{(Incremental Search)} -@findex isearch-toggle-character-fold +@findex isearch-toggle-char-fold Generally, search commands in Emacs do not by default perform character folding in order to match equivalent character sequences. You can enable this behavior by customizing the variable -@code{search-default-mode} to @code{character-fold-to-regexp}. +@code{search-default-mode} to @code{char-fold-to-regexp}. @xref{Search Customizations}. Within an incremental search, typing -@kbd{M-s '} (@code{isearch-toggle-character-fold}) toggles character +@kbd{M-s '} (@code{isearch-toggle-char-fold}) toggles character folding, but only for that search. (Replace commands have a different default, controlled by a separate option; see @ref{Replacement and Lax Matches}.) @@ -1481,7 +1481,7 @@ replacement is done without case conversion. (@pxref{Lax Search, character folding}) when looking for the text to replace. To enable character folding for matching in @code{query-replace} and @code{replace-string}, set the variable -@code{replace-character-fold} to a non-@code{nil} value. (This +@code{replace-char-fold} to a non-@code{nil} value. (This setting does not affect the replacement text, only how Emacs finds the text to replace. It also doesn't affect @code{replace-regexp}.) diff --git a/etc/NEWS b/etc/NEWS index 3031cab4782..670465f2ab8 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -633,11 +633,11 @@ as many other symbols like U+249C (PARENTHESIZED LATIN SMALL LETTER A). Character folding is enabled by customizing 'search-default-mode' to -the value 'character-fold-to-regexp'. You can also toggle character +the value 'char-fold-to-regexp'. You can also toggle character folding in the middle of a search by typing 'M-s ''. 'query-replace' honors character folding if the new variable -'replace-character-fold' is customized to a non-nil value. +'replace-char-fold' is customized to a non-nil value. +++ *** New user option 'search-default-mode'. @@ -647,9 +647,9 @@ value, nil specifies that Isearch does literal searches (however, as in previous Emacs versions). +++ -*** New function 'character-fold-to-regexp' can be used +*** New function 'char-fold-to-regexp' can be used by searching commands to produce a regexp matching anything that -character-folds into STRING. +char-folds into STRING. +++ *** The new 'M-s M-w' key binding uses eww to search the web for the diff --git a/lisp/char-fold.el b/lisp/char-fold.el new file mode 100644 index 00000000000..68bea29ea45 --- /dev/null +++ b/lisp/char-fold.el @@ -0,0 +1,242 @@ +;;; char-fold.el --- match unicode to similar ASCII -*- lexical-binding: t; -*- + +;; Copyright (C) 2015-2016 Free Software Foundation, Inc. + +;; Maintainer: emacs-devel@gnu.org +;; Keywords: matching + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see . + +;;; Code: + +(eval-and-compile (put 'char-fold-table 'char-table-extra-slots 1)) + +(defconst char-fold-table + (eval-when-compile + (let ((equiv (make-char-table 'char-fold-table)) + (equiv-multi (make-char-table 'char-fold-table)) + (table (unicode-property-table-internal 'decomposition))) + (set-char-table-extra-slot equiv 0 equiv-multi) + + ;; Ensure the table is populated. + (let ((func (char-table-extra-slot table 1))) + (map-char-table (lambda (char v) + (when (consp char) + (funcall func (car char) v table))) + table)) + + ;; Compile a list of all complex characters that each simple + ;; character should match. + ;; In summary this loop does 3 things: + ;; - A complex character might be allowed to match its decomp. + ;; - The decomp is allowed to match the complex character. + ;; - A single char of the decomp might be allowed to match the + ;; character. + ;; Some examples in the comments below. + (map-char-table + (lambda (char decomp) + (when (consp decomp) + ;; Skip trivial cases like ?a decomposing to (?a). + (unless (and (not (cdr decomp)) + (eq char (car decomp))) + (if (symbolp (car decomp)) + ;; Discard a possible formatting tag. + (setq decomp (cdr decomp)) + ;; If there's no formatting tag, ensure that char matches + ;; its decomp exactly. This is because we want 'ä' to + ;; match 'ä', but we don't want '¹' to match '1'. + (aset equiv char + (cons (apply #'string decomp) + (aref equiv char)))) + + ;; Allow the entire decomp to match char. If decomp has + ;; multiple characters, this is done by adding an entry + ;; to the alist of the first character in decomp. This + ;; allows 'ff' to match 'ff', 'ä' to match 'ä', and '1' to + ;; match '¹'. + (let ((make-decomp-match-char + (lambda (decomp char) + (if (cdr decomp) + (aset equiv-multi (car decomp) + (cons (cons (apply #'string (cdr decomp)) + (regexp-quote (string char))) + (aref equiv-multi (car decomp)))) + (aset equiv (car decomp) + (cons (char-to-string char) + (aref equiv (car decomp)))))))) + (funcall make-decomp-match-char decomp char) + ;; Do it again, without the non-spacing characters. + ;; This allows 'a' to match 'ä'. + (let ((simpler-decomp nil) + (found-one nil)) + (dolist (c decomp) + (if (> (get-char-code-property c 'canonical-combining-class) 0) + (setq found-one t) + (push c simpler-decomp))) + (when (and simpler-decomp found-one) + (funcall make-decomp-match-char simpler-decomp char) + ;; Finally, if the decomp only had one spacing + ;; character, we allow this character to match the + ;; decomp. This is to let 'a' match 'ä'. + (unless (cdr simpler-decomp) + (aset equiv (car simpler-decomp) + (cons (apply #'string decomp) + (aref equiv (car simpler-decomp))))))))))) + table) + + ;; Add some manual entries. + (dolist (it '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»") + (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›") + (?` "❛" "‘" "‛" "󠀢" "❮" "‹"))) + (let ((idx (car it)) + (chars (cdr it))) + (aset equiv idx (append chars (aref equiv idx))))) + + ;; Convert the lists of characters we compiled into regexps. + (map-char-table + (lambda (char dec-list) + (let ((re (regexp-opt (cons (char-to-string char) dec-list)))) + (if (consp char) + (set-char-table-range equiv char re) + (aset equiv char re)))) + equiv) + equiv)) + "Used for folding characters of the same group during search. +This is a char-table with the `char-fold-table' subtype. + +Let us refer to the character in question by char-x. +Each entry is either nil (meaning char-x only matches literally) +or a regexp. This regexp should match anything that char-x can +match by itself \(including char-x). For instance, the default +regexp for the ?+ character is \"[+⁺₊﬩﹢+]\". + +This table also has one extra slot which is also a char-table. +Each entry in the extra slot is an alist used for multi-character +matching (which may be nil). The elements of the alist should +have the form (SUFFIX . OTHER-REGEXP). If the characters after +char-x are equal to SUFFIX, then this combination of char-x + +SUFFIX is allowed to match OTHER-REGEXP. This is in addition to +char-x being allowed to match REGEXP. +For instance, the default alist for ?f includes: + \((\"fl\" . \"ffl\") (\"fi\" . \"ffi\") + (\"i\" . \"fi\") (\"f\" . \"ff\")) + +Exceptionally for the space character (32), ALIST is ignored.") + +(defun char-fold--make-space-string (n) + "Return a string that matches N spaces." + (format "\\(?:%s\\|%s\\)" + (make-string n ?\s) + (apply #'concat + (make-list n (or (aref char-fold-table ?\s) " "))))) + +;;;###autoload +(defun char-fold-to-regexp (string &optional _lax from) + "Return a regexp matching anything that char-folds into STRING. +Any character in STRING that has an entry in +`char-fold-table' is replaced with that entry (which is a +regexp) and other characters are `regexp-quote'd. + +If the resulting regexp would be too long for Emacs to handle, +just return the result of calling `regexp-quote' on STRING. + +FROM is for internal use. It specifies an index in the STRING +from which to start." + (let* ((spaces 0) + (multi-char-table (char-table-extra-slot char-fold-table 0)) + (i (or from 0)) + (end (length string)) + (out nil)) + ;; When the user types a space, we want to match the table entry + ;; for ?\s, which is generally a regexp like "[ ...]". However, + ;; the `search-spaces-regexp' variable doesn't "see" spaces inside + ;; these regexp constructs, so we need to use "\\( \\|[ ...]\\)" + ;; instead (to manually expose a space). Furthermore, the lax + ;; search engine acts on a bunch of spaces, not on individual + ;; spaces, so if the string contains sequential spaces like " ", we + ;; need to keep them grouped together like this: "\\( \\|[ ...][ ...]\\)". + (while (< i end) + (pcase (aref string i) + (`?\s (setq spaces (1+ spaces))) + (c (when (> spaces 0) + (push (char-fold--make-space-string spaces) out) + (setq spaces 0)) + (let ((regexp (or (aref char-fold-table c) + (regexp-quote (string c)))) + ;; Long string. The regexp would probably be too long. + (alist (unless (> end 50) + (aref multi-char-table c)))) + (push (let ((matched-entries nil) + (max-length 0)) + (dolist (entry alist) + (let* ((suffix (car entry)) + (len-suf (length suffix))) + (when (eq (compare-strings suffix 0 nil + string (1+ i) (+ i 1 len-suf) + nil) + t) + (push (cons len-suf (cdr entry)) matched-entries) + (setq max-length (max max-length len-suf))))) + ;; If no suffixes matched, just go on. + (if (not matched-entries) + regexp +;;; If N suffixes match, we "branch" out into N+1 executions for the +;;; length of the longest match. This means "fix" will match "fix" but +;;; not "fⅸ", but it's necessary to keep the regexp size from scaling +;;; exponentially. See https://lists.gnu.org/archive/html/emacs-devel/2015-11/msg02562.html + (let ((subs (substring string (1+ i) (+ i 1 max-length)))) + ;; `i' is still going to inc by 1 below. + (setq i (+ i max-length)) + (concat + "\\(?:" + (mapconcat (lambda (entry) + (let ((length (car entry)) + (suffix-regexp (cdr entry))) + (concat suffix-regexp + (char-fold-to-regexp subs nil length)))) + `((0 . ,regexp) . ,matched-entries) "\\|") + "\\)")))) + out)))) + (setq i (1+ i))) + (when (> spaces 0) + (push (char-fold--make-space-string spaces) out)) + (let ((regexp (apply #'concat (nreverse out)))) + ;; Limited by `MAX_BUF_SIZE' in `regex.c'. + (if (> (length regexp) 5000) + (regexp-quote string) + regexp)))) + + +;;; Commands provided for completeness. +(defun char-fold-search-forward (string &optional bound noerror count) + "Search forward for a char-folded version of STRING. +STRING is converted to a regexp with `char-fold-to-regexp', +which is searched for with `re-search-forward'. +BOUND NOERROR COUNT are passed to `re-search-forward'." + (interactive "sSearch: ") + (re-search-forward (char-fold-to-regexp string) bound noerror count)) + +(defun char-fold-search-backward (string &optional bound noerror count) + "Search backward for a char-folded version of STRING. +STRING is converted to a regexp with `char-fold-to-regexp', +which is searched for with `re-search-backward'. +BOUND NOERROR COUNT are passed to `re-search-backward'." + (interactive "sSearch: ") + (re-search-backward (char-fold-to-regexp string) bound noerror count)) + +(provide 'char-fold) + +;;; char-fold.el ends here diff --git a/lisp/character-fold.el b/lisp/character-fold.el deleted file mode 100644 index 2d3a8c67fa5..00000000000 --- a/lisp/character-fold.el +++ /dev/null @@ -1,242 +0,0 @@ -;;; character-fold.el --- match unicode to similar ASCII -*- lexical-binding: t; -*- - -;; Copyright (C) 2015-2016 Free Software Foundation, Inc. - -;; Maintainer: emacs-devel@gnu.org -;; Keywords: matching - -;; This file is part of GNU Emacs. - -;; GNU Emacs is free software: you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. - -;; GNU Emacs is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. - -;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs. If not, see . - -;;; Code: - -(eval-and-compile (put 'character-fold-table 'char-table-extra-slots 1)) - -(defconst character-fold-table - (eval-when-compile - (let ((equiv (make-char-table 'character-fold-table)) - (equiv-multi (make-char-table 'character-fold-table)) - (table (unicode-property-table-internal 'decomposition))) - (set-char-table-extra-slot equiv 0 equiv-multi) - - ;; Ensure the table is populated. - (let ((func (char-table-extra-slot table 1))) - (map-char-table (lambda (char v) - (when (consp char) - (funcall func (car char) v table))) - table)) - - ;; Compile a list of all complex characters that each simple - ;; character should match. - ;; In summary this loop does 3 things: - ;; - A complex character might be allowed to match its decomp. - ;; - The decomp is allowed to match the complex character. - ;; - A single char of the decomp might be allowed to match the - ;; character. - ;; Some examples in the comments below. - (map-char-table - (lambda (char decomp) - (when (consp decomp) - ;; Skip trivial cases like ?a decomposing to (?a). - (unless (and (not (cdr decomp)) - (eq char (car decomp))) - (if (symbolp (car decomp)) - ;; Discard a possible formatting tag. - (setq decomp (cdr decomp)) - ;; If there's no formatting tag, ensure that char matches - ;; its decomp exactly. This is because we want 'ä' to - ;; match 'ä', but we don't want '¹' to match '1'. - (aset equiv char - (cons (apply #'string decomp) - (aref equiv char)))) - - ;; Allow the entire decomp to match char. If decomp has - ;; multiple characters, this is done by adding an entry - ;; to the alist of the first character in decomp. This - ;; allows 'ff' to match 'ff', 'ä' to match 'ä', and '1' to - ;; match '¹'. - (let ((make-decomp-match-char - (lambda (decomp char) - (if (cdr decomp) - (aset equiv-multi (car decomp) - (cons (cons (apply #'string (cdr decomp)) - (regexp-quote (string char))) - (aref equiv-multi (car decomp)))) - (aset equiv (car decomp) - (cons (char-to-string char) - (aref equiv (car decomp)))))))) - (funcall make-decomp-match-char decomp char) - ;; Do it again, without the non-spacing characters. - ;; This allows 'a' to match 'ä'. - (let ((simpler-decomp nil) - (found-one nil)) - (dolist (c decomp) - (if (> (get-char-code-property c 'canonical-combining-class) 0) - (setq found-one t) - (push c simpler-decomp))) - (when (and simpler-decomp found-one) - (funcall make-decomp-match-char simpler-decomp char) - ;; Finally, if the decomp only had one spacing - ;; character, we allow this character to match the - ;; decomp. This is to let 'a' match 'ä'. - (unless (cdr simpler-decomp) - (aset equiv (car simpler-decomp) - (cons (apply #'string decomp) - (aref equiv (car simpler-decomp))))))))))) - table) - - ;; Add some manual entries. - (dolist (it '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»") - (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›") - (?` "❛" "‘" "‛" "󠀢" "❮" "‹"))) - (let ((idx (car it)) - (chars (cdr it))) - (aset equiv idx (append chars (aref equiv idx))))) - - ;; Convert the lists of characters we compiled into regexps. - (map-char-table - (lambda (char dec-list) - (let ((re (regexp-opt (cons (char-to-string char) dec-list)))) - (if (consp char) - (set-char-table-range equiv char re) - (aset equiv char re)))) - equiv) - equiv)) - "Used for folding characters of the same group during search. -This is a char-table with the `character-fold-table' subtype. - -Let us refer to the character in question by char-x. -Each entry is either nil (meaning char-x only matches literally) -or a regexp. This regexp should match anything that char-x can -match by itself \(including char-x). For instance, the default -regexp for the ?+ character is \"[+⁺₊﬩﹢+]\". - -This table also has one extra slot which is also a char-table. -Each entry in the extra slot is an alist used for multi-character -matching (which may be nil). The elements of the alist should -have the form (SUFFIX . OTHER-REGEXP). If the characters after -char-x are equal to SUFFIX, then this combination of char-x + -SUFFIX is allowed to match OTHER-REGEXP. This is in addition to -char-x being allowed to match REGEXP. -For instance, the default alist for ?f includes: - \((\"fl\" . \"ffl\") (\"fi\" . \"ffi\") - (\"i\" . \"fi\") (\"f\" . \"ff\")) - -Exceptionally for the space character (32), ALIST is ignored.") - -(defun character-fold--make-space-string (n) - "Return a string that matches N spaces." - (format "\\(?:%s\\|%s\\)" - (make-string n ?\s) - (apply #'concat - (make-list n (or (aref character-fold-table ?\s) " "))))) - -;;;###autoload -(defun character-fold-to-regexp (string &optional _lax from) - "Return a regexp matching anything that character-folds into STRING. -Any character in STRING that has an entry in -`character-fold-table' is replaced with that entry (which is a -regexp) and other characters are `regexp-quote'd. - -If the resulting regexp would be too long for Emacs to handle, -just return the result of calling `regexp-quote' on STRING. - -FROM is for internal use. It specifies an index in the STRING -from which to start." - (let* ((spaces 0) - (multi-char-table (char-table-extra-slot character-fold-table 0)) - (i (or from 0)) - (end (length string)) - (out nil)) - ;; When the user types a space, we want to match the table entry - ;; for ?\s, which is generally a regexp like "[ ...]". However, - ;; the `search-spaces-regexp' variable doesn't "see" spaces inside - ;; these regexp constructs, so we need to use "\\( \\|[ ...]\\)" - ;; instead (to manually expose a space). Furthermore, the lax - ;; search engine acts on a bunch of spaces, not on individual - ;; spaces, so if the string contains sequential spaces like " ", we - ;; need to keep them grouped together like this: "\\( \\|[ ...][ ...]\\)". - (while (< i end) - (pcase (aref string i) - (`?\s (setq spaces (1+ spaces))) - (c (when (> spaces 0) - (push (character-fold--make-space-string spaces) out) - (setq spaces 0)) - (let ((regexp (or (aref character-fold-table c) - (regexp-quote (string c)))) - ;; Long string. The regexp would probably be too long. - (alist (unless (> end 50) - (aref multi-char-table c)))) - (push (let ((matched-entries nil) - (max-length 0)) - (dolist (entry alist) - (let* ((suffix (car entry)) - (len-suf (length suffix))) - (when (eq (compare-strings suffix 0 nil - string (1+ i) (+ i 1 len-suf) - nil) - t) - (push (cons len-suf (cdr entry)) matched-entries) - (setq max-length (max max-length len-suf))))) - ;; If no suffixes matched, just go on. - (if (not matched-entries) - regexp -;;; If N suffixes match, we "branch" out into N+1 executions for the -;;; length of the longest match. This means "fix" will match "fix" but -;;; not "fⅸ", but it's necessary to keep the regexp size from scaling -;;; exponentially. See https://lists.gnu.org/archive/html/emacs-devel/2015-11/msg02562.html - (let ((subs (substring string (1+ i) (+ i 1 max-length)))) - ;; `i' is still going to inc by 1 below. - (setq i (+ i max-length)) - (concat - "\\(?:" - (mapconcat (lambda (entry) - (let ((length (car entry)) - (suffix-regexp (cdr entry))) - (concat suffix-regexp - (character-fold-to-regexp subs nil length)))) - `((0 . ,regexp) . ,matched-entries) "\\|") - "\\)")))) - out)))) - (setq i (1+ i))) - (when (> spaces 0) - (push (character-fold--make-space-string spaces) out)) - (let ((regexp (apply #'concat (nreverse out)))) - ;; Limited by `MAX_BUF_SIZE' in `regex.c'. - (if (> (length regexp) 5000) - (regexp-quote string) - regexp)))) - - -;;; Commands provided for completeness. -(defun character-fold-search-forward (string &optional bound noerror count) - "Search forward for a character-folded version of STRING. -STRING is converted to a regexp with `character-fold-to-regexp', -which is searched for with `re-search-forward'. -BOUND NOERROR COUNT are passed to `re-search-forward'." - (interactive "sSearch: ") - (re-search-forward (character-fold-to-regexp string) bound noerror count)) - -(defun character-fold-search-backward (string &optional bound noerror count) - "Search backward for a character-folded version of STRING. -STRING is converted to a regexp with `character-fold-to-regexp', -which is searched for with `re-search-backward'. -BOUND NOERROR COUNT are passed to `re-search-backward'." - (interactive "sSearch: ") - (re-search-backward (character-fold-to-regexp string) bound noerror count)) - -(provide 'character-fold) - -;;; character-fold.el ends here diff --git a/lisp/isearch.el b/lisp/isearch.el index 418d9ea273a..e4de0b627e3 100644 --- a/lisp/isearch.el +++ b/lisp/isearch.el @@ -222,7 +222,7 @@ It is nil if none yet.") Default value, nil, means edit the string instead." :type 'boolean) -(autoload 'character-fold-to-regexp "character-fold") +(autoload 'char-fold-to-regexp "char-fold") (defcustom search-default-mode nil "Default mode to use when starting isearch. @@ -236,7 +236,7 @@ isearch). If a function, use that function as an `isearch-regexp-function'. Example functions (and the keys to toggle them during isearch) are `word-search-regexp' \(`\\[isearch-toggle-word]'), `isearch-symbol-regexp' -\(`\\[isearch-toggle-symbol]'), and `character-fold-to-regexp' \(`\\[isearch-toggle-character-fold]')." +\(`\\[isearch-toggle-symbol]'), and `char-fold-to-regexp' \(`\\[isearch-toggle-char-fold]')." ;; :type is set below by `isearch-define-mode-toggle'. :type '(choice (const :tag "Literal search" nil) (const :tag "Regexp search" t) @@ -718,7 +718,7 @@ Type \\[isearch-toggle-invisible] to toggle search in invisible text. Type \\[isearch-toggle-regexp] to toggle regular-expression mode. Type \\[isearch-toggle-word] to toggle word mode. Type \\[isearch-toggle-symbol] to toggle symbol mode. -Type \\[isearch-toggle-character-fold] to toggle character folding. +Type \\[isearch-toggle-char-fold] to toggle character folding. Type \\[isearch-toggle-lax-whitespace] to toggle whitespace matching. In incremental searches, a space or spaces normally matches any whitespace @@ -1546,9 +1546,9 @@ The command then executes BODY and updates the isearch prompt." Turning on word search turns off regexp mode.") (isearch-define-mode-toggle symbol "_" isearch-symbol-regexp "\ Turning on symbol search turns off regexp mode.") -(isearch-define-mode-toggle character-fold "'" character-fold-to-regexp "\ +(isearch-define-mode-toggle char-fold "'" char-fold-to-regexp "\ Turning on character-folding turns off regexp mode.") -(put 'character-fold-to-regexp 'isearch-message-prefix "char-fold ") +(put 'char-fold-to-regexp 'isearch-message-prefix "char-fold ") (isearch-define-mode-toggle regexp "r" nil nil (setq isearch-regexp (not isearch-regexp)) diff --git a/lisp/menu-bar.el b/lisp/menu-bar.el index 6571a4b9d4f..640395e8d7d 100644 --- a/lisp/menu-bar.el +++ b/lisp/menu-bar.el @@ -1257,7 +1257,7 @@ mail status in mode line")) (defvar menu-bar-search-options-menu (let ((menu (make-sparse-keymap "Search Options"))) - (dolist (x '((character-fold-to-regexp "Fold Characters" "Character folding") + (dolist (x '((char-fold-to-regexp "Fold Characters" "Character folding") (isearch-symbol-regexp "Whole Symbols" "Whole symbol") (word-search-regexp "Whole Words" "Whole word"))) (bindings--define-key menu (vector (nth 0 x)) diff --git a/lisp/replace.el b/lisp/replace.el index 26e5875dc08..fe90062cc8a 100644 --- a/lisp/replace.el +++ b/lisp/replace.el @@ -33,7 +33,7 @@ :type 'boolean :group 'matching) -(defcustom replace-character-fold nil +(defcustom replace-char-fold nil "Non-nil means replacement commands should do character folding in matches. This means, for instance, that \\=' will match a large variety of unicode quotes. @@ -324,7 +324,7 @@ If `replace-lax-whitespace' is non-nil, a space or spaces in the string to be replaced will match a sequence of whitespace chars defined by the regexp in `search-whitespace-regexp'. -If `replace-character-fold' is non-nil, matching uses character folding, +If `replace-char-fold' is non-nil, matching uses character folding, i.e. it ignores diacritics and other differences between equivalent character strings. @@ -383,7 +383,7 @@ If `replace-regexp-lax-whitespace' is non-nil, a space or spaces in the regexp to be replaced will match a sequence of whitespace chars defined by the regexp in `search-whitespace-regexp'. -This function is not affected by `replace-character-fold'. +This function is not affected by `replace-char-fold'. Third arg DELIMITED (prefix arg if interactive), if non-nil, means replace only matches surrounded by word boundaries. A negative prefix arg means @@ -474,7 +474,7 @@ If `replace-regexp-lax-whitespace' is non-nil, a space or spaces in the regexp to be replaced will match a sequence of whitespace chars defined by the regexp in `search-whitespace-regexp'. -This function is not affected by `replace-character-fold'. +This function is not affected by `replace-char-fold'. Third arg DELIMITED (prefix arg if interactive), if non-nil, means replace only matches that are surrounded by word boundaries. @@ -568,7 +568,7 @@ If `replace-lax-whitespace' is non-nil, a space or spaces in the string to be replaced will match a sequence of whitespace chars defined by the regexp in `search-whitespace-regexp'. -If `replace-character-fold' is non-nil, matching uses character folding, +If `replace-char-fold' is non-nil, matching uses character folding, i.e. it ignores diacritics and other differences between equivalent character strings. @@ -623,7 +623,7 @@ If `replace-regexp-lax-whitespace' is non-nil, a space or spaces in the regexp to be replaced will match a sequence of whitespace chars defined by the regexp in `search-whitespace-regexp'. -This function is not affected by `replace-character-fold' +This function is not affected by `replace-char-fold' In Transient Mark mode, if the mark is active, operate on the contents of the region. Otherwise, operate from point to the end of the buffer's @@ -2051,9 +2051,9 @@ It is called with three arguments, as if it were ;; used after `recursive-edit' might override them. (let* ((isearch-regexp regexp-flag) (isearch-regexp-function (or delimited-flag - (and replace-character-fold + (and replace-char-fold (not regexp-flag) - #'character-fold-to-regexp))) + #'char-fold-to-regexp))) (isearch-lax-whitespace replace-lax-whitespace) (isearch-regexp-lax-whitespace diff --git a/test/automated/char-fold-tests.el b/test/automated/char-fold-tests.el new file mode 100644 index 00000000000..485254aa6cf --- /dev/null +++ b/test/automated/char-fold-tests.el @@ -0,0 +1,124 @@ +;;; char-fold-tests.el --- Tests for char-fold.el -*- lexical-binding: t; -*- + +;; Copyright (C) 2013-2016 Free Software Foundation, Inc. + +;; Author: Artur Malabarba + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . + +;;; Code: + +(require 'ert) +(require 'char-fold) + +(defun char-fold--random-word (n) + (mapconcat (lambda (_) (string (+ 9 (random 117)))) + (make-list n nil) "")) + +(defun char-fold--test-search-with-contents (contents string) + (with-temp-buffer + (insert contents) + (goto-char (point-min)) + (should (search-forward-regexp (char-fold-to-regexp string) nil 'noerror)) + (goto-char (point-min)) + (should (char-fold-search-forward string nil 'noerror)) + (should (char-fold-search-backward string nil 'noerror)))) + + +(ert-deftest char-fold--test-consistency () + (dotimes (n 30) + (let ((w (char-fold--random-word n))) + ;; A folded string should always match the original string. + (char-fold--test-search-with-contents w w)))) + +(ert-deftest char-fold--test-lax-whitespace () + (dotimes (n 40) + (let ((w1 (char-fold--random-word n)) + (w2 (char-fold--random-word n)) + (search-spaces-regexp "\\s-+")) + (char-fold--test-search-with-contents + (concat w1 "\s\n\s\t\f\t\n\r\t" w2) + (concat w1 " " w2)) + (char-fold--test-search-with-contents + (concat w1 "\s\n\s\t\f\t\n\r\t" w2) + (concat w1 (make-string 10 ?\s) w2))))) + +(defun char-fold--test-match-exactly (string &rest strings-to-match) + (let ((re (concat "\\`" (char-fold-to-regexp string) "\\'"))) + (dolist (it strings-to-match) + (should (string-match re it))) + ;; Case folding + (let ((case-fold-search t)) + (dolist (it strings-to-match) + (should (string-match (upcase re) (downcase it))) + (should (string-match (downcase re) (upcase it))))))) + +(ert-deftest char-fold--test-some-defaults () + (dolist (it '(("ffl" . "ffl") ("ffi" . "ffi") + ("fi" . "fi") ("ff" . "ff") + ("ä" . "ä"))) + (char-fold--test-search-with-contents (cdr it) (car it)) + (let ((multi (char-table-extra-slot char-fold-table 0)) + (char-fold-table (make-char-table 'char-fold-table))) + (set-char-table-extra-slot char-fold-table 0 multi) + (char-fold--test-match-exactly (car it) (cdr it))))) + +(ert-deftest char-fold--test-fold-to-regexp () + (let ((char-fold-table (make-char-table 'char-fold-table)) + (multi (make-char-table 'char-fold-table))) + (set-char-table-extra-slot char-fold-table 0 multi) + (aset char-fold-table ?a "xx") + (aset char-fold-table ?1 "44") + (aset char-fold-table ?\s "-!-") + (char-fold--test-match-exactly "a1a1" "xx44xx44") + (char-fold--test-match-exactly "a1 a 1" "xx44-!--!-xx-!-44") + (aset multi ?a '(("1" . "99") + ("2" . "88") + ("12" . "77"))) + (char-fold--test-match-exactly "a" "xx") + (char-fold--test-match-exactly "a1" "xx44" "99") + (char-fold--test-match-exactly "a12" "77" "xx442" "992") + (char-fold--test-match-exactly "a2" "88") + (aset multi ?1 '(("2" . "yy"))) + (char-fold--test-match-exactly "a1" "xx44" "99") + (char-fold--test-match-exactly "a12" "77" "xx442" "992") + ;; Support for this case is disabled. See function definition or: + ;; https://lists.gnu.org/archive/html/emacs-devel/2015-11/msg02562.html + ;; (char-fold--test-match-exactly "a12" "xxyy") + )) + +(ert-deftest char-fold--speed-test () + (dolist (string (append '("tty-set-up-initial-frame-face" + "tty-set-up-initial-frame-face-frame-faceframe-faceframe-faceframe-face") + (mapcar #'char-fold--random-word '(10 50 100 + 50 100)))) + (message "Testing %s" string) + ;; Make sure we didn't just fallback on the trivial search. + (should-not (string= (regexp-quote string) + (char-fold-to-regexp string))) + (with-temp-buffer + (save-excursion (insert string)) + (let ((time (time-to-seconds (current-time)))) + ;; Our initial implementation of case-folding in char-folding + ;; created a lot of redundant paths in the regexp. Because of + ;; that, if a really long string "almost" matches, the regexp + ;; engine took a long time to realize that it doesn't match. + (should-not (char-fold-search-forward (concat string "c") nil 'noerror)) + ;; Ensure it took less than a second. + (should (< (- (time-to-seconds (current-time)) + time) + 1)))))) + +(provide 'char-fold-tests) +;;; char-fold-tests.el ends here diff --git a/test/automated/character-fold-tests.el b/test/automated/character-fold-tests.el deleted file mode 100644 index c611217712e..00000000000 --- a/test/automated/character-fold-tests.el +++ /dev/null @@ -1,124 +0,0 @@ -;;; character-fold-tests.el --- Tests for character-fold.el -*- lexical-binding: t; -*- - -;; Copyright (C) 2013-2016 Free Software Foundation, Inc. - -;; Author: Artur Malabarba - -;; This program is free software; you can redistribute it and/or modify -;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation, either version 3 of the License, or -;; (at your option) any later version. - -;; This program is distributed in the hope that it will be useful, -;; but WITHOUT ANY WARRANTY; without even the implied warranty of -;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -;; GNU General Public License for more details. - -;; You should have received a copy of the GNU General Public License -;; along with this program. If not, see . - -;;; Code: - -(require 'ert) -(require 'character-fold) - -(defun character-fold--random-word (n) - (mapconcat (lambda (_) (string (+ 9 (random 117)))) - (make-list n nil) "")) - -(defun character-fold--test-search-with-contents (contents string) - (with-temp-buffer - (insert contents) - (goto-char (point-min)) - (should (search-forward-regexp (character-fold-to-regexp string) nil 'noerror)) - (goto-char (point-min)) - (should (character-fold-search-forward string nil 'noerror)) - (should (character-fold-search-backward string nil 'noerror)))) - - -(ert-deftest character-fold--test-consistency () - (dotimes (n 30) - (let ((w (character-fold--random-word n))) - ;; A folded string should always match the original string. - (character-fold--test-search-with-contents w w)))) - -(ert-deftest character-fold--test-lax-whitespace () - (dotimes (n 40) - (let ((w1 (character-fold--random-word n)) - (w2 (character-fold--random-word n)) - (search-spaces-regexp "\\s-+")) - (character-fold--test-search-with-contents - (concat w1 "\s\n\s\t\f\t\n\r\t" w2) - (concat w1 " " w2)) - (character-fold--test-search-with-contents - (concat w1 "\s\n\s\t\f\t\n\r\t" w2) - (concat w1 (make-string 10 ?\s) w2))))) - -(defun character-fold--test-match-exactly (string &rest strings-to-match) - (let ((re (concat "\\`" (character-fold-to-regexp string) "\\'"))) - (dolist (it strings-to-match) - (should (string-match re it))) - ;; Case folding - (let ((case-fold-search t)) - (dolist (it strings-to-match) - (should (string-match (upcase re) (downcase it))) - (should (string-match (downcase re) (upcase it))))))) - -(ert-deftest character-fold--test-some-defaults () - (dolist (it '(("ffl" . "ffl") ("ffi" . "ffi") - ("fi" . "fi") ("ff" . "ff") - ("ä" . "ä"))) - (character-fold--test-search-with-contents (cdr it) (car it)) - (let ((multi (char-table-extra-slot character-fold-table 0)) - (character-fold-table (make-char-table 'character-fold-table))) - (set-char-table-extra-slot character-fold-table 0 multi) - (character-fold--test-match-exactly (car it) (cdr it))))) - -(ert-deftest character-fold--test-fold-to-regexp () - (let ((character-fold-table (make-char-table 'character-fold-table)) - (multi (make-char-table 'character-fold-table))) - (set-char-table-extra-slot character-fold-table 0 multi) - (aset character-fold-table ?a "xx") - (aset character-fold-table ?1 "44") - (aset character-fold-table ?\s "-!-") - (character-fold--test-match-exactly "a1a1" "xx44xx44") - (character-fold--test-match-exactly "a1 a 1" "xx44-!--!-xx-!-44") - (aset multi ?a '(("1" . "99") - ("2" . "88") - ("12" . "77"))) - (character-fold--test-match-exactly "a" "xx") - (character-fold--test-match-exactly "a1" "xx44" "99") - (character-fold--test-match-exactly "a12" "77" "xx442" "992") - (character-fold--test-match-exactly "a2" "88") - (aset multi ?1 '(("2" . "yy"))) - (character-fold--test-match-exactly "a1" "xx44" "99") - (character-fold--test-match-exactly "a12" "77" "xx442" "992") - ;; Support for this case is disabled. See function definition or: - ;; https://lists.gnu.org/archive/html/emacs-devel/2015-11/msg02562.html - ;; (character-fold--test-match-exactly "a12" "xxyy") - )) - -(ert-deftest character-fold--speed-test () - (dolist (string (append '("tty-set-up-initial-frame-face" - "tty-set-up-initial-frame-face-frame-faceframe-faceframe-faceframe-face") - (mapcar #'character-fold--random-word '(10 50 100 - 50 100)))) - (message "Testing %s" string) - ;; Make sure we didn't just fallback on the trivial search. - (should-not (string= (regexp-quote string) - (character-fold-to-regexp string))) - (with-temp-buffer - (save-excursion (insert string)) - (let ((time (time-to-seconds (current-time)))) - ;; Our initial implementation of case-folding in char-folding - ;; created a lot of redundant paths in the regexp. Because of - ;; that, if a really long string "almost" matches, the regexp - ;; engine took a long time to realize that it doesn't match. - (should-not (character-fold-search-forward (concat string "c") nil 'noerror)) - ;; Ensure it took less than a second. - (should (< (- (time-to-seconds (current-time)) - time) - 1)))))) - -(provide 'character-fold-tests) -;;; character-fold-tests.el ends here