From 96c2c098aeed5c85733577ebbdaf33af6fbb59e9 Mon Sep 17 00:00:00 2001 From: Mark Oteiza Date: Thu, 31 Aug 2017 17:22:39 -0400 Subject: [PATCH] Make ucs-names a hash table (Bug#28302) * etc/NEWS: Mention the type change. * lisp/descr-text.el (describe-char): Use gethash to access ucs-names. Hardcode BEL's name into the function instead of needlessly mapping over the hash table in the spirit of rassoc. * lisp/international/mule-cmds.el (ucs-names): Fix variable and function docstrings. Initialize a hash table for ucs-names--the number of entries is 42845 here. Switch to hash-table getters/setters. (mule--ucs-names-annotation): Use hash-table getter. (char-from-name): Upcase the string if ignore-case is truthy. * lisp/leim/quail/latin-ltx.el: Use maphash instead of dolist. --- etc/NEWS | 3 +++ lisp/descr-text.el | 6 ++--- lisp/international/mule-cmds.el | 43 +++++++++++++++++---------------- lisp/leim/quail/latin-ltx.el | 30 +++++++++++------------ 4 files changed, 43 insertions(+), 39 deletions(-) diff --git a/etc/NEWS b/etc/NEWS index 0889303f82e..d32b0e5bc89 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1154,6 +1154,9 @@ table implementation. This uses a new bytecode op 'switch', which isn't compatible with previous Emacs versions. This functionality can be disabled by setting 'byte-compile-cond-use-jump-table' to nil. +--- +** The alist 'ucs-names' is now a hash table. + --- ** 'C-up', 'C-down', 'C-left' and 'C-right' are now defined in term mode to send the same escape sequences that xterm does. This makes diff --git a/lisp/descr-text.el b/lisp/descr-text.el index 6f36bbed680..b3c96988dd6 100644 --- a/lisp/descr-text.el +++ b/lisp/descr-text.el @@ -617,16 +617,16 @@ relevant to POS." (list (let* ((names (ucs-names)) (name - (or (when (= char 7) + (or (when (= char ?\a) ;; Special case for "BELL" which is ;; apparently the only char which ;; doesn't have a new name and whose ;; old-name is shadowed by a newer char ;; with that name (bug#25641). - (car (rassoc char names))) + "BELL (BEL)") (get-char-code-property char 'name) (get-char-code-property char 'old-name)))) - (if (and name (assoc-string name names)) + (if (and name (gethash name names)) (format "type \"C-x 8 RET %x\" or \"C-x 8 RET %s\"" char name) diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index 338ca6a6e3c..a596411eb78 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el @@ -2923,10 +2923,10 @@ on encoding." (make-obsolete-variable 'nonascii-translation-table "do not use it." "23.1") (defvar ucs-names nil - "Alist of cached (CHAR-NAME . CHAR-CODE) pairs.") + "Hash table of cached CHAR-NAME keys to CHAR-CODE values.") (defun ucs-names () - "Return alist of (CHAR-NAME . CHAR-CODE) pairs cached in `ucs-names'." + "Return table of CHAR-NAME keys and CHAR-CODE values cached in `ucs-names'." (or ucs-names (let ((ranges '((#x0000 . #x33FF) @@ -2954,38 +2954,39 @@ on encoding." ;; (#x20000 . #xDFFFF) CJK Ideograph Extension A, B, etc, unused (#xE0000 . #xE01FF))) (gc-cons-threshold 10000000) - names) - (dolist (range ranges) - (let ((c (car range)) - (end (cdr range))) - (while (<= c end) + (names (make-hash-table :size 42943 :test #'equal))) + (dolist (range ranges) + (let ((c (car range)) + (end (cdr range))) + (while (<= c end) (let ((new-name (get-char-code-property c 'name)) (old-name (get-char-code-property c 'old-name))) - ;; In theory this code could end up pushing an "old-name" that - ;; shadows a "new-name" but in practice every time an - ;; `old-name' conflicts with a `new-name', the newer one has a - ;; higher code, so it gets pushed later! - (if new-name (push (cons new-name c) names)) - (if old-name (push (cons old-name c) names)) - (setq c (1+ c)))))) - ;; Special case for "BELL" which is apparently the only char which - ;; doesn't have a new name and whose old-name is shadowed by a newer - ;; char with that name. - (setq ucs-names `(("BELL (BEL)" . 7) ,@names))))) + ;; In theory this code could end up pushing an "old-name" that + ;; shadows a "new-name" but in practice every time an + ;; `old-name' conflicts with a `new-name', the newer one has a + ;; higher code, so it gets pushed later! + (if new-name (puthash new-name c names)) + (if old-name (puthash old-name c names)) + (setq c (1+ c)))))) + ;; Special case for "BELL" which is apparently the only char which + ;; doesn't have a new name and whose old-name is shadowed by a newer + ;; char with that name. + (puthash "BELL (BEL)" ?\a names) + (setq ucs-names names)))) (defun mule--ucs-names-annotation (name) ;; FIXME: It would be much better to add this annotation before rather than ;; after the char name, so the annotations are aligned. ;; FIXME: The default behavior of displaying annotations in italics ;; doesn't work well here. - (let ((char (assoc name ucs-names))) - (when char (format " (%c)" (cdr char))))) + (let ((char (gethash name ucs-names))) + (when char (format " (%c)" char)))) (defun char-from-name (string &optional ignore-case) "Return a character as a number from its Unicode name STRING. If optional IGNORE-CASE is non-nil, ignore case in STRING. Return nil if STRING does not name a character." - (or (cdr (assoc-string string (ucs-names) ignore-case)) + (or (gethash (if ignore-case (upcase string) string) (ucs-names)) (let ((minus (string-match-p "-[0-9A-F]+\\'" string))) (when minus ;; Parse names like "VARIATION SELECTOR-17" and "CJK diff --git a/lisp/leim/quail/latin-ltx.el b/lisp/leim/quail/latin-ltx.el index 6c5afcd4f93..d8ea90ec3ec 100644 --- a/lisp/leim/quail/latin-ltx.el +++ b/lisp/leim/quail/latin-ltx.el @@ -75,20 +75,20 @@ system, including many technical ones. Examples: (`(,seq ,re) (let ((count 0) (re (eval re t))) - (dolist (pair (ucs-names)) - (let ((name (car pair)) - (char (cdr pair))) - (when (and (characterp char) ;; Ignore char-ranges. - (string-match re name)) - (let ((keys (if (stringp seq) - (replace-match seq nil nil name) - (funcall seq name char)))) - (if (listp keys) - (dolist (x keys) - (setq count (1+ count)) - (push (list x char) newrules)) - (setq count (1+ count)) - (push (list keys char) newrules)))))) + (maphash + (lambda (name char) + (when (and (characterp char) ;; Ignore char-ranges. + (string-match re name)) + (let ((keys (if (stringp seq) + (replace-match seq nil nil name) + (funcall seq name char)))) + (if (listp keys) + (dolist (x keys) + (setq count (1+ count)) + (push (list x char) newrules)) + (setq count (1+ count)) + (push (list keys char) newrules))))) + (ucs-names)) ;; (message "latin-ltx: %d mappings for %S" count re) )))) (setq newrules (delete-dups newrules)) @@ -206,7 +206,7 @@ system, including many technical ones. Examples: ((lambda (name char) (let* ((base (concat (match-string 1 name) (match-string 3 name))) - (basechar (cdr (assoc base (ucs-names))))) + (basechar (gethash base (ucs-names)))) (when (latin-ltx--ascii-p basechar) (string (if (match-end 2) ?^ ?_) basechar)))) "\\(.*\\)SU\\(?:B\\|\\(PER\\)\\)SCRIPT \\(.*\\)") -- 2.39.2