(defun ucs-names ()
"Return table of CHAR-NAME keys and CHAR-CODE values cached in `ucs-names'."
(or ucs-names
+ ;; Sometimes these ranges will need adjusting as codepoints are
+ ;; added to unicode. The test case
+ ;; 'mule-cmds-tests--ucs-names-missing-names' will tell you
+ ;; which are missing (Bug#65997).
(let ((ranges
'((#x0000 . #x33FF)
;; (#x3400 . #x4DBF) CJK Ideographs Extension A
(#x14400 . #x14646)
;; (#x14647 . #x167FF) unused
(#x16800 . #x16F9F)
- (#x16FE0 . #x16FE3)
+ (#x16FE0 . #x16FF1)
;; (#x17000 . #x187FF) Tangut Ideographs
;; (#x18800 . #x18AFF) Tangut Components
;; (#x18B00 . #x18CFF) Khitan Small Script
;; (#x18D00 . #x18D0F) Tangut Ideograph Supplement
;; (#x18D10 . #x1AFEF) unused
- (#x1AFF0 . #x1B12F)
- ;; (#x1B130 . #x1B14F) unused
+ (#x1AFF0 . #x1B122)
+ ;; (#x1B123 . #x1B131) unused
+ (#x1B132 . #x1B132)
+ ;; (#x1B133 . #x1B14F) unused
(#x1B150 . #x1B16F)
(#x1B170 . #x1B2FF)
;; (#x1B300 . #x1BBFF) unused
(while (<= c end)
(let ((new-name (get-char-code-property c 'name))
(old-name (get-char-code-property c 'old-name)))
- ;; In theory this code could end up pushing an "old-name" that
- ;; shadows a "new-name" but in practice every time an
- ;; `old-name' conflicts with a `new-name', the newer one has a
- ;; higher code, so it gets pushed later!
+ ;; This code used to push both old-name and new-name
+ ;; on the assumption that the new-name codepoint would
+ ;; always be higher, which was true for a long time.
+ ;; As of at latest 2023-09-15, this is no longer true,
+ ;; so we now skip the old-name if it conflicts with an
+ ;; existing new-name (Bug#65997).
(if new-name (puthash new-name c names))
- (if old-name (puthash old-name c names))
+ (when (and old-name
+ (not (gethash old-name names)))
+ (puthash old-name c names))
;; Unicode uses the spelling "lamda" in character
;; names, instead of "lambda", due to "preferences
;; expressed by the Greek National Body" (Bug#30513).
(kbd "C-x RET c u t f - 8 RET C-u C-u c a b RET")
(read-string "prompt:"))))))
+;;Bug#65997, ensure that old-names haven't overriden new names.
+(ert-deftest mule-cmds-tests--ucs-names-old-name-override ()
+ (let (code-points)
+ (dotimes (u (1+ (max-char 'ucs)))
+ (when-let* ((name (get-char-code-property u 'name))
+ (c (char-from-name name)))
+ (when (and (not (<= #xD800 u #xDFFF))
+ (not (= c u)))
+ (push (format "%X" u) code-points))))
+ (setq code-points (nreverse code-points))
+ (should (null code-points))))
+
+;; Bug#65997, ensure that all codepoints with names are in '(ucs-names)'.
+(ert-deftest mule-cmds-tests--ucs-names-missing-names ()
+ (let (code-points)
+ (dotimes (u (1+ (max-char 'ucs)))
+ (when-let ((name (get-char-code-property u 'name)))
+ (when (and (not (<= #xD800 u #xDFFF))
+ (not (<= #x18800 u #x18AFF))
+ (not (char-from-name name)))
+ (push (format "%X" u) code-points))))
+ (setq code-points (nreverse code-points))
+ (should (null code-points))))
+
(ert-deftest mule-utf-7 ()
;; utf-7 and utf-7-imap are not ASCII-compatible.
(should-not (coding-system-get 'utf-7 :ascii-compatible-p))