From b789b2f7869b64e0f805a68c4e5ba42aaa6084fc Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Fri, 13 Sep 2024 14:31:28 +0300 Subject: [PATCH] Improve accuracy of character categories * lisp/international/characters.el: Assign 'digit' category to all the characters whose Unicode 'general-category' is Nd. * admin/unidata/blocks.awk: Add code to assign 'symbol' category to all characters belonging to the 'symbol' script. * etc/NEWS: Announce the above changes. (cherry picked from commit 7376623a244a91d1de5245645b4b3e8c9469d422) --- admin/unidata/blocks.awk | 8 ++++++-- etc/NEWS | 12 ++++++++++++ lisp/international/characters.el | 13 +++++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/admin/unidata/blocks.awk b/admin/unidata/blocks.awk index 6393b7bdc63..5126c0d3ff3 100755 --- a/admin/unidata/blocks.awk +++ b/admin/unidata/blocks.awk @@ -278,6 +278,10 @@ END { print " (or (memq (nth 2 elt) script-list)" print " (setq script-list (cons (nth 2 elt) script-list))))" print " (set-char-table-extra-slot char-script-table 0 (nreverse script-list)))" - print "\n" - print "(provide 'charscript)" + print "\n(map-char-table" + print " (lambda (ch script)" + print " (and (eq script 'symbol)" + print " (modify-category-entry ch ?5)))" + print " char-script-table)" + print "\n(provide 'charscript)" } diff --git a/etc/NEWS b/etc/NEWS index 6beddd6cffa..9766e30deb7 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -293,6 +293,18 @@ That convention was: '(error &rest ARGS)'. ** The 'rx' category name 'chinese-two-byte' must now be spelled correctly. An old alternative name (without the first 'e') has been removed. +--- +** All the digit characters now have the 'digit' category. +All the characters whose Unicode general-category is Nd now have the +'digit' category, whose mnemonic is '6'. This includes both ASCII and +non-ASCII digit characters. + +--- +** All the symbol characters now have the 'symbol' category. 
+All the characters that belong to the 'symbol' script (according to +'char-script-table') now have the 'symbol' category, whose mnemonic is +'5'. * Lisp Changes in Emacs 31.1 diff --git a/lisp/international/characters.el b/lisp/international/characters.el index b13d5f9d7a3..44293b033c7 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@ -849,6 +849,19 @@ with L, LRE, or LRO Unicode bidi character type.") ;; Fixme: syntax for symbols &c ) + +;; Symbols and digits +;; Each character whose script is 'symbol' gets the symbol category +;; (mnemonic ?5); that assignment is emitted into charscript.el. +;; Each character whose Unicode general-category is Nd gets the digit +;; category (mnemonic ?6): +(let ((table (unicode-property-table-internal 'general-category))) + (when table + (map-char-table (lambda (key val) + (if (eq val 'Nd) + (modify-category-entry key ?6))) + table))) + (let ((pairs '("⁅⁆" ; U+2045 U+2046 "⁽⁾" ; U+207D U+207E -- 2.39.2