From a06a2950e168dddcbf1c3cd14697875d93a4f9ff Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Sat, 5 Aug 2023 17:55:56 +0300 Subject: [PATCH] Allow user control on char-width of "ambiguous" characters * src/character.c (syms_of_character) : New char-table. * lisp/international/characters.el (ambiguous-width-chars): Fill the table. (update-cjk-ambiguous-char-widths): New function. (cjk-ambiguous-chars-are-wide): New defcustom, uses 'update-cjk-ambiguous-char-widths' as its :set function. (use-cjk-char-width-table): Obey 'cjk-ambiguous-chars-are-wide' by adding another child char-table for ambiguous-width characters, where the width is set according to the option. * lisp/language/chinese.el ("Chinese-GB", "Chinese-BIG5") ("Chinese-CNS", "Chinese-EUC-TW", "Chinese-GBK"): * lisp/language/japanese.el ("Japanese"): * lisp/language/korean.el ("Korean"): Add new language-info slot 'cjk-locale-symbol'. Bug#64420 --- lisp/international/characters.el | 217 ++++++++++++++++++++++++++++++- lisp/language/chinese.el | 5 + lisp/language/japanese.el | 1 + lisp/language/korean.el | 1 + src/character.c | 8 ++ 5 files changed, 231 insertions(+), 1 deletion(-) diff --git a/lisp/international/characters.el b/lisp/international/characters.el index 9aea5e27063..1aa570ca59a 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@ -1394,6 +1394,174 @@ with L, LRE, or LRO Unicode bidi character type.") (dolist (elt l) (set-char-table-range char-width-table elt 2))) +;; A: East Asian "Ambiguous" characters. +(let ((l '((#x00A1 . #x00A1) + (#x00A4 . #x00A4) + (#x00A7 . #x00A8) + (#x00AA . #x00AA) + (#x00AD . #x00AE) + (#x00B0 . #x00B4) + (#x00B6 . #x00BA) + (#x00BC . #x00BF) + (#x00C6 . #x00C6) + (#x00D0 . #x00D0) + (#x00D7 . #x00D8) + (#x00E0 . #x00E1) + (#x00E6 . #x00E6) + (#x00E8 . #x00EA) + (#x00EC . #x00ED) + (#x00F0 . #x00F0) + (#x00F2 . #x00F3) + (#x00F7 . #x00FA) + (#x00FC . #x00FC) + (#x00FE . #x00FE) + (#x0101 . #x0101) + (#x0111 . #x0111) + (#x0113 . #x0113) + (#x011B . #x011B) + (#x0126 . #x0127) + (#x012B . #x012B) + (#x0131 . #x0133) + (#x0138 . #x0138) + (#x013F . #x0142) + (#x0144 . #x0144) + (#x0148 . #x014B) + (#x014D . #x014D) + (#x0152 . #x0153) + (#x0166 . #x0167) + (#x016B . #x016B) + (#x01CE . #x01CE) + (#x01D0 . #x01D0) + (#x01D2 . #x01D2) + (#x01D4 . #x01D4) + (#x01D6 . #x01D6) + (#x01D8 . #x01D8) + (#x01DA . #x01DA) + (#x01DC . #x01DC) + (#x0251 . #x0251) + (#x0261 . #x0261) + (#x02C4 . #x02C4) + (#x02C7 . #x02C7) + (#x02C9 . #x02CB) + (#x02CD . #x02CD) + (#x02D0 . #x02D0) + (#x02D8 . #x02DB) + (#x02DD . #x02DD) + (#x02DF . #x02DF) + (#x0300 . #x036F) + (#x0391 . #x03A1) + (#x03A3 . #x03A9) + (#x03B1 . #x03C1) + (#x03C3 . #x03C9) + (#x0401 . #x0401) + (#x0410 . #x044F) + (#x0451 . #x0451) + (#x2010 . #x2010) + (#x2013 . #x2016) + (#x2018 . #x2019) + (#x201C . #x201D) + (#x2020 . #x2022) + (#x2024 . #x2027) + (#x2030 . #x2030) + (#x2032 . #x2033) + (#x2035 . #x2035) + (#x203E . #x203E) + (#x2074 . #x2074) + (#x207F . #x207F) + (#x2081 . #x2084) + (#x20AC . #x20AC) + (#x2103 . #x2103) + (#x2105 . #x2105) + (#x2109 . #x2109) + (#x2113 . #x2113) + (#x2116 . #x2116) + (#x2121 . #x2122) + (#x2126 . #x2126) + (#x212B . #x212B) + (#x2153 . #x2154) + (#x215B . #x215E) + (#x2160 . #x216B) + (#x2170 . #x2179) + (#x2189 . #x2189) + (#x2190 . #x2199) + (#x21B8 . #x21B9) + (#x21D2 . #x21D2) + (#x21D4 . #x21D4) + (#x21E7 . #x21E7) + (#x2200 . #x2200) + (#x2202 . #x2203) + (#x2207 . #x2208) + (#x220B . #x220B) + (#x220F . #x220F) + (#x2211 . #x2211) + (#x2215 . #x2215) + (#x221A . #x221A) + (#x221D . #x2220) + (#x2223 . #x2223) + (#x2225 . #x2225) + (#x2227 . #x222C) + (#x222E . #x222E) + (#x2234 . #x2237) + (#x223C . #x223D) + (#x2248 . #x2248) + (#x224C . #x224C) + (#x2252 . #x2252) + (#x2260 . #x2261) + (#x2264 . #x2267) + (#x226A . #x226B) + (#x226E . #x226F) + (#x2282 . #x2283) + (#x2286 . #x2287) + (#x2295 . #x2295) + (#x2299 . #x2299) + (#x22A5 . #x22A5) + (#x22BF . #x22BF) + (#x2312 . #x2312) + (#x2460 . #x24E9) + (#x24EB . #x254B) + (#x2550 . #x2573) + (#x2580 . #x258F) + (#x2592 . #x2595) + (#x25A0 . #x25A1) + (#x25A3 . #x25A9) + (#x25B2 . #x25B3) + (#x25B6 . #x25B7) + (#x25BC . #x25BD) + (#x25C0 . #x25C1) + (#x25C6 . #x25C8) + (#x25CE . #x25D1) + (#x25E2 . #x25E5) + (#x25EF . #x25EF) + (#x2605 . #x2606) + (#x260E . #x260F) + (#x261C . #x261C) + (#x261E . #x261E) + (#x2640 . #x2640) + (#x2642 . #x2642) + (#x2660 . #x2661) + (#x2663 . #x2665) + (#x2667 . #x266A) + (#x266C . #x266D) + (#x266F . #x266F) + (#x269E . #x269F) + (#x26BF . #x26BF) + (#x26C6 . #x26CD) + (#x26CF . #x26D3) + (#x26D5 . #x26E1) + (#x26E3 . #x26E3) + (#x26E8 . #x26E9) + (#x26EB . #x26F1) + (#x26F4 . #x26F4) + (#x26F6 . #x26F9) + (#x26FB . #x26FC) + (#x26FE . #x26FF) + (#x273D . #x273D) + (#x2776 . #x277F) + (#x2B56 . #x2B59) + (#x3248 . #x324F)))) + (dolist (elt l) + (set-char-table-range ambiguous-width-chars elt t))) + ;; Other double width ;;(map-charset-chars ;; (lambda (range ignore) (set-char-table-range char-width-table range 2)) @@ -1427,6 +1595,45 @@ with L, LRE, or LRO Unicode bidi character type.") (chinese-cns11643-1 (#x2121 . #x427E))) (ko_KR nil (korean-ksc5601 (#x2121 . #x2C7E))))) +(defun update-cjk-ambiguous-char-widths (locale-name) + "Update character widths for LOCALE-NAME using `ambiguous-width-chars'. +LOCALE-NAME is the symbol of a CJK locale, such as \\='zh_CN." + (let ((slot (assq locale-name cjk-char-width-table-list))) + (or slot (error "Unknown locale for CJK language environment: %s" + locale-name)) + ;; Force recomputation of child table in 'use-cjk-char-width-table'. + (setcar (cdr slot) nil) + (use-cjk-char-width-table locale-name))) + + +(defcustom cjk-ambiguous-chars-are-wide t + "Whether the \"ambiguous-width\" characters take 2 columns on display. + +Some of the characters are defined by Unicode as being of \"ambiguous\" +width: the actual width, either 1 column or 2 columns, should be +determined at display time, depending on the language context. +If this variable is non-nil, Emacs will consider these characters as +full-width, i.e. taking 2 columns; otherwise they are narrow characters +taking 1 column on display. Which value is correct depends on the +fonts being used. In some CJK locales the fonts are set so that +these characters are displayed as full-width. This setting is most +important for text-mode frames, because there Emacs cannot access the +metrics of the fonts used by the console or the terminal emulator. + +Do not set this directly via `setq'; instead, use `setopt' or the +Customize commands. Alternatively, call `update-cjk-ambiguous-char-widths' +passing it the symbol of the current locale environment, after changing +the value of the variable with `setq'." + :type 'boolean + :set (lambda (symbol value) + (set-default symbol value) + (let ((locsym (get-language-info current-language-environment + 'cjk-locale-symbol))) + (when locsym + (update-cjk-ambiguous-char-widths locsym)))) + :version "30.1" + :group 'display) + ;; Internal use only. ;; Setup char-width-table appropriate for a language environment ;; corresponding to LOCALE-NAME (symbol). @@ -1448,7 +1655,15 @@ with L, LRE, or LRO Unicode bidi character type.") (car code-range) (cdr code-range))))) (optimize-char-table table) (set-char-table-parent table char-width-table) - (setcar (cdr slot) table))) + (let ((tbl (make-char-table nil))) + (map-char-table + (lambda (range _val) + (set-char-table-range tbl range + (if cjk-ambiguous-chars-are-wide 2 1))) + ambiguous-width-chars) + (optimize-char-table tbl) + (set-char-table-parent tbl table) + (setcar (cdr slot) tbl)))) (setq char-width-table (nth 1 slot)))) (defun use-default-char-width-table () diff --git a/lisp/language/chinese.el b/lisp/language/chinese.el index 26f1194aa4c..e35f3f179ad 100644 --- a/lisp/language/chinese.el +++ b/lisp/language/chinese.el @@ -111,6 +111,7 @@ (set-language-info-alist "Chinese-GB" '((charset chinese-gb2312 chinese-sisheng) (iso639-language . zh) + (cjk-locale-symbol . zh_CN) (setup-function . (lambda () (use-cjk-char-width-table 'zh_CN))) (exit-function . use-default-char-width-table) @@ -142,6 +143,7 @@ (set-language-info-alist "Chinese-BIG5" '((charset chinese-big5-1 chinese-big5-2) (iso639-language . zh) + (cjk-locale-symbol . zh_HK) (setup-function . (lambda () (use-cjk-char-width-table 'zh_HK))) (exit-function . use-default-char-width-table) @@ -198,6 +200,7 @@ chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7) (iso639-language . zh) + (cjk-locale-symbol . zh_TW) (setup-function . (lambda () (use-cjk-char-width-table 'zh_TW))) (exit-function . use-default-char-width-table) @@ -218,6 +221,7 @@ accepts Big5 for input also (which is then converted to CNS).")) chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7 chinese-big5-1 chinese-big5-2) (iso639-language . zh) + (cjk-locale-symbol . zh_TW) (setup-function . (lambda () (use-cjk-char-width-table 'zh_TW))) (exit-function . use-default-char-width-table) @@ -248,6 +252,7 @@ converted to CNS).")) (set-language-info-alist "Chinese-GBK" '((charset chinese-gbk) (iso639-language . zh) + (cjk-locale-symbol . zh_CN) (setup-function . (lambda () (use-cjk-char-width-table 'zh_CN))) (exit-function . use-default-char-width-table) diff --git a/lisp/language/japanese.el b/lisp/language/japanese.el index 681dc9d7b92..6042ebf4511 100644 --- a/lisp/language/japanese.el +++ b/lisp/language/japanese.el @@ -208,6 +208,7 @@ eucJP-ms is defined in ." "Japanese" '((setup-function . setup-japanese-environment-internal) (exit-function . use-default-char-width-table) (iso639-language . ja) + (cjk-locale-symbol . ja_JP) (tutorial . "TUTORIAL.ja") (charset japanese-jisx0208 japanese-jisx0212 latin-jisx0201 katakana-jisx0201 diff --git a/lisp/language/korean.el b/lisp/language/korean.el index fef5796bc4b..ede37d5d07c 100644 --- a/lisp/language/korean.el +++ b/lisp/language/korean.el @@ -68,6 +68,7 @@ (set-language-info-alist "Korean" '((setup-function . setup-korean-environment-internal) (exit-function . exit-korean-environment) + (cjk-locale-symbol . ko_KR) (iso639-language . ko) (tutorial . "TUTORIAL.ko") (charset korean-ksc5601 cp949) diff --git a/src/character.c b/src/character.c index f4164360f21..2118b20a7c7 100644 --- a/src/character.c +++ b/src/character.c @@ -1117,6 +1117,14 @@ A char-table for width (columns) of each character. */); char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR, make_fixnum (4)); + DEFVAR_LISP ("ambiguous-width-chars", Vambiguous_width_chars, + doc: /* +A char-table for characters whose width (columns) can be 1 or 2. + +The actual width depends on the language-environment and on the +value of `cjk-ambiguous-chars-are-wide'. */); + Vambiguous_width_chars = Fmake_char_table (Qnil, Qnil); + DEFVAR_LISP ("printable-chars", Vprintable_chars, doc: /* A char-table for each printable character. */); Vprintable_chars = Fmake_char_table (Qnil, Qnil); -- 2.39.2