From 77e5bcc52eadbcfc8e6d9a463782c04f5dc63c25 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Sun, 22 Dec 2013 20:02:37 +0200 Subject: [PATCH] Fix bug #16216 with 'name' Unicode property of control characters. admin/unidata/unidata-gen.el (unidata-split-name): Don't give any NAME to <control> characters: the Unicode Standard says they have no name. doc/lispref/nonascii.texi (Character Properties): NAME or OLD-NAME properties can be nil (there's no empty string). --- admin/ChangeLog | 6 ++++++ admin/unidata/unidata-gen.el | 9 ++++++++- doc/lispref/ChangeLog | 14 +++++++++++--- doc/lispref/nonascii.texi | 5 +++-- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/admin/ChangeLog b/admin/ChangeLog index f49061e35df..7671ae409cd 100644 --- a/admin/ChangeLog +++ b/admin/ChangeLog @@ -1,3 +1,9 @@ +2013-12-22 Eli Zaretskii + + * unidata/unidata-gen.el (unidata-split-name): Don't give any NAME + to <control> characters: the Unicode Standard says they have no + name. (Bug#16216) + 2013-12-12 David Engster * grammars/c.by (expr-binop): Add MOD. diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index fa8f81636e3..ed531eeea25 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el @@ -984,7 +984,14 @@ is the character itself."))) (l nil) (idx 0) c) - (if (= len 0) + (if (or (= len 0) + ;; Unicode Standard, paragraph 4.8: "For all other + ;; Unicode code points of all other types (Control, + ;; Private-Use, Surrogate, Noncharacter, and Reserved), + ;; the value of the Name property is the null string." + ;; We already handle elsewhere all the characters except + ;; Cc, Control characters, which are handled here. 
+ (string= str "<control>")) nil (dotimes (i len) (setq c (aref str i)) diff --git a/doc/lispref/ChangeLog b/doc/lispref/ChangeLog index d03f6ae39da..c0a3c81e926 100644 --- a/doc/lispref/ChangeLog +++ b/doc/lispref/ChangeLog @@ -1,12 +1,20 @@ +2013-12-22 Eli Zaretskii + + * nonascii.texi (Character Properties): NAME or OLD-NAME + properties can be nil (there's no empty string). + 2013-12-22 Xue Fuqiao - * sequences.texi (Bool-Vectors): Document new bool-vector set operation functions. + * sequences.texi (Bool-Vectors): Document new bool-vector set + operation functions. * text.texi (Examining Properties): Document `get-pos-property'. - * variables.texi (Directory Local Variables): Document `enable-dir-local-variables'. + * variables.texi (Directory Local Variables): Document + `enable-dir-local-variables'. - * debugging.texi (Debugger Commands): Document `debugger-toggle-locals'. + * debugging.texi (Debugger Commands): Document + `debugger-toggle-locals'. 2013-12-21 Chong Yidong diff --git a/doc/lispref/nonascii.texi b/doc/lispref/nonascii.texi index 4132c1f8de0..ee4c8b77f3d 100644 --- a/doc/lispref/nonascii.texi +++ b/doc/lispref/nonascii.texi @@ -440,7 +440,7 @@ properties that Emacs knows about: Corresponds to the @code{Name} Unicode property. The value is a string consisting of upper-case Latin letters A to Z, digits, spaces, and hyphen @samp{-} characters. For unassigned codepoints, the value -is an empty string. +is @code{nil}. @cindex unicode general category @item general-category @@ -522,7 +522,8 @@ is @code{nil}. @item old-name Corresponds to the Unicode @code{Unicode_1_Name} property. The value -is a string. For unassigned codepoints, the value is an empty string. +is a string. For unassigned codepoints, and characters that have no value +for this property, the value is @code{nil}. @item iso-10646-comment Corresponds to the Unicode @code{ISO_Comment} property. The value is -- 2.39.2