@end example
@end defun
+@defun char-from-name string &optional ignore-case
+This function returns the character whose Unicode name is @var{string}.
+If @var{ignore-case} is non-@code{nil}, case is ignored in @var{string}.
+This function returns @code{nil} if @var{string} does not name a character.
+
+@example
+;; U+03A3
+(= (char-from-name "GREEK CAPITAL LETTER SIGMA") #x03A3)
+ @result{} t
+@end example
+@end defun
+
@defun get-byte &optional pos string
This function returns the byte at character position @var{pos} in the
current buffer. If the current buffer is unibyte, this is literally
compares their numerical values. According to this predicate,
"foo2.png" is smaller than "foo12.png".
++++
+** The new function 'char-from-name' converts a Unicode name string
+to the corresponding character code.
+
+++
** New functions 'sxhash-eq' and 'sxhash-eql' return hash codes of a
Lisp object suitable for use with 'eq' and 'eql' correspondingly. If
(let ((char (assoc name ucs-names)))
(when char (format " (%c)" (cdr char)))))
+(defun char-from-name (string &optional ignore-case)
+ "Return a character as a number from its Unicode name STRING.
+If optional IGNORE-CASE is non-nil, ignore case in STRING.
+Return nil if STRING does not name a character."
+ (or (cdr (assoc-string string (ucs-names) ignore-case))
+ (let ((minus (string-match-p "-[0-9A-F]+\\'" string)))
+ (when minus
+ ;; Parse names like "VARIATION SELECTOR-17" and "CJK
+ ;; COMPATIBILITY IDEOGRAPH-F900" that are not in ucs-names.
+ (ignore-errors
+ (let* ((case-fold-search ignore-case)
+ (vs (string-match-p "\\`VARIATION SELECTOR-" string))
+ (minus-num (string-to-number (substring string minus)
+ (if vs 10 16)))
+ (vs-offset (if vs (if (< minus-num -16) #xE00EF #xFDFF) 0))
+ (code (- vs-offset minus-num))
+ (name (get-char-code-property code 'name)))
+ (when (eq t (compare-strings string nil nil name nil nil
+ ignore-case))
+ code)))))))
+
(defun read-char-by-name (prompt)
"Read a character by its Unicode name or hex number string.
Display PROMPT and read a string that represents a character by its
the characters whose names include that substring, not necessarily
at the beginning of the name.
-This function also accepts a hexadecimal number of Unicode code
-point or a number in hash notation, e.g. #o21430 for octal,
-#x2318 for hex, or #10r8984 for decimal."
+Accept a name like \"CIRCULATION FUNCTION\", a hexadecimal
+number like \"2A10\", or a number in hash notation (e.g.,
+\"#x2a10\" for hex, \"10r10768\" for decimal, or \"#o25020\" for
+octal). Treat otherwise-ambiguous strings like \"BED\" (U+1F6CF)
+as names, not numbers."
(let* ((enable-recursive-minibuffers t)
(completion-ignore-case t)
(input
(category . unicode-name))
(complete-with-action action (ucs-names) string pred)))))
(char
- (cond
- ((string-match-p "\\`[0-9a-fA-F]+\\'" input)
- (string-to-number input 16))
- ((string-match-p "\\`#" input)
- (read input))
- (t
- (cdr (assoc-string input (ucs-names) t))))))
+ (cond
+ ((char-from-name input t))
+ ((string-match-p "\\`[0-9a-fA-F]+\\'" input)
+ (ignore-errors (string-to-number input 16)))
+ ((string-match-p "\\`#\\([bBoOxX]\\|[0-9]+[rR]\\)[0-9a-zA-Z]+\\'"
+ input)
+ (ignore-errors (read input))))))
(unless (characterp char)
(error "Invalid character"))
char))
static int
character_name_to_code (char const *name, ptrdiff_t name_len)
{
- Lisp_Object code;
-
- /* Code point as U+XXXX.... */
- if (name[0] == 'U' && name[1] == '+')
- {
- /* Pass the leading '+' to string_to_number, so that it
- rejects monstrosities such as negative values. */
- code = string_to_number (name + 1, 16, false);
- }
- else
- {
- /* Look up the name in the table returned by 'ucs-names'. */
- AUTO_STRING_WITH_LEN (namestr, name, name_len);
- Lisp_Object names = call0 (Qucs_names);
- code = CDR (Fassoc (namestr, names));
- }
-
- if (! (INTEGERP (code)
- && 0 <= XINT (code) && XINT (code) <= MAX_UNICODE_CHAR
- && ! char_surrogate_p (XINT (code))))
+ /* For "U+XXXX", pass the leading '+' to string_to_number to reject
+ monstrosities like "U+-0000". */
+ Lisp_Object code
+ = (name[0] == 'U' && name[1] == '+'
+ ? string_to_number (name + 1, 16, false)
+ : call2 (Qchar_from_name, make_unibyte_string (name, name_len), Qt));
+
+ if (! RANGED_INTEGERP (0, code, MAX_UNICODE_CHAR)
+ || char_surrogate_p (XINT (code)))
{
AUTO_STRING (format, "\\N{%s}");
AUTO_STRING_WITH_LEN (namestr, name, name_len);
DEFSYM (Qrehash_size, "rehash-size");
DEFSYM (Qrehash_threshold, "rehash-threshold");
- DEFSYM (Qucs_names, "ucs-names");
+ DEFSYM (Qchar_from_name, "char-from-name");
}
(ert-deftest lread-char-number ()
(should (equal (read "?\\N{U+A817}") #xA817)))
-(ert-deftest lread-char-name ()
+(ert-deftest lread-char-name-1 ()
(should (equal (read "?\\N{SYLOTI NAGRI LETTER \n DHO}")
#xA817)))
+(ert-deftest lread-char-name-2 ()
+ (should (equal (read "?\\N{BED}") #x1F6CF)))
+(ert-deftest lread-char-name-3 ()
+ (should (equal (read "?\\N{U+BED}") #xBED)))
+(ert-deftest lread-char-name-4 ()
+ (should (equal (read "?\\N{VARIATION SELECTOR-1}") #xFE00)))
+(ert-deftest lread-char-name-5 ()
+ (should (equal (read "?\\N{VARIATION SELECTOR-16}") #xFE0F)))
+(ert-deftest lread-char-name-6 ()
+ (should (equal (read "?\\N{VARIATION SELECTOR-17}") #xE0100)))
+(ert-deftest lread-char-name-7 ()
+ (should (equal (read "?\\N{VARIATION SELECTOR-256}") #xE01EF)))
+(ert-deftest lread-char-name-8 ()
+ (should (equal (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-F900}") #xF900)))
+(ert-deftest lread-char-name-9 ()
+ (should (equal (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-FAD9}") #xFAD9)))
+(ert-deftest lread-char-name-10 ()
+ (should (equal (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-2F800}") #x2F800)))
+(ert-deftest lread-char-name-11 ()
+ (should (equal (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-2FA1D}") #x2FA1D)))
(ert-deftest lread-char-invalid-number ()
(should-error (read "?\\N{U+110000}") :type 'invalid-read-syntax))
-(ert-deftest lread-char-invalid-name ()
+(ert-deftest lread-char-invalid-name-1 ()
(should-error (read "?\\N{DOES NOT EXIST}")) :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-2 ()
+ (should-error (read "?\\N{VARIATION SELECTOR-0}")) :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-3 ()
+ (should-error (read "?\\N{VARIATION SELECTOR-257}"))
+ :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-4 ()
+ (should-error (read "?\\N{VARIATION SELECTOR--0}"))
+ :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-5 ()
+ (should-error (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-F8FF}"))
+ :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-6 ()
+ (should-error (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-FADA}"))
+ :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-7 ()
+ (should-error (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-2F7FF}"))
+ :type 'invalid-read-syntax)
+(ert-deftest lread-char-invalid-name-8 ()
+ (should-error (read "?\\N{CJK COMPATIBILITY IDEOGRAPH-2FA1E}"))
+ :type 'invalid-read-syntax)
(ert-deftest lread-char-non-ascii-name ()
(should-error (read "?\\N{LATIN CAPITAL LETTER Ø}")
(should-error (read "?\\N{U+DFFF}") :type 'invalid-read-syntax))
(ert-deftest lread-string-char-number-1 ()
- (should (equal (read "a\\N{U+A817}b") "a\uA817bx")))
+ (should (equal (read "\"a\\N{U+A817}b\"") "a\uA817b")))
(ert-deftest lread-string-char-number-2 ()
(should-error (read "?\\N{0.5}") :type 'invalid-read-syntax))
(ert-deftest lread-string-char-number-3 ()
(should-error (read "?\\N{U+-0}") :type 'invalid-read-syntax))
(ert-deftest lread-string-char-name ()
- (should (equal (read "a\\N{SYLOTI NAGRI LETTER DHO}b") "a\uA817b")))
+ (should (equal (read "\"a\\N{SYLOTI NAGRI LETTER DHO}b\"") "a\uA817b")))
;;; lread-tests.el ends here