From 71b169b8c49d4c2f593b7074e8555f6e479b10f3 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Fri, 9 Jun 2006 18:22:30 +0000 Subject: [PATCH] (read_escape): Provide a Unicode character escape syntax; \u followed by exactly four or \U followed by exactly eight hex digits in a character or string is read as a Unicode character with that code point. --- src/lread.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/lread.c b/src/lread.c index 31f974d9bc0..a0d4ad825dd 100644 --- a/src/lread.c +++ b/src/lread.c @@ -1764,6 +1764,9 @@ read_escape (readcharfun, stringp, byterep) int *byterep; { register int c = READCHAR; + /* \u takes exactly four hex digits, \U exactly eight. Default to the + behaviour for \u, and change this value in the case that \U is seen. */ + int unicode_hex_count = 4; *byterep = 0; @@ -1928,6 +1931,52 @@ read_escape (readcharfun, stringp, byterep) return i; } + case 'U': + /* Post-Unicode-2.0: Exactly eight hex chars. */ + unicode_hex_count = 8; + case 'u': + + /* A Unicode escape. We only permit them in strings and characters, + not arbitrarily in the source code, as in some other languages. */ + { + int i = 0; + int count = 0; + Lisp_Object lisp_char; + struct gcpro gcpro1; + + while (++count <= unicode_hex_count) + { + c = READCHAR; + /* isdigit(), isalpha() may be locale-specific, which we don't + want. */ + if (c >= '0' && c <= '9') i = (i << 4) + (c - '0'); + else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10; + else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10; + else + { + error ("Non-hex digit used for Unicode escape"); + break; + } + } + + GCPRO1 (readcharfun); + lisp_char = call2(intern("decode-char"), intern("ucs"), + make_number(i)); + UNGCPRO; + + if (EQ(Qnil, lisp_char)) + { + /* This is ugly and horrible and trashes the user's data. 
*/ + XSETFASTINT (i, MAKE_CHAR (charset_katakana_jisx0201, + 34 + 128, 46 + 128)); + return i; + } + else + { + return XFASTINT (lisp_char); + } + } + default: if (BASE_LEADING_CODE_P (c)) c = read_multibyte (c, readcharfun); -- 2.39.5