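(read_escape): Provide a Unicode character escape syntax; \u followed by four hex digits, or \U followed by eight hex digits, is read as the Unicode character with that code.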

author Eli Zaretskii <eliz@gnu.org>

Fri, 9 Jun 2006 18:22:30 +0000 (18:22 +0000)

committer Eli Zaretskii <eliz@gnu.org>

Fri, 9 Jun 2006 18:22:30 +0000 (18:22 +0000)
author Eli Zaretskii <eliz@gnu.org>
Fri, 9 Jun 2006 18:22:30 +0000 (18:22 +0000)
committer Eli Zaretskii <eliz@gnu.org>
Fri, 9 Jun 2006 18:22:30 +0000 (18:22 +0000)
diff --git a/src/lread.c b/src/lread.c

index 31f974d9bc03bd41f6aaaf6ac1b7cdc2f5beb73a..a0d4ad825ddc0870d007999c2190a3fdb8c76820 100644 (file)
--- a/src/lread.c
+++ b/src/lread.c
@@ -1764,6 +1764,9 @@ read_escape (readcharfun, stringp, byterep)
       int *byterep;
  {
    register int c = READCHAR;
+  /* \u is followed by four hex digits, \U by eight.  Default to the
+     count for \u, and change this value in the case that \U is seen.  */
+  int unicode_hex_count = 4;
  
    *byterep = 0;
  
@@ -1928,6 +1931,52 @@ read_escape (readcharfun, stringp, byterep)
         return i;
        }
  
+    case 'U':
+      /* Post-Unicode-2.0: eight hex digits; falls through to `u'.  */
+      unicode_hex_count = 8;
+    case 'u':
+
+      /* A Unicode escape. We only permit them in strings and characters,
+        not arbitrarily in the source code, as in some other languages.  */
+      {
+       int i = 0;
+       int count = 0;
+       Lisp_Object lisp_char;
+       struct gcpro gcpro1;
+
+       while (++count <= unicode_hex_count)
+         {
+           c = READCHAR;
+           /* isdigit(), isalpha() may be locale-specific, which we don't
+              want. */
+           if      (c >= '0' && c <= '9')  i = (i << 4) + (c - '0');
+           else if (c >= 'a' && c <= 'f')  i = (i << 4) + (c - 'a') + 10;
+            else if (c >= 'A' && c <= 'F')  i = (i << 4) + (c - 'A') + 10;
+           else
+             {
+               error ("Non-hex digit used for Unicode escape");
+               break;
+             }
+         }
+
+       GCPRO1 (readcharfun);
+       lisp_char = call2(intern("decode-char"), intern("ucs"),
+                         make_number(i));
+       UNGCPRO;
+
+       if (EQ(Qnil, lisp_char))
+         {
+           /* This is ugly and horrible and trashes the user's data.  */
+           XSETFASTINT (i, MAKE_CHAR (charset_katakana_jisx0201,
+                                      34 + 128, 46 + 128));
+            return i;
+         }
+       else
+         {
+           return XFASTINT (lisp_char);
+         }
+      }
+
      default:
        if (BASE_LEADING_CODE_P (c))
         c = read_multibyte (c, readcharfun);
author	Eli Zaretskii <eliz@gnu.org>
	Fri, 9 Jun 2006 18:22:30 +0000 (18:22 +0000)
committer	Eli Zaretskii <eliz@gnu.org>
	Fri, 9 Jun 2006 18:22:30 +0000 (18:22 +0000)