enum { UNICODE_CHARACTER_NAME_LENGTH_BOUND = 200 };
/* Read a \-escape sequence, assuming we already read the `\'.
- When there is a difference between string and character literal \-sequences,
- the latter is assumed.
If the escape sequence forces unibyte, return eight-bit char. */
static int
-read_escape (Lisp_Object readcharfun)
+read_escape (Lisp_Object readcharfun, bool stringp)
{
- int modifiers = 0;
- again: ;
int c = READCHAR;
- int unicode_hex_count;
+ /* \u allows up to four hex digits, \U up to eight. Default to the
+ behavior for \u, and change this value in the case that \U is seen. */
+ int unicode_hex_count = 4;
switch (c)
{
case -1:
end_of_file_error ();
- case 'a': c = '\a'; break;
- case 'b': c = '\b'; break;
- case 'd': c = 127; break;
- case 'e': c = 27; break;
- case 'f': c = '\f'; break;
- case 'n': c = '\n'; break;
- case 'r': c = '\r'; break;
- case 't': c = '\t'; break;
- case 'v': c = '\v'; break;
+ case 'a':
+ return '\007';
+ case 'b':
+ return '\b';
+ case 'd':
+ return 0177;
+ case 'e':
+ return 033;
+ case 'f':
+ return '\f';
+ case 'n':
+ return '\n';
+ case 'r':
+ return '\r';
+ case 't':
+ return '\t';
+ case 'v':
+ return '\v';
+ case '\n':
+ return -1;
+ case ' ':
+ if (stringp)
+ return -1;
+ return ' ';
case 'M':
c = READCHAR;
if (c != '-')
error ("Invalid escape character syntax");
- modifiers |= meta_modifier;
c = READCHAR;
if (c == '\\')
- goto again;
- break;
+ c = read_escape (readcharfun, 0);
+ return c | meta_modifier;
case 'S':
c = READCHAR;
if (c != '-')
error ("Invalid escape character syntax");
- modifiers |= shift_modifier;
c = READCHAR;
if (c == '\\')
- goto again;
- break;
+ c = read_escape (readcharfun, 0);
+ return c | shift_modifier;
case 'H':
c = READCHAR;
if (c != '-')
error ("Invalid escape character syntax");
- modifiers |= hyper_modifier;
c = READCHAR;
if (c == '\\')
- goto again;
- break;
+ c = read_escape (readcharfun, 0);
+ return c | hyper_modifier;
case 'A':
c = READCHAR;
if (c != '-')
error ("Invalid escape character syntax");
- modifiers |= alt_modifier;
c = READCHAR;
if (c == '\\')
- goto again;
- break;
+ c = read_escape (readcharfun, 0);
+ return c | alt_modifier;
case 's':
c = READCHAR;
- if (c == '-')
- {
- modifiers |= super_modifier;
- c = READCHAR;
- if (c == '\\')
- goto again;
- }
- else
+ if (stringp || c != '-')
{
UNREAD (c);
- c = ' ';
+ return ' ';
}
- break;
+ c = READCHAR;
+ if (c == '\\')
+ c = read_escape (readcharfun, 0);
+ return c | super_modifier;
case 'C':
c = READCHAR;
error ("Invalid escape character syntax");
FALLTHROUGH;
case '^':
- modifiers |= ctrl_modifier;
c = READCHAR;
if (c == '\\')
- goto again;
- break;
+ c = read_escape (readcharfun, 0);
+ if ((c & ~CHAR_MODIFIER_MASK) == '?')
+ return 0177 | (c & CHAR_MODIFIER_MASK);
+ else if (! ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
+ return c | ctrl_modifier;
+ /* ASCII control chars are made from letters (both cases),
+ as well as the non-letters within 0100...0137. */
+ else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
+ return (c & (037 | ~0177));
+ else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
+ return (c & (037 | ~0177));
+ else
+ return c | ctrl_modifier;
case '0':
case '1':
case '5':
case '6':
case '7':
- /* 1-3 octal digits. */
+ /* An octal escape, as in ANSI C. */
{
- int i = c - '0';
- int count = 0;
+ register int i = c - '0';
+ register int count = 0;
while (++count < 3)
{
- c = READCHAR;
- if (c < '0' || c > '7')
+ if ((c = READCHAR) >= '0' && c <= '7')
+ {
+ i *= 8;
+ i += c - '0';
+ }
+ else
{
UNREAD (c);
break;
}
- i *= 8;
- i += c - '0';
}
if (i >= 0x80 && i < 0x100)
i = BYTE8_TO_CHAR (i);
- c = i;
- break;
+ return i;
}
case 'x':
- /* One or more hex digits. */
+ /* A hex escape, as in ANSI C. */
{
unsigned int i = 0;
int count = 0;
}
if (count < 3 && i >= 0x80)
- i = BYTE8_TO_CHAR (i);
- c = i;
- break;
+ return BYTE8_TO_CHAR (i);
+ return i;
}
- case 'U': /* Eight hex digits. */
+ case 'U':
+ /* Post-Unicode-2.0: Up to eight hex chars. */
unicode_hex_count = 8;
- goto unicode;
+ FALLTHROUGH;
+ case 'u':
- case 'u': /* Four hex digits. */
- unicode_hex_count = 4;
- unicode:
/* A Unicode escape. We only permit them in strings and characters,
not arbitrarily in the source code, as in some other languages. */
{
{
c = READCHAR;
if (c < 0)
- error ("Malformed Unicode escape: \\%c%x",
- unicode_hex_count == 4 ? 'u' : 'U', i);
+ {
+ if (unicode_hex_count > 4)
+ error ("Malformed Unicode escape: \\U%x", i);
+ else
+ error ("Malformed Unicode escape: \\u%x", i);
+ }
/* `isdigit' and `isalpha' may be locale-specific, which we don't
want. */
int digit = char_hexdigit (c);
}
if (i > 0x10FFFF)
error ("Non-Unicode character: 0x%x", i);
- c = i;
- break;
+ return i;
}
case 'N':
/* character_name_to_code can invoke read0, recursively.
This is why read0's buffer is not static. */
- c = character_name_to_code (name, length, readcharfun);
- break;
+ return character_name_to_code (name, length, readcharfun);
}
- }
- c |= modifiers;
- if (c & ctrl_modifier)
- {
- int b = c & ~CHAR_MODIFIER_MASK;
- /* If the base char is in the 0x3f..0x5f range or a lower case
- letter, drop the ctrl_modifier bit and generate a C0 control
- character instead. */
- if ((b >= 0x3f && b <= 0x5f) || (b >= 'a' && b <= 'z'))
- {
- c &= ~ctrl_modifier;
- if (b == '?')
- /* Special case: ^? is DEL. */
- b = 127;
- else
- /* Make a C0 control in 0..31 by clearing bits 5 and 6. */
- b &= 0x1f;
- }
- c = b | (c & CHAR_MODIFIER_MASK);
+ default:
+ return c;
}
- return c;
}
/* Return the digit that CHARACTER stands for in the given BASE.
}
if (ch == '\\')
- ch = read_escape (readcharfun);
+ ch = read_escape (readcharfun, 0);
int modifiers = ch & CHAR_MODIFIER_MASK;
ch &= ~CHAR_MODIFIER_MASK;
if (ch == '\\')
{
- ch = READCHAR;
- switch (ch)
+ ch = read_escape (readcharfun, 1);
+
+ /* CH is -1 if \ newline or \ space has just been seen. */
+ if (ch == -1)
{
- case 's':
- ch = ' ';
- break;
- case ' ':
- case '\n':
if (p == read_buffer)
cancel = true;
continue;
- default:
- UNREAD (ch);
- ch = read_escape (readcharfun);
- break;
}
int modifiers = ch & CHAR_MODIFIER_MASK;
force_multibyte = true;
else /* I.e. ASCII_CHAR_P (ch). */
{
- /* Allow `\C-SPC' and `\^SPC'. This is done here because
- the literals ?\C-SPC and ?\^SPC (rather inconsistently)
- yield (' ' | CHAR_CTL); see bug#55738. */
- if (modifiers == CHAR_CTL && ch == ' ')
+ /* Allow `\C- ' and `\C-?'. */
+ if (modifiers == CHAR_CTL)
{
- ch = 0;
- modifiers = 0;
+ if (ch == ' ')
+ {
+ ch = 0;
+ modifiers = 0;
+ }
+ else if (ch == '?')
+ {
+ ch = 127;
+ modifiers = 0;
+ }
}
if (modifiers & CHAR_SHIFT)
{