From f71cfd3084103460d7cdad96bf7f8b88e1d19596 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Wed, 1 Jun 2022 22:49:34 +0200 Subject: [PATCH] Revert "Fix reader char escape bugs (bug#55738)" This reverts commit c50718dcfa54293b695f8a3fa5cd4d77848ee084. It may have caused bootstrap problems. Sorry about that. --- src/lread.c | 201 ++++++++++++++++++++-------------------- test/src/lread-tests.el | 10 -- 2 files changed, 99 insertions(+), 112 deletions(-) diff --git a/src/lread.c b/src/lread.c index 670413efc02..a1045184d9b 100644 --- a/src/lread.c +++ b/src/lread.c @@ -2631,88 +2631,93 @@ character_name_to_code (char const *name, ptrdiff_t name_len, enum { UNICODE_CHARACTER_NAME_LENGTH_BOUND = 200 }; /* Read a \-escape sequence, assuming we already read the `\'. - When there is a difference between string and character literal \-sequences, - the latter is assumed. If the escape sequence forces unibyte, return eight-bit char. */ static int -read_escape (Lisp_Object readcharfun) +read_escape (Lisp_Object readcharfun, bool stringp) { - int modifiers = 0; - again: ; int c = READCHAR; - int unicode_hex_count; + /* \u allows up to four hex digits, \U up to eight. Default to the + behavior for \u, and change this value in the case that \U is seen. */ + int unicode_hex_count = 4; switch (c) { case -1: end_of_file_error (); - case 'a': c = '\a'; break; - case 'b': c = '\b'; break; - case 'd': c = 127; break; - case 'e': c = 27; break; - case 'f': c = '\f'; break; - case 'n': c = '\n'; break; - case 'r': c = '\r'; break; - case 't': c = '\t'; break; - case 'v': c = '\v'; break; + case 'a': + return '\007'; + case 'b': + return '\b'; + case 'd': + return 0177; + case 'e': + return 033; + case 'f': + return '\f'; + case 'n': + return '\n'; + case 'r': + return '\r'; + case 't': + return '\t'; + case 'v': + return '\v'; + case '\n': + return -1; + case ' ': + if (stringp) + return -1; + return ' '; case 'M': c = READCHAR; if (c != '-') error ("Invalid escape character syntax"); - modifiers |= meta_modifier; c = READCHAR; if (c == '\\') - goto again; - break; + c = read_escape (readcharfun, 0); + return c | meta_modifier; case 'S': c = READCHAR; if (c != '-') error ("Invalid escape character syntax"); - modifiers |= shift_modifier; c = READCHAR; if (c == '\\') - goto again; - break; + c = read_escape (readcharfun, 0); + return c | shift_modifier; case 'H': c = READCHAR; if (c != '-') error ("Invalid escape character syntax"); - modifiers |= hyper_modifier; c = READCHAR; if (c == '\\') - goto again; - break; + c = read_escape (readcharfun, 0); + return c | hyper_modifier; case 'A': c = READCHAR; if (c != '-') error ("Invalid escape character syntax"); - modifiers |= alt_modifier; c = READCHAR; if (c == '\\') - goto again; - break; + c = read_escape (readcharfun, 0); + return c | alt_modifier; case 's': c = READCHAR; - if (c == '-') - { - modifiers |= super_modifier; - c = READCHAR; - if (c == '\\') - goto again; - } - else + if (stringp || c != '-') { UNREAD (c); - c = ' '; + return ' '; } - break; + c = READCHAR; + if (c == '\\') + c = read_escape (readcharfun, 0); + return c | super_modifier; case 'C': c = READCHAR; @@ -2720,11 +2725,21 @@ read_escape (Lisp_Object readcharfun) error ("Invalid escape character syntax"); FALLTHROUGH; case '^': - modifiers |= ctrl_modifier; c = READCHAR; if (c == '\\') - goto again; - break; + c = read_escape (readcharfun, 0); + if ((c & ~CHAR_MODIFIER_MASK) == '?') + return 0177 | (c & CHAR_MODIFIER_MASK); + else if (! ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) + return c | ctrl_modifier; + /* ASCII control chars are made from letters (both cases), + as well as the non-letters within 0100...0137. */ + else if ((c & 0137) >= 0101 && (c & 0137) <= 0132) + return (c & (037 | ~0177)); + else if ((c & 0177) >= 0100 && (c & 0177) <= 0137) + return (c & (037 | ~0177)); + else + return c | ctrl_modifier; case '0': case '1': @@ -2734,30 +2749,31 @@ read_escape (Lisp_Object readcharfun) case '5': case '6': case '7': - /* 1-3 octal digits. */ + /* An octal escape, as in ANSI C. */ { - int i = c - '0'; - int count = 0; + register int i = c - '0'; + register int count = 0; while (++count < 3) { - c = READCHAR; - if (c < '0' || c > '7') + if ((c = READCHAR) >= '0' && c <= '7') + { + i *= 8; + i += c - '0'; + } + else { UNREAD (c); break; } - i *= 8; - i += c - '0'; } if (i >= 0x80 && i < 0x100) i = BYTE8_TO_CHAR (i); - c = i; - break; + return i; } case 'x': - /* One or more hex digits. */ + /* A hex escape, as in ANSI C. */ { unsigned int i = 0; int count = 0; @@ -2779,18 +2795,16 @@ read_escape (Lisp_Object readcharfun) } if (count < 3 && i >= 0x80) - i = BYTE8_TO_CHAR (i); - c = i; - break; + return BYTE8_TO_CHAR (i); + return i; } - case 'U': /* Eight hex digits. */ + case 'U': + /* Post-Unicode-2.0: Up to eight hex chars. */ unicode_hex_count = 8; - goto unicode; + FALLTHROUGH; + case 'u': - case 'u': /* Four hex digits. */ - unicode_hex_count = 4; - unicode: /* A Unicode escape. We only permit them in strings and characters, not arbitrarily in the source code, as in some other languages. */ { @@ -2801,8 +2815,12 @@ read_escape (Lisp_Object readcharfun) { c = READCHAR; if (c < 0) - error ("Malformed Unicode escape: \\%c%x", - unicode_hex_count == 4 ? 'u' : 'U', i); + { + if (unicode_hex_count > 4) + error ("Malformed Unicode escape: \\U%x", i); + else + error ("Malformed Unicode escape: \\u%x", i); + } /* `isdigit' and `isalpha' may be locale-specific, which we don't want. */ int digit = char_hexdigit (c); @@ -2813,8 +2831,7 @@ read_escape (Lisp_Object readcharfun) } if (i > 0x10FFFF) error ("Non-Unicode character: 0x%x", i); - c = i; - break; + return i; } case 'N': @@ -2863,31 +2880,12 @@ read_escape (Lisp_Object readcharfun) /* character_name_to_code can invoke read0, recursively. This is why read0's buffer is not static. */ - c = character_name_to_code (name, length, readcharfun); - break; + return character_name_to_code (name, length, readcharfun); } - } - c |= modifiers; - if (c & ctrl_modifier) - { - int b = c & ~CHAR_MODIFIER_MASK; - /* If the base char is in the 0x3f..0x5f range or a lower case - letter, drop the ctrl_modifier bit and generate a C0 control - character instead. */ - if ((b >= 0x3f && b <= 0x5f) || (b >= 'a' && b <= 'z')) - { - c &= ~ctrl_modifier; - if (b == '?') - /* Special case: ^? is DEL. */ - b = 127; - else - /* Make a C0 control in 0..31 by clearing bits 5 and 6. */ - b &= 0x1f; - } - c = b | (c & CHAR_MODIFIER_MASK); + default: + return c; } - return c; } /* Return the digit that CHARACTER stands for in the given BASE. @@ -3014,7 +3012,7 @@ read_char_literal (Lisp_Object readcharfun) } if (ch == '\\') - ch = read_escape (readcharfun); + ch = read_escape (readcharfun, 0); int modifiers = ch & CHAR_MODIFIER_MASK; ch &= ~CHAR_MODIFIER_MASK; @@ -3068,21 +3066,14 @@ read_string_literal (char stackbuf[VLA_ELEMS (stackbufsize)], if (ch == '\\') { - ch = READCHAR; - switch (ch) + ch = read_escape (readcharfun, 1); + + /* CH is -1 if \ newline or \ space has just been seen. */ + if (ch == -1) { - case 's': - ch = ' '; - break; - case ' ': - case '\n': if (p == read_buffer) cancel = true; continue; - default: - UNREAD (ch); - ch = read_escape (readcharfun); - break; } int modifiers = ch & CHAR_MODIFIER_MASK; @@ -3094,13 +3085,19 @@ read_string_literal (char stackbuf[VLA_ELEMS (stackbufsize)], force_multibyte = true; else /* I.e. ASCII_CHAR_P (ch). */ { - /* Allow `\C-SPC' and `\^SPC'. This is done here because - the literals ?\C-SPC and ?\^SPC (rather inconsistently) - yield (' ' | CHAR_CTL); see bug#55738. */ - if (modifiers == CHAR_CTL && ch == ' ') + /* Allow `\C- ' and `\C-?'. */ + if (modifiers == CHAR_CTL) { - ch = 0; - modifiers = 0; + if (ch == ' ') + { + ch = 0; + modifiers = 0; + } + else if (ch == '?') + { + ch = 127; + modifiers = 0; + } } if (modifiers & CHAR_SHIFT) { diff --git a/test/src/lread-tests.el b/test/src/lread-tests.el index 59d5ca076f1..47351c1d116 100644 --- a/test/src/lread-tests.el +++ b/test/src/lread-tests.el @@ -317,14 +317,4 @@ literals (Bug#20852)." (should (equal (read-from-string "#_") '(## . 2)))) -(ert-deftest lread-misc-2 () - ;; ?\LF should produce LF (only inside string literals do we ignore \LF). - (should (equal (read-from-string "?\\\n") '(?\n . 3))) - (should (equal (read-from-string "\"a\\\nb\"") '("ab" . 6))) - ;; The Control modifier constructs should be idempotent. - (should (equal ?\C-\C-x ?\C-x)) - (should (equal ?\^\^x ?\C-x)) - (should (equal ?\C-\^x ?\C-x)) - (should (equal ?\^\C-x ?\C-x))) - ;;; lread-tests.el ends here -- 2.39.2