From b903507b36c438653a02d7b6291e9744d5221e28 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Tue, 24 May 2022 13:02:14 +0200 Subject: [PATCH] Nonrecursive Lisp reader (bug#55676) Restructure the reader to be nonrecursive so that it is not limited by the C stack or crashes Emacs when reading deeply nested data. This also improves performance. A few minor bugs were fixed: - (a .{NBSP}b) where {NBSP} is a non-breaking space (U+00A0) is now the dotted pair (a . b), not the 3-element list (a \. b), since U+00A0 is treated as whitespace everywhere else. - #_ with no symbol following is now equivalent to ## (empty interned symbol), not #: (empty uninterned symbol). * src/alloc.c (garbage_collect): Call mark_lread. * src/lread.c (readevalloop): Use read0 instead of read_list. (stackbufsize): Increase to 1024, now that read0 isn't recursive. (invalid_radix_integer): Buffer overflow check. (read1, read_list, read_vector): Remove. (read_char_literal, read_string_literal) (hash_table_from_plist, record_from_list, vector_from_rev_list) (bytecode_from_rev_list, char_table_from_rev_list) (sub_char_table_from_rev_list, string_props_from_rev_list) (read_bool_vector, skip_lazy_string, symbol_char_span) (skip_space_and_comments) (enum read_entry_type, struct read_stack_entry, struct read_stack) (rdstack, mark_lread, read_stack_top, read_stack_pop) (read_stack_empty_p, grow_read_stack, read_stack_push): New. (read0): Rewrite to be nonrecursive. * test/src/lread-tests.el (lread-deeply-nested, lread-misc): New tests. --- src/alloc.c | 1 + src/lisp.h | 1 + src/lread.c | 2180 +++++++++++++++++++++------------------ test/src/lread-tests.el | 36 + 4 files changed, 1229 insertions(+), 989 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index cfa51c0a8dc..02d3a3ea3a2 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -6180,6 +6180,7 @@ garbage_collect (void) mark_pinned_objects (); mark_pinned_symbols (); + mark_lread (); mark_terminals (); mark_kboards (); mark_threads (); diff --git a/src/lisp.h b/src/lisp.h index 95b33ff173e..3578ca57b46 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -4486,6 +4486,7 @@ extern void dir_warning (const char *, Lisp_Object); extern void init_obarray_once (void); extern void init_lread (void); extern void syms_of_lread (void); +extern void mark_lread (void); INLINE Lisp_Object intern (const char *str) diff --git a/src/lread.c b/src/lread.c index 5f3d83a846b..a1045184d9b 100644 --- a/src/lread.c +++ b/src/lread.c @@ -656,10 +656,6 @@ struct subst static Lisp_Object read_internal_start (Lisp_Object, Lisp_Object, Lisp_Object, bool); static Lisp_Object read0 (Lisp_Object, bool); -static Lisp_Object read1 (Lisp_Object, int *, bool, bool); - -static Lisp_Object read_list (bool, Lisp_Object, bool); -static Lisp_Object read_vector (Lisp_Object, bool, bool); static Lisp_Object substitute_object_recurse (struct subst *, Lisp_Object); static void substitute_in_interval (INTERVAL, void *); @@ -940,7 +936,7 @@ lisp_file_lexically_bound_p (Lisp_Object readcharfun) ch = READCHAR; if (ch == '\n') ch = READCHAR; /* It is OK to leave the position after a #! line, since - that is what read1 does. */ + that is what read0 does. */ } if (ch != ';') @@ -2286,6 +2282,7 @@ readevalloop (Lisp_Object readcharfun, if (c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r' || c == NO_BREAK_SPACE) goto read_next; + UNREAD (c); if (! HASH_TABLE_P (read_objects_map) || XHASH_TABLE (read_objects_map)->count) @@ -2300,12 +2297,9 @@ readevalloop (Lisp_Object readcharfun, DEFAULT_REHASH_SIZE, DEFAULT_REHASH_THRESHOLD, Qnil, false); if (!NILP (Vpurify_flag) && c == '(') - { - val = read_list (0, readcharfun, false); - } + val = read0 (readcharfun, false); else { - UNREAD (c); if (!NILP (readfun)) { val = call1 (readfun, readcharfun); @@ -2582,24 +2576,6 @@ read_internal_start (Lisp_Object stream, Lisp_Object start, Lisp_Object end, return retval; } - -/* Use this for recursive reads, in contexts where internal tokens - are not allowed. */ - -static Lisp_Object -read0 (Lisp_Object readcharfun, bool locate_syms) -{ - register Lisp_Object val; - int c; - - val = read1 (readcharfun, &c, 0, locate_syms); - if (!c) - return val; - - invalid_syntax_lisp (Fmake_string (make_fixnum (1), make_fixnum (c), Qnil), - readcharfun); -} - /* Grow a read buffer BUF that contains OFFSET useful bytes of data, by at least MAX_MULTIBYTE_LENGTH bytes. Update *BUF_ADDR and *BUF_SIZE accordingly; 0 <= OFFSET <= *BUF_SIZE. If *BUF_ADDR is @@ -2902,8 +2878,8 @@ read_escape (Lisp_Object readcharfun, bool stringp) invalid_syntax ("Empty character name", readcharfun); name[length] = '\0'; - /* character_name_to_code can invoke read1, recursively. - This is why read1's buffer is not static. */ + /* character_name_to_code can invoke read0, recursively. + This is why read0's buffer is not static. */ return character_name_to_code (name, length, readcharfun); } @@ -2932,20 +2908,17 @@ digit_to_number (int character, int base) return digit < base ? digit : -1; } -static char const invalid_radix_integer_format[] = "integer, radix %"pI"d"; - -/* Small, as read1 is recursive (Bug#31995). But big enough to hold - the invalid_radix_integer string. */ -enum { stackbufsize = max (64, - (sizeof invalid_radix_integer_format - - sizeof "%"pI"d" - + INT_STRLEN_BOUND (EMACS_INT) + 1)) }; +/* Size of the fixed-size buffer used during reading. + It should be at least big enough for `invalid_radix_integer' but + can usefully be much bigger than that. */ +enum { stackbufsize = 1024 }; static void invalid_radix_integer (EMACS_INT radix, char stackbuf[VLA_ELEMS (stackbufsize)], Lisp_Object readcharfun) { - sprintf (stackbuf, invalid_radix_integer_format, radix); + int n = snprintf (stackbuf, stackbufsize, "integer, radix %"pI"d", radix); + eassert (n < stackbufsize); invalid_syntax (stackbuf, readcharfun); } @@ -3011,780 +2984,1106 @@ read_integer (Lisp_Object readcharfun, int radix, *p = '\0'; return unbind_to (count, string_to_number (read_buffer, radix, NULL)); } + -/* If the next token is ')' or ']' or '.', we store that character - in *PCH and the return value is not interesting. Else, we store - zero in *PCH and we read and return one lisp object. - - FIRST_IN_LIST is true if this is the first element of a list. - LOCATE_SYMS true means read symbol occurrences as symbols with - position. */ - +/* Read a character literal (preceded by `?'). */ static Lisp_Object -read1 (Lisp_Object readcharfun, int *pch, bool first_in_list, bool locate_syms) +read_char_literal (Lisp_Object readcharfun) { - int c; - bool uninterned_symbol = false; - bool skip_shorthand = false; - bool multibyte; - char stackbuf[stackbufsize]; - current_thread->stack_top = stackbuf; + int ch = READCHAR; + if (ch < 0) + end_of_file_error (); - *pch = 0; + /* Accept `single space' syntax like (list ? x) where the + whitespace character is SPC or TAB. + Other literal whitespace like NL, CR, and FF are not accepted, + as there are well-established escape sequences for these. */ + if (ch == ' ' || ch == '\t') + return make_fixnum (ch); - retry: + if ( ch == '(' || ch == ')' || ch == '[' || ch == ']' + || ch == '"' || ch == ';') + { + CHECK_LIST (Vlread_unescaped_character_literals); + Lisp_Object char_obj = make_fixed_natnum (ch); + if (NILP (Fmemq (char_obj, Vlread_unescaped_character_literals))) + Vlread_unescaped_character_literals = + Fcons (char_obj, Vlread_unescaped_character_literals); + } - c = READCHAR_REPORT_MULTIBYTE (&multibyte); - if (c < 0) - end_of_file_error (); + if (ch == '\\') + ch = read_escape (readcharfun, 0); - switch (c) - { - case '(': - return read_list (0, readcharfun, locate_syms); + int modifiers = ch & CHAR_MODIFIER_MASK; + ch &= ~CHAR_MODIFIER_MASK; + if (CHAR_BYTE8_P (ch)) + ch = CHAR_TO_BYTE8 (ch); + ch |= modifiers; - case '[': - return read_vector (readcharfun, 0, locate_syms); + int nch = READCHAR; + UNREAD (nch); + if (nch <= 32 + || nch == '"' || nch == '\'' || nch == ';' || nch == '(' + || nch == ')' || nch == '[' || nch == ']' || nch == '#' + || nch == '?' || nch == '`' || nch == ',' || nch == '.') + return make_fixnum (ch); - case ')': - case ']': - { - *pch = c; - return Qnil; - } + invalid_syntax ("?", readcharfun); +} - case '#': - c = READCHAR; - if (c == 's') +/* Read a string literal (preceded by '"'). */ +static Lisp_Object +read_string_literal (char stackbuf[VLA_ELEMS (stackbufsize)], + Lisp_Object readcharfun) +{ + char *read_buffer = stackbuf; + ptrdiff_t read_buffer_size = stackbufsize; + specpdl_ref count = SPECPDL_INDEX (); + char *heapbuf = NULL; + char *p = read_buffer; + char *end = read_buffer + read_buffer_size; + /* True if we saw an escape sequence specifying + a multibyte character. */ + bool force_multibyte = false; + /* True if we saw an escape sequence specifying + a single-byte character. */ + bool force_singlebyte = false; + bool cancel = false; + ptrdiff_t nchars = 0; + + int ch; + while ((ch = READCHAR) >= 0 && ch != '\"') + { + if (end - p < MAX_MULTIBYTE_LENGTH) { - c = READCHAR; - if (c == '(') + ptrdiff_t offset = p - read_buffer; + read_buffer = grow_read_buffer (read_buffer, offset, + &heapbuf, &read_buffer_size, + count); + p = read_buffer + offset; + end = read_buffer + read_buffer_size; + } + + if (ch == '\\') + { + ch = read_escape (readcharfun, 1); + + /* CH is -1 if \ newline or \ space has just been seen. */ + if (ch == -1) + { + if (p == read_buffer) + cancel = true; + continue; + } + + int modifiers = ch & CHAR_MODIFIER_MASK; + ch &= ~CHAR_MODIFIER_MASK; + + if (CHAR_BYTE8_P (ch)) + force_singlebyte = true; + else if (! ASCII_CHAR_P (ch)) + force_multibyte = true; + else /* I.e. ASCII_CHAR_P (ch). */ { - /* Accept extended format for hash tables (extensible to - other types), e.g. - #s(hash-table size 2 test equal data (k1 v1 k2 v2)) */ - Lisp_Object tmp = read_list (0, readcharfun, false); - Lisp_Object head = CAR_SAFE (tmp); - Lisp_Object data = Qnil; - Lisp_Object val = Qnil; - /* The size is 2 * number of allowed keywords to - make-hash-table. */ - Lisp_Object params[12]; - Lisp_Object ht; - Lisp_Object key = Qnil; - int param_count = 0; - - if (!EQ (head, Qhash_table)) + /* Allow `\C- ' and `\C-?'. */ + if (modifiers == CHAR_CTL) + { + if (ch == ' ') + { + ch = 0; + modifiers = 0; + } + else if (ch == '?') + { + ch = 127; + modifiers = 0; + } + } + if (modifiers & CHAR_SHIFT) { - ptrdiff_t size = XFIXNUM (Flength (tmp)); - Lisp_Object record = Fmake_record (CAR_SAFE (tmp), - make_fixnum (size - 1), - Qnil); - for (int i = 1; i < size; i++) + /* Shift modifier is valid only with [A-Za-z]. */ + if (ch >= 'A' && ch <= 'Z') + modifiers &= ~CHAR_SHIFT; + else if (ch >= 'a' && ch <= 'z') { - tmp = Fcdr (tmp); - ASET (record, i, Fcar (tmp)); + ch -= ('a' - 'A'); + modifiers &= ~CHAR_SHIFT; } - return record; } - tmp = CDR_SAFE (tmp); + if (modifiers & CHAR_META) + { + /* Move the meta bit to the right place for a + string. */ + modifiers &= ~CHAR_META; + ch = BYTE8_TO_CHAR (ch | 0x80); + force_singlebyte = true; + } + } + + /* Any modifiers remaining are invalid. */ + if (modifiers) + invalid_syntax ("Invalid modifier in string", readcharfun); + p += CHAR_STRING (ch, (unsigned char *) p); + } + else + { + p += CHAR_STRING (ch, (unsigned char *) p); + if (CHAR_BYTE8_P (ch)) + force_singlebyte = true; + else if (! ASCII_CHAR_P (ch)) + force_multibyte = true; + } + nchars++; + } + + if (ch < 0) + end_of_file_error (); + + /* If purifying, and string starts with \ newline, + return zero instead. This is for doc strings + that we are really going to find in etc/DOC.nn.nn. */ + if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) + { + unbind_to (count, Qnil); + return make_fixnum (0); + } - /* This is repetitive but fast and simple. */ - params[param_count] = QCsize; - params[param_count + 1] = Fplist_get (tmp, Qsize); - if (!NILP (params[param_count + 1])) - param_count += 2; + if (!force_multibyte && force_singlebyte) + { + /* READ_BUFFER contains raw 8-bit bytes and no multibyte + forms. Convert it to unibyte. */ + nchars = str_as_unibyte ((unsigned char *) read_buffer, + p - read_buffer); + p = read_buffer + nchars; + } - params[param_count] = QCtest; - params[param_count + 1] = Fplist_get (tmp, Qtest); - if (!NILP (params[param_count + 1])) - param_count += 2; + Lisp_Object obj = make_specified_string (read_buffer, nchars, p - read_buffer, + (force_multibyte + || (p - read_buffer != nchars))); + return unbind_to (count, obj); +} - params[param_count] = QCweakness; - params[param_count + 1] = Fplist_get (tmp, Qweakness); - if (!NILP (params[param_count + 1])) - param_count += 2; +/* Make a hash table from the constructor plist. */ +static Lisp_Object +hash_table_from_plist (Lisp_Object plist) +{ + Lisp_Object params[12]; + Lisp_Object *par = params; + + /* This is repetitive but fast and simple. */ +#define ADDPARAM(name) \ + do { \ + Lisp_Object val = Fplist_get (plist, Q ## name); \ + if (!NILP (val)) \ + { \ + *par++ = QC ## name; \ + *par++ = val; \ + } \ + } while (0) + + ADDPARAM (size); + ADDPARAM (test); + ADDPARAM (weakness); + ADDPARAM (rehash_size); + ADDPARAM (rehash_threshold); + ADDPARAM (purecopy); + + Lisp_Object data = Fplist_get (plist, Qdata); + + /* Now use params to make a new hash table and fill it. */ + Lisp_Object ht = Fmake_hash_table (par - params, params); + + Lisp_Object last = data; + FOR_EACH_TAIL_SAFE (data) + { + Lisp_Object key = XCAR (data); + data = XCDR (data); + if (!CONSP (data)) + break; + Lisp_Object val = XCAR (data); + last = XCDR (data); + Fputhash (key, val, ht); + } + if (!NILP (last)) + error ("Hash table data is not a list of even length"); - params[param_count] = QCrehash_size; - params[param_count + 1] = Fplist_get (tmp, Qrehash_size); - if (!NILP (params[param_count + 1])) - param_count += 2; + return ht; +} - params[param_count] = QCrehash_threshold; - params[param_count + 1] = Fplist_get (tmp, Qrehash_threshold); - if (!NILP (params[param_count + 1])) - param_count += 2; +static Lisp_Object +record_from_list (Lisp_Object elems) +{ + ptrdiff_t size = list_length (elems); + Lisp_Object obj = Fmake_record (XCAR (elems), + make_fixnum (size - 1), + Qnil); + Lisp_Object tl = XCDR (elems); + for (int i = 1; i < size; i++) + { + ASET (obj, i, XCAR (tl)); + tl = XCDR (tl); + } + return obj; +} - params[param_count] = QCpurecopy; - params[param_count + 1] = Fplist_get (tmp, Qpurecopy); - if (!NILP (params[param_count + 1])) - param_count += 2; +/* Turn a reversed list into a vector. */ +static Lisp_Object +vector_from_rev_list (Lisp_Object elems) +{ + ptrdiff_t size = list_length (elems); + Lisp_Object obj = make_nil_vector (size); + Lisp_Object *vec = XVECTOR (obj)->contents; + for (ptrdiff_t i = size - 1; i >= 0; i--) + { + vec[i] = XCAR (elems); + Lisp_Object next = XCDR (elems); + free_cons (XCONS (elems)); + elems = next; + } + return obj; +} - /* This is the hash table data. */ - data = Fplist_get (tmp, Qdata); +static Lisp_Object +bytecode_from_rev_list (Lisp_Object elems, Lisp_Object readcharfun) +{ + Lisp_Object obj = vector_from_rev_list (elems); + Lisp_Object *vec = XVECTOR (obj)->contents; + ptrdiff_t size = ASIZE (obj); + + if (!(size >= COMPILED_STACK_DEPTH + 1 && size <= COMPILED_INTERACTIVE + 1 + && (FIXNUMP (vec[COMPILED_ARGLIST]) + || CONSP (vec[COMPILED_ARGLIST]) + || NILP (vec[COMPILED_ARGLIST])) + && FIXNATP (vec[COMPILED_STACK_DEPTH]))) + invalid_syntax ("Invalid byte-code object", readcharfun); + + if (load_force_doc_strings + && NILP (vec[COMPILED_CONSTANTS]) + && STRINGP (vec[COMPILED_BYTECODE])) + { + /* Lazily-loaded bytecode is represented by the constant slot being nil + and the bytecode slot a (lazily loaded) string containing the + print representation of (BYTECODE . CONSTANTS). Unpack the + pieces by coerceing the string to unibyte and reading the result. */ + Lisp_Object enc = vec[COMPILED_BYTECODE]; + Lisp_Object pair = Fread (Fcons (enc, readcharfun)); + if (!CONSP (pair)) + invalid_syntax ("Invalid byte-code object", readcharfun); - /* Now use params to make a new hash table and fill it. */ - ht = Fmake_hash_table (param_count, params); + vec[COMPILED_BYTECODE] = XCAR (pair); + vec[COMPILED_CONSTANTS] = XCDR (pair); + } - Lisp_Object last = data; - FOR_EACH_TAIL_SAFE (data) - { - key = XCAR (data); - data = XCDR (data); - if (!CONSP (data)) - break; - val = XCAR (data); - last = XCDR (data); - Fputhash (key, val, ht); - } - if (!NILP (last)) - error ("Hash table data is not a list of even length"); + if (!((STRINGP (vec[COMPILED_BYTECODE]) + && VECTORP (vec[COMPILED_CONSTANTS])) + || CONSP (vec[COMPILED_BYTECODE]))) + invalid_syntax ("Invalid byte-code object", readcharfun); - return ht; - } - UNREAD (c); - invalid_syntax ("#", readcharfun); - } - if (c == '^') - { - c = READCHAR; - if (c == '[') - { - Lisp_Object tmp; - tmp = read_vector (readcharfun, 0, false); - if (ASIZE (tmp) < CHAR_TABLE_STANDARD_SLOTS) - error ("Invalid size char-table"); - XSETPVECTYPE (XVECTOR (tmp), PVEC_CHAR_TABLE); - return tmp; - } - else if (c == '^') - { - c = READCHAR; - if (c == '[') - { - /* Sub char-table can't be read as a regular - vector because of a two C integer fields. */ - Lisp_Object tbl, tmp = read_list (1, readcharfun, false); - ptrdiff_t size = list_length (tmp); - int i, depth, min_char; - struct Lisp_Cons *cell; - - if (size == 0) - error ("Zero-sized sub char-table"); - - if (! RANGED_FIXNUMP (1, XCAR (tmp), 3)) - error ("Invalid depth in sub char-table"); - depth = XFIXNUM (XCAR (tmp)); - if (chartab_size[depth] != size - 2) - error ("Invalid size in sub char-table"); - cell = XCONS (tmp), tmp = XCDR (tmp), size--; - free_cons (cell); - - if (! RANGED_FIXNUMP (0, XCAR (tmp), MAX_CHAR)) - error ("Invalid minimum character in sub-char-table"); - min_char = XFIXNUM (XCAR (tmp)); - cell = XCONS (tmp), tmp = XCDR (tmp), size--; - free_cons (cell); - - tbl = make_uninit_sub_char_table (depth, min_char); - for (i = 0; i < size; i++) - { - XSUB_CHAR_TABLE (tbl)->contents[i] = XCAR (tmp); - cell = XCONS (tmp), tmp = XCDR (tmp); - free_cons (cell); - } - return tbl; - } - invalid_syntax ("#^^", readcharfun); - } - invalid_syntax ("#^", readcharfun); - } - if (c == '&') + if (STRINGP (vec[COMPILED_BYTECODE])) + { + if (STRING_MULTIBYTE (vec[COMPILED_BYTECODE])) { - Lisp_Object length; - length = read1 (readcharfun, pch, first_in_list, false); - c = READCHAR; - if (c == '"') - { - Lisp_Object tmp, val; - EMACS_INT size_in_chars = bool_vector_bytes (XFIXNAT (length)); - unsigned char *data; - - UNREAD (c); - tmp = read1 (readcharfun, pch, first_in_list, false); - if (STRING_MULTIBYTE (tmp) - || (size_in_chars != SCHARS (tmp) - /* We used to print 1 char too many - when the number of bits was a multiple of 8. - Accept such input in case it came from an old - version. */ - && ! (XFIXNAT (length) - == (SCHARS (tmp) - 1) * BOOL_VECTOR_BITS_PER_CHAR))) - invalid_syntax ("#&...", readcharfun); - - val = make_uninit_bool_vector (XFIXNAT (length)); - data = bool_vector_uchar_data (val); - memcpy (data, SDATA (tmp), size_in_chars); - /* Clear the extraneous bits in the last byte. */ - if (XFIXNUM (length) != size_in_chars * BOOL_VECTOR_BITS_PER_CHAR) - data[size_in_chars - 1] - &= (1 << (XFIXNUM (length) % BOOL_VECTOR_BITS_PER_CHAR)) - 1; - return val; - } - invalid_syntax ("#&...", readcharfun); + /* BYTESTR must have been produced by Emacs 20.2 or earlier + because it produced a raw 8-bit string for byte-code and + now such a byte-code string is loaded as multibyte with + raw 8-bit characters converted to multibyte form. + Convert them back to the original unibyte form. */ + vec[COMPILED_BYTECODE] = Fstring_as_unibyte (vec[COMPILED_BYTECODE]); } - if (c == '[') - { - /* Accept compiled functions at read-time so that we don't have to - build them using function calls. */ - Lisp_Object tmp; - struct Lisp_Vector *vec; - tmp = read_vector (readcharfun, 1, false); - vec = XVECTOR (tmp); - if (! (COMPILED_STACK_DEPTH < ASIZE (tmp) - && (FIXNUMP (AREF (tmp, COMPILED_ARGLIST)) - || CONSP (AREF (tmp, COMPILED_ARGLIST)) - || NILP (AREF (tmp, COMPILED_ARGLIST))) - && ((STRINGP (AREF (tmp, COMPILED_BYTECODE)) - && VECTORP (AREF (tmp, COMPILED_CONSTANTS))) - || CONSP (AREF (tmp, COMPILED_BYTECODE))) - && FIXNATP (AREF (tmp, COMPILED_STACK_DEPTH)))) - invalid_syntax ("Invalid byte-code object", readcharfun); - - if (STRINGP (AREF (tmp, COMPILED_BYTECODE))) - { - if (STRING_MULTIBYTE (AREF (tmp, COMPILED_BYTECODE))) - { - /* BYTESTR must have been produced by Emacs 20.2 or earlier - because it produced a raw 8-bit string for byte-code and - now such a byte-code string is loaded as multibyte with - raw 8-bit characters converted to multibyte form. - Convert them back to the original unibyte form. */ - ASET (tmp, COMPILED_BYTECODE, - Fstring_as_unibyte (AREF (tmp, COMPILED_BYTECODE))); - } - // Bytecode must be immovable. - pin_string (AREF (tmp, COMPILED_BYTECODE)); - } + // Bytecode must be immovable. + pin_string (vec[COMPILED_BYTECODE]); + } - XSETPVECTYPE (vec, PVEC_COMPILED); - return tmp; - } - if (c == '(') - { - Lisp_Object tmp; - int ch; - - /* Read the string itself. */ - tmp = read1 (readcharfun, &ch, 0, false); - if (ch != 0 || !STRINGP (tmp)) - invalid_syntax ("#", readcharfun); - /* Read the intervals and their properties. */ - while (1) - { - Lisp_Object beg, end, plist; + XSETPVECTYPE (XVECTOR (obj), PVEC_COMPILED); + return obj; +} - beg = read1 (readcharfun, &ch, 0, false); - end = plist = Qnil; - if (ch == ')') - break; - if (ch == 0) - end = read1 (readcharfun, &ch, 0, false); - if (ch == 0) - plist = read1 (readcharfun, &ch, 0, false); - if (ch) - invalid_syntax ("Invalid string property list", readcharfun); - Fset_text_properties (beg, end, plist, tmp); - } +static Lisp_Object +char_table_from_rev_list (Lisp_Object elems, Lisp_Object readcharfun) +{ + Lisp_Object obj = vector_from_rev_list (elems); + if (ASIZE (obj) < CHAR_TABLE_STANDARD_SLOTS) + invalid_syntax ("Invalid size char-table", readcharfun); + XSETPVECTYPE (XVECTOR (obj), PVEC_CHAR_TABLE); + return obj; - return tmp; - } +} + +static Lisp_Object +sub_char_table_from_rev_list (Lisp_Object elems, Lisp_Object readcharfun) +{ + /* A sub-char-table can't be read as a regular vector because of two + C integer fields. */ + elems = Fnreverse (elems); + ptrdiff_t size = list_length (elems); + if (size < 2) + error ("Invalid size of sub-char-table"); + + if (!RANGED_FIXNUMP (1, XCAR (elems), 3)) + error ("Invalid depth in sub-char-table"); + int depth = XFIXNUM (XCAR (elems)); + + if (chartab_size[depth] != size - 2) + error ("Invalid size in sub-char-table"); + elems = XCDR (elems); + + if (!RANGED_FIXNUMP (0, XCAR (elems), MAX_CHAR)) + error ("Invalid minimum character in sub-char-table"); + int min_char = XFIXNUM (XCAR (elems)); + elems = XCDR (elems); + + Lisp_Object tbl = make_uninit_sub_char_table (depth, min_char); + for (int i = 0; i < size - 2; i++) + { + XSUB_CHAR_TABLE (tbl)->contents[i] = XCAR (elems); + elems = XCDR (elems); + } + return tbl; +} - /* #@NUMBER is used to skip NUMBER following bytes. - That's used in .elc files to skip over doc strings - and function definitions. */ - if (c == '@') +static Lisp_Object +string_props_from_rev_list (Lisp_Object elems, Lisp_Object readcharfun) +{ + elems = Fnreverse (elems); + if (NILP (elems) || !STRINGP (XCAR (elems))) + invalid_syntax ("#", readcharfun); + Lisp_Object obj = XCAR (elems); + for (Lisp_Object tl = XCDR (elems); !NILP (tl);) + { + Lisp_Object beg = XCAR (tl); + tl = XCDR (tl); + if (NILP (tl)) + invalid_syntax ("Invalid string property list", readcharfun); + Lisp_Object end = XCAR (tl); + tl = XCDR (tl); + if (NILP (tl)) + invalid_syntax ("Invalid string property list", readcharfun); + Lisp_Object plist = XCAR (tl); + tl = XCDR (tl); + Fset_text_properties (beg, end, plist, obj); + } + return obj; +} + +/* Read a bool vector (preceded by "#&"). */ +static Lisp_Object +read_bool_vector (char stackbuf[VLA_ELEMS (stackbufsize)], + Lisp_Object readcharfun) +{ + ptrdiff_t length = 0; + for (;;) + { + int c = READCHAR; + if (c < '0' || c > '9') { - enum { extra = 100 }; - ptrdiff_t i, nskip = 0, digits = 0; + if (c != '"') + invalid_syntax ("#&", readcharfun); + break; + } + if (INT_MULTIPLY_WRAPV (length, 10, &length) + | INT_ADD_WRAPV (length, c - '0', &length)) + invalid_syntax ("#&", readcharfun); + } - /* Read a decimal integer. */ - while ((c = READCHAR) >= 0 - && c >= '0' && c <= '9') - { - if ((STRING_BYTES_BOUND - extra) / 10 <= nskip) - string_overflow (); - digits++; - nskip *= 10; - nskip += c - '0'; - if (digits == 2 && nskip == 0) - { /* We've just seen #@00, which means "skip to end". */ - skip_dyn_eof (readcharfun); - return Qnil; - } - } + ptrdiff_t size_in_chars = bool_vector_bytes (length); + Lisp_Object str = read_string_literal (stackbuf, readcharfun); + if (STRING_MULTIBYTE (str) + || !(size_in_chars == SCHARS (str) + /* We used to print 1 char too many when the number of bits + was a multiple of 8. Accept such input in case it came + from an old version. */ + || length == (SCHARS (str) - 1) * BOOL_VECTOR_BITS_PER_CHAR)) + invalid_syntax ("#&...", readcharfun); + + Lisp_Object obj = make_uninit_bool_vector (length); + unsigned char *data = bool_vector_uchar_data (obj); + memcpy (data, SDATA (str), size_in_chars); + /* Clear the extraneous bits in the last byte. */ + if (length != size_in_chars * BOOL_VECTOR_BITS_PER_CHAR) + data[size_in_chars - 1] &= (1 << (length % BOOL_VECTOR_BITS_PER_CHAR)) - 1; + return obj; +} + +/* Skip (and optionally remember) a lazily-loaded string + preceded by "#@". */ +static void +skip_lazy_string (Lisp_Object readcharfun) +{ + ptrdiff_t nskip = 0; + ptrdiff_t digits = 0; + for (;;) + { + int c = READCHAR; + if (c < '0' || c > '9') + { if (nskip > 0) /* We can't use UNREAD here, because in the code below we side-step - READCHAR. Instead, assume the first char after #@NNN occupies - a single byte, which is the case normally since it's just - a space. */ + READCHAR. Instead, assume the first char after #@NNN occupies + a single byte, which is the case normally since it's just + a space. */ nskip--; else UNREAD (c); - - if (load_force_doc_strings - && (FROM_FILE_P (readcharfun))) - { - /* If we are supposed to force doc strings into core right now, - record the last string that we skipped, - and record where in the file it comes from. */ - - /* But first exchange saved_doc_string - with prev_saved_doc_string, so we save two strings. */ - { - char *temp = saved_doc_string; - ptrdiff_t temp_size = saved_doc_string_size; - file_offset temp_pos = saved_doc_string_position; - ptrdiff_t temp_len = saved_doc_string_length; - - saved_doc_string = prev_saved_doc_string; - saved_doc_string_size = prev_saved_doc_string_size; - saved_doc_string_position = prev_saved_doc_string_position; - saved_doc_string_length = prev_saved_doc_string_length; - - prev_saved_doc_string = temp; - prev_saved_doc_string_size = temp_size; - prev_saved_doc_string_position = temp_pos; - prev_saved_doc_string_length = temp_len; - } - - if (saved_doc_string_size == 0) - { - saved_doc_string = xmalloc (nskip + extra); - saved_doc_string_size = nskip + extra; - } - if (nskip > saved_doc_string_size) - { - saved_doc_string = xrealloc (saved_doc_string, nskip + extra); - saved_doc_string_size = nskip + extra; - } - - FILE *instream = infile->stream; - saved_doc_string_position = (file_tell (instream) - - infile->lookahead); - - /* Copy that many bytes into saved_doc_string. */ - i = 0; - for (int n = min (nskip, infile->lookahead); 0 < n; n--) - saved_doc_string[i++] - = c = infile->buf[--infile->lookahead]; - block_input (); - for (; i < nskip && 0 <= c; i++) - saved_doc_string[i] = c = getc (instream); - unblock_input (); - - saved_doc_string_length = i; - } - else - /* Skip that many bytes. */ - skip_dyn_bytes (readcharfun, nskip); - - goto retry; + break; } - if (c == '!') + if (INT_MULTIPLY_WRAPV (nskip, 10, &nskip) + | INT_ADD_WRAPV (nskip, c - '0', &nskip)) + invalid_syntax ("#@", readcharfun); + digits++; + if (digits == 2 && nskip == 0) { - /* #! appears at the beginning of an executable file. - Skip the first line. */ - while (c != '\n' && c >= 0) - c = READCHAR; - goto retry; + /* #@00 means "skip to end" */ + skip_dyn_eof (readcharfun); + return; } - if (c == '$') - return Vload_file_name; - if (c == '\'') - return list2 (Qfunction, read0 (readcharfun, locate_syms)); - /* #:foo is the uninterned symbol named foo. */ - if (c == ':') + } + + if (load_force_doc_strings && FROM_FILE_P (readcharfun)) + { + /* If we are supposed to force doc strings into core right now, + record the last string that we skipped, + and record where in the file it comes from. */ + + /* But first exchange saved_doc_string + with prev_saved_doc_string, so we save two strings. */ + { + char *temp = saved_doc_string; + ptrdiff_t temp_size = saved_doc_string_size; + file_offset temp_pos = saved_doc_string_position; + ptrdiff_t temp_len = saved_doc_string_length; + + saved_doc_string = prev_saved_doc_string; + saved_doc_string_size = prev_saved_doc_string_size; + saved_doc_string_position = prev_saved_doc_string_position; + saved_doc_string_length = prev_saved_doc_string_length; + + prev_saved_doc_string = temp; + prev_saved_doc_string_size = temp_size; + prev_saved_doc_string_position = temp_pos; + prev_saved_doc_string_length = temp_len; + } + + enum { extra = 100 }; + if (saved_doc_string_size == 0) { - uninterned_symbol = true; - read_hash_prefixed_symbol: - c = READCHAR; - if (!(c > 040 - && c != NO_BREAK_SPACE - && (c >= 0200 - || strchr ("\"';()[]#`,", c) == NULL))) - { - /* No symbol character follows, this is the empty - symbol. */ - UNREAD (c); - return Fmake_symbol (empty_unibyte_string); - } - goto read_symbol; + saved_doc_string = xmalloc (nskip + extra); + saved_doc_string_size = nskip + extra; } - /* #_foo is really the symbol foo, regardless of shorthands */ - if (c == '_') + if (nskip > saved_doc_string_size) { - skip_shorthand = true; - goto read_hash_prefixed_symbol; + saved_doc_string = xrealloc (saved_doc_string, nskip + extra); + saved_doc_string_size = nskip + extra; } - /* ## is the empty symbol. */ - if (c == '#') - return Fintern (empty_unibyte_string, Qnil); - if (c >= '0' && c <= '9') - { - EMACS_INT n = c - '0'; - bool overflow = false; + FILE *instream = infile->stream; + saved_doc_string_position = (file_tell (instream) - infile->lookahead); - /* Read a non-negative integer. */ - while ('0' <= (c = READCHAR) && c <= '9') - { - overflow |= INT_MULTIPLY_WRAPV (n, 10, &n); - overflow |= INT_ADD_WRAPV (n, c - '0', &n); - } + /* Copy that many bytes into saved_doc_string. */ + ptrdiff_t i = 0; + int c; + for (int n = min (nskip, infile->lookahead); n > 0; n--) + saved_doc_string[i++] = c = infile->buf[--infile->lookahead]; + block_input (); + for (; i < nskip && c >= 0; i++) + saved_doc_string[i] = c = getc (instream); + unblock_input (); - if (!overflow) - { - if (c == 'r' || c == 'R') - { - if (! (2 <= n && n <= 36)) - invalid_radix_integer (n, stackbuf, readcharfun); - return read_integer (readcharfun, n, stackbuf); - } + saved_doc_string_length = i; + } + else + /* Skip that many bytes. */ + skip_dyn_bytes (readcharfun, nskip); +} - if (n <= MOST_POSITIVE_FIXNUM && ! NILP (Vread_circle)) - { - /* Reader forms that can reuse previously read objects. */ - /* #n=object returns object, but associates it with - n for #n#. */ - if (c == '=') - { - /* Make a placeholder for #n# to use temporarily. */ - /* Note: We used to use AUTO_CONS to allocate - placeholder, but that is a bad idea, since it - will place a stack-allocated cons cell into - the list in read_objects_map, which is a - staticpro'd global variable, and thus each of - its elements is marked during each GC. A - stack-allocated object will become garbled - when its stack slot goes out of scope, and - some other function reuses it for entirely - different purposes, which will cause crashes - in GC. */ - Lisp_Object placeholder = Fcons (Qnil, Qnil); - struct Lisp_Hash_Table *h - = XHASH_TABLE (read_objects_map); - Lisp_Object number = make_fixnum (n), hash; - - ptrdiff_t i = hash_lookup (h, number, &hash); - if (i >= 0) - /* Not normal, but input could be malformed. */ - set_hash_value_slot (h, i, placeholder); - else - hash_put (h, number, placeholder, hash); - - /* Read the object itself. */ - Lisp_Object tem = read0 (readcharfun, locate_syms); - - if (CONSP (tem)) - { - if (BASE_EQ (tem, placeholder)) - /* Catch silly games like #1=#1# */ - invalid_syntax ("nonsensical self-reference", - readcharfun); +/* Length of prefix only consisting of symbol constituent characters. */ +static ptrdiff_t +symbol_char_span (const char *s) +{ + const char *p = s; + while ( *p == '^' || *p == '*' || *p == '+' || *p == '-' || *p == '/' + || *p == '<' || *p == '=' || *p == '>' || *p == '_' || *p == '|') + p++; + return p - s; +} - /* Optimisation: since the placeholder is already - a cons, repurpose it as the actual value. - This allows us to skip the substitution below, - since the placeholder is already referenced - inside TEM at the appropriate places. */ - Fsetcar (placeholder, XCAR (tem)); - Fsetcdr (placeholder, XCDR (tem)); - - struct Lisp_Hash_Table *h2 - = XHASH_TABLE (read_objects_completed); - ptrdiff_t i = hash_lookup (h2, placeholder, &hash); - eassert (i < 0); - hash_put (h2, placeholder, Qnil, hash); - return placeholder; - } - - /* If it can be recursive, remember it for - future substitutions. */ - if (! SYMBOLP (tem) - && ! NUMBERP (tem) - && ! (STRINGP (tem) && !string_intervals (tem))) - { - struct Lisp_Hash_Table *h2 - = XHASH_TABLE (read_objects_completed); - i = hash_lookup (h2, tem, &hash); - eassert (i < 0); - hash_put (h2, tem, Qnil, hash); - } - - /* Now put it everywhere the placeholder was... */ - Flread__substitute_object_in_subtree - (tem, placeholder, read_objects_completed); - - /* ...and #n# will use the real value from now on. */ - i = hash_lookup (h, number, &hash); - eassert (i >= 0); - set_hash_value_slot (h, i, tem); - - return tem; - } +static void +skip_space_and_comments (Lisp_Object readcharfun) +{ + int c; + do + { + c = READCHAR; + if (c == ';') + do + c = READCHAR; + while (c >= 0 && c != '\n'); + if (c < 0) + end_of_file_error (); + } + while (c <= 32 || c == NO_BREAK_SPACE); + UNREAD (c); +} - /* #n# returns a previously read object. */ - if (c == '#') - { - struct Lisp_Hash_Table *h - = XHASH_TABLE (read_objects_map); - ptrdiff_t i = hash_lookup (h, make_fixnum (n), NULL); - if (i >= 0) - return HASH_VALUE (h, i); - } - } - } - /* Fall through to error message. */ - } - else if (c == 'x' || c == 'X') - return read_integer (readcharfun, 16, stackbuf); - else if (c == 'o' || c == 'O') - return read_integer (readcharfun, 8, stackbuf); - else if (c == 'b' || c == 'B') - return read_integer (readcharfun, 2, stackbuf); - - char acm_buf[15]; /* FIXME!!! 2021-11-27. */ - sprintf (acm_buf, "#%c", c); - invalid_syntax (acm_buf, readcharfun); - UNREAD (c); - invalid_syntax ("#", readcharfun); +/* When an object is read, the type of the top read stack entry indicates + the syntactic context. */ +enum read_entry_type +{ + /* preceding syntactic context */ + RE_list_start, /* "(" */ - case ';': - while ((c = READCHAR) >= 0 && c != '\n'); - goto retry; + RE_list, /* "(" (+ OBJECT) */ + RE_list_dot, /* "(" (+ OBJECT) "." */ - case '\'': - return list2 (Qquote, read0 (readcharfun, locate_syms)); + RE_vector, /* "[" (* OBJECT) */ + RE_record, /* "#s(" (* OBJECT) */ + RE_char_table, /* "#^[" (* OBJECT) */ + RE_sub_char_table, /* "#^^[" (* OBJECT) */ + RE_byte_code, /* "#[" (* OBJECT) */ + RE_string_props, /* "#(" (* OBJECT) */ - case '`': - return list2 (Qbackquote, read0 (readcharfun, locate_syms)); + RE_special, /* "'" | "#'" | "`" | "," | ",@" */ - case ',': - { - Lisp_Object comma_type = Qnil; - Lisp_Object value; - int ch = READCHAR; + RE_numbered, /* "#" (+ DIGIT) "=" */ +}; - if (ch == '@') - comma_type = Qcomma_at; - else - { - if (ch >= 0) UNREAD (ch); - comma_type = Qcomma; - } +struct read_stack_entry +{ + enum read_entry_type type; + union { + /* RE_list, RE_list_dot */ + struct { + Lisp_Object head; /* first cons of list */ + Lisp_Object tail; /* last cons of list */ + } list; + + /* RE_vector, RE_record, RE_char_table, RE_sub_char_table, + RE_byte_code, RE_string_props */ + struct { + Lisp_Object elems; /* list of elements in reverse order */ + bool old_locate_syms; /* old value of locate_syms */ + } vector; + + /* RE_special */ + struct { + Lisp_Object symbol; /* symbol from special syntax */ + } special; + + /* RE_numbered */ + struct { + Lisp_Object number; /* number as a fixnum */ + Lisp_Object placeholder; /* placeholder object */ + } numbered; + } u; +}; - value = read0 (readcharfun, locate_syms); - return list2 (comma_type, value); - } - case '?': - { - int modifiers; - int next_char; - bool ok; +struct read_stack +{ + struct read_stack_entry *stack; /* base of stack */ + ptrdiff_t size; /* allocated size in entries */ + ptrdiff_t sp; /* current number of entries */ +}; - c = READCHAR; - if (c < 0) - end_of_file_error (); - - /* Accept `single space' syntax like (list ? x) where the - whitespace character is SPC or TAB. - Other literal whitespace like NL, CR, and FF are not accepted, - as there are well-established escape sequences for these. */ - if (c == ' ' || c == '\t') - return make_fixnum (c); - - if (c == '(' || c == ')' || c == '[' || c == ']' - || c == '"' || c == ';') +static struct read_stack rdstack = {NULL, 0, 0}; + +void +mark_lread (void) +{ + /* Mark the read stack, which may contain data not otherwise traced */ + for (ptrdiff_t i = 0; i < rdstack.sp; i++) + { + struct read_stack_entry *e = &rdstack.stack[i]; + switch (e->type) + { + case RE_list_start: + break; + case RE_list: + case RE_list_dot: + mark_object (e->u.list.head); + mark_object (e->u.list.tail); + break; + case RE_vector: + case RE_record: + case RE_char_table: + case RE_sub_char_table: + case RE_byte_code: + case RE_string_props: + mark_object (e->u.vector.elems); + break; + case RE_special: + mark_object (e->u.special.symbol); + break; + case RE_numbered: + mark_object (e->u.numbered.number); + mark_object (e->u.numbered.placeholder); + break; + } + } +} + +static inline struct read_stack_entry * +read_stack_top (void) +{ + eassume (rdstack.sp > 0); + return &rdstack.stack[rdstack.sp - 1]; +} + +static inline struct read_stack_entry * +read_stack_pop (void) +{ + eassume (rdstack.sp > 0); + return &rdstack.stack[--rdstack.sp]; +} + +static inline bool +read_stack_empty_p (ptrdiff_t base_sp) +{ + return rdstack.sp <= base_sp; +} + +NO_INLINE static void +grow_read_stack (void) +{ + struct read_stack *rs = &rdstack; + eassert (rs->sp == rs->size); + rs->stack = xpalloc (rs->stack, &rs->size, 1, -1, sizeof *rs->stack); + eassert (rs->sp < rs->size); +} + +static inline void +read_stack_push (struct read_stack_entry e) +{ + if (rdstack.sp >= rdstack.size) + grow_read_stack (); + rdstack.stack[rdstack.sp++] = e; +} + + +/* Read a Lisp object. + If LOCATE_SYMS is true, symbols are read with position. */ +static Lisp_Object +read0 (Lisp_Object readcharfun, bool locate_syms) +{ + char stackbuf[stackbufsize]; + char *read_buffer = stackbuf; + ptrdiff_t read_buffer_size = sizeof stackbuf; + char *heapbuf = NULL; + specpdl_ref count = SPECPDL_INDEX (); + + ptrdiff_t base_sp = rdstack.sp; + + bool uninterned_symbol; + bool skip_shorthand; + + /* Read an object into `obj'. */ + read_obj: ; + Lisp_Object obj; + bool multibyte; + int c = READCHAR_REPORT_MULTIBYTE (&multibyte); + if (c < 0) + end_of_file_error (); + + switch (c) + { + case '(': + read_stack_push ((struct read_stack_entry) {.type = RE_list_start}); + goto read_obj; + + case ')': + if (read_stack_empty_p (base_sp)) + invalid_syntax (")", readcharfun); + switch (read_stack_top ()->type) + { + case RE_list_start: + read_stack_pop (); + obj = Qnil; + break; + case RE_list: + obj = read_stack_pop ()->u.list.head; + break; + case RE_record: { - CHECK_LIST (Vlread_unescaped_character_literals); - Lisp_Object char_obj = make_fixed_natnum (c); - if (NILP (Fmemq (char_obj, Vlread_unescaped_character_literals))) - Vlread_unescaped_character_literals = - Fcons (char_obj, Vlread_unescaped_character_literals); + locate_syms = read_stack_top ()->u.vector.old_locate_syms; + Lisp_Object elems = Fnreverse (read_stack_pop ()->u.vector.elems); + if (NILP (elems)) + invalid_syntax ("#s", readcharfun); + + if (BASE_EQ (XCAR (elems), Qhash_table)) + obj = hash_table_from_plist (XCDR (elems)); + else + obj = record_from_list (elems); + break; } + case RE_string_props: + locate_syms = read_stack_top ()->u.vector.old_locate_syms; + obj = string_props_from_rev_list (read_stack_pop () ->u.vector.elems, + readcharfun); + break; + default: + invalid_syntax (")", readcharfun); + } + break; - if (c == '\\') - c = read_escape (readcharfun, 0); - modifiers = c & CHAR_MODIFIER_MASK; - c &= ~CHAR_MODIFIER_MASK; - if (CHAR_BYTE8_P (c)) - c = CHAR_TO_BYTE8 (c); - c |= modifiers; - - next_char = READCHAR; - ok = (next_char <= 040 - || (next_char < 0200 - && strchr ("\"';()[]#?`,.", next_char) != NULL)); - UNREAD (next_char); - if (ok) - return make_fixnum (c); - - invalid_syntax ("?", readcharfun); - } + case '[': + read_stack_push ((struct read_stack_entry) { + .type = RE_vector, + .u.vector.elems = Qnil, + .u.vector.old_locate_syms = locate_syms, + }); + /* FIXME: should vectors be read with locate_syms=false? */ + goto read_obj; - case '"': + case ']': + if (read_stack_empty_p (base_sp)) + invalid_syntax ("]", readcharfun); + switch (read_stack_top ()->type) + { + case RE_vector: + locate_syms = read_stack_top ()->u.vector.old_locate_syms; + obj = vector_from_rev_list (read_stack_pop ()->u.vector.elems); + break; + case RE_byte_code: + locate_syms = read_stack_top ()->u.vector.old_locate_syms; + obj = bytecode_from_rev_list (read_stack_pop ()->u.vector.elems, + readcharfun); + break; + case RE_char_table: + locate_syms = read_stack_top ()->u.vector.old_locate_syms; + obj = char_table_from_rev_list (read_stack_pop ()->u.vector.elems, + readcharfun); + break; + case RE_sub_char_table: + locate_syms = read_stack_top ()->u.vector.old_locate_syms; + obj = sub_char_table_from_rev_list (read_stack_pop ()->u.vector.elems, + readcharfun); + break; + default: + invalid_syntax ("]", readcharfun); + break; + } + break; + + case '#': { - specpdl_ref count = SPECPDL_INDEX (); - char *read_buffer = stackbuf; - ptrdiff_t read_buffer_size = sizeof stackbuf; - char *heapbuf = NULL; - char *p = read_buffer; - char *end = read_buffer + read_buffer_size; - int ch; - /* True if we saw an escape sequence specifying - a multibyte character. */ - bool force_multibyte = false; - /* True if we saw an escape sequence specifying - a single-byte character. */ - bool force_singlebyte = false; - bool cancel = false; - ptrdiff_t nchars = 0; - - while ((ch = READCHAR) >= 0 - && ch != '\"') + int ch = READCHAR; + switch (ch) { - if (end - p < MAX_MULTIBYTE_LENGTH) + case '\'': + /* #'X -- special syntax for (function X) */ + read_stack_push ((struct read_stack_entry) { + .type = RE_special, + .u.special.symbol = Qfunction, + }); + goto read_obj; + + case '#': + /* ## -- the empty symbol */ + obj = Fintern (empty_unibyte_string, Qnil); + break; + + case 's': + /* #s(...) -- a record or hash-table */ + ch = READCHAR; + if (ch != '(') { - ptrdiff_t offset = p - read_buffer; - read_buffer = grow_read_buffer (read_buffer, offset, - &heapbuf, &read_buffer_size, - count); - p = read_buffer + offset; - end = read_buffer + read_buffer_size; + UNREAD (ch); + invalid_syntax ("#s", readcharfun); + } + read_stack_push ((struct read_stack_entry) { + .type = RE_record, + .u.vector.elems = Qnil, + .u.vector.old_locate_syms = locate_syms, + }); + locate_syms = false; + goto read_obj; + + case '^': + /* #^[...] -- char-table + #^^[...] -- sub-char-table */ + ch = READCHAR; + if (ch == '^') + { + ch = READCHAR; + if (ch == '[') + { + read_stack_push ((struct read_stack_entry) { + .type = RE_sub_char_table, + .u.vector.elems = Qnil, + .u.vector.old_locate_syms = locate_syms, + }); + locate_syms = false; + goto read_obj; + } + else + { + UNREAD (ch); + invalid_syntax ("#^^", readcharfun); + } + } + else if (ch == '[') + { + read_stack_push ((struct read_stack_entry) { + .type = RE_char_table, + .u.vector.elems = Qnil, + .u.vector.old_locate_syms = locate_syms, + }); + locate_syms = false; + goto read_obj; + } + else + { + UNREAD (ch); + invalid_syntax ("#^", readcharfun); } - if (ch == '\\') + case '(': + /* #(...) -- string with properties */ + read_stack_push ((struct read_stack_entry) { + .type = RE_string_props, + .u.vector.elems = Qnil, + .u.vector.old_locate_syms = locate_syms, + }); + locate_syms = false; + goto read_obj; + + case '[': + /* #[...] -- byte-code */ + read_stack_push ((struct read_stack_entry) { + .type = RE_byte_code, + .u.vector.elems = Qnil, + .u.vector.old_locate_syms = locate_syms, + }); + locate_syms = false; + goto read_obj; + + case '&': + /* #&N"..." -- bool-vector */ + obj = read_bool_vector (stackbuf, readcharfun); + break; + + case '!': + /* #! appears at the beginning of an executable file. + Skip the rest of the line. */ + { + int c; + do + c = READCHAR; + while (c >= 0 && c != '\n'); + goto read_obj; + } + + case 'x': + case 'X': + obj = read_integer (readcharfun, 16, stackbuf); + break; + + case 'o': + case 'O': + obj = read_integer (readcharfun, 8, stackbuf); + break; + + case 'b': + case 'B': + obj = read_integer (readcharfun, 2, stackbuf); + break; + + case '@': + /* #@NUMBER is used to skip NUMBER following bytes. + That's used in .elc files to skip over doc strings + and function definitions that can be loaded lazily. */ + skip_lazy_string (readcharfun); + goto read_obj; + + case '$': + /* #$ -- reference to lazy-loaded string */ + obj = Vload_file_name; + break; + + case ':': + /* #:X -- uninterned symbol */ + c = READCHAR; + if (c <= 32 || c == NO_BREAK_SPACE + || c == '"' || c == '\'' || c == ';' || c == '#' + || c == '(' || c == ')' || c == '[' || c == ']' + || c == '`' || c == ',') { - int modifiers; + /* No symbol character follows: this is the empty symbol. */ + UNREAD (c); + obj = Fmake_symbol (empty_unibyte_string); + break; + } + uninterned_symbol = true; + skip_shorthand = false; + goto read_symbol; - ch = read_escape (readcharfun, 1); + case '_': + /* #_X -- symbol without shorthand */ + c = READCHAR; + if (c <= 32 || c == NO_BREAK_SPACE + || c == '"' || c == '\'' || c == ';' || c == '#' + || c == '(' || c == ')' || c == '[' || c == ']' + || c == '`' || c == ',') + { + /* No symbol character follows: this is the empty symbol. */ + UNREAD (c); + obj = Fintern (empty_unibyte_string, Qnil); + break; + } + uninterned_symbol = false; + skip_shorthand = true; + goto read_symbol; - /* CH is -1 if \ newline or \ space has just been seen. */ - if (ch == -1) + default: + if (ch >= '0' && ch <= '9') + { + /* #N=OBJ or #N# -- first read the number N */ + EMACS_INT n = ch - '0'; + int c; + for (;;) { - if (p == read_buffer) - cancel = true; - continue; + c = READCHAR; + if (c < '0' || c > '9') + break; + if (INT_MULTIPLY_WRAPV (n, 10, &n) + || INT_ADD_WRAPV (n, c - '0', &n)) + invalid_syntax ("#", readcharfun); } - - modifiers = ch & CHAR_MODIFIER_MASK; - ch = ch & ~CHAR_MODIFIER_MASK; - - if (CHAR_BYTE8_P (ch)) - force_singlebyte = true; - else if (! ASCII_CHAR_P (ch)) - force_multibyte = true; - else /* I.e. ASCII_CHAR_P (ch). */ + if (c == 'r' || c == 'R') { - /* Allow `\C- ' and `\C-?'. */ - if (modifiers == CHAR_CTL) - { - if (ch == ' ') - ch = 0, modifiers = 0; - else if (ch == '?') - ch = 127, modifiers = 0; - } - if (modifiers & CHAR_SHIFT) + /* #NrDIGITS -- radix-N number */ + if (n < 0 || n > 36) + invalid_radix_integer (n, stackbuf, readcharfun); + obj = read_integer (readcharfun, n, stackbuf); + break; + } + else if (n <= MOST_POSITIVE_FIXNUM && !NILP (Vread_circle)) + { + if (c == '=') { - /* Shift modifier is valid only with [A-Za-z]. */ - if (ch >= 'A' && ch <= 'Z') - modifiers &= ~CHAR_SHIFT; - else if (ch >= 'a' && ch <= 'z') - ch -= ('a' - 'A'), modifiers &= ~CHAR_SHIFT; + /* #N=OBJ -- assign number N to OBJ */ + Lisp_Object placeholder = Fcons (Qnil, Qnil); + + struct Lisp_Hash_Table *h + = XHASH_TABLE (read_objects_map); + Lisp_Object number = make_fixnum (n); + Lisp_Object hash; + ptrdiff_t i = hash_lookup (h, number, &hash); + if (i >= 0) + /* Not normal, but input could be malformed. */ + set_hash_value_slot (h, i, placeholder); + else + hash_put (h, number, placeholder, hash); + read_stack_push ((struct read_stack_entry) { + .type = RE_numbered, + .u.numbered.number = number, + .u.numbered.placeholder = placeholder, + }); + goto read_obj; } - - if (modifiers & CHAR_META) + else if (c == '#') { - /* Move the meta bit to the right place for a - string. */ - modifiers &= ~CHAR_META; - ch = BYTE8_TO_CHAR (ch | 0x80); - force_singlebyte = true; + /* #N# -- reference to numbered object */ + struct Lisp_Hash_Table *h + = XHASH_TABLE (read_objects_map); + ptrdiff_t i = hash_lookup (h, make_fixnum (n), NULL); + if (i < 0) + invalid_syntax ("#", readcharfun); + obj = HASH_VALUE (h, i); + break; } + else + invalid_syntax ("#", readcharfun); } - - /* Any modifiers remaining are invalid. */ - if (modifiers) - invalid_syntax ("Invalid modifier in string", readcharfun); - p += CHAR_STRING (ch, (unsigned char *) p); + else + invalid_syntax ("#", readcharfun); } else - { - p += CHAR_STRING (ch, (unsigned char *) p); - if (CHAR_BYTE8_P (ch)) - force_singlebyte = true; - else if (! ASCII_CHAR_P (ch)) - force_multibyte = true; - } - nchars++; + invalid_syntax ("#", readcharfun); } + break; + } + + case '?': + obj = read_char_literal (readcharfun); + break; - if (ch < 0) - end_of_file_error (); + case '"': + obj = read_string_literal (stackbuf, readcharfun); + break; - /* If purifying, and string starts with \ newline, - return zero instead. This is for doc strings - that we are really going to find in etc/DOC.nn.nn. */ - if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) - return unbind_to (count, make_fixnum (0)); + case '\'': + read_stack_push ((struct read_stack_entry) { + .type = RE_special, + .u.special.symbol = Qquote, + }); + goto read_obj; - if (! force_multibyte && force_singlebyte) + case '`': + read_stack_push ((struct read_stack_entry) { + .type = RE_special, + .u.special.symbol = Qbackquote, + }); + goto read_obj; + + case ',': + { + int ch = READCHAR; + Lisp_Object sym; + if (ch == '@') + sym = Qcomma_at; + else { - /* READ_BUFFER contains raw 8-bit bytes and no multibyte - forms. Convert it to unibyte. */ - nchars = str_as_unibyte ((unsigned char *) read_buffer, - p - read_buffer); - p = read_buffer + nchars; + if (ch >= 0) + UNREAD (ch); + sym = Qcomma; } + read_stack_push ((struct read_stack_entry) { + .type = RE_special, + .u.special.symbol = sym, + }); + goto read_obj; + } - Lisp_Object result - = make_specified_string (read_buffer, nchars, p - read_buffer, - (force_multibyte - || (p - read_buffer != nchars))); - return unbind_to (count, result); + case ';': + { + int c; + do + c = READCHAR; + while (c >= 0 && c != '\n'); + goto read_obj; } case '.': { - int next_char = READCHAR; - UNREAD (next_char); - - if (next_char <= 040 - || (next_char < 0200 - && strchr ("\"';([#?`,", next_char) != NULL)) + int nch = READCHAR; + UNREAD (nch); + if (nch <= 32 || nch == NO_BREAK_SPACE + || nch == '"' || nch == '\'' || nch == ';' + || nch == '(' || nch == '[' || nch == '#' + || nch == '?' || nch == '`' || nch == ',') { - *pch = c; - return Qnil; + if (!read_stack_empty_p (base_sp) + && read_stack_top ()->type == RE_list) + { + read_stack_top ()->type = RE_list_dot; + goto read_obj; + } + invalid_syntax (".", readcharfun); } } - /* The atom-reading loop below will now loop at least once, - assuring that we will not try to UNREAD two characters in a - row. */ + /* may be a number or symbol starting with a dot */ FALLTHROUGH; + default: - if (c <= 040) goto retry; - if (c == NO_BREAK_SPACE) - goto retry; + if (c <= 32 || c == NO_BREAK_SPACE) + goto read_obj; + uninterned_symbol = false; + skip_shorthand = false; + /* symbol or number */ read_symbol: { - specpdl_ref count = SPECPDL_INDEX (); - char *read_buffer = stackbuf; - ptrdiff_t read_buffer_size = sizeof stackbuf; - char *heapbuf = NULL; char *p = read_buffer; char *end = read_buffer + read_buffer_size; bool quoted = false; @@ -3805,7 +4104,7 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list, bool locate_syms) if (c == '\\') { c = READCHAR; - if (c == -1) + if (c < 0) end_of_file_error (); quoted = true; } @@ -3816,94 +4115,205 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list, bool locate_syms) *p++ = c; c = READCHAR; } - while (c > 040 + while (c > 32 && c != NO_BREAK_SPACE - && (c >= 0200 - || strchr ("\"';()[]#`,", c) == NULL)); + && (c >= 128 + || !( c == '"' || c == '\'' || c == ';' || c == '#' + || c == '(' || c == ')' || c == '[' || c == ']' + || c == '`' || c == ','))); *p = 0; ptrdiff_t nbytes = p - read_buffer; UNREAD (c); - if (!quoted && !uninterned_symbol && !skip_shorthand) + /* Only attempt to parse the token as a number if it starts as one. */ + char c0 = read_buffer[0]; + if (((c0 >= '0' && c0 <= '9') || c0 == '.' || c0 == '-' || c0 == '+') + && !quoted && !uninterned_symbol && !skip_shorthand) { ptrdiff_t len; Lisp_Object result = string_to_number (read_buffer, 10, &len); - if (! NILP (result) && len == nbytes) - return unbind_to (count, result); + if (!NILP (result) && len == nbytes) + { + obj = result; + break; + } } - { - Lisp_Object result; - ptrdiff_t nchars - = (multibyte - ? multibyte_chars_in_text ((unsigned char *) read_buffer, - nbytes) - : nbytes); - - if (uninterned_symbol) - { - Lisp_Object name - = ((! NILP (Vpurify_flag) - ? make_pure_string : make_specified_string) - (read_buffer, nchars, nbytes, multibyte)); - result = Fmake_symbol (name); - } - else - { - /* Don't create the string object for the name unless - we're going to retain it in a new symbol. - - Like intern_1 but supports multibyte names. */ - Lisp_Object obarray = check_obarray (Vobarray); - - char* longhand = NULL; - ptrdiff_t longhand_chars = 0; - ptrdiff_t longhand_bytes = 0; - - Lisp_Object tem; - if (skip_shorthand - /* The following ASCII characters are used in the - only "core" Emacs Lisp symbols that are comprised - entirely of characters that have the 'symbol - constituent' syntax. We exempt them from - transforming according to shorthands. */ - || strspn (read_buffer, "^*+-/<=>_|") >= nbytes) - tem = oblookup (obarray, read_buffer, nchars, nbytes); - else - tem = oblookup_considering_shorthand (obarray, read_buffer, + + /* symbol, possibly uninterned */ + ptrdiff_t nchars + = (multibyte + ? multibyte_chars_in_text ((unsigned char *)read_buffer, nbytes) + : nbytes); + Lisp_Object result; + if (uninterned_symbol) + { + Lisp_Object name + = (!NILP (Vpurify_flag) + ? make_pure_string (read_buffer, nchars, nbytes, multibyte) + : make_specified_string (read_buffer, nchars, nbytes, + multibyte)); + result = Fmake_symbol (name); + } + else + { + /* Don't create the string object for the name unless + we're going to retain it in a new symbol. + + Like intern_1 but supports multibyte names. */ + Lisp_Object obarray = check_obarray (Vobarray); + + char *longhand = NULL; + ptrdiff_t longhand_chars = 0; + ptrdiff_t longhand_bytes = 0; + + Lisp_Object found; + if (skip_shorthand + /* We exempt characters used in the "core" Emacs Lisp + symbols that are comprised entirely of characters + that have the 'symbol constituent' syntax from + transforming according to shorthands. */ + || symbol_char_span (read_buffer) >= nbytes) + found = oblookup (obarray, read_buffer, nchars, nbytes); + else + found = oblookup_considering_shorthand (obarray, read_buffer, nchars, nbytes, &longhand, &longhand_chars, &longhand_bytes); - if (SYMBOLP (tem)) - result = tem; - else if (longhand) - { - Lisp_Object name - = make_specified_string (longhand, longhand_chars, - longhand_bytes, multibyte); - xfree (longhand); - result = intern_driver (name, obarray, tem); - } - else - { - Lisp_Object name - = make_specified_string (read_buffer, nchars, nbytes, - multibyte); - result = intern_driver (name, obarray, tem); - } - } - if (locate_syms - && !NILP (result) - ) - result = build_symbol_with_pos (result, - make_fixnum (start_position)); + if (SYMBOLP (found)) + result = found; + else if (longhand) + { + Lisp_Object name = make_specified_string (longhand, + longhand_chars, + longhand_bytes, + multibyte); + xfree (longhand); + result = intern_driver (name, obarray, found); + } + else + { + Lisp_Object name = make_specified_string (read_buffer, nchars, + nbytes, multibyte); + result = intern_driver (name, obarray, found); + } + } + if (locate_syms && !NILP (result)) + result = build_symbol_with_pos (result, + make_fixnum (start_position)); - return unbind_to (count, result); - } + obj = result; + break; } } + + /* We have read an object in `obj'. Use the stack to decide what to + do with it. */ + while (rdstack.sp > base_sp) + { + struct read_stack_entry *e = read_stack_top (); + switch (e->type) + { + case RE_list_start: + e->type = RE_list; + e->u.list.head = e->u.list.tail = Fcons (obj, Qnil); + goto read_obj; + + case RE_list: + { + Lisp_Object tl = Fcons (obj, Qnil); + XSETCDR (e->u.list.tail, tl); + e->u.list.tail = tl; + goto read_obj; + } + + case RE_list_dot: + { + skip_space_and_comments (readcharfun); + int ch = READCHAR; + if (ch != ')') + invalid_syntax ("expected )", readcharfun); + XSETCDR (e->u.list.tail, obj); + read_stack_pop (); + obj = e->u.list.head; + break; + } + + case RE_vector: + case RE_record: + case RE_char_table: + case RE_sub_char_table: + case RE_byte_code: + case RE_string_props: + e->u.vector.elems = Fcons (obj, e->u.vector.elems); + goto read_obj; + + case RE_special: + read_stack_pop (); + obj = list2 (e->u.special.symbol, obj); + break; + + case RE_numbered: + { + read_stack_pop (); + Lisp_Object placeholder = e->u.numbered.placeholder; + if (CONSP (obj)) + { + if (BASE_EQ (obj, placeholder)) + /* Catch silly games like #1=#1# */ + invalid_syntax ("nonsensical self-reference", readcharfun); + + /* Optimisation: since the placeholder is already + a cons, repurpose it as the actual value. + This allows us to skip the substitution below, + since the placeholder is already referenced + inside OBJ at the appropriate places. */ + Fsetcar (placeholder, XCAR (obj)); + Fsetcdr (placeholder, XCDR (obj)); + + struct Lisp_Hash_Table *h2 + = XHASH_TABLE (read_objects_completed); + Lisp_Object hash; + ptrdiff_t i = hash_lookup (h2, placeholder, &hash); + eassert (i < 0); + hash_put (h2, placeholder, Qnil, hash); + obj = placeholder; + } + else + { + /* If it can be recursive, remember it for future + substitutions. */ + if (!SYMBOLP (obj) && !NUMBERP (obj) + && !(STRINGP (obj) && !string_intervals (obj))) + { + struct Lisp_Hash_Table *h2 + = XHASH_TABLE (read_objects_completed); + Lisp_Object hash; + ptrdiff_t i = hash_lookup (h2, obj, &hash); + eassert (i < 0); + hash_put (h2, obj, Qnil, hash); + } + + /* Now put it everywhere the placeholder was... */ + Flread__substitute_object_in_subtree (obj, placeholder, + read_objects_completed); + + /* ...and #n# will use the real value from now on. */ + struct Lisp_Hash_Table *h = XHASH_TABLE (read_objects_map); + Lisp_Object hash; + ptrdiff_t i = hash_lookup (h, e->u.numbered.number, &hash); + eassert (i >= 0); + set_hash_value_slot (h, i, obj); + } + break; + } + } + } + + return unbind_to (count, obj); } + DEFUN ("lread--substitute-object-in-subtree", Flread__substitute_object_in_subtree, @@ -4149,214 +4559,6 @@ string_to_number (char const *string, int base, ptrdiff_t *plen) return result; } - -static Lisp_Object -read_vector (Lisp_Object readcharfun, bool bytecodeflag, bool locate_syms) -{ - Lisp_Object tem = read_list (1, readcharfun, locate_syms); - ptrdiff_t size = list_length (tem); - Lisp_Object vector = make_nil_vector (size); - - /* Avoid accessing past the end of a vector if the vector is too - small to be valid for bytecode. */ - bytecodeflag &= COMPILED_STACK_DEPTH < size; - - Lisp_Object *ptr = XVECTOR (vector)->contents; - for (ptrdiff_t i = 0; i < size; i++) - { - Lisp_Object item = Fcar (tem); - /* If `load-force-doc-strings' is t when reading a lazily-loaded - bytecode object, the docstring containing the bytecode and - constants values must be treated as unibyte and passed to - Fread, to get the actual bytecode string and constants vector. */ - if (bytecodeflag && load_force_doc_strings) - { - if (i == COMPILED_BYTECODE) - { - if (!STRINGP (item)) - error ("Invalid byte code"); - - /* Delay handling the bytecode slot until we know whether - it is lazily-loaded (we can tell by whether the - constants slot is nil). */ - ASET (vector, COMPILED_CONSTANTS, item); - item = Qnil; - } - else if (i == COMPILED_CONSTANTS) - { - Lisp_Object bytestr = ptr[COMPILED_CONSTANTS]; - - if (NILP (item)) - { - /* Coerce string to unibyte (like string-as-unibyte, - but without generating extra garbage and - guaranteeing no change in the contents). */ - STRING_SET_CHARS (bytestr, SBYTES (bytestr)); - STRING_SET_UNIBYTE (bytestr); - - item = Fread (Fcons (bytestr, readcharfun)); - if (!CONSP (item)) - error ("Invalid byte code"); - - struct Lisp_Cons *otem = XCONS (item); - bytestr = XCAR (item); - item = XCDR (item); - free_cons (otem); - } - - /* Now handle the bytecode slot. */ - ASET (vector, COMPILED_BYTECODE, bytestr); - } - else if (i == COMPILED_DOC_STRING - && STRINGP (item) - && ! STRING_MULTIBYTE (item)) - { - if (EQ (readcharfun, Qget_emacs_mule_file_char)) - item = Fdecode_coding_string (item, Qemacs_mule, Qnil, Qnil); - else - item = Fstring_as_multibyte (item); - } - } - ASET (vector, i, item); - struct Lisp_Cons *otem = XCONS (tem); - tem = Fcdr (tem); - free_cons (otem); - } - return vector; -} - -/* FLAG means check for ']' to terminate rather than ')' and '.'. - LOCATE_SYMS true means read symbol occurrencess as symbols with - position. */ - -static Lisp_Object -read_list (bool flag, Lisp_Object readcharfun, bool locate_syms) -{ - Lisp_Object val, tail; - Lisp_Object elt, tem; - /* 0 is the normal case. - 1 means this list is a doc reference; replace it with the number 0. - 2 means this list is a doc reference; replace it with the doc string. */ - int doc_reference = 0; - - /* Initialize this to 1 if we are reading a list. */ - bool first_in_list = flag <= 0; - - val = Qnil; - tail = Qnil; - - while (1) - { - int ch; - elt = read1 (readcharfun, &ch, first_in_list, locate_syms); - - first_in_list = 0; - - /* While building, if the list starts with #$, treat it specially. */ - if (EQ (elt, Vload_file_name) - && ! NILP (elt)) - { - if (!NILP (Vpurify_flag)) - doc_reference = 0; - else if (load_force_doc_strings) - doc_reference = 2; - } - if (ch) - { - if (flag > 0) - { - if (ch == ']') - return val; - invalid_syntax (") or . in a vector", readcharfun); - } - if (ch == ')') - return val; - if (ch == '.') - { - if (!NILP (tail)) - XSETCDR (tail, read0 (readcharfun, locate_syms)); - else - val = read0 (readcharfun, locate_syms); - read1 (readcharfun, &ch, 0, locate_syms); - - if (ch == ')') - { - if (doc_reference == 2 && FIXNUMP (XCDR (val))) - { - char *saved = NULL; - file_offset saved_position; - /* Get a doc string from the file we are loading. - If it's in saved_doc_string, get it from there. - - Here, we don't know if the string is a - bytecode string or a doc string. As a - bytecode string must be unibyte, we always - return a unibyte string. If it is actually a - doc string, caller must make it - multibyte. */ - - /* Position is negative for user variables. */ - EMACS_INT pos = eabs (XFIXNUM (XCDR (val))); - if (pos >= saved_doc_string_position - && pos < (saved_doc_string_position - + saved_doc_string_length)) - { - saved = saved_doc_string; - saved_position = saved_doc_string_position; - } - /* Look in prev_saved_doc_string the same way. */ - else if (pos >= prev_saved_doc_string_position - && pos < (prev_saved_doc_string_position - + prev_saved_doc_string_length)) - { - saved = prev_saved_doc_string; - saved_position = prev_saved_doc_string_position; - } - if (saved) - { - ptrdiff_t start = pos - saved_position; - ptrdiff_t from, to; - - /* Process quoting with ^A, - and find the end of the string, - which is marked with ^_ (037). */ - for (from = start, to = start; - saved[from] != 037;) - { - int c = saved[from++]; - if (c == 1) - { - c = saved[from++]; - saved[to++] = (c == 1 ? c - : c == '0' ? 0 - : c == '_' ? 037 - : c); - } - else - saved[to++] = c; - } - - return make_unibyte_string (saved + start, - to - start); - } - else - return get_doc_string (val, 1, 0); - } - - return val; - } - invalid_syntax (". in wrong context", readcharfun); - } - invalid_syntax ("] in a list", readcharfun); - } - tem = list1 (elt); - if (!NILP (tail)) - XSETCDR (tail, tem); - else - val = tem; - tail = tem; - } -} static Lisp_Object initial_obarray; diff --git a/test/src/lread-tests.el b/test/src/lread-tests.el index 9ec54c719c8..47351c1d116 100644 --- a/test/src/lread-tests.el +++ b/test/src/lread-tests.el @@ -281,4 +281,40 @@ literals (Bug#20852)." (should (equal (lread-test-read-and-print str) str)))) (should-error (read-from-string "#1=#1#") :type 'invalid-read-syntax)) +(ert-deftest lread-deeply-nested () + ;; Check that we can read a deeply nested data structure correctly. + (let ((levels 10000) + (prefix nil) + (suffix nil)) + (dotimes (_ levels) + (push "([#s(r " prefix) + (push ")])" suffix)) + (let ((str (concat (apply #'concat prefix) + "a" + (apply #'concat suffix)))) + (let* ((read-circle t) + (result (read-from-string str))) + (should (equal (cdr result) (length str))) + ;; Check the result. (We can't build a reference value and compare + ;; using `equal' because that function is currently depth-limited.) + (named-let check ((x (car result)) (level 0)) + (if (equal level levels) + (should (equal x 'a)) + (should (and (consp x) (null (cdr x)))) + (let ((x2 (car x))) + (should (and (vectorp x2) (equal (length x2) 1))) + (let ((x3 (aref x2 0))) + (should (and (recordp x3) (equal (length x3) 2) + (equal (aref x3 0) 'r))) + (check (aref x3 1) (1+ level)))))))))) + +(ert-deftest lread-misc () + ;; Regression tests for issues found and fixed in bug#55676: + ;; Non-breaking space after a dot makes it a dot token. + (should (equal (read-from-string "(a .\u00A0b)") + '((a . b) . 7))) + ;; #_ without symbol following is the interned empty symbol. + (should (equal (read-from-string "#_") + '(## . 2)))) + ;;; lread-tests.el ends here -- 2.39.2