From 67267bbb8ddd5bffaa6eca1f29f72efa9f3c3230 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Sat, 11 May 2024 13:14:13 +0200 Subject: [PATCH] Refactor buffering code in the Lisp reader Abstract the buffering in the reader with a struct and plain functions instead of rather unhygienic preprocessor macros. * src/lread.c (READ_AND_BUFFER, INVALID_SYNTAX_WITH_BUFFER): Removed, replaced with... (readbuf_t, readbuf_grow, add_char_to_buffer, read_and_buffer) (invalid_syntax_with_buffer): ...these new functions and struct. (read0): Use new code instead of old preprocessor macros. (cherry picked from commit 32a0367554334c14fc1a9242fa6bfc45abe3d032) --- src/lread.c | 148 +++++++++++++++++++++++++++------------------------- 1 file changed, 77 insertions(+), 71 deletions(-) diff --git a/src/lread.c b/src/lread.c index 07cbb58c4ec..e714684f4c8 100644 --- a/src/lread.c +++ b/src/lread.c @@ -3952,28 +3952,53 @@ read_stack_reset (intmax_t sp) rdstack.sp = sp; } -#define READ_AND_BUFFER(c) \ - c = READCHAR; \ - if (c < 0) \ - INVALID_SYNTAX_WITH_BUFFER (); \ - if (multibyte) \ - p += CHAR_STRING (c, (unsigned char *) p); \ - else \ - *p++ = c; \ - if (end - p < MAX_MULTIBYTE_LENGTH + 1) \ - { \ - offset = p - read_buffer; \ - read_buffer = grow_read_buffer (read_buffer, offset, \ - &heapbuf, &read_buffer_size, count); \ - p = read_buffer + offset; \ - end = read_buffer + read_buffer_size; \ - } +typedef struct { + char *start; /* start of buffer, dynamic if equal to heapbuf */ + char *end; /* just past end of buffer */ + char *cur; /* where to put next char read */ + char *heapbuf; /* start of heap allocation if any, or NULL */ + specpdl_ref count; /* specpdl at start */ +} readbuf_t; + +static NO_INLINE void +readbuf_grow (readbuf_t *rb) +{ + ptrdiff_t offset = rb->cur - rb->start; + ptrdiff_t size = rb->end - rb->start; + rb->start = grow_read_buffer (rb->start, offset, &rb->heapbuf, &size, + rb->count); + rb->cur = rb->start + offset; + rb->end = rb->start + size; +} -#define INVALID_SYNTAX_WITH_BUFFER() \ - { \ - *p = 0; \ - invalid_syntax (read_buffer, readcharfun); \ - } +static inline void +add_char_to_buffer (readbuf_t *rb, int c, bool multibyte) +{ + if (multibyte) + rb->cur += CHAR_STRING (c, (unsigned char *) rb->cur); + else + *rb->cur++ = c; + if (rb->end - rb->cur < MAX_MULTIBYTE_LENGTH + 1) + readbuf_grow (rb); +} + +static AVOID +invalid_syntax_with_buffer (readbuf_t *rb, Lisp_Object readcharfun) +{ + *rb->cur = '\0'; + invalid_syntax (rb->start, readcharfun); +} + +static inline int +read_and_buffer (readbuf_t *rb, Lisp_Object readcharfun) +{ + bool multibyte; + int c = READCHAR_REPORT_MULTIBYTE (&multibyte); + if (c < 0) + invalid_syntax_with_buffer (rb, readcharfun); + add_char_to_buffer (rb, c, multibyte); + return c; +} /* Read a Lisp object. If LOCATE_SYMS is true, symbols are read with position. */ @@ -3981,16 +4006,15 @@ static Lisp_Object read0 (Lisp_Object readcharfun, bool locate_syms) { char stackbuf[64]; - char *read_buffer = stackbuf; - ptrdiff_t read_buffer_size = sizeof stackbuf; - ptrdiff_t offset; - char *heapbuf = NULL; specpdl_ref base_pdl = SPECPDL_INDEX (); ptrdiff_t base_sp = rdstack.sp; record_unwind_protect_intmax (read_stack_reset, base_sp); - specpdl_ref count = SPECPDL_INDEX (); + readbuf_t rb = { .start = stackbuf, + .end = stackbuf + sizeof stackbuf, + .heapbuf = NULL, + .count = SPECPDL_INDEX () }; bool uninterned_symbol; bool skip_shorthand; @@ -4085,13 +4109,9 @@ read0 (Lisp_Object readcharfun, bool locate_syms) case '#': { - char *p = read_buffer; - char *end = read_buffer + read_buffer_size; - - *p++ = '#'; - int ch; - READ_AND_BUFFER (ch); - + rb.cur = rb.start; + *rb.cur++ = '#'; + int ch = read_and_buffer (&rb, readcharfun); switch (ch) { case '\'': @@ -4109,11 +4129,11 @@ read0 (Lisp_Object readcharfun, bool locate_syms) case 's': /* #s(...) -- a record or hash-table */ - READ_AND_BUFFER (ch); + ch = read_and_buffer (&rb, readcharfun); if (ch != '(') { UNREAD (ch); - INVALID_SYNTAX_WITH_BUFFER (); + invalid_syntax_with_buffer (&rb, readcharfun); } read_stack_push ((struct read_stack_entry) { .type = RE_record, @@ -4126,10 +4146,10 @@ read0 (Lisp_Object readcharfun, bool locate_syms) case '^': /* #^[...] -- char-table #^^[...] -- sub-char-table */ - READ_AND_BUFFER (ch); + ch = read_and_buffer (&rb, readcharfun); if (ch == '^') { - ch = READCHAR; + ch = read_and_buffer (&rb, readcharfun); if (ch == '[') { read_stack_push ((struct read_stack_entry) { @@ -4143,7 +4163,7 @@ read0 (Lisp_Object readcharfun, bool locate_syms) else { UNREAD (ch); - INVALID_SYNTAX_WITH_BUFFER (); + invalid_syntax_with_buffer (&rb, readcharfun); } } else if (ch == '[') @@ -4159,7 +4179,7 @@ read0 (Lisp_Object readcharfun, bool locate_syms) else { UNREAD (ch); - INVALID_SYNTAX_WITH_BUFFER (); + invalid_syntax_with_buffer (&rb, readcharfun); } case '(': @@ -4269,12 +4289,12 @@ read0 (Lisp_Object readcharfun, bool locate_syms) int c; for (;;) { - READ_AND_BUFFER (c); + c = read_and_buffer (&rb, readcharfun); if (c < '0' || c > '9') break; if (ckd_mul (&n, n, 10) || ckd_add (&n, n, c - '0')) - INVALID_SYNTAX_WITH_BUFFER (); + invalid_syntax_with_buffer (&rb, readcharfun); } if (c == 'r' || c == 'R') { @@ -4315,18 +4335,18 @@ read0 (Lisp_Object readcharfun, bool locate_syms) = XHASH_TABLE (read_objects_map); ptrdiff_t i = hash_find (h, make_fixnum (n)); if (i < 0) - INVALID_SYNTAX_WITH_BUFFER (); + invalid_syntax_with_buffer (&rb, readcharfun); obj = HASH_VALUE (h, i); break; } else - INVALID_SYNTAX_WITH_BUFFER (); + invalid_syntax_with_buffer (&rb, readcharfun); } else - INVALID_SYNTAX_WITH_BUFFER (); + invalid_syntax_with_buffer (&rb, readcharfun); } else - INVALID_SYNTAX_WITH_BUFFER (); + invalid_syntax_with_buffer (&rb, readcharfun); } break; } @@ -4411,23 +4431,12 @@ read0 (Lisp_Object readcharfun, bool locate_syms) /* symbol or number */ read_symbol: { - char *p = read_buffer; - char *end = read_buffer + read_buffer_size; + rb.cur = rb.start; bool quoted = false; EMACS_INT start_position = readchar_offset - 1; do { - if (end - p < MAX_MULTIBYTE_LENGTH + 1) - { - ptrdiff_t offset = p - read_buffer; - read_buffer = grow_read_buffer (read_buffer, offset, - &heapbuf, &read_buffer_size, - count); - p = read_buffer + offset; - end = read_buffer + read_buffer_size; - } - if (c == '\\') { c = READCHAR; @@ -4436,10 +4445,7 @@ read0 (Lisp_Object readcharfun, bool locate_syms) quoted = true; } - if (multibyte) - p += CHAR_STRING (c, (unsigned char *) p); - else - *p++ = c; + add_char_to_buffer (&rb, c, multibyte); c = READCHAR; } while (c > 32 @@ -4449,17 +4455,17 @@ read0 (Lisp_Object readcharfun, bool locate_syms) || c == '(' || c == ')' || c == '[' || c == ']' || c == '`' || c == ','))); - *p = 0; - ptrdiff_t nbytes = p - read_buffer; + *rb.cur = '\0'; + ptrdiff_t nbytes = rb.cur - rb.start; UNREAD (c); /* Only attempt to parse the token as a number if it starts as one. */ - char c0 = read_buffer[0]; + char c0 = rb.start[0]; if (((c0 >= '0' && c0 <= '9') || c0 == '.' || c0 == '-' || c0 == '+') && !quoted && !uninterned_symbol && !skip_shorthand) { ptrdiff_t len; - Lisp_Object result = string_to_number (read_buffer, 10, &len); + Lisp_Object result = string_to_number (rb.start, 10, &len); if (!NILP (result) && len == nbytes) { obj = result; @@ -4470,13 +4476,13 @@ read0 (Lisp_Object readcharfun, bool locate_syms) /* symbol, possibly uninterned */ ptrdiff_t nchars = (multibyte - ? multibyte_chars_in_text ((unsigned char *)read_buffer, nbytes) + ? multibyte_chars_in_text ((unsigned char *)rb.start, nbytes) : nbytes); Lisp_Object result; if (uninterned_symbol) { Lisp_Object name - = make_specified_string (read_buffer, nchars, nbytes, multibyte); + = make_specified_string (rb.start, nchars, nbytes, multibyte); result = Fmake_symbol (name); } else @@ -4497,10 +4503,10 @@ read0 (Lisp_Object readcharfun, bool locate_syms) symbols that are comprised entirely of characters that have the 'symbol constituent' syntax from transforming according to shorthands. */ - || symbol_char_span (read_buffer) >= nbytes) - found = oblookup (obarray, read_buffer, nchars, nbytes); + || symbol_char_span (rb.start) >= nbytes) + found = oblookup (obarray, rb.start, nchars, nbytes); else - found = oblookup_considering_shorthand (obarray, read_buffer, + found = oblookup_considering_shorthand (obarray, rb.start, nchars, nbytes, &longhand, &longhand_chars, &longhand_bytes); @@ -4518,7 +4524,7 @@ read0 (Lisp_Object readcharfun, bool locate_syms) } else { - Lisp_Object name = make_specified_string (read_buffer, nchars, + Lisp_Object name = make_specified_string (rb.start, nchars, nbytes, multibyte); result = intern_driver (name, obarray, found); } -- 2.39.5