]> git.eshelyaron.com Git - emacs.git/commitdiff
Refactor buffering code in the Lisp reader
authorMattias EngdegÄrd <mattiase@acm.org>
Sat, 11 May 2024 11:14:13 +0000 (13:14 +0200)
committerEshel Yaron <me@eshelyaron.com>
Thu, 24 Jul 2025 07:49:07 +0000 (09:49 +0200)
Abstract the buffering in the reader with a struct and plain functions
instead of rather unhygienic preprocessor macros.

* src/lread.c (READ_AND_BUFFER, INVALID_SYNTAX_WITH_BUFFER):
Removed, replaced with...
(readbuf_t, readbuf_grow, add_char_to_buffer, read_and_buffer)
(invalid_syntax_with_buffer): ...these new functions and struct.
(read0): Use new code instead of old preprocessor macros.

(cherry picked from commit 32a0367554334c14fc1a9242fa6bfc45abe3d032)

src/lread.c

index 07cbb58c4ec2259b1ee6b0412a07e27429813256..e714684f4c858e2c8b4ed5af598fa3f4160bd888 100644 (file)
@@ -3952,28 +3952,53 @@ read_stack_reset (intmax_t sp)
   rdstack.sp = sp;
 }
 
-#define READ_AND_BUFFER(c)                     \
-  c = READCHAR;                                        \
-  if (c < 0)                                   \
-    INVALID_SYNTAX_WITH_BUFFER ();             \
-  if (multibyte)                               \
-    p += CHAR_STRING (c, (unsigned char *) p); \
-  else                                         \
-    *p++ = c;                                  \
-  if (end - p < MAX_MULTIBYTE_LENGTH + 1)      \
-    {                                          \
-       offset = p - read_buffer;               \
-       read_buffer = grow_read_buffer (read_buffer, offset, \
-                                      &heapbuf, &read_buffer_size, count); \
-       p = read_buffer + offset;                                       \
-       end = read_buffer + read_buffer_size;                           \
-    }
+typedef struct {
+  char *start;         /* start of buffer, dynamic if equal to heapbuf */
+  char *end;           /* just past end of buffer */
+  char *cur;           /* where to put next char read */
+  char *heapbuf;       /* start of heap allocation if any, or NULL */
+  specpdl_ref count;   /* specpdl at start */
+} readbuf_t;
+
+static NO_INLINE void
+readbuf_grow (readbuf_t *rb)
+{
+  ptrdiff_t offset = rb->cur - rb->start;
+  ptrdiff_t size = rb->end - rb->start;
+  rb->start = grow_read_buffer (rb->start, offset, &rb->heapbuf, &size,
+                               rb->count);
+  rb->cur = rb->start + offset;
+  rb->end = rb->start + size;
+}
 
-#define INVALID_SYNTAX_WITH_BUFFER()           \
-  {                                            \
-    *p = 0;                                    \
-    invalid_syntax (read_buffer, readcharfun); \
-  }
+static inline void
+add_char_to_buffer (readbuf_t *rb, int c, bool multibyte)
+{
+  if (multibyte)
+    rb->cur += CHAR_STRING (c, (unsigned char *) rb->cur);
+  else
+    *rb->cur++ = c;
+  if (rb->end - rb->cur < MAX_MULTIBYTE_LENGTH + 1)
+    readbuf_grow (rb);
+}
+
+static AVOID
+invalid_syntax_with_buffer (readbuf_t *rb, Lisp_Object readcharfun)
+{
+  *rb->cur = '\0';
+  invalid_syntax (rb->start, readcharfun);
+}
+
+static inline int
+read_and_buffer (readbuf_t *rb, Lisp_Object readcharfun)
+{
+  bool multibyte;
+  int c = READCHAR_REPORT_MULTIBYTE (&multibyte);
+  if (c < 0)
+    invalid_syntax_with_buffer (rb, readcharfun);
+  add_char_to_buffer (rb, c, multibyte);
+  return c;
+}
 
 /* Read a Lisp object.
    If LOCATE_SYMS is true, symbols are read with position.  */
@@ -3981,16 +4006,15 @@ static Lisp_Object
 read0 (Lisp_Object readcharfun, bool locate_syms)
 {
   char stackbuf[64];
-  char *read_buffer = stackbuf;
-  ptrdiff_t read_buffer_size = sizeof stackbuf;
-  ptrdiff_t offset;
-  char *heapbuf = NULL;
 
   specpdl_ref base_pdl = SPECPDL_INDEX ();
   ptrdiff_t base_sp = rdstack.sp;
   record_unwind_protect_intmax (read_stack_reset, base_sp);
 
-  specpdl_ref count = SPECPDL_INDEX ();
+  readbuf_t rb = { .start = stackbuf,
+                  .end = stackbuf + sizeof stackbuf,
+                  .heapbuf = NULL,
+                  .count = SPECPDL_INDEX () };
 
   bool uninterned_symbol;
   bool skip_shorthand;
@@ -4085,13 +4109,9 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
 
     case '#':
       {
-       char *p = read_buffer;
-       char *end = read_buffer + read_buffer_size;
-
-       *p++ = '#';
-       int ch;
-       READ_AND_BUFFER (ch);
-
+       rb.cur = rb.start;
+       *rb.cur++ = '#';
+       int ch = read_and_buffer (&rb, readcharfun);
        switch (ch)
          {
          case '\'':
@@ -4109,11 +4129,11 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
 
          case 's':
            /* #s(...) -- a record or hash-table */
-           READ_AND_BUFFER (ch);
+           ch = read_and_buffer (&rb, readcharfun);
            if (ch != '(')
              {
                UNREAD (ch);
-               INVALID_SYNTAX_WITH_BUFFER ();
+               invalid_syntax_with_buffer (&rb, readcharfun);
              }
            read_stack_push ((struct read_stack_entry) {
                .type = RE_record,
@@ -4126,10 +4146,10 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
          case '^':
            /* #^[...]  -- char-table
               #^^[...] -- sub-char-table */
-           READ_AND_BUFFER (ch);
+           ch = read_and_buffer (&rb, readcharfun);
            if (ch == '^')
              {
-               ch = READCHAR;
+               ch = read_and_buffer (&rb, readcharfun);
                if (ch == '[')
                  {
                    read_stack_push ((struct read_stack_entry) {
@@ -4143,7 +4163,7 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
                else
                  {
                    UNREAD (ch);
-                   INVALID_SYNTAX_WITH_BUFFER ();
+                   invalid_syntax_with_buffer (&rb, readcharfun);
                  }
              }
            else if (ch == '[')
@@ -4159,7 +4179,7 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
            else
              {
                UNREAD (ch);
-               INVALID_SYNTAX_WITH_BUFFER ();
+               invalid_syntax_with_buffer (&rb, readcharfun);
              }
 
          case '(':
@@ -4269,12 +4289,12 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
                int c;
                for (;;)
                  {
-                   READ_AND_BUFFER (c);
+                   c = read_and_buffer (&rb, readcharfun);
                    if (c < '0' || c > '9')
                      break;
                    if (ckd_mul (&n, n, 10)
                        || ckd_add (&n, n, c - '0'))
-                     INVALID_SYNTAX_WITH_BUFFER ();
+                     invalid_syntax_with_buffer (&rb, readcharfun);
                  }
                if (c == 'r' || c == 'R')
                  {
@@ -4315,18 +4335,18 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
                          = XHASH_TABLE (read_objects_map);
                        ptrdiff_t i = hash_find (h, make_fixnum (n));
                        if (i < 0)
-                         INVALID_SYNTAX_WITH_BUFFER ();
+                         invalid_syntax_with_buffer (&rb, readcharfun);
                        obj = HASH_VALUE (h, i);
                        break;
                      }
                    else
-                     INVALID_SYNTAX_WITH_BUFFER ();
+                     invalid_syntax_with_buffer (&rb, readcharfun);
                  }
                else
-                 INVALID_SYNTAX_WITH_BUFFER ();
+                 invalid_syntax_with_buffer (&rb, readcharfun);
              }
            else
-             INVALID_SYNTAX_WITH_BUFFER ();
+             invalid_syntax_with_buffer (&rb, readcharfun);
          }
        break;
       }
@@ -4411,23 +4431,12 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
       /* symbol or number */
     read_symbol:
       {
-       char *p = read_buffer;
-       char *end = read_buffer + read_buffer_size;
+       rb.cur = rb.start;
        bool quoted = false;
        EMACS_INT start_position = readchar_offset - 1;
 
        do
          {
-           if (end - p < MAX_MULTIBYTE_LENGTH + 1)
-             {
-               ptrdiff_t offset = p - read_buffer;
-               read_buffer = grow_read_buffer (read_buffer, offset,
-                                               &heapbuf, &read_buffer_size,
-                                               count);
-               p = read_buffer + offset;
-               end = read_buffer + read_buffer_size;
-             }
-
            if (c == '\\')
              {
                c = READCHAR;
@@ -4436,10 +4445,7 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
                quoted = true;
              }
 
-           if (multibyte)
-             p += CHAR_STRING (c, (unsigned char *) p);
-           else
-             *p++ = c;
+           add_char_to_buffer (&rb, c, multibyte);
            c = READCHAR;
          }
        while (c > 32
@@ -4449,17 +4455,17 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
                        || c == '(' || c == ')'  || c == '[' || c == ']'
                        || c == '`' || c == ',')));
 
-       *p = 0;
-       ptrdiff_t nbytes = p - read_buffer;
+       *rb.cur = '\0';
+       ptrdiff_t nbytes = rb.cur - rb.start;
        UNREAD (c);
 
        /* Only attempt to parse the token as a number if it starts as one.  */
-       char c0 = read_buffer[0];
+       char c0 = rb.start[0];
        if (((c0 >= '0' && c0 <= '9') || c0 == '.' || c0 == '-' || c0 == '+')
            && !quoted && !uninterned_symbol && !skip_shorthand)
          {
            ptrdiff_t len;
-           Lisp_Object result = string_to_number (read_buffer, 10, &len);
+           Lisp_Object result = string_to_number (rb.start, 10, &len);
            if (!NILP (result) && len == nbytes)
              {
                obj = result;
@@ -4470,13 +4476,13 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
        /* symbol, possibly uninterned */
        ptrdiff_t nchars
          = (multibyte
-            ? multibyte_chars_in_text ((unsigned char *)read_buffer, nbytes)
+            ? multibyte_chars_in_text ((unsigned char *)rb.start, nbytes)
             : nbytes);
        Lisp_Object result;
        if (uninterned_symbol)
          {
            Lisp_Object name
-             = make_specified_string (read_buffer, nchars, nbytes, multibyte);
+             = make_specified_string (rb.start, nchars, nbytes, multibyte);
            result = Fmake_symbol (name);
          }
        else
@@ -4497,10 +4503,10 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
                   symbols that are comprised entirely of characters
                   that have the 'symbol constituent' syntax from
                   transforming according to shorthands.  */
-               || symbol_char_span (read_buffer) >= nbytes)
-             found = oblookup (obarray, read_buffer, nchars, nbytes);
+               || symbol_char_span (rb.start) >= nbytes)
+             found = oblookup (obarray, rb.start, nchars, nbytes);
            else
-             found = oblookup_considering_shorthand (obarray, read_buffer,
+             found = oblookup_considering_shorthand (obarray, rb.start,
                                                      nchars, nbytes, &longhand,
                                                      &longhand_chars,
                                                      &longhand_bytes);
@@ -4518,7 +4524,7 @@ read0 (Lisp_Object readcharfun, bool locate_syms)
              }
            else
              {
-               Lisp_Object name = make_specified_string (read_buffer, nchars,
+               Lisp_Object name = make_specified_string (rb.start, nchars,
                                                          nbytes, multibyte);
                result = intern_driver (name, obarray, found);
              }