From: Mattias Engdegård <mattiase@acm.org>
Date: Sun, 26 May 2024 09:13:50 +0000 (+0200)
Subject: Clean up legacy bytecode loading
X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=bf003feccae9f07faef33ccb0a457e1d1a7cf51a;p=emacs.git

Clean up legacy bytecode loading

Complete some unfinished work from the elimination of lazy bytecode
loading in 9bcc9690a8: remove code which became unreachable by that
reform.  This simplifies some of the lower-level parts of the reader
which helps performance somewhat.

* src/doc.c (read_bytecode_pointer, read_bytecode_char)
(read_doc_string): Remove.
(get_doc_string): Remove last argument, all callers adapted.  Simplify.
* src/lread.c (readbyte_for_lambda, readbyte_from_string): Remove.
(readchar, unreadchar): Remove unused code paths for Qlambda and cons
arguments.
(bytecode_from_rev_list): Remove last use of a quirky code path,
decoding force-loaded lazy bytecode explicitly.

(cherry picked from commit 236034a24d0e93e1796224e2ff4a14819fcfd559)
---

diff --git a/src/doc.c b/src/doc.c
index 36633a920c6..f516db3bbcc 100644
--- a/src/doc.c
+++ b/src/doc.c
@@ -74,24 +74,8 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
 static char *get_doc_string_buffer;
 static ptrdiff_t get_doc_string_buffer_size;
 
-static unsigned char *read_bytecode_pointer;
-
 static char const sibling_etc[] = "../etc/";
 
-/* `readchar' in lread.c calls back here to fetch the next byte.
-   If UNREADFLAG is 1, we unread a byte.  */
-
-int
-read_bytecode_char (bool unreadflag)
-{
-  if (unreadflag)
-    {
-      read_bytecode_pointer--;
-      return 0;
-    }
-  return *read_bytecode_pointer++;
-}
-
 #ifdef USE_ANDROID_ASSETS
 
 /* Like `close_file_unwind'.  However, PTR is a pointer to an Android
@@ -120,15 +104,10 @@ close_file_unwind_android_fd (void *ptr)
    (e.g. because the file has been modified and the location is stale),
    return nil.
 
-   If UNIBYTE, always make a unibyte string.
-
-   If DEFINITION, assume this is for reading
-   a dynamic function definition; convert the bytestring
-   and the constants vector with appropriate byte handling,
-   and return a cons cell.  */
+   If UNIBYTE, always make a unibyte string.  */
 
 Lisp_Object
-get_doc_string (Lisp_Object filepos, bool unibyte, bool definition)
+get_doc_string (Lisp_Object filepos, bool unibyte)
 {
   char *from, *to, *name, *p, *p1;
   Lisp_Object file, pos;
@@ -312,14 +291,6 @@ Invalid data in documentation file -- %c followed by code %03o",
 	*to++ = *from++;
     }
 
-  /* If DEFINITION, read from this buffer
-     the same way we would read bytes from a file.  */
-  if (definition)
-    {
-      read_bytecode_pointer = (unsigned char *) get_doc_string_buffer + offset;
-      return Fread (Qlambda);
-    }
-
   if (unibyte)
     return make_unibyte_string (get_doc_string_buffer + offset,
 				to - (get_doc_string_buffer + offset));
@@ -336,16 +307,6 @@ Invalid data in documentation file -- %c followed by code %03o",
     }
 }
 
-/* Get a string from position FILEPOS and pass it through the Lisp reader.
-   We use this for fetching the bytecode string and constants vector
-   of a compiled function from the .elc file.  */
-
-Lisp_Object
-read_doc_string (Lisp_Object filepos)
-{
-  return get_doc_string (filepos, 0, 1);
-}
-
 static bool
 reread_doc_file (Lisp_Object file)
 {
@@ -406,7 +367,7 @@ string is passed through `substitute-command-keys'.  */)
   if (FIXNUMP (doc) || CONSP (doc))
     {
       Lisp_Object tem;
-      tem = get_doc_string (doc, 0, 0);
+      tem = get_doc_string (doc, 0);
       if (NILP (tem) && try_reload)
 	{
 	  /* The file is newer, we need to reset the pointers.  */
@@ -481,7 +442,7 @@ aren't strings.  */)
   if (FIXNUMP (tem) || (CONSP (tem) && FIXNUMP (XCDR (tem))))
     {
       Lisp_Object doc = tem;
-      tem = get_doc_string (tem, 0, 0);
+      tem = get_doc_string (tem, 0);
       if (NILP (tem) && try_reload)
 	{
 	  /* The file is newer, we need to reset the pointers.  */
diff --git a/src/lisp.h b/src/lisp.h
index 534a36499f1..f8d59b1e9fd 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -5273,10 +5273,8 @@ extern void set_initial_environment (void);
 extern void syms_of_callproc (void);
 
 /* Defined in doc.c.  */
-extern Lisp_Object read_doc_string (Lisp_Object);
-extern Lisp_Object get_doc_string (Lisp_Object, bool, bool);
+extern Lisp_Object get_doc_string (Lisp_Object, bool);
 extern void syms_of_doc (void);
-extern int read_bytecode_char (bool);
 
 /* Defined in bytecode.c.  */
 extern void syms_of_bytecode (void);
diff --git a/src/lread.c b/src/lread.c
index c92b2ede932..233f08b0727 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -256,15 +256,13 @@ static Lisp_Object oblookup_considering_shorthand (Lisp_Object, const char *,
 						   ptrdiff_t *);
 
 
-/* Functions that read one byte from the current source READCHARFUN
+/* Function that reads one byte from the current source READCHARFUN
    or unreads one byte.  If the integer argument C is -1, it returns
    one read byte, or -1 when there's no more byte in the source.  If C
    is 0 or positive, it unreads C, and the return value is not
    interesting.  */
 
-static int readbyte_for_lambda (int, Lisp_Object);
 static int readbyte_from_file (int, Lisp_Object);
-static int readbyte_from_string (int, Lisp_Object);
 
 /* Handle unreading and rereading of characters.
    Write READCHAR to read a character,
@@ -278,8 +276,8 @@ static int readbyte_from_string (int, Lisp_Object);
 /* Same as READCHAR but set *MULTIBYTE to the multibyteness of the source.  */
 #define READCHAR_REPORT_MULTIBYTE(multibyte) readchar (readcharfun, multibyte)
 
-/* When READCHARFUN is Qget_file_char, Qget_emacs_mule_file_char,
-   Qlambda, or a cons, we use this to keep an unread character because
+/* When READCHARFUN is Qget_file_char or Qget_emacs_mule_file_char,
+   we use this to keep an unread character because
    a file stream can't handle multibyte-char unreading.  The value -1
    means that there's no unread character.  */
 static int unread_char = -1;
@@ -365,12 +363,6 @@ readchar (Lisp_Object readcharfun, bool *multibyte)
       return c;
     }
 
-  if (EQ (readcharfun, Qlambda))
-    {
-      readbyte = readbyte_for_lambda;
-      goto read_multibyte;
-    }
-
   if (EQ (readcharfun, Qget_file_char))
     {
       eassert (infile);
@@ -400,20 +392,6 @@ readchar (Lisp_Object readcharfun, bool *multibyte)
       return c;
     }
 
-  if (CONSP (readcharfun) && STRINGP (XCAR (readcharfun)))
-    {
-      /* This is the case that read_vector is reading from a unibyte
-	 string that contains a byte sequence previously skipped
-	 because of #@NUMBER.  The car part of readcharfun is that
-	 string, and the cdr part is a value of readcharfun given to
-	 read_vector.  */
-      readbyte = readbyte_from_string;
-      eassert (infile);
-      if (EQ (XCDR (readcharfun), Qget_emacs_mule_file_char))
-	emacs_mule_encoding = 1;
-      goto read_multibyte;
-    }
-
   if (EQ (readcharfun, Qget_emacs_mule_file_char))
     {
       readbyte = readbyte_from_file;
@@ -545,14 +523,6 @@ unreadchar (Lisp_Object readcharfun, int c)
       read_from_string_index_byte
 	= string_char_to_byte (readcharfun, read_from_string_index);
     }
-  else if (CONSP (readcharfun) && STRINGP (XCAR (readcharfun)))
-    {
-      unread_char = c;
-    }
-  else if (EQ (readcharfun, Qlambda))
-    {
-      unread_char = c;
-    }
   else if (FROM_FILE_P (readcharfun))
     {
       unread_char = c;
@@ -561,13 +531,6 @@ unreadchar (Lisp_Object readcharfun, int c)
     call1 (readcharfun, make_fixnum (c));
 }
 
-static int
-readbyte_for_lambda (int c, Lisp_Object readcharfun)
-{
-  return read_bytecode_char (c >= 0);
-}
-
-
 static int
 readbyte_from_stdio (void)
 {
@@ -638,26 +601,6 @@ readbyte_from_file (int c, Lisp_Object readcharfun)
   return readbyte_from_stdio ();
 }
 
-static int
-readbyte_from_string (int c, Lisp_Object readcharfun)
-{
-  Lisp_Object string = XCAR (readcharfun);
-
-  if (c >= 0)
-    {
-      read_from_string_index--;
-      read_from_string_index_byte
-	= string_char_to_byte (string, read_from_string_index);
-    }
-
-  return (read_from_string_index < read_from_string_limit
-	  ? fetch_string_char_advance (string,
-				       &read_from_string_index,
-				       &read_from_string_index_byte)
-	  : -1);
-}
-
-
 /* Signal Qinvalid_read_syntax error.
    S is error string of length N (if > 0)  */
 
@@ -3517,12 +3460,18 @@ bytecode_from_rev_list (Lisp_Object elems, Lisp_Object readcharfun)
 
       /* Lazily-loaded bytecode is represented by the constant slot being nil
          and the bytecode slot a (lazily loaded) string containing the
-         print representation of (BYTECODE . CONSTANTS).  Unpack the
-         pieces by coerceing the string to unibyte and reading the result.  */
+         print representation of (BYTECODE . CONSTANTS).  */
       if (NILP (vec[CLOSURE_CONSTANTS]) && STRINGP (vec[CLOSURE_CODE]))
         {
           Lisp_Object enc = vec[CLOSURE_CODE];
-          Lisp_Object pair = Fread (Fcons (enc, readcharfun));
+	  eassert (!STRING_MULTIBYTE (enc));
+	  /* The string (always unibyte) must be decoded to be parsed.  */
+	  enc = Fdecode_coding_string (enc,
+				       EQ (readcharfun,
+					   Qget_emacs_mule_file_char)
+				       ? Qemacs_mule : Qutf_8_emacs,
+				       Qt, Qnil);
+	  Lisp_Object pair = Fread (enc);
           if (!CONSP (pair))
 	    invalid_syntax ("Invalid byte-code object", readcharfun);
 
@@ -3772,7 +3721,7 @@ get_lazy_string (Lisp_Object val)
 	 && !(pos >= ss->position && pos < ss->position + ss->length))
     ss++;
   if (ss >= ssend)
-    return get_doc_string (val, 1, 0);
+    return get_doc_string (val, 1);
 
   ptrdiff_t start = pos - ss->position;
   char *str = ss->string;