From f0b0105d913a94c66f230874c9269b19dbbc83bd Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Tue, 19 May 2020 23:22:40 -0700
Subject: [PATCH] Hoist some byte-code checking out of eval
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Check Lisp_Compiled objects better as theyâre created,
so that the byte-code interpreter neednât do the checks
each time it executes them.  This improved performance
of âmake compile-alwaysâ by 1.5% on my platform.  Also,
improve the quality of the (still-incomplete) checks, as
this is more practical now that theyâre done less often.
* src/alloc.c (make_byte_code): Remove.  All uses removed.
(Fmake_byte_code): Put a better (though still incomplete)
check here instead.  Simplify by using Fvector instead
of make_uninit_vector followed by memcpy, and by using
XSETPVECTYPE instead of make_byte_code followed by XSETCOMPILED.
* src/bytecode.c (Fbyte_code): Do sanity check and conditional
translation to unibyte here instead of each time the function is
executed.
(exec_byte_code): Omit no-longer-necessary sanity and
unibyte checking.  Use SCHARS instead of SBYTES where
either will do, as SCHARS is faster.
* src/eval.c (fetch_and_exec_byte_code): New function.
(funcall_lambda): Use it.
(funcall_lambda, lambda_arity, Ffetch_bytecode):
Omit no-longer-necessary sanity checks.
(Ffetch_bytecode): Add sanity check if actually fetching.
* src/lisp.h (XSETCOMPILED): Remove.  All uses removed.
* src/lread.c (read1): Check byte-code objects more thoroughly,
albeit still incompletely, and do translation to unibyte here
instead of each time the function is executed.
(read1): Use XSETPVECYPE instead of make_byte_code.
(read_vector): Omit no-longer-necessary sanity check.
---
 src/alloc.c    | 33 ++++++++++-----------------------
 src/bytecode.c | 28 +++++++++++++++-------------
 src/eval.c     | 48 +++++++++++++++++++-----------------------------
 src/lisp.h     |  2 --
 src/lread.c    | 26 +++++++++++++++++++++-----
 5 files changed, 65 insertions(+), 72 deletions(-)

diff --git a/src/alloc.c b/src/alloc.c
index ebc55857ea0..b7ebaa63a5b 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -3421,23 +3421,6 @@ usage: (vector &rest OBJECTS)  */)
   return val;
 }
 
-void
-make_byte_code (struct Lisp_Vector *v)
-{
-  /* Don't allow the global zero_vector to become a byte code object.  */
-  eassert (0 < v->header.size);
-
-  if (v->header.size > 1 && STRINGP (v->contents[1])
-      && STRING_MULTIBYTE (v->contents[1]))
-    /* BYTECODE-STRING must have been produced by Emacs 20.2 or the
-       earlier because they produced a raw 8-bit string for byte-code
-       and now such a byte-code string is loaded as multibyte while
-       raw 8-bit characters converted to multibyte form.  Thus, now we
-       must convert them back to the original unibyte form.  */
-    v->contents[1] = Fstring_as_unibyte (v->contents[1]);
-  XSETPVECTYPE (v, PVEC_COMPILED);
-}
-
 DEFUN ("make-byte-code", Fmake_byte_code, Smake_byte_code, 4, MANY, 0,
        doc: /* Create a byte-code object with specified arguments as elements.
 The arguments should be the ARGLIST, bytecode-string BYTE-CODE, constant
@@ -3456,8 +3439,14 @@ stack before executing the byte-code.
 usage: (make-byte-code ARGLIST BYTE-CODE CONSTANTS DEPTH &optional DOCSTRING INTERACTIVE-SPEC &rest ELEMENTS)  */)
   (ptrdiff_t nargs, Lisp_Object *args)
 {
-  Lisp_Object val = make_uninit_vector (nargs);
-  struct Lisp_Vector *p = XVECTOR (val);
+  if (! ((FIXNUMP (args[COMPILED_ARGLIST])
+	  || CONSP (args[COMPILED_ARGLIST])
+	  || NILP (args[COMPILED_ARGLIST]))
+	 && STRINGP (args[COMPILED_BYTECODE])
+	 && !STRING_MULTIBYTE (args[COMPILED_BYTECODE])
+	 && VECTORP (args[COMPILED_CONSTANTS])
+	 && FIXNATP (args[COMPILED_STACK_DEPTH])))
+    error ("Invalid byte-code object");
 
   /* We used to purecopy everything here, if purify-flag was set.  This worked
      OK for Emacs-23, but with Emacs-24's lexical binding code, it can be
@@ -3466,10 +3455,8 @@ usage: (make-byte-code ARGLIST BYTE-CODE CONSTANTS DEPTH &optional DOCSTRING INT
      copied into pure space, including its free variables, which is sometimes
      just wasteful and other times plainly wrong (e.g. those free vars may want
      to be setcar'd).  */
-
-  memcpy (p->contents, args, nargs * sizeof *args);
-  make_byte_code (p);
-  XSETCOMPILED (val, p);
+  Lisp_Object val = Fvector (nargs, args);
+  XSETPVECTYPE (XVECTOR (val), PVEC_COMPILED);
   return val;
 }
 
diff --git a/src/bytecode.c b/src/bytecode.c
index 3c90544f3f2..5ac30aa1010 100644
--- a/src/bytecode.c
+++ b/src/bytecode.c
@@ -319,6 +319,19 @@ the third, MAXDEPTH, the maximum stack depth used in this function.
 If the third argument is incorrect, Emacs may crash.  */)
   (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth)
 {
+  if (! (STRINGP (bytestr) && VECTORP (vector) && FIXNATP (maxdepth)))
+    error ("Invalid byte-code");
+
+  if (STRING_MULTIBYTE (bytestr))
+    {
+      /* BYTESTR must have been produced by Emacs 20.2 or earlier
+	 because it produced a raw 8-bit string for byte-code and now
+	 such a byte-code string is loaded as multibyte with raw 8-bit
+	 characters converted to multibyte form.  Convert them back to
+	 the original unibyte form.  */
+      bytestr = Fstring_as_unibyte (bytestr);
+    }
+
   return exec_byte_code (bytestr, vector, maxdepth, Qnil, 0, NULL);
 }
 
@@ -344,21 +357,10 @@ exec_byte_code (Lisp_Object bytestr, Lisp_Object vector, Lisp_Object maxdepth,
   int volatile this_op = 0;
 #endif
 
-  CHECK_STRING (bytestr);
-  CHECK_VECTOR (vector);
-  CHECK_FIXNAT (maxdepth);
+  eassert (!STRING_MULTIBYTE (bytestr));
 
   ptrdiff_t const_length = ASIZE (vector);
-
-  if (STRING_MULTIBYTE (bytestr))
-    /* BYTESTR must have been produced by Emacs 20.2 or the earlier
-       because they produced a raw 8-bit string for byte-code and now
-       such a byte-code string is loaded as multibyte while raw 8-bit
-       characters converted to multibyte form.  Thus, now we must
-       convert them back to the originally intended unibyte form.  */
-    bytestr = Fstring_as_unibyte (bytestr);
-
-  ptrdiff_t bytestr_length = SBYTES (bytestr);
+  ptrdiff_t bytestr_length = SCHARS (bytestr);
   Lisp_Object *vectorp = XVECTOR (vector)->contents;
 
   unsigned char quitcounter = 1;
diff --git a/src/eval.c b/src/eval.c
index 014905ce6df..be2af2d041b 100644
--- a/src/eval.c
+++ b/src/eval.c
@@ -2904,6 +2904,21 @@ funcall_subr (struct Lisp_Subr *subr, ptrdiff_t numargs, Lisp_Object *args)
     }
 }
 
+/* Call the compiled Lisp function FUN.  If we have not yet read FUN's
+   bytecode string and constants vector, fetch them from the file first.  */
+
+static Lisp_Object
+fetch_and_exec_byte_code (Lisp_Object fun, Lisp_Object syms_left,
+			  ptrdiff_t nargs, Lisp_Object *args)
+{
+  if (CONSP (AREF (fun, COMPILED_BYTECODE)))
+    Ffetch_bytecode (fun);
+  return exec_byte_code (AREF (fun, COMPILED_BYTECODE),
+			 AREF (fun, COMPILED_CONSTANTS),
+			 AREF (fun, COMPILED_STACK_DEPTH),
+			 syms_left, nargs, args);
+}
+
 static Lisp_Object
 apply_lambda (Lisp_Object fun, Lisp_Object args, ptrdiff_t count)
 {
@@ -2968,9 +2983,6 @@ funcall_lambda (Lisp_Object fun, ptrdiff_t nargs,
     }
   else if (COMPILEDP (fun))
     {
-      ptrdiff_t size = PVSIZE (fun);
-      if (size <= COMPILED_STACK_DEPTH)
-	xsignal1 (Qinvalid_function, fun);
       syms_left = AREF (fun, COMPILED_ARGLIST);
       if (FIXNUMP (syms_left))
 	/* A byte-code object with an integer args template means we
@@ -2982,15 +2994,7 @@ funcall_lambda (Lisp_Object fun, ptrdiff_t nargs,
 	   argument-binding code below instead (as do all interpreted
 	   functions, even lexically bound ones).  */
 	{
-	  /* If we have not actually read the bytecode string
-	     and constants vector yet, fetch them from the file.  */
-	  if (CONSP (AREF (fun, COMPILED_BYTECODE)))
-	    Ffetch_bytecode (fun);
-	  return exec_byte_code (AREF (fun, COMPILED_BYTECODE),
-				 AREF (fun, COMPILED_CONSTANTS),
-				 AREF (fun, COMPILED_STACK_DEPTH),
-				 syms_left,
-				 nargs, arg_vector);
+	  return fetch_and_exec_byte_code (fun, syms_left, nargs, arg_vector);
 	}
       lexenv = Qnil;
     }
@@ -3059,16 +3063,7 @@ funcall_lambda (Lisp_Object fun, ptrdiff_t nargs,
   if (CONSP (fun))
     val = Fprogn (XCDR (XCDR (fun)));
   else
-    {
-      /* If we have not actually read the bytecode string
-	 and constants vector yet, fetch them from the file.  */
-      if (CONSP (AREF (fun, COMPILED_BYTECODE)))
-	Ffetch_bytecode (fun);
-      val = exec_byte_code (AREF (fun, COMPILED_BYTECODE),
-			    AREF (fun, COMPILED_CONSTANTS),
-			    AREF (fun, COMPILED_STACK_DEPTH),
-			    Qnil, 0, 0);
-    }
+    val = fetch_and_exec_byte_code (fun, Qnil, 0, NULL);
 
   return unbind_to (count, val);
 }
@@ -3153,9 +3148,6 @@ lambda_arity (Lisp_Object fun)
     }
   else if (COMPILEDP (fun))
     {
-      ptrdiff_t size = PVSIZE (fun);
-      if (size <= COMPILED_STACK_DEPTH)
-	xsignal1 (Qinvalid_function, fun);
       syms_left = AREF (fun, COMPILED_ARGLIST);
       if (FIXNUMP (syms_left))
         return get_byte_code_arity (syms_left);
@@ -3198,13 +3190,11 @@ DEFUN ("fetch-bytecode", Ffetch_bytecode, Sfetch_bytecode,
 
   if (COMPILEDP (object))
     {
-      ptrdiff_t size = PVSIZE (object);
-      if (size <= COMPILED_STACK_DEPTH)
-	xsignal1 (Qinvalid_function, object);
       if (CONSP (AREF (object, COMPILED_BYTECODE)))
 	{
 	  tem = read_doc_string (AREF (object, COMPILED_BYTECODE));
-	  if (!CONSP (tem))
+	  if (! (CONSP (tem) && STRINGP (XCAR (tem))
+		 && VECTORP (XCDR (tem))))
 	    {
 	      tem = AREF (object, COMPILED_BYTECODE);
 	      if (CONSP (tem) && STRINGP (XCAR (tem)))
diff --git a/src/lisp.h b/src/lisp.h
index ad7d67ae695..85bdc172b20 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -1341,7 +1341,6 @@ dead_object (void)
 #define XSETWINDOW(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_WINDOW))
 #define XSETTERMINAL(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_TERMINAL))
 #define XSETSUBR(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_SUBR))
-#define XSETCOMPILED(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_COMPILED))
 #define XSETBUFFER(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_BUFFER))
 #define XSETCHAR_TABLE(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_CHAR_TABLE))
 #define XSETBOOL_VECTOR(a, b) (XSETPSEUDOVECTOR (a, b, PVEC_BOOL_VECTOR))
@@ -3934,7 +3933,6 @@ build_string (const char *str)
 
 extern Lisp_Object pure_cons (Lisp_Object, Lisp_Object);
 extern Lisp_Object make_vector (ptrdiff_t, Lisp_Object);
-extern void make_byte_code (struct Lisp_Vector *);
 extern struct Lisp_Vector *allocate_vector (ptrdiff_t);
 extern struct Lisp_Vector *allocate_nil_vector (ptrdiff_t);
 
diff --git a/src/lread.c b/src/lread.c
index 59bf529f45c..53b4e1be2df 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -2966,8 +2966,26 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
 	  struct Lisp_Vector *vec;
 	  tmp = read_vector (readcharfun, 1);
 	  vec = XVECTOR (tmp);
-	  if (vec->header.size == 0)
-	    invalid_syntax ("Empty byte-code object");
+	  if (! (COMPILED_STACK_DEPTH < vec->header.size
+		 && (FIXNUMP (vec->contents[COMPILED_ARGLIST])
+		     || CONSP (vec->contents[COMPILED_ARGLIST])
+		     || NILP (vec->contents[COMPILED_ARGLIST]))
+		 && ((STRINGP (vec->contents[COMPILED_BYTECODE])
+		      && VECTORP (vec->contents[COMPILED_CONSTANTS]))
+		     || CONSP (vec->contents[COMPILED_BYTECODE]))
+		 && FIXNATP (vec->contents[COMPILED_STACK_DEPTH])))
+	    invalid_syntax ("Invalid byte-code object");
+
+	  if (STRING_MULTIBYTE (AREF (tmp, COMPILED_BYTECODE)))
+	    {
+	      /* BYTESTR must have been produced by Emacs 20.2 or earlier
+		 because it produced a raw 8-bit string for byte-code and
+		 now such a byte-code string is loaded as multibyte with
+		 raw 8-bit characters converted to multibyte form.
+		 Convert them back to the original unibyte form.  */
+	      ASET (tmp, COMPILED_BYTECODE,
+		    Fstring_as_unibyte (AREF (tmp, COMPILED_BYTECODE)));
+	    }
 
 	  if (COMPILED_DOC_STRING < vec->header.size
 	      && EQ (AREF (tmp, COMPILED_DOC_STRING), make_fixnum (0)))
@@ -2986,7 +3004,7 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
 	      ASET (tmp, COMPILED_DOC_STRING, make_ufixnum (hash));
 	    }
 
-	  make_byte_code (vec);
+	  XSETPVECTYPE (vec, PVEC_COMPILED);
 	  return tmp;
 	}
       if (c == '(')
@@ -3824,8 +3842,6 @@ read_vector (Lisp_Object readcharfun, bool bytecodeflag)
 {
   Lisp_Object tem = read_list (1, readcharfun);
   ptrdiff_t size = list_length (tem);
-  if (bytecodeflag && size <= COMPILED_STACK_DEPTH)
-    error ("Invalid byte code");
   Lisp_Object vector = make_nil_vector (size);
 
   Lisp_Object *ptr = XVECTOR (vector)->contents;
-- 
2.39.5