From: Paul Eggert Date: Sat, 20 Jul 2024 07:17:14 +0000 (-0700) Subject: Avoid accessing uninitialized bool_vector words X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=63d4de4d8452ac6642e8d29ed2d12ca0ecd866f8;p=emacs.git Avoid accessing uninitialized bool_vector words Although loading uninitialized works from memory and then ignoring the result works fine on conventional architectures, it technically has undefined behavior in C, so redo bool_vector allocation so that the code never does that. This can improve performance when allocating large vectors of nil, since calloc can clear the memory lazily. * src/alloc.c (make_clear_bool_vector): New function, a generalization of make_uninit_bool_vector. (make_uninit_bool_vector): Use it. (Fmake_bool_vector): If !INIT, rely on make_clear_bool_vector. * src/alloc.c (Fbool_vector): * src/fns.c (Freverse): Don’t access uninitialized bool_vector words. (cherry picked from commit 79b9f05d3a44b25db9fd3eff9cc002324cfa790c) --- diff --git a/src/alloc.c b/src/alloc.c index 52f8a65d59d..41679b52707 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -453,6 +453,7 @@ no_sanitize_memcpy (void *dest, void const *src, size_t size) #endif /* MAX_SAVE_STACK > 0 */ +static struct Lisp_Vector *allocate_clear_vector (ptrdiff_t, bool); static void unchain_finalizer (struct Lisp_Finalizer *); static void mark_terminals (void); static void gc_sweep (void); @@ -2406,10 +2407,11 @@ bool_vector_fill (Lisp_Object a, Lisp_Object init) return a; } -/* Return a newly allocated, uninitialized bool vector of size NBITS. */ +/* Return a newly allocated, bool vector of size NBITS. If CLEARIT, + clear its slots; otherwise the vector's slots are uninitialized. */ Lisp_Object -make_uninit_bool_vector (EMACS_INT nbits) +make_clear_bool_vector (EMACS_INT nbits, bool clearit) { Lisp_Object val; EMACS_INT words = bool_vector_words (nbits); @@ -2420,16 +2422,25 @@ make_uninit_bool_vector (EMACS_INT nbits) if (PTRDIFF_MAX < needed_elements) memory_full (SIZE_MAX); struct Lisp_Bool_Vector *p - = (struct Lisp_Bool_Vector *) allocate_vector (needed_elements); + = (struct Lisp_Bool_Vector *) allocate_clear_vector (needed_elements, + clearit); + /* Clear padding at end; but only if necessary, to avoid polluting the + data cache. */ + if (!clearit && nbits % BITS_PER_BITS_WORD != 0) + p->data[words - 1] = 0; + XSETVECTOR (val, p); XSETPVECTYPESIZE (XVECTOR (val), PVEC_BOOL_VECTOR, 0, 0); p->size = nbits; + return val; +} - /* Clear padding at the end. */ - if (words) - p->data[words - 1] = 0; +/* Return a newly allocated, uninitialized bool vector of size NBITS. */ - return val; +Lisp_Object +make_uninit_bool_vector (EMACS_INT nbits) +{ + return make_clear_bool_vector (nbits, false); } DEFUN ("make-bool-vector", Fmake_bool_vector, Smake_bool_vector, 2, 2, 0, @@ -2437,11 +2448,9 @@ DEFUN ("make-bool-vector", Fmake_bool_vector, Smake_bool_vector, 2, 2, 0, LENGTH must be a number. INIT matters only in whether it is t or nil. */) (Lisp_Object length, Lisp_Object init) { - Lisp_Object val; - CHECK_FIXNAT (length); - val = make_uninit_bool_vector (XFIXNAT (length)); - return bool_vector_fill (val, init); + Lisp_Object val = make_clear_bool_vector (XFIXNAT (length), NILP (init)); + return NILP (init) ? val : bool_vector_fill (val, init); } DEFUN ("bool-vector", Fbool_vector, Sbool_vector, 0, MANY, 0, @@ -2450,13 +2459,10 @@ Allows any number of arguments, including zero. usage: (bool-vector &rest OBJECTS) */) (ptrdiff_t nargs, Lisp_Object *args) { - ptrdiff_t i; - Lisp_Object vector; - - vector = make_uninit_bool_vector (nargs); - for (i = 0; i < nargs; i++) - bool_vector_set (vector, i, !NILP (args[i])); - + Lisp_Object vector = make_clear_bool_vector (nargs, true); + for (ptrdiff_t i = 0; i < nargs; i++) + if (!NILP (args[i])) + bool_vector_set (vector, i, true); return vector; } diff --git a/src/fns.c b/src/fns.c index a6addccb56e..7c14d904a1a 100644 --- a/src/fns.c +++ b/src/fns.c @@ -2309,12 +2309,12 @@ See also the function `nreverse', which is used more often. */) } else if (BOOL_VECTOR_P (seq)) { - ptrdiff_t i; EMACS_INT nbits = bool_vector_size (seq); - new = make_uninit_bool_vector (nbits); - for (i = 0; i < nbits; i++) - bool_vector_set (new, i, bool_vector_bitref (seq, nbits - i - 1)); + new = make_clear_bool_vector (nbits, true); + for (ptrdiff_t i = 0; i < nbits; i++) + if (bool_vector_bitref (seq, nbits - i - 1)) + bool_vector_set (new, i, true); } else if (STRINGP (seq)) { diff --git a/src/lisp.h b/src/lisp.h index 68b77a88aa7..79eade2f5ae 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -4578,6 +4578,7 @@ list4i (intmax_t a, intmax_t b, intmax_t c, intmax_t d) return list4 (make_int (a), make_int (b), make_int (c), make_int (d)); } +extern Lisp_Object make_clear_bool_vector (EMACS_INT, bool); extern Lisp_Object make_uninit_bool_vector (EMACS_INT); extern Lisp_Object bool_vector_fill (Lisp_Object, Lisp_Object); extern AVOID string_overflow (void);