From: Paul Eggert Date: Sat, 20 Jul 2024 22:52:05 +0000 (-0700) Subject: Fix bool vector length overflow X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=e0f50bd4c392746fc261b89523baf1e442cbff3a;p=emacs.git Fix bool vector length overflow * src/alloc.c (make_clear_bool_vector): It’s now the caller’s responsibility to make sure the bool vector length is in range. Add an eassert to double-check this. This lets some locals be ptrdiff_t not EMACS_INT. (Fmake_bool_vector, Fbool_vector): Check that bool vector lengths are in range. * src/lisp.h (BOOL_VECTOR_LENGTH_MAX): New macro. (bool_vector_words, bool_vector_bytes): Avoid undefined behavior if size == EMACS_INT_MAX - (BITS_PER_BITS_WORD - 1). This is mostly theoretical but it’s easy to do it right. * src/lread.c (read_bool_vector): Use EMACS_INT, not just ptrdiff_t. Check that length doesn’t exceed BOOL_VECTOR_LENGTH_MAX. This fixes an unlikely integer overflow where the calculated size went negative. (cherry picked from commit 515e5ad0de133f0a3d501bd6290ccc51d8462955) --- diff --git a/src/alloc.c b/src/alloc.c index 41679b52707..48b170b866f 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -2413,14 +2413,13 @@ bool_vector_fill (Lisp_Object a, Lisp_Object init) Lisp_Object make_clear_bool_vector (EMACS_INT nbits, bool clearit) { + eassert (0 <= nbits && nbits <= BOOL_VECTOR_LENGTH_MAX); Lisp_Object val; - EMACS_INT words = bool_vector_words (nbits); - EMACS_INT word_bytes = words * sizeof (bits_word); - EMACS_INT needed_elements = ((bool_header_size - header_size + word_bytes + ptrdiff_t words = bool_vector_words (nbits); + ptrdiff_t word_bytes = words * sizeof (bits_word); + ptrdiff_t needed_elements = ((bool_header_size - header_size + word_bytes + word_size - 1) / word_size); - if (PTRDIFF_MAX < needed_elements) - memory_full (SIZE_MAX); struct Lisp_Bool_Vector *p = (struct Lisp_Bool_Vector *) allocate_clear_vector (needed_elements, clearit); @@ -2449,7 +2448,10 @@ LENGTH must be a number. 
INIT matters only in whether it is t or nil. */) (Lisp_Object length, Lisp_Object init) { CHECK_FIXNAT (length); - Lisp_Object val = make_clear_bool_vector (XFIXNAT (length), NILP (init)); + EMACS_INT len = XFIXNAT (length); + if (BOOL_VECTOR_LENGTH_MAX < len) + memory_full (SIZE_MAX); + Lisp_Object val = make_clear_bool_vector (len, NILP (init)); return NILP (init) ? val : bool_vector_fill (val, init); } @@ -2459,6 +2461,8 @@ Allows any number of arguments, including zero. usage: (bool-vector &rest OBJECTS) */) (ptrdiff_t nargs, Lisp_Object *args) { + if (BOOL_VECTOR_LENGTH_MAX < nargs) + memory_full (SIZE_MAX); Lisp_Object vector = make_clear_bool_vector (nargs, true); for (ptrdiff_t i = 0; i < nargs; i++) if (!NILP (args[i])) diff --git a/src/lisp.h b/src/lisp.h index 79eade2f5ae..976b7a15251 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -1840,7 +1840,7 @@ struct Lisp_Bool_Vector /* HEADER.SIZE is the vector's size field. It doesn't have the real size, just the subtype information. */ union vectorlike_header header; - /* This is the size in bits. */ + /* The size in bits; at most BOOL_VECTOR_LENGTH_MAX. */ EMACS_INT size; /* The actual bits, packed into bytes. Zeros fill out the last word if needed. @@ -1868,20 +1868,32 @@ enum word_size = sizeof (Lisp_Object) }; +/* A bool vector's length must be a fixnum for XFIXNUM (Flength (...)). + Also, it is limited by object size, which must fit in both ptrdiff_t and + size_t including header overhead and trailing alignment. */ +#define BOOL_VECTOR_LENGTH_MAX \ + min (MOST_POSITIVE_FIXNUM, \ + ((INT_MULTIPLY_OVERFLOW (min (PTRDIFF_MAX, SIZE_MAX) - bool_header_size,\ + (EMACS_INT) BOOL_VECTOR_BITS_PER_CHAR) \ + ? EMACS_INT_MAX \ + : ((min (PTRDIFF_MAX, SIZE_MAX) - bool_header_size) \ + * (EMACS_INT) BOOL_VECTOR_BITS_PER_CHAR)) \ + - (BITS_PER_BITS_WORD - 1))) + /* The number of data words and bytes in a bool vector with SIZE bits. 
*/ INLINE EMACS_INT bool_vector_words (EMACS_INT size) { eassume (0 <= size && size <= EMACS_INT_MAX - (BITS_PER_BITS_WORD - 1)); - return (size + BITS_PER_BITS_WORD - 1) / BITS_PER_BITS_WORD; + return (size + (BITS_PER_BITS_WORD - 1)) / BITS_PER_BITS_WORD; } INLINE EMACS_INT bool_vector_bytes (EMACS_INT size) { eassume (0 <= size && size <= EMACS_INT_MAX - (BITS_PER_BITS_WORD - 1)); - return (size + BOOL_VECTOR_BITS_PER_CHAR - 1) / BOOL_VECTOR_BITS_PER_CHAR; + return (size + (BOOL_VECTOR_BITS_PER_CHAR - 1)) / BOOL_VECTOR_BITS_PER_CHAR; } INLINE bits_word diff --git a/src/lread.c b/src/lread.c index c1f309866c8..ace7abd80c8 100644 --- a/src/lread.c +++ b/src/lread.c @@ -3568,7 +3568,7 @@ string_props_from_rev_list (Lisp_Object elems, Lisp_Object readcharfun) static Lisp_Object read_bool_vector (Lisp_Object readcharfun) { - ptrdiff_t length = 0; + EMACS_INT length = 0; for (;;) { int c = READCHAR; @@ -3582,6 +3582,8 @@ read_bool_vector (Lisp_Object readcharfun) || ckd_add (&length, length, c - '0')) invalid_syntax ("#&", readcharfun); } + if (BOOL_VECTOR_LENGTH_MAX < length) + invalid_syntax ("#&", readcharfun); ptrdiff_t size_in_chars = bool_vector_bytes (length); Lisp_Object str = read_string_literal (readcharfun);