From 38d558f2f8c37ba7ff6e0ebd4e3d5fd48ed49775 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Mon, 15 Jan 2024 09:25:02 +0100 Subject: [PATCH] Change hash range reduction from remainder to multiplication This makes both lookups and rehashing cheaper. The index vector size is now always a power of 2. The first table size is reduced to 6 (from 8), because index vectors would become excessively big otherwise. * src/lisp.h (struct Lisp_Hash_Table): Replace index_size with index_bits. All references adapted. (hash_table_index_size): New accessor; use it where applicable. * src/fns.c (hash_index_size): Replace with... (compute_hash_index_bits): ...this new function, returning the log2 of the index size. All callers adapted. (hash_index_index): Knuth multiplicative hashing instead of remainder. (maybe_resize_hash_table): Reduce first table size from 8 to 6. (cherry picked from commit e66870400d45e3d08265df9f6acd4631a5712139) --- src/alloc.c | 7 +++-- src/fns.c | 78 +++++++++++++++++++++++++-------------------------- src/lisp.h | 13 +++++++-- src/pdumper.c | 2 +- 4 files changed, 54 insertions(+), 46 deletions(-) diff --git a/src/alloc.c b/src/alloc.c index 15bb65cf74f..6abe9e28650 100644 --- a/src/alloc.c +++ b/src/alloc.c @@ -3443,7 +3443,7 @@ cleanup_vector (struct Lisp_Vector *vector) struct Lisp_Hash_Table *h = PSEUDOVEC_STRUCT (vector, Lisp_Hash_Table); if (h->table_size > 0) { - eassert (h->index_size > 1); + eassert (h->index_bits > 0); xfree (h->index); xfree (h->key_and_value); xfree (h->next); @@ -3451,7 +3451,7 @@ cleanup_vector (struct Lisp_Vector *vector) ptrdiff_t bytes = (h->table_size * (2 * sizeof *h->key_and_value + sizeof *h->hash + sizeof *h->next) - + h->index_size * sizeof *h->index); + + hash_table_index_size (h) * sizeof *h->index); hash_table_allocated_bytes -= bytes; } } @@ -5959,7 +5959,8 @@ purecopy_hash_table (struct Lisp_Hash_Table *table) for (ptrdiff_t i = 0; i < nvalues; i++) pure->key_and_value[i] = purecopy (table->key_and_value[i]); - ptrdiff_t index_bytes = table->index_size * sizeof *table->index; + ptrdiff_t index_bytes = hash_table_index_size (table) + * sizeof *table->index; pure->index = pure_alloc (index_bytes, -(int)sizeof *table->index); memcpy (pure->index, table->index, index_bytes); } diff --git a/src/fns.c b/src/fns.c index 08908d481a3..7de2616b359 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4291,7 +4291,7 @@ set_hash_hash_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, hash_hash_t val) static void set_hash_index_slot (struct Lisp_Hash_Table *h, ptrdiff_t idx, ptrdiff_t val) { - eassert (idx >= 0 && idx < h->index_size); + eassert (idx >= 0 && idx < hash_table_index_size (h)); h->index[idx] = val; } @@ -4392,7 +4392,7 @@ HASH_NEXT (struct Lisp_Hash_Table *h, ptrdiff_t idx) static ptrdiff_t HASH_INDEX (struct Lisp_Hash_Table *h, ptrdiff_t idx) { - eassert (idx >= 0 && idx < h->index_size); + eassert (idx >= 0 && idx < hash_table_index_size (h)); return h->index[idx]; } @@ -4527,26 +4527,19 @@ allocate_hash_table (void) return ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_Hash_Table, PVEC_HASH_TABLE); } -/* Compute the size of the index from the table capacity. */ -static ptrdiff_t -hash_index_size (ptrdiff_t size) -{ - /* An upper bound on the size of a hash table index. It must fit in - ptrdiff_t and be a valid Emacs fixnum. */ - ptrdiff_t upper_bound = min (MOST_POSITIVE_FIXNUM, - min (TYPE_MAXIMUM (hash_idx_t), - PTRDIFF_MAX / sizeof (ptrdiff_t))); - /* Single-element index vectors are used iff size=0. */ - eassert (size > 0); - ptrdiff_t lower_bound = 2; - ptrdiff_t index_size = size + max (size >> 2, 1); /* 1.25x larger */ - if (index_size < upper_bound) - index_size = (index_size < lower_bound - ? lower_bound - : next_almost_prime (index_size)); - if (index_size > upper_bound) +/* Compute the size of the index (as log2) from the table capacity. */ +static int +compute_hash_index_bits (hash_idx_t size) +{ + /* An upper bound on the size of a hash table index index. */ + hash_idx_t upper_bound = min (MOST_POSITIVE_FIXNUM, + min (TYPE_MAXIMUM (hash_idx_t), + PTRDIFF_MAX / sizeof (hash_idx_t))); + /* Use next higher power of 2. This works even for size=0. */ + int bits = elogb (size) + 1; + if (bits >= TYPE_WIDTH (uintmax_t) || ((uintmax_t)1 << bits) > upper_bound) error ("Hash table too large"); - return index_size; + return bits; } /* Constant hash index vector used when the table size is zero. @@ -4587,7 +4580,7 @@ make_hash_table (const struct hash_table_test *test, EMACS_INT size, h->key_and_value = NULL; h->hash = NULL; h->next = NULL; - h->index_size = 1; + h->index_bits = 0; h->index = (hash_idx_t *)empty_hash_index_vector; h->next_free = -1; } @@ -4605,8 +4598,9 @@ make_hash_table (const struct hash_table_test *test, EMACS_INT size, h->next[i] = i + 1; h->next[size - 1] = -1; - int index_size = hash_index_size (size); - h->index_size = index_size; + int index_bits = compute_hash_index_bits (size); + h->index_bits = index_bits; + ptrdiff_t index_size = hash_table_index_size (h); h->index = hash_table_alloc_bytes (index_size * sizeof *h->index); for (ptrdiff_t i = 0; i < index_size; i++) h->index[i] = -1; @@ -4654,7 +4648,7 @@ copy_hash_table (struct Lisp_Hash_Table *h1) h2->next = hash_table_alloc_bytes (next_bytes); memcpy (h2->next, h1->next, next_bytes); - ptrdiff_t index_bytes = h1->index_size * sizeof *h1->index; + ptrdiff_t index_bytes = hash_table_index_size (h1) * sizeof *h1->index; h2->index = hash_table_alloc_bytes (index_bytes); memcpy (h2->index, h1->index, index_bytes); } @@ -4668,8 +4662,11 @@ copy_hash_table (struct Lisp_Hash_Table *h1) static inline ptrdiff_t hash_index_index (struct Lisp_Hash_Table *h, hash_hash_t hash) { - eassert (h->index_size > 0); - return hash % h->index_size; + /* Knuth multiplicative hashing, tailored for 32-bit indices + (avoiding a 64-bit multiply). */ + uint32_t alpha = 2654435769; /* 2**32/phi */ + /* Note the cast to uint64_t, to make it work for index_bits=0. */ + return (uint64_t)((uint32_t)hash * alpha) >> (32 - h->index_bits); } /* Resize hash table H if it's too full. If H cannot be resized @@ -4681,7 +4678,7 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) if (h->next_free < 0) { ptrdiff_t old_size = HASH_TABLE_SIZE (h); - ptrdiff_t min_size = 8; + ptrdiff_t min_size = 6; ptrdiff_t base_size = min (max (old_size, min_size), PTRDIFF_MAX / 2); /* Grow aggressively at small sizes, then just double. */ ptrdiff_t new_size = @@ -4706,13 +4703,14 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) hash_hash_t *hash = hash_table_alloc_bytes (new_size * sizeof *hash); memcpy (hash, h->hash, old_size * sizeof *hash); - ptrdiff_t old_index_size = h->index_size; - ptrdiff_t index_size = hash_index_size (new_size); + ptrdiff_t old_index_size = hash_table_index_size (h); + ptrdiff_t index_bits = compute_hash_index_bits (new_size); + ptrdiff_t index_size = (ptrdiff_t)1 << index_bits; hash_idx_t *index = hash_table_alloc_bytes (index_size * sizeof *index); for (ptrdiff_t i = 0; i < index_size; i++) index[i] = -1; - h->index_size = index_size; + h->index_bits = index_bits; h->table_size = new_size; h->next_free = old_size; @@ -4778,18 +4776,19 @@ hash_table_thaw (Lisp_Object hash_table) h->key_and_value = NULL; h->hash = NULL; h->next = NULL; - h->index_size = 1; + h->index_bits = 0; h->index = (hash_idx_t *)empty_hash_index_vector; } else { - ptrdiff_t index_size = hash_index_size (size); - h->index_size = index_size; + ptrdiff_t index_bits = compute_hash_index_bits (size); + h->index_bits = index_bits; h->hash = hash_table_alloc_bytes (size * sizeof *h->hash); h->next = hash_table_alloc_bytes (size * sizeof *h->next); + ptrdiff_t index_size = hash_table_index_size (h); h->index = hash_table_alloc_bytes (index_size * sizeof *h->index); for (ptrdiff_t i = 0; i < index_size; i++) h->index[i] = -1; @@ -4937,7 +4936,8 @@ hash_clear (struct Lisp_Hash_Table *h) set_hash_value_slot (h, i, Qnil); } - for (ptrdiff_t i = 0; i < h->index_size; i++) + ptrdiff_t index_size = hash_table_index_size (h); + for (ptrdiff_t i = 0; i < index_size; i++) h->index[i] = -1; h->next_free = 0; @@ -4976,7 +4976,7 @@ keep_entry_p (hash_table_weakness_t weakness, bool sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p) { - ptrdiff_t n = h->index_size; + ptrdiff_t n = hash_table_index_size (h); bool marked = false; for (ptrdiff_t bucket = 0; bucket < n; ++bucket) @@ -5701,7 +5701,7 @@ DEFUN ("internal--hash-table-histogram", struct Lisp_Hash_Table *h = check_hash_table (hash_table); ptrdiff_t size = HASH_TABLE_SIZE (h); ptrdiff_t *freq = xzalloc (size * sizeof *freq); - ptrdiff_t index_size = h->index_size; + ptrdiff_t index_size = hash_table_index_size (h); for (ptrdiff_t i = 0; i < index_size; i++) { ptrdiff_t n = 0; @@ -5729,7 +5729,7 @@ Internal use only. */) { struct Lisp_Hash_Table *h = check_hash_table (hash_table); Lisp_Object ret = Qnil; - ptrdiff_t index_size = h->index_size; + ptrdiff_t index_size = hash_table_index_size (h); for (ptrdiff_t i = 0; i < index_size; i++) { Lisp_Object bucket = Qnil; @@ -5750,7 +5750,7 @@ DEFUN ("internal--hash-table-index-size", (Lisp_Object hash_table) { struct Lisp_Hash_Table *h = check_hash_table (hash_table); - return make_int (h->index_size); + return make_int (hash_table_index_size (h)); } diff --git a/src/lisp.h b/src/lisp.h index e6fd8cacb1b..d6bbf15d83b 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2475,14 +2475,14 @@ struct Lisp_Hash_Table The table is physically split into three vectors (hash, next, key_and_value) which may or may not be beneficial. */ - hash_idx_t index_size; /* Size of the index vector. */ + int index_bits; /* log2 (size of the index vector). */ hash_idx_t table_size; /* Size of the next and hash vectors. */ /* Bucket vector. An entry of -1 indicates no item is present, and a nonnegative entry is the index of the first item in a collision chain. - This vector is index_size entries long. - If index_size is 1 (and table_size is 0), then this is the + This vector is 2**index_bits entries long. + If index_bits is 0 (and table_size is 0), then this is the constant read-only vector {-1}, shared between all instances. Otherwise it is heap-allocated. */ hash_idx_t *index; @@ -2597,6 +2597,13 @@ HASH_TABLE_SIZE (const struct Lisp_Hash_Table *h) return h->table_size; } +/* Size of the index vector in hash table H. */ +INLINE ptrdiff_t +hash_table_index_size (const struct Lisp_Hash_Table *h) +{ + return (ptrdiff_t)1 << h->index_bits; +} + /* Hash value for KEY in hash table H. */ INLINE hash_hash_t hash_from_key (struct Lisp_Hash_Table *h, Lisp_Object key) diff --git a/src/pdumper.c b/src/pdumper.c index ee554cda55a..b8006b035ea 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -2688,7 +2688,7 @@ hash_table_freeze (struct Lisp_Hash_Table *h) h->hash = NULL; h->index = NULL; h->table_size = 0; - h->index_size = 0; + h->index_bits = 0; h->frozen_test = hash_table_std_test (h->test); h->test = NULL; } -- 2.39.5