From ed06de52a53135ee42e528496fdddbf3d74b0479 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Sat, 4 Nov 2023 18:21:06 +0100 Subject: [PATCH] Faster hash table growth, starting at zero size The algorithms no longer use the rehash_threshold and rehash_size float constants, but vary depending on size. In particular, the table now grows faster, especially from smaller sizes. The default size is now 0, starting empty, which effectively postpones allocation until the first insertion (unless make-hash-table was called with a positive :size); this is a clear gain as long as the table remains empty. The first inserted item will use an initial size of 8 because most tables are small. * src/fns.c (std_rehash_size, std_rehash_threshold): Remove. (hash_index_size): Integer-only computation. (maybe_resize_hash_table): Grow more aggressively. (Fhash_table_rehash_size, Fhash_table_rehash_threshold): Use the constants directly. * src/lisp.h (DEFAULT_HASH_SIZE): New value. --- src/fns.c | 60 ++++++++++++++++++++++-------------------------------- src/lisp.h | 2 +- 2 files changed, 25 insertions(+), 37 deletions(-) diff --git a/src/fns.c b/src/fns.c index 3e650b13c1f..4a38126d9dc 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4508,31 +4508,18 @@ allocate_hash_table (void) return ALLOCATE_PLAIN_PSEUDOVECTOR (struct Lisp_Hash_Table, PVEC_HASH_TABLE); } -/* An upper bound on the size of a hash table index. It must fit in - ptrdiff_t and be a valid Emacs fixnum. This is an upper bound on - VECTOR_ELTS_MAX (see alloc.c) and gets as close as we can without - violating modularity. */ -#define INDEX_SIZE_BOUND \ - ((ptrdiff_t) min (MOST_POSITIVE_FIXNUM, \ - ((min (PTRDIFF_MAX, SIZE_MAX) \ - - header_size - GCALIGNMENT) \ - / word_size))) - -/* Default factor by which to increase the size of a hash table. */ -static const double std_rehash_size = 1.5; - -/* Resize hash table when number of entries / table size is >= this - ratio. */ -static const double std_rehash_threshold = 0.8125; - +/* Compute the size of the index from the table capacity. */ static ptrdiff_t hash_index_size (ptrdiff_t size) { - double index_float = size * (1.0 / std_rehash_threshold); - ptrdiff_t index_size = (index_float < INDEX_SIZE_BOUND + 1 - ? next_almost_prime (index_float) - : INDEX_SIZE_BOUND + 1); - if (INDEX_SIZE_BOUND < index_size) + /* An upper bound on the size of a hash table index. It must fit in + ptrdiff_t and be a valid Emacs fixnum. */ + ptrdiff_t upper_bound = min (MOST_POSITIVE_FIXNUM, + PTRDIFF_MAX / sizeof (ptrdiff_t)); + ptrdiff_t index_size = size + (size >> 2); /* 1.25x larger */ + if (index_size < upper_bound) + index_size = next_almost_prime (index_size); + if (index_size > upper_bound) error ("Hash table too large"); return index_size; } @@ -4671,16 +4658,12 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) if (h->next_free < 0) { ptrdiff_t old_size = HASH_TABLE_SIZE (h); - /* FIXME: better growth management, ditch std_rehash_size */ - EMACS_INT new_size = old_size * std_rehash_size; - if (new_size < EMACS_INT_MAX) - new_size = max (new_size, 32); /* avoid slow initial growth */ - else - new_size = EMACS_INT_MAX; - if (PTRDIFF_MAX < new_size) - new_size = PTRDIFF_MAX; - if (new_size <= old_size) - new_size = old_size + 1; + ptrdiff_t base_size = min (max (old_size, 8), PTRDIFF_MAX / 2); + /* Grow aggressively at small sizes, then just double. */ + ptrdiff_t new_size = + old_size == 0 + ? 8 + : (base_size <= 64 ? base_size * 4 : base_size * 2); /* Allocate all the new vectors before updating *H, to avoid problems if memory is exhausted. */ @@ -4738,7 +4721,7 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) #ifdef ENABLE_CHECKING if (HASH_TABLE_P (Vpurify_flag) && XHASH_TABLE (Vpurify_flag) == h) - message ("Growing hash table to: %"pI"d", new_size); + message ("Growing hash table to: %"pD"d", new_size); #endif } } @@ -5403,7 +5386,8 @@ keys. Default is `eql'. Predefined are the tests `eq', `eql', and `define-hash-table-test'. :size SIZE -- A hint as to how many elements will be put in the table. -Default is 65. +The table will always grow as needed; this argument may help performance +slightly if the size is known in advance but is never required. :weakness WEAK -- WEAK must be one of nil, t, `key', `value', `key-or-value', or `key-and-value'. If WEAK is not nil, the table @@ -5516,7 +5500,9 @@ DEFUN ("hash-table-rehash-size", Fhash_table_rehash_size, (Lisp_Object table) { CHECK_HASH_TABLE (table); - return make_float (std_rehash_size); + /* Nominal factor by which to increase the size of a hash table. + No longer used; this is for compatibility. */ + return make_float (1.5); } @@ -5526,7 +5512,9 @@ DEFUN ("hash-table-rehash-threshold", Fhash_table_rehash_threshold, (Lisp_Object table) { CHECK_HASH_TABLE (table); - return make_float (std_rehash_threshold); + /* Nominal threshold for when to resize a hash table. + No longer used; this is for compatibility. */ + return make_float (0.8125); } diff --git a/src/lisp.h b/src/lisp.h index 658bcd8b780..5b70e96d6a1 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2591,7 +2591,7 @@ void hash_table_thaw (Lisp_Object hash_table); /* Default size for hash tables if not specified. */ -enum DEFAULT_HASH_SIZE { DEFAULT_HASH_SIZE = 65 }; +enum DEFAULT_HASH_SIZE { DEFAULT_HASH_SIZE = 0 }; /* Combine two integers X and Y for hashing. The result might exceed INTMASK. */ -- 2.39.2