From d3cefd3e98354929d96c9396e5920e8a123784dc Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Sat, 4 Nov 2023 16:34:09 +0100 Subject: [PATCH] Leaner hash table dumping and thawing Only dump the actual data, and the test encoded as an enum. This simplifies dumping, makes dump files smaller and saves space at run time. * src/lisp.h (hash_table_std_test_t): New enum. (struct Lisp_Hash_Table): Add frozen_test member, consuming no extra space. * src/fns.c (hashfn_user_defined): Now static. (hash_table_test_from_std): New. (hash_table_rehash): Rename to... (hash_table_thaw): ...this and rewrite. * src/pdumper.c (hash_table_contents): Only include actual data, not unused space. (hash_table_std_test): New. (hash_table_freeze): Set frozen_test from test. (dump_hash_table): Dump frozen_test, not the whole test struct. Don't bother other dumping fields that can be derived. --- src/fns.c | 53 +++++++++++++++++++++++++++---------------- src/lisp.h | 12 ++++++++-- src/pdumper.c | 63 ++++++++++++++++++++++----------------------------- 3 files changed, 70 insertions(+), 58 deletions(-) diff --git a/src/fns.c b/src/fns.c index efec74d4959..74fdf29417e 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4474,7 +4474,7 @@ hashfn_eql (Lisp_Object key, struct Lisp_Hash_Table *h) /* Given H, return a hash code for KEY which uses a user-defined function to compare keys. */ -Lisp_Object +static Lisp_Object hashfn_user_defined (Lisp_Object key, struct Lisp_Hash_Table *h) { Lisp_Object args[] = { h->test.user_hash_function, key }; @@ -4638,11 +4638,10 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) if (h->next_free < 0) { ptrdiff_t old_size = HASH_TABLE_SIZE (h); - EMACS_INT new_size; - - double float_new_size = old_size * std_rehash_size; - if (float_new_size < EMACS_INT_MAX) - new_size = float_new_size; + /* FIXME: better growth management, ditch std_rehash_size */ + EMACS_INT new_size = old_size * std_rehash_size; + if (new_size < EMACS_INT_MAX) + new_size = max (new_size, 32); /* avoid slow initial growth */ else new_size = EMACS_INT_MAX; if (PTRDIFF_MAX < new_size) @@ -4691,20 +4690,39 @@ maybe_resize_hash_table (struct Lisp_Hash_Table *h) } } -/* Recompute the hashes (and hence also the "next" pointers). - Normally there's never a need to recompute hashes. - This is done only on first access to a hash-table loaded from - the "pdump", because the objects' addresses may have changed, thus - affecting their hashes. */ +static const struct hash_table_test * +hash_table_test_from_std (hash_table_std_test_t test) +{ + switch (test) + { + case Test_eq: return &hashtest_eq; + case Test_eql: return &hashtest_eql; + case Test_equal: return &hashtest_equal; + } + emacs_abort(); +} + +/* Rebuild a hash table from its frozen (dumped) form. */ void -hash_table_rehash (Lisp_Object hash) +hash_table_thaw (Lisp_Object hash_table) { - struct Lisp_Hash_Table *h = XHASH_TABLE (hash); - ptrdiff_t i, count = h->count; + struct Lisp_Hash_Table *h = XHASH_TABLE (hash_table); + + /* Freezing discarded most non-essential information; recompute it. + The allocation is minimal with no room for growth. */ + h->test = *hash_table_test_from_std (h->frozen_test); + ptrdiff_t size = ASIZE (h->key_and_value) / 2; + h->count = size; + ptrdiff_t index_size = hash_index_size (size); + h->next_free = -1; + + h->hash = make_nil_vector (size); + h->next = make_vector (size, make_fixnum (-1)); + h->index = make_vector (index_size, make_fixnum (-1)); /* Recompute the actual hash codes for each entry in the table. Order is still invalid. */ - for (i = 0; i < count; i++) + for (ptrdiff_t i = 0; i < size; i++) { Lisp_Object key = HASH_KEY (h, i); Lisp_Object hash_code = hash_from_key (h, key); @@ -4712,12 +4730,7 @@ hash_table_rehash (Lisp_Object hash) set_hash_hash_slot (h, i, hash_code); set_hash_next_slot (h, i, HASH_INDEX (h, start_of_bucket)); set_hash_index_slot (h, start_of_bucket, i); - eassert (HASH_NEXT (h, i) != i); /* Stop loops. */ } - - ptrdiff_t size = ASIZE (h->next); - for (; i + 1 < size; i++) - set_hash_next_slot (h, i, i + 1); } /* Lookup KEY in hash table H. If HASH is non-null, return in *HASH diff --git a/src/lisp.h b/src/lisp.h index 48e1f943ed8..d9b828b0328 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2385,6 +2385,12 @@ INLINE int struct Lisp_Hash_Table; +typedef enum { + Test_eql, + Test_eq, + Test_equal, +} hash_table_std_test_t; + struct hash_table_test { /* Function used to compare keys; always a bare symbol. */ @@ -2473,6 +2479,9 @@ struct Lisp_Hash_Table /* Weakness of the table. */ hash_table_weakness_t weakness : 8; + /* Hash table test (only used when frozen in dump) */ + hash_table_std_test_t frozen_test : 8; + /* True if the table can be purecopied. The table cannot be changed afterwards. */ bool purecopy; @@ -2563,7 +2572,7 @@ hash_from_key (struct Lisp_Hash_Table *h, Lisp_Object key) return h->test.hashfn (key, h); } -void hash_table_rehash (Lisp_Object); +void hash_table_thaw (Lisp_Object hash_table); /* Default size for hash tables if not specified. */ @@ -4038,7 +4047,6 @@ extern void hexbuf_digest (char *, void const *, int); extern char *extract_data_from_object (Lisp_Object, ptrdiff_t *, ptrdiff_t *); EMACS_UINT hash_string (char const *, ptrdiff_t); EMACS_UINT sxhash (Lisp_Object); -Lisp_Object hashfn_user_defined (Lisp_Object, struct Lisp_Hash_Table *); Lisp_Object make_hash_table (struct hash_table_test, EMACS_INT, hash_table_weakness_t, bool); Lisp_Object hash_table_weakness_symbol (hash_table_weakness_t weak); diff --git a/src/pdumper.c b/src/pdumper.c index 8072148c542..e4349f0cb17 100644 --- a/src/pdumper.c +++ b/src/pdumper.c @@ -2646,34 +2646,26 @@ dump_vectorlike_generic (struct dump_context *ctx, return offset; } -/* Return a vector of KEY, VALUE pairs in the given hash table H. The - first H->count pairs are valid, and the rest are unbound. */ +/* Return a vector of KEY, VALUE pairs in the given hash table H. + No room for growth is included. */ static Lisp_Object hash_table_contents (struct Lisp_Hash_Table *h) { - if (h->test.hashfn == hashfn_user_defined) - error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */ - - ptrdiff_t size = HASH_TABLE_SIZE (h); + ptrdiff_t old_size = HASH_TABLE_SIZE (h); + ptrdiff_t size = h->count; Lisp_Object key_and_value = make_uninit_vector (2 * size); ptrdiff_t n = 0; /* Make sure key_and_value ends up in the same order; charset.c relies on it by expecting hash table indices to stay constant across the dump. */ - for (ptrdiff_t i = 0; i < size; i++) + for (ptrdiff_t i = 0; i < old_size; i++) if (!NILP (HASH_HASH (h, i))) { ASET (key_and_value, n++, HASH_KEY (h, i)); ASET (key_and_value, n++, HASH_VALUE (h, i)); } - while (n < 2 * size) - { - ASET (key_and_value, n++, Qunbound); - ASET (key_and_value, n++, Qnil); - } - return key_and_value; } @@ -2686,25 +2678,32 @@ dump_hash_table_list (struct dump_context *ctx) return 0; } -static void -hash_table_freeze (struct Lisp_Hash_Table *h) +static hash_table_std_test_t +hash_table_std_test (const struct hash_table_test *t) { - ptrdiff_t npairs = ASIZE (h->key_and_value) / 2; - h->key_and_value = hash_table_contents (h); - h->next = h->hash = make_fixnum (npairs); - h->index = make_fixnum (ASIZE (h->index)); - h->next_free = (npairs == h->count ? -1 : h->count); + if (BASE_EQ (t->name, Qeq)) + return Test_eq; + if (BASE_EQ (t->name, Qeql)) + return Test_eql; + if (BASE_EQ (t->name, Qequal)) + return Test_equal; + error ("cannot dump hash tables with user-defined tests"); /* Bug#36769 */ } +/* Compact contents and discard inessential information from a hash table, + preparing it for dumping. + See `hash_table_thaw' for the code that restores the object to a usable + state. */ static void -hash_table_thaw (Lisp_Object hash) +hash_table_freeze (struct Lisp_Hash_Table *h) { - struct Lisp_Hash_Table *h = XHASH_TABLE (hash); - h->hash = make_nil_vector (XFIXNUM (h->hash)); - h->next = Fmake_vector (h->next, make_fixnum (-1)); - h->index = Fmake_vector (h->index, make_fixnum (-1)); - - hash_table_rehash (hash); + h->key_and_value = hash_table_contents (h); + eassert (ASIZE (h->key_and_value) == h->count * 2); + h->next = Qnil; + h->hash = Qnil; + h->index = Qnil; + h->count = 0; + h->frozen_test = hash_table_std_test (&h->test); } static dump_off @@ -2724,19 +2723,11 @@ dump_hash_table (struct dump_context *ctx, Lisp_Object object) dump_pseudovector_lisp_fields (ctx, &out->header, &hash->header); /* TODO: dump the hash bucket vectors synchronously here to keep them as close to the hash table as possible. */ - DUMP_FIELD_COPY (out, hash, count); - DUMP_FIELD_COPY (out, hash, next_free); DUMP_FIELD_COPY (out, hash, weakness); DUMP_FIELD_COPY (out, hash, purecopy); DUMP_FIELD_COPY (out, hash, mutable); + DUMP_FIELD_COPY (out, hash, frozen_test); dump_field_lv (ctx, out, hash, &hash->key_and_value, WEIGHT_STRONG); - dump_field_lv (ctx, out, hash, &hash->test.name, WEIGHT_STRONG); - dump_field_lv (ctx, out, hash, &hash->test.user_hash_function, - WEIGHT_STRONG); - dump_field_lv (ctx, out, hash, &hash->test.user_cmp_function, - WEIGHT_STRONG); - dump_field_emacs_ptr (ctx, out, hash, &hash->test.cmpfn); - dump_field_emacs_ptr (ctx, out, hash, &hash->test.hashfn); eassert (hash->next_weak == NULL); return finish_dump_pvec (ctx, &out->header); } -- 2.39.2