From 1998039f7a8f2ecc884a6fed85c0cc1ce06f83e2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Wed, 22 Nov 2023 14:54:34 +0100 Subject: [PATCH] Change hash_hash_t to uint32_t This saves a lot of memory and is quite sufficient. Hash functions are adapted to produce a hash_hash_t eventually, which eliminates some useless and information-destroying intermediate hash reduction steps. We still use EMACS_UINT for most of the actual hashing steps before producing the final value; this may be slightly wasteful on 32-bit platforms with 64-bit EMACS_UINT. * src/lisp.h (hash_hash_t): Change to uint32_t. * src/fns.c (reduce_emacs_uint_to_hash_hash): New. (hashfn_eq, hashfn_equal, hashfn_user_defined): Reduce return values to hash_hash_t. (sxhash_string): Remove. Caller changed to hash_string. (sxhash_float, sxhash_list, sxhash_vector, sxhash_bool_vector) (sxhash_bignum): Remove wasteful calls to SXHASH_REDUCE. (hash_hash_to_fixnum): New. (Fsxhash_eq, Fsxhash_eql, Fsxhash_equal) (Fsxhash_equal_including_properties): Convert return values to fixnum. --- src/fns.c | 77 +++++++++++++++++++++++++++++------------------------- src/lisp.h | 2 +- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/src/fns.c b/src/fns.c index ed7b7bb2024..3765fc74967 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4452,6 +4452,16 @@ cmpfn_user_defined (Lisp_Object key1, Lisp_Object key2, return hash_table_user_defined_call (ARRAYELTS (args), args, h); } +/* Reduce an EMACS_UINT hash value to hash_hash_t. */ +static inline hash_hash_t +reduce_emacs_uint_to_hash_hash (EMACS_UINT x) +{ + verify (sizeof x <= 2 * sizeof (hash_hash_t)); + return (sizeof x == sizeof (hash_hash_t) + ? x + : x ^ (x >> (8 * (sizeof x - sizeof (hash_hash_t))))); +} + /* Ignore H and return a hash code for KEY which uses 'eq' to compare keys. */ static hash_hash_t @@ -4459,21 +4469,18 @@ hashfn_eq (Lisp_Object key, struct Lisp_Hash_Table *h) { if (symbols_with_pos_enabled && SYMBOL_WITH_POS_P (key)) key = SYMBOL_WITH_POS_SYM (key); - return XHASH (key) ^ XTYPE (key); + return reduce_emacs_uint_to_hash_hash (XHASH (key) ^ XTYPE (key)); } -/* Ignore H and return a hash code for KEY which uses 'equal' to compare keys. - The hash code is at most INTMASK. */ - +/* Ignore H and return a hash code for KEY which uses 'equal' to + compare keys. */ static hash_hash_t hashfn_equal (Lisp_Object key, struct Lisp_Hash_Table *h) { - return sxhash (key); + return reduce_emacs_uint_to_hash_hash (sxhash (key)); } -/* Ignore H and return a hash code for KEY which uses 'eql' to compare keys. - The hash code is at most INTMASK. */ - +/* Ignore H and return a hash code for KEY which uses 'eql' to compare keys. */ static hash_hash_t hashfn_eql (Lisp_Object key, struct Lisp_Hash_Table *h) { @@ -4489,7 +4496,8 @@ hashfn_user_defined (Lisp_Object key, struct Lisp_Hash_Table *h) { Lisp_Object args[] = { h->test->user_hash_function, key }; Lisp_Object hash = hash_table_user_defined_call (ARRAYELTS (args), args, h); - return FIXNUMP (hash) ? XUFIXNUM(hash) : sxhash (hash); + return reduce_emacs_uint_to_hash_hash (FIXNUMP (hash) + ? XUFIXNUM(hash) : sxhash (hash)); } struct hash_table_test const @@ -5061,16 +5069,6 @@ hash_string (char const *ptr, ptrdiff_t len) return hash; } -/* Return a hash for string PTR which has length LEN. The hash - code returned is at most INTMASK. */ - -static EMACS_UINT -sxhash_string (char const *ptr, ptrdiff_t len) -{ - EMACS_UINT hash = hash_string (ptr, len); - return SXHASH_REDUCE (hash); -} - /* Return a hash for the floating point value VAL. */ static EMACS_UINT @@ -5080,7 +5078,7 @@ sxhash_float (double val) union double_and_words u = { .val = val }; for (int i = 0; i < WORDS_PER_DOUBLE; i++) hash = sxhash_combine (hash, u.word[i]); - return SXHASH_REDUCE (hash); + return hash; } /* Return a hash for list LIST. DEPTH is the current depth in the @@ -5107,7 +5105,7 @@ sxhash_list (Lisp_Object list, int depth) hash = sxhash_combine (hash, hash2); } - return SXHASH_REDUCE (hash); + return hash; } @@ -5127,7 +5125,7 @@ sxhash_vector (Lisp_Object vec, int depth) hash = sxhash_combine (hash, hash2); } - return SXHASH_REDUCE (hash); + return hash; } /* Return a hash for bool-vector VECTOR. */ @@ -5143,7 +5141,7 @@ sxhash_bool_vector (Lisp_Object vec) for (i = 0; i < n; ++i) hash = sxhash_combine (hash, bool_vector_data (vec)[i]); - return SXHASH_REDUCE (hash); + return hash; } /* Return a hash for a bignum. */ @@ -5158,19 +5156,18 @@ sxhash_bignum (Lisp_Object bignum) for (i = 0; i < nlimbs; ++i) hash = sxhash_combine (hash, mpz_getlimbn (*n, i)); - return SXHASH_REDUCE (hash); + return hash; } - -/* Return a hash code for OBJ. DEPTH is the current depth in the Lisp - structure. Value is an unsigned integer clipped to INTMASK. */ - EMACS_UINT sxhash (Lisp_Object obj) { return sxhash_obj (obj, 0); } +/* Return a hash code for OBJ. DEPTH is the current depth in the Lisp + structure. */ + static EMACS_UINT sxhash_obj (Lisp_Object obj, int depth) { @@ -5186,7 +5183,7 @@ sxhash_obj (Lisp_Object obj, int depth) return XHASH (obj); case Lisp_String: - return sxhash_string (SSDATA (obj), SBYTES (obj)); + return hash_string (SSDATA (obj), SBYTES (obj)); case Lisp_Vectorlike: { @@ -5213,7 +5210,7 @@ sxhash_obj (Lisp_Object obj, int depth) = XMARKER (obj)->buffer ? XMARKER (obj)->bytepos : 0; EMACS_UINT hash = sxhash_combine ((intptr_t) XMARKER (obj)->buffer, bytepos); - return SXHASH_REDUCE (hash); + return hash; } else if (pvec_type == PVEC_BOOL_VECTOR) return sxhash_bool_vector (obj); @@ -5222,7 +5219,7 @@ sxhash_obj (Lisp_Object obj, int depth) EMACS_UINT hash = OVERLAY_START (obj); hash = sxhash_combine (hash, OVERLAY_END (obj)); hash = sxhash_combine (hash, sxhash_obj (XOVERLAY (obj)->plist, depth)); - return SXHASH_REDUCE (hash); + return hash; } else if (symbols_with_pos_enabled && pvec_type == PVEC_SYMBOL_WITH_POS) return sxhash_obj (XSYMBOL_WITH_POS (obj)->sym, depth + 1); @@ -5258,6 +5255,15 @@ collect_interval (INTERVAL interval, Lisp_Object collector) Lisp Interface ***********************************************************************/ +/* Reduce X to a Lisp fixnum. */ +static inline Lisp_Object +hash_hash_to_fixnum (hash_hash_t x) +{ + return make_ufixnum (FIXNUM_BITS < 8 * sizeof x + ? (x ^ x >> (8 * sizeof x - FIXNUM_BITS)) & INTMASK + : x); +} + DEFUN ("sxhash-eq", Fsxhash_eq, Ssxhash_eq, 1, 1, 0, doc: /* Return an integer hash code for OBJ suitable for `eq'. If (eq A B), then (= (sxhash-eq A) (sxhash-eq B)). @@ -5265,7 +5271,7 @@ If (eq A B), then (= (sxhash-eq A) (sxhash-eq B)). Hash codes are not guaranteed to be preserved across Emacs sessions. */) (Lisp_Object obj) { - return make_ufixnum (hashfn_eq (obj, NULL)); + return hash_hash_to_fixnum (hashfn_eq (obj, NULL)); } DEFUN ("sxhash-eql", Fsxhash_eql, Ssxhash_eql, 1, 1, 0, @@ -5276,7 +5282,7 @@ isn't necessarily true. Hash codes are not guaranteed to be preserved across Emacs sessions. */) (Lisp_Object obj) { - return make_ufixnum (hashfn_eql (obj, NULL)); + return hash_hash_to_fixnum (hashfn_eql (obj, NULL)); } DEFUN ("sxhash-equal", Fsxhash_equal, Ssxhash_equal, 1, 1, 0, @@ -5287,7 +5293,7 @@ opposite isn't necessarily true. Hash codes are not guaranteed to be preserved across Emacs sessions. */) (Lisp_Object obj) { - return make_ufixnum (hashfn_equal (obj, NULL)); + return hash_hash_to_fixnum (hashfn_equal (obj, NULL)); } DEFUN ("sxhash-equal-including-properties", Fsxhash_equal_including_properties, @@ -5302,6 +5308,7 @@ Hash codes are not guaranteed to be preserved across Emacs sessions. */) { if (STRINGP (obj)) { + /* FIXME: This is very wasteful. We needn't cons at all. */ Lisp_Object collector = Fcons (Qnil, Qnil); traverse_intervals (string_intervals (obj), 0, collect_interval, collector); @@ -5311,7 +5318,7 @@ Hash codes are not guaranteed to be preserved across Emacs sessions. */) sxhash (CDR (collector))))); } - return make_ufixnum (hashfn_equal (obj, NULL)); + return hash_hash_to_fixnum (hashfn_equal (obj, NULL)); } diff --git a/src/lisp.h b/src/lisp.h index 0701028c14c..64492361e64 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -2387,7 +2387,7 @@ struct Lisp_Hash_Table; /* The type of a hash value stored in the table. It's unsigned and a subtype of EMACS_UINT. */ -typedef EMACS_UINT hash_hash_t; +typedef uint32_t hash_hash_t; typedef enum { Test_eql, -- 2.39.2