From: Stefan Monnier <monnier@iro.umontreal.ca>
Date: Thu, 28 Jan 2021 17:27:09 +0000 (-0500)
Subject: * src/fns.c (hash_string): Fix bug#46111
X-Git-Tag: emacs-28.0.90~4069
X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=592a230832257e915aa3ed798a1f0210df639031;p=emacs.git

* src/fns.c (hash_string): Fix bug#46111

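Use `memcpy` instead of an unaligned memory access: the old code
dereferenced a `char *` cast to `EMACS_UINT *`, which is undefined
behavior in C when the pointer is not suitably aligned.  On x86 at least,
GCC turns this `memcpy` into a single `mov`, so it's about as fast.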
---

diff --git a/src/fns.c b/src/fns.c
index 7ab2e8f1a03..bd4afa0c4e9 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -4599,33 +4599,29 @@ sweep_weak_table (struct Lisp_Hash_Table *h, bool remove_entries_p)
 EMACS_UINT
 hash_string (char const *ptr, ptrdiff_t len)
 {
-  EMACS_UINT const *p   = (EMACS_UINT const *) ptr;
-  EMACS_UINT const *end = (EMACS_UINT const *) (ptr + len);
+  char const *p   = ptr;
+  char const *end = ptr + len;
   EMACS_UINT hash = len;
   /* At most 8 steps.  We could reuse SXHASH_MAX_LEN, of course,
    * but dividing by 8 is cheaper.  */
-  ptrdiff_t step = 1 + ((end - p) >> 3);
+  ptrdiff_t step = sizeof hash + ((end - p) >> 3);
 
-  /* Beware: `end` might be unaligned, so `p < end` is not always the same
-   * as `p <= end - 1`.  */
-  while (p <= end - 1)
+  while (p + sizeof hash <= end)
     {
-      EMACS_UINT c = *p;
+      EMACS_UINT c;
+      /* We presume that the compiler will replace this `memcpy` with
+         a single load/move instruction when applicable.  */
+      memcpy (&c, p, sizeof hash);
       p += step;
       hash = sxhash_combine (hash, c);
     }
-  if (p < end)
-    { /* A few last bytes remain (smaller than an EMACS_UINT).  */
-      /* FIXME: We could do this without a loop, but it'd require
-         endian-dependent code :-(  */
-      char const *p1 = (char const *)p;
-      char const *end1 = (char const *)end;
-      do
-        {
-          unsigned char c = *p1++;
-          hash = sxhash_combine (hash, c);
-        }
-      while (p1 < end1);
+  /* A few last bytes may remain (smaller than an EMACS_UINT).  */
+  /* FIXME: We could do this without a loop, but it'd require
+     endian-dependent code :-(  */
+  while (p < end)
+    {
+      unsigned char c = *p++;
+      hash = sxhash_combine (hash, c);
     }
 
   return hash;