From: Lars Magne Ingebrigtsen Date: Mon, 28 Mar 2016 17:07:39 +0000 (+0200) Subject: Add a new function `buffer-hash' X-Git-Tag: emacs-26.0.90~2280 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=15357f6d1f90b03719f650823ac6531a305a9818;p=emacs.git Add a new function `buffer-hash' * doc/lispref/text.texi (Checksum/Hash): Document `buffer-hash'. * src/fns.c (Fbuffer_hash): New function. (make_digest_string): Refactored out into its own function. (secure_hash): Use it. * test/src/fns-tests.el (fns-tests-hash-buffer): New tests. --- diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi index 4c3a1a01e7d..5e473166d1d 100644 --- a/doc/lispref/text.texi +++ b/doc/lispref/text.texi @@ -4468,6 +4468,20 @@ using the specified or chosen coding system. However, if coding instead. @end defun +@defun buffer-hash &optional buffer-or-name +Return a hash of @var{buffer-or-name}. If @code{nil}, this defaults +to the current buffer. As opposed to @code{secure-hash}, this +function computes the hash based on the internal representation of the +buffer, disregarding any coding systems. It's therefore only useful +when comparing two buffers running in the same Emacs, and is not +guaranteed to return the same hash between different Emacs versions. +It should be somewhat more efficient on larger buffers than +@code{secure-hash} is, and should not allocate more memory. +@c Note that we do not document what hashing function we're using, or +@c even whether it's a cryptographic hash, since that may change +@c according to what we find useful. +@end defun + @node Parsing HTML/XML @section Parsing HTML and XML @cindex parsing html diff --git a/etc/NEWS b/etc/NEWS index ce21532b68d..0a363711626 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -53,6 +53,10 @@ has been added. They are: 'file-attribute-type', 'file-attribute-modes', 'file-attribute-inode-number', and 'file-attribute-device-number' ++++ +** The new function `buffer-hash' has been added, and can be used to +compute a fash, non-consing hash of the contents of a buffer. + --- ** The locale language name 'ca' is now mapped to the language environment 'Catalan', which has been added. diff --git a/src/fns.c b/src/fns.c index 0e3fc2765b4..9513387f93e 100644 --- a/src/fns.c +++ b/src/fns.c @@ -4737,6 +4737,22 @@ returns nil, then (funcall TEST x1 x2) also returns nil. */) #include "sha256.h" #include "sha512.h" +Lisp_Object +make_digest_string (Lisp_Object digest, int digest_size) +{ + unsigned char *p = SDATA (digest); + int i; + + for (i = digest_size - 1; i >= 0; i--) + { + static char const hexdigit[16] = "0123456789abcdef"; + int p_i = p[i]; + p[2 * i] = hexdigit[p_i >> 4]; + p[2 * i + 1] = hexdigit[p_i & 0xf]; + } + return digest; +} + /* ALGORITHM is a symbol: md5, sha1, sha224 and so on. */ static Lisp_Object @@ -4936,17 +4952,7 @@ secure_hash (Lisp_Object algorithm, Lisp_Object object, Lisp_Object start, SSDATA (digest)); if (NILP (binary)) - { - unsigned char *p = SDATA (digest); - for (i = digest_size - 1; i >= 0; i--) - { - static char const hexdigit[16] = "0123456789abcdef"; - int p_i = p[i]; - p[2 * i] = hexdigit[p_i >> 4]; - p[2 * i + 1] = hexdigit[p_i & 0xf]; - } - return digest; - } + return make_digest_string (digest, digest_size); else return make_unibyte_string (SSDATA (digest), digest_size); } @@ -4997,6 +5003,45 @@ If BINARY is non-nil, returns a string in binary form. */) { return secure_hash (algorithm, object, start, end, Qnil, Qnil, binary); } + +DEFUN ("buffer-hash", Fbuffer_hash, Sbuffer_hash, 0, 1, 0, + doc: /* Return a hash of the contents of BUFFER-OR-NAME. +This hash is performed on the raw internal format of the buffer, +disregarding any coding systems. +If nil, use the current buffer." */ ) + (Lisp_Object buffer_or_name) +{ + Lisp_Object buffer; + struct buffer *b; + struct sha1_ctx ctx; + Lisp_Object digest = make_uninit_string (SHA1_DIGEST_SIZE * 2); + + if (NILP (buffer_or_name)) + buffer = Fcurrent_buffer (); + else + buffer = Fget_buffer (buffer_or_name); + if (NILP (buffer)) + nsberror (buffer_or_name); + + b = XBUFFER (buffer); + sha1_init_ctx (&ctx); + + /* Process the first part of the buffer. */ + sha1_process_bytes (BUF_BEG_ADDR (b), + BUF_GPT_BYTE (b) - BUF_BEG_BYTE (b), + &ctx); + + /* If the gap is before the end of the buffer, process the last half + of the buffer. */ + if (BUF_GPT_BYTE (b) < BUF_Z_BYTE (b)) + sha1_process_bytes (BUF_GAP_END_ADDR (b), + BUF_Z_ADDR (b) - BUF_GAP_END_ADDR (b), + &ctx); + + sha1_finish_ctx (&ctx, SSDATA (digest)); + return make_digest_string (digest, SHA1_DIGEST_SIZE); +} + void syms_of_fns (void) @@ -5156,6 +5201,7 @@ this variable. */); defsubr (&Sbase64_decode_string); defsubr (&Smd5); defsubr (&Ssecure_hash); + defsubr (&Sbuffer_hash); defsubr (&Slocale_info); hashtest_eq.name = Qeq; diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el index 688ff1f6bd9..848589692ea 100644 --- a/test/src/fns-tests.el +++ b/test/src/fns-tests.el @@ -219,3 +219,19 @@ (should (equal (func-arity (eval (lambda (x &optional y)) nil)) '(1 . 2))) (should (equal (func-arity (eval (lambda (x &optional y)) t)) '(1 . 2))) (should (equal (func-arity 'let) '(1 . unevalled)))) + +(ert-deftest fns-tests-hash-buffer () + (should (equal (sha1 "foo") "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33")) + (should (equal (with-temp-buffer + (insert "foo") + (buffer-hash)) + (sha1 "foo"))) + ;; This tests whether the presence of a gap in the middle of the + ;; buffer is handled correctly. + (should (equal (with-temp-buffer + (insert "foo") + (goto-char 2) + (insert " ") + (backward-delete-char 1) + (buffer-hash)) + (sha1 "foo"))))