From 5f5d664c734414597c1c7d9981b1ceb9ff69c5b1 Mon Sep 17 00:00:00 2001 From: Andrea Corallo Date: Sat, 22 Aug 2020 11:11:21 +0200 Subject: [PATCH] Rework eln hash filename strategy Generate eln filename hashing also the source file content in the form: /absolute/path/filename.el + content -> eln-cache/filename-path_hash-content_hash.eln * src/lread.c (maybe_swap_for_eln): Always call Fcomp_el_to_eln_filename on an existing source file. * src/comp.c (md5.h, sysstdio.h, zlib.h): New include. (comp_hash_string): Use md5 instead of sha512. (MD5_BLOCKSIZE): New macro. (accumulate_and_process_md5, final_process_md5, md5_gz_stream) (comp_hash_source_file): New functions. (Fcomp_el_to_eln_filename): Rework for hasing using also source file content. * src/lread.c (maybe_swap_for_eln): Rename el_name -> src_name as this can be also a have .el.gz extention. --- configure.ac | 9 ++- lib/Makefile.in | 6 ++ src/comp.c | 161 +++++++++++++++++++++++++++++++++++++++++++----- src/lread.c | 13 +++- 4 files changed, 167 insertions(+), 22 deletions(-) diff --git a/configure.ac b/configure.ac index 0582b2f61c5..cdc18eab19e 100644 --- a/configure.ac +++ b/configure.ac @@ -3787,6 +3787,12 @@ Here instructions on how to compile and install libgccjit from source: HAVE_NATIVE_COMP=no LIBGCCJIT_LIB= if test "${with_nativecomp}" != "no"; then + if test "${HAVE_PDUMPER}" = no; then + AC_MSG_ERROR(['--with-nativecomp' requires '--with-dumping=pdumper']) + fi + if test "${HAVE_ZLIB}" = no; then + AC_MSG_ERROR(['--with-nativecomp' requires zlib]) + fi emacs_save_LIBS=$LIBS LIBS="-lgccjit" AC_RUN_IFELSE([libgccjit_smoke_test], [], [libgccjit_broken], @@ -3800,9 +3806,6 @@ if test "${with_nativecomp}" != "no"; then NEED_DYNLIB=yes AC_DEFINE(HAVE_NATIVE_COMP, 1, [Define to 1 if you have the libgccjit library (-lgccjit).]) fi -if test "${HAVE_NATIVE_COMP}" = yes && test "${HAVE_PDUMPER}" = no; then - AC_MSG_ERROR(['--with-nativecomp' requires '--with-dumping=pdumper']) -fi AC_DEFINE_UNQUOTED(NATIVE_ELISP_SUFFIX, ".eln", [System extension for native compiled elisp]) AC_SUBST(HAVE_NATIVE_COMP) diff --git a/lib/Makefile.in b/lib/Makefile.in index 06d8e56421b..8d97d3bcfbb 100644 --- a/lib/Makefile.in +++ b/lib/Makefile.in @@ -50,12 +50,18 @@ am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) am__v_at_0 = @ am__v_at_1 = +HAVE_NATIVE_COMP = @HAVE_NATIVE_COMP@ + ALL_CFLAGS= \ $(C_SWITCH_SYSTEM) $(C_SWITCH_MACHINE) $(DEPFLAGS) \ $(GNULIB_WARN_CFLAGS) $(WERROR_CFLAGS) $(PROFILING_CFLAGS) $(CFLAGS) \ -I. -I../src -I$(srcdir) -I$(srcdir)/../src \ $(if $(patsubst e-%,,$(notdir $<)),,-Demacs) +ifeq ($(HAVE_NATIVE_COMP),yes) +ALL_CFLAGS += -DGL_COMPILE_CRYPTO_STREAM +endif + SYSTEM_TYPE = @SYSTEM_TYPE@ ifeq ($(SYSTEM_TYPE),windows-nt) include $(srcdir)/../nt/gnulib-cfg.mk diff --git a/src/comp.c b/src/comp.c index ff73245b8de..5f1257f6be1 100644 --- a/src/comp.c +++ b/src/comp.c @@ -36,7 +36,9 @@ along with GNU Emacs. If not, see . */ #include "dynlib.h" #include "buffer.h" #include "blockinput.h" -#include "sha512.h" +#include "md5.h" +#include "sysstdio.h" +#include "zlib.h" /********************************/ @@ -394,8 +396,6 @@ load_gccjit_if_necessary (bool mandatory) } -#define ELN_FILENAME_HASH_LEN 64 - /* C symbols emitted for the load relocation mechanism. */ #define CURRENT_THREAD_RELOC_SYM "current_thread_reloc" #define PURE_RELOC_SYM "pure_reloc" @@ -640,9 +640,123 @@ format_string (const char *format, ...) static Lisp_Object comp_hash_string (Lisp_Object string) { - Lisp_Object digest = make_uninit_string (SHA512_DIGEST_SIZE * 2); - sha512_buffer (SSDATA (string), SCHARS (string), SSDATA (digest)); - hexbuf_digest (SSDATA (digest), SDATA (digest), SHA512_DIGEST_SIZE); + Lisp_Object digest = make_uninit_string (MD5_DIGEST_SIZE * 2); + md5_buffer (SSDATA (string), SCHARS (string), SSDATA (digest)); + hexbuf_digest (SSDATA (digest), SDATA (digest), MD5_DIGEST_SIZE); + + return digest; +} + +#define MD5_BLOCKSIZE 32768 /* From md5.c */ + +static char acc_buff[2 * MD5_BLOCKSIZE]; +static size_t acc_size; + +static void +accumulate_and_process_md5 (void *data, size_t len, struct md5_ctx *ctxt) +{ + eassert (len <= MD5_BLOCKSIZE); + /* We may optimize this saving some of these memcpy/move using + directly the outer buffers but so far I'll not bother. */ + memcpy (acc_buff + acc_size, data, len); + acc_size += len; + if (acc_size >= MD5_BLOCKSIZE) + { + acc_size -= MD5_BLOCKSIZE; + md5_process_block (acc_buff, MD5_BLOCKSIZE, ctxt); + memmove (acc_buff, acc_buff + MD5_BLOCKSIZE, acc_size); + } +} + +static void +final_process_md5 (struct md5_ctx *ctxt) +{ + if (acc_size) + { + md5_process_bytes (acc_buff, acc_size, ctxt); + acc_size = 0; + } +} + +static int +md5_gz_stream (FILE *source, void *resblock) +{ + z_stream stream; + unsigned char in[MD5_BLOCKSIZE]; + unsigned char out[MD5_BLOCKSIZE]; + + eassert (!acc_size); + + struct md5_ctx ctx; + md5_init_ctx (&ctx); + + /* allocate inflate state */ + stream.zalloc = Z_NULL; + stream.zfree = Z_NULL; + stream.opaque = Z_NULL; + stream.avail_in = 0; + stream.next_in = Z_NULL; + int res = inflateInit2 (&stream, MAX_WBITS + 32); + if (res != Z_OK) + return -1; + + do { + stream.avail_in = fread (in, 1, MD5_BLOCKSIZE, source); + if (ferror (source)) { + inflateEnd (&stream); + return -1; + } + if (stream.avail_in == 0) + break; + stream.next_in = in; + + do { + stream.avail_out = MD5_BLOCKSIZE; + stream.next_out = out; + res = inflate (&stream, Z_NO_FLUSH); + + if (res != Z_OK && res != Z_STREAM_END) + return -1; + + accumulate_and_process_md5 (out, MD5_BLOCKSIZE - stream.avail_out, &ctx); + } while (!stream.avail_out); + + } while (res != Z_STREAM_END); + + final_process_md5 (&ctx); + inflateEnd (&stream); + + if (res != Z_STREAM_END) + return -1; + + md5_finish_ctx (&ctx, resblock); + + return 0; +} +#undef MD5_BLOCKSIZE + +static Lisp_Object +comp_hash_source_file (Lisp_Object filename) +{ + /* Can't use Finsert_file_contents + Fbuffer_hash as this is called + by Fcomp_el_to_eln_filename too early during bootstrap. */ + bool is_gz = suffix_p (filename, ".gz"); + FILE *f = emacs_fopen (SSDATA (filename), is_gz ? "rb" : "r"); + + if (!f) + report_file_error ("Opening source file", filename); + + Lisp_Object digest = make_uninit_string (MD5_DIGEST_SIZE * 2); + + int res = is_gz + ? md5_gz_stream (f, SSDATA (digest)) + : md5_stream (f, SSDATA (digest)); + fclose (f); + + if (res) + xsignal2 (Qfile_notify_error, build_string ("hashing failed"), filename); + + hexbuf_digest (SSDATA (digest), SSDATA (digest), MD5_DIGEST_SIZE); return digest; } @@ -3872,21 +3986,36 @@ If BASE-DIR is nil use the first entry in `comp-eln-load-path'. */) { CHECK_STRING (filename); + filename = Fexpand_file_name (filename, Qnil); + + if (NILP (Ffile_exists_p (filename))) + xsignal1 (Qfile_missing, filename); + + Lisp_Object content_hash = comp_hash_source_file (filename); + if (suffix_p (filename, ".gz")) filename = Fsubstring (filename, Qnil, make_fixnum (-3)); - filename = Fexpand_file_name (filename, Qnil); /* We create eln filenames with an hash in order to look-up these starting from the source filename, IOW have a relation - /absolute/path/filename.el -> eln-cache/filename-hash.eln. + + /absolute/path/filename.el + content -> + eln-cache/filename-path_hash-content_hash.eln. + + 'dlopen' can return the same handle if two shared with the same + filename are loaded in two different times (even if the first was + deleted!). To prevent this scenario the source file content is + included in the hashing algorithm. + + As at any point in time no more then one file can exist with the + same filename, should be possibile to clean up all + filename-path_hash-* except the most recent one (or the new one + being recompiled). As installing .eln files compiled during the build changes their absolute path we need an hashing mechanism that is not sensitive to that. For this we replace if match PATH_DUMPLOADSEARCH or - PATH_LOADSEARCH with '//' before generating the hash. - - Another approach would be to hash using the source file content - but this may have a measurable performance impact. */ + PATH_LOADSEARCH with '//' before generating the hash. */ if (NILP (loadsearch_re_list)) { @@ -3909,12 +4038,12 @@ If BASE-DIR is nil use the first entry in `comp-eln-load-path'. */) break; } } - - Lisp_Object hash = Fsubstring (comp_hash_string (filename), Qnil, - make_fixnum (ELN_FILENAME_HASH_LEN)); + Lisp_Object separator = build_string ("-"); + Lisp_Object path_hash = comp_hash_string (filename); filename = concat2 (Ffile_name_nondirectory (Fsubstring (filename, Qnil, make_fixnum (-3))), - build_string ("-")); + separator); + Lisp_Object hash = concat3 (path_hash, separator, content_hash); filename = concat3 (filename, hash, build_string (NATIVE_ELISP_SUFFIX)); if (NILP (base_dir)) base_dir = XCAR (Vcomp_eln_load_path); diff --git a/src/lread.c b/src/lread.c index 521da4e1d81..3d0de495605 100644 --- a/src/lread.c +++ b/src/lread.c @@ -1623,10 +1623,17 @@ maybe_swap_for_eln (Lisp_Object *filename, int *fd, struct timespec mtime) Lisp_Object eln_path_tail = Vcomp_eln_load_path; FOR_EACH_TAIL_SAFE (eln_path_tail) { - Lisp_Object el_name = + Lisp_Object src_name = Fsubstring (*filename, Qnil, make_fixnum (-1)); + if (NILP (Ffile_exists_p (src_name))) + { + src_name = concat2 (src_name, build_string (".gz")); + if (NILP (Ffile_exists_p (src_name))) + /* Can't find the corresponding source file. */ + return; + } Lisp_Object eln_name = - Fcomp_el_to_eln_filename (el_name, XCAR (eln_path_tail)); + Fcomp_el_to_eln_filename (src_name, XCAR (eln_path_tail)); int eln_fd = emacs_open (SSDATA (ENCODE_FILE (eln_name)), O_RDONLY, 0); if (eln_fd > 0) @@ -1643,7 +1650,7 @@ maybe_swap_for_eln (Lisp_Object *filename, int *fd, struct timespec mtime) *fd = eln_fd; /* Store the eln -> el relation. */ Fputhash (Ffile_name_nondirectory (eln_name), - el_name, Vcomp_eln_to_el_h); + src_name, Vcomp_eln_to_el_h); return; } else -- 2.39.5