* admin/merge-gnulib (GNULIB_MODULES): Add memmem-simple.
(AVOIDED_MODULES): Add memchr.
* configure.ac (HAVE_PDUMPER): AC_SUBST it, too, for use in makefiles.
* lib/Makefile.in (libgnu_a_OBJECTS): Add fingerprint.o.
* lib/fingerprint.c: New file.
* lib/memmem.c, lib/str-two-way.h, m4/memmem.m4: New files,
copied from Gnulib.
* lib/fingerprint.h: Rename from src/fingerprint.h.
* lib-src/make-fingerprint.c: Include limits.h, sys/stat.h,
fingerprint.h, intprops.h, min-max.h.
(SSIZE_MAX): New macro, if not already defined.
(main): Without -r, Replace the fingerprint in the input file
instead of generating a fingerprint.c.
* lib/Makefile.in (libgnu_a_OBJECTS): Add fingerprint.o.
* lib/gnulib.mk.in, m4/gnulib-comp.m4: Regenerate.
* src/Makefile.in (HAVE_PDUMPER, MAKE_PDUMPER_FINGERPRINT):
New macros.
(temacs$(EXEEXT)): Use them to replace the fingerprint instead
of precalculating it.
(mostlyclean, ctagsfiles1): Do not worry about fingerprint.c.
filemode filevercmp flexmember fpieee fstatat fsusage fsync
getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog
ieee754-h ignore-value intprops largefile lstat
- manywarnings memrchr minmax mkostemp mktime nstrftime
+ manywarnings memmem-simple memrchr minmax mkostemp mktime nstrftime
pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat regex
sig2str socklen stat-time std-gnu11 stdalign stddef stdio
stpcpy strtoimax symlink sys_stat sys_time
AVOIDED_MODULES='
btowc close dup fchdir fstat langinfo lock
- malloc-posix mbrtowc mbsinit mkdir msvc-inval msvc-nothrow nl_langinfo
+ malloc-posix mbrtowc mbsinit memchr mkdir msvc-inval msvc-nothrow nl_langinfo
openat-die opendir raise
save-cwd select setenv sigprocmask stat stdarg stdbool
threadlib tzset unsetenv utime utime-h
fi
if test "$with_pdumper" = "yes"; then
- AC_DEFINE(HAVE_PDUMPER, 1, [Define to build with portable dumper support])
+ AC_DEFINE([HAVE_PDUMPER], 1, [Define to build with portable dumper support])
+ HAVE_PDUMPER=yes
+else
+ HAVE_PDUMPER=no
fi
+AC_SUBST([HAVE_PDUMPER])
DUMPING=$with_dumping
AC_SUBST(DUMPING)
#include <config.h>
+#include <limits.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/stat.h>
+
#include <sysstdio.h>
-#include <sha256.h>
+
+#include <fingerprint.h>
#include <getopt.h>
+#include <intprops.h>
+#include <min-max.h>
+#include <sha256.h>
+
+#ifndef SSIZE_MAX
+# define SSIZE_MAX TYPE_MAXIMUM (ssize_t)
+#endif
#ifdef WINDOWSNT
/* Defined to be sys_fopen in ms-w32.h, but only #ifdef emacs, so this
raw = true;
break;
case 'h':
- printf ("make-fingerprint [-r] FILES...: compute a hash\n");
- return 0;
+ printf ("make-fingerprint [-r] FILE: replace or compute a hash\n");
+ return EXIT_SUCCESS;
default:
- return 1;
+ return EXIT_FAILURE;
}
}
struct sha256_ctx ctx;
sha256_init_ctx (&ctx);
- for (int i = optind; i < argc; ++i)
+ if (argc - optind != 1)
{
- FILE *f = fopen (argv[i], "r" FOPEN_BINARY);
- if (!f)
- {
- fprintf (stderr, "%s: Error: could not open %s\n",
- argv[0], argv[i]);
- return 1;
- }
+ fprintf (stderr, "%s: missing or extra file operand\n", argv[0]);
+ return EXIT_FAILURE;
+ }
- char buf[128*1024];
- do
- {
- size_t chunksz = fread (buf, 1, sizeof (buf), f);
- if (ferror (f))
- {
- fprintf (stderr, "%s: Error: could not read %s\n",
- argv[0], argv[i]);
- return 1;
- }
- sha256_process_bytes (buf, chunksz, &ctx);
- } while (!feof (f));
- fclose (f);
+ FILE *f = fopen (argv[1], raw ? "r" FOPEN_BINARY : "r+" FOPEN_BINARY);
+ struct stat st;
+ if (!f || fstat (fileno (f), &st) != 0)
+ {
+ perror (argv[1]);
+ return EXIT_FAILURE;
}
+ if (!S_ISREG (st.st_mode))
+ {
+ fprintf (stderr, "%s: Error: %s is not a regular file\n",
+ argv[0], argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ ptrdiff_t maxlen = min (min (TYPE_MAXIMUM (off_t), PTRDIFF_MAX),
+ min (SIZE_MAX, SSIZE_MAX));
+ if (maxlen <= st.st_size)
+ {
+ fprintf (stderr, "%s: %s: file too big\n", argv[0], argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ char *buf = malloc (st.st_size + 1);
+ if (!buf)
+ {
+ perror ("malloc");
+ return EXIT_FAILURE;
+ }
+
+ size_t chunksz = fread (buf, 1, st.st_size + 1, f);
+ if (ferror (f) || chunksz != st.st_size)
+ {
+ fprintf (stderr, "%s: Error: could not read %s\n",
+ argv[0], argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ sha256_process_bytes (buf, chunksz, &ctx);
+
unsigned char digest[32];
sha256_finish_ctx (&ctx, digest);
}
else
{
- puts ("#include \"fingerprint.h\"\n"
- "unsigned char const fingerprint[] =\n"
- "{");
- for (int i = 0; i < 32; ++i)
- printf ("\t0x%02X,\n", digest[i]);
- puts ("};");
+ char *finger = memmem (buf, chunksz, fingerprint, sizeof fingerprint);
+ if (!finger)
+ {
+ fprintf (stderr, "%s: %s: missing fingerprint\n", argv[0], argv[1]);
+ return EXIT_FAILURE;
+ }
+ else if (memmem (finger + 1, buf + chunksz - (finger + 1),
+ fingerprint, sizeof fingerprint))
+ {
+ fprintf (stderr, "%s: %s: two occurrences of fingerprint\n",
+ argv[0], argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ if (fseeko (f, finger - buf, SEEK_SET) != 0)
+ {
+ perror (argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ if (fwrite (digest, 1, sizeof digest, f) != sizeof digest)
+ {
+ perror (argv[1]);
+ return EXIT_FAILURE;
+ }
+ }
+
+ if (fclose (f) != 0)
+ {
+ perror (argv[1]);
+ return EXIT_FAILURE;
}
return EXIT_SUCCESS;
# and building it would just waste time.
not_emacs_OBJECTS = regex.o
-libgnu_a_OBJECTS = $(gl_LIBOBJS) \
+libgnu_a_OBJECTS = fingerprint.o $(gl_LIBOBJS) \
$(patsubst %.c,%.o,$(filter %.c,$(libgnu_a_SOURCES)))
for_emacs_OBJECTS = $(filter-out $(not_emacs_OBJECTS),$(libgnu_a_OBJECTS))
libegnu_a_OBJECTS = $(patsubst %.o,e-%.o,$(for_emacs_OBJECTS))
--- /dev/null
+/* Placeholder fingerprint for Emacs
+
+Copyright 2019 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "fingerprint.h"
+
+/* This random fingerprint was generated by the shell command:
+
+ shuf -i 0-255 -n 32 -r | awk '{printf " 0x%.02X,\n", $0}'
+
+ In the final Emacs executable, this random fingerprint is replaced
+ by a fingerprint of the temporary Emacs executable that was built
+ along the way. */
+
+unsigned char const fingerprint[] =
+ {
+ 0xDE,
+ 0x86,
+ 0xBB,
+ 0x99,
+ 0xFF,
+ 0xF5,
+ 0x46,
+ 0x9A,
+ 0x9E,
+ 0x3F,
+ 0x9F,
+ 0x5D,
+ 0x9A,
+ 0xDF,
+ 0xF0,
+ 0x91,
+ 0xBD,
+ 0xCD,
+ 0xC1,
+ 0xE8,
+ 0x0C,
+ 0x16,
+ 0x1E,
+ 0xAF,
+ 0xB8,
+ 0x6C,
+ 0xE2,
+ 0x2B,
+ 0xB1,
+ 0x24,
+ 0xCE,
+ 0xB0,
+ };
--- /dev/null
+/* Header file for the Emacs build fingerprint.
+
+Copyright (C) 2016, 2018-2019 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef EMACS_FINGERPRINT_H
+#define EMACS_FINGERPRINT_H
+
+/* We generate fingerprint.c and fingerprint.o from all the sources in
+ Emacs. This way, we have a unique value that we can use to pair
+ data files (like a portable dump image) with a specific build of
+ Emacs. */
+extern unsigned char const fingerprint[32];
+
+#endif
# --avoid=malloc-posix \
# --avoid=mbrtowc \
# --avoid=mbsinit \
+# --avoid=memchr \
# --avoid=mkdir \
# --avoid=msvc-inval \
# --avoid=msvc-nothrow \
# largefile \
# lstat \
# manywarnings \
+# memmem-simple \
# memrchr \
# minmax \
# mkostemp \
gl_GNULIB_ENABLED_2049e887c7e5308faad27b3f894bb8c9 = @gl_GNULIB_ENABLED_2049e887c7e5308faad27b3f894bb8c9@
gl_GNULIB_ENABLED_21ee726a3540c09237a8e70c0baf7467 = @gl_GNULIB_ENABLED_21ee726a3540c09237a8e70c0baf7467@
gl_GNULIB_ENABLED_260941c0e5dc67ec9e87d1fb321c300b = @gl_GNULIB_ENABLED_260941c0e5dc67ec9e87d1fb321c300b@
-gl_GNULIB_ENABLED_37f71b604aa9c54446783d80f42fe547 = @gl_GNULIB_ENABLED_37f71b604aa9c54446783d80f42fe547@
gl_GNULIB_ENABLED_5264294aa0a5557541b53c8c741f7f31 = @gl_GNULIB_ENABLED_5264294aa0a5557541b53c8c741f7f31@
gl_GNULIB_ENABLED_6099e9737f757db36c47fa9d9f02e88c = @gl_GNULIB_ENABLED_6099e9737f757db36c47fa9d9f02e88c@
gl_GNULIB_ENABLED_682e609604ccaac6be382e4ee3a4eaec = @gl_GNULIB_ENABLED_682e609604ccaac6be382e4ee3a4eaec@
endif
## end gnulib module lstat
+## begin gnulib module memmem-simple
+ifeq (,$(OMIT_GNULIB_MODULE_memmem-simple))
+
+
+EXTRA_DIST += memmem.c str-two-way.h
+
+EXTRA_libgnu_a_SOURCES += memmem.c
+
+endif
+## end gnulib module memmem-simple
+
## begin gnulib module memrchr
ifeq (,$(OMIT_GNULIB_MODULE_memrchr))
--- /dev/null
+/* Copyright (C) 1991-1994, 1996-1998, 2000, 2004, 2007-2019 Free Software
+ Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, see <https://www.gnu.org/licenses/>. */
+
+/* This particular implementation was written by Eric Blake, 2008. */
+
+#ifndef _LIBC
+# include <config.h>
+#endif
+
+/* Specification of memmem. */
+#include <string.h>
+
+#define RETURN_TYPE void *
+#define AVAILABLE(h, h_l, j, n_l) ((j) <= (h_l) - (n_l))
+#include "str-two-way.h"
+
+/* Return the first occurrence of NEEDLE in HAYSTACK. Return HAYSTACK
+ if NEEDLE_LEN is 0, otherwise NULL if NEEDLE is not found in
+ HAYSTACK. */
+void *
+memmem (const void *haystack_start, size_t haystack_len,
+ const void *needle_start, size_t needle_len)
+{
+ /* Abstract memory is considered to be an array of 'unsigned char' values,
+ not an array of 'char' values. See ISO C 99 section 6.2.6.1. */
+ const unsigned char *haystack = (const unsigned char *) haystack_start;
+ const unsigned char *needle = (const unsigned char *) needle_start;
+
+ if (needle_len == 0)
+ /* The first occurrence of the empty string is deemed to occur at
+ the beginning of the string. */
+ return (void *) haystack;
+
+ /* Sanity check, otherwise the loop might search through the whole
+ memory. */
+ if (__builtin_expect (haystack_len < needle_len, 0))
+ return NULL;
+
+ /* Use optimizations in memchr when possible, to reduce the search
+ size of haystack using a linear algorithm with a smaller
+ coefficient. However, avoid memchr for long needles, since we
+ can often achieve sublinear performance. */
+ if (needle_len < LONG_NEEDLE_THRESHOLD)
+ {
+ haystack = memchr (haystack, *needle, haystack_len);
+ if (!haystack || __builtin_expect (needle_len == 1, 0))
+ return (void *) haystack;
+ haystack_len -= haystack - (const unsigned char *) haystack_start;
+ if (haystack_len < needle_len)
+ return NULL;
+ return two_way_short_needle (haystack, haystack_len, needle, needle_len);
+ }
+ else
+ return two_way_long_needle (haystack, haystack_len, needle, needle_len);
+}
+
+#undef LONG_NEEDLE_THRESHOLD
--- /dev/null
+/* Byte-wise substring search, using the Two-Way algorithm.
+ Copyright (C) 2008-2019 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Eric Blake <ebb9@byu.net>, 2008.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, see <https://www.gnu.org/licenses/>. */
+
+/* Before including this file, you need to include <config.h> and
+ <string.h>, and define:
+ RESULT_TYPE A macro that expands to the return type.
+ AVAILABLE(h, h_l, j, n_l)
+ A macro that returns nonzero if there are
+ at least N_L bytes left starting at H[J].
+ H is 'unsigned char *', H_L, J, and N_L
+ are 'size_t'; H_L is an lvalue. For
+ NUL-terminated searches, H_L can be
+ modified each iteration to avoid having
+ to compute the end of H up front.
+
+ For case-insensitivity, you may optionally define:
+ CMP_FUNC(p1, p2, l) A macro that returns 0 iff the first L
+ characters of P1 and P2 are equal.
+ CANON_ELEMENT(c) A macro that canonicalizes an element right after
+ it has been fetched from one of the two strings.
+ The argument is an 'unsigned char'; the result
+ must be an 'unsigned char' as well.
+
+ This file undefines the macros documented above, and defines
+ LONG_NEEDLE_THRESHOLD.
+*/
+
+#include <limits.h>
+#include <stdint.h>
+
+/* We use the Two-Way string matching algorithm (also known as
+ Chrochemore-Perrin), which guarantees linear complexity with
+ constant space. Additionally, for long needles, we also use a bad
+ character shift table similar to the Boyer-Moore algorithm to
+ achieve improved (potentially sub-linear) performance.
+
+ See http://www-igm.univ-mlv.fr/~lecroq/string/node26.html#SECTION00260,
+ https://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm,
+ https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.34.6641&rep=rep1&type=pdf
+*/
+
+/* Point at which computing a bad-byte shift table is likely to be
+ worthwhile. Small needles should not compute a table, since it
+ adds (1 << CHAR_BIT) + NEEDLE_LEN computations of preparation for a
+ speedup no greater than a factor of NEEDLE_LEN. The larger the
+ needle, the better the potential performance gain. On the other
+ hand, on non-POSIX systems with CHAR_BIT larger than eight, the
+ memory required for the table is prohibitive. */
+#if CHAR_BIT < 10
+# define LONG_NEEDLE_THRESHOLD 32U
+#else
+# define LONG_NEEDLE_THRESHOLD SIZE_MAX
+#endif
+
+#ifndef MAX
+# define MAX(a, b) ((a < b) ? (b) : (a))
+#endif
+
+#ifndef CANON_ELEMENT
+# define CANON_ELEMENT(c) c
+#endif
+#ifndef CMP_FUNC
+# define CMP_FUNC memcmp
+#endif
+
+/* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN.
+ Return the index of the first byte in the right half, and set
+ *PERIOD to the global period of the right half.
+
+ The global period of a string is the smallest index (possibly its
+ length) at which all remaining bytes in the string are repetitions
+ of the prefix (the last repetition may be a subset of the prefix).
+
+ When NEEDLE is factored into two halves, a local period is the
+ length of the smallest word that shares a suffix with the left half
+ and shares a prefix with the right half. All factorizations of a
+ non-empty NEEDLE have a local period of at least 1 and no greater
+ than NEEDLE_LEN.
+
+ A critical factorization has the property that the local period
+ equals the global period. All strings have at least one critical
+ factorization with the left half smaller than the global period.
+ And while some strings have more than one critical factorization,
+ it is provable that with an ordered alphabet, at least one of the
+ critical factorizations corresponds to a maximal suffix.
+
+ Given an ordered alphabet, a critical factorization can be computed
+ in linear time, with 2 * NEEDLE_LEN comparisons, by computing the
+ shorter of two ordered maximal suffixes. The ordered maximal
+ suffixes are determined by lexicographic comparison while tracking
+ periodicity. */
+static size_t
+critical_factorization (const unsigned char *needle, size_t needle_len,
+ size_t *period)
+{
+ /* Index of last byte of left half, or SIZE_MAX. */
+ size_t max_suffix, max_suffix_rev;
+ size_t j; /* Index into NEEDLE for current candidate suffix. */
+ size_t k; /* Offset into current period. */
+ size_t p; /* Intermediate period. */
+ unsigned char a, b; /* Current comparison bytes. */
+
+ /* Special case NEEDLE_LEN of 1 or 2 (all callers already filtered
+ out 0-length needles. */
+ if (needle_len < 3)
+ {
+ *period = 1;
+ return needle_len - 1;
+ }
+
+ /* Invariants:
+ 0 <= j < NEEDLE_LEN - 1
+ -1 <= max_suffix{,_rev} < j (treating SIZE_MAX as if it were signed)
+ min(max_suffix, max_suffix_rev) < global period of NEEDLE
+ 1 <= p <= global period of NEEDLE
+ p == global period of the substring NEEDLE[max_suffix{,_rev}+1...j]
+ 1 <= k <= p
+ */
+
+ /* Perform lexicographic search. */
+ max_suffix = SIZE_MAX;
+ j = 0;
+ k = p = 1;
+ while (j + k < needle_len)
+ {
+ a = CANON_ELEMENT (needle[j + k]);
+ b = CANON_ELEMENT (needle[max_suffix + k]);
+ if (a < b)
+ {
+ /* Suffix is smaller, period is entire prefix so far. */
+ j += k;
+ k = 1;
+ p = j - max_suffix;
+ }
+ else if (a == b)
+ {
+ /* Advance through repetition of the current period. */
+ if (k != p)
+ ++k;
+ else
+ {
+ j += p;
+ k = 1;
+ }
+ }
+ else /* b < a */
+ {
+ /* Suffix is larger, start over from current location. */
+ max_suffix = j++;
+ k = p = 1;
+ }
+ }
+ *period = p;
+
+ /* Perform reverse lexicographic search. */
+ max_suffix_rev = SIZE_MAX;
+ j = 0;
+ k = p = 1;
+ while (j + k < needle_len)
+ {
+ a = CANON_ELEMENT (needle[j + k]);
+ b = CANON_ELEMENT (needle[max_suffix_rev + k]);
+ if (b < a)
+ {
+ /* Suffix is smaller, period is entire prefix so far. */
+ j += k;
+ k = 1;
+ p = j - max_suffix_rev;
+ }
+ else if (a == b)
+ {
+ /* Advance through repetition of the current period. */
+ if (k != p)
+ ++k;
+ else
+ {
+ j += p;
+ k = 1;
+ }
+ }
+ else /* a < b */
+ {
+ /* Suffix is larger, start over from current location. */
+ max_suffix_rev = j++;
+ k = p = 1;
+ }
+ }
+
+ /* Choose the shorter suffix. Return the index of the first byte of
+ the right half, rather than the last byte of the left half.
+
+ For some examples, 'banana' has two critical factorizations, both
+ exposed by the two lexicographic extreme suffixes of 'anana' and
+ 'nana', where both suffixes have a period of 2. On the other
+ hand, with 'aab' and 'bba', both strings have a single critical
+ factorization of the last byte, with the suffix having a period
+ of 1. While the maximal lexicographic suffix of 'aab' is 'b',
+ the maximal lexicographic suffix of 'bba' is 'ba', which is not a
+ critical factorization. Conversely, the maximal reverse
+ lexicographic suffix of 'a' works for 'bba', but not 'ab' for
+ 'aab'. The shorter suffix of the two will always be a critical
+ factorization. */
+ if (max_suffix_rev + 1 < max_suffix + 1)
+ return max_suffix + 1;
+ *period = p;
+ return max_suffix_rev + 1;
+}
+
+/* Return the first location of non-empty NEEDLE within HAYSTACK, or
+ NULL. HAYSTACK_LEN is the minimum known length of HAYSTACK. This
+ method is optimized for NEEDLE_LEN < LONG_NEEDLE_THRESHOLD.
+ Performance is guaranteed to be linear, with an initialization cost
+ of 2 * NEEDLE_LEN comparisons.
+
+ If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at
+ most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching.
+ If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 *
+ HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching. */
+static RETURN_TYPE
+two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
+ const unsigned char *needle, size_t needle_len)
+{
+ size_t i; /* Index into current byte of NEEDLE. */
+ size_t j; /* Index into current window of HAYSTACK. */
+ size_t period; /* The period of the right half of needle. */
+ size_t suffix; /* The index of the right half of needle. */
+
+ /* Factor the needle into two halves, such that the left half is
+ smaller than the global period, and the right half is
+ periodic (with a period as large as NEEDLE_LEN - suffix). */
+ suffix = critical_factorization (needle, needle_len, &period);
+
+ /* Perform the search. Each iteration compares the right half
+ first. */
+ if (CMP_FUNC (needle, needle + period, suffix) == 0)
+ {
+ /* Entire needle is periodic; a mismatch in the left half can
+ only advance by the period, so use memory to avoid rescanning
+ known occurrences of the period in the right half. */
+ size_t memory = 0;
+ j = 0;
+ while (AVAILABLE (haystack, haystack_len, j, needle_len))
+ {
+ /* Scan for matches in right half. */
+ i = MAX (suffix, memory);
+ while (i < needle_len && (CANON_ELEMENT (needle[i])
+ == CANON_ELEMENT (haystack[i + j])))
+ ++i;
+ if (needle_len <= i)
+ {
+ /* Scan for matches in left half. */
+ i = suffix - 1;
+ while (memory < i + 1 && (CANON_ELEMENT (needle[i])
+ == CANON_ELEMENT (haystack[i + j])))
+ --i;
+ if (i + 1 < memory + 1)
+ return (RETURN_TYPE) (haystack + j);
+ /* No match, so remember how many repetitions of period
+ on the right half were scanned. */
+ j += period;
+ memory = needle_len - period;
+ }
+ else
+ {
+ j += i - suffix + 1;
+ memory = 0;
+ }
+ }
+ }
+ else
+ {
+ /* The two halves of needle are distinct; no extra memory is
+ required, and any mismatch results in a maximal shift. */
+ period = MAX (suffix, needle_len - suffix) + 1;
+ j = 0;
+ while (AVAILABLE (haystack, haystack_len, j, needle_len))
+ {
+ /* Scan for matches in right half. */
+ i = suffix;
+ while (i < needle_len && (CANON_ELEMENT (needle[i])
+ == CANON_ELEMENT (haystack[i + j])))
+ ++i;
+ if (needle_len <= i)
+ {
+ /* Scan for matches in left half. */
+ i = suffix - 1;
+ while (i != SIZE_MAX && (CANON_ELEMENT (needle[i])
+ == CANON_ELEMENT (haystack[i + j])))
+ --i;
+ if (i == SIZE_MAX)
+ return (RETURN_TYPE) (haystack + j);
+ j += period;
+ }
+ else
+ j += i - suffix + 1;
+ }
+ }
+ return NULL;
+}
+
+/* Return the first location of non-empty NEEDLE within HAYSTACK, or
+ NULL. HAYSTACK_LEN is the minimum known length of HAYSTACK. This
+ method is optimized for LONG_NEEDLE_THRESHOLD <= NEEDLE_LEN.
+ Performance is guaranteed to be linear, with an initialization cost
+ of 3 * NEEDLE_LEN + (1 << CHAR_BIT) operations.
+
+ If AVAILABLE does not modify HAYSTACK_LEN (as in memmem), then at
+ most 2 * HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching,
+ and sublinear performance O(HAYSTACK_LEN / NEEDLE_LEN) is possible.
+ If AVAILABLE modifies HAYSTACK_LEN (as in strstr), then at most 3 *
+ HAYSTACK_LEN - NEEDLE_LEN comparisons occur in searching, and
+ sublinear performance is not possible. */
+static RETURN_TYPE
+two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
+ const unsigned char *needle, size_t needle_len)
+{
+ size_t i; /* Index into current byte of NEEDLE. */
+ size_t j; /* Index into current window of HAYSTACK. */
+ size_t period; /* The period of the right half of needle. */
+ size_t suffix; /* The index of the right half of needle. */
+ size_t shift_table[1U << CHAR_BIT]; /* See below. */
+
+ /* Factor the needle into two halves, such that the left half is
+ smaller than the global period, and the right half is
+ periodic (with a period as large as NEEDLE_LEN - suffix). */
+ suffix = critical_factorization (needle, needle_len, &period);
+
+ /* Populate shift_table. For each possible byte value c,
+ shift_table[c] is the distance from the last occurrence of c to
+ the end of NEEDLE, or NEEDLE_LEN if c is absent from the NEEDLE.
+ shift_table[NEEDLE[NEEDLE_LEN - 1]] contains the only 0. */
+ for (i = 0; i < 1U << CHAR_BIT; i++)
+ shift_table[i] = needle_len;
+ for (i = 0; i < needle_len; i++)
+ shift_table[CANON_ELEMENT (needle[i])] = needle_len - i - 1;
+
+ /* Perform the search. Each iteration compares the right half
+ first. */
+ if (CMP_FUNC (needle, needle + period, suffix) == 0)
+ {
+ /* Entire needle is periodic; a mismatch in the left half can
+ only advance by the period, so use memory to avoid rescanning
+ known occurrences of the period in the right half. */
+ size_t memory = 0;
+ size_t shift;
+ j = 0;
+ while (AVAILABLE (haystack, haystack_len, j, needle_len))
+ {
+ /* Check the last byte first; if it does not match, then
+ shift to the next possible match location. */
+ shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
+ if (0 < shift)
+ {
+ if (memory && shift < period)
+ {
+ /* Since needle is periodic, but the last period has
+ a byte out of place, there can be no match until
+ after the mismatch. */
+ shift = needle_len - period;
+ }
+ memory = 0;
+ j += shift;
+ continue;
+ }
+ /* Scan for matches in right half. The last byte has
+ already been matched, by virtue of the shift table. */
+ i = MAX (suffix, memory);
+ while (i < needle_len - 1 && (CANON_ELEMENT (needle[i])
+ == CANON_ELEMENT (haystack[i + j])))
+ ++i;
+ if (needle_len - 1 <= i)
+ {
+ /* Scan for matches in left half. */
+ i = suffix - 1;
+ while (memory < i + 1 && (CANON_ELEMENT (needle[i])
+ == CANON_ELEMENT (haystack[i + j])))
+ --i;
+ if (i + 1 < memory + 1)
+ return (RETURN_TYPE) (haystack + j);
+ /* No match, so remember how many repetitions of period
+ on the right half were scanned. */
+ j += period;
+ memory = needle_len - period;
+ }
+ else
+ {
+ j += i - suffix + 1;
+ memory = 0;
+ }
+ }
+ }
+ else
+ {
+ /* The two halves of needle are distinct; no extra memory is
+ required, and any mismatch results in a maximal shift. */
+ size_t shift;
+ period = MAX (suffix, needle_len - suffix) + 1;
+ j = 0;
+ while (AVAILABLE (haystack, haystack_len, j, needle_len))
+ {
+ /* Check the last byte first; if it does not match, then
+ shift to the next possible match location. */
+ shift = shift_table[CANON_ELEMENT (haystack[j + needle_len - 1])];
+ if (0 < shift)
+ {
+ j += shift;
+ continue;
+ }
+ /* Scan for matches in right half. The last byte has
+ already been matched, by virtue of the shift table. */
+ i = suffix;
+ while (i < needle_len - 1 && (CANON_ELEMENT (needle[i])
+ == CANON_ELEMENT (haystack[i + j])))
+ ++i;
+ if (needle_len - 1 <= i)
+ {
+ /* Scan for matches in left half. */
+ i = suffix - 1;
+ while (i != SIZE_MAX && (CANON_ELEMENT (needle[i])
+ == CANON_ELEMENT (haystack[i + j])))
+ --i;
+ if (i == SIZE_MAX)
+ return (RETURN_TYPE) (haystack + j);
+ j += period;
+ }
+ else
+ j += i - suffix + 1;
+ }
+ }
+ return NULL;
+}
+
+#undef AVAILABLE
+#undef CANON_ELEMENT
+#undef CMP_FUNC
+#undef MAX
+#undef RETURN_TYPE
# Code from module localtime-buffer:
# Code from module lstat:
# Code from module manywarnings:
+ # Code from module memmem-simple:
# Code from module memrchr:
# Code from module minmax:
# Code from module mkostemp:
gl_source_base='lib'
gl_FUNC_ACL
gl_FUNC_ALLOCA
+ gl___BUILTIN_EXPECT
gl_BYTESWAP
AC_CHECK_FUNCS_ONCE([readlinkat])
gl_CLOCK_TIME
gl_PREREQ_LSTAT
fi
gl_SYS_STAT_MODULE_INDICATOR([lstat])
+ gl_FUNC_MEMMEM_SIMPLE
+ if test $HAVE_MEMMEM = 0 || test $REPLACE_MEMMEM = 1; then
+ AC_LIBOBJ([memmem])
+ fi
+ gl_STRING_MODULE_INDICATOR([memmem])
gl_FUNC_MEMRCHR
if test $ac_cv_func_memrchr = no; then
AC_LIBOBJ([memrchr])
gl_UTIMENS
AC_C_VARARRAYS
gl_gnulib_enabled_260941c0e5dc67ec9e87d1fb321c300b=false
- gl_gnulib_enabled_37f71b604aa9c54446783d80f42fe547=false
gl_gnulib_enabled_cloexec=false
gl_gnulib_enabled_dirfd=false
gl_gnulib_enabled_euidaccess=false
func_gl_gnulib_m4code_open
fi
}
- func_gl_gnulib_m4code_37f71b604aa9c54446783d80f42fe547 ()
- {
- if ! $gl_gnulib_enabled_37f71b604aa9c54446783d80f42fe547; then
- gl___BUILTIN_EXPECT
- gl_gnulib_enabled_37f71b604aa9c54446783d80f42fe547=true
- fi
- }
func_gl_gnulib_m4code_cloexec ()
{
if ! $gl_gnulib_enabled_cloexec; then
if test $HAVE_READLINKAT = 0; then
func_gl_gnulib_m4code_03e0aaad4cb89ca757653bd367a6ccb7
fi
- if test $ac_use_included_regex = yes; then
- func_gl_gnulib_m4code_37f71b604aa9c54446783d80f42fe547
- fi
if test $ac_use_included_regex = yes; then
func_gl_gnulib_m4code_21ee726a3540c09237a8e70c0baf7467
fi
fi
m4_pattern_allow([^gl_GNULIB_ENABLED_])
AM_CONDITIONAL([gl_GNULIB_ENABLED_260941c0e5dc67ec9e87d1fb321c300b], [$gl_gnulib_enabled_260941c0e5dc67ec9e87d1fb321c300b])
- AM_CONDITIONAL([gl_GNULIB_ENABLED_37f71b604aa9c54446783d80f42fe547], [$gl_gnulib_enabled_37f71b604aa9c54446783d80f42fe547])
AM_CONDITIONAL([gl_GNULIB_ENABLED_cloexec], [$gl_gnulib_enabled_cloexec])
AM_CONDITIONAL([gl_GNULIB_ENABLED_dirfd], [$gl_gnulib_enabled_dirfd])
AM_CONDITIONAL([gl_GNULIB_ENABLED_euidaccess], [$gl_gnulib_enabled_euidaccess])
lib/lstat.c
lib/md5.c
lib/md5.h
+ lib/memmem.c
lib/memrchr.c
lib/minmax.h
lib/mkostemp.c
lib/stdio.in.h
lib/stdlib.in.h
lib/stpcpy.c
+ lib/str-two-way.h
lib/strftime.h
lib/string.in.h
lib/strtoimax.c
m4/manywarnings.m4
m4/mbstate_t.m4
m4/md5.m4
+ m4/memmem.m4
m4/memrchr.m4
m4/minmax.m4
m4/mkostemp.m4
--- /dev/null
+# memmem.m4 serial 25
+dnl Copyright (C) 2002-2004, 2007-2019 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+dnl Check that memmem is present and functional.
+AC_DEFUN([gl_FUNC_MEMMEM_SIMPLE],
+[
+ dnl Persuade glibc <string.h> to declare memmem().
+ AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS])
+
+ AC_REQUIRE([gl_HEADER_STRING_H_DEFAULTS])
+ AC_CHECK_FUNCS([memmem])
+ if test $ac_cv_func_memmem = yes; then
+ HAVE_MEMMEM=1
+ else
+ HAVE_MEMMEM=0
+ fi
+ AC_CHECK_DECLS_ONCE([memmem])
+ if test $ac_cv_have_decl_memmem = no; then
+ HAVE_DECL_MEMMEM=0
+ else
+ dnl Detect https://sourceware.org/bugzilla/show_bug.cgi?id=12092.
+ dnl Also check that we handle empty needles correctly.
+ AC_CACHE_CHECK([whether memmem works],
+ [gl_cv_func_memmem_works_always],
+ [AC_RUN_IFELSE([AC_LANG_PROGRAM([[
+#include <string.h> /* for memmem */
+#define P "_EF_BF_BD"
+#define HAYSTACK "F_BD_CE_BD" P P P P "_C3_88_20" P P P "_C3_A7_20" P
+#define NEEDLE P P P P P
+]], [[
+ int result = 0;
+ if (memmem (HAYSTACK, strlen (HAYSTACK), NEEDLE, strlen (NEEDLE)))
+ result |= 1;
+ /* Check for empty needle behavior. */
+ {
+ const char *haystack = "AAA";
+ if (memmem (haystack, 3, NULL, 0) != haystack)
+ result |= 2;
+ }
+ return result;
+ ]])],
+ [gl_cv_func_memmem_works_always=yes],
+ [gl_cv_func_memmem_works_always=no],
+ [dnl glibc 2.9..2.12 and cygwin 1.7.7 have issue #12092 above.
+ dnl Also empty needles work on glibc >= 2.1 and cygwin >= 1.7.0.
+ dnl uClibc is not affected, since it uses different source code.
+ dnl Assume that it works on all other platforms (even if not linear).
+ AC_EGREP_CPP([Lucky user],
+ [
+#ifdef __GNU_LIBRARY__
+ #include <features.h>
+ #if ((__GLIBC__ == 2 && ((__GLIBC_MINOR > 0 && __GLIBC_MINOR__ < 9) \
+ || __GLIBC_MINOR__ > 12)) \
+ || (__GLIBC__ > 2)) \
+ || defined __UCLIBC__
+ Lucky user
+ #endif
+#elif defined __CYGWIN__
+ #include <cygwin/version.h>
+ #if CYGWIN_VERSION_DLL_COMBINED > CYGWIN_VERSION_DLL_MAKE_COMBINED (1007, 7)
+ Lucky user
+ #endif
+#else
+ Lucky user
+#endif
+ ],
+ [gl_cv_func_memmem_works_always="guessing yes"],
+ [gl_cv_func_memmem_works_always="guessing no"])
+ ])
+ ])
+ case "$gl_cv_func_memmem_works_always" in
+ *yes) ;;
+ *)
+ REPLACE_MEMMEM=1
+ ;;
+ esac
+ fi
+ gl_PREREQ_MEMMEM
+]) # gl_FUNC_MEMMEM_SIMPLE
+
+dnl Additionally, check that memmem has linear performance characteristics
+AC_DEFUN([gl_FUNC_MEMMEM],
+[
+ AC_REQUIRE([gl_FUNC_MEMMEM_SIMPLE])
+ if test $HAVE_DECL_MEMMEM = 1 && test $REPLACE_MEMMEM = 0; then
+ AC_CACHE_CHECK([whether memmem works in linear time],
+ [gl_cv_func_memmem_works_fast],
+ [AC_RUN_IFELSE([AC_LANG_PROGRAM([[
+#include <signal.h> /* for signal */
+#include <string.h> /* for memmem */
+#include <stdlib.h> /* for malloc */
+#include <unistd.h> /* for alarm */
+static void quit (int sig) { _exit (sig + 128); }
+]], [[
+ int result = 0;
+ size_t m = 1000000;
+ char *haystack = (char *) malloc (2 * m + 1);
+ char *needle = (char *) malloc (m + 1);
+ /* Failure to compile this test due to missing alarm is okay,
+ since all such platforms (mingw) also lack memmem. */
+ signal (SIGALRM, quit);
+ alarm (5);
+ /* Check for quadratic performance. */
+ if (haystack && needle)
+ {
+ memset (haystack, 'A', 2 * m);
+ haystack[2 * m] = 'B';
+ memset (needle, 'A', m);
+ needle[m] = 'B';
+ if (!memmem (haystack, 2 * m + 1, needle, m + 1))
+ result |= 1;
+ }
+ /* Free allocated memory, in case some sanitizer is watching. */
+ free (haystack);
+ free (needle);
+ return result;
+ ]])],
+ [gl_cv_func_memmem_works_fast=yes], [gl_cv_func_memmem_works_fast=no],
+ [dnl Only glibc >= 2.9 and cygwin > 1.7.0 are known to have a
+ dnl memmem that works in linear time.
+ AC_EGREP_CPP([Lucky user],
+ [
+#include <features.h>
+#ifdef __GNU_LIBRARY__
+ #if ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 9) || (__GLIBC__ > 2)) \
+ && !defined __UCLIBC__
+ Lucky user
+ #endif
+#endif
+#ifdef __CYGWIN__
+ #include <cygwin/version.h>
+ #if CYGWIN_VERSION_DLL_COMBINED > CYGWIN_VERSION_DLL_MAKE_COMBINED (1007, 0)
+ Lucky user
+ #endif
+#endif
+ ],
+ [gl_cv_func_memmem_works_fast="guessing yes"],
+ [gl_cv_func_memmem_works_fast="guessing no"])
+ ])
+ ])
+ case "$gl_cv_func_memmem_works_fast" in
+ *yes) ;;
+ *)
+ REPLACE_MEMMEM=1
+ ;;
+ esac
+ fi
+]) # gl_FUNC_MEMMEM
+
+# Prerequisites of lib/memmem.c.
+AC_DEFUN([gl_PREREQ_MEMMEM], [:])
DUMPING=@DUMPING@
CHECK_STRUCTS = @CHECK_STRUCTS@
+HAVE_PDUMPER = @HAVE_PDUMPER@
# 'make' verbosity.
AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
$(LIBEGNU_ARCHIVE): $(config_h)
$(MAKE) -C $(dir $@) all
-FINGERPRINTED = $(LIBXMENU) $(ALLOBJS) $(LIBEGNU_ARCHIVE) $(EMACSRES)
-fingerprint.c: $(FINGERPRINTED) $(libsrc)/make-fingerprint$(EXEEXT)
- $(AM_V_GEN)$(libsrc)/make-fingerprint$(EXEEXT) $(FINGERPRINTED) >$@.tmp
- $(AM_V_at)mv $@.tmp $@
+ifeq ($(HAVE_PDUMPER),yes)
+ MAKE_PDUMPER_FINGERPRINT = $(libsrc)/make-fingerprint$(EXEEXT)
+else
+ MAKE_PDUMPER_FINGERPRINT =
+endif
## We have to create $(etc) here because init_cmdargs tests its
## existence when setting Vinstallation_directory (FIXME?).
## This goes on to affect various things, and the emacs binary fails
## to start if Vinstallation_directory has the wrong value.
-temacs$(EXEEXT): fingerprint.o $(charsets) $(charscript)
- $(AM_V_CCLD)$(CC) -o $@ $(ALL_CFLAGS) $(TEMACS_LDFLAGS) $(LDFLAGS) \
- $(ALLOBJS) fingerprint.o \
- $(LIBEGNU_ARCHIVE) $(W32_RES_LINK) $(LIBES)
+temacs$(EXEEXT): $(LIBXMENU) $(ALLOBJS) $(LIBEGNU_ARCHIVE) $(EMACSRES) \
+ $(charsets) $(charscript) $(MAKE_PDUMPER_FINGERPRINT)
+ $(AM_V_CCLD)$(CC) -o $@.tmp \
+ $(ALL_CFLAGS) $(TEMACS_LDFLAGS) $(LDFLAGS) \
+ $(ALLOBJS) $(LIBEGNU_ARCHIVE) $(W32_RES_LINK) $(LIBES)
+ifeq ($(HAVE_PDUMPER),yes)
+ $(AM_V_at)$(MAKE_PDUMPER_FINGERPRINT) $@.tmp
+endif
+ $(AM_V_at)mv $@.tmp $@
$(MKDIR_P) $(etc)
ifeq ($(DUMPING),unexec)
ifneq ($(PAXCTL_notdumped),)
mostlyclean:
rm -f temacs$(EXEEXT) core ./*.core \#* ./*.o
- rm -f dmpstruct.h fingerprint.c
+ rm -f dmpstruct.h
rm -f emacs.pdmp
rm -f ../etc/DOC
rm -f bootstrap-emacs$(EXEEXT) $(bootstrap_pdmp)
${ETAGS}: FORCE
$(MAKE) -C $(dir $@) $(notdir $@)
-# Remove macuvs.h and fingerprint.c since they'd cause `src/emacs`
+# Remove macuvs.h since it'd cause `src/emacs`
# to be built before we can get TAGS.
-ctagsfiles1 = $(filter-out ${srcdir}/macuvs.h ${srcdir}/fingerprint.c, \
- $(wildcard ${srcdir}/*.[hc]))
+ctagsfiles1 = $(filter-out ${srcdir}/macuvs.h, $(wildcard ${srcdir}/*.[hc]))
ctagsfiles2 = $(wildcard ${srcdir}/*.m)
## In out-of-tree builds, TAGS are generated in the build dir, like
+++ /dev/null
-/* Header file for the Emacs build fingerprint.
-
-Copyright (C) 2016, 2018-2019 Free Software Foundation, Inc.
-
-This file is part of GNU Emacs.
-
-GNU Emacs is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or (at
-your option) any later version.
-
-GNU Emacs is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
-
-#ifndef EMACS_FINGERPRINT_H
-#define EMACS_FINGERPRINT_H
-
-/* We generate fingerprint.c and fingerprint.o from all the sources in
- Emacs. This way, we have a unique value that we can use to pair
- data files (like a portable dump image) with a specific build of
- Emacs. */
-extern unsigned char const fingerprint[32];
-
-#endif