From d682f0daa3c0bfdd5ee8ce0e9226353d505e85a9 Mon Sep 17 00:00:00 2001 From: Philipp Stephani Date: Sun, 7 May 2017 21:01:53 +0200 Subject: [PATCH] Add command to replace buffer contents Add a new command 'replace-buffer-contents' that uses the Myers diff algorithm to non-destructively replace the accessible portion of the current buffer. The Myers algorithm is implemented in Gnulib. * src/editfns.c (Freplace_buffer_contents): New command. (set_bit, bit_is_set, buffer_chars_equal): New helper functions. (syms_of_editfns): Define new command. * test/src/editfns-tests.el (replace-buffer-contents-1) (replace-buffer-contents-2): New unit tests. * src/buffer.h (BUF_FETCH_CHAR_AS_MULTIBYTE): New helper macro. * admin/merge-gnulib (GNULIB_MODULES): Add diffseq.h and minmax.h. --- admin/merge-gnulib | 4 +- etc/NEWS | 5 + lib/diffseq.h | 525 ++++++++++++++++++++++++++++++++++++++ lib/gnulib.mk.in | 18 +- lib/minmax.h | 60 +++++ m4/gnulib-comp.m4 | 6 + m4/minmax.m4 | 44 ++++ src/buffer.h | 9 + src/editfns.c | 201 +++++++++++++++ test/src/editfns-tests.el | 31 +++ 10 files changed, 900 insertions(+), 3 deletions(-) create mode 100644 lib/diffseq.h create mode 100644 lib/minmax.h create mode 100644 m4/minmax.m4 diff --git a/admin/merge-gnulib b/admin/merge-gnulib index e5fb0f59fb3..d4bbf17cb3d 100755 --- a/admin/merge-gnulib +++ b/admin/merge-gnulib @@ -30,12 +30,12 @@ GNULIB_MODULES=' careadlinkat close-stream count-leading-zeros count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 - dtoastr dtotimespec dup2 environ execinfo faccessat + diffseq dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode filevercmp flexmember fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat - manywarnings memrchr mkostemp mktime + manywarnings memrchr minmax mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax symlink sys_stat diff --git a/etc/NEWS b/etc/NEWS index 2fb8daab101..ab600eb2786 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -462,6 +462,11 @@ Negative prefix arg flips the direction of selection. Also, defun are selected unless they are separated from the defun by a blank line. +** New command 'replace-buffer-contents'. This command replaces the +contents of theaccessible portion of the current buffer with the +contents of the accessible portion of a different buffer while keeping +point, mark, markers, and text properties as intact as possible. + * Changes in Specialized Modes and Packages in Emacs 26.1 diff --git a/lib/diffseq.h b/lib/diffseq.h new file mode 100644 index 00000000000..d7a374357c7 --- /dev/null +++ b/lib/diffseq.h @@ -0,0 +1,525 @@ +/* Analyze differences between two vectors. + + Copyright (C) 1988-1989, 1992-1995, 2001-2004, 2006-2017 Free Software + Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + + +/* The basic idea is to consider two vectors as similar if, when + transforming the first vector into the second vector through a + sequence of edits (inserts and deletes of one element each), + this sequence is short - or equivalently, if the ordered list + of elements that are untouched by these edits is long. For a + good introduction to the subject, read about the "Levenshtein + distance" in Wikipedia. + + The basic algorithm is described in: + "An O(ND) Difference Algorithm and its Variations", Eugene W. Myers, + Algorithmica Vol. 1, 1986, pp. 251-266, + . + See especially section 4.2, which describes the variation used below. + + The basic algorithm was independently discovered as described in: + "Algorithms for Approximate String Matching", Esko Ukkonen, + Information and Control Vol. 64, 1985, pp. 100-118, + . + + Unless the 'find_minimal' flag is set, this code uses the TOO_EXPENSIVE + heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N) + at the price of producing suboptimal output for large inputs with + many differences. */ + +/* Before including this file, you need to define: + ELEMENT The element type of the vectors being compared. + EQUAL A two-argument macro that tests two elements for + equality. + OFFSET A signed integer type sufficient to hold the + difference between two indices. Usually + something like ptrdiff_t. + EXTRA_CONTEXT_FIELDS Declarations of fields for 'struct context'. + NOTE_DELETE(ctxt, xoff) Record the removal of the object xvec[xoff]. + NOTE_INSERT(ctxt, yoff) Record the insertion of the object yvec[yoff]. + EARLY_ABORT(ctxt) (Optional) A boolean expression that triggers an + early abort of the computation. + USE_HEURISTIC (Optional) Define if you want to support the + heuristic for large vectors. + It is also possible to use this file with abstract arrays. In this case, + xvec and yvec are not represented in memory. They only exist conceptually. + In this case, the list of defines above is amended as follows: + ELEMENT Undefined. + EQUAL Undefined. + XVECREF_YVECREF_EQUAL(ctxt, xoff, yoff) + A three-argument macro: References xvec[xoff] and + yvec[yoff] and tests these elements for equality. + Before including this file, you also need to include: + #include + #include + #include "minmax.h" + */ + +/* Maximum value of type OFFSET. */ +#define OFFSET_MAX \ + ((((OFFSET)1 << (sizeof (OFFSET) * CHAR_BIT - 2)) - 1) * 2 + 1) + +/* Default to no early abort. */ +#ifndef EARLY_ABORT +# define EARLY_ABORT(ctxt) false +#endif + +/* Use this to suppress gcc's "...may be used before initialized" warnings. + Beware: The Code argument must not contain commas. */ +#ifndef IF_LINT +# if defined GCC_LINT || defined lint +# define IF_LINT(Code) Code +# else +# define IF_LINT(Code) /* empty */ +# endif +#endif + +/* As above, but when Code must contain one comma. */ +#ifndef IF_LINT2 +# if defined GCC_LINT || defined lint +# define IF_LINT2(Code1, Code2) Code1, Code2 +# else +# define IF_LINT2(Code1, Code2) /* empty */ +# endif +#endif + +/* + * Context of comparison operation. + */ +struct context +{ + #ifdef ELEMENT + /* Vectors being compared. */ + ELEMENT const *xvec; + ELEMENT const *yvec; + #endif + + /* Extra fields. */ + EXTRA_CONTEXT_FIELDS + + /* Vector, indexed by diagonal, containing 1 + the X coordinate of the point + furthest along the given diagonal in the forward search of the edit + matrix. */ + OFFSET *fdiag; + + /* Vector, indexed by diagonal, containing the X coordinate of the point + furthest along the given diagonal in the backward search of the edit + matrix. */ + OFFSET *bdiag; + + #ifdef USE_HEURISTIC + /* This corresponds to the diff --speed-large-files flag. With this + heuristic, for vectors with a constant small density of changes, + the algorithm is linear in the vector size. */ + bool heuristic; + #endif + + /* Edit scripts longer than this are too expensive to compute. */ + OFFSET too_expensive; + + /* Snakes bigger than this are considered "big". */ + #define SNAKE_LIMIT 20 +}; + +struct partition +{ + /* Midpoints of this partition. */ + OFFSET xmid; + OFFSET ymid; + + /* True if low half will be analyzed minimally. */ + bool lo_minimal; + + /* Likewise for high half. */ + bool hi_minimal; +}; + + +/* Find the midpoint of the shortest edit script for a specified portion + of the two vectors. + + Scan from the beginnings of the vectors, and simultaneously from the ends, + doing a breadth-first search through the space of edit-sequence. + When the two searches meet, we have found the midpoint of the shortest + edit sequence. + + If FIND_MINIMAL is true, find the minimal edit script regardless of + expense. Otherwise, if the search is too expensive, use heuristics to + stop the search and report a suboptimal answer. + + Set PART->(xmid,ymid) to the midpoint (XMID,YMID). The diagonal number + XMID - YMID equals the number of inserted elements minus the number + of deleted elements (counting only elements before the midpoint). + + Set PART->lo_minimal to true iff the minimal edit script for the + left half of the partition is known; similarly for PART->hi_minimal. + + This function assumes that the first elements of the specified portions + of the two vectors do not match, and likewise that the last elements do not + match. The caller must trim matching elements from the beginning and end + of the portions it is going to specify. + + If we return the "wrong" partitions, the worst this can do is cause + suboptimal diff output. It cannot cause incorrect diff output. */ + +static void +diag (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, bool find_minimal, + struct partition *part, struct context *ctxt) +{ + OFFSET *const fd = ctxt->fdiag; /* Give the compiler a chance. */ + OFFSET *const bd = ctxt->bdiag; /* Additional help for the compiler. */ +#ifdef ELEMENT + ELEMENT const *const xv = ctxt->xvec; /* Still more help for the compiler. */ + ELEMENT const *const yv = ctxt->yvec; /* And more and more . . . */ + #define XREF_YREF_EQUAL(x,y) EQUAL (xv[x], yv[y]) +#else + #define XREF_YREF_EQUAL(x,y) XVECREF_YVECREF_EQUAL (ctxt, x, y) +#endif + const OFFSET dmin = xoff - ylim; /* Minimum valid diagonal. */ + const OFFSET dmax = xlim - yoff; /* Maximum valid diagonal. */ + const OFFSET fmid = xoff - yoff; /* Center diagonal of top-down search. */ + const OFFSET bmid = xlim - ylim; /* Center diagonal of bottom-up search. */ + OFFSET fmin = fmid; + OFFSET fmax = fmid; /* Limits of top-down search. */ + OFFSET bmin = bmid; + OFFSET bmax = bmid; /* Limits of bottom-up search. */ + OFFSET c; /* Cost. */ + bool odd = (fmid - bmid) & 1; /* True if southeast corner is on an odd + diagonal with respect to the northwest. */ + + fd[fmid] = xoff; + bd[bmid] = xlim; + + for (c = 1;; ++c) + { + OFFSET d; /* Active diagonal. */ + bool big_snake = false; + + /* Extend the top-down search by an edit step in each diagonal. */ + if (fmin > dmin) + fd[--fmin - 1] = -1; + else + ++fmin; + if (fmax < dmax) + fd[++fmax + 1] = -1; + else + --fmax; + for (d = fmax; d >= fmin; d -= 2) + { + OFFSET x; + OFFSET y; + OFFSET tlo = fd[d - 1]; + OFFSET thi = fd[d + 1]; + OFFSET x0 = tlo < thi ? thi : tlo + 1; + + for (x = x0, y = x0 - d; + x < xlim && y < ylim && XREF_YREF_EQUAL (x, y); + x++, y++) + continue; + if (x - x0 > SNAKE_LIMIT) + big_snake = true; + fd[d] = x; + if (odd && bmin <= d && d <= bmax && bd[d] <= x) + { + part->xmid = x; + part->ymid = y; + part->lo_minimal = part->hi_minimal = true; + return; + } + } + + /* Similarly extend the bottom-up search. */ + if (bmin > dmin) + bd[--bmin - 1] = OFFSET_MAX; + else + ++bmin; + if (bmax < dmax) + bd[++bmax + 1] = OFFSET_MAX; + else + --bmax; + for (d = bmax; d >= bmin; d -= 2) + { + OFFSET x; + OFFSET y; + OFFSET tlo = bd[d - 1]; + OFFSET thi = bd[d + 1]; + OFFSET x0 = tlo < thi ? tlo : thi - 1; + + for (x = x0, y = x0 - d; + xoff < x && yoff < y && XREF_YREF_EQUAL (x - 1, y - 1); + x--, y--) + continue; + if (x0 - x > SNAKE_LIMIT) + big_snake = true; + bd[d] = x; + if (!odd && fmin <= d && d <= fmax && x <= fd[d]) + { + part->xmid = x; + part->ymid = y; + part->lo_minimal = part->hi_minimal = true; + return; + } + } + + if (find_minimal) + continue; + +#ifdef USE_HEURISTIC + /* Heuristic: check occasionally for a diagonal that has made lots + of progress compared with the edit distance. If we have any + such, find the one that has made the most progress and return it + as if it had succeeded. + + With this heuristic, for vectors with a constant small density + of changes, the algorithm is linear in the vector size. */ + + if (200 < c && big_snake && ctxt->heuristic) + { + { + OFFSET best = 0; + + for (d = fmax; d >= fmin; d -= 2) + { + OFFSET dd = d - fmid; + OFFSET x = fd[d]; + OFFSET y = x - d; + OFFSET v = (x - xoff) * 2 - dd; + + if (v > 12 * (c + (dd < 0 ? -dd : dd))) + { + if (v > best + && xoff + SNAKE_LIMIT <= x && x < xlim + && yoff + SNAKE_LIMIT <= y && y < ylim) + { + /* We have a good enough best diagonal; now insist + that it end with a significant snake. */ + int k; + + for (k = 1; XREF_YREF_EQUAL (x - k, y - k); k++) + if (k == SNAKE_LIMIT) + { + best = v; + part->xmid = x; + part->ymid = y; + break; + } + } + } + } + if (best > 0) + { + part->lo_minimal = true; + part->hi_minimal = false; + return; + } + } + + { + OFFSET best = 0; + + for (d = bmax; d >= bmin; d -= 2) + { + OFFSET dd = d - bmid; + OFFSET x = bd[d]; + OFFSET y = x - d; + OFFSET v = (xlim - x) * 2 + dd; + + if (v > 12 * (c + (dd < 0 ? -dd : dd))) + { + if (v > best + && xoff < x && x <= xlim - SNAKE_LIMIT + && yoff < y && y <= ylim - SNAKE_LIMIT) + { + /* We have a good enough best diagonal; now insist + that it end with a significant snake. */ + int k; + + for (k = 0; XREF_YREF_EQUAL (x + k, y + k); k++) + if (k == SNAKE_LIMIT - 1) + { + best = v; + part->xmid = x; + part->ymid = y; + break; + } + } + } + } + if (best > 0) + { + part->lo_minimal = false; + part->hi_minimal = true; + return; + } + } + } +#endif /* USE_HEURISTIC */ + + /* Heuristic: if we've gone well beyond the call of duty, give up + and report halfway between our best results so far. */ + if (c >= ctxt->too_expensive) + { + OFFSET fxybest; + OFFSET fxbest IF_LINT (= 0); + OFFSET bxybest; + OFFSET bxbest IF_LINT (= 0); + + /* Find forward diagonal that maximizes X + Y. */ + fxybest = -1; + for (d = fmax; d >= fmin; d -= 2) + { + OFFSET x = MIN (fd[d], xlim); + OFFSET y = x - d; + if (ylim < y) + { + x = ylim + d; + y = ylim; + } + if (fxybest < x + y) + { + fxybest = x + y; + fxbest = x; + } + } + + /* Find backward diagonal that minimizes X + Y. */ + bxybest = OFFSET_MAX; + for (d = bmax; d >= bmin; d -= 2) + { + OFFSET x = MAX (xoff, bd[d]); + OFFSET y = x - d; + if (y < yoff) + { + x = yoff + d; + y = yoff; + } + if (x + y < bxybest) + { + bxybest = x + y; + bxbest = x; + } + } + + /* Use the better of the two diagonals. */ + if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff)) + { + part->xmid = fxbest; + part->ymid = fxybest - fxbest; + part->lo_minimal = true; + part->hi_minimal = false; + } + else + { + part->xmid = bxbest; + part->ymid = bxybest - bxbest; + part->lo_minimal = false; + part->hi_minimal = true; + } + return; + } + } + #undef XREF_YREF_EQUAL +} + + +/* Compare in detail contiguous subsequences of the two vectors + which are known, as a whole, to match each other. + + The subsequence of vector 0 is [XOFF, XLIM) and likewise for vector 1. + + Note that XLIM, YLIM are exclusive bounds. All indices into the vectors + are origin-0. + + If FIND_MINIMAL, find a minimal difference no matter how + expensive it is. + + The results are recorded by invoking NOTE_DELETE and NOTE_INSERT. + + Return false if terminated normally, or true if terminated through early + abort. */ + +static bool +compareseq (OFFSET xoff, OFFSET xlim, OFFSET yoff, OFFSET ylim, + bool find_minimal, struct context *ctxt) +{ +#ifdef ELEMENT + ELEMENT const *xv = ctxt->xvec; /* Help the compiler. */ + ELEMENT const *yv = ctxt->yvec; + #define XREF_YREF_EQUAL(x,y) EQUAL (xv[x], yv[y]) +#else + #define XREF_YREF_EQUAL(x,y) XVECREF_YVECREF_EQUAL (ctxt, x, y) +#endif + + /* Slide down the bottom initial diagonal. */ + while (xoff < xlim && yoff < ylim && XREF_YREF_EQUAL (xoff, yoff)) + { + xoff++; + yoff++; + } + + /* Slide up the top initial diagonal. */ + while (xoff < xlim && yoff < ylim && XREF_YREF_EQUAL (xlim - 1, ylim - 1)) + { + xlim--; + ylim--; + } + + /* Handle simple cases. */ + if (xoff == xlim) + while (yoff < ylim) + { + NOTE_INSERT (ctxt, yoff); + if (EARLY_ABORT (ctxt)) + return true; + yoff++; + } + else if (yoff == ylim) + while (xoff < xlim) + { + NOTE_DELETE (ctxt, xoff); + if (EARLY_ABORT (ctxt)) + return true; + xoff++; + } + else + { + struct partition part IF_LINT2 (= { .xmid = 0, .ymid = 0 }); + + /* Find a point of correspondence in the middle of the vectors. */ + diag (xoff, xlim, yoff, ylim, find_minimal, &part, ctxt); + + /* Use the partitions to split this problem into subproblems. */ + if (compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal, ctxt)) + return true; + if (compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal, ctxt)) + return true; + } + + return false; + #undef XREF_YREF_EQUAL +} + +#undef ELEMENT +#undef EQUAL +#undef OFFSET +#undef EXTRA_CONTEXT_FIELDS +#undef NOTE_DELETE +#undef NOTE_INSERT +#undef EARLY_ABORT +#undef USE_HEURISTIC +#undef XVECREF_YVECREF_EQUAL +#undef OFFSET_MAX diff --git a/lib/gnulib.mk.in b/lib/gnulib.mk.in index 73d304307d4..509089e6391 100644 --- a/lib/gnulib.mk.in +++ b/lib/gnulib.mk.in @@ -21,7 +21,7 @@ # the same distribution terms as the rest of that program. # # Generated by gnulib-tool. -# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stat --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=tzset --avoid=unsetenv --avoid=utime --avoid=utime-h --gnu-make --makefile-name=gnulib.mk.in --conditional-dependencies --no-libtool --macro-prefix=gl --no-vc-files alloca-opt binary-io byteswap c-ctype c-strcase careadlinkat close-stream count-leading-zeros count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode filevercmp flexmember fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub update-copyright utimens vla warnings +# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stat --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=tzset --avoid=unsetenv --avoid=utime --avoid=utime-h --gnu-make --makefile-name=gnulib.mk.in --conditional-dependencies --no-libtool --macro-prefix=gl --no-vc-files alloca-opt binary-io byteswap c-ctype c-strcase careadlinkat close-stream count-leading-zeros count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 diffseq dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode filevercmp flexmember fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr minmax mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub update-copyright utimens vla warnings MOSTLYCLEANFILES += core *.stackdump @@ -1167,6 +1167,14 @@ EXTRA_DIST += gl_openssl.h sha512.h endif ## end gnulib module crypto/sha512 +## begin gnulib module diffseq +ifeq (,$(OMIT_GNULIB_MODULE_diffseq)) + +libgnu_a_SOURCES += diffseq.h + +endif +## end gnulib module diffseq + ## begin gnulib module dirent ifeq (,$(OMIT_GNULIB_MODULE_dirent)) @@ -1747,6 +1755,14 @@ EXTRA_libgnu_a_SOURCES += memrchr.c endif ## end gnulib module memrchr +## begin gnulib module minmax +ifeq (,$(OMIT_GNULIB_MODULE_minmax)) + +libgnu_a_SOURCES += minmax.h + +endif +## end gnulib module minmax + ## begin gnulib module mkostemp ifeq (,$(OMIT_GNULIB_MODULE_mkostemp)) diff --git a/lib/minmax.h b/lib/minmax.h new file mode 100644 index 00000000000..6b602a94fdb --- /dev/null +++ b/lib/minmax.h @@ -0,0 +1,60 @@ +/* MIN, MAX macros. + Copyright (C) 1995, 1998, 2001, 2003, 2005, 2009-2017 Free Software + Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . */ + +#ifndef _MINMAX_H +#define _MINMAX_H + +/* Note: MIN, MAX are also defined in on some systems + (glibc, IRIX, HP-UX, OSF/1). Therefore you might get warnings about + MIN, MAX macro redefinitions on some systems; the workaround is to + #include this file as the last one among the #include list. */ + +/* Before we define the following symbols we get the file + since otherwise we get redefinitions on some systems if is + included after this file. Likewise for . + If more than one of these system headers define MIN and MAX, pick just + one of the headers (because the definitions most likely are the same). */ +#if HAVE_MINMAX_IN_LIMITS_H +# include +#elif HAVE_MINMAX_IN_SYS_PARAM_H +# include +#endif + +/* Note: MIN and MAX should be used with two arguments of the + same type. They might not return the minimum and maximum of their two + arguments, if the arguments have different types or have unusual + floating-point values. For example, on a typical host with 32-bit 'int', + 64-bit 'long long', and 64-bit IEEE 754 'double' types: + + MAX (-1, 2147483648) returns 4294967295. + MAX (9007199254740992.0, 9007199254740993) returns 9007199254740992.0. + MAX (NaN, 0.0) returns 0.0. + MAX (+0.0, -0.0) returns -0.0. + + and in each case the answer is in some sense bogus. */ + +/* MAX(a,b) returns the maximum of A and B. */ +#ifndef MAX +# define MAX(a,b) ((a) > (b) ? (a) : (b)) +#endif + +/* MIN(a,b) returns the minimum of A and B. */ +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + +#endif /* _MINMAX_H */ diff --git a/m4/gnulib-comp.m4 b/m4/gnulib-comp.m4 index 8f53a990e34..1ac58e871cc 100644 --- a/m4/gnulib-comp.m4 +++ b/m4/gnulib-comp.m4 @@ -61,6 +61,7 @@ AC_DEFUN([gl_EARLY], # Code from module crypto/sha1: # Code from module crypto/sha256: # Code from module crypto/sha512: + # Code from module diffseq: # Code from module dirent: # Code from module dirfd: # Code from module dosname: @@ -105,6 +106,7 @@ AC_DEFUN([gl_EARLY], # Code from module lstat: # Code from module manywarnings: # Code from module memrchr: + # Code from module minmax: # Code from module mkostemp: # Code from module mktime: # Code from module mktime-internal: @@ -289,6 +291,7 @@ AC_DEFUN([gl_INIT], gl_PREREQ_MEMRCHR fi gl_STRING_MODULE_INDICATOR([memrchr]) + gl_MINMAX gl_FUNC_MKOSTEMP if test $HAVE_MKOSTEMP = 0; then AC_LIBOBJ([mkostemp]) @@ -821,6 +824,7 @@ AC_DEFUN([gl_FILE_LIST], [ lib/count-one-bits.h lib/count-trailing-zeros.c lib/count-trailing-zeros.h + lib/diffseq.h lib/dirent.in.h lib/dirfd.c lib/dosname.h @@ -875,6 +879,7 @@ AC_DEFUN([gl_FILE_LIST], [ lib/md5.c lib/md5.h lib/memrchr.c + lib/minmax.h lib/mkostemp.c lib/mktime-internal.h lib/mktime.c @@ -991,6 +996,7 @@ AC_DEFUN([gl_FILE_LIST], [ m4/manywarnings.m4 m4/md5.m4 m4/memrchr.m4 + m4/minmax.m4 m4/mkostemp.m4 m4/mktime.m4 m4/multiarch.m4 diff --git a/m4/minmax.m4 b/m4/minmax.m4 new file mode 100644 index 00000000000..6845fce89c4 --- /dev/null +++ b/m4/minmax.m4 @@ -0,0 +1,44 @@ +# minmax.m4 serial 4 +dnl Copyright (C) 2005, 2009-2017 Free Software Foundation, Inc. +dnl This file is free software; the Free Software Foundation +dnl gives unlimited permission to copy and/or distribute it, +dnl with or without modifications, as long as this notice is preserved. + +AC_PREREQ([2.53]) + +AC_DEFUN([gl_MINMAX], +[ + AC_REQUIRE([gl_PREREQ_MINMAX]) +]) + +# Prerequisites of lib/minmax.h. +AC_DEFUN([gl_PREREQ_MINMAX], +[ + gl_MINMAX_IN_HEADER([limits.h]) + gl_MINMAX_IN_HEADER([sys/param.h]) +]) + +dnl gl_MINMAX_IN_HEADER(HEADER) +dnl The parameter has to be a literal header name; it cannot be macro, +dnl nor a shell variable. (Because autoheader collects only AC_DEFINE +dnl invocations with a literal macro name.) +AC_DEFUN([gl_MINMAX_IN_HEADER], +[ + m4_pushdef([header], AS_TR_SH([$1])) + m4_pushdef([HEADER], AS_TR_CPP([$1])) + AC_CACHE_CHECK([whether <$1> defines MIN and MAX], + [gl_cv_minmax_in_]header, + [AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM( + [[#include <$1> + int x = MIN (42, 17);]], + [[]])], + [gl_cv_minmax_in_]header[=yes], + [gl_cv_minmax_in_]header[=no])]) + if test $gl_cv_minmax_in_[]header = yes; then + AC_DEFINE([HAVE_MINMAX_IN_]HEADER, 1, + [Define to 1 if <$1> defines the MIN and MAX macros.]) + fi + m4_popdef([HEADER]) + m4_popdef([header]) +]) diff --git a/src/buffer.h b/src/buffer.h index a2bdc4e7294..be270fe4823 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -412,6 +412,15 @@ extern void enlarge_buffer_text (struct buffer *, ptrdiff_t); ? BUF_FETCH_MULTIBYTE_CHAR ((buf), (pos)) \ : BUF_FETCH_BYTE ((buf), (pos))) +/* Return character at byte position POS in buffer BUF. If BUF is + unibyte and the character is not ASCII, make the returning + character multibyte. */ + +#define BUF_FETCH_CHAR_AS_MULTIBYTE(buf, pos) \ + (! NILP (BVAR ((buf), enable_multibyte_characters)) \ + ? BUF_FETCH_MULTIBYTE_CHAR ((buf), (pos)) \ + : UNIBYTE_TO_CHAR (BUF_FETCH_BYTE ((buf), (pos)))) + /* Return the byte at byte position N in buffer BUF. */ #define BUF_FETCH_BYTE(buf, n) \ diff --git a/src/editfns.c b/src/editfns.c index 43b17f9f116..76b4aaf81bc 100644 --- a/src/editfns.c +++ b/src/editfns.c @@ -3105,6 +3105,206 @@ determines whether case is significant or ignored. */) /* Same length too => they are equal. */ return make_number (0); } + + +/* Set up necessary definitions for diffseq.h; see comments in + diffseq.h for explanation. */ + +#undef ELEMENT +#undef EQUAL + +#define XVECREF_YVECREF_EQUAL(ctx, xoff, yoff) \ + buffer_chars_equal ((ctx), (xoff), (yoff)) + +#define OFFSET ptrdiff_t + +#define EXTRA_CONTEXT_FIELDS \ + /* Buffers to compare. */ \ + struct buffer *buffer_a; \ + struct buffer *buffer_b; \ + /* Bit vectors recording for each character whether it was deleted + or inserted. */ \ + unsigned char *deletions; \ + unsigned char *insertions; + +#define NOTE_DELETE(ctx, xoff) set_bit ((ctx)->deletions, (xoff)) +#define NOTE_INSERT(ctx, yoff) set_bit ((ctx)->insertions, (yoff)) + +struct context; +static void set_bit (unsigned char *, OFFSET); +static bool bit_is_set (const unsigned char *, OFFSET); +static bool buffer_chars_equal (struct context *, OFFSET, OFFSET); + +#include "minmax.h" +#include "diffseq.h" + +DEFUN ("replace-buffer-contents", Freplace_buffer_contents, + Sreplace_buffer_contents, 1, 1, "bSource buffer: ", + doc: /* Replace accessible portion of the current buffer with accessible portion of SOURCE. +As far as possible the replacement is non-destructive, i.e. existing +buffer contents, markers, properties, and overlays in the current +buffer stay intact. */) + (Lisp_Object source) +{ + struct buffer *a = current_buffer; + Lisp_Object source_buffer = Fget_buffer (source); + if (NILP (source_buffer)) + nsberror (source); + struct buffer *b = XBUFFER (source_buffer); + if (! BUFFER_LIVE_P (b)) + error ("Selecting deleted buffer"); + if (a == b) + error ("Cannot replace a buffer with itself"); + + ptrdiff_t min_a = BEGV; + ptrdiff_t min_b = BUF_BEGV (b); + ptrdiff_t size_a = ZV - min_a; + ptrdiff_t size_b = BUF_ZV (b) - min_b; + eassume (size_a >= 0); + eassume (size_b >= 0); + bool a_empty = size_a == 0; + bool b_empty = size_b == 0; + + /* Handle trivial cases where at least one accessible portion is + empty. */ + + if (a_empty && b_empty) + return Qnil; + + if (a_empty) + return Finsert_buffer_substring (source, Qnil, Qnil); + + if (b_empty) + { + del_range_both (BEGV, BEGV_BYTE, ZV, ZV_BYTE, true); + return Qnil; + } + + /* FIXME: It is not documented how to initialize the contents of the + context structure. This code cargo-cults from the existing + caller in src/analyze.c of GNU Diffutils, which appears to + work. */ + + ptrdiff_t diags = size_a + size_b + 3; + ptrdiff_t *buffer; + USE_SAFE_ALLOCA; + SAFE_NALLOCA (buffer, 2, diags); + /* Micro-optimization: Casting to size_t generates much better + code. */ + ptrdiff_t del_bytes = (size_t) size_a / CHAR_BIT + 1; + ptrdiff_t ins_bytes = (size_t) size_b / CHAR_BIT + 1; + struct context ctx = { + .buffer_a = a, + .buffer_b = b, + .deletions = SAFE_ALLOCA (del_bytes), + .insertions = SAFE_ALLOCA (ins_bytes), + .fdiag = buffer + size_b + 1, + .bdiag = buffer + diags + size_b + 1, + /* FIXME: Find a good number for .too_expensive. */ + .too_expensive = 1000000, + }; + memclear (ctx.deletions, del_bytes); + memclear (ctx.insertions, ins_bytes); + /* compareseq requires indices to be zero-based. We add BEGV back + later. */ + bool early_abort = compareseq (0, size_a, 0, size_b, false, &ctx); + /* Since we didn’t define EARLY_ABORT, we should never abort + early. */ + eassert (! early_abort); + SAFE_FREE (); + + Fundo_boundary (); + ptrdiff_t count = SPECPDL_INDEX (); + record_unwind_protect (save_excursion_restore, save_excursion_save ()); + + SET_PT_BOTH (BEGV, BEGV_BYTE); + ptrdiff_t i = size_a; + ptrdiff_t j = size_b; + /* Walk backwards through the lists of changes. This was also + cargo-culted from src/analyze.c in GNU Diffutils. Because we + walk backwards, we don’t have to keep the positions in sync. */ + while (i >= 0 || j >= 0) + { + /* Check whether there is a change (insertion or deletion) + before the current position. */ + if ((i > 0 && bit_is_set (ctx.deletions, i - 1)) || + (j > 0 && bit_is_set (ctx.insertions, j - 1))) + { + ptrdiff_t end_a = min_a + i; + ptrdiff_t end_b = min_b + j; + /* Find the beginning of the current change run. */ + while (i > 0 && bit_is_set (ctx.deletions, i - 1)) + --i; + while (j > 0 && bit_is_set (ctx.insertions, j - 1)) + --j; + ptrdiff_t beg_a = min_a + i; + ptrdiff_t beg_b = min_b + j; + eassert (beg_a >= BEGV); + eassert (beg_b >= BUF_BEGV (b)); + eassert (beg_a <= end_a); + eassert (beg_b <= end_b); + eassert (end_a <= ZV); + eassert (end_b <= BUF_ZV (b)); + eassert (beg_a < end_a || beg_b < end_b); + if (beg_a < end_a) + del_range (beg_a, end_a); + if (beg_b < end_b) + { + SET_PT (beg_a); + Finsert_buffer_substring (source, make_natnum (beg_b), + make_natnum (end_b)); + } + } + --i; + --j; + } + + return unbind_to (count, Qnil); +} + +static void +set_bit (unsigned char *a, ptrdiff_t i) +{ + eassert (i >= 0); + /* Micro-optimization: Casting to size_t generates much better + code. */ + size_t j = i; + a[j / CHAR_BIT] |= (1 << (j % CHAR_BIT)); +} + +static bool +bit_is_set (const unsigned char *a, ptrdiff_t i) +{ + eassert (i >= 0); + /* Micro-optimization: Casting to size_t generates much better + code. */ + size_t j = i; + return a[j / CHAR_BIT] & (1 << (j % CHAR_BIT)); +} + +/* Return true if the characters at position POS_A of buffer + CTX->buffer_a and at position POS_B of buffer CTX->buffer_b are + equal. POS_A and POS_B are zero-based. Text properties are + ignored. */ + +static bool +buffer_chars_equal (struct context *ctx, + ptrdiff_t pos_a, ptrdiff_t pos_b) +{ + eassert (pos_a >= 0); + pos_a += BUF_BEGV (ctx->buffer_a); + eassert (pos_a >= BUF_BEGV (ctx->buffer_a)); + eassert (pos_a < BUF_ZV (ctx->buffer_a)); + + eassert (pos_b >= 0); + pos_b += BUF_BEGV (ctx->buffer_b); + eassert (pos_b >= BUF_BEGV (ctx->buffer_b)); + eassert (pos_b < BUF_ZV (ctx->buffer_b)); + + return BUF_FETCH_CHAR_AS_MULTIBYTE (ctx->buffer_a, pos_a) + == BUF_FETCH_CHAR_AS_MULTIBYTE (ctx->buffer_b, pos_b); +} + static void subst_char_in_region_unwind (Lisp_Object arg) @@ -5315,6 +5515,7 @@ functions if all the text being accessed has this property. */); defsubr (&Sinsert_buffer_substring); defsubr (&Scompare_buffer_substrings); + defsubr (&Sreplace_buffer_contents); defsubr (&Ssubst_char_in_region); defsubr (&Stranslate_region_internal); defsubr (&Sdelete_region); diff --git a/test/src/editfns-tests.el b/test/src/editfns-tests.el index 3073e371933..a3ea8ab60b5 100644 --- a/test/src/editfns-tests.el +++ b/test/src/editfns-tests.el @@ -208,4 +208,35 @@ '(error "Invalid format operation %$"))) (should (equal (format "%1$c %1$s" ?±) "± 177"))) +(ert-deftest replace-buffer-contents-1 () + (with-temp-buffer + (insert #("source" 2 4 (prop 7))) + (let ((source (current-buffer))) + (with-temp-buffer + (insert "before dest after") + (let ((marker (set-marker (make-marker) 14))) + (save-restriction + (narrow-to-region 8 12) + (replace-buffer-contents source)) + (should (equal (marker-buffer marker) (current-buffer))) + (should (equal (marker-position marker) 16))) + (should (equal-including-properties + (buffer-string) + #("before source after" 9 11 (prop 7)))) + (should (equal (point) 9)))) + (should (equal-including-properties + (buffer-string) + #("source" 2 4 (prop 7)))))) + +(ert-deftest replace-buffer-contents-2 () + (with-temp-buffer + (insert "foo bar baz qux") + (let ((source (current-buffer))) + (with-temp-buffer + (insert "foo BAR baz qux") + (replace-buffer-contents source) + (should (equal-including-properties + (buffer-string) + "foo bar baz qux")))))) + ;;; editfns-tests.el ends here -- 2.39.2