From: Tassilo Horn Date: Sat, 23 Feb 2019 20:18:36 +0000 (+0100) Subject: Improve replace-buffer-contents/replace-region-contents X-Git-Tag: emacs-27.0.90~3554 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=e96923c188a2a38d09917c5b7f606187a1413a96;p=emacs.git Improve replace-buffer-contents/replace-region-contents * src/editfns.c (Freplace_buffer_contents): Add two optional arguments for mitigating performance issues. * lisp/emacs-lisp/subr-x.el (replace-region-contents): Move from subr.el. Add the same two arguments as for replace-buffer-contents. * lisp/json.el (json-pretty-print-max-secs): New variable holding the default MAX-SECS value json-pretty-print passes to replace-buffer-contents. (json-pretty-print): Use it. * doc/lispref/text.texi (Replacing): Add documentation for replace-buffer-contents two new optional arguments. Document replace-region-contents. --- diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi index 6dfd211d1a0..88843c3764f 100644 --- a/doc/lispref/text.texi +++ b/doc/lispref/text.texi @@ -4436,20 +4436,57 @@ all markers unrelocated. You can use the following function to replace the text of one buffer with the text of another buffer: -@deffn Command replace-buffer-contents source +@deffn Command replace-buffer-contents source &optional max-secs max-costs This function replaces the accessible portion of the current buffer with the accessible portion of the buffer @var{source}. @var{source} may either be a buffer object or the name of a buffer. When @code{replace-buffer-contents} succeeds, the text of the accessible portion of the current buffer will be equal to the text of the -accessible portion of the @var{source} buffer. This function attempts -to keep point, markers, text properties, and overlays in the current -buffer intact. One potential case where this behavior is useful is -external code formatting programs: they typically write the -reformatted text into a temporary buffer or file, and using -@code{delete-region} and @code{insert-buffer-substring} would destroy -these properties. However, the latter combination is typically -faster. @xref{Deletion}, and @ref{Insertion}. +accessible portion of the @var{source} buffer. + +This function attempts to keep point, markers, text properties, and +overlays in the current buffer intact. One potential case where this +behavior is useful is external code formatting programs: they +typically write the reformatted text into a temporary buffer or file, +and using @code{delete-region} and @code{insert-buffer-substring} +would destroy these properties. However, the latter combination is +typically faster (@xref{Deletion}, and @ref{Insertion}). + +For its working, @code{replace-buffer-contents} needs to compare the +contents of the original buffer with that of @code{source} which is a +costly operation if the buffers are huge and there is a high number of +differences between them. In order to keep +@code{replace-buffer-contents}'s runtime in bounds, it has two +optional arguments. + +@code{max-secs} defines a hard boundary in terms of seconds. If given +and exceeded, it will fall back to @code{delete-region} and +@code{insert-buffer-substring}. + +@code{max-costs} defines the quality of the difference computation. +If the actual costs exceed this limit, heuristics are used to provide +a faster but suboptimal solution. The default value is 1000000. + +@code{replace-buffer-contents} returns t if a non-destructive +replacement could be performed. Otherwise, i.e., if MAX-SECS was +exceeded, it returns nil. +@end deffn + +@defun Command replace-region-contents beg end replace-fn &optional max-secs max-costs +This function replaces the region between @code{beg} and @code{end} +using the given @code{replace-fn}. The function @code{replace-fn} is +run in the current buffer narrowed to the specified region and it +should return either a string or a buffer replacing the region. + +The replacement is performed using @code{replace-buffer-contents} +which also describes the @code{max-secs} and @code{max-costs} +arguments and the return value. + +Note: If the replacement is a string, it will be placed in a temporary +buffer so that @code{replace-buffer-contents} can operate on it. +Therefore, if you already have the replacement in a buffer, it makes +no sense to convert it to a string using @code{buffer-substring} or +similar. @end deffn @node Decompression diff --git a/etc/NEWS b/etc/NEWS index 3c5fb24b0e4..67e376d9b38 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -335,6 +335,16 @@ the node "(emacs) Directory Variables" of the user manual. 'make-network-process' now uses the correct loopback address when asked to use :host 'local and :family 'ipv6. ++++ +** The new function `replace-region-contents' replaces the current +region using a given replacement-function in a non-destructive manner +(in terms of `replace-buffer-contents'). + ++++ +** The command `replace-buffer-contents' now has two optional +arguments mitigating performance issues when operating on huge +buffers. + * Changes in Specialized Modes and Packages in Emacs 27.1 diff --git a/lisp/emacs-lisp/subr-x.el b/lisp/emacs-lisp/subr-x.el index 7d9f0bba4c7..b9ffe6a6fc6 100644 --- a/lisp/emacs-lisp/subr-x.el +++ b/lisp/emacs-lisp/subr-x.el @@ -250,6 +250,35 @@ TRIM-LEFT and TRIM-RIGHT default to \"[ \\t\\n\\r]+\"." (substring string 0 (- (length string) (length suffix))) string)) +(defun replace-region-contents (beg end replace-fn + &optional max-secs max-costs) + "Replace the region between BEG and END using REPLACE-FN. +REPLACE-FN runs on the current buffer narrowed to the region. It +should return either a string or a buffer replacing the region. + +The replacement is performed using `replace-buffer-contents' +which also describes the MAX-SECS and MAX-COSTS arguments and the +return value. + +Note: If the replacement is a string, it'll be placed in a +temporary buffer so that `replace-buffer-contents' can operate on +it. Therefore, if you already have the replacement in a buffer, +it makes no sense to convert it to a string using +`buffer-substring' or similar." + (save-excursion + (save-restriction + (narrow-to-region beg end) + (goto-char (point-min)) + (let ((repl (funcall replace-fn))) + (if (bufferp repl) + (replace-buffer-contents repl max-secs max-costs) + (let ((source-buffer (current-buffer))) + (with-temp-buffer + (insert repl) + (let ((tmp-buffer (current-buffer))) + (set-buffer source-buffer) + (replace-buffer-contents tmp-buffer max-secs max-costs))))))))) + (provide 'subr-x) ;;; subr-x.el ends here diff --git a/lisp/json.el b/lisp/json.el index 19b8f09dcda..44b3c33df7c 100644 --- a/lisp/json.el +++ b/lisp/json.el @@ -49,10 +49,13 @@ ;; 2008-02-21 - Installed in GNU Emacs. ;; 2011-10-17 - Patch `json-alist-p' and `json-plist-p' to avoid recursion -tzz ;; 2012-10-25 - Added pretty-printed reformatting -Ryan Crum (ryan@ryancrum.org) +;; 2019-02-02 - Pretty-printing now uses replace-region-contents and support for +;; minimization -tsdh ;;; Code: (require 'map) +(require 'subr-x) ;; Parameters @@ -738,6 +741,12 @@ With prefix argument MINIMIZE, minimize it instead." (interactive "P") (json-pretty-print (point-min) (point-max) minimize)) +(defvar json-pretty-print-max-secs 2.0 + "Maximum time for `json-pretty-print's comparison. +The function `json-pretty-print' uses `replace-region-contents' +(which see) passing the value of this variable as argument +MAX-SECS.") + (defun json-pretty-print (begin end &optional minimize) "Pretty-print selected region. With prefix argument MINIMIZE, minimize it instead." @@ -749,7 +758,11 @@ With prefix argument MINIMIZE, minimize it instead." (json-object-type 'alist)) (replace-region-contents begin end - (lambda () (json-encode (json-read)))))) + (lambda () (json-encode (json-read))) + json-pretty-print-max-secs + ;; FIXME: What's a good value here? Can we use something better, + ;; e.g., by deriving a value from the size of the region? + 64))) (defun json-pretty-print-buffer-ordered (&optional minimize) "Pretty-print current buffer with object keys ordered. diff --git a/lisp/subr.el b/lisp/subr.el index 69ae804e200..5c8b84b8e9c 100644 --- a/lisp/subr.el +++ b/lisp/subr.el @@ -5476,30 +5476,4 @@ returned list are in the same order as in TREE. ;; for discoverability: (defalias 'flatten-list 'flatten-tree) -(defun replace-region-contents (beg end replace-fn) - "Replace the region between BEG and END using REPLACE-FN. -REPLACE-FN runs on the current buffer narrowed to the region. It -should return either a string or a buffer replacing the region. - -The replacement is performed using `replace-buffer-contents'. - -Note: If the replacement is a string, it'll be placed in a -temporary buffer so that `replace-buffer-contents' can operate on -it. Therefore, if you already have the replacement in a buffer, -it makes no sense to convert it to a string using -`buffer-substring' or similar." - (save-excursion - (save-restriction - (narrow-to-region beg end) - (goto-char (point-min)) - (let ((repl (funcall replace-fn))) - (if (bufferp repl) - (replace-buffer-contents repl) - (let ((source-buffer (current-buffer))) - (with-temp-buffer - (insert repl) - (let ((tmp-buffer (current-buffer))) - (set-buffer source-buffer) - (replace-buffer-contents tmp-buffer))))))))) - ;;; subr.el ends here diff --git a/src/editfns.c b/src/editfns.c index 7a600bacf18..8f21f8a677e 100644 --- a/src/editfns.c +++ b/src/editfns.c @@ -20,6 +20,7 @@ along with GNU Emacs. If not, see . */ #include #include +#include #include #ifdef HAVE_PWD_H @@ -1912,10 +1913,6 @@ determines whether case is significant or ignored. */) #undef EQUAL #define USE_HEURISTIC -#ifdef USE_HEURISTIC -#define DIFFSEQ_HEURISTIC -#endif - /* Counter used to rarely_quit in replace-buffer-contents. */ static unsigned short rbc_quitcounter; @@ -1937,30 +1934,54 @@ static unsigned short rbc_quitcounter; /* Bit vectors recording for each character whether it was deleted or inserted. */ \ unsigned char *deletions; \ - unsigned char *insertions; + unsigned char *insertions; \ + struct timeval start; \ + double max_secs; \ + unsigned int early_abort_tests; #define NOTE_DELETE(ctx, xoff) set_bit ((ctx)->deletions, (xoff)) #define NOTE_INSERT(ctx, yoff) set_bit ((ctx)->insertions, (yoff)) +#define EARLY_ABORT(ctx) compareseq_early_abort (ctx) struct context; static void set_bit (unsigned char *, OFFSET); static bool bit_is_set (const unsigned char *, OFFSET); static bool buffer_chars_equal (struct context *, OFFSET, OFFSET); +static bool compareseq_early_abort (struct context *); #include "minmax.h" #include "diffseq.h" DEFUN ("replace-buffer-contents", Freplace_buffer_contents, - Sreplace_buffer_contents, 1, 1, "bSource buffer: ", + Sreplace_buffer_contents, 1, 3, "bSource buffer: ", doc: /* Replace accessible portion of current buffer with that of SOURCE. SOURCE can be a buffer or a string that names a buffer. Interactively, prompt for SOURCE. + As far as possible the replacement is non-destructive, i.e. existing buffer contents, markers, properties, and overlays in the current buffer stay intact. -Warning: this function can be slow if there's a large number of small -differences between the two buffers. */) - (Lisp_Object source) + +Because this function can be very slow if there is a large number of +differences between the two buffers, there are two optional arguments +mitigating this issue. + +The MAX-SECS argument, if given, defines a hard limit on the time used +for comparing the buffers. If it takes longer than MAX-SECS, the +function falls back to a plain `delete-region' and +`insert-buffer-substring'. (Note that the checks are not performed +too evenly over time, so in some cases it may run a bit longer than +allowed). + +The optional argument MAX-COSTS defines the quality of the difference +computation. If the actual costs exceed this limit, heuristics are +used to provide a faster but suboptimal solution. The default value +is 1000000. + +This function returns t if a non-destructive replacement could be +performed. Otherwise, i.e., if MAX-SECS was exceeded, it returns +nil. */) + (Lisp_Object source, Lisp_Object max_secs, Lisp_Object max_costs) { struct buffer *a = current_buffer; Lisp_Object source_buffer = Fget_buffer (source); @@ -1985,15 +2006,18 @@ differences between the two buffers. */) empty. */ if (a_empty && b_empty) - return Qnil; + return Qt; if (a_empty) - return Finsert_buffer_substring (source, Qnil, Qnil); + { + Finsert_buffer_substring (source, Qnil, Qnil); + return Qt; + } if (b_empty) { del_range_both (BEGV, BEGV_BYTE, ZV, ZV_BYTE, true); - return Qnil; + return Qt; } ptrdiff_t count = SPECPDL_INDEX (); @@ -2007,6 +2031,12 @@ differences between the two buffers. */) ptrdiff_t *buffer; USE_SAFE_ALLOCA; SAFE_NALLOCA (buffer, 2, diags); + + if (NILP (max_costs)) + XSETFASTINT (max_costs, 1000000); + else + CHECK_FIXNUM (max_costs); + /* Micro-optimization: Casting to size_t generates much better code. */ ptrdiff_t del_bytes = (size_t) size_a / CHAR_BIT + 1; @@ -2022,20 +2052,26 @@ differences between the two buffers. */) .insertions = SAFE_ALLOCA (ins_bytes), .fdiag = buffer + size_b + 1, .bdiag = buffer + diags + size_b + 1, -#ifdef DIFFSEQ_HEURISTIC .heuristic = true, -#endif - /* FIXME: Find a good number for .too_expensive. */ - .too_expensive = 64, + .too_expensive = XFIXNUM (max_costs), + .max_secs = FLOATP (max_secs) ? XFLOAT_DATA (max_secs) : -1.0, + .early_abort_tests = 0 }; memclear (ctx.deletions, del_bytes); memclear (ctx.insertions, ins_bytes); + + gettimeofday (&ctx.start, NULL); /* compareseq requires indices to be zero-based. We add BEGV back later. */ bool early_abort = compareseq (0, size_a, 0, size_b, false, &ctx); - /* Since we didn’t define EARLY_ABORT, we should never abort - early. */ - eassert (! early_abort); + + if (early_abort) + { + del_range (min_a, ZV); + Finsert_buffer_substring (source, Qnil,Qnil); + SAFE_FREE_UNBIND_TO (count, Qnil); + return Qnil; + } rbc_quitcounter = 0; @@ -2097,6 +2133,7 @@ differences between the two buffers. */) --i; --j; } + SAFE_FREE_UNBIND_TO (count, Qnil); rbc_quitcounter = 0; @@ -2106,7 +2143,7 @@ differences between the two buffers. */) update_compositions (BEGV, ZV, CHECK_INSIDE); } - return Qnil; + return Qt; } static void @@ -2173,6 +2210,18 @@ buffer_chars_equal (struct context *ctx, == BUF_FETCH_MULTIBYTE_CHAR (ctx->buffer_b, bpos_b); } +static bool +compareseq_early_abort (struct context *ctx) +{ + if (ctx->max_secs < 0.0) + return false; + + struct timeval now, diff; + gettimeofday (&now, NULL); + timersub (&now, &ctx->start, &diff); + return diff.tv_sec + diff.tv_usec / 1000000.0 > ctx->max_secs; +} + static void subst_char_in_region_unwind (Lisp_Object arg) @@ -4441,6 +4490,12 @@ it to be non-nil. */); binary_as_unsigned = true; #endif + DEFVAR_LISP ("replace-buffer-contents-max-secs", + Vreplace_buffer_contents_max_secs, + doc: /* If differencing the two buffers takes longer than this, +`replace-buffer-contents' falls back to a plain delete and insert. */); + Vreplace_buffer_contents_max_secs = Qnil; + defsubr (&Spropertize); defsubr (&Schar_equal); defsubr (&Sgoto_char);