From eec71ebdb50c3110bb747db57c7d7f04b6d14ad1 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Fri, 29 Jun 2018 16:55:20 +0300 Subject: [PATCH] Speed up replace-buffer-contents * src/editfns.c (EXTRA_CONTEXT_FIELDS): Add a_unibyte and b_unibyte members. (rbc_quitcounter): New static variable. (Freplace_buffer_contents): Initialize a_unibyte, b_unibyte, and rbc_quitcounter. Inhibit modification hooks if they were not already inhibited. Use rarely_quit to allow user to quit, to avoid calling maybe_quit too frequently (which hurts performance). Remove redundant assertions (which hurt performance too much). Call signal_after_change and update_compositions after all the changes are done. (buffer_chars_equal): Remove redundant assertions (which hurt performance). Avoid using BUF_FETCH_CHAR_AS_MULTIBYTE, which hurts performance by referencing Lisp symbols; instead, use lower-level macros with explicit tests to select which macro to use. (Bug#31888) --- src/editfns.c | 83 +++++++++++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 25 deletions(-) diff --git a/src/editfns.c b/src/editfns.c index 4fba68692bf..4d3c838d2ff 100644 --- a/src/editfns.c +++ b/src/editfns.c @@ -3115,6 +3115,9 @@ determines whether case is significant or ignored. */) #undef ELEMENT #undef EQUAL +/* Counter used to rarely_quit in replace-buffer-contents. */ +static unsigned short rbc_quitcounter; + #define XVECREF_YVECREF_EQUAL(ctx, xoff, yoff) \ buffer_chars_equal ((ctx), (xoff), (yoff)) @@ -3124,6 +3127,9 @@ determines whether case is significant or ignored. */) /* Buffers to compare. */ \ struct buffer *buffer_a; \ struct buffer *buffer_b; \ + /* Whether each buffer is unibyte/plain-ASCII or not. */ \ + bool a_unibyte; \ + bool b_unibyte; \ /* Bit vectors recording for each character whether it was deleted or inserted. */ \ unsigned char *deletions; \ @@ -3202,6 +3208,8 @@ differences between the two buffers. 
*/) struct context ctx = { .buffer_a = a, .buffer_b = b, + .a_unibyte = BUF_ZV (a) == BUF_ZV_BYTE (a), + .b_unibyte = BUF_ZV (b) == BUF_ZV_BYTE (b), .deletions = SAFE_ALLOCA (del_bytes), .insertions = SAFE_ALLOCA (ins_bytes), .fdiag = buffer + size_b + 1, @@ -3218,10 +3226,25 @@ differences between the two buffers. */) early. */ eassert (! early_abort); + rbc_quitcounter = 0; + Fundo_boundary (); + bool modification_hooks_inhibited = false; ptrdiff_t count = SPECPDL_INDEX (); record_unwind_protect (save_excursion_restore, save_excursion_save ()); + /* We are going to make a lot of small modifications, and having the + modification hooks called for each of them will slow us down. + Instead, we announce a single modification for the entire + modified region. But don't do that if the caller inhibited + modification hooks, because then they don't want that. */ + if (!inhibit_modification_hooks) + { + prepare_to_modify_buffer (BEGV, ZV, NULL); + specbind (Qinhibit_modification_hooks, Qt); + modification_hooks_inhibited = true; + } + ptrdiff_t i = size_a; ptrdiff_t j = size_b; /* Walk backwards through the lists of changes. This was also @@ -3230,15 +3253,13 @@ differences between the two buffers. */) while (i >= 0 || j >= 0) { /* Allow the user to quit if this gets too slow. */ - maybe_quit (); + rarely_quit (++rbc_quitcounter); /* Check whether there is a change (insertion or deletion) before the current position. */ if ((i > 0 && bit_is_set (ctx.deletions, i - 1)) || (j > 0 && bit_is_set (ctx.insertions, j - 1))) { - maybe_quit (); - ptrdiff_t end_a = min_a + i; ptrdiff_t end_b = min_b + j; /* Find the beginning of the current change run. */ @@ -3246,14 +3267,13 @@ differences between the two buffers. 
*/) --i; while (j > 0 && bit_is_set (ctx.insertions, j - 1)) --j; + + rarely_quit (rbc_quitcounter++); + ptrdiff_t beg_a = min_a + i; ptrdiff_t beg_b = min_b + j; - eassert (beg_a >= BEGV); - eassert (beg_b >= BUF_BEGV (b)); eassert (beg_a <= end_a); eassert (beg_b <= end_b); - eassert (end_a <= ZV); - eassert (end_b <= BUF_ZV (b)); eassert (beg_a < end_a || beg_b < end_b); if (beg_a < end_a) del_range (beg_a, end_a); @@ -3269,6 +3289,13 @@ differences between the two buffers. */) } unbind_to (count, Qnil); SAFE_FREE (); + rbc_quitcounter = 0; + + if (modification_hooks_inhibited) + { + signal_after_change (BEGV, size_a, ZV - BEGV); + update_compositions (BEGV, ZV, CHECK_BORDER); + } return Qnil; } @@ -3296,39 +3323,45 @@ bit_is_set (const unsigned char *a, ptrdiff_t i) /* Return true if the characters at position POS_A of buffer CTX->buffer_a and at position POS_B of buffer CTX->buffer_b are equal. POS_A and POS_B are zero-based. Text properties are - ignored. */ + ignored. + + Implementation note: this function is called inside the inner-most + loops of compareseq, so it absolutely must be optimized for speed, + every last bit of it. E.g., each additional use of BEGV or such + likes will slow down replace-buffer-contents by dozens of percents, + because builtin_lisp_symbol will be called one more time in the + innermost loop. */ static bool buffer_chars_equal (struct context *ctx, ptrdiff_t pos_a, ptrdiff_t pos_b) { - eassert (pos_a >= 0); pos_a += BUF_BEGV (ctx->buffer_a); - eassert (pos_a >= BUF_BEGV (ctx->buffer_a)); - eassert (pos_a < BUF_ZV (ctx->buffer_a)); - - eassert (pos_b >= 0); pos_b += BUF_BEGV (ctx->buffer_b); - eassert (pos_b >= BUF_BEGV (ctx->buffer_b)); - eassert (pos_b < BUF_ZV (ctx->buffer_b)); - - bool a_unibyte = BUF_ZV (ctx->buffer_a) == BUF_ZV_BYTE (ctx->buffer_a); - bool b_unibyte = BUF_ZV (ctx->buffer_b) == BUF_ZV_BYTE (ctx->buffer_b); /* Allow the user to escape out of a slow compareseq call. 
 */ - maybe_quit (); + rarely_quit (++rbc_quitcounter); ptrdiff_t bpos_a = - a_unibyte ? pos_a : buf_charpos_to_bytepos (ctx->buffer_a, pos_a); + ctx->a_unibyte ? pos_a : buf_charpos_to_bytepos (ctx->buffer_a, pos_a); ptrdiff_t bpos_b = - b_unibyte ? pos_b : buf_charpos_to_bytepos (ctx->buffer_b, pos_b); + ctx->b_unibyte ? pos_b : buf_charpos_to_bytepos (ctx->buffer_b, pos_b); - if (a_unibyte && b_unibyte) + /* We make the below a series of specific tests to avoid using + BUF_FETCH_CHAR_AS_MULTIBYTE, which references Lisp symbols, and + is therefore significantly slower (see the note in the commentary + to this function). */ + if (ctx->a_unibyte && ctx->b_unibyte) return BUF_FETCH_BYTE (ctx->buffer_a, bpos_a) == BUF_FETCH_BYTE (ctx->buffer_b, bpos_b); - - return BUF_FETCH_CHAR_AS_MULTIBYTE (ctx->buffer_a, bpos_a) - == BUF_FETCH_CHAR_AS_MULTIBYTE (ctx->buffer_b, bpos_b); + if (ctx->a_unibyte && !ctx->b_unibyte) + return UNIBYTE_TO_CHAR (BUF_FETCH_BYTE (ctx->buffer_a, bpos_a)) + == BUF_FETCH_MULTIBYTE_CHAR (ctx->buffer_b, bpos_b); + if (!ctx->a_unibyte && ctx->b_unibyte) + return BUF_FETCH_MULTIBYTE_CHAR (ctx->buffer_a, bpos_a) + == UNIBYTE_TO_CHAR (BUF_FETCH_BYTE (ctx->buffer_b, bpos_b)); + return BUF_FETCH_MULTIBYTE_CHAR (ctx->buffer_a, bpos_a) + == BUF_FETCH_MULTIBYTE_CHAR (ctx->buffer_b, bpos_b); } -- 2.39.2