From: Stefan Monnier Date: Mon, 26 Mar 2018 13:01:30 +0000 (-0400) Subject: * src/marker.c: Try and speed up byte<->char conversion with many markers. X-Git-Tag: emacs-27.0.90~5393^2~1 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=b300052fb4ef;p=emacs.git * src/marker.c: Try and speed up byte<->char conversion with many markers. When considering markers (to find a starting point for the conversion), typically one of the two bounds is nearby (coming from cached_(byte|char)pos) but the other is far (point-min or point-max), so change the exit condition so we stop as soon as *one* of the bounds is near. (BYTECHAR_DISTANCE_INITIAL, BYTECHAR_DISTANCE_INCREMENT): New constants. (buf_charpos_to_bytepos, buf_bytepos_to_charpos): Use them to try and reduce the number of markers we consider. --- diff --git a/src/marker.c b/src/marker.c index 7773c4fce0f..9933d957a57 100644 --- a/src/marker.c +++ b/src/marker.c @@ -90,7 +90,7 @@ clear_charpos_cache (struct buffer *b) #define CONSIDER(CHARPOS, BYTEPOS) \ { \ ptrdiff_t this_charpos = (CHARPOS); \ - bool changed = 0; \ + bool changed = false; \ \ if (this_charpos == charpos) \ { \ @@ -105,14 +105,14 @@ clear_charpos_cache (struct buffer *b) { \ best_above = this_charpos; \ best_above_byte = (BYTEPOS); \ - changed = 1; \ + changed = true; \ } \ } \ else if (this_charpos > best_below) \ { \ best_below = this_charpos; \ best_below_byte = (BYTEPOS); \ - changed = 1; \ + changed = true; \ } \ \ if (changed) \ @@ -133,6 +133,28 @@ CHECK_MARKER (Lisp_Object x) CHECK_TYPE (MARKERP (x), Qmarkerp, x); } +/* When converting bytes from/to chars, we look through the list of + markers to try and find a good starting point (since markers keep + track of both bytepos and charpos at the same time). + But if there are many markers, it can take too much time to find a "good" + marker from which to start. Worse yet: if it takes a long time and we end + up finding a nearby markers, we won't add a new marker to cache this + result, so next time around we'll have to go through this same long list + to (re)find this best marker. So the further down the list of + markers we go, the less demanding we are w.r.t what is a good marker. + + The previous code used INITIAL=50 and INCREMENT=0 and this lead to + really poor performance when there are many markers. + I haven't tried to tweak INITIAL, but experiments on my trusty Thinkpad + T61 using various artificial test cases seem to suggest that INCREMENT=50 + might be "the best compromise": it significantly improved the + worst case and it was rarely slower and never by much. + + The asymptotic behavior is still poor, tho, so in largish buffers with many + overlays (e.g. 300KB and 30K overlays), it can still be a bottlneck. */ +#define BYTECHAR_DISTANCE_INITIAL 50 +#define BYTECHAR_DISTANCE_INCREMENT 50 + /* Return the byte position corresponding to CHARPOS in B. */ ptrdiff_t @@ -141,6 +163,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos) struct Lisp_Marker *tail; ptrdiff_t best_above, best_above_byte; ptrdiff_t best_below, best_below_byte; + ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL; eassert (BUF_BEG (b) <= charpos && charpos <= BUF_Z (b)); @@ -180,8 +203,11 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos) /* If we are down to a range of 50 chars, don't bother checking any other markers; scan the intervening chars directly now. */ - if (best_above - best_below < 50) + if (best_above - charpos < distance + || charpos - best_below < distance) break; + else + distance += BYTECHAR_DISTANCE_INCREMENT; } /* We get here if we did not exactly hit one of the known places. @@ -248,7 +274,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos) #define CONSIDER(BYTEPOS, CHARPOS) \ { \ ptrdiff_t this_bytepos = (BYTEPOS); \ - int changed = 0; \ + int changed = false; \ \ if (this_bytepos == bytepos) \ { \ @@ -263,14 +289,14 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos) { \ best_above = (CHARPOS); \ best_above_byte = this_bytepos; \ - changed = 1; \ + changed = true; \ } \ } \ else if (this_bytepos > best_below_byte) \ { \ best_below = (CHARPOS); \ best_below_byte = this_bytepos; \ - changed = 1; \ + changed = true; \ } \ \ if (changed) \ @@ -293,6 +319,7 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos) struct Lisp_Marker *tail; ptrdiff_t best_above, best_above_byte; ptrdiff_t best_below, best_below_byte; + ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL; eassert (BUF_BEG_BYTE (b) <= bytepos && bytepos <= BUF_Z_BYTE (b)); @@ -323,8 +350,11 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos) /* If we are down to a range of 50 chars, don't bother checking any other markers; scan the intervening chars directly now. */ - if (best_above - best_below < 50) + if (best_above - bytepos < distance + || bytepos - best_below < distance) break; + else + distance += BYTECHAR_DISTANCE_INCREMENT; } /* We get here if we did not exactly hit one of the known places. @@ -744,8 +774,8 @@ count_markers (struct buffer *buf) ptrdiff_t verify_bytepos (ptrdiff_t charpos) { - ptrdiff_t below = 1; - ptrdiff_t below_byte = 1; + ptrdiff_t below = BEG; + ptrdiff_t below_byte = BYTE_BEG; while (below != charpos) {