* src/marker.c: Try and speed up byte<->char conversion with many markers.

author Stefan Monnier <monnier@iro.umontreal.ca>

Mon, 26 Mar 2018 13:01:30 +0000 (09:01 -0400)

committer Stefan Monnier <monnier@iro.umontreal.ca>

Mon, 26 Mar 2018 13:01:30 +0000 (09:01 -0400)
author Stefan Monnier <monnier@iro.umontreal.ca>
Mon, 26 Mar 2018 13:01:30 +0000 (09:01 -0400)
committer Stefan Monnier <monnier@iro.umontreal.ca>
Mon, 26 Mar 2018 13:01:30 +0000 (09:01 -0400)
diff --git a/src/marker.c b/src/marker.c

index 7773c4fce0ff0ac6639a1fe9715d765eb62742c0..9933d957a57083d6b5701f0af9f81feef02adde4 100644 (file)
--- a/src/marker.c
+++ b/src/marker.c
@@ -90,7 +90,7 @@ clear_charpos_cache (struct buffer *b)
  #define CONSIDER(CHARPOS, BYTEPOS)                                     \
  {                                                                      \
    ptrdiff_t this_charpos = (CHARPOS);                                  \
-  bool changed = 0;                                                    \
+  bool changed = false;                                                        \
                                                                         \
    if (this_charpos == charpos)                                         \
      {                                                                  \
@@ -105,14 +105,14 @@ clear_charpos_cache (struct buffer *b)
         {                                                               \
           best_above = this_charpos;                                    \
           best_above_byte = (BYTEPOS);                                  \
-         changed = 1;                                                  \
+         changed = true;                                               \
         }                                                               \
      }                                                                  \
    else if (this_charpos > best_below)                                  \
      {                                                                  \
        best_below = this_charpos;                                       \
        best_below_byte = (BYTEPOS);                                     \
-      changed = 1;                                                     \
+      changed = true;                                                  \
      }                                                                  \
                                                                         \
    if (changed)                                                         \
@@ -133,6 +133,28 @@ CHECK_MARKER (Lisp_Object x)
    CHECK_TYPE (MARKERP (x), Qmarkerp, x);
  }
  
+/* When converting bytes from/to chars, we look through the list of
+   markers to try and find a good starting point (since markers keep
+   track of both bytepos and charpos at the same time).
+   But if there are many markers, it can take too much time to find a "good"
+   marker from which to start.  Worse yet: if it takes a long time and we end
+   up finding a nearby markers, we won't add a new marker to cache this
+   result, so next time around we'll have to go through this same long list
+   to (re)find this best marker.  So the further down the list of
+   markers we go, the less demanding we are w.r.t what is a good marker.
+
+   The previous code used INITIAL=50 and INCREMENT=0 and this lead to
+   really poor performance when there are many markers.
+   I haven't tried to tweak INITIAL, but experiments on my trusty Thinkpad
+   T61 using various artificial test cases seem to suggest that INCREMENT=50
+   might be "the best compromise": it significantly improved the
+   worst case and it was rarely slower and never by much.
+
+   The asymptotic behavior is still poor, tho, so in largish buffers with many
+   overlays (e.g. 300KB and 30K overlays), it can still be a bottlneck.  */
+#define BYTECHAR_DISTANCE_INITIAL 50
+#define BYTECHAR_DISTANCE_INCREMENT 50
+
  /* Return the byte position corresponding to CHARPOS in B.  */
  
  ptrdiff_t
@@ -141,6 +163,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
    struct Lisp_Marker *tail;
    ptrdiff_t best_above, best_above_byte;
    ptrdiff_t best_below, best_below_byte;
+  ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL;
  
    eassert (BUF_BEG (b) <= charpos && charpos <= BUF_Z (b));
  
@@ -180,8 +203,11 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
        /* If we are down to a range of 50 chars,
          don't bother checking any other markers;
          scan the intervening chars directly now.  */
-      if (best_above - best_below < 50)
+      if (best_above - charpos < distance
+          || charpos - best_below < distance)
         break;
+      else
+        distance += BYTECHAR_DISTANCE_INCREMENT;
      }
  
    /* We get here if we did not exactly hit one of the known places.
@@ -248,7 +274,7 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
  #define CONSIDER(BYTEPOS, CHARPOS)                                     \
  {                                                                      \
    ptrdiff_t this_bytepos = (BYTEPOS);                                  \
-  int changed = 0;                                                     \
+  int changed = false;                                                 \
                                                                         \
    if (this_bytepos == bytepos)                                         \
      {                                                                  \
@@ -263,14 +289,14 @@ buf_charpos_to_bytepos (struct buffer *b, ptrdiff_t charpos)
         {                                                               \
           best_above = (CHARPOS);                                       \
           best_above_byte = this_bytepos;                               \
-         changed = 1;                                                  \
+         changed = true;                                               \
         }                                                               \
      }                                                                  \
    else if (this_bytepos > best_below_byte)                             \
      {                                                                  \
        best_below = (CHARPOS);                                          \
        best_below_byte = this_bytepos;                                  \
-      changed = 1;                                                     \
+      changed = true;                                                  \
      }                                                                  \
                                                                         \
    if (changed)                                                         \
@@ -293,6 +319,7 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos)
    struct Lisp_Marker *tail;
    ptrdiff_t best_above, best_above_byte;
    ptrdiff_t best_below, best_below_byte;
+  ptrdiff_t distance = BYTECHAR_DISTANCE_INITIAL;
  
    eassert (BUF_BEG_BYTE (b) <= bytepos && bytepos <= BUF_Z_BYTE (b));
  
@@ -323,8 +350,11 @@ buf_bytepos_to_charpos (struct buffer *b, ptrdiff_t bytepos)
        /* If we are down to a range of 50 chars,
          don't bother checking any other markers;
          scan the intervening chars directly now.  */
-      if (best_above - best_below < 50)
+      if (best_above - bytepos < distance
+          || bytepos - best_below < distance)
         break;
+      else
+        distance += BYTECHAR_DISTANCE_INCREMENT;
      }
  
    /* We get here if we did not exactly hit one of the known places.
@@ -744,8 +774,8 @@ count_markers (struct buffer *buf)
  ptrdiff_t
  verify_bytepos (ptrdiff_t charpos)
  {
-  ptrdiff_t below = 1;
-  ptrdiff_t below_byte = 1;
+  ptrdiff_t below = BEG;
+  ptrdiff_t below_byte = BYTE_BEG;
  
    while (below != charpos)
      {
author	Stefan Monnier <monnier@iro.umontreal.ca>
	Mon, 26 Mar 2018 13:01:30 +0000 (09:01 -0400)
committer	Stefan Monnier <monnier@iro.umontreal.ca>
	Mon, 26 Mar 2018 13:01:30 +0000 (09:01 -0400)