#include "puresize.h"
#include "gnutls.h"
-static void sort_vector_copy (Lisp_Object pred, ptrdiff_t len,
- Lisp_Object src[restrict VLA_ELEMS (len)],
- Lisp_Object dest[restrict VLA_ELEMS (len)]);
enum equal_kind { EQUAL_NO_QUIT, EQUAL_PLAIN, EQUAL_INCLUDING_PROPERTIES };
static bool internal_equal (Lisp_Object, Lisp_Object,
enum equal_kind, int, Lisp_Object);
return new;
}
-/* Sort LIST using PREDICATE, preserving original order of elements
- considered as equal. */
+
+/* Stably sort LIST ordered by PREDICATE using the TIMSORT
+ algorithm. This converts the list to a vector, sorts the vector,
+ and returns the result converted back to a list. The input list is
+ destructively reused to hold the sorted result. */
static Lisp_Object
sort_list (Lisp_Object list, Lisp_Object predicate)
ptrdiff_t length = list_length (list);
if (length < 2)
return list;
-
- Lisp_Object tem = Fnthcdr (make_fixnum (length / 2 - 1), list);
- Lisp_Object back = Fcdr (tem);
- Fsetcdr (tem, Qnil);
-
- return merge (Fsort (list, predicate), Fsort (back, predicate), predicate);
-}
-
-/* Using PRED to compare, return whether A and B are in order.
- Compare stably when A appeared before B in the input. */
-static bool
-inorder (Lisp_Object pred, Lisp_Object a, Lisp_Object b)
-{
- return NILP (call2 (pred, b, a));
-}
-
-/* Using PRED to compare, merge from ALEN-length A and BLEN-length B
- into DEST. Argument arrays must be nonempty and must not overlap,
- except that B might be the last part of DEST. */
-static void
-merge_vectors (Lisp_Object pred,
- ptrdiff_t alen, Lisp_Object const a[restrict VLA_ELEMS (alen)],
- ptrdiff_t blen, Lisp_Object const b[VLA_ELEMS (blen)],
- Lisp_Object dest[VLA_ELEMS (alen + blen)])
-{
- eassume (0 < alen && 0 < blen);
- Lisp_Object const *alim = a + alen;
- Lisp_Object const *blim = b + blen;
-
- while (true)
+ else
{
- if (inorder (pred, a[0], b[0]))
+ Lisp_Object *result;
+ USE_SAFE_ALLOCA;
+ SAFE_ALLOCA_LISP (result, length);
+ Lisp_Object tail = list;
+ for (ptrdiff_t i = 0; i < length; i++)
{
- *dest++ = *a++;
- if (a == alim)
- {
- if (dest != b)
- memcpy (dest, b, (blim - b) * sizeof *dest);
- return;
- }
+ result[i] = Fcar (tail);
+ tail = XCDR (tail);
}
- else
+ tim_sort (predicate, result, length);
+
+ ptrdiff_t i = 0;
+ tail = list;
+ while (CONSP (tail))
{
- *dest++ = *b++;
- if (b == blim)
- {
- memcpy (dest, a, (alim - a) * sizeof *dest);
- return;
- }
+ XSETCAR (tail, result[i]);
+ tail = XCDR (tail);
+ i++;
}
+ SAFE_FREE ();
+ return list;
}
}
-/* Using PRED to compare, sort LEN-length VEC in place, using TMP for
- temporary storage. LEN must be at least 2. */
-static void
-sort_vector_inplace (Lisp_Object pred, ptrdiff_t len,
- Lisp_Object vec[restrict VLA_ELEMS (len)],
- Lisp_Object tmp[restrict VLA_ELEMS (len >> 1)])
-{
- eassume (2 <= len);
- ptrdiff_t halflen = len >> 1;
- sort_vector_copy (pred, halflen, vec, tmp);
- if (1 < len - halflen)
- sort_vector_inplace (pred, len - halflen, vec + halflen, vec);
- merge_vectors (pred, halflen, tmp, len - halflen, vec + halflen, vec);
-}
-
-/* Using PRED to compare, sort from LEN-length SRC into DST.
- Len must be positive. */
-static void
-sort_vector_copy (Lisp_Object pred, ptrdiff_t len,
- Lisp_Object src[restrict VLA_ELEMS (len)],
- Lisp_Object dest[restrict VLA_ELEMS (len)])
-{
- eassume (0 < len);
- ptrdiff_t halflen = len >> 1;
- if (halflen < 1)
- dest[0] = src[0];
- else
- {
- if (1 < halflen)
- sort_vector_inplace (pred, halflen, src, dest);
- if (1 < len - halflen)
- sort_vector_inplace (pred, len - halflen, src + halflen, dest);
- merge_vectors (pred, halflen, src, len - halflen, src + halflen, dest);
- }
-}
-
-/* Sort VECTOR in place using PREDICATE, preserving original order of
- elements considered as equal. */
+/* Stably sort VECTOR ordered by PREDICATE using the TIMSORT
+ algorithm. */
static void
sort_vector (Lisp_Object vector, Lisp_Object predicate)
{
- ptrdiff_t len = ASIZE (vector);
- if (len < 2)
+ ptrdiff_t length = ASIZE (vector);
+ if (length < 2)
return;
- ptrdiff_t halflen = len >> 1;
- Lisp_Object *tmp;
- USE_SAFE_ALLOCA;
- SAFE_ALLOCA_LISP (tmp, halflen);
- for (ptrdiff_t i = 0; i < halflen; i++)
- tmp[i] = make_fixnum (0);
- sort_vector_inplace (predicate, len, XVECTOR (vector)->contents, tmp);
- SAFE_FREE ();
+
+ tim_sort (predicate, XVECTOR (vector)->contents, length);
}
DEFUN ("sort", Fsort, Ssort, 2, 2, 0,
}
Lisp_Object tem;
- if (inorder (pred, Fcar (l1), Fcar (l2)))
+ if (!NILP (call2 (pred, Fcar (l1), Fcar (l2))))
{
tem = l1;
l1 = Fcdr (l1);
--- /dev/null
+/* Timsort for sequences.
+
+Copyright (C) 2022 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or (at
+your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
+
+/* This is a version of the cpython code implementing the TIMSORT
+ sorting algorithm described in
+ https://github.com/python/cpython/blob/main/Objects/listsort.txt.
+ This algorithm identifies and pushes naturally ordered sublists of
+ the original list, or "runs", onto a stack, and merges them
+ periodically according to a merge strategy called "powersort".
+ State is maintained during the sort in a merge_state structure,
+ which is passed around as an argument to all the subroutines. A
+ "stretch" structure includes a pointer to the run BASE of length
+ LEN along with its POWER (a computed integer used by the powersort
+ merge strategy that depends on this run and the succeeding run.) */
+
+
+#include <config.h>
+#include "lisp.h"
+
+
+/* MAX_MERGE_PENDING is the maximum number of entries in merge_state's
+ pending-stretch stack. For a list with n elements, this needs at most
+ floor(log2(n)) + 1 entries even if we didn't force runs to a
+ minimal length. So the number of bits in a ptrdiff_t is plenty large
+ enough for all cases. */
+
+#define MAX_MERGE_PENDING (sizeof (ptrdiff_t) * 8)
+
+/* Once we get into galloping mode, we stay there as long as both runs
+ win at least GALLOP_WIN_MIN consecutive times. */
+
+#define GALLOP_WIN_MIN 7
+
+/* A small temp array of size MERGESTATE_TEMP_SIZE is used to avoid
+ malloc when merging small lists. */
+
+#define MERGESTATE_TEMP_SIZE 256
+
+struct stretch
+{
+ Lisp_Object *base;
+ ptrdiff_t len;
+ int power;
+};
+
+struct reloc
+{
+ Lisp_Object **src;
+ Lisp_Object **dst;
+ ptrdiff_t *size;
+ int order; /* -1 while in merge_lo; +1 while in merg_hi; 0 otherwise. */
+};
+
+
+typedef struct
+{
+ Lisp_Object *listbase;
+ ptrdiff_t listlen;
+
+ /* PENDING is a stack of N pending stretches yet to be merged.
+ Stretch #i starts at address base[i] and extends for len[i]
+ elements. */
+
+ int n;
+ struct stretch pending[MAX_MERGE_PENDING];
+
+ /* The variable MIN_GALLOP, initialized to GALLOP_WIN_MIN, controls
+ when we get *into* galloping mode. merge_lo and merge_hi tend to
+ nudge it higher for random data, and lower for highly structured
+ data. */
+
+ ptrdiff_t min_gallop;
+
+ /* 'A' is temporary storage, able to hold ALLOCED elements, to help
+ with merges. 'A' initially points to TEMPARRAY, and subsequently
+ to newly allocated memory if needed. */
+
+ Lisp_Object *a;
+ ptrdiff_t alloced;
+ specpdl_ref count;
+ Lisp_Object temparray[MERGESTATE_TEMP_SIZE];
+
+ /* If an exception is thrown while merging we might have to relocate
+ some list elements from temporary storage back into the list.
+ RELOC keeps track of the information needed to do this. */
+
+ struct reloc reloc;
+
+ /* PREDICATE is the lisp comparison predicate for the sort. */
+
+ Lisp_Object predicate;
+} merge_state;
+
+
+/* Return true iff (PREDICATE A B) is non-nil. */
+
+static inline bool
+inorder (const Lisp_Object predicate, const Lisp_Object a, const Lisp_Object b)
+{
+ return !NILP (call2 (predicate, a, b));
+}
+
+
+/* Sort the list starting at LO and ending at HI using a stable binary
+ insertion sort algorithm. On entry the sublist [LO, START) (with
+ START between LO and HIGH) is known to be sorted (pass START == LO
+ if you are unsure). Even in case of error, the output will be some
+ permutation of the input (nothing is lost or duplicated). */
+
+static void
+binarysort (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi,
+ Lisp_Object *start)
+{
+ Lisp_Object pred = ms->predicate;
+
+ eassume (lo <= start && start <= hi);
+ if (lo == start)
+ ++start;
+ for (; start < hi; ++start)
+ {
+ Lisp_Object *l = lo;
+ Lisp_Object *r = start;
+ Lisp_Object pivot = *r;
+
+ eassume (l < r);
+ do {
+ Lisp_Object *p = l + ((r - l) >> 1);
+ if (inorder (pred, pivot, *p))
+ r = p;
+ else
+ l = p + 1;
+ } while (l < r);
+ eassume (l == r);
+ for (Lisp_Object *p = start; p > l; --p)
+ p[0] = p[-1];
+ *l = pivot;
+ }
+}
+
+
+/* Find and return the length of the "run" (the longest
+ non-decreasing sequence or the longest strictly decreasing
+ sequence, with the Boolean *DESCENDING set to 0 in the former
+ case, or to 1 in the latter) beginning at LO, in the slice [LO,
+ HI) with LO < HI. The strictness of the definition of
+ "descending" ensures there are no equal elements to get out of
+ order so the caller can safely reverse a descending sequence
+ without violating stability. */
+
+static ptrdiff_t
+count_run (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi,
+ bool *descending)
+{
+ Lisp_Object pred = ms->predicate;
+
+ eassume (lo < hi);
+ *descending = 0;
+ ++lo;
+ ptrdiff_t n = 1;
+ if (lo == hi)
+ return n;
+
+ n = 2;
+ if (inorder (pred, lo[0], lo[-1]))
+ {
+ *descending = 1;
+ for (lo = lo + 1; lo < hi; ++lo, ++n)
+ {
+ if (!inorder (pred, lo[0], lo[-1]))
+ break;
+ }
+ }
+ else
+ {
+ for (lo = lo + 1; lo < hi; ++lo, ++n)
+ {
+ if (inorder (pred, lo[0], lo[-1]))
+ break;
+ }
+ }
+
+ return n;
+}
+
+
+/* Locate and return the proper insertion position of KEY in a sorted
+ vector: if the vector contains an element equal to KEY, return the
+ position immediately to the left of the leftmost equal element.
+ [GALLOP_RIGHT does the same except it returns the position to the
+ right of the rightmost equal element (if any).]
+
+ 'A' is a sorted vector of N elements. N must be > 0.
+
+ Elements preceding HINT, a non-negative index less than N, are
+ skipped. The closer HINT is to the final result, the faster this
+ runs.
+
+ The return value is the int k in [0, N] such that
+
+ A[k-1] < KEY <= a[k]
+
+ pretending that *(A-1) precedes all values and *(A+N) succeeds all
+ values. In other words, the first k elements of A should precede
+ KEY, and the last N-k should follow KEY. */
+
+static ptrdiff_t
+gallop_left (merge_state *ms, const Lisp_Object key, Lisp_Object *a,
+ const ptrdiff_t n, const ptrdiff_t hint)
+{
+ Lisp_Object pred = ms->predicate;
+
+ eassume (a && n > 0 && hint >= 0 && hint < n);
+
+ a += hint;
+ ptrdiff_t lastofs = 0;
+ ptrdiff_t ofs = 1;
+ if (inorder (pred, *a, key))
+ {
+ /* When a[hint] < key, gallop right until
+ a[hint + lastofs] < key <= a[hint + ofs]. */
+ const ptrdiff_t maxofs = n - hint; /* This is one after the end of a. */
+ while (ofs < maxofs)
+ {
+ if (inorder (pred, a[ofs], key))
+ {
+ lastofs = ofs;
+ eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
+ ofs = (ofs << 1) + 1;
+ }
+ else
+ break; /* Here key <= a[hint+ofs]. */
+ }
+ if (ofs > maxofs)
+ ofs = maxofs;
+ /* Translate back to offsets relative to &a[0]. */
+ lastofs += hint;
+ ofs += hint;
+ }
+ else
+ {
+ /* When key <= a[hint], gallop left, until
+ a[hint - ofs] < key <= a[hint - lastofs]. */
+ const ptrdiff_t maxofs = hint + 1; /* Here &a[0] is lowest. */
+ while (ofs < maxofs)
+ {
+ if (inorder (pred, a[-ofs], key))
+ break;
+ /* Here key <= a[hint - ofs]. */
+ lastofs = ofs;
+ eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
+ ofs = (ofs << 1) + 1;
+ }
+ if (ofs > maxofs)
+ ofs = maxofs;
+ /* Translate back to use positive offsets relative to &a[0]. */
+ ptrdiff_t k = lastofs;
+ lastofs = hint - ofs;
+ ofs = hint - k;
+ }
+ a -= hint;
+
+ eassume (-1 <= lastofs && lastofs < ofs && ofs <= n);
+ /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the
+ right of lastofs but no farther right than ofs. Do a binary
+ search, with invariant a[lastofs-1] < key <= a[ofs]. */
+ ++lastofs;
+ while (lastofs < ofs)
+ {
+ ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1);
+
+ if (inorder (pred, a[m], key))
+ lastofs = m + 1; /* Here a[m] < key. */
+ else
+ ofs = m; /* Here key <= a[m]. */
+ }
+ eassume (lastofs == ofs); /* Then a[ofs-1] < key <= a[ofs]. */
+ return ofs;
+}
+
+
+/* Locate and return the proper position of KEY in a sorted vector
+ exactly like GALLOP_LEFT, except that if KEY already exists in
+ A[0:N] find the position immediately to the right of the rightmost
+ equal value.
+
+ The return value is the int k in [0, N] such that
+
+ A[k-1] <= KEY < A[k]. */
+
+static ptrdiff_t
+gallop_right (merge_state *ms, const Lisp_Object key, Lisp_Object *a,
+ const ptrdiff_t n, const ptrdiff_t hint)
+{
+ Lisp_Object pred = ms->predicate;
+
+ eassume (a && n > 0 && hint >= 0 && hint < n);
+
+ a += hint;
+ ptrdiff_t lastofs = 0;
+ ptrdiff_t ofs = 1;
+ if (inorder (pred, key, *a))
+ {
+ /* When key < a[hint], gallop left until
+ a[hint - ofs] <= key < a[hint - lastofs]. */
+ const ptrdiff_t maxofs = hint + 1; /* Here &a[0] is lowest. */
+ while (ofs < maxofs)
+ {
+ if (inorder (pred, key, a[-ofs]))
+ {
+ lastofs = ofs;
+ eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
+ ofs = (ofs << 1) + 1;
+ }
+ else /* Here a[hint - ofs] <= key. */
+ break;
+ }
+ if (ofs > maxofs)
+ ofs = maxofs;
+ /* Translate back to use positive offsets relative to &a[0]. */
+ ptrdiff_t k = lastofs;
+ lastofs = hint - ofs;
+ ofs = hint - k;
+ }
+ else
+ {
+ /* When a[hint] <= key, gallop right, until
+ a[hint + lastofs] <= key < a[hint + ofs]. */
+ const ptrdiff_t maxofs = n - hint; /* Here &a[n-1] is highest. */
+ while (ofs < maxofs)
+ {
+ if (inorder (pred, key, a[ofs]))
+ break;
+ /* Here a[hint + ofs] <= key. */
+ lastofs = ofs;
+ eassume (ofs <= (PTRDIFF_MAX - 1) / 2);
+ ofs = (ofs << 1) + 1;
+ }
+ if (ofs > maxofs)
+ ofs = maxofs;
+ /* Translate back to use offsets relative to &a[0]. */
+ lastofs += hint;
+ ofs += hint;
+ }
+ a -= hint;
+
+ eassume (-1 <= lastofs && lastofs < ofs && ofs <= n);
+ /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the
+ right of lastofs but no farther right than ofs. Do a binary
+ search, with invariant a[lastofs-1] <= key < a[ofs]. */
+ ++lastofs;
+ while (lastofs < ofs)
+ {
+ ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1);
+
+ if (inorder (pred, key, a[m]))
+ ofs = m; /* Here key < a[m]. */
+ else
+ lastofs = m + 1; /* Here a[m] <= key. */
+ }
+ eassume (lastofs == ofs); /* Now a[ofs-1] <= key < a[ofs]. */
+ return ofs;
+}
+
+
+static void
+merge_init (merge_state *ms, const ptrdiff_t list_size, Lisp_Object *lo,
+ const Lisp_Object predicate)
+{
+ eassume (ms != NULL);
+
+ ms->a = ms->temparray;
+ ms->alloced = MERGESTATE_TEMP_SIZE;
+
+ ms->n = 0;
+ ms->min_gallop = GALLOP_WIN_MIN;
+ ms->listlen = list_size;
+ ms->listbase = lo;
+ ms->predicate = predicate;
+ ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
+}
+
+
+/* The dynamically allocated memory may hold lisp objects during
+ merging. MERGE_MARKMEM marks them so they aren't reaped during
+ GC. */
+
+static void
+merge_markmem (void *arg)
+{
+ merge_state *ms = arg;
+ eassume (ms != NULL);
+
+ if (ms->reloc.size != NULL && *ms->reloc.size > 0)
+ {
+ eassume (ms->reloc.src != NULL);
+ mark_objects (*ms->reloc.src, *ms->reloc.size);
+ }
+}
+
+
+/* Free all temp storage. If an exception occurs while merging,
+ relocate any lisp elements in temp storage back to the original
+ array. */
+
+static void
+cleanup_mem (void *arg)
+{
+ merge_state *ms = arg;
+ eassume (ms != NULL);
+
+ /* If we have an exception while merging, some of the list elements
+ might only live in temp storage; we copy everything remaining in
+ the temp storage back into the original list. This ensures that
+ the original list has all of the original elements, although
+ their order is unpredictable. */
+
+ if (ms->reloc.order != 0 && *ms->reloc.size > 0)
+ {
+ eassume (*ms->reloc.src != NULL && *ms->reloc.dst != NULL);
+ ptrdiff_t n = *ms->reloc.size;
+ ptrdiff_t shift = ms->reloc.order == -1 ? 0 : n - 1;
+ memcpy (*ms->reloc.dst - shift, *ms->reloc.src, n * word_size);
+ }
+
+ /* Free any remaining temp storage. */
+ xfree (ms->a);
+}
+
+
+/* Allocate enough temp memory for NEED array slots. Any previously
+ allocated memory is first freed, and a cleanup routine is
+ registered to free memory at the very end of the sort, or on
+ exception. */
+
+static void
+merge_getmem (merge_state *ms, const ptrdiff_t need)
+{
+ eassume (ms != NULL);
+
+ if (ms->a == ms->temparray)
+ {
+ /* We only get here if alloc is needed and this is the first
+ time, so we set up the unwind protection. */
+ specpdl_ref count = SPECPDL_INDEX ();
+ record_unwind_protect_ptr_mark (cleanup_mem, ms, merge_markmem);
+ ms->count = count;
+ }
+ else
+ {
+ /* We have previously alloced storage. Since we don't care
+ what's in the block we don't use realloc which would waste
+ cycles copying the old data. We just free and alloc
+ again. */
+ xfree (ms->a);
+ }
+ ms->a = xmalloc (need * word_size);
+ ms->alloced = need;
+}
+
+
+static inline void
+needmem (merge_state *ms, ptrdiff_t na)
+{
+ if (na > ms->alloced)
+ merge_getmem (ms, na);
+}
+
+
+/* Stably merge (in-place) the NA elements starting at SSA with the NB
+ elements starting at SSB = SSA + NA. NA and NB must be positive.
+ Require that SSA[NA-1] belongs at the end of the merge, and NA <=
+ NB. */
+
+static void
+merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb,
+ ptrdiff_t nb)
+{
+ Lisp_Object pred = ms->predicate;
+
+ eassume (ms && ssa && ssb && na > 0 && nb > 0);
+ eassume (ssa + na == ssb);
+ needmem (ms, na);
+ memcpy (ms->a, ssa, na * word_size);
+ Lisp_Object *dest = ssa;
+ ssa = ms->a;
+
+ ms->reloc = (struct reloc){&ssa, &dest, &na, -1};
+
+ *dest++ = *ssb++;
+ --nb;
+ if (nb == 0)
+ goto Succeed;
+ if (na == 1)
+ goto CopyB;
+
+ ptrdiff_t min_gallop = ms->min_gallop;
+ for (;;)
+ {
+ ptrdiff_t acount = 0; /* The # of consecutive times A won. */
+
+ ptrdiff_t bcount = 0; /* The # of consecutive times B won. */
+
+ for (;;)
+ {
+ eassume (na > 1 && nb > 0);
+ if (inorder (pred, *ssb, *ssa))
+ {
+ *dest++ = *ssb++ ;
+ ++bcount;
+ acount = 0;
+ --nb;
+ if (nb == 0)
+ goto Succeed;
+ if (bcount >= min_gallop)
+ break;
+ }
+ else
+ {
+ *dest++ = *ssa++;
+ ++acount;
+ bcount = 0;
+ --na;
+ if (na == 1)
+ goto CopyB;
+ if (acount >= min_gallop)
+ break;
+ }
+ }
+
+ /* One run is winning so consistently that galloping may be a
+ huge speedup. We try that, and continue galloping until (if
+ ever) neither run appears to be winning consistently
+ anymore. */
+ ++min_gallop;
+ do {
+ eassume (na > 1 && nb > 0);
+ min_gallop -= min_gallop > 1;
+ ms->min_gallop = min_gallop;
+ ptrdiff_t k = gallop_right (ms, ssb[0], ssa, na, 0);
+ acount = k;
+ if (k)
+ {
+ memcpy (dest, ssa, k * word_size);
+ dest += k;
+ ssa += k;
+ na -= k;
+ if (na == 1)
+ goto CopyB;
+ /* While na==0 is impossible for a consistent comparison
+ function, we shouldn't assume that it is. */
+ if (na == 0)
+ goto Succeed;
+ }
+ *dest++ = *ssb++ ;
+ --nb;
+ if (nb == 0)
+ goto Succeed;
+
+ k = gallop_left (ms, ssa[0], ssb, nb, 0);
+ bcount = k;
+ if (k)
+ {
+ memmove (dest, ssb, k * word_size);
+ dest += k;
+ ssb += k;
+ nb -= k;
+ if (nb == 0)
+ goto Succeed;
+ }
+ *dest++ = *ssa++;
+ --na;
+ if (na == 1)
+ goto CopyB;
+ } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN);
+ ++min_gallop; /* Apply a penalty for leaving galloping mode. */
+ ms->min_gallop = min_gallop;
+ }
+ Succeed:
+ ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
+
+ if (na)
+ memcpy (dest, ssa, na * word_size);
+ return;
+ CopyB:
+ eassume (na == 1 && nb > 0);
+ ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
+
+ /* The last element of ssa belongs at the end of the merge. */
+ memmove (dest, ssb, nb * word_size);
+ dest[nb] = ssa[0];
+}
+
+
+/* Stably merge (in-place) the NA elements starting at SSA with the NB
+ elements starting at SSB = SSA + NA. NA and NB must be positive.
+ Require that SSA[NA-1] belongs at the end of the merge, and NA >=
+ NB. */
+
+static void
+merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na,
+ Lisp_Object *ssb, ptrdiff_t nb)
+{
+ Lisp_Object pred = ms->predicate;
+
+ eassume (ms && ssa && ssb && na > 0 && nb > 0);
+ eassume (ssa + na == ssb);
+ needmem (ms, nb);
+ Lisp_Object *dest = ssb;
+ dest += nb - 1;
+ memcpy(ms->a, ssb, nb * word_size);
+ Lisp_Object *basea = ssa;
+ Lisp_Object *baseb = ms->a;
+ ssb = ms->a + nb - 1;
+ ssa += na - 1;
+
+ ms->reloc = (struct reloc){&baseb, &dest, &nb, 1};
+
+ *dest-- = *ssa--;
+ --na;
+ if (na == 0)
+ goto Succeed;
+ if (nb == 1)
+ goto CopyA;
+
+ ptrdiff_t min_gallop = ms->min_gallop;
+ for (;;) {
+ ptrdiff_t acount = 0; /* The # of consecutive times A won. */
+ ptrdiff_t bcount = 0; /* The # of consecutive times B won. */
+
+ for (;;) {
+ eassume (na > 0 && nb > 1);
+ if (inorder (pred, *ssb, *ssa))
+ {
+ *dest-- = *ssa--;
+ ++acount;
+ bcount = 0;
+ --na;
+ if (na == 0)
+ goto Succeed;
+ if (acount >= min_gallop)
+ break;
+ }
+ else
+ {
+ *dest-- = *ssb--;
+ ++bcount;
+ acount = 0;
+ --nb;
+ if (nb == 1)
+ goto CopyA;
+ if (bcount >= min_gallop)
+ break;
+ }
+ }
+
+ /* One run is winning so consistently that galloping may be a huge
+ speedup. Try that, and continue galloping until (if ever)
+ neither run appears to be winning consistently anymore. */
+ ++min_gallop;
+ do {
+ eassume (na > 0 && nb > 1);
+ min_gallop -= min_gallop > 1;
+ ms->min_gallop = min_gallop;
+ ptrdiff_t k = gallop_right (ms, ssb[0], basea, na, na - 1);
+ k = na - k;
+ acount = k;
+ if (k)
+ {
+ dest += -k;
+ ssa += -k;
+ memmove(dest + 1, ssa + 1, k * word_size);
+ na -= k;
+ if (na == 0)
+ goto Succeed;
+ }
+ *dest-- = *ssb--;
+ --nb;
+ if (nb == 1)
+ goto CopyA;
+
+ k = gallop_left (ms, ssa[0], baseb, nb, nb - 1);
+ k = nb - k;
+ bcount = k;
+ if (k)
+ {
+ dest += -k;
+ ssb += -k;
+ memcpy(dest + 1, ssb + 1, k * word_size);
+ nb -= k;
+ if (nb == 1)
+ goto CopyA;
+ /* While nb==0 is impossible for a consistent comparison
+ function we shouldn't assume that it is. */
+ if (nb == 0)
+ goto Succeed;
+ }
+ *dest-- = *ssa--;
+ --na;
+ if (na == 0)
+ goto Succeed;
+ } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN);
+ ++min_gallop; /* Apply a penalty for leaving galloping mode. */
+ ms->min_gallop = min_gallop;
+ }
+ Succeed:
+ ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
+ if (nb)
+ memcpy (dest - nb + 1, baseb, nb * word_size);
+ return;
+ CopyA:
+ eassume (nb == 1 && na > 0);
+ ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
+ /* The first element of ssb belongs at the front of the merge. */
+ memmove (dest + 1 - na, ssa + 1 - na, na * word_size);
+ dest += -na;
+ ssa += -na;
+ dest[0] = ssb[0];
+}
+
+
+/* Merge the two runs at stack indices I and I+1. */
+
+static void
+merge_at (merge_state *ms, const ptrdiff_t i)
+{
+ eassume (ms != NULL);
+ eassume (ms->n >= 2);
+ eassume (i >= 0);
+ eassume (i == ms->n - 2 || i == ms->n - 3);
+
+ Lisp_Object *ssa = ms->pending[i].base;
+ ptrdiff_t na = ms->pending[i].len;
+ Lisp_Object *ssb = ms->pending[i + 1].base;
+ ptrdiff_t nb = ms->pending[i + 1].len;
+ eassume (na > 0 && nb > 0);
+ eassume (ssa + na == ssb);
+
+ /* Record the length of the combined runs. The current run i+1 goes
+ away after the merge. If i is the 3rd-last run now, slide the
+ last run (which isn't involved in this merge) over to i+1. */
+ ms->pending[i].len = na + nb;
+ if (i == ms->n - 3)
+ ms->pending[i + 1] = ms->pending[i + 2];
+ --ms->n;
+
+ /* Where does b start in a? Elements in a before that can be
+ ignored (they are already in place). */
+ ptrdiff_t k = gallop_right (ms, *ssb, ssa, na, 0);
+ eassume (k >= 0);
+ ssa += k;
+ na -= k;
+ if (na == 0)
+ return;
+
+ /* Where does a end in b? Elements in b after that can be ignored
+ (they are already in place). */
+ nb = gallop_left (ms, ssa[na - 1], ssb, nb, nb - 1);
+ if (nb == 0)
+ return;
+ eassume (nb > 0);
+ /* Merge what remains of the runs using a temp array with size
+ min(na, nb) elements. */
+ if (na <= nb)
+ merge_lo (ms, ssa, na, ssb, nb);
+ else
+ merge_hi (ms, ssa, na, ssb, nb);
+}
+
+
+/* Compute the "power" of the first of two adjacent runs begining at
+ index S1, with the first having length N1 and the second (starting
+ at index S1+N1) having length N2. The run has total length N. */
+
+static int
+powerloop (const ptrdiff_t s1, const ptrdiff_t n1, const ptrdiff_t n2,
+ const ptrdiff_t n)
+{
+ eassume (s1 >= 0);
+ eassume (n1 > 0 && n2 > 0);
+ eassume (s1 + n1 + n2 <= n);
+ /* The midpoints a and b are
+ a = s1 + n1/2
+ b = s1 + n1 + n2/2 = a + (n1 + n2)/2
+
+ These may not be integers because of the "/2", so we work with
+ 2*a and 2*b instead. It makes no difference to the outcome,
+ since the bits in the expansion of (2*i)/n are merely shifted one
+ position from those of i/n. */
+ ptrdiff_t a = 2 * s1 + n1;
+ ptrdiff_t b = a + n1 + n2;
+ int result = 0;
+ /* Emulate a/n and b/n one bit a time, until their bits differ. */
+ for (;;)
+ {
+ ++result;
+ if (a >= n)
+ { /* Both quotient bits are now 1. */
+ eassume (b >= a);
+ a -= n;
+ b -= n;
+ }
+ else if (b >= n)
+ { /* a/n bit is 0 and b/n bit is 1. */
+ break;
+ } /* Otherwise both quotient bits are 0. */
+ eassume (a < b && b < n);
+ a <<= 1;
+ b <<= 1;
+ }
+ return result;
+}
+
+
+/* Update the state upon identifying a run of length N2. If there's
+ already a stretch on the stack, apply the "powersort" merge
+ strategy: compute the topmost stretch's "power" (depth in a
+ conceptual binary merge tree) and merge adjacent runs on the stack
+ with greater power. */
+
+static void
+found_new_run (merge_state *ms, const ptrdiff_t n2)
+{
+ eassume (ms != NULL);
+ if (ms->n)
+ {
+ eassume (ms->n > 0);
+ struct stretch *p = ms->pending;
+ ptrdiff_t s1 = p[ms->n - 1].base - ms->listbase;
+ ptrdiff_t n1 = p[ms->n - 1].len;
+ int power = powerloop (s1, n1, n2, ms->listlen);
+ while (ms->n > 1 && p[ms->n - 2].power > power)
+ {
+ merge_at (ms, ms->n - 2);
+ }
+ eassume (ms->n < 2 || p[ms->n - 2].power < power);
+ p[ms->n - 1].power = power;
+ }
+}
+
+
+/* Unconditionally merge all stretches on the stack until only one
+ remains. */
+
+static void
+merge_force_collapse (merge_state *ms)
+{
+ struct stretch *p = ms->pending;
+
+ eassume (ms != NULL);
+ while (ms->n > 1)
+ {
+ ptrdiff_t n = ms->n - 2;
+ if (n > 0 && p[n - 1].len < p[n + 1].len)
+ --n;
+ merge_at (ms, n);
+ }
+}
+
+
+/* Compute a good value for the minimum run length; natural runs
+ shorter than this are boosted artificially via binary insertion.
+
+ If N < 64, return N (it's too small to bother with fancy stuff).
+ Otherwise if N is an exact power of 2, return 32. Finally, return
+ an int k, 32 <= k <= 64, such that N/k is close to, but strictly
+ less than, an exact power of 2. */
+
+static ptrdiff_t
+merge_compute_minrun (ptrdiff_t n)
+{
+ ptrdiff_t r = 0; /* r will become 1 if any non-zero bits are
+ shifted off. */
+
+ eassume (n >= 0);
+ while (n >= 64)
+ {
+ r |= n & 1;
+ n >>= 1;
+ }
+ return n + r;
+}
+
+
+static void
+reverse_vector (Lisp_Object *s, const ptrdiff_t n)
+{
+ for (ptrdiff_t i = 0; i < n >> 1; i++)
+ {
+ Lisp_Object tem = s[i];
+ s[i] = s[n - i - 1];
+ s[n - i - 1] = tem;
+ }
+}
+
+/* Sort the array SEQ with LENGTH elements in the order determined by
+ PREDICATE. */
+
+void
+tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length)
+{
+ if (SYMBOLP (predicate))
+ {
+ /* Attempt to resolve the function as far as possible ahead of time,
+ to avoid having to do it for each call. */
+ Lisp_Object fun = XSYMBOL (predicate)->u.s.function;
+ if (SYMBOLP (fun))
+ /* Function was an alias; use slow-path resolution. */
+ fun = indirect_function (fun);
+ /* Don't resolve to an autoload spec; that would be very slow. */
+ if (!NILP (fun) && !(CONSP (fun) && EQ (XCAR (fun), Qautoload)))
+ predicate = fun;
+ }
+
+ merge_state ms;
+ Lisp_Object *lo = seq;
+
+ merge_init (&ms, length, lo, predicate);
+
+ /* March over the array once, left to right, finding natural runs,
+ and extending short natural runs to minrun elements. */
+ const ptrdiff_t minrun = merge_compute_minrun (length);
+ ptrdiff_t nremaining = length;
+ do {
+ bool descending;
+
+ /* Identify the next run. */
+ ptrdiff_t n = count_run (&ms, lo, lo + nremaining, &descending);
+ if (descending)
+ reverse_vector (lo, n);
+ /* If the run is short, extend it to min(minrun, nremaining). */
+ if (n < minrun)
+ {
+ const ptrdiff_t force = nremaining <= minrun ?
+ nremaining : minrun;
+ binarysort (&ms, lo, lo + force, lo + n);
+ n = force;
+ }
+ eassume (ms.n == 0 || ms.pending[ms.n - 1].base +
+ ms.pending[ms.n - 1].len == lo);
+ found_new_run (&ms, n);
+ /* Push the new run on to the stack. */
+ eassume (ms.n < MAX_MERGE_PENDING);
+ ms.pending[ms.n].base = lo;
+ ms.pending[ms.n].len = n;
+ ++ms.n;
+ /* Advance to find the next run. */
+ lo += n;
+ nremaining -= n;
+ } while (nremaining);
+
+ merge_force_collapse (&ms);
+ eassume (ms.n == 1);
+ eassume (ms.pending[0].len == length);
+ lo = ms.pending[0].base;
+
+ if (ms.a != ms.temparray)
+ unbind_to (ms.count, Qnil);
+}