From e0470bcec76b94f93aed796abdcab14e2086fffc Mon Sep 17 00:00:00 2001 From: Andrew G Cohen Date: Thu, 10 Mar 2022 09:30:00 +0800 Subject: [PATCH] Replace list and vector sorting with TIMSORT algorithm * src/Makefile.in (base_obj): Add sort.o. * src/deps.mk (fns.o): Add sort.c. * src/lisp.h: Add prototypes for inorder, tim_sort. * src/sort.c: New file providing tim_sort. * src/fns.c: Remove prototypes for removed routines. (merge_vectors, sort_vector_inplace, sort_vector_copy): Remove. (sort_list, sort_vector): Use tim_sort. --- src/Makefile.in | 2 +- src/deps.mk | 2 +- src/fns.c | 122 ++---- src/lisp.h | 4 + src/sort.c | 961 ++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 993 insertions(+), 98 deletions(-) create mode 100644 src/sort.c diff --git a/src/Makefile.in b/src/Makefile.in index 3353fb16d79..e0f18dc352e 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -427,7 +427,7 @@ base_obj = dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \ minibuf.o fileio.o dired.o \ cmds.o casetab.o casefiddle.o indent.o search.o regex-emacs.o undo.o \ alloc.o pdumper.o data.o doc.o editfns.o callint.o \ - eval.o floatfns.o fns.o font.o print.o lread.o $(MODULES_OBJ) \ + eval.o floatfns.o fns.o sort.o font.o print.o lread.o $(MODULES_OBJ) \ syntax.o $(UNEXEC_OBJ) bytecode.o comp.o $(DYNLIB_OBJ) \ process.o gnutls.o callproc.o \ region-cache.o sound.o timefns.o atimer.o \ diff --git a/src/deps.mk b/src/deps.mk index deffab93eca..39edd5c1dd3 100644 --- a/src/deps.mk +++ b/src/deps.mk @@ -279,7 +279,7 @@ eval.o: eval.c commands.h keyboard.h blockinput.h atimer.h systime.h frame.h \ dispextern.h lisp.h globals.h $(config_h) coding.h composite.h xterm.h \ msdos.h floatfns.o: floatfns.c syssignal.h lisp.h globals.h $(config_h) -fns.o: fns.c commands.h lisp.h $(config_h) frame.h buffer.h character.h \ +fns.o: fns.c sort.c commands.h lisp.h $(config_h) frame.h buffer.h character.h \ keyboard.h keymap.h window.h $(INTERVALS_H) coding.h ../lib/md5.h \ ../lib/sha1.h ../lib/sha256.h ../lib/sha512.h blockinput.h atimer.h \ systime.h xterm.h ../lib/unistd.h globals.h diff --git a/src/fns.c b/src/fns.c index 06a64563806..2e8454532cb 100644 --- a/src/fns.c +++ b/src/fns.c @@ -39,9 +39,6 @@ along with GNU Emacs. If not, see . */ #include "puresize.h" #include "gnutls.h" -static void sort_vector_copy (Lisp_Object pred, ptrdiff_t len, - Lisp_Object src[restrict VLA_ELEMS (len)], - Lisp_Object dest[restrict VLA_ELEMS (len)]); enum equal_kind { EQUAL_NO_QUIT, EQUAL_PLAIN, EQUAL_INCLUDING_PROPERTIES }; static bool internal_equal (Lisp_Object, Lisp_Object, enum equal_kind, int, Lisp_Object); @@ -2166,8 +2163,10 @@ See also the function `nreverse', which is used more often. */) return new; } -/* Sort LIST using PREDICATE, preserving original order of elements - considered as equal. */ + +/* Stably sort LIST using PREDICATE. This converts the list to a + vector, sorts the vector using the TIMSORT algorithm, and converts + back to a list. */ static Lisp_Object sort_list (Lisp_Object list, Lisp_Object predicate) @@ -2175,97 +2174,34 @@ sort_list (Lisp_Object list, Lisp_Object predicate) ptrdiff_t length = list_length (list); if (length < 2) return list; - - Lisp_Object tem = Fnthcdr (make_fixnum (length / 2 - 1), list); - Lisp_Object back = Fcdr (tem); - Fsetcdr (tem, Qnil); - - return merge (Fsort (list, predicate), Fsort (back, predicate), predicate); -} - -/* Using PRED to compare, return whether A and B are in order. - Compare stably when A appeared before B in the input. */ -static bool -inorder (Lisp_Object pred, Lisp_Object a, Lisp_Object b) -{ - return NILP (call2 (pred, b, a)); -} - -/* Using PRED to compare, merge from ALEN-length A and BLEN-length B - into DEST. Argument arrays must be nonempty and must not overlap, - except that B might be the last part of DEST. */ -static void -merge_vectors (Lisp_Object pred, - ptrdiff_t alen, Lisp_Object const a[restrict VLA_ELEMS (alen)], - ptrdiff_t blen, Lisp_Object const b[VLA_ELEMS (blen)], - Lisp_Object dest[VLA_ELEMS (alen + blen)]) -{ - eassume (0 < alen && 0 < blen); - Lisp_Object const *alim = a + alen; - Lisp_Object const *blim = b + blen; - - while (true) + else { - if (inorder (pred, a[0], b[0])) + Lisp_Object *result; + USE_SAFE_ALLOCA; + SAFE_ALLOCA_LISP (result, length); + Lisp_Object tail = list; + for (ptrdiff_t i = 0; i < length; i++) { - *dest++ = *a++; - if (a == alim) - { - if (dest != b) - memcpy (dest, b, (blim - b) * sizeof *dest); - return; - } + result[i] = Fcar (tail); + tail = XCDR (tail); } - else + tim_sort (predicate, result, length); + + ptrdiff_t i = 0; + tail = list; + while (CONSP (tail)) { - *dest++ = *b++; - if (b == blim) - { - memcpy (dest, a, (alim - a) * sizeof *dest); - return; - } + XSETCAR (tail, result[i]); + tail = XCDR (tail); + i++; } + SAFE_FREE (); + return list; } } -/* Using PRED to compare, sort LEN-length VEC in place, using TMP for - temporary storage. LEN must be at least 2. */ -static void -sort_vector_inplace (Lisp_Object pred, ptrdiff_t len, - Lisp_Object vec[restrict VLA_ELEMS (len)], - Lisp_Object tmp[restrict VLA_ELEMS (len >> 1)]) -{ - eassume (2 <= len); - ptrdiff_t halflen = len >> 1; - sort_vector_copy (pred, halflen, vec, tmp); - if (1 < len - halflen) - sort_vector_inplace (pred, len - halflen, vec + halflen, vec); - merge_vectors (pred, halflen, tmp, len - halflen, vec + halflen, vec); -} - -/* Using PRED to compare, sort from LEN-length SRC into DST. - Len must be positive. */ -static void -sort_vector_copy (Lisp_Object pred, ptrdiff_t len, - Lisp_Object src[restrict VLA_ELEMS (len)], - Lisp_Object dest[restrict VLA_ELEMS (len)]) -{ - eassume (0 < len); - ptrdiff_t halflen = len >> 1; - if (halflen < 1) - dest[0] = src[0]; - else - { - if (1 < halflen) - sort_vector_inplace (pred, halflen, src, dest); - if (1 < len - halflen) - sort_vector_inplace (pred, len - halflen, src + halflen, dest); - merge_vectors (pred, halflen, src, len - halflen, src + halflen, dest); - } -} - -/* Sort VECTOR in place using PREDICATE, preserving original order of - elements considered as equal. */ +/* Stably sort VECTOR ordered by PREDICATE using the TIMSORT + algorithm. */ static void sort_vector (Lisp_Object vector, Lisp_Object predicate) @@ -2273,14 +2209,8 @@ sort_vector (Lisp_Object vector, Lisp_Object predicate) ptrdiff_t len = ASIZE (vector); if (len < 2) return; - ptrdiff_t halflen = len >> 1; - Lisp_Object *tmp; - USE_SAFE_ALLOCA; - SAFE_ALLOCA_LISP (tmp, halflen); - for (ptrdiff_t i = 0; i < halflen; i++) - tmp[i] = make_fixnum (0); - sort_vector_inplace (predicate, len, XVECTOR (vector)->contents, tmp); - SAFE_FREE (); + + tim_sort (predicate, XVECTOR (vector)->contents, len); } DEFUN ("sort", Fsort, Ssort, 2, 2, 0, diff --git a/src/lisp.h b/src/lisp.h index 315fb03fe63..43a4589efff 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -3903,6 +3903,10 @@ extern Lisp_Object string_to_multibyte (Lisp_Object); extern Lisp_Object string_make_unibyte (Lisp_Object); extern void syms_of_fns (void); +/* Defined in sort.c */ +extern void tim_sort (Lisp_Object, Lisp_Object *, const ptrdiff_t); +extern bool inorder (Lisp_Object, Lisp_Object, Lisp_Object); + /* Defined in floatfns.c. */ verify (FLT_RADIX == 2 || FLT_RADIX == 16); enum { LOG2_FLT_RADIX = FLT_RADIX == 2 ? 1 : 4 }; diff --git a/src/sort.c b/src/sort.c new file mode 100644 index 00000000000..33f5d033190 --- /dev/null +++ b/src/sort.c @@ -0,0 +1,961 @@ +/* Timsort for sequences. + +Copyright (C) 2022 Free Software Foundation, Inc. + +This file is part of GNU Emacs. + +GNU Emacs is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at +your option) any later version. + +GNU Emacs is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Emacs. If not, see . */ + +/* This is a version of the cpython code implementing the TIMSORT + sorting algorithm described in + https://github.com/python/cpython/blob/main/Objects/listsort.txt. + This algorithm identifies and pushes naturally ordered sublists of + the original list, or "runs", onto a stack, and merges them + periodically according to a merge strategy called "powersort". + State is maintained during the sort in a merge_state structure, + which is passed around as an argument to all the subroutines. A + "stretch" structure includes a pointer to the run BASE of length + LEN along with its POWER (a computed integer used by the powersort + merge strategy that depends on this run and the succeeding run. */ + + +#include +#include "lisp.h" + + +/* MAX_MERGE_PENDING is the maximum number of entries in merge_state's + pending-stretch stack. For a list with n elements, this needs at most + floor(log2(n)) + 1 entries even if we didn't force runs to a + minimal length. So the number of bits in a ptrdiff_t is plenty large + enough for all cases. */ + +#define MAX_MERGE_PENDING (sizeof (ptrdiff_t) * 8) + +/* Once we get into galloping mode, we stay there as long as both runs + win at least GALLOP_WIN_MIN consecutive times. */ + +#define GALLOP_WIN_MIN 7 + +/* A small temp array of size MERGESTATE_TEMP_SIZE is used to avoid + malloc when merging small lists. */ + +#define MERGESTATE_TEMP_SIZE 256 + +struct stretch +{ + Lisp_Object *base; + ptrdiff_t len; + int power; +}; + +struct reloc +{ + Lisp_Object **src; + Lisp_Object **dst; + ptrdiff_t *size; + int order; +}; + + +typedef struct +{ + Lisp_Object *listbase; + ptrdiff_t listlen; + + /* PENDING is a stack of N pending stretches yet to be merged. + Stretch #i starts at address base[i] and extends for len[i] + elements. */ + + int n; + struct stretch pending[MAX_MERGE_PENDING]; + + /* The variable MIN_GALLOP, initialized to GALLOP_WIN_MIN, controls + when we get *into* galloping mode. merge_lo and merge_hi tend to + nudge it higher for random data, and lower for highly structured + data. */ + + ptrdiff_t min_gallop; + + /* 'A' is temporary storage, able to hold ALLOCED elements, to help + with merges. If temporary storage is passed to the sorting entry + function, 'A' will point to it. Otherwise 'A' initially points + to TEMPARRAY, and subsequently to newly allocated memory if + needed. */ + + Lisp_Object *a; + ptrdiff_t alloced; + specpdl_ref count; + Lisp_Object temparray[MERGESTATE_TEMP_SIZE]; + + /* If an exception is thrown while merging we might have to relocate + some list elements from temporary storage back into the list. + RELOC keeps track of the information needed to do this. */ + + struct reloc reloc; + + /* PREDICATE is the lisp comparison predicate for the sort. */ + + Lisp_Object predicate; +} merge_state; + + +/* INORDER returns true iff (PREDICATE A B) is non-nil. */ + +inline bool +inorder (const Lisp_Object predicate, const Lisp_Object a, const Lisp_Object b) +{ + return !NILP (call2 (predicate, a, b)); +} + + +/* BINARYSORT() is a stable binary insertion sort used for sorting the + list starting at LO and ending at HI. On entry, LO <= START <= HI, + and [LO, START) is already sorted (pass START == LO if you don't + know!). Even in case of error, the output slice will be some + permutation of the input (nothing is lost or duplicated). */ + +static void +binarysort (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi, + Lisp_Object *start) +{ + Lisp_Object pred = ms->predicate; + + eassume (lo <= start && start <= hi); + if (lo == start) + ++start; + for (; start < hi; ++start) + { + Lisp_Object *l = lo; + Lisp_Object *r = start; + Lisp_Object pivot = *r; + Lisp_Object *p; + + eassume (l < r); + do { + p = l + ((r - l) >> 1); + if (inorder (pred, pivot, *p)) + r = p; + else + l = p + 1; + } while (l < r); + eassume (l == r); + for (p = start; p > l; --p) + p[0] = p[-1]; + *l = pivot; + } +} + + +/* COUNT_RUN() returns the length of the run beginning at LO, in the + slice [LO, HI) with LO < HI. A "run" is the longest + non-decreasing sequence or the longest strictly decreasing + sequence, with the Boolean *DESCENDING set to 0 in the former + case, or to 1 in the latter. The strictness of the definition of + "descending" is needed so that the caller can safely reverse a + descending sequence without violating stability (strict > ensures + there are no equal elements to get out of order). */ + +static ptrdiff_t +count_run (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi, bool *descending) +{ + Lisp_Object pred = ms->predicate; + + eassume (lo < hi); + *descending = 0; + ++lo; + ptrdiff_t n = 1; + if (lo == hi) + return n; + + n = 2; + if (inorder (pred, lo[0], lo[-1])) + { + *descending = 1; + for (lo = lo + 1; lo < hi; ++lo, ++n) + { + if (!inorder (pred, lo[0], lo[-1])) + break; + } + } + else + { + for (lo = lo + 1; lo < hi; ++lo, ++n) + { + if (inorder (pred, lo[0], lo[-1])) + break; + } + } + + return n; +} + + +/* GALLOP_LEFT() locates the proper position of KEY in a sorted + vector: if the vector contains an element equal to KEY, return the + position immediately to the left of the leftmost equal element. + [GALLOP_RIGHT() does the same except returns the position to the + right of the rightmost equal element (if any).] + + 'A' is a sorted vector with N elements, starting at A[0]. N must be > 0. + + HINT is an index at which to begin the search, 0 <= HINT < N. The closer + HINT is to the final result, the faster this runs. + + The return value is the int k in [0, N] such that + + A[k-1] < KEY <= a[k] + + pretending that *(A-1) is minus infinity and A[N] is plus infinity. IOW, + KEY belongs at index k; or, IOW, the first k elements of A should precede + KEY, and the last N-k should follow KEY. */ + +static ptrdiff_t +gallop_left (merge_state *ms, const Lisp_Object key, Lisp_Object *a, + const ptrdiff_t n, const ptrdiff_t hint) +{ + Lisp_Object pred = ms->predicate; + + eassume (key && a && n > 0 && hint >= 0 && hint < n); + + a += hint; + ptrdiff_t lastofs = 0; + ptrdiff_t ofs = 1; + if (inorder (pred, *a, key)) + { + /* When a[hint] < key, gallop right until + a[hint + lastofs] < key <= a[hint + ofs]. */ + const ptrdiff_t maxofs = n - hint; /* This is one after the end of a. */ + while (ofs < maxofs) + { + if (inorder (pred, a[ofs], key)) + { + lastofs = ofs; + eassume (ofs <= (PTRDIFF_MAX - 1) / 2); + ofs = (ofs << 1) + 1; + } + else + break; /* Here key <= a[hint+ofs]. */ + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to offsets relative to &a[0]. */ + lastofs += hint; + ofs += hint; + } + else + { + /* When key <= a[hint], gallop left, until + a[hint - ofs] < key <= a[hint - lastofs]. */ + const ptrdiff_t maxofs = hint + 1; /* Here &a[0] is lowest. */ + while (ofs < maxofs) + { + if (inorder (pred, a[-ofs], key)) + break; + /* Here key <= a[hint - ofs]. */ + lastofs = ofs; + eassume (ofs <= (PTRDIFF_MAX - 1) / 2); + ofs = (ofs << 1) + 1; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to use positive offsets relative to &a[0]. */ + ptrdiff_t k = lastofs; + lastofs = hint - ofs; + ofs = hint - k; + } + a -= hint; + + eassume (-1 <= lastofs && lastofs < ofs && ofs <= n); + /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the + right of lastofs but no farther right than ofs. Do a binary + search, with invariant a[lastofs-1] < key <= a[ofs]. */ + ++lastofs; + while (lastofs < ofs) + { + ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1); + + if (inorder (pred, a[m], key)) + lastofs = m + 1; /* Here a[m] < key. */ + else + ofs = m; /* Here key <= a[m]. */ + } + eassume (lastofs == ofs); /* Then a[ofs-1] < key <= a[ofs]. */ + return ofs; +} + + +/* GALLOP_RIGHT() is exactly like GALLOP_LEFT(), except that if KEY + already exists in A[0:N], it finds the position immediately to the + right of the rightmost equal value. + + The return value is the int k in [0, N] such that + + A[k-1] <= KEY < A[k]. */ + +static ptrdiff_t +gallop_right (merge_state *ms, const Lisp_Object key, Lisp_Object *a, + const ptrdiff_t n, const ptrdiff_t hint) +{ + Lisp_Object pred = ms->predicate; + + eassume (key && a && n > 0 && hint >= 0 && hint < n); + + a += hint; + ptrdiff_t lastofs = 0; + ptrdiff_t ofs = 1; + if (inorder (pred, key, *a)) + { + /* When key < a[hint], gallop left until + a[hint - ofs] <= key < a[hint - lastofs]. */ + const ptrdiff_t maxofs = hint + 1; /* Here &a[0] is lowest. */ + while (ofs < maxofs) + { + if (inorder (pred, key, a[-ofs])) + { + lastofs = ofs; + eassume (ofs <= (PTRDIFF_MAX - 1) / 2); + ofs = (ofs << 1) + 1; + } + else /* Here a[hint - ofs] <= key. */ + break; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to use positive offsets relative to &a[0]. */ + ptrdiff_t k = lastofs; + lastofs = hint - ofs; + ofs = hint - k; + } + else + { + /* When a[hint] <= key, gallop right, until + a[hint + lastofs] <= key < a[hint + ofs]. */ + const ptrdiff_t maxofs = n - hint; /* Here &a[n-1] is highest. */ + while (ofs < maxofs) + { + if (inorder (pred, key, a[ofs])) + break; + /* Here a[hint + ofs] <= key. */ + lastofs = ofs; + eassume (ofs <= (PTRDIFF_MAX - 1) / 2); + ofs = (ofs << 1) + 1; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to use offsets relative to &a[0]. */ + lastofs += hint; + ofs += hint; + } + a -= hint; + + eassume (-1 <= lastofs && lastofs < ofs && ofs <= n); + /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the + right of lastofs but no farther right than ofs. Do a binary + search, with invariant a[lastofs-1] <= key < a[ofs]. */ + ++lastofs; + while (lastofs < ofs) + { + ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1); + + if (inorder (pred, key, a[m])) + ofs = m; /* Here key < a[m]. */ + else + lastofs = m + 1; /* Here a[m] <= key. */ + } + eassume (lastofs == ofs); /* Now a[ofs-1] <= key < a[ofs]. */ + return ofs; +} + + +static void +merge_init (merge_state *ms, const ptrdiff_t list_size, Lisp_Object *lo, + const Lisp_Object predicate) +{ + eassume (ms != NULL); + + ms->a = ms->temparray; + ms->alloced = MERGESTATE_TEMP_SIZE; + + ms->n = 0; + ms->min_gallop = GALLOP_WIN_MIN; + ms->listlen = list_size; + ms->listbase = lo; + ms->predicate = predicate; + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; +} + + +/* The dynamically allocated memory may hold lisp objects during + merging. MERGE_MARKMEM marks them so they aren't reaped during + GC. */ + +static void +merge_markmem (void *arg) +{ + merge_state *ms = arg; + eassume (ms != NULL); + + if (ms->reloc.size != NULL && *ms->reloc.size > 0) + { + eassume (ms->reloc.src != NULL); + mark_objects (*ms->reloc.src, *ms->reloc.size); + } +} + + +/* CLEANUP_MEM frees all temp storage. If an exception occurs while + merging it will first relocate any lisp elements in temp storage + back to the original array. */ + +static void +cleanup_mem (void *arg) +{ + merge_state *ms = arg; + eassume (ms != NULL); + + /* If we have an exception while merging, some of the list elements + might only live in temp storage; we copy everything remaining in + the temp storage back into the original list. This ensures that + the original list has all of the original elements, although + their order is unpredictable. */ + + if (ms->reloc.order != 0 && *ms->reloc.size > 0) + { + eassume (*ms->reloc.src != NULL && *ms->reloc.dst != NULL); + ptrdiff_t n = *ms->reloc.size; + ptrdiff_t shift = ms->reloc.order == -1 ? 0 : n - 1; + memcpy (*ms->reloc.dst - shift, *ms->reloc.src, n * word_size); + } + + /* Free any remaining temp storage. */ + xfree (ms->a); +} + + +/* MERGE_GETMEM() ensures availability of enough temp memory for NEED + array slots. Any previously allocated memory is first freed, and a + cleanup routine is registered to free memory at the very end, or on + exception. */ + +static void +merge_getmem (merge_state *ms, const ptrdiff_t need) +{ + eassume (ms != NULL); + + if (ms->a == ms->temparray) + { + /* We only get here if alloc is needed and this is the first + time, so we set up the unwind. */ + specpdl_ref count = SPECPDL_INDEX (); + record_unwind_protect_ptr_mark (cleanup_mem, ms, merge_markmem); + ms->count = count; + } + else + { + /* We have previously alloced storage. Since we don't care + what's in the block we don't use realloc which would waste + cycles copying the old data. We just free and alloc + again. */ + xfree (ms->a); + ms->a = NULL; + } + ms->a = (Lisp_Object *) xmalloc (need * word_size); + if (ms->a != NULL) + ms->alloced = need; +} + + +/* MERGE_LO() stably merges the NA elements starting at SSA with the + NB elements starting at SSB = SSA + NA, in-place. NA and NB must + be positive. We also require that SSA[NA-1] belongs at the end of + the merge, and should have NA <= NB. */ + +static void +merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb, + ptrdiff_t nb) +{ + Lisp_Object pred = ms->predicate; + + eassume (ms && ssa && ssb && na > 0 && nb > 0); + eassume (ssa + na == ssb); + na <= ms->alloced ? 0 : merge_getmem (ms, na); + + memcpy (ms->a, ssa, na * word_size); + Lisp_Object *dest = ssa; + ssa = ms->a; + + ms->reloc = (struct reloc){&ssa, &dest, &na, -1}; + + *dest++ = *ssb++; + --nb; + if (nb == 0) + goto Succeed; + if (na == 1) + goto CopyB; + + ptrdiff_t min_gallop = ms->min_gallop; + for (;;) + { + ptrdiff_t acount = 0; /* This holds the # of consecutive times A won. */ + + ptrdiff_t bcount = 0; /* This holds the # of consecutive times B won. */ + + for (;;) + { + eassume (na > 1 && nb > 0); + if (inorder (pred, *ssb, *ssa)) + { + *dest++ = *ssb++ ; + ++bcount; + acount = 0; + --nb; + if (nb == 0) + goto Succeed; + if (bcount >= min_gallop) + break; + } + else + { + *dest++ = *ssa++; + ++acount; + bcount = 0; + --na; + if (na == 1) + goto CopyB; + if (acount >= min_gallop) + break; + } + } + + /* One run is winning so consistently that galloping may be a huge + win. We try that, and continue galloping until (if ever) + neither run appears to be winning consistently anymore. */ + ++min_gallop; + do { + eassume (na > 1 && nb > 0); + min_gallop -= min_gallop > 1; + ms->min_gallop = min_gallop; + ptrdiff_t k = gallop_right (ms, ssb[0], ssa, na, 0); + acount = k; + if (k) + { + memcpy (dest, ssa, k * word_size); + dest += k; + ssa += k; + na -= k; + if (na == 1) + goto CopyB; + /* While na==0 is impossible now if the comparison function is + consistent, we shouldn't assume that it is. */ + if (na == 0) + goto Succeed; + } + *dest++ = *ssb++ ; + --nb; + if (nb == 0) + goto Succeed; + + k = gallop_left (ms, ssa[0], ssb, nb, 0); + bcount = k; + if (k) + { + memmove (dest, ssb, k * word_size); + dest += k; + ssb += k; + nb -= k; + if (nb == 0) + goto Succeed; + } + *dest++ = *ssa++; + --na; + if (na == 1) + goto CopyB; + } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN); + ++min_gallop; /* Apply a penalty for leaving galloping mode. */ + ms->min_gallop = min_gallop; + } + Succeed: + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + + if (na) + memcpy (dest, ssa, na * word_size); + return; + CopyB: + eassume (na == 1 && nb > 0); + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + + /* The last element of ssa belongs at the end of the merge. */ + memmove (dest, ssb, nb * word_size); + dest[nb] = ssa[0]; +} + + +/* MERGE_HI() stably merges the NA elements starting at SSA with the + NB elements starting at SSB = SSA + NA, in-place. NA and NB must + be positive. We also require that SSA[NA-1] belongs at the end of + the merge, and should have NA >= NB. */ + +static void +merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, + Lisp_Object *ssb, ptrdiff_t nb) +{ + Lisp_Object pred = ms->predicate; + + eassume (ms && ssa && ssb && na > 0 && nb > 0); + eassume (ssa + na == ssb); + nb <= ms->alloced ? 0 : merge_getmem(ms, nb); + Lisp_Object *dest = ssb; + dest += nb - 1; + memcpy(ms->a, ssb, nb * word_size); + Lisp_Object *basea = ssa; + Lisp_Object *baseb = ms->a; + ssb = ms->a + nb - 1; + ssa += na - 1; + + ms->reloc = (struct reloc){&baseb, &dest, &nb, 1}; + + *dest-- = *ssa--; + --na; + if (na == 0) + goto Succeed; + if (nb == 1) + goto CopyA; + + ptrdiff_t min_gallop = ms->min_gallop; + for (;;) { + ptrdiff_t acount = 0; /* This holds the # of consecutive times A won. */ + ptrdiff_t bcount = 0; /* This holds the # of consecutive times B won. */ + + for (;;) { + eassume (na > 0 && nb > 1); + if (inorder (pred, *ssb, *ssa)) + { + *dest-- = *ssa--; + ++acount; + bcount = 0; + --na; + if (na == 0) + goto Succeed; + if (acount >= min_gallop) + break; + } + else + { + *dest-- = *ssb--; + ++bcount; + acount = 0; + --nb; + if (nb == 1) + goto CopyA; + if (bcount >= min_gallop) + break; + } + } + + /* One run is winning so consistently that galloping may be a huge + win. Try that, and continue galloping until (if ever) neither + run appears to be winning consistently anymore. */ + ++min_gallop; + do { + eassume (na > 0 && nb > 1); + min_gallop -= min_gallop > 1; + ms->min_gallop = min_gallop; + ptrdiff_t k = gallop_right (ms, ssb[0], basea, na, na - 1); + k = na - k; + acount = k; + if (k) + { + dest += -k; + ssa += -k; + memmove(dest + 1, ssa + 1, k * word_size); + na -= k; + if (na == 0) + goto Succeed; + } + *dest-- = *ssb--; + --nb; + if (nb == 1) + goto CopyA; + + k = gallop_left (ms, ssa[0], baseb, nb, nb - 1); + k = nb - k; + bcount = k; + if (k) + { + dest += -k; + ssb += -k; + memcpy(dest + 1, ssb + 1, k * word_size); + nb -= k; + if (nb == 1) + goto CopyA; + /* While nb==0 is impossible now if the comparison function + is consistent, we shouldn't assume that it is. */ + if (nb == 0) + goto Succeed; + } + *dest-- = *ssa--; + --na; + if (na == 0) + goto Succeed; + } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN); + ++min_gallop; /* Apply a penalty for leaving galloping mode. */ + ms->min_gallop = min_gallop; + } + Succeed: + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + if (nb) + memcpy (dest - nb + 1, baseb, nb * word_size); + return; + CopyA: + eassume (nb == 1 && na > 0); + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + /* The first element of ssb belongs at the front of the merge. */ + memmove (dest + 1 - na, ssa + 1 - na, na * word_size); + dest += -na; + ssa += -na; + dest[0] = ssb[0]; +} + + +/* MERGE_AT() merges the two runs at stack indices I and I+1. */ + +static void +merge_at (merge_state *ms, const ptrdiff_t i) +{ + eassume (ms != NULL); + eassume (ms->n >= 2); + eassume (i >= 0); + eassume (i == ms->n - 2 || i == ms->n - 3); + + Lisp_Object *ssa = ms->pending[i].base; + ptrdiff_t na = ms->pending[i].len; + Lisp_Object *ssb = ms->pending[i + 1].base; + ptrdiff_t nb = ms->pending[i + 1].len; + eassume (na > 0 && nb > 0); + eassume (ssa + na == ssb); + + /* Record the length of the combined runs; if i is the 3rd-last run + now, also slide over the last run (which isn't involved in this + merge). The current run i+1 goes away in any case. */ + ms->pending[i].len = na + nb; + if (i == ms->n - 3) + ms->pending[i + 1] = ms->pending[i + 2]; + --ms->n; + + /* Where does b start in a? Elements in a before that can be + ignored (they are already in place). */ + ptrdiff_t k = gallop_right (ms, *ssb, ssa, na, 0); + eassume (k >= 0); + ssa += k; + na -= k; + if (na == 0) + return; + + /* Where does a end in b? Elements in b after that can be ignored + (they are already in place). */ + nb = gallop_left (ms, ssa[na - 1], ssb, nb, nb - 1); + if (nb == 0) + return; + eassume (nb > 0); + /* Merge what remains of the runs using a temp array with size + min(na, nb) elements. */ + if (na <= nb) + merge_lo (ms, ssa, na, ssb, nb); + else + merge_hi (ms, ssa, na, ssb, nb); +} + + +/* POWERLOOP() computes the "power" of the first of two adjacent runs + begining at index S1, with the first having length N1 and the + second (starting at index S1+N1) having length N2. The list has + total length N. */ + +static int +powerloop (const ptrdiff_t s1, const ptrdiff_t n1, const ptrdiff_t n2, + const ptrdiff_t n) +{ + eassume (s1 >= 0); + eassume (n1 > 0 && n2 > 0); + eassume (s1 + n1 + n2 <= n); + /* The midpoints a and b are + a = s1 + n1/2 + b = s1 + n1 + n2/2 = a + (n1 + n2)/2 + + These may not be integers because of the "/2", so we work with + 2*a and 2*b instead. It makes no difference to the outcome, + since the bits in the expansion of (2*i)/n are merely shifted one + position from those of i/n. */ + ptrdiff_t a = 2 * s1 + n1; + ptrdiff_t b = a + n1 + n2; + int result = 0; + /* Emulate a/n and b/n one bit a time, until their bits differ. */ + for (;;) + { + ++result; + if (a >= n) + { /* Both quotient bits are now 1. */ + eassume (b >= a); + a -= n; + b -= n; + } + else if (b >= n) + { /* a/n bit is 0 and b/n bit is 1. */ + break; + } /* Otherwise both quotient bits are 0. */ + eassume (a < b && b < n); + a <<= 1; + b <<= 1; + } + return result; +} + + +/* FOUND_NEW_RUN() updates the state when a run of length N2 has been + identified. If there's already a stretch on the stack, apply the + "powersort" merge strategy: compute the topmost stretch's "power" + (depth in a conceptual binary merge tree) and merge adjacent runs + on the stack with greater power. */ + +static void +found_new_run (merge_state *ms, const ptrdiff_t n2) +{ + eassume (ms != NULL); + if (ms->n) + { + eassume (ms->n > 0); + struct stretch *p = ms->pending; + ptrdiff_t s1 = p[ms->n - 1].base - ms->listbase; + ptrdiff_t n1 = p[ms->n - 1].len; + int power = powerloop (s1, n1, n2, ms->listlen); + while (ms->n > 1 && p[ms->n - 2].power > power) + { + merge_at (ms, ms->n - 2); + } + eassume (ms->n < 2 || p[ms->n - 2].power < power); + p[ms->n - 1].power = power; + } +} + + +/* MERGE_FORCE_COLLAPSE() unconditionally merges all stretches on the + stack until only one remains, and returns 0 on success. This is + used at the end of the mergesort. */ + +static void +merge_force_collapse (merge_state *ms) +{ + struct stretch *p = ms->pending; + + eassume (ms != NULL); + while (ms->n > 1) + { + ptrdiff_t n = ms->n - 2; + if (n > 0 && p[n - 1].len < p[n + 1].len) + --n; + merge_at (ms, n); + } +} + + +/* MERGE_COMPUTE_MINRUN() computes a good value for the minimum run + length; natural runs shorter than this are boosted artificially via + binary insertion. + + If N < 64, return N (it's too small to bother with fancy stuff). + Otherwise if N is an exact power of 2, return 32. Finally, return + an int k, 32 <= k <= 64, such that N/k is close to, but strictly + less than, an exact power of 2. */ + +static ptrdiff_t +merge_compute_minrun (ptrdiff_t n) +{ + ptrdiff_t r = 0; /* r will become 1 if any non-zero bits are + shifted off. */ + + eassume (n >= 0); + while (n >= 64) + { + r |= n & 1; + n >>= 1; + } + return n + r; +} + + +static void +reverse_vector (Lisp_Object *s, const ptrdiff_t n) +{ + for (ptrdiff_t i = 0; i < n / 2; i++) + { + Lisp_Object tem = s[i]; + s[i] = s[n - i - 1]; + s[n - i - 1] = tem; + } +} + +/* TIM_SORT sorts the array SEQ with LENGTH elements in the order + determined by PREDICATE. */ + +void +tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length) +{ + merge_state ms; + Lisp_Object *lo = seq; + + merge_init (&ms, length, lo, predicate); + + if (length < 2) + return; + + /* March over the array once, left to right, finding natural runs, + and extending short natural runs to minrun elements. */ + const ptrdiff_t minrun = merge_compute_minrun (length); + ptrdiff_t nremaining = length; + do { + bool descending; + + /* Identify the next run. */ + ptrdiff_t n = count_run (&ms, lo, lo + nremaining, &descending); + if (descending) + reverse_vector (lo, n); + /* If the run is short, extend it to min(minrun, nremaining). */ + if (n < minrun) + { + const ptrdiff_t force = nremaining <= minrun ? + nremaining : minrun; + binarysort (&ms, lo, lo + force, lo + n); + n = force; + } + eassume (ms.n == 0 || ms.pending[ms.n - 1].base + + ms.pending[ms.n - 1].len == lo); + found_new_run (&ms, n); + /* Push the new run on to the stack. */ + eassume (ms.n < MAX_MERGE_PENDING); + ms.pending[ms.n].base = lo; + ms.pending[ms.n].len = n; + ++ms.n; + /* Advance to find the next run. */ + lo += n; + nremaining -= n; + } while (nremaining); + + merge_force_collapse (&ms); + eassume (ms.n == 1); + eassume (ms.pending[0].len == length); + lo = ms.pending[0].base; + + if (ms.a != ms.temparray && ms.alloced <= ms.listlen >> 1) + unbind_to (ms.count, Qnil); +} -- 2.39.5