#include "lisp.h"
+/* Reverse a slice of a vector in place, from lo up to (exclusive) hi. */
+static void
+reverse_slice(Lisp_Object *lo, Lisp_Object *hi)
+{
+ eassert (lo && hi);
+
+ --hi;
+ while (lo < hi) {
+ Lisp_Object t = *lo;
+ *lo = *hi;
+ *hi = t;
+ ++lo;
+ --hi;
+ }
+}
+
+/* A sortslice contains a pointer to an array of keys and a pointer to
+ an array of corresponding values. In other words, keys[i]
+ corresponds with values[i]. If values == NULL, then the keys are
+ also the values.
+
+ Several convenience routines are provided here, so that keys and
+ values are always moved in sync. */
+
+typedef struct {
+ Lisp_Object *keys;
+ Lisp_Object *values;
+} sortslice;
+
+/* FIXME: Instead of values=NULL, can we set values=keys, so that they
+ are both moved in lockstep and we avoid a lot of branches?
+ We may do some useless work but it might be cheaper overall. */
+
+static inline void
+sortslice_copy (sortslice *s1, ptrdiff_t i, sortslice *s2, ptrdiff_t j)
+{
+ s1->keys[i] = s2->keys[j];
+ if (s1->values != NULL)
+ s1->values[i] = s2->values[j];
+}
+
+static inline void
+sortslice_copy_incr (sortslice *dst, sortslice *src)
+{
+ *dst->keys++ = *src->keys++;
+ if (dst->values != NULL)
+ *dst->values++ = *src->values++;
+}
+
+static inline void
+sortslice_copy_decr (sortslice *dst, sortslice *src)
+{
+ *dst->keys-- = *src->keys--;
+ if (dst->values != NULL)
+ *dst->values-- = *src->values--;
+}
+
+
+static inline void
+sortslice_memcpy (sortslice *s1, ptrdiff_t i, sortslice *s2, ptrdiff_t j,
+ ptrdiff_t n)
+{
+ memcpy (&s1->keys[i], &s2->keys[j], sizeof s1->keys[0] * n);
+ if (s1->values != NULL)
+ memcpy (&s1->values[i], &s2->values[j], sizeof s1->values[0] * n);
+}
+
+static inline void
+sortslice_memmove (sortslice *s1, ptrdiff_t i, sortslice *s2, ptrdiff_t j,
+ ptrdiff_t n)
+{
+ memmove (&s1->keys[i], &s2->keys[j], sizeof s1->keys[0] * n);
+ if (s1->values != NULL)
+ memmove (&s1->values[i], &s2->values[j], sizeof s1->values[0] * n);
+}
+
+static inline void
+sortslice_advance (sortslice *slice, ptrdiff_t n)
+{
+ slice->keys += n;
+ if (slice->values != NULL)
+ slice->values += n;
+}
+
/* MAX_MERGE_PENDING is the maximum number of entries in merge_state's
pending-stretch stack. For a list with n elements, this needs at most
floor(log2(n)) + 1 entries even if we didn't force runs to a
struct stretch
{
- Lisp_Object *base;
+ sortslice base;
ptrdiff_t len;
int power;
};
struct reloc
{
- Lisp_Object **src;
- Lisp_Object **dst;
+ sortslice *src;
+ sortslice *dst;
ptrdiff_t *size;
int order; /* -1 while in merge_lo; +1 while in merg_hi; 0 otherwise. */
};
typedef struct
{
- Lisp_Object *listbase;
+ Lisp_Object *basekeys;
+ Lisp_Object *allocated_keys; /* heap-alloc'ed key array or NULL */
ptrdiff_t listlen;
/* PENDING is a stack of N pending stretches yet to be merged.
with merges. 'A' initially points to TEMPARRAY, and subsequently
to newly allocated memory if needed. */
- Lisp_Object *a;
+ sortslice a;
ptrdiff_t alloced;
specpdl_ref count;
Lisp_Object temparray[MERGESTATE_TEMP_SIZE];
permutation of the input (nothing is lost or duplicated). */
static void
-binarysort (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi,
+binarysort (merge_state *ms, sortslice lo, const Lisp_Object *hi,
Lisp_Object *start)
{
Lisp_Object pred = ms->predicate;
- eassume (lo <= start && start <= hi);
- if (lo == start)
+ eassume (lo.keys <= start && start <= hi);
+ if (lo.keys == start)
++start;
for (; start < hi; ++start)
{
- Lisp_Object *l = lo;
+ Lisp_Object *l = lo.keys;
Lisp_Object *r = start;
Lisp_Object pivot = *r;
for (Lisp_Object *p = start; p > l; --p)
p[0] = p[-1];
*l = pivot;
+
+ if (lo.values != NULL)
+ {
+ ptrdiff_t offset = lo.values - lo.keys;
+ Lisp_Object *p = start + offset;
+ pivot = *p;
+ l += offset;
+ for (Lisp_Object *p = start + offset; p > l; --p)
+ p[0] = p[-1];
+ *l = pivot;
+ }
}
}
}
+static void merge_register_cleanup (merge_state *ms);
+
static void
-merge_init (merge_state *ms, const ptrdiff_t list_size, Lisp_Object *lo,
- const Lisp_Object predicate)
+merge_init (merge_state *ms, const ptrdiff_t list_size,
+ Lisp_Object *allocated_keys, sortslice *lo, Lisp_Object predicate)
{
eassume (ms != NULL);
- ms->a = ms->temparray;
- ms->alloced = MERGESTATE_TEMP_SIZE;
+ if (lo->values != NULL)
+ {
+ /* The temporary space for merging will need at most half the list
+ size rounded up. Use the minimum possible space so we can use the
+ rest of temparray for other things. In particular, if there is
+ enough extra space, if will be used to store the keys. */
+ ms->alloced = (list_size + 1) / 2;
+
+ /* ms->alloced describes how many keys will be stored at
+ ms->temparray, but we also need to store the values. Hence,
+ ms->alloced is capped at half of MERGESTATE_TEMP_SIZE. */
+ if (MERGESTATE_TEMP_SIZE / 2 < ms->alloced)
+ ms->alloced = MERGESTATE_TEMP_SIZE / 2;
+ ms->a.values = &ms->temparray[ms->alloced];
+ }
+ else
+ {
+ ms->alloced = MERGESTATE_TEMP_SIZE;
+ ms->a.values = NULL;
+ }
+ ms->a.keys = ms->temparray;
ms->n = 0;
ms->min_gallop = GALLOP_WIN_MIN;
ms->listlen = list_size;
- ms->listbase = lo;
+ ms->basekeys = lo->keys;
+ ms->allocated_keys = allocated_keys;
ms->predicate = predicate;
ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
+ ms->count = make_invalid_specpdl_ref ();
+ if (allocated_keys != NULL)
+ merge_register_cleanup (ms);
}
if (ms->reloc.size != NULL && *ms->reloc.size > 0)
{
- eassume (ms->reloc.src != NULL);
- mark_objects (*ms->reloc.src, *ms->reloc.size);
+ Lisp_Object *src = (ms->reloc.src->values
+ ? ms->reloc.src->values : ms->reloc.src->keys);
+ eassume (src != NULL);
+ mark_objects (src, *ms->reloc.size);
}
}
if (ms->reloc.order != 0 && *ms->reloc.size > 0)
{
- eassume (*ms->reloc.src != NULL && *ms->reloc.dst != NULL);
+ Lisp_Object *src = (ms->reloc.src->values
+ ? ms->reloc.src->values : ms->reloc.src->keys);
+ Lisp_Object *dst = (ms->reloc.dst->values
+ ? ms->reloc.dst->values : ms->reloc.dst->keys);
+ eassume (src != NULL && dst != NULL);
ptrdiff_t n = *ms->reloc.size;
ptrdiff_t shift = ms->reloc.order == -1 ? 0 : n - 1;
- memcpy (*ms->reloc.dst - shift, *ms->reloc.src, n * word_size);
+ memcpy (dst - shift, src, n * word_size);
}
/* Free any remaining temp storage. */
- xfree (ms->a);
+ if (ms->a.keys != ms->temparray)
+ {
+ xfree (ms->a.keys);
+ ms->a.keys = NULL;
+ }
+
+ if (ms->allocated_keys != NULL)
+ {
+ xfree (ms->allocated_keys);
+ ms->allocated_keys = NULL;
+ }
}
+static void
+merge_register_cleanup (merge_state *ms)
+{
+ specpdl_ref count = SPECPDL_INDEX ();
+ record_unwind_protect_ptr_mark (cleanup_mem, ms, merge_markmem);
+ ms->count = count;
+}
/* Allocate enough temp memory for NEED array slots. Any previously
allocated memory is first freed, and a cleanup routine is
{
eassume (ms != NULL);
- if (ms->a == ms->temparray)
+ if (ms->a.keys == ms->temparray)
{
/* We only get here if alloc is needed and this is the first
time, so we set up the unwind protection. */
- specpdl_ref count = SPECPDL_INDEX ();
- record_unwind_protect_ptr_mark (cleanup_mem, ms, merge_markmem);
- ms->count = count;
+ if (!specpdl_ref_valid_p (ms->count))
+ merge_register_cleanup (ms);
}
else
{
what's in the block we don't use realloc which would waste
cycles copying the old data. We just free and alloc
again. */
- xfree (ms->a);
+ xfree (ms->a.keys);
}
- ms->a = xmalloc (need * word_size);
+ ptrdiff_t bytes = (need * word_size) << (ms->a.values != NULL ? 1 : 0);
+ ms->a.keys = xmalloc (bytes);
ms->alloced = need;
+ if (ms->a.values != NULL)
+ ms->a.values = &ms->a.keys[need];
}
NB. */
static void
-merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb,
- ptrdiff_t nb)
+merge_lo (merge_state *ms, sortslice ssa, ptrdiff_t na,
+ sortslice ssb, ptrdiff_t nb)
{
Lisp_Object pred = ms->predicate;
- eassume (ms && ssa && ssb && na > 0 && nb > 0);
- eassume (ssa + na == ssb);
+ eassume (ms && ssa.keys && ssb.keys && na > 0 && nb > 0);
+ eassume (ssa.keys + na == ssb.keys);
needmem (ms, na);
- memcpy (ms->a, ssa, na * word_size);
- Lisp_Object *dest = ssa;
+ sortslice_memcpy (&ms->a, 0, &ssa, 0, na);
+ sortslice dest = ssa;
ssa = ms->a;
ms->reloc = (struct reloc){&ssa, &dest, &na, -1};
- *dest++ = *ssb++;
+ sortslice_copy_incr (&dest, &ssb);
--nb;
if (nb == 0)
goto Succeed;
for (;;)
{
eassume (na > 1 && nb > 0);
- if (inorder (pred, *ssb, *ssa))
+ if (inorder (pred, ssb.keys[0], ssa.keys[0]))
{
- *dest++ = *ssb++ ;
+ sortslice_copy_incr (&dest, &ssb);
++bcount;
acount = 0;
--nb;
}
else
{
- *dest++ = *ssa++;
+ sortslice_copy_incr (&dest, &ssa);
++acount;
bcount = 0;
--na;
eassume (na > 1 && nb > 0);
min_gallop -= min_gallop > 1;
ms->min_gallop = min_gallop;
- ptrdiff_t k = gallop_right (ms, ssb[0], ssa, na, 0);
+ ptrdiff_t k = gallop_right (ms, ssb.keys[0], ssa.keys, na, 0);
acount = k;
if (k)
{
- memcpy (dest, ssa, k * word_size);
- dest += k;
- ssa += k;
+ sortslice_memcpy (&dest, 0, &ssa, 0, k);
+ sortslice_advance (&dest, k);
+ sortslice_advance (&ssa, k);
na -= k;
if (na == 1)
goto CopyB;
if (na == 0)
goto Succeed;
}
- *dest++ = *ssb++ ;
+ sortslice_copy_incr (&dest, &ssb);
--nb;
if (nb == 0)
goto Succeed;
- k = gallop_left (ms, ssa[0], ssb, nb, 0);
+ k = gallop_left (ms, ssa.keys[0], ssb.keys, nb, 0);
bcount = k;
if (k)
{
- memmove (dest, ssb, k * word_size);
- dest += k;
- ssb += k;
+ sortslice_memmove (&dest, 0, &ssb, 0, k);
+ sortslice_advance (&dest, k);
+ sortslice_advance (&ssb, k);
nb -= k;
if (nb == 0)
goto Succeed;
}
- *dest++ = *ssa++;
+ sortslice_copy_incr (&dest, &ssa);
--na;
if (na == 1)
goto CopyB;
ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
if (na)
- memcpy (dest, ssa, na * word_size);
+ sortslice_memcpy(&dest, 0, &ssa, 0, na);
return;
CopyB:
eassume (na == 1 && nb > 0);
ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
/* The last element of ssa belongs at the end of the merge. */
- memmove (dest, ssb, nb * word_size);
- dest[nb] = ssa[0];
+ sortslice_memmove (&dest, 0, &ssb, 0, nb);
+ sortslice_copy (&dest, nb, &ssa, 0);
}
NB. */
static void
-merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na,
- Lisp_Object *ssb, ptrdiff_t nb)
+merge_hi (merge_state *ms, sortslice ssa, ptrdiff_t na,
+ sortslice ssb, ptrdiff_t nb)
{
Lisp_Object pred = ms->predicate;
- eassume (ms && ssa && ssb && na > 0 && nb > 0);
- eassume (ssa + na == ssb);
+ eassume (ms && ssa.keys && ssb.keys && na > 0 && nb > 0);
+ eassume (ssa.keys + na == ssb.keys);
needmem (ms, nb);
- Lisp_Object *dest = ssb;
- dest += nb - 1;
- memcpy(ms->a, ssb, nb * word_size);
- Lisp_Object *basea = ssa;
- Lisp_Object *baseb = ms->a;
- ssb = ms->a + nb - 1;
- ssa += na - 1;
+ sortslice dest = ssb;
+ sortslice_advance (&dest, nb-1);
+ sortslice_memcpy (&ms->a, 0, &ssb, 0, nb);
+ sortslice basea = ssa;
+ sortslice baseb = ms->a;
+ ssb.keys = ms->a.keys + nb - 1;
+ if (ssb.values != NULL)
+ ssb.values = ms->a.values + nb - 1;
+ sortslice_advance (&ssa, na - 1);
ms->reloc = (struct reloc){&baseb, &dest, &nb, 1};
- *dest-- = *ssa--;
+ sortslice_copy_decr (&dest, &ssa);
--na;
if (na == 0)
goto Succeed;
for (;;) {
eassume (na > 0 && nb > 1);
- if (inorder (pred, *ssb, *ssa))
+ if (inorder (pred, ssb.keys[0], ssa.keys[0]))
{
- *dest-- = *ssa--;
+ sortslice_copy_decr (&dest, &ssa);
++acount;
bcount = 0;
--na;
}
else
{
- *dest-- = *ssb--;
+ sortslice_copy_decr (&dest, &ssb);
++bcount;
acount = 0;
--nb;
eassume (na > 0 && nb > 1);
min_gallop -= min_gallop > 1;
ms->min_gallop = min_gallop;
- ptrdiff_t k = gallop_right (ms, ssb[0], basea, na, na - 1);
+ ptrdiff_t k = gallop_right (ms, ssb.keys[0], basea.keys, na, na - 1);
k = na - k;
acount = k;
if (k)
{
- dest += -k;
- ssa += -k;
- memmove(dest + 1, ssa + 1, k * word_size);
+ sortslice_advance (&dest, -k);
+ sortslice_advance (&ssa, -k);
+ sortslice_memmove (&dest, 1, &ssa, 1, k);
na -= k;
if (na == 0)
goto Succeed;
}
- *dest-- = *ssb--;
+ sortslice_copy_decr(&dest, &ssb);
--nb;
if (nb == 1)
goto CopyA;
- k = gallop_left (ms, ssa[0], baseb, nb, nb - 1);
+ k = gallop_left (ms, ssa.keys[0], baseb.keys, nb, nb - 1);
k = nb - k;
bcount = k;
if (k)
{
- dest += -k;
- ssb += -k;
- memcpy(dest + 1, ssb + 1, k * word_size);
+ sortslice_advance (&dest, -k);
+ sortslice_advance (&ssb, -k);
+ sortslice_memcpy (&dest, 1, &ssb, 1, k);
nb -= k;
if (nb == 1)
goto CopyA;
if (nb == 0)
goto Succeed;
}
- *dest-- = *ssa--;
+ sortslice_copy_decr (&dest, &ssa);
--na;
if (na == 0)
goto Succeed;
Succeed:
ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
if (nb)
- memcpy (dest - nb + 1, baseb, nb * word_size);
+ sortslice_memcpy (&dest, -(nb-1), &baseb, 0, nb);
return;
CopyA:
eassume (nb == 1 && na > 0);
ms->reloc = (struct reloc){NULL, NULL, NULL, 0};
/* The first element of ssb belongs at the front of the merge. */
- memmove (dest + 1 - na, ssa + 1 - na, na * word_size);
- dest += -na;
- ssa += -na;
- dest[0] = ssb[0];
+ sortslice_memmove (&dest, 1-na, &ssa, 1-na, na);
+ sortslice_advance (&dest, -na);
+ sortslice_advance (&ssa, -na);
+ sortslice_copy (&dest, 0, &ssb, 0);
}
eassume (i >= 0);
eassume (i == ms->n - 2 || i == ms->n - 3);
- Lisp_Object *ssa = ms->pending[i].base;
+ sortslice ssa = ms->pending[i].base;
ptrdiff_t na = ms->pending[i].len;
- Lisp_Object *ssb = ms->pending[i + 1].base;
+ sortslice ssb = ms->pending[i + 1].base;
ptrdiff_t nb = ms->pending[i + 1].len;
eassume (na > 0 && nb > 0);
- eassume (ssa + na == ssb);
+ eassume (ssa.keys + na == ssb.keys);
/* Record the length of the combined runs. The current run i+1 goes
away after the merge. If i is the 3rd-last run now, slide the
/* Where does b start in a? Elements in a before that can be
ignored (they are already in place). */
- ptrdiff_t k = gallop_right (ms, *ssb, ssa, na, 0);
+ ptrdiff_t k = gallop_right (ms, *ssb.keys, ssa.keys, na, 0);
eassume (k >= 0);
- ssa += k;
+ sortslice_advance (&ssa, k);
na -= k;
if (na == 0)
return;
/* Where does a end in b? Elements in b after that can be ignored
(they are already in place). */
- nb = gallop_left (ms, ssa[na - 1], ssb, nb, nb - 1);
+ nb = gallop_left (ms, ssa.keys[na - 1], ssb.keys, nb, nb - 1);
if (nb == 0)
return;
eassume (nb > 0);
{
eassume (ms->n > 0);
struct stretch *p = ms->pending;
- ptrdiff_t s1 = p[ms->n - 1].base - ms->listbase;
+ ptrdiff_t s1 = p[ms->n - 1].base.keys - ms->basekeys;
ptrdiff_t n1 = p[ms->n - 1].len;
int power = powerloop (s1, n1, n2, ms->listlen);
while (ms->n > 1 && p[ms->n - 2].power > power)
static void
-reverse_vector (Lisp_Object *s, const ptrdiff_t n)
+reverse_sortslice (sortslice *s, const ptrdiff_t n)
{
- for (ptrdiff_t i = 0; i < n >> 1; i++)
+ reverse_slice(s->keys, &s->keys[n]);
+ if (s->values != NULL)
+ reverse_slice(s->values, &s->values[n]);
+}
+
+static Lisp_Object
+resolve_fun (Lisp_Object fun)
+{
+ if (SYMBOLP (fun))
{
- Lisp_Object tem = s[i];
- s[i] = s[n - i - 1];
- s[n - i - 1] = tem;
+ /* Attempt to resolve the function as far as possible ahead of time,
+ to avoid having to do it for each call. */
+ Lisp_Object f = XSYMBOL (fun)->u.s.function;
+ if (SYMBOLP (f))
+ /* Function was an alias; use slow-path resolution. */
+ f = indirect_function (f);
+ /* Don't resolve to an autoload spec; that would be very slow. */
+ if (!NILP (f) && !(CONSP (f) && EQ (XCAR (f), Qautoload)))
+ fun = f;
}
+ return fun;
}
/* Sort the array SEQ with LENGTH elements in the order determined by
PREDICATE. */
void
-tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length)
+tim_sort (Lisp_Object predicate, Lisp_Object keyfunc,
+ Lisp_Object *seq, const ptrdiff_t length)
{
- if (SYMBOLP (predicate))
+ /* FIXME: optimise for the predicate being value<; at the very
+ least we'd go without the Lisp funcall overhead. */
+ predicate = resolve_fun (predicate);
+
+ sortslice lo;
+ Lisp_Object *keys;
+ Lisp_Object *allocated_keys = NULL;
+ merge_state ms;
+
+ /* FIXME: hoist this to the caller? */
+ if (EQ (keyfunc, Qidentity))
+ keyfunc = Qnil;
+
+ /* FIXME: consider a built-in reverse sorting flag: we would reverse
+ the input in-place here and reverse it back just before
+ returning. */
+
+ if (NILP (keyfunc))
{
- /* Attempt to resolve the function as far as possible ahead of time,
- to avoid having to do it for each call. */
- Lisp_Object fun = XSYMBOL (predicate)->u.s.function;
- if (SYMBOLP (fun))
- /* Function was an alias; use slow-path resolution. */
- fun = indirect_function (fun);
- /* Don't resolve to an autoload spec; that would be very slow. */
- if (!NILP (fun) && !(CONSP (fun) && EQ (XCAR (fun), Qautoload)))
- predicate = fun;
+ keys = NULL;
+ lo.keys = seq;
+ lo.values = NULL;
}
+ else
+ {
+ keyfunc = resolve_fun (keyfunc);
+ if (length < MERGESTATE_TEMP_SIZE / 2)
+ keys = &ms.temparray[length + 1];
+ else
+ keys = allocated_keys = xmalloc (length * word_size);
- merge_state ms;
- Lisp_Object *lo = seq;
+ for (ptrdiff_t i = 0; i < length; i++)
+ keys[i] = call1 (keyfunc, seq[i]);
+
+ lo.keys = keys;
+ lo.values = seq;
+ }
+
+ /* FIXME: This is where we would check the keys for interesting
+ properties for more optimised comparison (such as all being fixnums
+ etc). */
- merge_init (&ms, length, lo, predicate);
+ merge_init (&ms, length, allocated_keys, &lo, predicate);
/* March over the array once, left to right, finding natural runs,
and extending short natural runs to minrun elements. */
bool descending;
/* Identify the next run. */
- ptrdiff_t n = count_run (&ms, lo, lo + nremaining, &descending);
+ ptrdiff_t n = count_run (&ms, lo.keys, lo.keys + nremaining, &descending);
if (descending)
- reverse_vector (lo, n);
+ reverse_sortslice (&lo, n);
/* If the run is short, extend it to min(minrun, nremaining). */
if (n < minrun)
{
const ptrdiff_t force = min (nremaining, minrun);
- binarysort (&ms, lo, lo + force, lo + n);
+ binarysort (&ms, lo, lo.keys + force, lo.keys + n);
n = force;
}
- eassume (ms.n == 0 || ms.pending[ms.n - 1].base +
- ms.pending[ms.n - 1].len == lo);
+ eassume (ms.n == 0
+ || (ms.pending[ms.n - 1].base.keys + ms.pending[ms.n - 1].len
+ == lo.keys));
found_new_run (&ms, n);
/* Push the new run on to the stack. */
eassume (ms.n < MAX_MERGE_PENDING);
ms.pending[ms.n].len = n;
++ms.n;
/* Advance to find the next run. */
- lo += n;
+ sortslice_advance(&lo, n);
nremaining -= n;
} while (nremaining);
eassume (ms.pending[0].len == length);
lo = ms.pending[0].base;
- if (ms.a != ms.temparray)
+ if (ms.a.keys != ms.temparray || allocated_keys != NULL)
unbind_to (ms.count, Qnil);
}