From 9ff2f0be32be621a0a1953cac2d552afebafe226 Mon Sep 17 00:00:00 2001 From: Andrew G Cohen Date: Thu, 10 Mar 2022 09:30:00 +0800 Subject: [PATCH] Replace list and vector sorting with TIMSORT algorithm * src/Makefile.in (base_obj): Add sort.o. * src/deps.mk (fns.o): Add sort.c. * src/lisp.h: Add prototypes for inorder, tim_sort. * src/sort.c: New file providing tim_sort. * src/fns.c: Remove prototypes for removed routines. (merge_vectors, sort_vector_inplace, sort_vector_copy): Remove. (sort_list, sort_vector): Use tim_sort. * test/src/fns-tests.el (fns-tests-sort): New sorting unit tests. --- src/Makefile.in | 2 +- src/deps.mk | 2 +- src/fns.c | 135 ++---- src/lisp.h | 3 + src/sort.c | 974 ++++++++++++++++++++++++++++++++++++++++++ test/src/fns-tests.el | 70 +++ 6 files changed, 1082 insertions(+), 104 deletions(-) create mode 100644 src/sort.c diff --git a/src/Makefile.in b/src/Makefile.in index 69c4c44d1a0..7d15b7afd51 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -434,7 +434,7 @@ base_obj = dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \ minibuf.o fileio.o dired.o \ cmds.o casetab.o casefiddle.o indent.o search.o regex-emacs.o undo.o \ alloc.o pdumper.o data.o doc.o editfns.o callint.o \ - eval.o floatfns.o fns.o font.o print.o lread.o $(MODULES_OBJ) \ + eval.o floatfns.o fns.o sort.o font.o print.o lread.o $(MODULES_OBJ) \ syntax.o $(UNEXEC_OBJ) bytecode.o comp.o $(DYNLIB_OBJ) \ process.o gnutls.o callproc.o \ region-cache.o sound.o timefns.o atimer.o \ diff --git a/src/deps.mk b/src/deps.mk index deffab93eca..39edd5c1dd3 100644 --- a/src/deps.mk +++ b/src/deps.mk @@ -279,7 +279,7 @@ eval.o: eval.c commands.h keyboard.h blockinput.h atimer.h systime.h frame.h \ dispextern.h lisp.h globals.h $(config_h) coding.h composite.h xterm.h \ msdos.h floatfns.o: floatfns.c syssignal.h lisp.h globals.h $(config_h) -fns.o: fns.c commands.h lisp.h $(config_h) frame.h buffer.h character.h \ +fns.o: fns.c sort.c commands.h lisp.h $(config_h) frame.h buffer.h character.h \ keyboard.h keymap.h window.h $(INTERVALS_H) coding.h ../lib/md5.h \ ../lib/sha1.h ../lib/sha256.h ../lib/sha512.h blockinput.h atimer.h \ systime.h xterm.h ../lib/unistd.h globals.h diff --git a/src/fns.c b/src/fns.c index 0cc0c0a53d8..8ec23c4e3a8 100644 --- a/src/fns.c +++ b/src/fns.c @@ -39,9 +39,6 @@ along with GNU Emacs. If not, see . */ #include "puresize.h" #include "gnutls.h" -static void sort_vector_copy (Lisp_Object pred, ptrdiff_t len, - Lisp_Object src[restrict VLA_ELEMS (len)], - Lisp_Object dest[restrict VLA_ELEMS (len)]); enum equal_kind { EQUAL_NO_QUIT, EQUAL_PLAIN, EQUAL_INCLUDING_PROPERTIES }; static bool internal_equal (Lisp_Object, Lisp_Object, enum equal_kind, int, Lisp_Object); @@ -2107,8 +2104,11 @@ See also the function `nreverse', which is used more often. */) return new; } -/* Sort LIST using PREDICATE, preserving original order of elements - considered as equal. */ + +/* Stably sort LIST ordered by PREDICATE using the TIMSORT + algorithm. This converts the list to a vector, sorts the vector, + and returns the result converted back to a list. The input list is + destructively reused to hold the sorted result. */ static Lisp_Object sort_list (Lisp_Object list, Lisp_Object predicate) @@ -2116,112 +2116,43 @@ sort_list (Lisp_Object list, Lisp_Object predicate) ptrdiff_t length = list_length (list); if (length < 2) return list; - - Lisp_Object tem = Fnthcdr (make_fixnum (length / 2 - 1), list); - Lisp_Object back = Fcdr (tem); - Fsetcdr (tem, Qnil); - - return merge (Fsort (list, predicate), Fsort (back, predicate), predicate); -} - -/* Using PRED to compare, return whether A and B are in order. - Compare stably when A appeared before B in the input. */ -static bool -inorder (Lisp_Object pred, Lisp_Object a, Lisp_Object b) -{ - return NILP (call2 (pred, b, a)); -} - -/* Using PRED to compare, merge from ALEN-length A and BLEN-length B - into DEST. Argument arrays must be nonempty and must not overlap, - except that B might be the last part of DEST. */ -static void -merge_vectors (Lisp_Object pred, - ptrdiff_t alen, Lisp_Object const a[restrict VLA_ELEMS (alen)], - ptrdiff_t blen, Lisp_Object const b[VLA_ELEMS (blen)], - Lisp_Object dest[VLA_ELEMS (alen + blen)]) -{ - eassume (0 < alen && 0 < blen); - Lisp_Object const *alim = a + alen; - Lisp_Object const *blim = b + blen; - - while (true) - { - if (inorder (pred, a[0], b[0])) - { - *dest++ = *a++; - if (a == alim) - { - if (dest != b) - memcpy (dest, b, (blim - b) * sizeof *dest); - return; - } - } - else - { - *dest++ = *b++; - if (b == blim) - { - memcpy (dest, a, (alim - a) * sizeof *dest); - return; - } - } - } -} - -/* Using PRED to compare, sort LEN-length VEC in place, using TMP for - temporary storage. LEN must be at least 2. */ -static void -sort_vector_inplace (Lisp_Object pred, ptrdiff_t len, - Lisp_Object vec[restrict VLA_ELEMS (len)], - Lisp_Object tmp[restrict VLA_ELEMS (len >> 1)]) -{ - eassume (2 <= len); - ptrdiff_t halflen = len >> 1; - sort_vector_copy (pred, halflen, vec, tmp); - if (1 < len - halflen) - sort_vector_inplace (pred, len - halflen, vec + halflen, vec); - merge_vectors (pred, halflen, tmp, len - halflen, vec + halflen, vec); -} - -/* Using PRED to compare, sort from LEN-length SRC into DST. - Len must be positive. */ -static void -sort_vector_copy (Lisp_Object pred, ptrdiff_t len, - Lisp_Object src[restrict VLA_ELEMS (len)], - Lisp_Object dest[restrict VLA_ELEMS (len)]) -{ - eassume (0 < len); - ptrdiff_t halflen = len >> 1; - if (halflen < 1) - dest[0] = src[0]; else { - if (1 < halflen) - sort_vector_inplace (pred, halflen, src, dest); - if (1 < len - halflen) - sort_vector_inplace (pred, len - halflen, src + halflen, dest); - merge_vectors (pred, halflen, src, len - halflen, src + halflen, dest); + Lisp_Object *result; + USE_SAFE_ALLOCA; + SAFE_ALLOCA_LISP (result, length); + Lisp_Object tail = list; + for (ptrdiff_t i = 0; i < length; i++) + { + result[i] = Fcar (tail); + tail = XCDR (tail); + } + tim_sort (predicate, result, length); + + ptrdiff_t i = 0; + tail = list; + while (CONSP (tail)) + { + XSETCAR (tail, result[i]); + tail = XCDR (tail); + i++; + } + SAFE_FREE (); + return list; } } -/* Sort VECTOR in place using PREDICATE, preserving original order of - elements considered as equal. */ +/* Stably sort VECTOR ordered by PREDICATE using the TIMSORT + algorithm. */ static void sort_vector (Lisp_Object vector, Lisp_Object predicate) { - ptrdiff_t len = ASIZE (vector); - if (len < 2) + ptrdiff_t length = ASIZE (vector); + if (length < 2) return; - ptrdiff_t halflen = len >> 1; - Lisp_Object *tmp; - USE_SAFE_ALLOCA; - SAFE_ALLOCA_LISP (tmp, halflen); - for (ptrdiff_t i = 0; i < halflen; i++) - tmp[i] = make_fixnum (0); - sort_vector_inplace (predicate, len, XVECTOR (vector)->contents, tmp); - SAFE_FREE (); + + tim_sort (predicate, XVECTOR (vector)->contents, length); } DEFUN ("sort", Fsort, Ssort, 2, 2, 0, @@ -2267,7 +2198,7 @@ merge (Lisp_Object org_l1, Lisp_Object org_l2, Lisp_Object pred) } Lisp_Object tem; - if (inorder (pred, Fcar (l1), Fcar (l2))) + if (!NILP (call2 (pred, Fcar (l1), Fcar (l2)))) { tem = l1; l1 = Fcdr (l1); diff --git a/src/lisp.h b/src/lisp.h index c5a772b4231..179c09702ca 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -3939,6 +3939,9 @@ extern Lisp_Object string_to_multibyte (Lisp_Object); extern Lisp_Object string_make_unibyte (Lisp_Object); extern void syms_of_fns (void); +/* Defined in sort.c */ +extern void tim_sort (Lisp_Object, Lisp_Object *, const ptrdiff_t); + /* Defined in floatfns.c. */ verify (FLT_RADIX == 2 || FLT_RADIX == 16); enum { LOG2_FLT_RADIX = FLT_RADIX == 2 ? 1 : 4 }; diff --git a/src/sort.c b/src/sort.c new file mode 100644 index 00000000000..c7ccfc23055 --- /dev/null +++ b/src/sort.c @@ -0,0 +1,974 @@ +/* Timsort for sequences. + +Copyright (C) 2022 Free Software Foundation, Inc. + +This file is part of GNU Emacs. + +GNU Emacs is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at +your option) any later version. + +GNU Emacs is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Emacs. If not, see . */ + +/* This is a version of the cpython code implementing the TIMSORT + sorting algorithm described in + https://github.com/python/cpython/blob/main/Objects/listsort.txt. + This algorithm identifies and pushes naturally ordered sublists of + the original list, or "runs", onto a stack, and merges them + periodically according to a merge strategy called "powersort". + State is maintained during the sort in a merge_state structure, + which is passed around as an argument to all the subroutines. A + "stretch" structure includes a pointer to the run BASE of length + LEN along with its POWER (a computed integer used by the powersort + merge strategy that depends on this run and the succeeding run.) */ + + +#include +#include "lisp.h" + + +/* MAX_MERGE_PENDING is the maximum number of entries in merge_state's + pending-stretch stack. For a list with n elements, this needs at most + floor(log2(n)) + 1 entries even if we didn't force runs to a + minimal length. So the number of bits in a ptrdiff_t is plenty large + enough for all cases. */ + +#define MAX_MERGE_PENDING (sizeof (ptrdiff_t) * 8) + +/* Once we get into galloping mode, we stay there as long as both runs + win at least GALLOP_WIN_MIN consecutive times. */ + +#define GALLOP_WIN_MIN 7 + +/* A small temp array of size MERGESTATE_TEMP_SIZE is used to avoid + malloc when merging small lists. */ + +#define MERGESTATE_TEMP_SIZE 256 + +struct stretch +{ + Lisp_Object *base; + ptrdiff_t len; + int power; +}; + +struct reloc +{ + Lisp_Object **src; + Lisp_Object **dst; + ptrdiff_t *size; + int order; /* -1 while in merge_lo; +1 while in merg_hi; 0 otherwise. */ +}; + + +typedef struct +{ + Lisp_Object *listbase; + ptrdiff_t listlen; + + /* PENDING is a stack of N pending stretches yet to be merged. + Stretch #i starts at address base[i] and extends for len[i] + elements. */ + + int n; + struct stretch pending[MAX_MERGE_PENDING]; + + /* The variable MIN_GALLOP, initialized to GALLOP_WIN_MIN, controls + when we get *into* galloping mode. merge_lo and merge_hi tend to + nudge it higher for random data, and lower for highly structured + data. */ + + ptrdiff_t min_gallop; + + /* 'A' is temporary storage, able to hold ALLOCED elements, to help + with merges. 'A' initially points to TEMPARRAY, and subsequently + to newly allocated memory if needed. */ + + Lisp_Object *a; + ptrdiff_t alloced; + specpdl_ref count; + Lisp_Object temparray[MERGESTATE_TEMP_SIZE]; + + /* If an exception is thrown while merging we might have to relocate + some list elements from temporary storage back into the list. + RELOC keeps track of the information needed to do this. */ + + struct reloc reloc; + + /* PREDICATE is the lisp comparison predicate for the sort. */ + + Lisp_Object predicate; +} merge_state; + + +/* Return true iff (PREDICATE A B) is non-nil. */ + +static inline bool +inorder (const Lisp_Object predicate, const Lisp_Object a, const Lisp_Object b) +{ + return !NILP (call2 (predicate, a, b)); +} + + +/* Sort the list starting at LO and ending at HI using a stable binary + insertion sort algorithm. On entry the sublist [LO, START) (with + START between LO and HIGH) is known to be sorted (pass START == LO + if you are unsure). Even in case of error, the output will be some + permutation of the input (nothing is lost or duplicated). */ + +static void +binarysort (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi, + Lisp_Object *start) +{ + Lisp_Object pred = ms->predicate; + + eassume (lo <= start && start <= hi); + if (lo == start) + ++start; + for (; start < hi; ++start) + { + Lisp_Object *l = lo; + Lisp_Object *r = start; + Lisp_Object pivot = *r; + + eassume (l < r); + do { + Lisp_Object *p = l + ((r - l) >> 1); + if (inorder (pred, pivot, *p)) + r = p; + else + l = p + 1; + } while (l < r); + eassume (l == r); + for (Lisp_Object *p = start; p > l; --p) + p[0] = p[-1]; + *l = pivot; + } +} + + +/* Find and return the length of the "run" (the longest + non-decreasing sequence or the longest strictly decreasing + sequence, with the Boolean *DESCENDING set to 0 in the former + case, or to 1 in the latter) beginning at LO, in the slice [LO, + HI) with LO < HI. The strictness of the definition of + "descending" ensures there are no equal elements to get out of + order so the caller can safely reverse a descending sequence + without violating stability. */ + +static ptrdiff_t +count_run (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi, + bool *descending) +{ + Lisp_Object pred = ms->predicate; + + eassume (lo < hi); + *descending = 0; + ++lo; + ptrdiff_t n = 1; + if (lo == hi) + return n; + + n = 2; + if (inorder (pred, lo[0], lo[-1])) + { + *descending = 1; + for (lo = lo + 1; lo < hi; ++lo, ++n) + { + if (!inorder (pred, lo[0], lo[-1])) + break; + } + } + else + { + for (lo = lo + 1; lo < hi; ++lo, ++n) + { + if (inorder (pred, lo[0], lo[-1])) + break; + } + } + + return n; +} + + +/* Locate and return the proper insertion position of KEY in a sorted + vector: if the vector contains an element equal to KEY, return the + position immediately to the left of the leftmost equal element. + [GALLOP_RIGHT does the same except it returns the position to the + right of the rightmost equal element (if any).] + + 'A' is a sorted vector of N elements. N must be > 0. + + Elements preceding HINT, a non-negative index less than N, are + skipped. The closer HINT is to the final result, the faster this + runs. + + The return value is the int k in [0, N] such that + + A[k-1] < KEY <= a[k] + + pretending that *(A-1) precedes all values and *(A+N) succeeds all + values. In other words, the first k elements of A should precede + KEY, and the last N-k should follow KEY. */ + +static ptrdiff_t +gallop_left (merge_state *ms, const Lisp_Object key, Lisp_Object *a, + const ptrdiff_t n, const ptrdiff_t hint) +{ + Lisp_Object pred = ms->predicate; + + eassume (a && n > 0 && hint >= 0 && hint < n); + + a += hint; + ptrdiff_t lastofs = 0; + ptrdiff_t ofs = 1; + if (inorder (pred, *a, key)) + { + /* When a[hint] < key, gallop right until + a[hint + lastofs] < key <= a[hint + ofs]. */ + const ptrdiff_t maxofs = n - hint; /* This is one after the end of a. */ + while (ofs < maxofs) + { + if (inorder (pred, a[ofs], key)) + { + lastofs = ofs; + eassume (ofs <= (PTRDIFF_MAX - 1) / 2); + ofs = (ofs << 1) + 1; + } + else + break; /* Here key <= a[hint+ofs]. */ + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to offsets relative to &a[0]. */ + lastofs += hint; + ofs += hint; + } + else + { + /* When key <= a[hint], gallop left, until + a[hint - ofs] < key <= a[hint - lastofs]. */ + const ptrdiff_t maxofs = hint + 1; /* Here &a[0] is lowest. */ + while (ofs < maxofs) + { + if (inorder (pred, a[-ofs], key)) + break; + /* Here key <= a[hint - ofs]. */ + lastofs = ofs; + eassume (ofs <= (PTRDIFF_MAX - 1) / 2); + ofs = (ofs << 1) + 1; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to use positive offsets relative to &a[0]. */ + ptrdiff_t k = lastofs; + lastofs = hint - ofs; + ofs = hint - k; + } + a -= hint; + + eassume (-1 <= lastofs && lastofs < ofs && ofs <= n); + /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the + right of lastofs but no farther right than ofs. Do a binary + search, with invariant a[lastofs-1] < key <= a[ofs]. */ + ++lastofs; + while (lastofs < ofs) + { + ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1); + + if (inorder (pred, a[m], key)) + lastofs = m + 1; /* Here a[m] < key. */ + else + ofs = m; /* Here key <= a[m]. */ + } + eassume (lastofs == ofs); /* Then a[ofs-1] < key <= a[ofs]. */ + return ofs; +} + + +/* Locate and return the proper position of KEY in a sorted vector + exactly like GALLOP_LEFT, except that if KEY already exists in + A[0:N] find the position immediately to the right of the rightmost + equal value. + + The return value is the int k in [0, N] such that + + A[k-1] <= KEY < A[k]. */ + +static ptrdiff_t +gallop_right (merge_state *ms, const Lisp_Object key, Lisp_Object *a, + const ptrdiff_t n, const ptrdiff_t hint) +{ + Lisp_Object pred = ms->predicate; + + eassume (a && n > 0 && hint >= 0 && hint < n); + + a += hint; + ptrdiff_t lastofs = 0; + ptrdiff_t ofs = 1; + if (inorder (pred, key, *a)) + { + /* When key < a[hint], gallop left until + a[hint - ofs] <= key < a[hint - lastofs]. */ + const ptrdiff_t maxofs = hint + 1; /* Here &a[0] is lowest. */ + while (ofs < maxofs) + { + if (inorder (pred, key, a[-ofs])) + { + lastofs = ofs; + eassume (ofs <= (PTRDIFF_MAX - 1) / 2); + ofs = (ofs << 1) + 1; + } + else /* Here a[hint - ofs] <= key. */ + break; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to use positive offsets relative to &a[0]. */ + ptrdiff_t k = lastofs; + lastofs = hint - ofs; + ofs = hint - k; + } + else + { + /* When a[hint] <= key, gallop right, until + a[hint + lastofs] <= key < a[hint + ofs]. */ + const ptrdiff_t maxofs = n - hint; /* Here &a[n-1] is highest. */ + while (ofs < maxofs) + { + if (inorder (pred, key, a[ofs])) + break; + /* Here a[hint + ofs] <= key. */ + lastofs = ofs; + eassume (ofs <= (PTRDIFF_MAX - 1) / 2); + ofs = (ofs << 1) + 1; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to use offsets relative to &a[0]. */ + lastofs += hint; + ofs += hint; + } + a -= hint; + + eassume (-1 <= lastofs && lastofs < ofs && ofs <= n); + /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the + right of lastofs but no farther right than ofs. Do a binary + search, with invariant a[lastofs-1] <= key < a[ofs]. */ + ++lastofs; + while (lastofs < ofs) + { + ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1); + + if (inorder (pred, key, a[m])) + ofs = m; /* Here key < a[m]. */ + else + lastofs = m + 1; /* Here a[m] <= key. */ + } + eassume (lastofs == ofs); /* Now a[ofs-1] <= key < a[ofs]. */ + return ofs; +} + + +static void +merge_init (merge_state *ms, const ptrdiff_t list_size, Lisp_Object *lo, + const Lisp_Object predicate) +{ + eassume (ms != NULL); + + ms->a = ms->temparray; + ms->alloced = MERGESTATE_TEMP_SIZE; + + ms->n = 0; + ms->min_gallop = GALLOP_WIN_MIN; + ms->listlen = list_size; + ms->listbase = lo; + ms->predicate = predicate; + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; +} + + +/* The dynamically allocated memory may hold lisp objects during + merging. MERGE_MARKMEM marks them so they aren't reaped during + GC. */ + +static void +merge_markmem (void *arg) +{ + merge_state *ms = arg; + eassume (ms != NULL); + + if (ms->reloc.size != NULL && *ms->reloc.size > 0) + { + eassume (ms->reloc.src != NULL); + mark_objects (*ms->reloc.src, *ms->reloc.size); + } +} + + +/* Free all temp storage. If an exception occurs while merging, + relocate any lisp elements in temp storage back to the original + array. */ + +static void +cleanup_mem (void *arg) +{ + merge_state *ms = arg; + eassume (ms != NULL); + + /* If we have an exception while merging, some of the list elements + might only live in temp storage; we copy everything remaining in + the temp storage back into the original list. This ensures that + the original list has all of the original elements, although + their order is unpredictable. */ + + if (ms->reloc.order != 0 && *ms->reloc.size > 0) + { + eassume (*ms->reloc.src != NULL && *ms->reloc.dst != NULL); + ptrdiff_t n = *ms->reloc.size; + ptrdiff_t shift = ms->reloc.order == -1 ? 0 : n - 1; + memcpy (*ms->reloc.dst - shift, *ms->reloc.src, n * word_size); + } + + /* Free any remaining temp storage. */ + xfree (ms->a); +} + + +/* Allocate enough temp memory for NEED array slots. Any previously + allocated memory is first freed, and a cleanup routine is + registered to free memory at the very end of the sort, or on + exception. */ + +static void +merge_getmem (merge_state *ms, const ptrdiff_t need) +{ + eassume (ms != NULL); + + if (ms->a == ms->temparray) + { + /* We only get here if alloc is needed and this is the first + time, so we set up the unwind protection. */ + specpdl_ref count = SPECPDL_INDEX (); + record_unwind_protect_ptr_mark (cleanup_mem, ms, merge_markmem); + ms->count = count; + } + else + { + /* We have previously alloced storage. Since we don't care + what's in the block we don't use realloc which would waste + cycles copying the old data. We just free and alloc + again. */ + xfree (ms->a); + } + ms->a = xmalloc (need * word_size); + ms->alloced = need; +} + + +static inline void +needmem (merge_state *ms, ptrdiff_t na) +{ + if (na > ms->alloced) + merge_getmem (ms, na); +} + + +/* Stably merge (in-place) the NA elements starting at SSA with the NB + elements starting at SSB = SSA + NA. NA and NB must be positive. + Require that SSA[NA-1] belongs at the end of the merge, and NA <= + NB. */ + +static void +merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb, + ptrdiff_t nb) +{ + Lisp_Object pred = ms->predicate; + + eassume (ms && ssa && ssb && na > 0 && nb > 0); + eassume (ssa + na == ssb); + needmem (ms, na); + memcpy (ms->a, ssa, na * word_size); + Lisp_Object *dest = ssa; + ssa = ms->a; + + ms->reloc = (struct reloc){&ssa, &dest, &na, -1}; + + *dest++ = *ssb++; + --nb; + if (nb == 0) + goto Succeed; + if (na == 1) + goto CopyB; + + ptrdiff_t min_gallop = ms->min_gallop; + for (;;) + { + ptrdiff_t acount = 0; /* The # of consecutive times A won. */ + + ptrdiff_t bcount = 0; /* The # of consecutive times B won. */ + + for (;;) + { + eassume (na > 1 && nb > 0); + if (inorder (pred, *ssb, *ssa)) + { + *dest++ = *ssb++ ; + ++bcount; + acount = 0; + --nb; + if (nb == 0) + goto Succeed; + if (bcount >= min_gallop) + break; + } + else + { + *dest++ = *ssa++; + ++acount; + bcount = 0; + --na; + if (na == 1) + goto CopyB; + if (acount >= min_gallop) + break; + } + } + + /* One run is winning so consistently that galloping may be a + huge speedup. We try that, and continue galloping until (if + ever) neither run appears to be winning consistently + anymore. */ + ++min_gallop; + do { + eassume (na > 1 && nb > 0); + min_gallop -= min_gallop > 1; + ms->min_gallop = min_gallop; + ptrdiff_t k = gallop_right (ms, ssb[0], ssa, na, 0); + acount = k; + if (k) + { + memcpy (dest, ssa, k * word_size); + dest += k; + ssa += k; + na -= k; + if (na == 1) + goto CopyB; + /* While na==0 is impossible for a consistent comparison + function, we shouldn't assume that it is. */ + if (na == 0) + goto Succeed; + } + *dest++ = *ssb++ ; + --nb; + if (nb == 0) + goto Succeed; + + k = gallop_left (ms, ssa[0], ssb, nb, 0); + bcount = k; + if (k) + { + memmove (dest, ssb, k * word_size); + dest += k; + ssb += k; + nb -= k; + if (nb == 0) + goto Succeed; + } + *dest++ = *ssa++; + --na; + if (na == 1) + goto CopyB; + } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN); + ++min_gallop; /* Apply a penalty for leaving galloping mode. */ + ms->min_gallop = min_gallop; + } + Succeed: + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + + if (na) + memcpy (dest, ssa, na * word_size); + return; + CopyB: + eassume (na == 1 && nb > 0); + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + + /* The last element of ssa belongs at the end of the merge. */ + memmove (dest, ssb, nb * word_size); + dest[nb] = ssa[0]; +} + + +/* Stably merge (in-place) the NA elements starting at SSA with the NB + elements starting at SSB = SSA + NA. NA and NB must be positive. + Require that SSA[NA-1] belongs at the end of the merge, and NA >= + NB. */ + +static void +merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, + Lisp_Object *ssb, ptrdiff_t nb) +{ + Lisp_Object pred = ms->predicate; + + eassume (ms && ssa && ssb && na > 0 && nb > 0); + eassume (ssa + na == ssb); + needmem (ms, nb); + Lisp_Object *dest = ssb; + dest += nb - 1; + memcpy(ms->a, ssb, nb * word_size); + Lisp_Object *basea = ssa; + Lisp_Object *baseb = ms->a; + ssb = ms->a + nb - 1; + ssa += na - 1; + + ms->reloc = (struct reloc){&baseb, &dest, &nb, 1}; + + *dest-- = *ssa--; + --na; + if (na == 0) + goto Succeed; + if (nb == 1) + goto CopyA; + + ptrdiff_t min_gallop = ms->min_gallop; + for (;;) { + ptrdiff_t acount = 0; /* The # of consecutive times A won. */ + ptrdiff_t bcount = 0; /* The # of consecutive times B won. */ + + for (;;) { + eassume (na > 0 && nb > 1); + if (inorder (pred, *ssb, *ssa)) + { + *dest-- = *ssa--; + ++acount; + bcount = 0; + --na; + if (na == 0) + goto Succeed; + if (acount >= min_gallop) + break; + } + else + { + *dest-- = *ssb--; + ++bcount; + acount = 0; + --nb; + if (nb == 1) + goto CopyA; + if (bcount >= min_gallop) + break; + } + } + + /* One run is winning so consistently that galloping may be a huge + speedup. Try that, and continue galloping until (if ever) + neither run appears to be winning consistently anymore. */ + ++min_gallop; + do { + eassume (na > 0 && nb > 1); + min_gallop -= min_gallop > 1; + ms->min_gallop = min_gallop; + ptrdiff_t k = gallop_right (ms, ssb[0], basea, na, na - 1); + k = na - k; + acount = k; + if (k) + { + dest += -k; + ssa += -k; + memmove(dest + 1, ssa + 1, k * word_size); + na -= k; + if (na == 0) + goto Succeed; + } + *dest-- = *ssb--; + --nb; + if (nb == 1) + goto CopyA; + + k = gallop_left (ms, ssa[0], baseb, nb, nb - 1); + k = nb - k; + bcount = k; + if (k) + { + dest += -k; + ssb += -k; + memcpy(dest + 1, ssb + 1, k * word_size); + nb -= k; + if (nb == 1) + goto CopyA; + /* While nb==0 is impossible for a consistent comparison + function we shouldn't assume that it is. */ + if (nb == 0) + goto Succeed; + } + *dest-- = *ssa--; + --na; + if (na == 0) + goto Succeed; + } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN); + ++min_gallop; /* Apply a penalty for leaving galloping mode. */ + ms->min_gallop = min_gallop; + } + Succeed: + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + if (nb) + memcpy (dest - nb + 1, baseb, nb * word_size); + return; + CopyA: + eassume (nb == 1 && na > 0); + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + /* The first element of ssb belongs at the front of the merge. */ + memmove (dest + 1 - na, ssa + 1 - na, na * word_size); + dest += -na; + ssa += -na; + dest[0] = ssb[0]; +} + + +/* Merge the two runs at stack indices I and I+1. */ + +static void +merge_at (merge_state *ms, const ptrdiff_t i) +{ + eassume (ms != NULL); + eassume (ms->n >= 2); + eassume (i >= 0); + eassume (i == ms->n - 2 || i == ms->n - 3); + + Lisp_Object *ssa = ms->pending[i].base; + ptrdiff_t na = ms->pending[i].len; + Lisp_Object *ssb = ms->pending[i + 1].base; + ptrdiff_t nb = ms->pending[i + 1].len; + eassume (na > 0 && nb > 0); + eassume (ssa + na == ssb); + + /* Record the length of the combined runs. The current run i+1 goes + away after the merge. If i is the 3rd-last run now, slide the + last run (which isn't involved in this merge) over to i+1. */ + ms->pending[i].len = na + nb; + if (i == ms->n - 3) + ms->pending[i + 1] = ms->pending[i + 2]; + --ms->n; + + /* Where does b start in a? Elements in a before that can be + ignored (they are already in place). */ + ptrdiff_t k = gallop_right (ms, *ssb, ssa, na, 0); + eassume (k >= 0); + ssa += k; + na -= k; + if (na == 0) + return; + + /* Where does a end in b? Elements in b after that can be ignored + (they are already in place). */ + nb = gallop_left (ms, ssa[na - 1], ssb, nb, nb - 1); + if (nb == 0) + return; + eassume (nb > 0); + /* Merge what remains of the runs using a temp array with size + min(na, nb) elements. */ + if (na <= nb) + merge_lo (ms, ssa, na, ssb, nb); + else + merge_hi (ms, ssa, na, ssb, nb); +} + + +/* Compute the "power" of the first of two adjacent runs begining at + index S1, with the first having length N1 and the second (starting + at index S1+N1) having length N2. The run has total length N. */ + +static int +powerloop (const ptrdiff_t s1, const ptrdiff_t n1, const ptrdiff_t n2, + const ptrdiff_t n) +{ + eassume (s1 >= 0); + eassume (n1 > 0 && n2 > 0); + eassume (s1 + n1 + n2 <= n); + /* The midpoints a and b are + a = s1 + n1/2 + b = s1 + n1 + n2/2 = a + (n1 + n2)/2 + + These may not be integers because of the "/2", so we work with + 2*a and 2*b instead. It makes no difference to the outcome, + since the bits in the expansion of (2*i)/n are merely shifted one + position from those of i/n. */ + ptrdiff_t a = 2 * s1 + n1; + ptrdiff_t b = a + n1 + n2; + int result = 0; + /* Emulate a/n and b/n one bit a time, until their bits differ. */ + for (;;) + { + ++result; + if (a >= n) + { /* Both quotient bits are now 1. */ + eassume (b >= a); + a -= n; + b -= n; + } + else if (b >= n) + { /* a/n bit is 0 and b/n bit is 1. */ + break; + } /* Otherwise both quotient bits are 0. */ + eassume (a < b && b < n); + a <<= 1; + b <<= 1; + } + return result; +} + + +/* Update the state upon identifying a run of length N2. If there's + already a stretch on the stack, apply the "powersort" merge + strategy: compute the topmost stretch's "power" (depth in a + conceptual binary merge tree) and merge adjacent runs on the stack + with greater power. */ + +static void +found_new_run (merge_state *ms, const ptrdiff_t n2) +{ + eassume (ms != NULL); + if (ms->n) + { + eassume (ms->n > 0); + struct stretch *p = ms->pending; + ptrdiff_t s1 = p[ms->n - 1].base - ms->listbase; + ptrdiff_t n1 = p[ms->n - 1].len; + int power = powerloop (s1, n1, n2, ms->listlen); + while (ms->n > 1 && p[ms->n - 2].power > power) + { + merge_at (ms, ms->n - 2); + } + eassume (ms->n < 2 || p[ms->n - 2].power < power); + p[ms->n - 1].power = power; + } +} + + +/* Unconditionally merge all stretches on the stack until only one + remains. */ + +static void +merge_force_collapse (merge_state *ms) +{ + struct stretch *p = ms->pending; + + eassume (ms != NULL); + while (ms->n > 1) + { + ptrdiff_t n = ms->n - 2; + if (n > 0 && p[n - 1].len < p[n + 1].len) + --n; + merge_at (ms, n); + } +} + + +/* Compute a good value for the minimum run length; natural runs + shorter than this are boosted artificially via binary insertion. + + If N < 64, return N (it's too small to bother with fancy stuff). + Otherwise if N is an exact power of 2, return 32. Finally, return + an int k, 32 <= k <= 64, such that N/k is close to, but strictly + less than, an exact power of 2. */ + +static ptrdiff_t +merge_compute_minrun (ptrdiff_t n) +{ + ptrdiff_t r = 0; /* r will become 1 if any non-zero bits are + shifted off. */ + + eassume (n >= 0); + while (n >= 64) + { + r |= n & 1; + n >>= 1; + } + return n + r; +} + + +static void +reverse_vector (Lisp_Object *s, const ptrdiff_t n) +{ + for (ptrdiff_t i = 0; i < n >> 1; i++) + { + Lisp_Object tem = s[i]; + s[i] = s[n - i - 1]; + s[n - i - 1] = tem; + } +} + +/* Sort the array SEQ with LENGTH elements in the order determined by + PREDICATE. */ + +void +tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length) +{ + if (SYMBOLP (predicate)) + { + /* Attempt to resolve the function as far as possible ahead of time, + to avoid having to do it for each call. */ + Lisp_Object fun = XSYMBOL (predicate)->u.s.function; + if (SYMBOLP (fun)) + /* Function was an alias; use slow-path resolution. */ + fun = indirect_function (fun); + /* Don't resolve to an autoload spec; that would be very slow. */ + if (!NILP (fun) && !(CONSP (fun) && EQ (XCAR (fun), Qautoload))) + predicate = fun; + } + + merge_state ms; + Lisp_Object *lo = seq; + + merge_init (&ms, length, lo, predicate); + + /* March over the array once, left to right, finding natural runs, + and extending short natural runs to minrun elements. */ + const ptrdiff_t minrun = merge_compute_minrun (length); + ptrdiff_t nremaining = length; + do { + bool descending; + + /* Identify the next run. */ + ptrdiff_t n = count_run (&ms, lo, lo + nremaining, &descending); + if (descending) + reverse_vector (lo, n); + /* If the run is short, extend it to min(minrun, nremaining). */ + if (n < minrun) + { + const ptrdiff_t force = nremaining <= minrun ? + nremaining : minrun; + binarysort (&ms, lo, lo + force, lo + n); + n = force; + } + eassume (ms.n == 0 || ms.pending[ms.n - 1].base + + ms.pending[ms.n - 1].len == lo); + found_new_run (&ms, n); + /* Push the new run on to the stack. */ + eassume (ms.n < MAX_MERGE_PENDING); + ms.pending[ms.n].base = lo; + ms.pending[ms.n].len = n; + ++ms.n; + /* Advance to find the next run. */ + lo += n; + nremaining -= n; + } while (nremaining); + + merge_force_collapse (&ms); + eassume (ms.n == 1); + eassume (ms.pending[0].len == length); + lo = ms.pending[0].base; + + if (ms.a != ms.temparray) + unbind_to (ms.count, Qnil); +} diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el index 723ef4c710f..5b252e184f0 100644 --- a/test/src/fns-tests.el +++ b/test/src/fns-tests.el @@ -204,6 +204,76 @@ [-1 2 3 4 5 5 7 8 9])) (should (equal (sort (vector 9 5 2 -1 5 3 8 7 4) (lambda (x y) (> x y))) [9 8 7 5 5 4 3 2 -1])) + ;; Sort a reversed list and vector. + (should (equal + (sort (reverse (number-sequence 1 1000)) (lambda (x y) (< x y))) + (number-sequence 1 1000))) + (should (equal + (sort (reverse (vconcat (number-sequence 1 1000))) + (lambda (x y) (< x y))) + (vconcat (number-sequence 1 1000)))) + ;; Sort a constant list and vector. + (should (equal + (sort (make-vector 100 1) (lambda (x y) (> x y))) + (make-vector 100 1))) + (should (equal + (sort (append (make-vector 100 1) nil) (lambda (x y) (> x y))) + (append (make-vector 100 1) nil))) + ;; Sort a long list and vector with every pair reversed. + (let ((vec (make-vector 100000 nil)) + (logxor-vec (make-vector 100000 nil))) + (dotimes (i 100000) + (aset logxor-vec i (logxor i 1)) + (aset vec i i)) + (should (equal + (sort logxor-vec (lambda (x y) (< x y))) + vec)) + (should (equal + (sort (append logxor-vec nil) (lambda (x y) (< x y))) + (append vec nil)))) + ;; Sort a list and vector with seven swaps. + (let ((vec (make-vector 100 nil)) + (swap-vec (make-vector 100 nil))) + (dotimes (i 100) + (aset vec i (- i 50)) + (aset swap-vec i (- i 50))) + (mapc (lambda (p) + (let ((tmp (elt swap-vec (car p)))) + (aset swap-vec (car p) (elt swap-vec (cdr p))) + (aset swap-vec (cdr p) tmp))) + '((48 . 94) (75 . 77) (33 . 41) (92 . 52) + (10 . 96) (1 . 14) (43 . 81))) + (should (equal + (sort (copy-sequence swap-vec) (lambda (x y) (< x y))) + vec)) + (should (equal + (sort (append swap-vec nil) (lambda (x y) (< x y))) + (append vec nil)))) + ;; Check for possible corruption after GC. + (let* ((size 3000) + (complex-vec (make-vector size nil)) + (vec (make-vector size nil)) + (counter 0) + (my-counter (lambda () + (if (< counter 500) + (cl-incf counter) + (setq counter 0) + (garbage-collect)))) + (rand 1) + (generate-random + (lambda () (setq rand + (logand (+ (* rand 1103515245) 12345) 2147483647))))) + ;; Make a complex vector and its sorted version. + (dotimes (i size) + (let ((r (funcall generate-random))) + (aset complex-vec i (cons r "a")) + (aset vec i (cons r "a")))) + ;; Sort it. + (should (equal + (sort complex-vec + (lambda (x y) (funcall my-counter) (< (car x) (car y)))) + (sort vec 'car-less-than-car)))) + ;; Check for sorting stability. (should (equal (sort (vector