From 876dee2cdb89cb56219ef336aeb38a22d5cba92e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Sun, 10 Mar 2024 13:18:22 +0100 Subject: [PATCH] Add `value<` (bug#69709) It's a general-purpose polymorphic ordering function, like `<` but for any two values of the same type. * src/data.c (syms_of_data): Add the `type-mismatch` error. (bits_word_to_host_endian): Move... * src/lisp.h (bits_word_to_host_endian): ...here, and declare inline. * src/fns.c (Fstring_lessp): Extract the bulk of this function to... (string_cmp): ...this 3-way comparison function, for use elsewhere. (bool_vector_cmp, value_cmp, Fvaluelt): New. * lisp/emacs-lisp/byte-opt.el (side-effect-free-fns, pure-fns): Add `value<`, which is pure and side-effect-free. * test/src/fns-tests.el (fns-value<-ordered, fns-value<-unordered) (fns-value<-type-mismatch, fns-value<-symbol-with-pos) (fns-value<-circle, ert-deftest fns-value<-bool-vector): New tests. * doc/lispref/sequences.texi (Sequence Functions): * doc/lispref/numbers.texi (Comparison of Numbers): * doc/lispref/strings.texi (Text Comparison): Document the new value< function. * etc/NEWS: Announce. (cherry picked from commit 1232ab31c656b8564984a758957466f90ac10501) --- doc/lispref/numbers.texi | 1 + doc/lispref/sequences.texi | 35 +++++ doc/lispref/strings.texi | 1 + etc/NEWS | 10 ++ lisp/emacs-lisp/byte-opt.el | 4 +- src/data.c | 26 +--- src/fns.c | 280 +++++++++++++++++++++++++++++++++--- src/lisp.h | 24 ++++ test/src/fns-tests.el | 218 ++++++++++++++++++++++++++++ 9 files changed, 552 insertions(+), 47 deletions(-) diff --git a/doc/lispref/numbers.texi b/doc/lispref/numbers.texi index 99b456043b9..2c093ccd6bd 100644 --- a/doc/lispref/numbers.texi +++ b/doc/lispref/numbers.texi @@ -476,6 +476,7 @@ This function tests whether its arguments are numerically equal, and returns @code{t} if they are not, and @code{nil} if they are. @end defun +@anchor{definition of <} @defun < number-or-marker &rest number-or-markers This function tests whether each argument is strictly less than the following argument. It returns @code{t} if so, @code{nil} otherwise. diff --git a/doc/lispref/sequences.texi b/doc/lispref/sequences.texi index 74719d4779f..5bdf71fe02e 100644 --- a/doc/lispref/sequences.texi +++ b/doc/lispref/sequences.texi @@ -436,6 +436,41 @@ but their relative order is also preserved: @end example @end defun +@cindex comparing values +@cindex standard sorting order +@defun value< a b +This function returns non-@code{nil} if @var{a} comes before @var{b} in +the standard sorting order; this means that it returns @code{nil} when +@var{b} comes before @var{a}, or if they are equal or unordered. + +@var{a} and @var{b} must have the same type. Specifically: + +@itemize @bullet +@item +Numbers are compared using @code{<} (@pxref{definition of <}). +@item +Strings and symbols are compared using @code{string<} +(@pxref{definition of string<}). +@item +Conses, lists, vectors and records are compared lexicographically. +@item +Markers are compared first by buffer, then by position. +@item +Buffers and processes are compared by name. +@item +Other types are considered unordered and the return value will be @code{nil}. +@end itemize + +Examples: +@example +(value< -4 3.5) @result{} t +(value< "dog" "cat") @result{} nil +(value< 'yip 'yip) @result{} nil +(value< '(3 2) '(3 2 0)) @result{} t +(value< [3 2 1] [3 2 0]) @result{} nil +@end example +@end defun + Sometimes, computation of sort keys of list or vector elements is expensive, and therefore it is important to perform it the minimum number of times. By contrast, computing the sort keys of elements diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index a2285098aad..6a9dd589237 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi @@ -612,6 +612,7 @@ that collation implements. @end defun @cindex lexical comparison of strings +@anchor{definition of string<} @defun string< string1 string2 @c (findex string< causes problems for permuted index!!) This function compares two strings a character at a time. It diff --git a/etc/NEWS b/etc/NEWS index e386191bff5..41aa04158e4 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1900,6 +1900,16 @@ precedence over the variable when present. Mostly used internally to do a kind of topological sort of inheritance hierarchies. ++++ +** New polymorphic comparison function 'value<'. +This function returns non-nil if the first argument is less than the +second. It works for any two values of the same type with reasonable +ordering for numbers, strings, symbols, bool-vectors, markers, buffers +and processes. Conses, lists, vectors and records are ordered +lexicographically. +It is intended as a convenient ordering predicate for sorting, and is +likely to be faster than hand-written Lisp functions. + ** New function 'sort-on'. This function implements the Schwartzian transform, and is appropriate for sorting lists when the computation of the sort key of a list diff --git a/lisp/emacs-lisp/byte-opt.el b/lisp/emacs-lisp/byte-opt.el index 54997205edb..ea163723a3e 100644 --- a/lisp/emacs-lisp/byte-opt.el +++ b/lisp/emacs-lisp/byte-opt.el @@ -1772,7 +1772,7 @@ See Info node `(elisp) Integer Basics'." string-version-lessp substring substring-no-properties sxhash-eq sxhash-eql sxhash-equal sxhash-equal-including-properties - take vconcat + take value< vconcat ;; frame.c frame-ancestor-p frame-bottom-divider-width frame-char-height frame-char-width frame-child-frame-border-width frame-focus @@ -1973,7 +1973,7 @@ See Info node `(elisp) Integer Basics'." hash-table-p identity length length< length= length> member memq memql nth nthcdr proper-list-p rassoc rassq safe-length string-bytes string-distance string-equal string-lessp - string-search string-version-lessp take + string-search string-version-lessp take value< ;; search.c regexp-quote ;; syntax.c diff --git a/src/data.c b/src/data.c index 69b990bed76..a86f86c52f5 100644 --- a/src/data.c +++ b/src/data.c @@ -3835,30 +3835,6 @@ count_trailing_zero_bits (bits_word val) } } -static bits_word -bits_word_to_host_endian (bits_word val) -{ -#ifndef WORDS_BIGENDIAN - return val; -#else - if (BITS_WORD_MAX >> 31 == 1) - return bswap_32 (val); - if (BITS_WORD_MAX >> 31 >> 31 >> 1 == 1) - return bswap_64 (val); - { - int i; - bits_word r = 0; - for (i = 0; i < sizeof val; i++) - { - r = ((r << 1 << (CHAR_BIT - 1)) - | (val & ((1u << 1 << (CHAR_BIT - 1)) - 1))); - val = val >> 1 >> (CHAR_BIT - 1); - } - return r; - } -#endif -} - DEFUN ("bool-vector-exclusive-or", Fbool_vector_exclusive_or, Sbool_vector_exclusive_or, 2, 3, 0, doc: /* Return A ^ B, bitwise exclusive or. @@ -4072,6 +4048,7 @@ syms_of_data (void) DEFSYM (Qminibuffer_quit, "minibuffer-quit"); DEFSYM (Qwrong_length_argument, "wrong-length-argument"); DEFSYM (Qwrong_type_argument, "wrong-type-argument"); + DEFSYM (Qtype_mismatch, "type-mismatch") DEFSYM (Qargs_out_of_range, "args-out-of-range"); DEFSYM (Qvoid_function, "void-function"); DEFSYM (Qcyclic_function_indirection, "cyclic-function-indirection"); @@ -4163,6 +4140,7 @@ syms_of_data (void) PUT_ERROR (Quser_error, error_tail, ""); PUT_ERROR (Qwrong_length_argument, error_tail, "Wrong length argument"); PUT_ERROR (Qwrong_type_argument, error_tail, "Wrong type argument"); + PUT_ERROR (Qtype_mismatch, error_tail, "Types do not match"); PUT_ERROR (Qargs_out_of_range, error_tail, "Args out of range"); PUT_ERROR (Qvoid_function, error_tail, "Symbol's function definition is void"); diff --git a/src/fns.c b/src/fns.c index 0a64e515402..7faf25b9088 100644 --- a/src/fns.c +++ b/src/fns.c @@ -27,6 +27,7 @@ along with GNU Emacs. If not, see . */ #include #include #include +#include #include "lisp.h" #include "bignum.h" @@ -466,21 +467,10 @@ load_unaligned_size_t (const void *p) return x; } -DEFUN ("string-lessp", Fstring_lessp, Sstring_lessp, 2, 2, 0, - doc: /* Return non-nil if STRING1 is less than STRING2 in lexicographic order. -Case is significant. -Symbols are also allowed; their print names are used instead. */) - (Lisp_Object string1, Lisp_Object string2) +/* Return -1/0/1 to indicate the relation between string1 and string2. */ +static int +string_cmp (Lisp_Object string1, Lisp_Object string2) { - if (SYMBOLP (string1)) - string1 = SYMBOL_NAME (string1); - else - CHECK_STRING (string1); - if (SYMBOLP (string2)) - string2 = SYMBOL_NAME (string2); - else - CHECK_STRING (string2); - ptrdiff_t n = min (SCHARS (string1), SCHARS (string2)); if ((!STRING_MULTIBYTE (string1) || SCHARS (string1) == SBYTES (string1)) @@ -489,7 +479,9 @@ Symbols are also allowed; their print names are used instead. */) /* Each argument is either unibyte or all-ASCII multibyte: we can compare bytewise. */ int d = memcmp (SSDATA (string1), SSDATA (string2), n); - return d < 0 || (d == 0 && n < SCHARS (string2)) ? Qt : Qnil; + if (d) + return d; + return n < SCHARS (string2) ? -1 : n > SCHARS (string2); } else if (STRING_MULTIBYTE (string1) && STRING_MULTIBYTE (string2)) { @@ -523,7 +515,7 @@ Symbols are also allowed; their print names are used instead. */) if (b >= nb) /* One string is a prefix of the other. */ - return b < nb2 ? Qt : Qnil; + return b < nb2 ? -1 : b > nb2; /* Now back up to the start of the differing characters: it's the last byte not having the bit pattern 10xxxxxx. */ @@ -535,7 +527,7 @@ Symbols are also allowed; their print names are used instead. */) ptrdiff_t i1_byte = b, i2_byte = b; int c1 = fetch_string_char_advance_no_check (string1, &i1, &i1_byte); int c2 = fetch_string_char_advance_no_check (string2, &i2, &i2_byte); - return c1 < c2 ? Qt : Qnil; + return c1 < c2 ? -1 : c1 > c2; } else if (STRING_MULTIBYTE (string1)) { @@ -546,9 +538,9 @@ Symbols are also allowed; their print names are used instead. */) int c1 = fetch_string_char_advance_no_check (string1, &i1, &i1_byte); int c2 = SREF (string2, i2++); if (c1 != c2) - return c1 < c2 ? Qt : Qnil; + return c1 < c2 ? -1 : 1; } - return i1 < SCHARS (string2) ? Qt : Qnil; + return i1 < SCHARS (string2) ? -1 : i1 > SCHARS (string2); } else { @@ -559,12 +551,30 @@ Symbols are also allowed; their print names are used instead. */) int c1 = SREF (string1, i1++); int c2 = fetch_string_char_advance_no_check (string2, &i2, &i2_byte); if (c1 != c2) - return c1 < c2 ? Qt : Qnil; + return c1 < c2 ? -1 : 1; } - return i1 < SCHARS (string2) ? Qt : Qnil; + return i1 < SCHARS (string2) ? -1 : i1 > SCHARS (string2); } } +DEFUN ("string-lessp", Fstring_lessp, Sstring_lessp, 2, 2, 0, + doc: /* Return non-nil if STRING1 is less than STRING2 in lexicographic order. +Case is significant. +Symbols are also allowed; their print names are used instead. */) + (Lisp_Object string1, Lisp_Object string2) +{ + if (SYMBOLP (string1)) + string1 = SYMBOL_NAME (string1); + else + CHECK_STRING (string1); + if (SYMBOLP (string2)) + string2 = SYMBOL_NAME (string2); + else + CHECK_STRING (string2); + + return string_cmp (string1, string2) < 0 ? Qt : Qnil; +} + DEFUN ("string-version-lessp", Fstring_version_lessp, Sstring_version_lessp, 2, 2, 0, doc: /* Return non-nil if S1 is less than S2, as version strings. @@ -2908,6 +2918,233 @@ internal_equal (Lisp_Object o1, Lisp_Object o2, enum equal_kind equal_kind, return false; } + +/* Return -1/0/1 for the lexicographic relation between bool-vectors. */ +static int +bool_vector_cmp (Lisp_Object a, Lisp_Object b) +{ + ptrdiff_t na = bool_vector_size (a); + ptrdiff_t nb = bool_vector_size (b); + /* Skip equal words. */ + ptrdiff_t words_min = min (na, nb) / BITS_PER_BITS_WORD; + bits_word *ad = bool_vector_data (a); + bits_word *bd = bool_vector_data (b); + ptrdiff_t i = 0; + while (i < words_min && ad[i] == bd[i]) + i++; + na -= i * BITS_PER_BITS_WORD; + nb -= i * BITS_PER_BITS_WORD; + eassume (na >= 0 && nb >= 0); + if (nb == 0) + return na != 0; + if (na == 0) + return -1; + + bits_word aw = bits_word_to_host_endian (ad[i]); + bits_word bw = bits_word_to_host_endian (bd[i]); + bits_word xw = aw ^ bw; + if (xw == 0) + return na < nb ? -1 : na > nb; + + bits_word d = xw & -xw; /* Isolate first difference. */ + eassume (d != 0); + return (d & aw) ? 1 : -1; +} + +/* Return -1, 0 or 1 to indicate whether ab in the sense of value<. + In particular 0 does not mean equality in the sense of Fequal, only + that the arguments cannot be ordered yet they can be compared (same + type). */ +static int +value_cmp (Lisp_Object a, Lisp_Object b, int maxdepth) +{ + if (maxdepth < 0) + error ("Maximum depth exceeded in comparison"); + + tail_recurse: + /* Shortcut for a common case. */ + if (BASE_EQ (a, b)) + return 0; + + switch (XTYPE (a)) + { + case_Lisp_Int: + { + EMACS_INT ia = XFIXNUM (a); + if (FIXNUMP (b)) + return ia < XFIXNUM (b) ? -1 : 1; /* we know that a≠b */ + if (FLOATP (b)) + return ia < XFLOAT_DATA (b) ? -1 : ia > XFLOAT_DATA (b); + if (BIGNUMP (b)) + return -mpz_sgn (*xbignum_val (b)); + } + goto type_mismatch; + + case Lisp_Symbol: + if (BARE_SYMBOL_P (b)) + return string_cmp (XBARE_SYMBOL (a)->u.s.name, + XBARE_SYMBOL (b)->u.s.name); + if (CONSP (b) && NILP (a)) + return -1; + if (SYMBOLP (b)) + /* Slow-path branch when B is a symbol-with-pos. */ + return string_cmp (XBARE_SYMBOL (a)->u.s.name, XSYMBOL (b)->u.s.name); + goto type_mismatch; + + case Lisp_String: + if (STRINGP (b)) + return string_cmp (a, b); + goto type_mismatch; + + case Lisp_Cons: + /* FIXME: Optimise for difference in the first element? */ + FOR_EACH_TAIL (b) + { + int cmp = value_cmp (XCAR (a), XCAR (b), maxdepth - 1); + if (cmp != 0) + return cmp; + a = XCDR (a); + if (!CONSP (a)) + { + b = XCDR (b); + goto tail_recurse; + } + } + if (NILP (b)) + return 1; + else + goto type_mismatch; + goto tail_recurse; + + case Lisp_Vectorlike: + if (VECTORLIKEP (b)) + { + enum pvec_type ta = PSEUDOVECTOR_TYPE (XVECTOR (a)); + enum pvec_type tb = PSEUDOVECTOR_TYPE (XVECTOR (b)); + if (ta == tb) + switch (ta) + { + case PVEC_NORMAL_VECTOR: + case PVEC_RECORD: + { + ptrdiff_t len_a = ASIZE (a); + ptrdiff_t len_b = ASIZE (b); + if (ta == PVEC_RECORD) + { + len_a &= PSEUDOVECTOR_SIZE_MASK; + len_b &= PSEUDOVECTOR_SIZE_MASK; + } + ptrdiff_t len_min = min (len_a, len_b); + for (ptrdiff_t i = 0; i < len_min; i++) + { + int cmp = value_cmp (AREF (a, i), AREF (b, i), + maxdepth - 1); + if (cmp != 0) + return cmp; + } + return len_a < len_b ? -1 : len_a > len_b; + } + + case PVEC_BOOL_VECTOR: + return bool_vector_cmp (a, b); + + case PVEC_MARKER: + { + Lisp_Object buf_a = Fmarker_buffer (a); + Lisp_Object buf_b = Fmarker_buffer (b); + if (NILP (buf_a)) + return NILP (buf_b) ? 0 : -1; + if (NILP (buf_b)) + return 1; + int cmp = value_cmp (buf_a, buf_b, maxdepth - 1); + if (cmp != 0) + return cmp; + ptrdiff_t pa = XMARKER (a)->charpos; + ptrdiff_t pb = XMARKER (b)->charpos; + return pa < pb ? -1 : pa > pb; + } + + case PVEC_PROCESS: + a = Fprocess_name (a); + b = Fprocess_name (b); + goto tail_recurse; + + case PVEC_BUFFER: + { + /* Killed buffers lack names and sort before those alive. */ + Lisp_Object na = Fbuffer_name (a); + Lisp_Object nb = Fbuffer_name (b); + if (NILP (na)) + return NILP (nb) ? 0 : -1; + if (NILP (nb)) + return 1; + a = na; + b = nb; + goto tail_recurse; + } + + case PVEC_BIGNUM: + return mpz_cmp (*xbignum_val (a), *xbignum_val (b)); + + case PVEC_SYMBOL_WITH_POS: + /* Compare by name, enabled or not. */ + a = XSYMBOL_WITH_POS_SYM (a); + b = XSYMBOL_WITH_POS_SYM (b); + goto tail_recurse; + + default: + /* Treat other types as unordered. */ + return 0; + } + } + else if (BIGNUMP (a)) + return -value_cmp (b, a, maxdepth); + else if (SYMBOL_WITH_POS_P (a) && symbols_with_pos_enabled) + { + a = XSYMBOL_WITH_POS_SYM (a); + goto tail_recurse; + } + + goto type_mismatch; + + case Lisp_Float: + { + double fa = XFLOAT_DATA (a); + if (FLOATP (b)) + return fa < XFLOAT_DATA (b) ? -1 : fa > XFLOAT_DATA (b); + if (FIXNUMP (b)) + return fa < XFIXNUM (b) ? -1 : fa > XFIXNUM (b); + if (BIGNUMP (b)) + { + if (isnan (fa)) + return 0; + return -mpz_cmp_d (*xbignum_val (b), fa); + } + } + goto type_mismatch; + + default: + eassume (0); + } + type_mismatch: + xsignal2 (Qtype_mismatch, a, b); +} + +DEFUN ("value<", Fvaluelt, Svaluelt, 2, 2, 0, + doc: /* Return non-nil if A precedes B in standard value order. +A and B must have the same basic type. +Numbers are compared with `<'. +Strings and symbols are compared with `string-lessp'. +Lists, vectors, bool-vectors and records are compared lexicographically. +Markers are compared lexicographically by buffer and position. +Buffers and processes are compared by name. +Other types are considered unordered and the return value will be `nil'. */) + (Lisp_Object a, Lisp_Object b) +{ + int maxdepth = 20; /* FIXME: arbitrary value */ + return value_cmp (a, b, maxdepth) < 0 ? Qt : Qnil; +} + DEFUN ("fillarray", Ffillarray, Sfillarray, 2, 2, 0, @@ -6589,6 +6826,7 @@ For best results this should end in a space. */); defsubr (&Seql); defsubr (&Sequal); defsubr (&Sequal_including_properties); + defsubr (&Svaluelt); defsubr (&Sfillarray); defsubr (&Sclear_string); defsubr (&Snconc); diff --git a/src/lisp.h b/src/lisp.h index f86758c88fb..5583a7e2e8e 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -1882,6 +1882,30 @@ bool_vector_bytes (EMACS_INT size) return (size + BOOL_VECTOR_BITS_PER_CHAR - 1) / BOOL_VECTOR_BITS_PER_CHAR; } +INLINE bits_word +bits_word_to_host_endian (bits_word val) +{ +#ifndef WORDS_BIGENDIAN + return val; +#else + if (BITS_WORD_MAX >> 31 == 1) + return bswap_32 (val); + if (BITS_WORD_MAX >> 31 >> 31 >> 1 == 1) + return bswap_64 (val); + { + int i; + bits_word r = 0; + for (i = 0; i < sizeof val; i++) + { + r = ((r << 1 << (CHAR_BIT - 1)) + | (val & ((1u << 1 << (CHAR_BIT - 1)) - 1))); + val = val >> 1 >> (CHAR_BIT - 1); + } + return r; + } +#endif +} + INLINE bool BOOL_VECTOR_P (Lisp_Object a) { diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el index 7437c07f156..844000cdc76 100644 --- a/test/src/fns-tests.el +++ b/test/src/fns-tests.el @@ -1513,4 +1513,222 @@ (should-error (copy-alist "abc") :type 'wrong-type-argument)) +(ert-deftest fns-value<-ordered () + ;; values (X . Y) where X nil: `b' is now always a proper prefix of `a'. + (should-not (value< a b)) + (should (value< b a))) + (t + ;; nil -> t: `a' is now less than `b'. + (should (value< a b)) + (should-not (value< b a)))) + ;; Undo the flip. + (aset b i val))))))))))) + ;;; fns-tests.el ends here -- 2.39.5