From 71783e90a46ca913ea2c334cdc8cb24cd74055f8 Mon Sep 17 00:00:00 2001 From: Lars Ingebrigtsen Date: Sun, 21 Feb 2016 15:32:45 +1100 Subject: [PATCH] Add the string-numeric-lessp function * doc/lispref/strings.texi (Text Comparison): Document `string-numerical-lessp'. * src/fns.c (Fstring_numeric_lessp): New function. (gather_number_from_string): Helper function for that function. * test/src/fns-tests.el (fns-tests-string-numeric-lessp): Add tests. --- doc/lispref/strings.texi | 13 ++++++ etc/NEWS | 6 +++ src/fns.c | 98 ++++++++++++++++++++++++++++++++++++++++ test/src/fns-tests.el | 17 +++++++ 4 files changed, 134 insertions(+) diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index 9d6613c522c..a3efbf2f223 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi @@ -633,6 +633,19 @@ If your system does not support a locale environment, this function behaves like @code{string-lessp}. @end defun +@defun string-numerical-lessp strin1 string2 +This function behaves like @code{string-lessp} for stretches of +consecutive non-numerical characters, but compares sequences of +numerical characters as if they comprised a base-ten number, and then +compares the numbers. So @samp{foo2.png} is ``smaller'' than +@samp{foo12.png} according to this predicate, even if @samp{12} is +lexicographically ``smaller'' than @samp{2}. + +If one string has a number in a position in the string, and the other +doesn't, then lexicograpic comparison is done at that point, so +@samp{foo.png} is ``smaller'' than @samp{foo2.png}. +@end defun + @defun string-prefix-p string1 string2 &optional ignore-case This function returns non-@code{nil} if @var{string1} is a prefix of @var{string2}; i.e., if @var{string2} starts with @var{string1}. If diff --git a/etc/NEWS b/etc/NEWS index 33c1b136ebc..9f0fb8d6941 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1720,6 +1720,12 @@ environment. For the time being this is implemented for modern POSIX systems and for MS-Windows, for other systems they fall back to their counterparts `string-lessp' and `string-equal'. ++++ +** The new function `string-numeric-lessp' compares strings by +interpreting consecutive runs of numerical characters as numbers, and +compares their numerical values. According to this predicate, +"foo2.png" is smaller than "foo12.png". + --- *** The ls-lisp package uses `string-collate-lessp' to sort file names. The effect is that, on systems that use ls-lisp for Dired, the default diff --git a/src/fns.c b/src/fns.c index d1808440966..927fcdac02d 100644 --- a/src/fns.c +++ b/src/fns.c @@ -331,6 +331,103 @@ Symbols are also allowed; their print names are used instead. */) return i1 < SCHARS (string2) ? Qt : Qnil; } +/* Return the numerical value of a consecutive run of numerical + characters from STRING. The ISP and ISP_BYTE address pointer + pointers are increased and left at the next character after the + numerical characters. */ +static size_t +gather_number_from_string (int c, Lisp_Object string, + ptrdiff_t *isp, ptrdiff_t *isp_byte) +{ + size_t number = c - '0'; + unsigned char *chp; + int chlen; + + do + { + if (STRING_MULTIBYTE (string)) + { + chp = &SDATA (string)[*isp_byte]; + c = STRING_CHAR_AND_LENGTH (chp, chlen); + } + else + { + c = SREF (string, *isp_byte); + chlen = 1; + } + + /* If we're still in a number, add it to the sum and continue. */ + /* FIXME: Integer overflow? */ + if (c >= '0' && c <= '9') + { + number = number * 10; + number += c - '0'; + (*isp)++; + (*isp_byte) += chlen; + } + else + break; + } + /* Stop when we get to the end of the string anyway. */ + while (c != 0); + + return number; +} + +DEFUN ("string-numeric-lessp", Fstring_numeric_lessp, + Sstring_numeric_lessp, 2, 2, 0, + doc: /* Return non-nil if STRING1 is less than STRING2 in 'numeric' order. +Sequences of non-numerical characters are compared lexicographically, +while sequences of numerical characters are converted into numbers, +and then the numbers are compared. This means that \"foo2.png\" is +less than \"foo12.png\" according to this predicate. +Case is significant. +Symbols are also allowed; their print names are used instead. */) + (register Lisp_Object string1, Lisp_Object string2) +{ + ptrdiff_t end; + ptrdiff_t i1, i1_byte, i2, i2_byte; + size_t num1, num2; + + if (SYMBOLP (string1)) + string1 = SYMBOL_NAME (string1); + if (SYMBOLP (string2)) + string2 = SYMBOL_NAME (string2); + CHECK_STRING (string1); + CHECK_STRING (string2); + + i1 = i1_byte = i2 = i2_byte = 0; + + end = SCHARS (string1); + if (end > SCHARS (string2)) + end = SCHARS (string2); + + while (i1 < end) + { + /* When we find a mismatch, we must compare the + characters, not just the bytes. */ + int c1, c2; + + FETCH_STRING_CHAR_ADVANCE (c1, string1, i1, i1_byte); + FETCH_STRING_CHAR_ADVANCE (c2, string2, i2, i2_byte); + + if (c1 >= '0' && c1 <= '9' && + c2 >= '0' && c2 <= '9') + /* Both strings are numbers, so compare them. */ + { + num1 = gather_number_from_string (c1, string1, &i1, &i1_byte); + num2 = gather_number_from_string (c2, string2, &i2, &i2_byte); + if (num1 < num2) + return Qt; + else if (num1 > num2) + return Qnil; + } + else if (c1 != c2) + return c1 < c2 ? Qt : Qnil; + } + return i1 < SCHARS (string2) ? Qt : Qnil; +} + DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0, doc: /* Return t if first arg string is less than second in collation order. Symbols are also allowed; their print names are used instead. @@ -5049,6 +5146,7 @@ this variable. */); defsubr (&Sstring_equal); defsubr (&Scompare_strings); defsubr (&Sstring_lessp); + defsubr (&Sstring_numeric_lessp); defsubr (&Sstring_collate_lessp); defsubr (&Sstring_collate_equalp); defsubr (&Sappend); diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el index 762f7bdd94f..0c6edb89252 100644 --- a/test/src/fns-tests.el +++ b/test/src/fns-tests.el @@ -191,3 +191,20 @@ (string-collate-lessp a b (if (eq system-type 'windows-nt) "enu_USA" "en_US.UTF-8"))))) '("Adrian" "Ævar" "Agustín" "Eli")))) + +(ert-deftest fns-tests-string-numeric-lessp () + (should (string-numeric-lessp "foo2.png" "foo12.png")) + (should (not (string-numeric-lessp "foo12.png" "foo2.png"))) + (should (string-numeric-lessp "foo12.png" "foo20000.png")) + (should (not (string-numeric-lessp "foo20000.png" "foo12.png"))) + (should (string-numeric-lessp "foo.png" "foo2.png")) + (should (not (string-numeric-lessp "foo2.png" "foo.png"))) + (should (equal (sort '("foo12.png" "foo2.png" "foo1.png") + 'string-numeric-lessp) + '("foo1.png" "foo2.png" "foo12.png"))) + (should (string-numeric-lessp "foo2" "foo1234")) + (should (not (string-numeric-lessp "foo1234" "foo2"))) + (should (string-numeric-lessp "foo.png" "foo2")) + (should (string-numeric-lessp "foo1.25.5.png" "foo1.125.5")) + (should (string-numeric-lessp "2" "1245")) + (should (not (string-numeric-lessp "1245" "2")))) -- 2.39.5