From: Paul Eggert Date: Sun, 21 Feb 2016 21:25:24 +0000 (-0800) Subject: Use Gnulib filevercmp for version comparison X-Git-Tag: emacs-26.0.90~2523 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=1f7feecaee0ed3fb79758fe60020aefb30d9ff01;p=emacs.git Use Gnulib filevercmp for version comparison * admin/merge-gnulib (GNULIB_MODULES): Add filevercmp. * doc/lispref/strings.texi (Text Comparison): * etc/NEWS, src/fns.c: * test/src/fns-tests.el (fns-tests-string-version-lessp): Rename newly-introduced function to string-version-lessp, by analogy with strverscmp. * lib/filevercmp.c, lib/filevercmp.h: New files, copied from gnulib. * lib/gnulib.mk, m4/gnulib-comp.m4: Regenerate. * src/fns.c: Include <filevercmp.h>. (gather_number_from_string): Remove. (Fstring_version_lessp): Reimplement via filevercmp. --- diff --git a/admin/merge-gnulib b/admin/merge-gnulib index 5463d1b667b..5d6512760d9 100755 --- a/admin/merge-gnulib +++ b/admin/merge-gnulib @@ -30,7 +30,7 @@ GNULIB_MODULES=' careadlinkat close-stream count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 dtoastr dtotimespec dup2 environ execinfo faccessat - fcntl fcntl-h fdatasync fdopendir filemode fstatat fsync + fcntl fcntl-h fdatasync fdopendir filemode filevercmp fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr mkostemp mktime diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index 19301de06fa..ce629aa8259 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi @@ -633,20 +633,12 @@ If your system does not support a locale environment, this function behaves like @code{string-lessp}. @end defun -@defun string-numerical-lessp strin1 string2 -This function behaves like @code{string-lessp} for stretches of -consecutive non-numerical characters, but compares sequences of -numerical characters as if they comprised a base-ten number, and then -compares the numbers.
So @samp{foo2.png} is ``smaller'' than -@samp{foo12.png} according to this predicate, even if @samp{12} is -lexicographically ``smaller'' than @samp{2}. - -If one string has a number in a position in the string, and the other -doesn't, then lexicograpic comparison is done at that point, so -@samp{foo.png} is ``smaller'' than @samp{foo2.png}. If any of the -numbers in the strings are larger than can be represented as an -integer number, the entire string is compared using -@code{string-less}. +@defun string-version-lessp string1 string2 +This function compares strings lexicographically, except it treats +sequences of numerical characters as if they comprised a base-ten +number, and then compares the numbers. So @samp{foo2.png} is +``smaller'' than @samp{foo12.png} according to this predicate, even if +@samp{12} is lexicographically ``smaller'' than @samp{2}. @end defun @defun string-prefix-p string1 string2 &optional ignore-case diff --git a/etc/NEWS b/etc/NEWS index bad95191884..9a3799a62a0 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1726,7 +1726,7 @@ systems and for MS-Windows, for other systems they fall back to their counterparts `string-lessp' and `string-equal'. +++ -** The new function `string-numeric-lessp' compares strings by +** The new function `string-version-lessp' compares strings by interpreting consecutive runs of numerical characters as numbers, and compares their numerical values. According to this predicate, "foo2.png" is smaller than "foo12.png". diff --git a/lib/filevercmp.c b/lib/filevercmp.c new file mode 100644 index 00000000000..a75c9468e31 --- /dev/null +++ b/lib/filevercmp.c @@ -0,0 +1,181 @@ +/* + Copyright (C) 1995 Ian Jackson + Copyright (C) 2001 Anthony Towns + Copyright (C) 2008-2016 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> +#include "filevercmp.h" + +#include <sys/types.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <c-ctype.h> +#include <limits.h> + +/* Match a file suffix defined by this regular expression: + /(\.[A-Za-z~][A-Za-z0-9~]*)*$/ + Scan the string *STR and return a pointer to the matching suffix, or + NULL if not found. Upon return, *STR points to terminating NUL. */ +static const char * +match_suffix (const char **str) +{ + const char *match = NULL; + bool read_alpha = false; + while (**str) + { + if (read_alpha) + { + read_alpha = false; + if (!c_isalpha (**str) && '~' != **str) + match = NULL; + } + else if ('.' == **str) + { + read_alpha = true; + if (!match) + match = *str; + } + else if (!c_isalnum (**str) && '~' != **str) + match = NULL; + (*str)++; + } + return match; +} + +/* verrevcmp helper function */ +static int +order (unsigned char c) +{ + if (c_isdigit (c)) + return 0; + else if (c_isalpha (c)) + return c; + else if (c == '~') + return -1; + else + return (int) c + UCHAR_MAX + 1; +} + +/* slightly modified verrevcmp function from dpkg + S1, S2 - compared string + S1_LEN, S2_LEN - length of strings to be scanned + + This implements the algorithm for comparison of version strings + specified by Debian and now widely adopted. The detailed + specification can be found in the Debian Policy Manual in the + section on the 'Version' control field.
This version of the code + implements that from s5.6.12 of Debian Policy v3.8.0.1 + http://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version */ +static int _GL_ATTRIBUTE_PURE +verrevcmp (const char *s1, size_t s1_len, const char *s2, size_t s2_len) +{ + size_t s1_pos = 0; + size_t s2_pos = 0; + while (s1_pos < s1_len || s2_pos < s2_len) + { + int first_diff = 0; + while ((s1_pos < s1_len && !c_isdigit (s1[s1_pos])) + || (s2_pos < s2_len && !c_isdigit (s2[s2_pos]))) + { + int s1_c = (s1_pos == s1_len) ? 0 : order (s1[s1_pos]); + int s2_c = (s2_pos == s2_len) ? 0 : order (s2[s2_pos]); + if (s1_c != s2_c) + return s1_c - s2_c; + s1_pos++; + s2_pos++; + } + while (s1[s1_pos] == '0') + s1_pos++; + while (s2[s2_pos] == '0') + s2_pos++; + while (c_isdigit (s1[s1_pos]) && c_isdigit (s2[s2_pos])) + { + if (!first_diff) + first_diff = s1[s1_pos] - s2[s2_pos]; + s1_pos++; + s2_pos++; + } + if (c_isdigit (s1[s1_pos])) + return 1; + if (c_isdigit (s2[s2_pos])) + return -1; + if (first_diff) + return first_diff; + } + return 0; +} + +/* Compare version strings S1 and S2. + See filevercmp.h for function description. */ +int +filevercmp (const char *s1, const char *s2) +{ + const char *s1_pos; + const char *s2_pos; + const char *s1_suffix, *s2_suffix; + size_t s1_len, s2_len; + int result; + + /* easy comparison to see if strings are identical */ + int simple_cmp = strcmp (s1, s2); + if (simple_cmp == 0) + return 0; + + /* special handle for "", "." and ".." */ + if (!*s1) + return -1; + if (!*s2) + return 1; + if (0 == strcmp (".", s1)) + return -1; + if (0 == strcmp (".", s2)) + return 1; + if (0 == strcmp ("..", s1)) + return -1; + if (0 == strcmp ("..", s2)) + return 1; + + /* special handle for other hidden files */ + if (*s1 == '.' && *s2 != '.') + return -1; + if (*s1 != '.' && *s2 == '.') + return 1; + if (*s1 == '.' 
&& *s2 == '.') + { + s1++; + s2++; + } + + /* "cut" file suffixes */ + s1_pos = s1; + s2_pos = s2; + s1_suffix = match_suffix (&s1_pos); + s2_suffix = match_suffix (&s2_pos); + s1_len = (s1_suffix ? s1_suffix : s1_pos) - s1; + s2_len = (s2_suffix ? s2_suffix : s2_pos) - s2; + + /* restore file suffixes if strings are identical after "cut" */ + if ((s1_suffix || s2_suffix) && (s1_len == s2_len) + && 0 == strncmp (s1, s2, s1_len)) + { + s1_len = s1_pos - s1; + s2_len = s2_pos - s2; + } + + result = verrevcmp (s1, s1_len, s2, s2_len); + return result == 0 ? simple_cmp : result; +} diff --git a/lib/filevercmp.h b/lib/filevercmp.h new file mode 100644 index 00000000000..220b71b5790 --- /dev/null +++ b/lib/filevercmp.h @@ -0,0 +1,42 @@ +/* + Copyright (C) 1995 Ian Jackson + Copyright (C) 2001 Anthony Towns + Copyright (C) 2008-2016 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef FILEVERCMP_H +#define FILEVERCMP_H + +/* Compare version strings: + + This function compares strings S1 and S2: + 1) By PREFIX in the same way as strcmp. + 2) Then by VERSION (most similarly to version compare of Debian's dpkg). + Leading zeros in version numbers are ignored. + 3) If both (PREFIX and VERSION) are equal, strcmp function is used for + comparison. So this function can return 0 if (and only if) strings S1 + and S2 are identical.
+ + It returns number >0 for S1 > S2, 0 for S1 == S2 and number <0 for S1 < S2. + + This function compares strings, in a way that if VER1 and VER2 are version + numbers and PREFIX and SUFFIX (SUFFIX defined as (\.[A-Za-z~][A-Za-z0-9~]*)*) + are strings then VER1 < VER2 implies filevercmp (PREFIX VER1 SUFFIX, + PREFIX VER2 SUFFIX) < 0. + + This function is intended to be a replacement for strverscmp. */ +int filevercmp (const char *s1, const char *s2) _GL_ATTRIBUTE_PURE; + +#endif /* FILEVERCMP_H */ diff --git a/lib/gnulib.mk b/lib/gnulib.mk index b1edd86f92c..cc8429658e1 100644 --- a/lib/gnulib.mk +++ b/lib/gnulib.mk @@ -21,7 +21,7 @@ # the same distribution terms as the rest of that program. # # Generated by gnulib-tool. -# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=flexmember --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=unsetenv --makefile-name=gnulib.mk --conditional-dependencies --no-libtool --macro-prefix=gl --no-vc-files alloca-opt binary-io byteswap c-ctype c-strcase careadlinkat close-stream count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax strtoumax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub unsetenv update-copyright utimens vla 
warnings +# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=flexmember --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=unsetenv --makefile-name=gnulib.mk --conditional-dependencies --no-libtool --macro-prefix=gl --no-vc-files alloca-opt binary-io byteswap c-ctype c-strcase careadlinkat close-stream count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode filevercmp fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax strtoumax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub unsetenv update-copyright utimens vla warnings MOSTLYCLEANFILES += core *.stackdump @@ -441,6 +441,14 @@ EXTRA_DIST += filemode.h ## end gnulib module filemode +## begin gnulib module filevercmp + +libgnu_a_SOURCES += filevercmp.c + +EXTRA_DIST += filevercmp.h + +## end gnulib module filevercmp + ## begin gnulib module fpending diff --git a/m4/gnulib-comp.m4 b/m4/gnulib-comp.m4 index 831bb4c4f4b..5a3fc98597c 100644 --- a/m4/gnulib-comp.m4 +++ b/m4/gnulib-comp.m4 @@ -78,6 +78,7 @@ AC_DEFUN([gl_EARLY], # Code from module fdatasync: # Code from module fdopendir: # Code from module filemode: + # Code from module filevercmp: # Code from module fpending: # Code from module fstatat: # Code from module fsync: @@ -889,6 +890,8 @@ AC_DEFUN([gl_FILE_LIST], 
[ lib/fdopendir.c lib/filemode.c lib/filemode.h + lib/filevercmp.c + lib/filevercmp.h lib/fpending.c lib/fpending.h lib/fstatat.c diff --git a/src/fns.c b/src/fns.c index 77ad4505c94..d314fcd0711 100644 --- a/src/fns.c +++ b/src/fns.c @@ -21,6 +21,7 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ #include <config.h> #include <unistd.h> +#include <filevercmp.h> #include <intprops.h> #include <vla.h> #include <errno.h> @@ -332,50 +333,21 @@ Symbols are also allowed; their print names are used instead. */) return i1 < SCHARS (string2) ? Qt : Qnil; } -/* Return the numerical value of a consecutive run of numerical - characters from STRING. The ISP and ISP_BYTE address pointer - pointers are increased and left at the next character after the - numerical characters. */ -static size_t -gather_number_from_string (Lisp_Object string, - ptrdiff_t *isp, ptrdiff_t *isp_byte) -{ - size_t number = 0; - char *s = SSDATA (string); - char *end; - - errno = 0; - number = strtoumax (s + *isp_byte, &end, 10); - if (errno == ERANGE) - /* If we have an integer overflow, then we fall back on lexical - comparison. */ - return -1; - else - { - size_t diff = end - (s + *isp_byte); - (*isp) += diff; - (*isp_byte) += diff; - return number; - } -} +DEFUN ("string-version-lessp", Fstring_version_lessp, + Sstring_version_lessp, 2, 2, 0, + doc: /* Return non-nil if S1 is less than S2, as version strings. + +This function compares version strings S1 and S2: + 1) By prefix lexicographically. + 2) Then by version (similarly to version comparison of Debian's dpkg). + Leading zeros in version numbers are ignored. + 3) If both prefix and version are equal, compare as ordinary strings. -DEFUN ("string-numeric-lessp", Fstring_numeric_lessp, - Sstring_numeric_lessp, 2, 2, 0, - doc: /* Return non-nil if STRING1 is less than STRING2 in 'numeric' order. -Sequences of non-numerical characters are compared lexicographically, -while sequences of numerical characters are converted into numbers, -and then the numbers are compared.
This means that \"foo2.png\" is -less than \"foo12.png\" according to this predicate. +For example, \"foo2.png\" compares less than \"foo12.png\". Case is significant. Symbols are also allowed; their print names are used instead. */) - (register Lisp_Object string1, Lisp_Object string2) + (Lisp_Object string1, Lisp_Object string2) { - ptrdiff_t end; - ptrdiff_t i1, i1_byte, i2, i2_byte; - size_t num1, num2; - unsigned char *chp; - int chlen1, chlen2; - if (SYMBOLP (string1)) string1 = SYMBOL_NAME (string1); if (SYMBOLP (string2)) @@ -383,67 +355,26 @@ Symbols are also allowed; their print names are used instead. */) CHECK_STRING (string1); CHECK_STRING (string2); - i1 = i1_byte = i2 = i2_byte = 0; + char *p1 = SSDATA (string1); + char *p2 = SSDATA (string2); + char *lim1 = p1 + SBYTES (string1); + char *lim2 = p2 + SBYTES (string2); + int cmp; - end = SCHARS (string1); - if (end > SCHARS (string2)) - end = SCHARS (string2); - - while (i1 < end) + while ((cmp = filevercmp (p1, p2)) == 0) { - /* When we find a mismatch, we must compare the - characters, not just the bytes. */ - int c1, c2; - - if (STRING_MULTIBYTE (string1)) - { - chp = &SDATA (string1)[i1_byte]; - c1 = STRING_CHAR_AND_LENGTH (chp, chlen1); - } - else - { - c1 = SREF (string1, i1_byte); - chlen1 = 1; - } - - if (STRING_MULTIBYTE (string2)) - { - chp = &SDATA (string1)[i2_byte]; - c2 = STRING_CHAR_AND_LENGTH (chp, chlen2); - } - else - { - c2 = SREF (string2, i2_byte); - chlen2 = 1; - } - - if (c1 >= '0' && c1 <= '9' && - c2 >= '0' && c2 <= '9') - /* Both strings are numbers, so compare them. */ - { - num1 = gather_number_from_string (string1, &i1, &i1_byte); - num2 = gather_number_from_string (string2, &i2, &i2_byte); - /* If we have an integer overflow, then resort to sorting - the entire string lexicographically. 
*/ - if (num1 == -1 || num2 == -1) - return Fstring_lessp (string1, string2); - else if (num1 < num2) - return Qt; - else if (num1 > num2) - return Qnil; - } - else - { - if (c1 != c2) - return c1 < c2 ? Qt : Qnil; - - i1++; - i2++; - i1_byte += chlen1; - i2_byte += chlen2; - } + /* If the strings are identical through their first null bytes, + skip past identical prefixes and try again. */ + ptrdiff_t size = strlen (p1) + 1; + p1 += size; + p2 += size; + if (lim1 < p1) + return lim2 < p2 ? Qnil : Qt; + if (lim2 < p2) + return Qnil; } - return i1 < SCHARS (string2) ? Qt : Qnil; + + return cmp < 0 ? Qt : Qnil; } DEFUN ("string-collate-lessp", Fstring_collate_lessp, Sstring_collate_lessp, 2, 4, 0, @@ -5164,7 +5095,7 @@ this variable. */); defsubr (&Sstring_equal); defsubr (&Scompare_strings); defsubr (&Sstring_lessp); - defsubr (&Sstring_numeric_lessp); + defsubr (&Sstring_version_lessp); defsubr (&Sstring_collate_lessp); defsubr (&Sstring_collate_equalp); defsubr (&Sappend); diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el index 0c6edb89252..861736995f4 100644 --- a/test/src/fns-tests.el +++ b/test/src/fns-tests.el @@ -192,19 +192,19 @@ a b (if (eq system-type 'windows-nt) "enu_USA" "en_US.UTF-8"))))) '("Adrian" "Ævar" "Agustín" "Eli")))) -(ert-deftest fns-tests-string-numeric-lessp () - (should (string-numeric-lessp "foo2.png" "foo12.png")) - (should (not (string-numeric-lessp "foo12.png" "foo2.png"))) - (should (string-numeric-lessp "foo12.png" "foo20000.png")) - (should (not (string-numeric-lessp "foo20000.png" "foo12.png"))) - (should (string-numeric-lessp "foo.png" "foo2.png")) - (should (not (string-numeric-lessp "foo2.png" "foo.png"))) +(ert-deftest fns-tests-string-version-lessp () + (should (string-version-lessp "foo2.png" "foo12.png")) + (should (not (string-version-lessp "foo12.png" "foo2.png"))) + (should (string-version-lessp "foo12.png" "foo20000.png")) + (should (not (string-version-lessp "foo20000.png" "foo12.png"))) + (should 
(string-version-lessp "foo.png" "foo2.png")) + (should (not (string-version-lessp "foo2.png" "foo.png"))) (should (equal (sort '("foo12.png" "foo2.png" "foo1.png") - 'string-numeric-lessp) + 'string-version-lessp) '("foo1.png" "foo2.png" "foo12.png"))) - (should (string-numeric-lessp "foo2" "foo1234")) - (should (not (string-numeric-lessp "foo1234" "foo2"))) - (should (string-numeric-lessp "foo.png" "foo2")) - (should (string-numeric-lessp "foo1.25.5.png" "foo1.125.5")) - (should (string-numeric-lessp "2" "1245")) - (should (not (string-numeric-lessp "1245" "2")))) + (should (string-version-lessp "foo2" "foo1234")) + (should (not (string-version-lessp "foo1234" "foo2"))) + (should (string-version-lessp "foo.png" "foo2")) + (should (string-version-lessp "foo1.25.5.png" "foo1.125.5")) + (should (string-version-lessp "2" "1245")) + (should (not (string-version-lessp "1245" "2"))))