From: Paul Eggert Date: Thu, 1 Jun 2017 23:03:12 +0000 (-0700) Subject: Improve performance by avoiding strtoumax X-Git-Tag: emacs-26.0.90~521^2~190 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=178d0cb5f530e6d7eb36eb9987ff405c854ccdb3;p=emacs.git Improve performance by avoiding strtoumax This made (string-to-number "10") 20% faster on my old desktop, an AMD Phenom II X4 910e running Fedora 25 x86-64. * admin/merge-gnulib (GNULIB_MODULES): Remove strtoumax. * lib/gnulib.mk.in, m4/gnulib-comp.m4: Regenerate. * lib/strtoul.c, lib/strtoull.c, lib/strtoumax.c, m4/strtoull.m4: * m4/strtoumax.m4: Remove. * src/editfns.c (str2num): New function. (styled_format): Use it instead of strtoumax. Use ptrdiff_t instead of uintmax_t. Check for integer overflow. * src/lread.c (LEAD_INT, DOT_CHAR, TRAIL_INT, E_EXP): Move to private scope and make them enums. (string_to_number): Compute integer value directly during first pass instead of revisiting it with strtoumax later. --- diff --git a/admin/merge-gnulib b/admin/merge-gnulib index 45e4a788a35..e5fb0f59fb3 100755 --- a/admin/merge-gnulib +++ b/admin/merge-gnulib @@ -38,7 +38,7 @@ GNULIB_MODULES=' manywarnings memrchr mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio - stpcpy strftime strtoimax strtoumax symlink sys_stat + stpcpy strftime strtoimax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub update-copyright utimens vla warnings diff --git a/lib/gnulib.mk.in b/lib/gnulib.mk.in index d23c2a57ec3..73d304307d4 100644 --- a/lib/gnulib.mk.in +++ b/lib/gnulib.mk.in @@ -21,7 +21,7 @@ # the same distribution terms as the rest of that program. # # Generated by gnulib-tool. -# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stat --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=tzset --avoid=unsetenv --avoid=utime --avoid=utime-h --gnu-make --makefile-name=gnulib.mk.in --conditional-dependencies --no-libtool --macro-prefix=gl --no-vc-files alloca-opt binary-io byteswap c-ctype c-strcase careadlinkat close-stream count-leading-zeros count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode filevercmp flexmember fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax strtoumax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub update-copyright utimens vla warnings +# Reproduce by: gnulib-tool --import --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --avoid=close --avoid=dup --avoid=fchdir --avoid=fstat --avoid=malloc-posix --avoid=msvc-inval --avoid=msvc-nothrow --avoid=open --avoid=openat-die --avoid=opendir --avoid=raise --avoid=save-cwd --avoid=select --avoid=setenv --avoid=sigprocmask --avoid=stat --avoid=stdarg --avoid=stdbool --avoid=threadlib --avoid=tzset --avoid=unsetenv --avoid=utime --avoid=utime-h --gnu-make --makefile-name=gnulib.mk.in --conditional-dependencies --no-libtool --macro-prefix=gl --no-vc-files alloca-opt binary-io byteswap c-ctype c-strcase careadlinkat close-stream count-leading-zeros count-one-bits count-trailing-zeros crypto/md5 crypto/sha1 crypto/sha256 crypto/sha512 dtoastr dtotimespec dup2 environ execinfo faccessat fcntl fcntl-h fdatasync fdopendir filemode filevercmp flexmember fstatat fsync getloadavg getopt-gnu gettime gettimeofday gitlog-to-changelog ignore-value intprops largefile lstat manywarnings memrchr mkostemp mktime pipe2 pselect pthread_sigmask putenv qcopy-acl readlink readlinkat sig2str socklen stat-time std-gnu11 stdalign stddef stdio stpcpy strftime strtoimax symlink sys_stat sys_time time time_r time_rz timegm timer-time timespec-add timespec-sub update-copyright utimens vla warnings MOSTLYCLEANFILES += core *.stackdump @@ -905,7 +905,6 @@ gl_GNULIB_ENABLED_getdtablesize = @gl_GNULIB_ENABLED_getdtablesize@ gl_GNULIB_ENABLED_getgroups = @gl_GNULIB_ENABLED_getgroups@ gl_GNULIB_ENABLED_secure_getenv = @gl_GNULIB_ENABLED_secure_getenv@ gl_GNULIB_ENABLED_strtoll = @gl_GNULIB_ENABLED_strtoll@ -gl_GNULIB_ENABLED_strtoull = @gl_GNULIB_ENABLED_strtoull@ gl_GNULIB_ENABLED_tempname = @gl_GNULIB_ENABLED_tempname@ gl_LIBOBJS = @gl_LIBOBJS@ gl_LTLIBOBJS = @gl_LTLIBOBJS@ @@ -2507,30 +2506,6 @@ EXTRA_libgnu_a_SOURCES += strtol.c strtoll.c endif ## end gnulib module strtoll -## begin gnulib module strtoull -ifeq (,$(OMIT_GNULIB_MODULE_strtoull)) - -ifneq (,$(gl_GNULIB_ENABLED_strtoull)) - -endif -EXTRA_DIST += strtol.c strtoul.c strtoull.c - -EXTRA_libgnu_a_SOURCES += strtol.c strtoul.c strtoull.c - -endif -## end gnulib module strtoull - -## begin gnulib module strtoumax -ifeq (,$(OMIT_GNULIB_MODULE_strtoumax)) - - -EXTRA_DIST += strtoimax.c strtoumax.c - -EXTRA_libgnu_a_SOURCES += strtoimax.c strtoumax.c - -endif -## end gnulib module strtoumax - ## begin gnulib module symlink ifeq (,$(OMIT_GNULIB_MODULE_symlink)) diff --git a/lib/strtoul.c b/lib/strtoul.c deleted file mode 100644 index c4974e069e5..00000000000 --- a/lib/strtoul.c +++ /dev/null @@ -1,19 +0,0 @@ -/* Copyright (C) 1991, 1997, 2009-2017 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#define UNSIGNED 1 - -#include "strtol.c" diff --git a/lib/strtoull.c b/lib/strtoull.c deleted file mode 100644 index 51ae3acb03c..00000000000 --- a/lib/strtoull.c +++ /dev/null @@ -1,26 +0,0 @@ -/* Function to parse an 'unsigned long long int' from text. - Copyright (C) 1995-1997, 1999, 2009-2017 Free Software Foundation, Inc. - NOTE: The canonical source of this file is maintained with the GNU C - Library. Bugs can be reported to bug-glibc@gnu.org. - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation; either version 3 of the License, or any - later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -#define QUAD 1 - -#include "strtoul.c" - -#ifdef _LIBC -strong_alias (__strtoull_internal, __strtouq_internal) -weak_alias (strtoull, strtouq) -#endif diff --git a/lib/strtoumax.c b/lib/strtoumax.c deleted file mode 100644 index dc395d626ab..00000000000 --- a/lib/strtoumax.c +++ /dev/null @@ -1,2 +0,0 @@ -#define UNSIGNED 1 -#include "strtoimax.c" diff --git a/m4/gnulib-comp.m4 b/m4/gnulib-comp.m4 index 3f196d4f1de..8f53a990e34 100644 --- a/m4/gnulib-comp.m4 +++ b/m4/gnulib-comp.m4 @@ -140,8 +140,6 @@ AC_DEFUN([gl_EARLY], # Code from module string: # Code from module strtoimax: # Code from module strtoll: - # Code from module strtoull: - # Code from module strtoumax: # Code from module symlink: # Code from module sys_select: # Code from module sys_stat: @@ -364,12 +362,6 @@ AC_DEFUN([gl_INIT], gl_PREREQ_STRTOIMAX fi gl_INTTYPES_MODULE_INDICATOR([strtoimax]) - gl_FUNC_STRTOUMAX - if test $HAVE_DECL_STRTOUMAX = 0 || test $REPLACE_STRTOUMAX = 1; then - AC_LIBOBJ([strtoumax]) - gl_PREREQ_STRTOUMAX - fi - gl_INTTYPES_MODULE_INDICATOR([strtoumax]) gl_FUNC_SYMLINK if test $HAVE_SYMLINK = 0 || test $REPLACE_SYMLINK = 1; then AC_LIBOBJ([symlink]) @@ -420,7 +412,6 @@ AC_DEFUN([gl_INIT], gl_gnulib_enabled_6099e9737f757db36c47fa9d9f02e88c=false gl_gnulib_enabled_secure_getenv=false gl_gnulib_enabled_strtoll=false - gl_gnulib_enabled_strtoull=false gl_gnulib_enabled_tempname=false gl_gnulib_enabled_682e609604ccaac6be382e4ee3a4eaec=false func_gl_gnulib_m4code_260941c0e5dc67ec9e87d1fb321c300b () @@ -569,18 +560,6 @@ AC_DEFUN([gl_INIT], gl_gnulib_enabled_strtoll=true fi } - func_gl_gnulib_m4code_strtoull () - { - if ! $gl_gnulib_enabled_strtoull; then - gl_FUNC_STRTOULL - if test $HAVE_STRTOULL = 0; then - AC_LIBOBJ([strtoull]) - gl_PREREQ_STRTOULL - fi - gl_STDLIB_MODULE_INDICATOR([strtoull]) - gl_gnulib_enabled_strtoull=true - fi - } func_gl_gnulib_m4code_tempname () { if ! $gl_gnulib_enabled_tempname; then @@ -649,9 +628,6 @@ AC_DEFUN([gl_INIT], if { test $HAVE_DECL_STRTOIMAX = 0 || test $REPLACE_STRTOIMAX = 1; } && test $ac_cv_type_long_long_int = yes; then func_gl_gnulib_m4code_strtoll fi - if { test $HAVE_DECL_STRTOUMAX = 0 || test $REPLACE_STRTOUMAX = 1; } && test $ac_cv_type_unsigned_long_long_int = yes; then - func_gl_gnulib_m4code_strtoull - fi if test $HAVE_TIMEGM = 0 || test $REPLACE_TIMEGM = 1; then func_gl_gnulib_m4code_5264294aa0a5557541b53c8c741f7f31 fi @@ -670,7 +646,6 @@ AC_DEFUN([gl_INIT], AM_CONDITIONAL([gl_GNULIB_ENABLED_6099e9737f757db36c47fa9d9f02e88c], [$gl_gnulib_enabled_6099e9737f757db36c47fa9d9f02e88c]) AM_CONDITIONAL([gl_GNULIB_ENABLED_secure_getenv], [$gl_gnulib_enabled_secure_getenv]) AM_CONDITIONAL([gl_GNULIB_ENABLED_strtoll], [$gl_gnulib_enabled_strtoll]) - AM_CONDITIONAL([gl_GNULIB_ENABLED_strtoull], [$gl_gnulib_enabled_strtoull]) AM_CONDITIONAL([gl_GNULIB_ENABLED_tempname], [$gl_gnulib_enabled_tempname]) AM_CONDITIONAL([gl_GNULIB_ENABLED_682e609604ccaac6be382e4ee3a4eaec], [$gl_gnulib_enabled_682e609604ccaac6be382e4ee3a4eaec]) # End of code from modules @@ -940,9 +915,6 @@ AC_DEFUN([gl_FILE_LIST], [ lib/strtoimax.c lib/strtol.c lib/strtoll.c - lib/strtoul.c - lib/strtoull.c - lib/strtoumax.c lib/symlink.c lib/sys_select.in.h lib/sys_stat.in.h @@ -1051,8 +1023,6 @@ AC_DEFUN([gl_FILE_LIST], [ m4/string_h.m4 m4/strtoimax.m4 m4/strtoll.m4 - m4/strtoull.m4 - m4/strtoumax.m4 m4/symlink.m4 m4/sys_select_h.m4 m4/sys_socket_h.m4 diff --git a/m4/strtoull.m4 b/m4/strtoull.m4 deleted file mode 100644 index c6b215072b6..00000000000 --- a/m4/strtoull.m4 +++ /dev/null @@ -1,24 +0,0 @@ -# strtoull.m4 serial 7 -dnl Copyright (C) 2002, 2004, 2006, 2008-2017 Free Software Foundation, Inc. -dnl This file is free software; the Free Software Foundation -dnl gives unlimited permission to copy and/or distribute it, -dnl with or without modifications, as long as this notice is preserved. - -AC_DEFUN([gl_FUNC_STRTOULL], -[ - AC_REQUIRE([gl_STDLIB_H_DEFAULTS]) - dnl We don't need (and can't compile) the replacement strtoull - dnl unless the type 'unsigned long long int' exists. - AC_REQUIRE([AC_TYPE_UNSIGNED_LONG_LONG_INT]) - if test "$ac_cv_type_unsigned_long_long_int" = yes; then - AC_CHECK_FUNCS([strtoull]) - if test $ac_cv_func_strtoull = no; then - HAVE_STRTOULL=0 - fi - fi -]) - -# Prerequisites of lib/strtoull.c. -AC_DEFUN([gl_PREREQ_STRTOULL], [ - : -]) diff --git a/m4/strtoumax.m4 b/m4/strtoumax.m4 deleted file mode 100644 index 43ef5b5abbf..00000000000 --- a/m4/strtoumax.m4 +++ /dev/null @@ -1,28 +0,0 @@ -# strtoumax.m4 serial 12 -dnl Copyright (C) 2002-2004, 2006, 2009-2017 Free Software Foundation, Inc. -dnl This file is free software; the Free Software Foundation -dnl gives unlimited permission to copy and/or distribute it, -dnl with or without modifications, as long as this notice is preserved. - -AC_DEFUN([gl_FUNC_STRTOUMAX], -[ - AC_REQUIRE([gl_INTTYPES_H_DEFAULTS]) - - dnl On OSF/1 5.1 with cc, this function is declared but not defined. - AC_CHECK_FUNCS_ONCE([strtoumax]) - AC_CHECK_DECLS_ONCE([strtoumax]) - if test "$ac_cv_have_decl_strtoumax" = yes; then - if test "$ac_cv_func_strtoumax" != yes; then - # HP-UX 11.11 has "#define strtoimax(...) ..." but no function. - REPLACE_STRTOUMAX=1 - fi - else - HAVE_DECL_STRTOUMAX=0 - fi -]) - -# Prerequisites of lib/strtoumax.c. -AC_DEFUN([gl_PREREQ_STRTOUMAX], [ - AC_CHECK_DECLS([strtoull]) - AC_REQUIRE([AC_TYPE_UNSIGNED_LONG_LONG_INT]) -]) diff --git a/src/editfns.c b/src/editfns.c index 98187df5d97..1dbae8f5d4b 100644 --- a/src/editfns.c +++ b/src/editfns.c @@ -3851,6 +3851,23 @@ usage: (propertize STRING &rest PROPERTIES) */) return string; } +/* Convert the prefix of STR from ASCII decimal digits to a number. + Set *STR_END to the address of the first non-digit. Return the + number, or PTRDIFF_MAX on overflow. Return 0 if there is no number. + This is like strtol for ptrdiff_t and base 10 and C locale, + except without negative numbers or errno. */ + +static ptrdiff_t +str2num (char *str, char **str_end) +{ + ptrdiff_t n = 0; + for (; c_isdigit (*str); str++) + if (INT_MULTIPLY_WRAPV (n, 10, &n) || INT_ADD_WRAPV (n, *str - '0', &n)) + n = PTRDIFF_MAX; + *str_end = str; + return n; +} + DEFUN ("format", Fformat, Sformat, 1, MANY, 0, doc: /* Format a string out of a format-string and arguments. The first argument is a format control string. @@ -4057,17 +4074,16 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message) digits to print after the '.' for floats, or the max. number of chars to print from a string. */ - uintmax_t num; + ptrdiff_t num; char *num_end; if (c_isdigit (*format)) { - num = strtoumax (format, &num_end, 10); + num = str2num (format, &num_end); if (*num_end == '$') { if (num == 0) error ("Invalid format field number 0"); - n = min (num, PTRDIFF_MAX); - n--; + n = num - 1; format = num_end + 1; } } @@ -4095,15 +4111,15 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message) space_flag &= ! plus_flag; zero_flag &= ! minus_flag; - num = strtoumax (format, &num_end, 10); + num = str2num (format, &num_end); if (max_bufsize <= num) string_overflow (); ptrdiff_t field_width = num; bool precision_given = *num_end == '.'; - uintmax_t precision = (precision_given - ? strtoumax (num_end + 1, &num_end, 10) - : UINTMAX_MAX); + ptrdiff_t precision = (precision_given + ? str2num (num_end + 1, &num_end) + : PTRDIFF_MAX); format = num_end; if (format == end) @@ -4176,7 +4192,7 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message) /* handle case (precision[n] >= 0) */ ptrdiff_t prec = -1; - if (precision_given && precision <= TYPE_MAXIMUM (ptrdiff_t)) + if (precision_given) prec = precision; /* lisp_string_width ignores a precision of 0, but GNU @@ -4424,8 +4440,9 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message) padding and excess precision. Deal with excess precision first. This happens only when the format specifies ridiculously large precision. */ - uintmax_t excess_precision = precision - prec; - uintmax_t leading_zeros = 0, trailing_zeros = 0; + ptrdiff_t excess_precision + = precision_given ? precision - prec : 0; + ptrdiff_t leading_zeros = 0, trailing_zeros = 0; if (excess_precision) { if (float_conversion) @@ -4451,7 +4468,9 @@ styled_format (ptrdiff_t nargs, Lisp_Object *args, bool message) /* Compute the total bytes needed for this item, including excess precision and padding. */ - uintmax_t numwidth = sprintf_bytes + excess_precision; + ptrdiff_t numwidth; + if (INT_ADD_WRAPV (sprintf_bytes, excess_precision, &numwidth)) + numwidth = PTRDIFF_MAX; ptrdiff_t padding = numwidth < field_width ? field_width - numwidth : 0; if (max_bufsize - sprintf_bytes <= excess_precision diff --git a/src/lread.c b/src/lread.c index 368b86e8189..f8493982c67 100644 --- a/src/lread.c +++ b/src/lread.c @@ -3495,25 +3495,18 @@ substitute_in_interval (INTERVAL interval, Lisp_Object arg) } -#define LEAD_INT 1 -#define DOT_CHAR 2 -#define TRAIL_INT 4 -#define E_EXP 16 - - -/* Convert STRING to a number, assuming base BASE. Return a fixnum if CP has - integer syntax and fits in a fixnum, else return the nearest float if CP has - either floating point or integer syntax and BASE is 10, else return nil. If - IGNORE_TRAILING, consider just the longest prefix of CP that has - valid floating point syntax. Signal an overflow if BASE is not 10 and the - number has integer syntax but does not fit. */ +/* Convert STRING to a number, assuming base BASE. Return a fixnum if + STRING has integer syntax and fits in a fixnum, else return the + nearest float if STRING has either floating point or integer syntax + and BASE is 10, else return nil. If IGNORE_TRAILING, consider just + the longest prefix of STRING that has valid floating point syntax. + Signal an overflow if BASE is not 10 and the number has integer + syntax but does not fit. */ Lisp_Object string_to_number (char const *string, int base, bool ignore_trailing) { - int state; char const *cp = string; - int leading_digit; bool float_syntax = 0; double value = 0; @@ -3525,15 +3518,23 @@ string_to_number (char const *string, int base, bool ignore_trailing) bool signedp = negative || *cp == '+'; cp += signedp; - state = 0; - - leading_digit = digit_to_number (*cp, base); + enum { INTOVERFLOW = 1, LEAD_INT = 2, DOT_CHAR = 4, TRAIL_INT = 8, + E_EXP = 16 }; + int state = 0; + int leading_digit = digit_to_number (*cp, base); + uintmax_t n = leading_digit; if (leading_digit >= 0) { state |= LEAD_INT; - do - ++cp; - while (digit_to_number (*cp, base) >= 0); + for (int digit; 0 <= (digit = digit_to_number (*++cp, base)); ) + { + if (INT_MULTIPLY_OVERFLOW (n, base)) + state |= INTOVERFLOW; + n *= base; + if (INT_ADD_OVERFLOW (n, digit)) + state |= INTOVERFLOW; + n += digit; + } } if (*cp == '.') { @@ -3583,32 +3584,22 @@ string_to_number (char const *string, int base, bool ignore_trailing) } float_syntax = ((state & (DOT_CHAR|TRAIL_INT)) == (DOT_CHAR|TRAIL_INT) - || state == (LEAD_INT|E_EXP)); + || (state & ~INTOVERFLOW) == (LEAD_INT|E_EXP)); } /* Return nil if the number uses invalid syntax. If IGNORE_TRAILING, accept any prefix that matches. Otherwise, the entire string must match. */ if (! (ignore_trailing ? ((state & LEAD_INT) != 0 || float_syntax) - : (!*cp && ((state & ~DOT_CHAR) == LEAD_INT || float_syntax)))) + : (!*cp && ((state & ~(INTOVERFLOW | DOT_CHAR)) == LEAD_INT + || float_syntax)))) return Qnil; /* If the number uses integer and not float syntax, and is in C-language range, use its value, preferably as a fixnum. */ if (leading_digit >= 0 && ! float_syntax) { - uintmax_t n; - - /* Fast special case for single-digit integers. This also avoids a - glitch when BASE is 16 and IGNORE_TRAILING, because in that - case some versions of strtoumax accept numbers like "0x1" that Emacs - does not allow. */ - if (digit_to_number (string[signedp + 1], base) < 0) - return make_number (negative ? -leading_digit : leading_digit); - - errno = 0; - n = strtoumax (string + signedp, NULL, base); - if (errno == ERANGE) + if (state & INTOVERFLOW) { /* Unfortunately there's no simple and accurate way to convert non-base-10 numbers that are out of C-language range. */