From 3b7d55a8013914b6707211d148a6e878ca118ad9 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?=
Date: Wed, 16 Feb 2022 12:27:59 +0100
Subject: [PATCH] Speed up count_size_as_multibyte

This function is used in many places to calculate the length of
a unibyte string converted to multibyte.

* src/character.c (count_size_as_multibyte): Move the overflow test
outside the loop, which makes it much faster.  Standard compilers
will even vectorise it if asked to (-O2 in Clang, -O3 in GCC).
---
 src/character.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/character.c b/src/character.c
index eba417d005d..c1a1b553891 100644
--- a/src/character.c
+++ b/src/character.c
@@ -654,15 +654,14 @@ str_as_multibyte (unsigned char *str, ptrdiff_t len, ptrdiff_t nbytes,
 ptrdiff_t
 count_size_as_multibyte (const unsigned char *str, ptrdiff_t len)
 {
-  const unsigned char *endp = str + len;
+  /* Count the number of non-ASCII (raw) bytes, since they will occupy
+     two bytes in a multibyte string.  */
+  ptrdiff_t nonascii = 0;
+  for (ptrdiff_t i = 0; i < len; i++)
+    nonascii += str[i] >> 7;
   ptrdiff_t bytes;
-
-  for (bytes = 0; str < endp; str++)
-    {
-      int n = *str < 0x80 ? 1 : 2;
-      if (INT_ADD_WRAPV (bytes, n, &bytes))
-	string_overflow ();
-    }
+  if (INT_ADD_WRAPV (len, nonascii, &bytes))
+    string_overflow ();
 
   return bytes;
 }
-- 
2.39.5