From: Mattias Engdegård Date: Wed, 9 Aug 2023 10:34:06 +0000 (+0200) Subject: Faster NSString to Lisp string conversion X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=722b1ebc6e0ca23c018d5264e5c70b8f37bd9150;p=emacs.git Faster NSString to Lisp string conversion Since we know that the value from [NSString UTF8String] is valid UTF-8, using make_string is wastefully slow. * src/nsfns.m (count_utf8_chars): New function, at least twice as fast as parse_str_as_multibyte used by make_string for this purpose. ([NSString lispString]): Use count_utf8_chars. We now always make a multibyte string because there is no reason not to. --- diff --git a/src/nsfns.m b/src/nsfns.m index f962de02cb9..508568d90c3 100644 --- a/src/nsfns.m +++ b/src/nsfns.m @@ -3796,6 +3796,27 @@ all_nonzero_ascii (unsigned char *str, ptrdiff_t n) return true; } +/* Count the number of characters in STR, NBYTES long. + The string is valid UTF-8 except that it may contain unpaired surrogates. */ +static ptrdiff_t +count_utf8_chars (const char *str, ptrdiff_t nbytes) +{ + /* This is faster than parse_str_as_multibyte, and much faster than + [NSString lengthOfBytesUsingEncoding: NSUTF32StringEncoding]. */ + const char *end = str + nbytes; + ptrdiff_t nc = 0; + while (str < end) + { + nc++; + unsigned char c = *str; + str += ( c <= 0x7f ? 1 // 0xxxxxxx + : c <= 0xdf ? 2 // 110xxxxx 10xxxxxx + : c <= 0xef ? 3 // 1110xxxx 10xxxxxx 10xxxxxx + : 4); // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + } + return nc; +} + @implementation NSString (EmacsString) /* Make an NSString from a Lisp string. STRING must not be in an encoded form (e.g. UTF-8). */ @@ -3840,9 +3861,9 @@ all_nonzero_ascii (unsigned char *str, ptrdiff_t n) /* Make a Lisp string from an NSString. */ - (Lisp_Object)lispString { - // make_string behaves predictably and correctly with UTF-8 input. 
- return make_string ([self UTF8String], - [self lengthOfBytesUsingEncoding: NSUTF8StringEncoding]); + const char *utf8 = [self UTF8String]; + ptrdiff_t bytes = [self lengthOfBytesUsingEncoding: NSUTF8StringEncoding]; + return make_multibyte_string (utf8, count_utf8_chars (utf8, bytes), bytes); } @end