return true;
}
+/* Count the number of characters in STR, NBYTES long.
+ The string is valid UTF-8 except that it may contain unpaired surrogates. */
+static ptrdiff_t
+count_utf8_chars (const char *str, ptrdiff_t nbytes)
+{
+ /* This is faster than parse_str_as_multibyte, and much faster than
+ [NSString lengthOfBytesUsingEncoding: NSUTF32StringEncoding]. */
+ const char *end = str + nbytes;
+ ptrdiff_t nc = 0;
+ while (str < end)
+ {
+ nc++;
+ unsigned char c = *str;
+ str += ( c <= 0x7f ? 1 // 0xxxxxxx
+ : c <= 0xdf ? 2 // 110xxxxx 10xxxxxx
+ : c <= 0xef ? 3 // 1110xxxx 10xxxxxx 10xxxxxx
+ : 4); // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ }
+ return nc;
+}
+
@implementation NSString (EmacsString)
/* Make an NSString from a Lisp string. STRING must not be in an
encoded form (e.g. UTF-8). */
/* Make a Lisp string from an NSString. */
- (Lisp_Object)lispString
{
- // make_string behaves predictably and correctly with UTF-8 input.
- return make_string ([self UTF8String],
- [self lengthOfBytesUsingEncoding: NSUTF8StringEncoding]);
+ const char *utf8 = [self UTF8String];
+ ptrdiff_t bytes = [self lengthOfBytesUsingEncoding: NSUTF8StringEncoding];
+ return make_multibyte_string (utf8, count_utf8_chars (utf8, bytes), bytes);
}
@end