From bfbdf4eb892935536fc665d6cc986fd669364263 Mon Sep 17 00:00:00 2001 From: Po Lu Date: Fri, 4 Aug 2023 14:29:55 +0800 Subject: [PATCH] Optimize creation of multibyte menu items on Android * src/androidvfs.c (android_verify_jni_string): Move to android.c. * src/android.c (android_verify_jni_string): New function. (android_build_string): Forgo encoding menu text if TEXT is a multibyte string that's also a valid JNI string. * src/android.h: Update prototypes. --- src/android.c | 81 ++++++++++++++++++++++++++++++++++++++++++++---- src/android.h | 1 + src/androidvfs.c | 63 ------------------------------------- 3 files changed, 76 insertions(+), 69 deletions(-) diff --git a/src/android.c b/src/android.c index c30d7b58979..bd19107f53a 100644 --- a/src/android.c +++ b/src/android.c @@ -5480,6 +5480,69 @@ android_check_string (Lisp_Object text) return true; } +/* Verify that the specified NULL-terminated NAME is a valid JNI + ``UTF-8'' string. Return 0 if so, 1 otherwise. + + Do not perform GC, enabling NAME to be a direct reference to string + data. + + The native coding system used by the JVM to store strings derives + from UTF-8, but deviates from it in two aspects in an attempt to + better represent the UTF-16 based Java String format, and to let + strings contain NULL characters while remaining valid C strings: + NULL bytes are encoded as two-byte sequences, and Unicode surrogate + pairs encoded as two three-byte sequences are preferred to four-byte + sequences when encoding characters above the BMP. */ + +int +android_verify_jni_string (const char *name) +{ + const unsigned char *chars; + + chars = (unsigned char *) name; + while (*chars) + { + /* Switch on the high 4 bits. */ + + switch (*chars++ >> 4) + { + case 0 ... 7: + /* The 8th bit is clean, so this is a regular C + character. */ + break; + + case 8 ... 0xb: + /* Invalid starting byte! */ + return 1; + + case 0xf: + /* The start of a four byte sequence. These aren't allowed + in Java. 
*/ + return 1; + + case 0xe: + /* The start of a three byte sequence. Verify that it is + continued. */ + + if ((*chars++ & 0xc0) != 0x80) + return 1; + + FALLTHROUGH; + + case 0xc ... 0xd: + /* The start of a two byte sequence. Verify that the + next byte exists and is a continuation byte (10xxxxxx). */ + + if ((*chars++ & 0xc0) != 0x80) + return 1; + + break; + } + } + + return 0; +} + /* Given a Lisp string TEXT, return a local reference to an equivalent Java string. */ @@ -5492,12 +5555,18 @@ android_build_string (Lisp_Object text) jchar *characters; USE_SAFE_ALLOCA; - /* Directly encode TEXT if it contains no multibyte - characters. This is okay because the Java extended UTF - format is compatible with ASCII. */ - - if (SBYTES (text) == SCHARS (text) - && android_check_string (text)) + /* Directly encode TEXT if it contains no non-ASCII characters, or + is multibyte and a valid Modified UTF-8 string. This is okay + because the Java extended UTF format is compatible with + ASCII. */ + + if ((SBYTES (text) == SCHARS (text) + && android_check_string (text)) + /* If TEXT is a multibyte string, then it's using Emacs's + internal UTF-8 coding system, a significant subset of which + is compatible with JNI. 
*/ + || (STRING_MULTIBYTE (text) + && !android_verify_jni_string (SSDATA (text)))) { string = (*android_java_env)->NewStringUTF (android_java_env, SSDATA (text)); diff --git a/src/android.h b/src/android.h index cecdfab002f..a052d3a3b21 100644 --- a/src/android.h +++ b/src/android.h @@ -105,6 +105,7 @@ extern bool android_detect_mouse (void); extern void android_set_dont_focus_on_map (android_window, bool); extern void android_set_dont_accept_focus (android_window, bool); +extern int android_verify_jni_string (const char *); extern jstring android_build_string (Lisp_Object); extern jstring android_build_jstring (const char *); extern void android_exception_check (void); diff --git a/src/androidvfs.c b/src/androidvfs.c index 2b467bc444f..0d99116c75c 100644 --- a/src/androidvfs.c +++ b/src/androidvfs.c @@ -3299,9 +3299,6 @@ static struct android_saf_root_vdir *all_saf_root_vdirs; static struct android_vnode *android_saf_tree_from_name (char *, const char *, const char *); -/* Forward declaration. */ -static int android_verify_jni_string (const char *); - /* Ascertain and return whether or not AUTHORITY designates a content provider offering at least one directory tree accessible to Emacs. */ @@ -4437,66 +4434,6 @@ static struct android_vops saf_new_vfs_ops; /* Chain of all open SAF directory streams. */ static struct android_saf_tree_vdir *all_saf_tree_vdirs; -/* Verify that the specified NULL-terminated STRING is a valid JNI - ``UTF-8'' string. Return 0 if so, 1 otherwise. - - The native coding system used by the JVM to store strings derives - from UTF-8, but deviates from it in two aspects in an attempt to - better represent the UCS-16 based Java String format, and to let - strings contain NULL characters while remaining valid C strings: - NULL bytes are encoded as two-byte sequences, and Unicode surrogate - pairs encoded as two-byte sequences are prefered to four-byte - sequences when encoding characters above the BMP. 
*/ - -static int -android_verify_jni_string (const char *name) -{ - const unsigned char *chars; - - chars = (unsigned char *) name; - while (*chars) - { - /* Switch on the high 4 bits. */ - - switch (*chars++ >> 4) - { - case 0 ... 7: - /* The 8th bit is clean, so this is a regular C - character. */ - break; - - case 8 ... 0xb: - /* Invalid starting byte! */ - return 1; - - case 0xf: - /* The start of a four byte sequence. These aren't allowed - in Java. */ - return 1; - - case 0xe: - /* The start of a three byte sequence. Verify that its - continued. */ - - if ((*chars++ & 0xc0) != 0x80) - return 1; - - FALLTHROUGH; - - case 0xc ... 0xd: - /* The start of a two byte sequence. Verify that the - next byte exists and has its high bit set. */ - - if ((*chars++ & 0xc0) != 0x80) - return 1; - - break; - } - } - - return 0; -} - /* Find the document ID of the file within TREE_URI designated by NAME. -- 2.39.2