From f9ce6746c4ef80ce1c274b40be0b7f558ae356dd Mon Sep 17 00:00:00 2001 From: Po Lu Date: Sat, 23 Mar 2024 15:37:43 +0800 Subject: [PATCH] Correctly handle non-BMP characters in Android content file names * lisp/term/android-win.el (android-encode-jni) (android-decode-jni, android-jni): New coding system, for Android file names and runtime data. * src/androidterm.h (syms_of_androidvfs): New function. * src/androidvfs.c (struct android_special_vnode): New field special_coding_system. (android_saf_tree_readdir): Decode the file name using the android-jni coding system. (special_vnodes): <content>: Specify a file name coding system. (android_vfs_convert_name): New function. (android_root_name): If a special coding system be specified for a special vnode, convert components to it before invoking its name function. (syms_of_androidvfs): New symbol Qandroid_jni. * src/emacs.c (android_emacs_init): Call syms_of_androidvfs. (cherry picked from commit e39cb515a108682b520e499c334a600ee634fbf6) --- lisp/term/android-win.el | 89 +++++++++++++++++++++++++ src/androidterm.h | 5 +- src/androidvfs.c | 137 ++++++++++++++++++++++++++++++++++----- src/emacs.c | 1 + 4 files changed, 215 insertions(+), 17 deletions(-) diff --git a/lisp/term/android-win.el b/lisp/term/android-win.el index 8d262e5da98..6512ef81ff7 100644 --- a/lisp/term/android-win.el +++ b/lisp/term/android-win.el @@ -528,6 +528,95 @@ accessible to other programs." (setq url replacement-url)) (android-browse-url-internal url send)) + +;; Coding systems used by androidvfs.c. + +(define-ccl-program android-encode-jni + `(2 ((loop + (read r0) + (if (r0 < #x1) ; 0x0 is encoded specially in JNI environments. 
+ ((write #xc0) + (write #x80)) + ((if (r0 < #x80) ; ASCII + ((write r0)) + (if (r0 < #x800) ; \u0080 - \u07ff + ((write ((r0 >> 6) | #xC0)) + (write ((r0 & #x3F) | #x80))) + ;; \u0800 - \uFFFF + (if (r0 < #x10000) + ((write ((r0 >> 12) | #xE0)) + (write (((r0 >> 6) & #x3F) | #x80)) + (write ((r0 & #x3F) | #x80))) + ;; Supplementary characters must be converted into + ;; surrogate pairs before encoding. + (;; High surrogate + (r1 = ((((r0 - #x10000) >> 10) & #x3ff) + #xD800)) + ;; Low surrogate. + (r2 = (((r0 - #x10000) & #x3ff) + #xDC00)) + ;; Write both surrogate characters. + (write ((r1 >> 12) | #xE0)) + (write (((r1 >> 6) & #x3F) | #x80)) + (write ((r1 & #x3F) | #x80)) + (write ((r2 >> 12) | #xE0)) + (write (((r2 >> 6) & #x3F) | #x80)) + (write ((r2 & #x3F) | #x80)))))))) + (repeat)))) + "Encode characters from the input buffer for Java virtual machines.") + +(define-ccl-program android-decode-jni + `(1 ((loop + ((read-if (r0 >= #x80) ; More than a one-byte sequence? + ((if (r0 < #xe0) + ;; Two-byte sequence; potentially a NULL + ;; character. + ((read r4) + (r4 &= #x3f) + (r0 = (((r0 & #x1f) << 6) | r4))) + (if (r0 < ?\xF0) + ;; Three-byte sequence, after which surrogate + ;; pairs should be processed. + ((read r4 r6) + (r4 = ((r4 & #x3f) << 6)) + (r6 &= #x3f) + (r0 = ((((r0 & #xf) << 12) | r4) | r6))) + ;; Four-byte sequences are not valid under the + ;; JVM specification, but Android produces them + ;; when encoding Emoji characters for being + ;; supposedly less of a surprise to applications. + ;; This is obviously not true of programs written + ;; to the letter of the documentation, but 50 + ;; million Frenchmen make a right (and this + ;; deviation from the norm is predictably absent + ;; from Android's documentation on the subject). + ((read r1 r4 r6) + (r1 = ((r1 & #x3f) << 12)) + (r4 = ((r4 & #x3f) << 6)) + (r6 &= #x3F) + (r0 = (((((r0 & #x07) << 18) | r1) | r4) | r6)))))))) + (if ((r0 & #xf800) == #xd800) + ;; High surrogate. 
+ ((read-if (r2 >= #xe0) + ((r0 = ((r0 & #x3ff) << 10)) + (read r4 r6) + (r4 = ((r4 & #x3f) << 6)) + (r6 &= #x3f) + (r1 = ((((r2 & #xf) << 12) | r4) | r6)) + (r0 = (((r1 & #x3ff) | r0) + #x10000)))))) + (write r0) + (repeat)))) + "Decode JVM-encoded characters in the input buffer.") + +(define-coding-system 'android-jni + "CESU-8 based encoding for communication with the Android runtime." + :mnemonic ?J + :coding-type 'ccl + :eol-type 'unix + :ascii-compatible-p nil ; for \0 is encoded as a two-byte sequence. + :default-char ?\0 + :charset-list '(unicode) + :ccl-decoder 'android-decode-jni + :ccl-encoder 'android-encode-jni) + (provide 'android-win) ;; android-win.el ends here. diff --git a/src/androidterm.h b/src/androidterm.h index ca6929bef0e..fd4cc99f641 100644 --- a/src/androidterm.h +++ b/src/androidterm.h @@ -461,7 +461,7 @@ extern void sfntfont_android_shrink_scanline_buffer (void); extern void init_sfntfont_android (void); extern void syms_of_sfntfont_android (void); -/* Defined in androidselect.c */ +/* Defined in androidselect.c. */ #ifndef ANDROID_STUBIFY @@ -473,6 +473,9 @@ extern void android_notification_action (struct android_notification_event *, extern void init_androidselect (void); extern void syms_of_androidselect (void); +/* Defined in androidvfs.c. */ +extern void syms_of_androidvfs (void); + #endif diff --git a/src/androidvfs.c b/src/androidvfs.c index 9e3d5cab8cf..6a9ddb33c56 100644 --- a/src/androidvfs.c +++ b/src/androidvfs.c @@ -38,8 +38,10 @@ along with GNU Emacs. If not, see . */ #include #include "android.h" +#include "androidterm.h" #include "systime.h" #include "blockinput.h" +#include "coding.h" #if __ANDROID_API__ >= 9 #include @@ -248,8 +250,14 @@ struct android_special_vnode /* Function called to create the initial vnode from the rest of the component. 
*/ struct android_vnode *(*initial) (char *, size_t); + + /* If non-nil, an encoding system into which file name buffers are to + be re-encoded before being handed to VFS functions. */ + Lisp_Object special_coding_system; }; +verify (NIL_IS_ZERO); /* special_coding_system above. */ + enum android_vnode_type { ANDROID_VNODE_UNIX, @@ -3867,7 +3875,8 @@ android_saf_root_readdir (struct android_vdir *vdir) NULL); android_exception_check_nonnull ((void *) chars, string); - /* Figure out how large it is, and then resize dirent to fit. */ + /* Figure out how large it is, and then resize dirent to fit--this + string is always ASCII. */ length = strlen (chars) + 1; size = offsetof (struct dirent, d_name) + length; dirent = xrealloc (dirent, size); @@ -5479,6 +5488,7 @@ android_saf_tree_readdir (struct android_vdir *vdir) jmethodID method; size_t length, size; const char *chars; + struct coding_system coding; dir = (struct android_saf_tree_vdir *) vdir; @@ -5526,9 +5536,25 @@ android_saf_tree_readdir (struct android_vdir *vdir) NULL); android_exception_check_nonnull ((void *) chars, d_name); - /* Figure out how large it is, and then resize dirent to fit. */ + /* Decode this JNI string into utf-8-emacs; see + android_vfs_convert_name for considerations regarding coding + systems. */ + length = strlen (chars); + setup_coding_system (Qandroid_jni, &coding); + coding.mode |= CODING_MODE_LAST_BLOCK; + coding.source = (const unsigned char *) chars; + coding.dst_bytes = 0; + coding.destination = NULL; + decode_coding_object (&coding, Qnil, 0, 0, length, length, Qnil); + + /* Release the string data and the local reference to STRING. */ + (*android_java_env)->ReleaseStringUTFChars (android_java_env, + (jstring) d_name, + chars); + + /* Resize dirent to accommodate the decoded text. */ length = strlen (chars) + 1; - size = offsetof (struct dirent, d_name) + length; + size = offsetof (struct dirent, d_name) + 1 + coding.produced; dirent = xrealloc (dirent, size); /* Clear dirent. 
*/ @@ -5540,12 +5566,12 @@ android_saf_tree_readdir (struct android_vdir *vdir) dirent->d_off = 0; dirent->d_reclen = size; dirent->d_type = d_type ? DT_DIR : DT_UNKNOWN; - strcpy (dirent->d_name, chars); + memcpy (dirent->d_name, coding.destination, coding.produced); + dirent->d_name[coding.produced] = '\0'; + + /* Free the coding system destination buffer. */ + xfree (coding.destination); - /* Release the string data and the local reference to STRING. */ - (*android_java_env)->ReleaseStringUTFChars (android_java_env, - (jstring) d_name, - chars); ANDROID_DELETE_LOCAL_REF (d_name); return dirent; } @@ -6531,9 +6557,35 @@ static struct android_vops root_vfs_ops = static struct android_special_vnode special_vnodes[] = { { "assets", 6, android_afs_initial, }, - { "content", 7, android_content_initial, }, + { "content", 7, android_content_initial, + LISPSYM_INITIALLY (Qandroid_jni), }, }; +/* Convert the file name NAME from Emacs's internal character encoding + to CODING, and return a Lisp string with the data so produced. + + Calling this function creates an implicit assumption that + file-name-coding-system is compatible with utf-8-emacs, which is not + unacceptable as users with cause to modify file-name-coding-system + should be aware and prepared for consequences towards files stored on + different filesystems, including virtual ones. */ + +static Lisp_Object +android_vfs_convert_name (const char *name, Lisp_Object coding) +{ + Lisp_Object src_coding, name1; + + src_coding = Qutf_8_emacs; + + /* Convert NAME in two steps: first from SRC_CODING, which is + always utf-8-emacs here, and then into CODING, the coding + system requested by the caller. 
*/ + AUTO_STRING (file_name, name); + name1 = code_convert_string_norecord (file_name, src_coding, false); + name1 = code_convert_string (name1, coding, Qt, true, true, true); + return name1; +} + static struct android_vnode * android_root_name (struct android_vnode *vnode, char *name, size_t length) @@ -6541,6 +6593,8 @@ android_root_name (struct android_vnode *vnode, char *name, char *component_end; struct android_special_vnode *special; size_t i; + Lisp_Object file_name; + struct android_vnode *vp; /* Skip any leading separator in NAME. */ @@ -6567,8 +6621,29 @@ android_root_name (struct android_vnode *vnode, char *name, if (component_end - name == special->length && !memcmp (special->name, name, special->length)) - return (*special->initial) (component_end, - length - special->length); + { + if (!NILP (special->special_coding_system)) + { + USE_SAFE_ALLOCA; + + file_name + = android_vfs_convert_name (component_end, + special->special_coding_system); + + /* Allocate a buffer and copy file_name into the same. */ + length = SBYTES (file_name) + 1; + name = SAFE_ALLOCA (length + 1); + + /* Copy the trailing NULL byte also. */ + memcpy (name, SDATA (file_name), length); + vp = (*special->initial) (name, length - 1); + SAFE_FREE (); + return vp; + } + + return (*special->initial) (component_end, + length - special->length); + } /* Detect the case where a special is named with a trailing directory separator. */ @@ -6576,9 +6651,30 @@ android_root_name (struct android_vnode *vnode, char *name, if (component_end - name == special->length + 1 && !memcmp (special->name, name, special->length) && name[special->length] == '/') - /* Make sure to include the directory separator. 
*/ - return (*special->initial) (component_end - 1, - length - special->length); + { + if (!NILP (special->special_coding_system)) + { + USE_SAFE_ALLOCA; + + file_name + = android_vfs_convert_name (component_end - 1, + special->special_coding_system); + + /* Allocate a buffer and copy file_name into the same. */ + length = SBYTES (file_name) + 1; + name = SAFE_ALLOCA (length + 1); + + /* Copy the trailing NULL byte also. */ + memcpy (name, SDATA (file_name), length); + vp = (*special->initial) (name, length - 1); + SAFE_FREE (); + return vp; + } + + /* Make sure to include the directory separator. */ + return (*special->initial) (component_end - 1, + length - special->length); + } } /* Otherwise, continue searching for a vnode normally. */ @@ -6589,8 +6685,9 @@ android_root_name (struct android_vnode *vnode, char *name, /* File system lookup. */ -/* Look up the vnode that designates NAME, a file name that is at - least N bytes. +/* Look up the vnode that designates NAME, a file name that is at least + N bytes, converting between different file name coding systems as + necessary. NAME may be either an absolute file name or a name relative to the current working directory. It must not be longer than EMACS_PATH_MAX @@ -7605,3 +7702,11 @@ android_closedir (struct android_vdir *dirp) { return (*dirp->closedir) (dirp); } + + + +void +syms_of_androidvfs (void) +{ + DEFSYM (Qandroid_jni, "android-jni"); +} diff --git a/src/emacs.c b/src/emacs.c index f4bfb9a6bbd..87f12d3fa86 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -2444,6 +2444,7 @@ Using an Emacs configured with --with-x-toolkit=lucid does not have this problem #if !defined ANDROID_STUBIFY syms_of_androidfont (); syms_of_androidselect (); + syms_of_androidvfs (); syms_of_sfntfont (); syms_of_sfntfont_android (); #endif /* !ANDROID_STUBIFY */ -- 2.39.5