From 3aab8626ba5080bb04d0fdae52d99c850a842a52 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Fri, 18 May 2018 16:34:19 +0300 Subject: [PATCH] Fix decoding of directories when "~" includes non-ASCII chars * src/fileio.c (Fexpand_file_name): Don't build multibyte strings from unibyte non-ASCII strings when NAME and DEFAULT_DIRECTORY have different multibyteness, as this adds bytes to the byte sequence, and in some situations, e.g., when the home directory includes non-ASCII characters, can fail file APIs. (Bug#30755) * lisp/startup.el (normal-top-level): Make sure default-directory is set to a multibyte string when decoded on MS-Windows. --- lisp/startup.el | 14 +++++++-- src/fileio.c | 75 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 71 insertions(+), 18 deletions(-) diff --git a/lisp/startup.el b/lisp/startup.el index 5b2d3e58cba..83fd190ea2b 100644 --- a/lisp/startup.el +++ b/lisp/startup.el @@ -560,9 +560,17 @@ It is the default value of the variable `top-level'." (if default-directory (setq default-directory (if (eq system-type 'windows-nt) - ;; Convert backslashes to forward slashes. - (expand-file-name - (decode-coding-string default-directory coding t)) + ;; We pass the decoded default-directory as + ;; the 2nd arg to expand-file-name to make + ;; sure it sees a multibyte string as the + ;; default directory; this avoids the side + ;; effect of returning a unibyte string from + ;; expand-file-name because it still sees + ;; the undecoded value of default-directory. + (let ((defdir (decode-coding-string default-directory + coding t))) + ;; Convert backslashes to forward slashes. + (expand-file-name defdir defdir)) (decode-coding-string default-directory coding t)))))) ;; Decode all the important variables and directory lists, now diff --git a/src/fileio.c b/src/fileio.c index 2f8358f01b5..e8d966e1631 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -867,33 +867,78 @@ the root directory. 
*/) } } multibyte = STRING_MULTIBYTE (name); - if (multibyte != STRING_MULTIBYTE (default_directory)) + bool defdir_multibyte = STRING_MULTIBYTE (default_directory); + if (multibyte != defdir_multibyte) { + /* We want to make both NAME and DEFAULT_DIRECTORY have the same + multibyteness. Strategy: + . If either NAME or DEFAULT_DIRECTORY is pure-ASCII, they + can be converted to the multibyteness of the other one + while keeping the same byte sequence. + . If both are non-ASCII, the only safe conversion is to + convert the multibyte one to be unibyte, because the + reverse conversion potentially adds bytes while raw bytes + are converted to their multibyte forms, which we will be + unable to account for, since the information about the + original multibyteness is lost. If those additional bytes + later leak to system APIs because they are not encoded or + because they are converted to unibyte strings by keeping + the data, file APIs will fail. + + Note: One could argue that if we see a multibyte string, it + is evidence that file-name decoding was already set up, and + we could convert unibyte strings to multibyte using + DECODE_FILE. However, this is risky, because the likes of + string_to_multibyte are capable of creating multibyte strings + without any decoding. */ if (multibyte) { - unsigned char *p = SDATA (name); + bool name_ascii_p = SCHARS (name) == SBYTES (name); + unsigned char *p = SDATA (default_directory); - while (*p && ASCII_CHAR_P (*p)) - p++; - if (*p == '\0') + if (!name_ascii_p) + while (*p && ASCII_CHAR_P (*p)) + p++; + if (name_ascii_p || *p != '\0') { - /* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is - unibyte. Do not convert DEFAULT_DIRECTORY to - multibyte; instead, convert NAME to a unibyte string, - so that the result of this function is also a unibyte - string. This is needed during bootstrapping and - dumping, when Emacs cannot decode file names, because - the locale environment is not set up. 
*/ + /* DEFAULT_DIRECTORY is unibyte and possibly non-ASCII. + Make a unibyte string out of NAME, and arrange for + the result of this function to be a unibyte string. + This is needed during bootstrapping and dumping, when + Emacs cannot decode file names, because the locale + environment is not set up. */ name = make_unibyte_string (SSDATA (name), SBYTES (name)); multibyte = 0; } else - default_directory = string_to_multibyte (default_directory); + { + /* NAME is non-ASCII and multibyte, and + DEFAULT_DIRECTORY is unibyte and pure-ASCII: make a + multibyte string out of DEFAULT_DIRECTORY's data. */ + default_directory = + make_multibyte_string (SSDATA (default_directory), + SCHARS (default_directory), + SCHARS (default_directory)); + } } else { - name = string_to_multibyte (name); - multibyte = 1; + unsigned char *p = SDATA (name); + + while (*p && ASCII_CHAR_P (*p)) + p++; + if (*p == '\0') + { + /* DEFAULT_DIRECTORY is multibyte and NAME is unibyte + and pure-ASCII. Make a multibyte string out of + NAME's data. */ + name = make_multibyte_string (SSDATA (name), + SCHARS (name), SCHARS (name)); + multibyte = 1; + } + else + default_directory = make_unibyte_string (SSDATA (default_directory), + SBYTES (default_directory)); } } -- 2.39.5