(if default-directory
(setq default-directory
(if (eq system-type 'windows-nt)
- ;; Convert backslashes to forward slashes.
- (expand-file-name
- (decode-coding-string default-directory coding t))
+ ;; We pass the decoded default-directory as
+ ;; the 2nd arg to expand-file-name to make
+ ;; sure it sees a multibyte string as the
+ ;; default directory; this avoids the side
+ ;; effect of returning a unibyte string from
+ ;; expand-file-name because it still sees
+ ;; the undecoded value of default-directory.
+ (let ((defdir (decode-coding-string default-directory
+ coding t)))
+ ;; Convert backslashes to forward slashes.
+ (expand-file-name defdir defdir))
(decode-coding-string default-directory coding t))))))
;; Decode all the important variables and directory lists, now
}
}
multibyte = STRING_MULTIBYTE (name);
- if (multibyte != STRING_MULTIBYTE (default_directory))
+ bool defdir_multibyte = STRING_MULTIBYTE (default_directory);
+ if (multibyte != defdir_multibyte)
{
+ /* We want to make both NAME and DEFAULT_DIRECTORY have the same
+ multibyteness. Strategy:
+ . If either NAME or DEFAULT_DIRECTORY is pure-ASCII, they
+ can be converted to the multibyteness of the other one
+ while keeping the same byte sequence.
+ . If both are non-ASCII, the only safe conversion is to
+ convert the multibyte one to be unibyte, because the
+ reverse conversion potentially adds bytes while raw bytes
+ are converted to their multibyte forms, which we will be
+ unable to account for, since the information about the
+ original multibyteness is lost. If those additional bytes
+ later leak to system APIs because they are not encoded or
+ because they are converted to unibyte strings by keeping
+ the data, file APIs will fail.
+
+ Note: One could argue that if we see a multibyte string, it
+ is evidence that file-name decoding was already set up, and
+ we could convert unibyte strings to multibyte using
+ DECODE_FILE. However, this is risky, because the likes of
+ string_to_multibyte are capable of creating multibyte strings
+ without any decoding. */
if (multibyte)
{
- unsigned char *p = SDATA (name);
+ bool name_ascii_p = SCHARS (name) == SBYTES (name);
+ unsigned char *p = SDATA (default_directory);
- while (*p && ASCII_CHAR_P (*p))
- p++;
- if (*p == '\0')
+ if (!name_ascii_p)
+ while (*p && ASCII_CHAR_P (*p))
+ p++;
+ if (name_ascii_p || *p != '\0')
{
- /* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is
- unibyte. Do not convert DEFAULT_DIRECTORY to
- multibyte; instead, convert NAME to a unibyte string,
- so that the result of this function is also a unibyte
- string. This is needed during bootstrapping and
- dumping, when Emacs cannot decode file names, because
- the locale environment is not set up. */
+ /* DEFAULT_DIRECTORY is unibyte and possibly non-ASCII.
+ Make a unibyte string out of NAME, and arrange for
+ the result of this function to be a unibyte string.
+ This is needed during bootstrapping and dumping, when
+ Emacs cannot decode file names, because the locale
+ environment is not set up. */
name = make_unibyte_string (SSDATA (name), SBYTES (name));
multibyte = 0;
}
else
- default_directory = string_to_multibyte (default_directory);
+ {
+ /* NAME is non-ASCII and multibyte, and
+ DEFAULT_DIRECTORY is unibyte and pure-ASCII: make a
+ multibyte string out of DEFAULT_DIRECTORY's data. */
+ default_directory =
+ make_multibyte_string (SSDATA (default_directory),
+ SCHARS (default_directory),
+ SCHARS (default_directory));
+ }
}
else
{
- name = string_to_multibyte (name);
- multibyte = 1;
+ unsigned char *p = SDATA (name);
+
+ while (*p && ASCII_CHAR_P (*p))
+ p++;
+ if (*p == '\0')
+ {
+ /* DEFAULT_DIRECTORY is multibyte and NAME is unibyte
+ and pure-ASCII. Make a multibyte string out of
+ NAME's data. */
+ name = make_multibyte_string (SSDATA (name),
+ SCHARS (name), SCHARS (name));
+ multibyte = 1;
+ }
+ else
+ default_directory = make_unibyte_string (SSDATA (default_directory),
+ SBYTES (default_directory));
}
}