From 5878abf87b6b3ead1367cbae5cc6b0743349f611 Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Mon, 28 Nov 2016 17:43:25 +0200 Subject: [PATCH] Fix 'expand-file-name' during startup on MS-Windows * src/w32.c (w32_init_file_name_codepage): New function, resets file_name_codepage and w32_ansi_code_page to undo the values recorded during dumping. (codepage_for_filenames): Fix an embarrassing typo. Ignore the cached value of file-name encoding if it is nil, i.e. not initialized yet. Actually cache the last used file-name encoding to avoid calling APIs when not necessary. * src/w32.h (w32_init_file_name_codepage): Add prototype. * src/w32term.c (syms_of_w32term): Set the value of w32_unicode_filenames according to the OS version. This avoids resetting it during startup, which then causes temacs to run with the incorrect value. * src/emacs.c (main): Call w32_init_file_name_codepage early during the startup. * src/fileio.c (Fexpand_file_name) [WINDOWSNT]: Update 'newdir' after converting $HOME to a UTF-8 string, so that 'newdirlim' is consistent with it. (Bug#25038) * lisp/international/mule-cmds.el (set-locale-environment): Set 'default-file-name-coding-system' to the ANSI codepage even in non-interactive sessions. * lisp/files.el (directory-abbrev-alist, abbreviated-home-dir): Doc fix. (abbreviate-file-name): Decode 'abbreviated-home-dir' if it is a unibyte string. * doc/lispref/files.texi (Directory Names): Index 'directory-abbrev-alist'. --- doc/lispref/files.texi | 1 + lisp/files.el | 39 +++++++++++++++++++++++---------- lisp/international/mule-cmds.el | 12 +++++----- src/emacs.c | 3 +++ src/fileio.c | 8 +++---- src/w32.c | 18 +++++++++++++-- src/w32.h | 1 + src/w32term.c | 5 ++++- 8 files changed, 64 insertions(+), 23 deletions(-) diff --git a/doc/lispref/files.texi b/doc/lispref/files.texi index 544992d4ba5..85c80d76f73 100644 --- a/doc/lispref/files.texi +++ b/doc/lispref/files.texi @@ -2151,6 +2151,7 @@ function: @cindex file name abbreviations @cindex abbreviated file names +@vindex directory-abbrev-alist @defun abbreviate-file-name filename @anchor{abbreviate-file-name} This function returns an abbreviated form of @var{filename}. It diff --git a/lisp/files.el b/lisp/files.el index fa630731b8d..45646b186af 100644 --- a/lisp/files.el +++ b/lisp/files.el @@ -51,20 +51,21 @@ when it has unsaved changes." nil "Alist of abbreviations for file directories. A list of elements of the form (FROM . TO), each meaning to replace -FROM with TO when it appears in a directory name. This replacement is -done when setting up the default directory of a newly visited file. +a match for FROM with TO when a directory name matches FROM. This +replacement is done when setting up the default directory of a +newly visited file buffer. -FROM is matched against directory names anchored at the first -character, so it should start with a \"\\\\\\=`\", or, if directory -names cannot have embedded newlines, with a \"^\". +FROM is a regexp that is matched against directory names anchored at +the first character, so it should start with a \"\\\\\\=`\", or, if +directory names cannot have embedded newlines, with a \"^\". FROM and TO should be equivalent names, which refer to the -same directory. Do not use `~' in the TO strings; -they should be ordinary absolute directory names. +same directory. TO should be an absolute directory name. +Do not use `~' in the TO strings. Use this feature when you have directories which you normally refer to via absolute symbolic links. Make TO the name of the link, and FROM -the name it is linked to." +a regexp matching the name it is linked to." :type '(repeat (cons :format "%v" :value ("\\`" . "") (regexp :tag "From") @@ -1736,7 +1737,8 @@ Choose the buffer's name using `generate-new-buffer-name'." (make-obsolete-variable 'automount-dir-prefix 'directory-abbrev-alist "24.3") (defvar abbreviated-home-dir nil - "The user's homedir abbreviated according to `directory-abbrev-alist'.") + "Regexp matching the user's homedir at the beginning of file name. +The value includes abbreviation according to `directory-abbrev-alist'.") (defun abbreviate-file-name (filename) "Return a version of FILENAME shortened using `directory-abbrev-alist'. @@ -1770,8 +1772,23 @@ home directory is a root directory) and removes automounter prefixes (or abbreviated-home-dir (setq abbreviated-home-dir (let ((abbreviated-home-dir "$foo")) - (concat "\\`" (abbreviate-file-name (expand-file-name "~")) - "\\(/\\|\\'\\)")))) + (setq abbreviated-home-dir + (concat "\\`" + (abbreviate-file-name (expand-file-name "~")) + "\\(/\\|\\'\\)")) + ;; Depending on whether default-directory does or + ;; doesn't include non-ASCII characters, the value + ;; of abbreviated-home-dir could be multibyte or + ;; unibyte. In the latter case, we need to decode + ;; it. Note that this function is called for the + ;; first time (from startup.el) when + ;; locale-coding-system is already set up. + (if (multibyte-string-p abbreviated-home-dir) + abbreviated-home-dir + (decode-coding-string abbreviated-home-dir + (if (eq system-type 'windows-nt) + 'utf-8 + locale-coding-system)))))) ;; If FILENAME starts with the abbreviated homedir, ;; make it start with `~' instead. diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index 5008fd5f27b..1ec7456c9e1 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el @@ -2704,10 +2704,12 @@ See also `locale-charset-language-names', `locale-language-names', ;; terminal-coding-system with the ANSI or console codepage. (when (and (eq system-type 'windows-nt) (boundp 'w32-ansi-code-page)) - (let* ((code-page-coding - (intern (format "cp%d" (if noninteractive - (w32-get-console-codepage) - w32-ansi-code-page)))) + (let* ((ansi-code-page-coding + (intern (format "cp%d" w32-ansi-code-page))) + (code-page-coding + (if noninteractive + (intern (format "cp%d" (w32-get-console-codepage))) + ansi-code-page-coding)) (output-coding (if noninteractive (intern (format "cp%d" (w32-get-console-output-codepage))) @@ -2717,7 +2719,7 @@ See also `locale-charset-language-names', `locale-language-names', (unless frame (setq locale-coding-system code-page-coding)) (set-keyboard-coding-system code-page-coding frame) (set-terminal-coding-system output-coding frame) - (setq default-file-name-coding-system code-page-coding)))) + (setq default-file-name-coding-system ansi-code-page-coding)))) (when (eq system-type 'darwin) ;; On Darwin, file names are always encoded in utf-8, no matter diff --git a/src/emacs.c b/src/emacs.c index ce30ae741b2..16cf6cc0e4d 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -716,6 +716,9 @@ main (int argc, char **argv) to have non-stub implementations of APIs we need to convert file names between UTF-8 and the system's ANSI codepage. */ maybe_load_unicows_dll (); + /* Initialize the codepage for file names, needed to decode + non-ASCII file names during startup. */ + w32_init_file_name_codepage (); #endif /* This has to be done before module_init is called below, so that the latter could use the thread ID of the main thread. */ diff --git a/src/fileio.c b/src/fileio.c index c3b2be7c5f7..d94805f316b 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -1063,8 +1063,6 @@ filesystem tree, not (expand-file-name ".." dirname). */) if (!(newdir = egetenv ("HOME"))) newdir = newdirlim = ""; nm++; - /* `egetenv' may return a unibyte string, which will bite us since - we expect the directory to be multibyte. */ #ifdef WINDOWSNT if (newdir[0]) { @@ -1072,11 +1070,14 @@ filesystem tree, not (expand-file-name ".." dirname). */) filename_from_ansi (newdir, newdir_utf8); tem = make_unibyte_string (newdir_utf8, strlen (newdir_utf8)); + newdir = SSDATA (tem); } else #endif tem = build_string (newdir); newdirlim = newdir + SBYTES (tem); + /* `egetenv' may return a unibyte string, which will bite us + if we expect the directory to be multibyte. */ if (multibyte && !STRING_MULTIBYTE (tem)) { hdir = DECODE_FILE (tem); @@ -1105,8 +1106,7 @@ filesystem tree, not (expand-file-name ".." dirname). */) newdir = pw->pw_dir; /* `getpwnam' may return a unibyte string, which will - bite us since we expect the directory to be - multibyte. */ + bite us when we expect the directory to be multibyte. */ tem = make_unibyte_string (newdir, strlen (newdir)); newdirlim = newdir + SBYTES (tem); if (multibyte && !STRING_MULTIBYTE (tem)) diff --git a/src/w32.c b/src/w32.c index 793bc0f28d0..7c57693cf3d 100644 --- a/src/w32.c +++ b/src/w32.c @@ -1493,6 +1493,16 @@ w32_valid_pointer_p (void *p, int size) /* Current codepage for encoding file names. */ static int file_name_codepage; +/* Initialize the codepage used for decoding file names. This is + needed to undo the value recorded during dumping, which might not + be correct when we run the dumped Emacs. */ +void +w32_init_file_name_codepage (void) +{ + file_name_codepage = CP_ACP; + w32_ansi_code_page = CP_ACP; +} + /* Produce a Windows ANSI codepage suitable for encoding file names. Return the information about that codepage in CP_INFO. */ int @@ -1509,12 +1519,13 @@ codepage_for_filenames (CPINFO *cp_info) if (NILP (current_encoding)) current_encoding = Vdefault_file_name_coding_system; - if (!EQ (last_file_name_encoding, current_encoding)) + if (!EQ (last_file_name_encoding, current_encoding) + || NILP (last_file_name_encoding)) { /* Default to the current ANSI codepage. */ file_name_codepage = w32_ansi_code_page; - if (NILP (current_encoding)) + if (!NILP (current_encoding)) { char *cpname = SSDATA (SYMBOL_NAME (current_encoding)); char *cp = NULL, *end; @@ -1543,6 +1554,9 @@ codepage_for_filenames (CPINFO *cp_info) if (!GetCPInfo (file_name_codepage, &cp)) emacs_abort (); } + + /* Cache the new value. */ + last_file_name_encoding = current_encoding; } if (cp_info) *cp_info = cp; diff --git a/src/w32.h b/src/w32.h index 42a1c423ce7..08b88f5f5a0 100644 --- a/src/w32.h +++ b/src/w32.h @@ -195,6 +195,7 @@ extern int filename_from_ansi (const char *, char *); extern int filename_to_ansi (const char *, char *); extern int filename_from_utf16 (const wchar_t *, char *); extern int filename_to_utf16 (const char *, wchar_t *); +extern void w32_init_file_name_codepage (void); extern int codepage_for_filenames (CPINFO *); extern Lisp_Object ansi_encode_filename (Lisp_Object); extern int w32_copy_file (const char *, const char *, int, int, int); diff --git a/src/w32term.c b/src/w32term.c index 51743f8f94d..7b74ae03ad0 100644 --- a/src/w32term.c +++ b/src/w32term.c @@ -7157,7 +7157,10 @@ specified by `file-name-coding-system'. This variable is set to non-nil by default when Emacs runs on Windows systems of the NT family, including W2K, XP, Vista, Windows 7 and Windows 8. It is set to nil on Windows 9X. */); - w32_unicode_filenames = 0; + if (os_subtype == OS_9X) + w32_unicode_filenames = 0; + else + w32_unicode_filenames = 1; /* FIXME: The following variable will be (hopefully) removed -- 2.39.2