From: Eli Zaretskii
Date: Sat, 26 Oct 2013 10:37:43 +0000 (+0300)
Subject: Finished conversion routines; w32-unicode-filenames exposed to Lisp.
X-Git-Tag: emacs-24.3.90~173^2^2~42^2~45^2~387^2~446^2~48
X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=c3e9160b8c375760d6bc53602caeed211e91389d;p=emacs.git

Finished conversion routines; w32-unicode-filenames exposed to Lisp.
---

diff --git a/src/coding.c b/src/coding.c
index ac828a48683..69b01553e7f 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -9473,6 +9473,49 @@ code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
   return code_convert_string (string, coding_system, Qt, encodep, 0, 1);
 }
 
+/* Encode or decode a file name, to or from a unibyte string suitable
+   for passing to C library functions. */
+Lisp_Object
+decode_file_name (Lisp_Object fname)
+{
+#ifdef WINDOWSNT
+  /* The w32 build pretends to use UTF-8 for file-name encoding, and
+     converts the file names either to UTF-16LE or to the system ANSI
+     codepage internally, depending on the underlying OS; see w32.c. */
+  if (! NILP (Fcoding_system_p (Qutf_8)))
+    return code_convert_string_norecord (fname, Qutf_8, 0);
+  return fname;
+#else /* !WINDOWSNT */
+  if (! NILP (Vfile_name_coding_system))
+    return code_convert_string_norecord (fname, Vfile_name_coding_system, 0);
+  else if (! NILP (Vdefault_file_name_coding_system))
+    return code_convert_string_norecord (fname,
+                                         Vdefault_file_name_coding_system, 0);
+  else
+    return fname;
+#endif
+}
+
+Lisp_Object
+encode_file_name (Lisp_Object fname)
+{
+#ifdef WINDOWSNT
+  /* The w32 build pretends to use UTF-8 for file-name encoding, and
+     converts the file names either to UTF-16LE or to the system ANSI
+     codepage internally, depending on the underlying OS; see w32.c. */
+  if (! NILP (Fcoding_system_p (Qutf_8)))
+    return code_convert_string_norecord (fname, Qutf_8, 1);
+  return fname;
+#else /* !WINDOWSNT */
+  if (! NILP (Vfile_name_coding_system))
+    return code_convert_string_norecord (fname, Vfile_name_coding_system, 1);
+  else if (! NILP (Vdefault_file_name_coding_system))
+    return code_convert_string_norecord (fname,
+                                         Vdefault_file_name_coding_system, 1);
+  else
+    return fname;
+#endif
+}
 
 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
        2, 4, 0,
diff --git a/src/coding.h b/src/coding.h
index 0472bec99de..39f9d62462b 100644
--- a/src/coding.h
+++ b/src/coding.h
@@ -672,23 +672,11 @@ struct coding_system
 
 /* Encode the file name NAME using the specified coding system for
    file names, if any. */
-#define ENCODE_FILE(name) \
-  (! NILP (Vfile_name_coding_system) \
-   ? code_convert_string_norecord (name, Vfile_name_coding_system, 1) \
-   : (! NILP (Vdefault_file_name_coding_system) \
-      ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 1) \
-      : name))
-
+#define ENCODE_FILE(NAME) encode_file_name (NAME)
 
 /* Decode the file name NAME using the specified coding system for
    file names, if any. */
-#define DECODE_FILE(name) \
-  (! NILP (Vfile_name_coding_system) \
-   ? code_convert_string_norecord (name, Vfile_name_coding_system, 0) \
-   : (! NILP (Vdefault_file_name_coding_system) \
-      ? code_convert_string_norecord (name, Vdefault_file_name_coding_system, 0) \
-      : name))
-
+#define DECODE_FILE(NAME) decode_file_name (NAME)
 
 /* Encode the string STR using the specified coding system
    for system functions, if any. */
@@ -716,6 +704,8 @@ extern Lisp_Object code_convert_string (Lisp_Object, Lisp_Object,
                                         Lisp_Object, bool, bool, bool);
 extern Lisp_Object code_convert_string_norecord (Lisp_Object, Lisp_Object,
                                                  bool);
+extern Lisp_Object encode_file_name (Lisp_Object);
+extern Lisp_Object decode_file_name (Lisp_Object);
 extern Lisp_Object raw_text_coding_system (Lisp_Object);
 extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
 extern Lisp_Object complement_process_encoding_system (Lisp_Object);
diff --git a/src/w32.c b/src/w32.c
index 511cec15d5f..993d598ff54 100644
--- a/src/w32.c
+++ b/src/w32.c
@@ -300,8 +300,6 @@ static BOOL g_b_init_is_valid_security_descriptor;
 static BOOL g_b_init_set_file_security;
 static BOOL g_b_init_get_adapters_info;
 
-int w32_unicode_filenames;
-
 /*
   BEGIN: Wrapper functions around OpenProcessToken
   and other functions in advapi32.dll that are only
@@ -1186,12 +1184,76 @@ w32_valid_pointer_p (void *p, int size)
 
 
 
-/* Converting file names from UTF-8 to either UTF-16 or the system
-   ANSI codepage. */
+/* Converting file names from UTF-8 to either UTF-16 or the ANSI
+   codepage defined by file-name-coding-system. */
+
+/* Current codepage for encoding file names. */
+static int file_name_codepage;
+
+/* Produce a Windows ANSI codepage suitable for encoding file names.
+   Return the information about that codepage in CP_INFO. */
+static int
+codepage_for_filenames (CPINFO *cp_info)
+{
+  /* A simple cache to avoid calling GetCPInfo every time we need to
+     encode/decode a file name. The file-name encoding is not
+     supposed to be changed too frequently, if ever. */
+  static Lisp_Object last_file_name_encoding;
+  static CPINFO cp;
+  Lisp_Object current_encoding;
+
+  current_encoding = Vfile_name_coding_system;
+  if (NILP (current_encoding))
+    current_encoding = Vdefault_file_name_coding_system;
+
+  if (!EQ (last_file_name_encoding, current_encoding))
+    {
+      /* Default to the current ANSI codepage. */
+      file_name_codepage = w32_ansi_code_page;
+
+      if (!NILP (current_encoding))
+        {
+          char *cpname = SDATA (SYMBOL_NAME (current_encoding));
+          char *cp = NULL, *end;
+          int cpnum;
+
+          if (strncmp (cpname, "cp", 2) == 0)
+            cp = cpname + 2;
+          else if (strncmp (cpname, "windows-", 8) == 0)
+            cp = cpname + 8;
+
+          if (cp)
+            {
+              end = cp;
+              cpnum = strtol (cp, &end, 10);
+              if (cpnum && *end == '\0' && end - cp >= 2)
+                file_name_codepage = cpnum;
+            }
+        }
+
+      if (!file_name_codepage)
+        file_name_codepage = CP_ACP; /* CP_ACP = 0, but let's not assume that */
+
+      if (!GetCPInfo (file_name_codepage, &cp))
+        {
+          file_name_codepage = CP_ACP;
+          if (!GetCPInfo (file_name_codepage, &cp))
+            emacs_abort ();
+        }
+      /* Remember the encoding the cached values were computed for. */
+      last_file_name_encoding = current_encoding;
+    }
+  if (cp_info)
+    *cp_info = cp;
+
+  return file_name_codepage;
+}
+
 static int
 filename_to_utf16 (const char *fn_in, wchar_t *fn_out)
 {
-  int result = MultiByteToWideChar (CP_UTF8, 0, fn_in, -1, fn_out, MAX_PATH);
+  int result = MultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, fn_in, -1,
+                                    fn_out, MAX_PATH);
 
   if (!result)
     {
@@ -1217,7 +1279,28 @@ filename_to_utf16 (const char *fn_in, wchar_t *fn_out)
 static int
 filename_from_utf16 (const wchar_t *fn_in, char *fn_out)
 {
-  return -1;
+  int result = WideCharToMultiByte (CP_UTF8, 0, fn_in, -1,
+                                    fn_out, MAX_UTF8_PATH, NULL, NULL);
+
+  if (!result)
+    {
+      DWORD err = GetLastError ();
+
+      switch (err)
+        {
+        case ERROR_INVALID_FLAGS:
+        case ERROR_INVALID_PARAMETER:
+          errno = EINVAL;
+          break;
+        case ERROR_INSUFFICIENT_BUFFER:
+        case ERROR_NO_UNICODE_TRANSLATION:
+        default:
+          errno = ENOENT;
+          break;
+        }
+      return -1;
+    }
+  return 0;
 }
 
 static int
@@ -1227,9 +1310,11 @@ filename_to_ansi (const char *fn_in, char *fn_out)
 
   if (filename_to_utf16 (fn_in, fn_utf16) == 0)
     {
-      int result = WideCharToMultiByte (CP_ACP, 0, fn_utf16, -1,
-                                        fn_out, MAX_UTF8_PATH, NULL, NULL);
+      int result;
+      int codepage = codepage_for_filenames (NULL);
+      result = WideCharToMultiByte (codepage, 0, fn_utf16, -1,
+                                    fn_out, MAX_UTF8_PATH, NULL, NULL);
 
       if (!result)
         {
           DWORD err = GetLastError ();
@@ -1250,12 +1335,36 @@ filename_to_ansi (const char *fn_in, char *fn_out)
         }
       return 0;
     }
+  return -1;
 }
 
 static int
 filename_from_ansi (const char *fn_in, char *fn_out)
 {
-  return -1;
+  wchar_t fn_utf16[MAXPATHLEN];
+  int codepage = codepage_for_filenames (NULL);
+  int result = MultiByteToWideChar (codepage, MB_ERR_INVALID_CHARS, fn_in, -1,
+                                    fn_utf16, MAX_PATH);
+
+  if (!result)
+    {
+      DWORD err = GetLastError ();
+
+      switch (err)
+        {
+        case ERROR_INVALID_FLAGS:
+        case ERROR_INVALID_PARAMETER:
+          errno = EINVAL;
+          break;
+        case ERROR_INSUFFICIENT_BUFFER:
+        case ERROR_NO_UNICODE_TRANSLATION:
+        default:
+          errno = ENOENT;
+          break;
+        }
+      return -1;
+    }
+  return filename_from_utf16 (fn_utf16, fn_out);
 }
 
 
@@ -1662,66 +1771,16 @@ srandom (int seed)
   srand (seed);
 }
 
-/* Current codepage for encoding file names. */
-static int file_name_codepage;
-
 /* Return the maximum length in bytes of a multibyte character
    sequence encoded in the current ANSI codepage. This is required to
    correctly walk the encoded file names one character at a time. */
 static int
 max_filename_mbslen (void)
 {
-  /* A simple cache to avoid calling GetCPInfo every time we need to
-     normalize a file name. The file-name encoding is not supposed to
-     be changed too frequently, if ever. */
-  static Lisp_Object last_file_name_encoding;
-  static int last_max_mbslen;
-  Lisp_Object current_encoding;
-
-  current_encoding = Vfile_name_coding_system;
-  if (NILP (current_encoding))
-    current_encoding = Vdefault_file_name_coding_system;
-
-  if (!EQ (last_file_name_encoding, current_encoding))
-    {
-      CPINFO cp_info;
-
-      last_file_name_encoding = current_encoding;
-      /* Default to the current ANSI codepage. */
-      file_name_codepage = w32_ansi_code_page;
-      if (!NILP (current_encoding))
-        {
-          char *cpname = SDATA (SYMBOL_NAME (current_encoding));
-          char *cp = NULL, *end;
-          int cpnum;
-
-          if (strncmp (cpname, "cp", 2) == 0)
-            cp = cpname + 2;
-          else if (strncmp (cpname, "windows-", 8) == 0)
-            cp = cpname + 8;
-
-          if (cp)
-            {
-              end = cp;
-              cpnum = strtol (cp, &end, 10);
-              if (cpnum && *end == '\0' && end - cp >= 2)
-                file_name_codepage = cpnum;
-            }
-        }
-
-      if (!file_name_codepage)
-        file_name_codepage = CP_ACP; /* CP_ACP = 0, but let's not assume that */
-
-      if (!GetCPInfo (file_name_codepage, &cp_info))
-        {
-          file_name_codepage = CP_ACP;
-          if (!GetCPInfo (file_name_codepage, &cp_info))
-            emacs_abort ();
-        }
-      last_max_mbslen = cp_info.MaxCharSize;
-    }
+  CPINFO cp_info;
 
-  return last_max_mbslen;
+  codepage_for_filenames (&cp_info);
+  return cp_info.MaxCharSize;
 }
 
 /* Normalize filename by converting all path separators to
diff --git a/src/w32.h b/src/w32.h
index 32d0fdbe3cf..c836937bd66 100644
--- a/src/w32.h
+++ b/src/w32.h
@@ -200,8 +200,6 @@ extern void record_pending_deletion (char *);
 extern void sys_sleep (int);
 extern int sys_link (const char *, const char *);
 
-
-
 
 #ifdef HAVE_GNUTLS
 #include <gnutls/gnutls.h>
diff --git a/src/w32term.c b/src/w32term.c
index 8bc46734b7d..6c94090f3b7 100644
--- a/src/w32term.c
+++ b/src/w32term.c
@@ -6567,6 +6567,18 @@ X toolkit. Possible values are: gtk, motif, xaw, or xaw3d.
 With MS Windows or Nextstep, the value is t. */);
   Vx_toolkit_scroll_bars = Qt;
 
+  DEFVAR_BOOL ("w32-unicode-filenames",
+               w32_unicode_filenames,
+     doc: /* Non-nil means use Unicode APIs when passing file names to the OS.
+A value of nil means file names passed to the OS APIs and returned
+from those APIs are encoded/decoded using the ANSI codepage
+specified by `file-name-coding-system'.
+
+This variable is set to non-nil by default when Emacs runs on Windows
+systems of the NT family, including W2K, XP, Vista, Windows 7 and
+Windows 8. It is set to nil on Windows 9X. */);
+  w32_unicode_filenames = 0;
+
   /* Tell Emacs about this window system. */
   Fprovide (Qw32, Qnil);
 }