From: Eli Zaretskii Date: Sat, 16 Mar 2019 11:59:03 +0000 (+0200) Subject: Improve locale and language environment setting at startup X-Git-Tag: emacs-27.0.90~3405 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=34dd4e0a83e19882f61c9a2ac99ecc12632d13d4;p=emacs.git Improve locale and language environment setting at startup * lisp/international/mule-cmds.el (locale-language-names): Add more locales and their language environments. (set-locale-environment): Use w32-multibyte-code-page, if non-zero, as locale-coding-system. (Bug#34684) * src/w32fns.c (globals_of_w32fns) <w32-multibyte-code-page>: New variable. * etc/NEWS: Mention w32-multibyte-code-page. --- diff --git a/etc/NEWS b/etc/NEWS index 000d211c1ab..f25c3f5dc3d 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1736,6 +1736,14 @@ versions of MS-Windows. Set this variable to 50 if for some reason you need the old behavior (and please report such situations to Emacs developers). +--- +** New variable 'w32-multibyte-code-page'. +This variable holds the value of the multibyte code page used by the +system. It is usually zero, which indicates that 'w32-ansi-code-page' +is being used, except in Far Eastern locales. When this variable is +non-zero, Emacs at startup sets 'locale-coding-system' to the +corresponding encoding, instead of using 'w32-ansi-code-page'. + +++ ** On NS the behaviour of drag and drop can now be modified by use of modifier keys in line with Apples guidelines. This makes the drag and diff --git a/lisp/international/mule-cmds.el b/lisp/international/mule-cmds.el index 3c1769a02fb..5f87d899415 100644 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el @@ -2181,22 +2181,27 @@ See `set-language-info-alist' for use in programs." (defconst locale-language-names (purecopy '( - ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER] - ;; as specified in the Single Unix Spec, Version 2. 
- ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F) - ;; with additions from ISO 639/RA Newsletter No.1/1989; - ;; see Internet RFC 2165 (1997-06) and - ;; http://www.evertype.com/standards/iso639/iso639-en.html - ;; TERRITORY is a country code taken from ISO 3166 - ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html. - ;; CODESET and MODIFIER are implementation-dependent. + ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER] + ;; as specified in the Single Unix Spec, Version 2. + ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F) + ;; with additions from ISO 639/RA Newsletter No.1/1989; + ;; see Internet RFC 2165 (1997-06) and + ;; http://www.evertype.com/standards/iso639/iso639-en.html + ;; TERRITORY is a country code taken from ISO 3166 + ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html. + ;; CODESET and MODIFIER are implementation-dependent. + + ;; Language names for which there are no locales (yet) are + ;; commented out. ;; jasonr comments: MS Windows uses three letter codes for ;; languages instead of the two letter ISO codes that POSIX - ;; uses. In most cases the first two letters are the same, so - ;; most of the regexps in locale-language-names work. Japanese - ;; and Chinese are exceptions, which are listed in the - ;; non-standard section at the bottom of locale-language-names. + ;; uses. In most cases the first two letters are the same, so + ;; most of the regexps in locale-language-names work. Japanese, + ;; Chinese, and some others are exceptions, which are listed in the + ;; non-standard section at the bottom of locale-language-names, or + ;; in the main section, if otherwise we would pick up the wrong + ;; entry (because the first matching entry is used). ("aa_DJ" . "Latin-1") ; Afar ("aa" . "UTF-8") @@ -2204,11 +2209,12 @@ See `set-language-info-alist' for use in programs." ("af" . "Latin-1") ; Afrikaans ("am" "Ethiopic" utf-8) ; Amharic ("an" . 
"Latin-9") ; Aragonese + ("arn" . "UTF-8") ; MS-Windows Mapudungun, Mapuche ("ar" . "Arabic") - ; as Assamese + ("as" . "UTF-8") ; Assamese ; ay Aymara ("az" . "UTF-8") ; Azerbaijani - ; ba Bashkir + ("ba" . "UTF-8") ; Bashkir, Cyrillic script ("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s] ("bg" "Bulgarian" cp1251) ; Bulgarian ; bh Bihari @@ -2219,12 +2225,12 @@ See `set-language-info-alist' for use in programs." ("bs" . "Latin-2") ; Bosnian ("byn" . "UTF-8") ; Bilin; Blin ("ca" "Catalan" iso-8859-1) ; Catalan - ; co Corsican + ("co" . "UTF-8") ; Corsican ("cs" "Czech" iso-8859-2) ("cy" "Welsh" iso-8859-14) ("da" . "Latin-1") ; Danish ("de" "German" iso-8859-1) - ; dv Divehi + ("dv" . "UTF-8") ; Divehi ; dz Bhutani ("ee" . "Latin-4") ; Ewe ("el" "Greek" iso-8859-7) @@ -2238,6 +2244,8 @@ See `set-language-info-alist' for use in programs." ("et" . "Latin-9") ; Estonian ("eu" . "Latin-1") ; Basque ("fa" "Persian" utf-8) ; Persian + ("fil" . "UTF-8") ; Filipino + ("fpo" . "UTF-8") ; MS-Windows Filipino ("fi" . "Latin-9") ; Finnish ("fj" . "Latin-1") ; Fiji ("fo" . "Latin-1") ; Faroese @@ -2246,6 +2254,7 @@ See `set-language-info-alist' for use in programs." ("ga" . "Latin-1") ; Irish Gaelic (new orthography) ("gd" . "Latin-9") ; Scots Gaelic ("gez" "Ethiopic" utf-8) ; Geez + ("gla" . "Latin-9") ; MS-Windows Scots Gaelic ("gl" . "Latin-1") ; Gallegan; Galician ; gn Guarani ("gu" "Gujarati" utf-8) ; Gujarati @@ -2256,27 +2265,33 @@ See `set-language-info-alist' for use in programs." ("hni_IN" . "UTF-8") ; Chhattisgarhi ("hr" "Croatian" iso-8859-2) ; Croatian ("hu" . "Latin-2") ; Hungarian - ; hy Armenian + ("hy" . "UTF-8") ; Armenian ; ia Interlingua ("id" . "Latin-1") ; Indonesian ; ie Interlingue - ; ik Inupiak + ("ig" . "UTF-8") ; Igbo (Nigeria) + ("ibo" . "UTF-8") ; MS-Windows Igbo + ; ik Inupiak, Inupiaq ("is" . 
"Latin-1") ; Icelandic ("it" "Italian" iso-8859-1) ; Italian ; iu Inuktitut ("iw" "Hebrew" iso-8859-8) ("ja" "Japanese" euc-jp) ; jw Javanese + ("kal" . "Latin-1") ; MS-Windows Greenlandic ("ka" "Georgian" georgian-ps) ; Georgian - ; kk Kazakh + ("kk" . "UTF-8") ; Kazakh ("kl" . "Latin-1") ; Greenlandic ("km" "Khmer" utf-8) ; Cambodian, Khmer + ("knk" "Devanagari" utf-8) ; MS-Windows Konkani + ("kok" "Devanagari" utf-8) ; Konkani ("kn" "Kannada" utf-8) ("ko" "Korean" euc-kr) ("ks" . "UTF-8") ; Kashmiri ; ku Kurdish ("kw" . "Latin-1") ; Cornish ("ky" . "UTF-8") ; Kirghiz + ("lao" "Lao" utf-8) ; MS-Windows Lao ("la" . "Latin-1") ; Latin ("lb" . "Latin-1") ; Luxemburgish ("lg" . "Latin-6") ; Ganda, a.k.a. Luganda @@ -2287,18 +2302,22 @@ See `set-language-info-alist' for use in programs." ; mg Malagasy ("mi" . "Latin-7") ; Maori ("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian + ("mlt" . "Latin-3") ; MS-Windows Maltese ("ml" "Malayalam" utf-8) ("mn" . "UTF-8") ; Mongolian - ; mo Moldavian + ; mo Moldavian (retired) + ("mri" . "Latin-7") ; MS-Windows Maori ("mr" "Devanagari" utf-8) ; Marathi ("ms" . "Latin-1") ; Malay ("mt" . "Latin-3") ; Maltese + ("mym" "Malayalam" utf-8) ; MS-Windows Malayalam ("my" "Burmese" utf-8) ; Burmese ; na Nauru ("nb" . "Latin-1") ; Norwegian ("ne" "Devanagari" utf-8) ; Nepali ("nl" "Dutch" iso-8859-1) ("nn" . "Latin-1") ; Norwegian Nynorsk + ("non" . "Latin-1") ; MS-Windows Norwegian Nynorsk ("no" . "Latin-1") ; Norwegian ("nr_ZA" . "UTF-8") ; South Ndebele ("nso_ZA" . "UTF-8") ; Pedi @@ -2308,7 +2327,8 @@ See `set-language-info-alist' for use in programs." ("or" "Oriya" utf-8) ("pa" "Punjabi" utf-8) ; Punjabi ("pl" "Polish" iso-8859-2) ; Polish - ; ps Pashto, Pushto + ("ps" . "UTF-8") ; Pashto, Pushto + ("pas" . "UTF-8") ; MS-Windows Pashto ("pt_BR" "Brazilian Portuguese" iso-8859-1) ; Brazilian Portuguese ("pt" . "Latin-1") ; Portuguese ; qu Quechua @@ -2318,7 +2338,7 @@ See `set-language-info-alist' for use in programs." 
("ru_RU.koi8r" "Cyrillic-KOI8" koi8-r) ("ru_RU" "Russian" iso-8859-5) ("ru_UA" "Russian" koi8-u) - ; rw Kinyarwanda + ("rw" . "UTF-8") ; Kinyarwanda ("sa" . "Devanagari") ; Sanskrit ; sd Sindhi ("se" . "UTF-8") ; Northern Sami @@ -2339,6 +2359,7 @@ See `set-language-info-alist' for use in programs." ; su Sundanese ("sv" "Swedish" iso-8859-1) ; Swedish ("sw" . "Latin-1") ; Swahili + ("taj" "Tajik" koi8-t) ; MS-Windows Tajik w/Cyrillic script ("ta" "Tamil" utf-8) ("te" "Telugu" utf-8) ; Telugu ("tg" "Tajik" koi8-t) @@ -2348,15 +2369,17 @@ See `set-language-info-alist' for use in programs." ("th" "Thai" iso-8859-11) ("ti" "Ethiopic" utf-8) ; Tigrinya ("tig_ER" . "UTF-8") ; Tigre - ; tk Turkmen + ("tk" . "Latin-5") ; Turkmen + ("tuk" . "Latin-5") ; MS-Windows Turkmen ("tl" . "Latin-1") ; Tagalog ("tn" . "Latin-9") ; Setswana, Tswana ; to Tonga ("tr" "Turkish" iso-8859-9) + ("tsn" . "Latin-9") ; MS-Windows Tswana ("ts" . "Latin-1") ; Tsonga ("tt" . "UTF-8") ; Tatar ; tw Twi - ; ug Uighur + ("ug" . "UTF-8") ; Uighur ("uk" "Ukrainian" koi8-u) ("ur" . "UTF-8") ; Urdu ("uz_UZ@cyrillic" . "UTF-8"); Uzbek @@ -2365,10 +2388,10 @@ See `set-language-info-alist' for use in programs." ("vi" "Vietnamese" utf-8) ; vo Volapuk ("wa" . "Latin-1") ; Walloon - ; wo Wolof + ("wo" . "UTF-8") ; Wolof ("xh" . "Latin-1") ; Xhosa ("yi" . "Windows-1255") ; Yiddish - ; yo Yoruba + ("yo" . "UTF-8") ; Yoruba ; za Zhuang ("zh_HK" . "Chinese-Big5") ; zh_HK/BIG5-HKSCS \ @@ -2378,6 +2401,9 @@ See `set-language-info-alist' for use in programs." ("zh_CN.GB18030" "Chinese-GB18030") ("zh_CN.UTF-8" . "Chinese-GBK") ("zh_CN" . "Chinese-GB") + ("zhh" . "Chinese-Big5") ; MS-Windows Chinese (Hong Kong S.A.R.) + ("zhi" . "Chinese-GBK") ; MS-Windows Chinese (Singapore) + ("zhm" . "Chinese-Big5") ; MS-Windows Chinese (Macao S.A.R.) ("zh" . "Chinese-GB") ("zu" . "Latin-1") ; Zulu @@ -2395,12 +2421,23 @@ See `set-language-info-alist' for use in programs." ("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. 
X11R6.4 ("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6 ("jp" . "Japanese") ; e.g. MS Windows - ("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified - ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional + ("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified (PRC) + ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional (Taiwan) ("gbz" . "UTF-8") ; MS Windows Dari Persian ("div" . "UTF-8") ; MS Windows Divehi (Maldives) ("wee" . "Latin-2") ; MS Windows Lower Sorbian ("wen" . "Latin-2") ; MS Windows Upper Sorbian + ("ind" . "Latin-1") ; MS-Windows Indonesian + ("sme" . "UTF-8") ; MS-Windows Northern Sami (Norway) + ("smf" . "UTF-8") ; MS-Windows Northern Sami (Sweden) + ("smg" . "UTF-8") ; MS-Windows Northern Sami (Finland) + ("kdi" "Kannada" utf-8) ; MS-Windows Kannada + ("mar" "Devanagari" utf-8) ; MS-Windows Marathi + ("khm" "Khmer" utf-8) ; MS-Windows Khmer + ("iri" . "Latin-1") ; MS-Windows Irish Gaelic + ; mwk MS-Windows Mohawk (Canada) + ("uig" . "UTF-8") ; MS-Windows Uighur + ("kin" . "UTF-8") ; MS-Windows Kinyarwanda )) "Alist of locale regexps vs the corresponding languages and coding systems. 
Each element has this form: @@ -2702,10 +2739,20 @@ See also `locale-charset-language-names', `locale-language-names', (output-coding (if noninteractive (intern (format "cp%d" (w32-get-console-output-codepage))) - code-page-coding))) - (when (coding-system-p code-page-coding) + code-page-coding)) + (multibyte-code-page-coding + (or (and (boundp 'w32-multibyte-code-page) + (not (zerop w32-multibyte-code-page)) + (intern (format "cp%d" w32-multibyte-code-page))) + code-page-coding)) + (locale-coding + (if noninteractive + code-page-coding + multibyte-code-page-coding))) + (when (and (coding-system-p code-page-coding) + (coding-system-p locale-coding)) (or output-coding (setq output-coding code-page-coding)) - (unless frame (setq locale-coding-system code-page-coding)) + (unless frame (setq locale-coding-system locale-coding)) (set-keyboard-coding-system code-page-coding frame) (set-terminal-coding-system output-coding frame) (setq default-file-name-coding-system ansi-code-page-coding)))) diff --git a/src/w32fns.c b/src/w32fns.c index 4f53d93d8b4..1fbf32760dd 100644 --- a/src/w32fns.c +++ b/src/w32fns.c @@ -48,6 +48,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ #ifdef WINDOWSNT #include <mbstring.h> +#include <mbctype.h> /* for _getmbcp */ #endif /* WINDOWSNT */ #if CYGWIN @@ -10908,6 +10909,15 @@ globals_of_w32fns (void) doc: /* The ANSI code page used by the system. */); w32_ansi_code_page = GetACP (); +#ifndef CYGWIN + DEFVAR_INT ("w32-multibyte-code-page", + w32_multibyte_code_page, + doc: /* The current multibyte code page used by the system. +A value of zero indicates that the single-byte code page is in use, +see `w32-ansi-code-page'. */); + w32_multibyte_code_page = _getmbcp (); +#endif + if (os_subtype == OS_NT) w32_unicode_gui = 1; else