(defconst locale-language-names
(purecopy
'(
- ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
- ;; as specified in the Single Unix Spec, Version 2.
- ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
- ;; with additions from ISO 639/RA Newsletter No.1/1989;
- ;; see Internet RFC 2165 (1997-06) and
- ;; http://www.evertype.com/standards/iso639/iso639-en.html
- ;; TERRITORY is a country code taken from ISO 3166
- ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
- ;; CODESET and MODIFIER are implementation-dependent.
+ ;; Locale names of the form LANGUAGE[_TERRITORY][.CODESET][@MODIFIER]
+ ;; as specified in the Single Unix Spec, Version 2.
+ ;; LANGUAGE is a language code taken from ISO 639:1988 (E/F)
+ ;; with additions from ISO 639/RA Newsletter No.1/1989;
+ ;; see Internet RFC 2165 (1997-06) and
+ ;; http://www.evertype.com/standards/iso639/iso639-en.html
+ ;; TERRITORY is a country code taken from ISO 3166
+ ;; http://www.din.de/gremien/nas/nabd/iso3166ma/codlstp1/en_listp1.html.
+ ;; CODESET and MODIFIER are implementation-dependent.
+
+ ;; Language names for which there are no locales (yet) are
+ ;; commented out.
;; jasonr comments: MS Windows uses three letter codes for
;; languages instead of the two letter ISO codes that POSIX
- ;; uses. In most cases the first two letters are the same, so
- ;; most of the regexps in locale-language-names work. Japanese
- ;; and Chinese are exceptions, which are listed in the
- ;; non-standard section at the bottom of locale-language-names.
+ ;; uses. In most cases the first two letters are the same, so
+ ;; most of the regexps in locale-language-names work. Japanese,
+ ;; Chinese, and some others are exceptions, which are listed in the
+ ;; non-standard section at the bottom of locale-language-names, or
+ ;; in the main section, if otherwise we would pick up the wrong
+ ;; entry (because the first matching entry is used).
("aa_DJ" . "Latin-1") ; Afar
("aa" . "UTF-8")
("af" . "Latin-1") ; Afrikaans
("am" "Ethiopic" utf-8) ; Amharic
("an" . "Latin-9") ; Aragonese
+ ("arn" . "UTF-8") ; MS-Windows Mapudungun, Mapuche
("ar" . "Arabic")
- ; as Assamese
+ ("as" . "UTF-8") ; Assamese
; ay Aymara
("az" . "UTF-8") ; Azerbaijani
- ; ba Bashkir
+ ("ba" . "UTF-8") ; Bashkir, Cyrillic script
("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
("bg" "Bulgarian" cp1251) ; Bulgarian
; bh Bihari
("bs" . "Latin-2") ; Bosnian
("byn" . "UTF-8") ; Bilin; Blin
("ca" "Catalan" iso-8859-1) ; Catalan
- ; co Corsican
+ ("co" . "UTF-8") ; Corsican
("cs" "Czech" iso-8859-2)
("cy" "Welsh" iso-8859-14)
("da" . "Latin-1") ; Danish
("de" "German" iso-8859-1)
- ; dv Divehi
+ ("dv" . "UTF-8") ; Divehi
; dz Bhutani
("ee" . "Latin-4") ; Ewe
("el" "Greek" iso-8859-7)
("et" . "Latin-9") ; Estonian
("eu" . "Latin-1") ; Basque
("fa" "Persian" utf-8) ; Persian
+ ("fil" . "UTF-8") ; Filipino
+ ("fpo" . "UTF-8") ; MS-Windows Filipino
("fi" . "Latin-9") ; Finnish
("fj" . "Latin-1") ; Fiji
("fo" . "Latin-1") ; Faroese
("ga" . "Latin-1") ; Irish Gaelic (new orthography)
("gd" . "Latin-9") ; Scots Gaelic
("gez" "Ethiopic" utf-8) ; Geez
+ ("gla" . "Latin-9") ; MS-Windows Scots Gaelic
("gl" . "Latin-1") ; Gallegan; Galician
; gn Guarani
("gu" "Gujarati" utf-8) ; Gujarati
("hni_IN" . "UTF-8") ; Chhattisgarhi
("hr" "Croatian" iso-8859-2) ; Croatian
("hu" . "Latin-2") ; Hungarian
- ; hy Armenian
+ ("hy" . "UTF-8") ; Armenian
; ia Interlingua
("id" . "Latin-1") ; Indonesian
; ie Interlingue
- ; ik Inupiak
+ ("ig" . "UTF-8") ; Igbo (Nigeria)
+ ("ibo" . "UTF-8") ; MS-Windows Igbo
+ ; ik Inupiak, Inupiaq
("is" . "Latin-1") ; Icelandic
("it" "Italian" iso-8859-1) ; Italian
; iu Inuktitut
("iw" "Hebrew" iso-8859-8)
("ja" "Japanese" euc-jp)
; jw Javanese
+ ("kal" . "Latin-1") ; MS-Windows Greenlandic
("ka" "Georgian" georgian-ps) ; Georgian
- ; kk Kazakh
+ ("kk" . "UTF-8") ; Kazakh
("kl" . "Latin-1") ; Greenlandic
("km" "Khmer" utf-8) ; Cambodian, Khmer
+ ("knk" "Devanagari" utf-8) ; MS-Windows Konkani
+ ("kok" "Devanagari" utf-8) ; Konkani
("kn" "Kannada" utf-8)
("ko" "Korean" euc-kr)
("ks" . "UTF-8") ; Kashmiri
; ku Kurdish
("kw" . "Latin-1") ; Cornish
("ky" . "UTF-8") ; Kirghiz
+ ("lao" "Lao" utf-8) ; MS-Windows Lao
("la" . "Latin-1") ; Latin
("lb" . "Latin-1") ; Luxemburgish
("lg" . "Latin-6") ; Ganda, a.k.a. Luganda
; mg Malagasy
("mi" . "Latin-7") ; Maori
("mk" "Cyrillic-ISO" iso-8859-5) ; Macedonian
+ ("mlt" . "Latin-3") ; MS-Windows Maltese
("ml" "Malayalam" utf-8)
("mn" . "UTF-8") ; Mongolian
- ; mo Moldavian
+ ; mo Moldavian (retired)
+ ("mri" . "Latin-7") ; MS-Windows Maori
("mr" "Devanagari" utf-8) ; Marathi
("ms" . "Latin-1") ; Malay
("mt" . "Latin-3") ; Maltese
+ ("mym" "Malayalam" utf-8) ; MS-Windows Malayalam
("my" "Burmese" utf-8) ; Burmese
; na Nauru
("nb" . "Latin-1") ; Norwegian
("ne" "Devanagari" utf-8) ; Nepali
("nl" "Dutch" iso-8859-1)
("nn" . "Latin-1") ; Norwegian Nynorsk
+ ("non" . "Latin-1") ; MS-Windows Norwegian Nynorsk
("no" . "Latin-1") ; Norwegian
("nr_ZA" . "UTF-8") ; South Ndebele
("nso_ZA" . "UTF-8") ; Pedi
("or" "Oriya" utf-8)
("pa" "Punjabi" utf-8) ; Punjabi
("pl" "Polish" iso-8859-2) ; Polish
- ; ps Pashto, Pushto
+ ("ps" . "UTF-8") ; Pashto, Pushto
+ ("pas" . "UTF-8") ; MS-Windows Pashto
("pt_BR" "Brazilian Portuguese" iso-8859-1) ; Brazilian Portuguese
("pt" . "Latin-1") ; Portuguese
; qu Quechua
("ru_RU.koi8r" "Cyrillic-KOI8" koi8-r)
("ru_RU" "Russian" iso-8859-5)
("ru_UA" "Russian" koi8-u)
- ; rw Kinyarwanda
+ ("rw" . "UTF-8") ; Kinyarwanda
("sa" . "Devanagari") ; Sanskrit
; sd Sindhi
("se" . "UTF-8") ; Northern Sami
; su Sundanese
("sv" "Swedish" iso-8859-1) ; Swedish
("sw" . "Latin-1") ; Swahili
+ ("taj" "Tajik" koi8-t) ; MS-Windows Tajik w/Cyrillic script
("ta" "Tamil" utf-8)
("te" "Telugu" utf-8) ; Telugu
("tg" "Tajik" koi8-t)
("th" "Thai" iso-8859-11)
("ti" "Ethiopic" utf-8) ; Tigrinya
("tig_ER" . "UTF-8") ; Tigre
- ; tk Turkmen
+ ("tk" . "Latin-5") ; Turkmen
+ ("tuk" . "Latin-5") ; MS-Windows Turkmen
("tl" . "Latin-1") ; Tagalog
("tn" . "Latin-9") ; Setswana, Tswana
; to Tonga
("tr" "Turkish" iso-8859-9)
+ ("tsn" . "Latin-9") ; MS-Windows Tswana
("ts" . "Latin-1") ; Tsonga
("tt" . "UTF-8") ; Tatar
; tw Twi
- ; ug Uighur
+ ("ug" . "UTF-8") ; Uighur
("uk" "Ukrainian" koi8-u)
("ur" . "UTF-8") ; Urdu
("uz_UZ@cyrillic" . "UTF-8"); Uzbek
("vi" "Vietnamese" utf-8)
; vo Volapuk
("wa" . "Latin-1") ; Walloon
- ; wo Wolof
+ ("wo" . "UTF-8") ; Wolof
("xh" . "Latin-1") ; Xhosa
("yi" . "Windows-1255") ; Yiddish
- ; yo Yoruba
+ ("yo" . "UTF-8") ; Yoruba
; za Zhuang
("zh_HK" . "Chinese-Big5")
; zh_HK/BIG5-HKSCS \
("zh_CN.GB18030" "Chinese-GB18030")
("zh_CN.UTF-8" . "Chinese-GBK")
("zh_CN" . "Chinese-GB")
+ ("zhh" . "Chinese-Big5") ; MS-Windows Chinese (Hong Kong S.A.R.)
+ ("zhi" . "Chinese-GBK") ; MS-Windows Chinese (Singapore)
+ ("zhm" . "Chinese-Big5") ; MS-Windows Chinese (Macao S.A.R.)
("zh" . "Chinese-GB")
("zu" . "Latin-1") ; Zulu
("sp" . "Cyrillic-ISO") ; Serbian (Cyrillic alphabet), e.g. X11R6.4
("su" . "Latin-1") ; Finnish, e.g. Solaris 2.6
("jp" . "Japanese") ; e.g. MS Windows
- ("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified
- ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional
+ ("chs" . "Chinese-GBK") ; MS Windows Chinese Simplified (PRC)
+ ("cht" . "Chinese-BIG5") ; MS Windows Chinese Traditional (Taiwan)
("gbz" . "UTF-8") ; MS Windows Dari Persian
("div" . "UTF-8") ; MS Windows Divehi (Maldives)
("wee" . "Latin-2") ; MS Windows Lower Sorbian
("wen" . "Latin-2") ; MS Windows Upper Sorbian
+ ("ind" . "Latin-1") ; MS-Windows Indonesian
+ ("sme" . "UTF-8") ; MS-Windows Northern Sami (Norway)
+ ("smf" . "UTF-8") ; MS-Windows Northern Sami (Sweden)
+ ("smg" . "UTF-8") ; MS-Windows Northern Sami (Finland)
+ ("kdi" "Kannada" utf-8) ; MS-Windows Kannada
+ ("mar" "Devanagari" utf-8) ; MS-Windows Marathi
+ ("khm" "Khmer" utf-8) ; MS-Windows Khmer
+ ("iri" . "Latin-1") ; MS-Windows Irish Gaelic
+ ; mwk MS-Windows Mohawk (Canada)
+ ("uig" . "UTF-8") ; MS-Windows Uighur
+ ("kin" . "UTF-8") ; MS-Windows Kinyarwanda
))
"Alist of locale regexps vs the corresponding languages and coding systems.
Each element has this form:
(output-coding
(if noninteractive
(intern (format "cp%d" (w32-get-console-output-codepage)))
- code-page-coding)))
- (when (coding-system-p code-page-coding)
+ code-page-coding))
+ (multibyte-code-page-coding
+ (or (and (boundp 'w32-multibyte-code-page)
+ (not (zerop w32-multibyte-code-page))
+ (intern (format "cp%d" w32-multibyte-code-page)))
+ code-page-coding))
+ (locale-coding
+ (if noninteractive
+ code-page-coding
+ multibyte-code-page-coding)))
+ (when (and (coding-system-p code-page-coding)
+ (coding-system-p locale-coding))
(or output-coding (setq output-coding code-page-coding))
- (unless frame (setq locale-coding-system code-page-coding))
+ (unless frame (setq locale-coding-system locale-coding))
(set-keyboard-coding-system code-page-coding frame)
(set-terminal-coding-system output-coding frame)
(setq default-file-name-coding-system ansi-code-page-coding))))