;;; Code:
;; Review note: this span is a diff hunk.  Lines starting with `-' are the
;; old, explicit charset table being removed; lines starting with `+' are
;; its replacement, which keeps only three ASCII aliases, each mapped to
;; `undecided'.  Each entry is a (CHARSET-NAME . CODING-SYSTEM) cons.
(defconst po-content-type-charset-alist
- '(; Note: Emacs 21 doesn't support all encodings, thus the missing entries.
- ("ASCII" . undecided)
+ '(("ASCII" . undecided) ; key: charset name string; value: coding-system symbol
("ANSI_X3.4-1968" . undecided)
- ("US-ASCII" . undecided)
- ("ISO-8859-1" . iso-8859-1)
- ("ISO_8859-1" . iso-8859-1)
- ("ISO-8859-2" . iso-8859-2)
- ("ISO_8859-2" . iso-8859-2)
- ("ISO-8859-3" . iso-8859-3)
- ("ISO_8859-3" . iso-8859-3)
- ("ISO-8859-4" . iso-8859-4)
- ("ISO_8859-4" . iso-8859-4)
- ("ISO-8859-5" . iso-8859-5)
- ("ISO_8859-5" . iso-8859-5)
- ;("ISO-8859-6" . ??)
- ;("ISO_8859-6" . ??)
- ("ISO-8859-7" . iso-8859-7)
- ("ISO_8859-7" . iso-8859-7)
- ("ISO-8859-8" . iso-8859-8)
- ("ISO_8859-8" . iso-8859-8)
- ("ISO-8859-9" . iso-8859-9)
- ("ISO_8859-9" . iso-8859-9)
- ;("ISO-8859-13" . ??)
- ;("ISO_8859-13" . ??)
- ;("ISO-8859-14" . ??)
- ;("ISO_8859-14" . ??)
- ("ISO-8859-15" . iso-8859-15) ; requires Emacs 21
- ("ISO_8859-15" . iso-8859-15) ; requires Emacs 21
- ("KOI8-R" . koi8-r)
- ;("KOI8-U" . ??)
- ;("KOI8-T" . ??)
- ("CP437" . cp437) ; requires Emacs 20
- ("CP775" . cp775) ; requires Emacs 20
- ("CP850" . cp850) ; requires Emacs 20
- ("CP852" . cp852) ; requires Emacs 20
- ("CP855" . cp855) ; requires Emacs 20
- ;("CP856" . ??)
- ("CP857" . cp857) ; requires Emacs 20
- ("CP861" . cp861) ; requires Emacs 20
- ("CP862" . cp862) ; requires Emacs 20
- ("CP864" . cp864) ; requires Emacs 20
- ("CP865" . cp865) ; requires Emacs 20
- ("CP866" . cp866) ; requires Emacs 21
- ("CP869" . cp869) ; requires Emacs 20
- ;("CP874" . ??)
- ;("CP922" . ??)
- ;("CP932" . ??)
- ;("CP943" . ??)
- ;("CP949" . ??)
- ;("CP950" . ??)
- ;("CP1046" . ??)
- ;("CP1124" . ??)
- ;("CP1129" . ??)
- ("CP1250" . cp1250) ; requires Emacs 20
- ("CP1251" . cp1251) ; requires Emacs 20
- ("CP1252" . iso-8859-1) ; approximation
- ("CP1253" . cp1253) ; requires Emacs 20
- ("CP1254" . iso-8859-9) ; approximation
- ("CP1255" . iso-8859-8) ; approximation
- ;("CP1256" . ??)
- ("CP1257" . cp1257) ; requires Emacs 20
- ("GB2312" . cn-gb-2312) ; also named 'gb2312' in XEmacs 21 or Emacs 21
- ; also named 'euc-cn' in Emacs 20 or Emacs 21
- ("EUC-JP" . euc-jp)
- ("EUC-KR" . euc-kr)
- ;("EUC-TW" . ??)
- ("BIG5" . big5)
- ;("BIG5-HKSCS" . ??)
- ;("GBK" . ??)
- ;("GB18030" . ??)
- ("SHIFT_JIS" . shift_jis)
- ;("JOHAB" . ??)
- ("TIS-620" . tis-620) ; requires Emacs 20 or Emacs 21
- ("VISCII" . viscii) ; requires Emacs 20 or Emacs 21
- ;("GEORGIAN-PS" . ??)
- ("UTF-8" . utf-8) ; requires Mule-UCS in Emacs 20, or Emacs 21
- )
- "How to convert a GNU libc/libiconv canonical charset name as seen in
-Content-Type into a Mule coding system.")
+ ("US-ASCII" . undecided))
+ "Alist of coding system versus GNU libc/libiconv canonical charset name.
+Contains canonical charset names that don't correspond to coding systems.")
(defun po-find-charset (filename)
- "Return PO file charset value."
- (interactive)
+ "Return PO charset value for FILENAME."
(let ((charset-regexp
"^\"Content-Type: text/plain;[ \t]*charset=\\(.*\\)\\\\n\"")
(short-read nil))
(match-string 1))))))
;; Review note: this span is a diff hunk (`-' = removed, `+' = added).
;; The added implementation picks a coding system in this order:
;;   1. a case-insensitive match of the declared charset in
;;      `po-content-type-charset-alist' -> that entry's value;
;;   2. a case-insensitive match in `coding-system-alist', first with the
;;      charset as written, then with `_' replaced by `-' -> the matched
;;      name, interned as a symbol;
;;   3. if the `code-pages' feature is already present -> `raw-text';
;;   4. otherwise try to load `code-pages' and repeat step 2, falling back
;;      to `raw-text'.
;; The lookup only runs when OPERATION is `insert-file-contents' and
;; FILENAME exists; otherwise the enclosing `and' form yields nil.
(defun po-find-file-coding-system-guts (operation filename)
- "\
-Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
-Called through file-coding-system-alist, before the file is visited for real."
- (and (eq operation 'insert-file-contents)
- (file-exists-p filename)
- (with-temp-buffer
- (let* ((coding-system-for-read 'no-conversion)
- (charset (or (po-find-charset filename) "ascii"))
- (charset-upper (upcase charset))
- (charset-lower (downcase charset))
- (candidate
- (cdr (assoc charset-upper po-content-type-charset-alist)))
- (try (or candidate (intern-soft charset-lower))))
- (list (cond ((and try (coding-system-p try))
- try)
- ((and try
- (string-match "\\`cp[1-9][0-9][0-9]?\\'"
- (symbol-name try))
- (assoc (substring (symbol-name try) 2)
- (cp-supported-codepages)))
- (codepage-setup (substring (symbol-name try) 2))
- try)
- ((and (string-match "\\`cp[1-9][0-9][0-9]?\\'"
- charset-lower)
- (assoc (substring charset-lower 2)
- (cp-supported-codepages)))
- (codepage-setup (substring charset-lower 2))
- (intern charset-lower))
- (t
- 'no-conversion)))))))
+ "Return a (DECODING . ENCODING) pair for OPERATION on PO file FILENAME.
+Do so according to FILENAME's declared charset."
+ (and
+ (eq operation 'insert-file-contents)
+ (file-exists-p filename)
+ (with-temp-buffer
+ (let* ((coding-system-for-read 'no-conversion) ; presumably so `po-find-charset' reads the header undecoded -- TODO confirm
+ (charset (or (po-find-charset filename) "ascii")) ; fall back to "ascii" when no charset is found
+ assoc) ; local variable holding the matched alist entry (separate from the function `assoc')
+ (list (cond ; the chosen coding system is returned wrapped in a one-element list
+ ((setq assoc
+ (assoc-ignore-case charset ; NOTE(review): `assoc-ignore-case' is obsolete in later Emacs (see `assoc-string') -- confirm target version
+ po-content-type-charset-alist))
+ (cdr assoc))
+ ((or (setq assoc (assoc-ignore-case charset coding-system-alist))
+ (setq assoc
+ (assoc-ignore-case (subst-char-in-string ?_ ?-
+ charset)
+ coding-system-alist)))
+ (intern (car assoc)))
+ ;; In principle we should also check the `mime-charset'
+ ;; property of everything in the base coding system
+ ;; list, but there should always be a coding system
+ ;; corresponding to the MIME name.
+ ((featurep 'code-pages)
+ ;; Give up.
+ 'raw-text)
+ (t
+ ;; Try again with code-pages loaded. Maybe it's best
+ ;; to require it initially?
+ (require 'code-pages nil t) ; third argument t: do not signal an error if the library cannot be loaded
+ (if (or
+ (setq assoc (assoc-ignore-case charset coding-system-alist))
+ (setq assoc (assoc-ignore-case (subst-char-in-string
+ ?_ ?- charset)
+ coding-system-alist)))
+ (intern (car assoc))
+ 'raw-text))))))))
;;;###autoload
(defun po-find-file-coding-system (arg-list) ; diff hunk: only the docstring changes (`-' old, `+' new)
- "\
-Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
-Called through file-coding-system-alist, before the file is visited for real."
+ "Return a (DECODING . ENCODING) pair, according to PO file's charset.
+Called through `file-coding-system-alist', before the file is visited for real."
(po-find-file-coding-system-guts (car arg-list) (car (cdr arg-list)))) ; first element of ARG-LIST is passed as OPERATION, second as FILENAME
;; This is for XEmacs.
;(defun po-find-file-coding-system (operation filename)
;  "Return a Mule (DECODING . ENCODING) pair, according to PO file charset.
;Called through file-coding-system-alist, before the file is visited for real."
; (po-find-file-coding-system-guts operation filename))
+
+(provide 'po) ; register the `po' feature for this file
+
+;;; po.el ends here