From 889f07c352f7e0deccf59353a60a45f2716551d8 Mon Sep 17 00:00:00 2001
From: Eli Zaretskii
Date: Fri, 15 Dec 2017 11:06:07 +0200
Subject: [PATCH] Better support utf-8-with-signature and utf-8-hfs in XML/HTML

* lisp/international/mule.el (sgml-xml-auto-coding-function):
Support UTF-8 with BOM and utf-8-hfs as variants of UTF-8, and
obey the buffer's encoding if it is one of these variants, instead
of re-encoding in UTF-8 proper.  (Bug#20623)
---
Review notes (text between the "---" line and the diff is not part of
the commit message):

- The list handed to memq is already quoted, so quoting its elements
  again turned them into (quote utf-8-with-signature) and
  (quote utf-8-hfs), which memq can never match against a symbol.
  The inner quotes are removed so the UTF-8 variants are recognized.
- coding-system-type returns a type symbol rather than a coding-system
  name, so the two type checks compare with eq instead of
  coding-system-equal.
- Line breaks and indentation were restored from a copy whose
  whitespace had been collapsed; check the context lines against the
  tree before applying (or use "git apply --ignore-whitespace").  The
  blob id on the "index" line predates the two fixes above.

 lisp/international/mule.el | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/lisp/international/mule.el b/lisp/international/mule.el
index 857fa800eb4..81c04db90e9 100644
--- a/lisp/international/mule.el
+++ b/lisp/international/mule.el
@@ -2493,7 +2493,17 @@ This function is intended to be added to `auto-coding-functions'."
             (let* ((match (match-string 1))
                    (sym (intern (downcase match))))
               (if (coding-system-p sym)
-                  sym
+                  ;; If the encoding tag is UTF-8 and the buffer's
+                  ;; encoding is one of the variants of UTF-8, use the
+                  ;; buffer's encoding.  This allows, e.g., saving an
+                  ;; XML file as UTF-8 with BOM when the tag says UTF-8.
+                  (let ((sym-type (coding-system-type sym))
+                        (bfcs-type
+                         (coding-system-type buffer-file-coding-system)))
+                    (if (and (eq 'utf-8 sym-type)
+                             (eq 'utf-8 bfcs-type))
+                        buffer-file-coding-system
+                      sym))
                 (message "Warning: unknown coding system \"%s\"" match)
                 nil))
           ;; Files without an encoding tag should be UTF-8. But users
@@ -2506,7 +2516,8 @@ This function is intended to be added to `auto-coding-functions'."
                    (coding-system-base
                     (detect-coding-region (point-min) size t)))))
             ;; Pure ASCII always comes back as undecided.
-            (if (memq detected '(utf-8 undecided))
+            (if (memq detected
+                      '(utf-8 utf-8-with-signature utf-8-hfs undecided))
                 'utf-8
               (warn "File contents detected as %s.
   Consider adding an encoding attribute to the xml declaration,
-- 
2.39.2