git.eshelyaron.com Git - emacs.git/commitdiff
Add customization to let EWW guess content-type if needed
author Sebastián Monía <sebastian@sebasmonia.com>
Wed, 9 Oct 2024 03:26:42 +0000 (23:26 -0400)
committer Eshel Yaron <me@eshelyaron.com>
Tue, 22 Oct 2024 19:02:46 +0000 (21:02 +0200)
* lisp/net/eww.el (eww-guess-content-type-functions): New user option.
(eww--guess-content-type, eww--html-if-doctype): New functions.
(eww-render): Call 'eww--guess-content-type' (bug#73133).

(cherry picked from commit 9074a9f496b04ab58588b71f51d7830782fc7a29)

lisp/net/eww.el

index 63ad4ae78b7adf80fb5e2fcb120d20905a8058a5..3cdb8a3f42e12a8074214f9e58c65973b63581b4 100644 (file)
@@ -109,6 +109,19 @@ duplicate entries (if any) removed."
              eww-current-url
              eww-bookmark-urls))
 
+(defcustom eww-guess-content-type-functions
+  '(eww--html-if-doctype)
+  "Functions that EWW tries, in order, to guess a page's content-type.
+They are consulted only when the response lacks a usable Content-Type
+header.  Each function is called with two arguments: an alist of
+headers, and the buffer holding the complete response.  The first
+non-nil return value is used as the Content-Type.  If every function
+returns nil, EWW assumes \"application/octet-stream\" per
+RFC-9110."
+  :type '(repeat function)
+  :group 'eww
+  :version "31.1")
+
 (defcustom eww-bookmarks-directory user-emacs-directory
   "Directory where bookmark files will be stored."
   :version "25.1"
@@ -631,6 +644,30 @@ Currently this means either text/html or application/xhtml+xml."
   (member content-type '("text/html"
                         "application/xhtml+xml")))
 
+(defun eww--guess-content-type (headers response-buffer)
+  "Guess the Content-Type of a response from HEADERS and RESPONSE-BUFFER.
+Run the functions in `eww-guess-content-type-functions' one after the
+other, passing each HEADERS and RESPONSE-BUFFER, and return the first
+non-nil result.  If every function returns nil, fall back to
+\"application/octet-stream\"."
+  (let ((guess (run-hook-with-args-until-success
+                'eww-guess-content-type-functions headers response-buffer)))
+    (or guess "application/octet-stream")))
+
+(defun eww--html-if-doctype (_headers response-buffer)
+  "Return \"text/html\" if RESPONSE-BUFFER has an HTML doctype declaration.
+Return nil otherwise.  The headers argument is unused.  Point in
+RESPONSE-BUFFER is left unchanged."
+  ;; https://html.spec.whatwg.org/multipage/syntax.html#the-doctype
+  (with-current-buffer response-buffer
+    ;; `case-fold-search' is per-buffer, so bind it here; preserve point too.
+    (let ((case-fold-search t)
+          (target
+           "<!doctype +html *\\(>\\|system +\\(\\\"\\|'\\)+about:legacy-compat\\)"))
+      (save-excursion
+        (goto-char (point-min))
+        (when (re-search-forward target nil t) "text/html")))))
+
 (defun eww--rename-buffer ()
   "Rename the current EWW buffer.
 The renaming scheme is performed in accordance with
@@ -660,7 +697,7 @@ The renaming scheme is performed in accordance with
         (content-type
          (mail-header-parse-content-type
            (if (zerop (length (cdr (assoc "content-type" headers))))
-              "text/plain"
+               (eww--guess-content-type headers (current-buffer))
              (cdr (assoc "content-type" headers)))))
         (charset (intern
                   (downcase