From ea5c79f657a9e2826073896ea00e6000ccc04a8d Mon Sep 17 00:00:00 2001 From: Lars Ingebrigtsen Date: Tue, 24 Sep 2019 17:48:35 +0200 Subject: [PATCH] Allow controlling when to send cookies when retrieving images in shr * lisp/net/shr.el (shr--use-cookies-p): New function. (shr-tag-img): Use it. (shr-cookie-policy): New variable. (shr-save-contents): Use cookies. * doc/misc/eww.texi (Advanced): Document it. --- doc/misc/eww.texi | 19 +++++++++++++--- etc/NEWS | 6 +++++ lisp/net/shr.el | 45 +++++++++++++++++++++++++++++++++----- test/lisp/net/shr-tests.el | 13 +++++++++++ 4 files changed, 75 insertions(+), 8 deletions(-) diff --git a/doc/misc/eww.texi b/doc/misc/eww.texi index 315b4b0194d..b8821cbc299 100644 --- a/doc/misc/eww.texi +++ b/doc/misc/eww.texi @@ -217,9 +217,22 @@ in an external browser by customizing @findex url-cookie-list @kindex C @cindex Cookies - EWW handles cookies through the @ref{Top, url package, ,url}. -You can list existing cookies with @kbd{C} (@code{url-cookie-list}). -For details about the Cookie handling @xref{Cookies,,,url}. + EWW handles cookies through the @ref{Top, url package, ,url} +package. You can list existing cookies with @kbd{C} +(@code{url-cookie-list}). For details about the Cookie handling +@xref{Cookies,,,url}. + +@vindex shr-cookie-policy + Many @acronym{HTML} pages have images embedded in them, and EWW will +download most these by default. When fetching images, cookies can be +sent and received, and these can be used to track users. To control +when to send cookies when retrieving these images, the +@code{shr-cookie-policy} variable can be used. The default value, +@code{same-origin}, means that EWW will only send cookies when +fetching images that originate from the same source as the +@acronym{HTML} page. @code{nil} means ``never send cookies when +retrieving these images'' and @code{t} means ``always send cookies +when retrieving these images''. @vindex eww-header-line-format @cindex Header diff --git a/etc/NEWS b/etc/NEWS index 3f38f9f4a12..50956f4082c 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1117,6 +1117,12 @@ The variable to use instead to alter text to be sent is now ** eww/shr ++++ +*** The new variable 'shr-cookie-policy' can be used to control when +to use cookies when fetching embedded images. The default is to use +them when the images are from the same domain as the main HTML +document. + +++ *** The 'eww' command can now create a new EWW buffer. Invoking the command with a prefix argument will cause it to create a diff --git a/lisp/net/shr.el b/lisp/net/shr.el index 2e4f7fa5c61..63988d01c88 100644 --- a/lisp/net/shr.el +++ b/lisp/net/shr.el @@ -39,6 +39,7 @@ (require 'svg) (require 'image) (require 'puny) +(require 'url-cookie) (require 'text-property-search) (defgroup shr nil @@ -111,6 +112,16 @@ Alternative suggestions are: :version "24.4" :type 'string) +(defcustom shr-cookie-policy 'same-origin + "When to use cookies when fetching dependent data like images. +If t, always use cookies. If nil, never use cookies. If +`same-origin', use cookies if the dependent data comes from the +same domain as the main data." + :type '(choice (const :tag "Always use cookies" t) + (const :tag "Never use cookies" nil) + (const :tag "Use cookies for same domain" same-origin)) + :version "27.1") + (define-obsolete-variable-alias 'shr-external-browser 'browse-url-secondary-browser-function "27.1") @@ -333,7 +344,7 @@ called." ;; Remove common tracking junk from the URL. (funcall cont (replace-regexp-in-string ".utm_.*" "" destination))))) - nil t)) + nil t t)) (defun shr-probe-and-copy-url (url) "Copy the URL under point to the kill ring. @@ -427,7 +438,7 @@ the URL of the image to the kill buffer instead." (message "Inserting %s..." url) (url-retrieve url 'shr-image-fetched (list (current-buffer) (1- (point)) (point-marker)) - t t)))) + t)))) (defun shr-zoom-image () "Toggle the image size. @@ -985,8 +996,7 @@ the mouse click event." (if (not url) (message "No link under point") (url-retrieve (shr-encode-url url) - 'shr-store-contents (list url directory) - nil t)))) + 'shr-store-contents (list url directory))))) (defun shr-store-contents (status url directory) (unless (plist-get status :error) @@ -1658,7 +1668,8 @@ The preference is a float determined from `shr-prefer-media-type'." (shr-encode-url url) 'shr-image-fetched (list (current-buffer) start (set-marker (make-marker) (point)) (list :width width :height height)) - t t))) + t + (not (shr--use-cookies-p url shr-base))))) (when (zerop shr-table-depth) ;; We are not in a table. (put-text-property start (point) 'keymap shr-image-map) (put-text-property start (point) 'shr-alt alt) @@ -1669,6 +1680,30 @@ The preference is a float determined from `shr-prefer-media-type'." (shr-fill-text (or (dom-attr dom 'title) alt)))))))) +(defun shr--use-cookies-p (url base) + "Say whether to use cookies when fetching URL (typically an image). +BASE is the URL of the HTML being rendered." + (cond + ((null base) + ;; Disallow cookies if we don't know what the base is. + nil) + ((eq shr-cookie-policy 'same-origin) + (let ((url-host (url-host (url-generic-parse-url url))) + (base-host (split-string + (url-host (url-generic-parse-url (car base))) + "\\."))) + ;; We allow cookies if it's for any of the sibling domains (that + ;; we're allowed to set cookies for). Determine that by going + ;; "upwards" in the base domain name. + (cl-loop while base-host + when (url-cookie-host-can-set-p + url-host (mapconcat #'identity base-host ".")) + return t + do (pop base-host) + finally (return nil)))) + (t + shr-cookie-policy))) + (defun shr--preferred-image (dom) (let ((srcset (dom-attr dom 'srcset)) (frame-width (frame-pixel-width)) diff --git a/test/lisp/net/shr-tests.el b/test/lisp/net/shr-tests.el index dd820e2d9f4..c3be36439e0 100644 --- a/test/lisp/net/shr-tests.el +++ b/test/lisp/net/shr-tests.el @@ -53,6 +53,19 @@ (unless (equal (car result) (cdr result)) (should (not (list name (car result) (cdr result)))))))) +(ert-deftest use-cookies () + (let ((shr-cookie-policy 'same-origin)) + (should + (shr--use-cookies-p "http://images.fsf.org" '("http://www.fsf.org"))) + (should + (shr--use-cookies-p "http://www.fsf.org" '("https://www.fsf.org"))) + (should + (shr--use-cookies-p "http://www.fsf.org" '("https://www.fsf.org"))) + (should + (shr--use-cookies-p "http://www.fsf.org" '("http://fsf.org"))) + (should-not + (shr--use-cookies-p "http://www.gnu.org" '("http://www.fsf.org"))))) + (require 'shr) ;;; shr-stream-tests.el ends here -- 2.39.2