(unless document
(let ((dom (eww--parse-html-region (point) (point-max) charset)))
(when (eww-default-readable-p url)
- (eww-score-readability dom)
- (setq dom (eww-highest-readability dom))
+ (setq dom (eww-readable-dom dom))
(with-current-buffer buffer
(plist-put eww-data :readable t)))
(setq document (eww-document-base url dom))))
(eww--parse-html-region (point-min) (point-max))))
(base (plist-get eww-data :url)))
(when make-readable
- (eww-score-readability dom)
- (setq dom (eww-highest-readability dom)))
+ (setq dom (eww-readable-dom dom)))
(when eww-readable-adds-to-history
(eww-save-history)
(eww--before-browse)
- (dolist (elem '(:source :url :title :next :previous :up :peer))
+ (dolist (elem '(:source :url :peer))
(plist-put eww-data elem (plist-get old-data elem))))
(eww-display-document (eww-document-base base dom))
(plist-put eww-data :readable make-readable)
(eww--after-page-change)))
-(defun eww-score-readability (node)
- (let ((score -1))
- (cond
- ((memq (dom-tag node) '(script head comment))
- (setq score -2))
- ((eq (dom-tag node) 'meta)
- (setq score -1))
- ((eq (dom-tag node) 'img)
- (setq score 2))
- ((eq (dom-tag node) 'a)
- (setq score (- (length (split-string (dom-text node))))))
- (t
+(defun eww--walk-readability (node callback &optional noscore)
+ "Walk through all children of NODE to score readability.
+After scoring, call CALLBACK with the node and score. If NOSCORE is
+non-nil, don't actually compute a score; just call the callback."
+ (let ((score nil))
+ (unless noscore
+ (cond
+ ((stringp node)
+ (setq score (length (split-string node))
+ noscore t))
+ ((memq (dom-tag node) '(script head comment))
+ (setq score -2
+ noscore t))
+ ((eq (dom-tag node) 'meta)
+ (setq score -1
+ noscore t))
+ ((eq (dom-tag node) 'img)
+ (setq score 2
+ noscore t))
+ ((eq (dom-tag node) 'a)
+ (setq score (- (length (split-string (dom-text node))))
+ noscore t))
+ (t
+ (setq score -1))))
+ (when (consp node)
(dolist (elem (dom-children node))
- (cond
- ((stringp elem)
- (setq score (+ score (length (split-string elem)))))
- ((consp elem)
- (setq score (+ score
- (or (cdr (assoc :eww-readability-score (cdr elem)))
- (eww-score-readability elem)))))))))
- ;; Cache the score of the node to avoid recomputing all the time.
- (dom-set-attribute node :eww-readability-score score)
+ (let ((subscore (eww--walk-readability elem callback noscore)))
+ (when (and (not noscore) subscore)
+ (incf score subscore)))))
+ (funcall callback node score)
score))
+(defun eww-readable-dom (dom)
+ "Return a readable version of DOM."
+ (let ((head-nodes nil)
+ (best-node nil)
+ (best-score most-negative-fixnum))
+ (eww--walk-readability
+ dom
+ (lambda (node score)
+ (when (consp node)
+ (when (and score (> score best-score)
+ ;; We set a lower bound to how long we accept that
+ ;; the readable portion of the page is going to be.
+ (> (length (split-string (dom-texts node))) 100))
+ (setq best-score score
+ best-node node))
+ ;; Keep track of any <title> and <link> tags we find to include
+ ;; in the final document. EWW uses them for various features,
+ ;; like renaming the buffer or navigating to "next" and
+ ;; "previous" pages. NOTE: We could probably filter out
+ ;; stylesheet <link> tags here, though it doesn't really matter
+ ;; since we don't *do* anything with stylesheets...
+ (when (memq (dom-tag node) '(title link))
+ ;; Copy the node, but not any of its (non-text) children.
+ ;; This way, we can ensure that we don't include a node
+ ;; directly in our list in addition to as a child of some
+ ;; other node in the list. This is ok for <title> and <link>
+ ;; tags, but might need changed if supporting other tags.
+ (let* ((inner-text (dom-texts node ""))
+ (new-node `(,(dom-tag node)
+ ,(dom-attributes node)
+ ,@(when (length> inner-text 0)
+ (list inner-text)))))
+ (push new-node head-nodes))))))
+ (if (and best-node (not (eq best-node dom)))
+ `(html nil
+ (head nil ,@head-nodes)
+ (body nil ,best-node))
+ dom)))
+
+(defun eww-score-readability (node)
+ (declare (obsolete 'eww--walk-readability "31.1"))
+ (eww--walk-readability
+ node
+ (lambda (node score)
+ (when (and score (consp node))
+ (dom-set-attribute node :eww-readability-score score)))))
+
(defun eww-highest-readability (node)
+ (declare (obsolete 'eww-readable-dom "31.1"))
(let ((result node)
highest)
(dolist (elem (dom-non-text-children node))
The default just returns an empty list of headers and the URL as the
body.")
+(defvar eww-test--lots-of-words
+ (string-join (make-list 20 "All work and no play makes Jack a dull boy.")
+ " ")
+ "A long enough run of words to satisfy EWW's readable mode cutoff.")
+
+(defvar eww-test--wordy-page
+ (concat "<html>"
+ "<head>"
+ "<title>Welcome to my home page</title>"
+ "<link rel=\"home\" href=\"somewhere.invalid\">"
+ "</head><body>"
+ "<a>This is an uninteresting sentence.</a>"
+ "<div>" eww-test--lots-of-words "</div>"
+ "</body></html>"))
+
(defmacro eww-test--with-mock-retrieve (&rest body)
"Evaluate BODY with a mock implementation of `eww-retrieve'.
This avoids network requests during our tests. Additionally, prepare a
(eww-test--with-mock-retrieve
(let* ((shr-width most-positive-fixnum)
(shr-use-fonts nil)
- (words (string-join
- (make-list
- 20 "All work and no play makes Jack a dull boy.")
- " "))
(eww-test--response-function
(lambda (_url)
(concat "Content-Type: text/html\n\n"
- "<html><body>"
- "<a>This is an uninteresting sentence.</a>"
- "<div>"
- words
- "</div>"
- "</body></html>"))))
+ eww-test--wordy-page))))
(eww "example.invalid")
;; Make sure EWW renders the whole document.
(should-not (plist-get eww-data :readable))
;; Now, EWW should render just the "readable" parts.
(should (plist-get eww-data :readable))
(should (string-match-p
- (concat "\\`" (regexp-quote words) "\n*\\'")
+ (concat "\\`" (regexp-quote eww-test--lots-of-words) "\n*\\'")
(buffer-substring-no-properties (point-min) (point-max))))
(eww-readable 'toggle)
;; Finally, EWW should render the whole document again.
(let* ((eww-test--response-function
(lambda (_url)
(concat "Content-Type: text/html\n\n"
- "<html><body>Hello there</body></html>")))
+ eww-test--wordy-page)))
(eww-readable-urls '("://example\\.invalid/")))
(eww "example.invalid")
;; Make sure EWW uses "readable" mode.
- (should (plist-get eww-data :readable)))))
+ (should (plist-get eww-data :readable))
+ ;; Make sure the page include the <title> and <link> nodes.
+ (should (equal (plist-get eww-data :title) "Welcome to my home page"))
+ (should (equal (plist-get eww-data :home) "somewhere.invalid")))))
(provide 'eww-tests)
;; eww-tests.el ends here