From d8d0ea60fbc241166dbdb19d1ff539fea20cd10a Mon Sep 17 00:00:00 2001
From: Jim Porter
Date: Thu, 19 Jun 2025 10:23:39 -0700
Subject: [PATCH] Improve performance when computing the "readable" form of a page in EWW

The previous version made a lot of string copies, but only needed the
word count of each DOM node.  In this version, we just sum up the word
counts in all the existing nodes, which results in a significant
performance improvement (bug#78902).

* lisp/net/eww.el (eww--string-count-words, eww--dom-count-words): New
functions...
(eww--walk-readability, eww-readable-dom): Use them.

(cherry picked from commit c5f649441e2e055701c7bee811eaf266c4f0e7ce)
---
 lisp/net/eww.el | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/lisp/net/eww.el b/lisp/net/eww.el
index 14bd66d5064..582401a9f31 100644
--- a/lisp/net/eww.el
+++ b/lisp/net/eww.el
@@ -1158,6 +1158,30 @@ adds a new entry to `eww-history'."
       (plist-put eww-data :readable make-readable)
       (eww--after-page-change))))
 
+(defun eww--string-count-words (string)
+  "Return the number of whitespace-separated words in STRING."
+  (let ((start 0)                       ; Index where the next word may begin.
+        (count 0))
+    (while (string-match split-string-default-separators string start)
+      (when (< start (match-beginning 0)) ; A word precedes this separator.
+        (incf count))
+      (setq start (match-end 0)))
+    (when (length> string start)        ; Trailing word, even a 1-char one.
+      (incf count))
+    count))
+
+(defun eww--dom-count-words (node)
+  "Return the number of words in all the textual data under NODE."
+  (cond
+   ((stringp node)
+    (eww--string-count-words node))
+   ((memq (dom-tag node) '(script comment)) ; Excluded from the count.
+    0)
+   (t
+    (let ((total 0))
+      (dolist (elem (dom-children node) total)
+        (incf total (eww--dom-count-words elem)))))))
+
 (defun eww--walk-readability (node callback &optional noscore)
   "Walk through all children of NODE to score readability.
 After scoring, call CALLBACK with the node and score.  If NOSCORE is
@@ -1166,7 +1190,7 @@ non-nil, don't actually compute a score; just call the callback."
(unless noscore (cond ((stringp node) - (setq score (length (split-string node)) + (setq score (eww--string-count-words node) noscore t)) ((memq (dom-tag node) '(head comment script style template)) (setq score -2 @@ -1178,7 +1202,7 @@ non-nil, don't actually compute a score; just call the callback." (setq score 2 noscore t)) ((eq (dom-tag node) 'a) - (setq score (- (length (split-string (dom-inner-text node)))) + (setq score (- (eww--dom-count-words node)) noscore t)) (t (setq score -1)))) @@ -1203,7 +1227,7 @@ If EWW can't create a readable version, return nil instead." (when (and score (> score best-score) ;; We set a lower bound to how long we accept that ;; the readable portion of the page is going to be. - (> (length (split-string (dom-inner-text node))) 100)) + (> (eww--dom-count-words node) 100)) (setq best-score score best-node node)) ;; Keep track of any and <link> tags we find to include -- 2.39.5