From: Jim Porter
Date: Thu, 19 Jun 2025 17:23:39 +0000 (-0700)
Subject: Improve performance when computing the "readable" form of a page in EWW
X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=d8d0ea60fbc241166dbdb19d1ff539fea20cd10a;p=emacs.git

Improve performance when computing the "readable" form of a page in EWW

The previous version made a lot of string copies, but only needed the
word count of each DOM node. In this version, we just sum up the word
counts in all the existing nodes, which results in a significant
performance improvement (bug#78902).

* lisp/net/eww.el (eww--string-count-words, eww--dom-count-words): New
functions...
(eww--walk-readability, eww-readable-dom): Use them.

(cherry picked from commit c5f649441e2e055701c7bee811eaf266c4f0e7ce)
---

diff --git a/lisp/net/eww.el b/lisp/net/eww.el
index 14bd66d5064..582401a9f31 100644
--- a/lisp/net/eww.el
+++ b/lisp/net/eww.el
@@ -1158,6 +1158,30 @@ adds a new entry to `eww-history'."
       (plist-put eww-data :readable make-readable)
       (eww--after-page-change))))
 
+(defun eww--string-count-words (string)
+  "Return the number of whitespace-separated words in STRING."
+  (let ((start 0)
+        (count 0))
+    (while (string-match split-string-default-separators string start)
+      (when (< start (match-beginning 0))
+        (incf count))
+      (setq start (match-end 0)))
+    (when (length> string start)   ; trailing word, even a one-character one
+      (incf count))
+    count))
+
+(defun eww--dom-count-words (node)
+  "Return the number of words in all the textual data under NODE."
+  (cond
+   ((stringp node)
+    (eww--string-count-words node))
+   ((memq (dom-tag node) '(script comment))
+    0)
+   (t
+    (let ((total 0))
+      (dolist (elem (dom-children node) total)
+        (incf total (eww--dom-count-words elem)))))))
+
 (defun eww--walk-readability (node callback &optional noscore)
   "Walk through all children of NODE to score readability.
 After scoring, call CALLBACK with the node and score.  If NOSCORE is
@@ -1166,7 +1190,7 @@ non-nil, don't actually compute a score; just call the callback."
(unless noscore (cond ((stringp node) - (setq score (length (split-string node)) + (setq score (eww--string-count-words node) noscore t)) ((memq (dom-tag node) '(head comment script style template)) (setq score -2 @@ -1178,7 +1202,7 @@ non-nil, don't actually compute a score; just call the callback." (setq score 2 noscore t)) ((eq (dom-tag node) 'a) - (setq score (- (length (split-string (dom-inner-text node)))) + (setq score (- (eww--dom-count-words node)) noscore t)) (t (setq score -1)))) @@ -1203,7 +1227,7 @@ If EWW can't create a readable version, return nil instead." (when (and score (> score best-score) ;; We set a lower bound to how long we accept that ;; the readable portion of the page is going to be. - (> (length (split-string (dom-inner-text node))) 100)) + (> (eww--dom-count-words node) 100)) (setq best-score score best-node node)) ;; Keep track of any <base> and <link> tags we find to include