The new function 'dom-inner-text' replaces 'dom-text' and 'dom-texts',
and is both more correct and more efficient than either of them.
* lisp/dom.el (dom-text, dom-texts): Make obsolete in favor of...
(dom-inner-text--1, dom-inner-text): ... these new functions. Update
callers.
* doc/lispref/text.texi (Document Object Model): Update documentation to
refer to 'dom-inner-text'.
* etc/NEWS: Announce this change.
(cherry picked from commit
2e53c7d08ba0de2468ec25cf9a9d7079604f9409)
@item dom-attributes @var{node}
Return the key/value pair list of attributes of the node.
-@item dom-text @var{node}
-Return all the textual elements of the node as a concatenated string.
-
-@item dom-texts @var{node}
+@item dom-inner-text @var{node}
Return all the textual elements of the node, as well as the textual
elements of all the children of the node, recursively, as a
-concatenated string. This function also takes an optional separator
-to be inserted between the textual elements.
+concatenated string.  The contents of @code{script} and @code{comment}
+nodes are not included.
@item dom-parent @var{dom} @var{node}
Return the parent of @var{node} in @var{dom}.
(defun dom-text (node)
"Return all the text bits in the current node concatenated."
+ ;; The `obsolete' declaration quotes the replacement name itself, so
+ ;; the symbol must be written unquoted here.
+ (declare (obsolete dom-inner-text "31.1"))
(mapconcat #'identity (cl-remove-if-not #'stringp (dom-children node)) " "))
(defun dom-texts (node &optional separator)
"Return all textual data under NODE concatenated with SEPARATOR in-between."
+ ;; The `obsolete' declaration quotes the replacement name itself, so
+ ;; the symbol must be written unquoted here.
+ (declare (obsolete dom-inner-text "31.1"))
(if (eq (dom-tag node) 'script)
""
(mapconcat
(dom-children node)
(or separator " "))))
+(defun dom-inner-text--1 (node)
+  "Insert the text found under NODE into the current buffer.
+String children of NODE are inserted as they are encountered.  Child
+nodes whose tag is `script' or `comment' are skipped; every other
+child node is processed recursively."
+  (dolist (kid (dom-children node))
+    (if (stringp kid)
+        (insert kid)
+      ;; Neither scripts nor comments contribute any text.
+      (unless (memq (dom-tag kid) '(script comment))
+        (dom-inner-text--1 kid)))))
+
+(defun dom-inner-text (node)
+  "Return the text of NODE and of all its descendants as one string.
+The text pieces are concatenated without any separator.  The text is
+collected by `dom-inner-text--1', except when NODE has exactly one
+child and that child is a string, in which case a copy of that string
+is returned."
+  (let* ((kids (dom-children node))
+         (only (car kids)))
+    (cond
+     ;; Fast path: a lone string child needs no work buffer.  Return a
+     ;; copy so that both branches hand back a string the caller owns.
+     ((and (stringp only) (length= kids 1))
+      (copy-sequence only))
+     (t
+      (with-work-buffer
+        (dom-inner-text--1 node)
+        (buffer-string))))))
+
(defun dom-child-by-tag (dom tag)
"Return the first child of DOM that is of type TAG.
The children of DOM are searched with `assoc', so the result is nil
when no child has TAG as its tag."
(assoc tag (dom-children dom)))
(defun nnatom--dom-line (node)
"Return NODE's text as a single, whitespace-trimmed line."
- (string-trim (replace-regexp-in-string "[\r\n]+" " " (dom-text node) t)))
+ ;; Collapse each run of line breaks into one space, then trim the ends.
+ (let ((text (dom-inner-text node)))
+   (string-trim (replace-regexp-in-string "[\r\n]+" " " text t))))
(defun nnatom--read-title (group)
"Return the title of GROUP, or nil."
(dom-print (dom-child-by-tag part 'div) nil t)
(buffer-substring-no-properties
(point-min) (point-max)))
- (dom-text part)))
+ (dom-inner-text part)))
(type (if (member type atypes) (concat "text/" type) type))
(type (or (cdr (assoc type mtypes)) type)))
(unless (string-blank-p part)
(plist-put eww-data :title
(replace-regexp-in-string
"^ \\| $" ""
- (replace-regexp-in-string "[ \t\r\n]+" " " (dom-text dom))))
+ (replace-regexp-in-string "[ \t\r\n]+" " " (dom-inner-text dom))))
(eww--after-page-change))
(defun eww-display-raw (buffer &optional encode)
(setq score 2
noscore t))
((eq (dom-tag node) 'a)
- (setq score (- (length (split-string (dom-text node))))
+ (setq score (- (length (split-string (dom-inner-text node))))
noscore t))
(t
(setq score -1))))
(when (and score (> score best-score)
;; We set a lower bound to how long we accept that
;; the readable portion of the page is going to be.
- (> (length (split-string (dom-texts node))) 100))
+ (> (length (split-string (dom-inner-text node))) 100))
(setq best-score score
best-node node))
;; Keep track of any <title> and <link> tags we find to include
;; directly in our list in addition to as a child of some
;; other node in the list. This is ok for <title> and <link>
;; tags, but might need changed if supporting other tags.
- (let* ((inner-text (dom-texts node ""))
+ (let* ((inner-text (dom-inner-text node))
(new-node `(,(dom-tag node)
,(dom-attributes node)
,@(when (length> inner-text 0)
most-negative-fixnum))
;; We set a lower bound to how long we accept that the
;; readable portion of the page is going to be.
- (when (> (length (split-string (dom-texts highest))) 100)
+ (when (> (length (split-string (dom-inner-text highest))) 100)
(setq result highest))))
result))
'display (make-string (length value) ?*)))))))))
(defun eww-tag-textarea (dom)
- (let ((value (or (dom-text dom) ""))
+ (let ((value (or (dom-inner-text dom) ""))
(lines (string-to-number (or (dom-attr dom 'rows) "10")))
(width (string-to-number (or (dom-attr dom 'cols) "10")))
start end form)
(dolist (elem (dom-by-tag dom 'option))
(when (dom-attr elem 'selected)
(nconc menu (list :value (dom-attr elem 'value))))
- (let ((display (dom-text elem)))
+ (let ((display (dom-inner-text elem)))
(setq max (max max (length display)))
(push (list 'item
:value (dom-attr elem 'value)