From 06b7e73b3e3189143163f87506e5a262b657d57a Mon Sep 17 00:00:00 2001 From: Katsumi Yamaoka Date: Fri, 11 Nov 2016 08:17:41 +0000 Subject: [PATCH] * lisp/net/shr.el (shr--preferred-image): Add CR to whitespace regexps. (shr-collect-extra-strings-in-table): Render extra tables in an invalid html as well. --- lisp/net/shr.el | 70 +++++++++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/lisp/net/shr.el b/lisp/net/shr.el index ff1fab8cade..afe190803b3 100644 --- a/lisp/net/shr.el +++ b/lisp/net/shr.el @@ -1529,7 +1529,7 @@ The preference is a float determined from `shr-prefer-media-type'." (setq srcset (sort (mapcar (lambda (elem) - (let ((spec (split-string elem "[\t\n ]+"))) + (let ((spec (split-string elem "[\t\n\r ]+"))) (cond ((= (length spec) 1) ;; Make sure it's well formed. @@ -1544,8 +1544,8 @@ The preference is a float determined from `shr-prefer-media-type'." (list (car spec) (string-to-number (cadr spec))))))) (split-string (replace-regexp-in-string - "\\`[\t\n ]+\\|[\t\n ]+\\'" "" srcset) - "[\t\n ]*,[\t\n ]*")) + "\\`[\t\n\r ]+\\|[\t\n\r ]+\\'" "" srcset) + "[\t\n\r ]*,[\t\n\r ]*")) (lambda (e1 e2) (> (cadr e1) (cadr e2))))) ;; Choose the smallest picture that's bigger than the current @@ -1899,7 +1899,7 @@ The preference is a float determined from `shr-prefer-media-type'." (when (zerop shr-table-depth) (save-excursion (shr-expand-alignments start (point))) - ;; Insert also non-td/th strings excluding comments and styles. + ;; Insert also non-td/th objects. (save-restriction (narrow-to-region (point) (point)) (insert (mapconcat #'identity @@ -1913,32 +1913,46 @@ The preference is a float determined from `shr-prefer-media-type'." (defun shr-collect-extra-strings-in-table (dom &optional flags) "Return extra strings in DOM of which the root is a table clause. -FLAGS is a cons of two flags that control whether to collect strings." - ;; If and only if the cdr is not set, the car will be set to t when - ;; a or a clause is found in the children of DOM, and reset - ;; to nil when a clause is found in the children of DOM. - ;; The cdr will be set to t when a
clause is found if the car - ;; is not set then, and will never be reset. - ;; This function collects strings if the car of FLAGS is not set. - (unless flags (setq flags (cons nil nil))) - (cl-loop for child in (dom-children dom) +Render extra child tables of which the parent is not td or th as well. +FLAGS is a cons of two boolean flags that control whether to collect +or render objects." + ;; Currently this function supports extra strings and
s that + ;; are children of
or clauses, not
nor . + ;; It runs recursively and collects strings or renders s if + ;; the cdr of FLAGS is nil. FLAGS becomes (t . nil) if a + ;; clause is found in the children of DOM, and becomes (t . t) if + ;; a
or a clause is found and the car is t then. + ;; When a clause is found, FLAGS becomes nil if the cdr is t + ;; then. But if the cdr is nil then, render the
. + (cl-loop for child in (dom-children dom) with tag with recurse if (stringp child) - when (and (not (car flags)) - (string-match "\\(?:[^\t\n\r ]+[\t\n\r ]+\\)*[^\t\n\r ]+" - child)) - collect (match-string 0 child) - end + unless (cdr flags) + when (string-match "\\(?:[^\t\n\r ]+[\t\n\r ]+\\)*[^\t\n\r ]+" + child) + collect (match-string 0 child) + end end else - unless (let ((tag (dom-tag child))) - (or (memq tag '(comment style)) - (progn - (cond ((memq tag '(td th)) - (unless (cdr flags) (setcar flags t))) - ((eq tag 'table) - (if (car flags) - (unless (cdr flags) (setcar flags nil)) - (setcdr flags t)))) - nil))) + do (setq tag (dom-tag child) + recurse t) + and + if (eq tag 'tr) + do (setq flags '(t . nil)) + else if (memq tag '(td th)) + when (car flags) + do (setq flags '(t . t)) + end + else if (eq tag 'table) + if (cdr flags) + do (setq flags nil) + else + do (setq recurse nil) + (shr-tag-table child) + end + else + when (memq tag '(comment style)) + do (setq recurse nil) + end end end end and + when recurse append (shr-collect-extra-strings-in-table child flags))) (defun shr-insert-table (table widths) -- 2.39.5