From f9f12086fb9ec037b36f74d1f12daa144c775249 Mon Sep 17 00:00:00 2001 From: Lars Ingebrigtsen Date: Thu, 20 Jan 2022 07:57:13 +0100 Subject: [PATCH] Expand textsec-link-suspicious-p checking * lisp/international/textsec.el (textsec-link-suspicious-p): Check the text more thoroughly for link-like things. --- lisp/international/textsec.el | 44 +++++++++++++++--------- test/lisp/international/textsec-tests.el | 8 ++++- 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/lisp/international/textsec.el b/lisp/international/textsec.el index 6b37e925698..017eb5dc9c4 100644 --- a/lisp/international/textsec.el +++ b/lisp/international/textsec.el @@ -389,22 +389,34 @@ This function will return non-nil if it seems like the link text is misleading about where the URL takes you. This is typical when the link text looks like an URL itself, but doesn't lead to the same domain as the URL." - (let ((url (car link)) - (text (string-trim (cdr link)))) - (when (string-match-p "\\`[a-z]+\\.[.a-z]+\\'" text) - (setq text (concat "http://" text))) - (let ((udomain (url-host (url-generic-parse-url url))) - (tdomain (url-host (url-generic-parse-url text)))) - (and udomain - tdomain - (not (equal udomain tdomain)) - ;; One may be a sub-domain of the other, but don't allow too - ;; short domains. - (not (or (and (string-suffix-p udomain tdomain) - (url-domsuf-cookie-allowed-p udomain)) - (and (string-suffix-p tdomain udomain) - (url-domsuf-cookie-allowed-p tdomain)))) - (format "Text `%s' doesn't point to link URL `%s'" text url))))) + (let* ((url (car link)) + (text (string-trim (cdr link))) + (text-bits (seq-filter (lambda (bit) + (string-match-p "\\`[^.]+\\.[^.]+.*\\'" bit)) + (split-string text)))) + (when text-bits + (setq text-bits (seq-map (lambda (string) + (if (not (string-match-p "\\`[^:]+:" string)) + (concat "http://" string) + string)) + text-bits))) + (catch 'found + (dolist (text (or text-bits (list text))) + (let ((udomain (url-host (url-generic-parse-url url))) + (tdomain (url-host (url-generic-parse-url text)))) + (cond + ((and udomain + tdomain + (not (equal udomain tdomain)) + ;; One may be a sub-domain of the other, but don't allow too + ;; short domains. + (not (or (and (string-suffix-p udomain tdomain) + (url-domsuf-cookie-allowed-p udomain)) + (and (string-suffix-p tdomain udomain) + (url-domsuf-cookie-allowed-p tdomain))))) + (throw 'found + (format "Text `%s' doesn't point to link URL `%s'" + text url))))))))) (provide 'textsec) diff --git a/test/lisp/international/textsec-tests.el b/test/lisp/international/textsec-tests.el index f8fc0564807..31e9aefc736 100644 --- a/test/lisp/international/textsec-tests.el +++ b/test/lisp/international/textsec-tests.el @@ -189,6 +189,12 @@ (should (textsec-link-suspicious-p (cons "https://www.gnu.org/" "http://fsf.org/"))) (should (textsec-link-suspicious-p - (cons "https://www.gnu.org/" "fsf.org")))) + (cons "https://www.gnu.org/" "fsf.org"))) + + (should (textsec-link-suspicious-p + (cons "https://www.gnu.org/" + "This is a link that doesn't point to fsf.org"))) + + ) ;;; textsec-tests.el ends here -- 2.39.5