when the link text looks like an URL itself, but doesn't lead to
the same domain as the URL."
(let* ((url (car link))
- (text (string-trim (cdr link)))
- (text-bits
- (seq-filter
- (lambda (bit)
- (and (string-match-p "\\`[^.[:punct:]]+\\.[^.[:punct:]]+\\'" bit)
- ;; All-numerical texts are probably not
- ;; suspicious (but what about IP addresses?).
- (not (string-match-p "\\`[0-9.]+\\'" bit))))
- (split-string text))))
- (when text-bits
- (setq text-bits (seq-map (lambda (string)
- (if (not (string-match-p "\\`[^:]+:" string))
- (concat "http://" string)
- string))
- text-bits)))
+ (text (string-trim (cdr link))))
(catch 'found
- (dolist (text (or text-bits (list text)))
- (let ((udomain (url-host (url-generic-parse-url url)))
- (tdomain (url-host (url-generic-parse-url text))))
- (cond
- ((and udomain
- tdomain
- (not (equal udomain tdomain))
- ;; One may be a sub-domain of the other, but don't allow too
- ;; short domains.
- (not (or (and (string-suffix-p udomain tdomain)
- (url-domsuf-cookie-allowed-p udomain))
- (and (string-suffix-p tdomain udomain)
- (url-domsuf-cookie-allowed-p tdomain)))))
- (throw 'found
- (format "Text `%s' doesn't point to link URL `%s'"
- text url)))
- ((and tdomain
- (textsec-domain-suspicious-p tdomain))
- (throw 'found
- (format "Domain `%s' in the link text is suspicious"
- (bidi-string-strip-control-characters
- tdomain))))))))))
+ (let ((udomain (url-host (url-generic-parse-url url)))
+ (tdomain (url-host (url-generic-parse-url text))))
+ (cond
+ ((and udomain
+ tdomain
+ (not (equal udomain tdomain))
+ ;; One may be a sub-domain of the other, but don't allow too
+ ;; short domains.
+ (not (or (and (string-suffix-p udomain tdomain)
+ (url-domsuf-cookie-allowed-p udomain))
+ (and (string-suffix-p tdomain udomain)
+ (url-domsuf-cookie-allowed-p tdomain)))))
+ (throw 'found
+ (format "Text `%s' doesn't point to link URL `%s'"
+ text url)))
+ ((and tdomain
+ (textsec-domain-suspicious-p tdomain))
+ (throw 'found
+ (format "Domain `%s' in the link text is suspicious"
+ (bidi-string-strip-control-characters
+ tdomain)))))))))
(provide 'textsec)
(cons "https://www.gnu.org/" "https://fsf.org/")))
(should (textsec-link-suspicious-p
(cons "https://www.gnu.org/" "http://fsf.org/")))
- (should (textsec-link-suspicious-p
- (cons "https://www.gnu.org/" "fsf.org")))
-
- (should (textsec-link-suspicious-p
- (cons "https://www.gnu.org/"
- "This is a link that doesn't point to fsf.org")))
(should (textsec-link-suspicious-p
(cons "https://www.gn\N{LEFT-TO-RIGHT ISOLATE}u.org/"
- "gn\N{LEFT-TO-RIGHT ISOLATE}u.org"))))
+ "https://gn\N{LEFT-TO-RIGHT ISOLATE}u.org"))))
;;; textsec-tests.el ends here