;; We can't use the \N{name} things here, because this file is used
;; too early in the build process.
-(defvar glyphless--bidi-control-characters
- '(#x202a ; ?\N{left-to-right embedding}
+(defvar bidi-control-characters
+ '(#x200e ; ?\N{left-to-right mark}
+ #x200f ; ?\N{right-to-left mark}
+ #x061c ; ?\N{arabic letter mark}
+ #x202a ; ?\N{left-to-right embedding}
#x202b ; ?\N{right-to-left embedding}
#x202d ; ?\N{left-to-right override}
#x202e ; ?\N{right-to-left override}
#x2067 ; ?\N{right-to-left isolate}
#x2068 ; ?\N{first strong isolate}
#x202c ; ?\N{pop directional formatting}
- #x2069)) ; ?\N{pop directional isolate})
+ #x2069) ; ?\N{pop directional isolate}
+ "List of bidirectional control characters.")
(defun update-glyphless-char-display (&optional variable value)
"Make the setting of `glyphless-char-display-control' take effect.
(or (aref char-acronym-table from)
"UNK")))
(when (or (eq target 'format-control)
- (memq from
- glyphless--bidi-control-characters))
+ (memq from bidi-control-characters))
(set-char-table-range glyphless-char-display
from this-method)))
(setq from (1+ from))))))
(require 'ucs-normalize)
(require 'idna-mapping)
(require 'puny)
+(require 'mail-parse)
(defvar textsec--char-scripts nil)
(textsec-single-script-p string2)))
(defun textsec-domain-suspicious-p (domain)
+ "Say whether DOMAIN looks suspicious.
+If it isn't, nil is returned. If it is, a string explaining the
+problem is returned."
(catch 'found
(seq-do
(lambda (char)
(throw 'found "%s is not highly restrictive"))
nil))
+(defun textsec-local-address-suspicious-p (local)
+ "Say whether LOCAL looks suspicious.
+LOCAL is the bit before \"@\" in an email address.
+
+If it suspicious, nil is returned. If it is, a string explaining
+the problem is returned."
+ (cond
+ ((not (equal local (ucs-normalize-NFKC-string local)))
+ (format "`%s' is not in normalized format `%s'"
+ local (ucs-normalize-NFKC-string local)))
+ ((textsec-mixed-numbers-p local)
+ (format "`%s' contains numbers from different number systems" local))
+ ((eq (textsec-restriction-level local) 'unrestricted)
+ (format "`%s' isn't restrictive enough" local))
+ ((string-match-p "\\`\\.\\|\\.\\'\\|\\.\\." local)
+ (format "`%s' contains invalid dots" local))))
+
+(defun textsec-name-suspicious-p (name)
+ "Say whether NAME looks suspicious.
+NAME is (for instance) the free-text name from an email address.
+
+If it suspicious, nil is returned. If it is, a string explaining
+the problem is returned."
+ (cond
+ ((not (equal name (ucs-normalize-NFC-string name)))
+ (format "`%s' is not in normalized format `%s'"
+ name (ucs-normalize-NFC-string name)))
+ ((seq-find (lambda (char)
+ (and (member char bidi-control-characters)
+ (not (member char
+ '( ?\N{left-to-right mark}
+ ?\N{right-to-left mark}
+ ?\N{arabic letter mark})))))
+ name)
+ (format "The string contains bidirectional control characters"))
+ ((textsec-suspicious-nonspacing-p name))))
+
+(defun textsec-suspicious-nonspacing-p (string)
+ "Say whether STRING has a suspicious use of nonspacing characters.
+If it suspicious, nil is returned. If it is, a string explaining
+the problem is returned."
+ (let ((prev nil)
+ (nonspace-count 0))
+ (catch 'found
+ (seq-do
+ (lambda (char)
+ (let ((nonspacing
+ (memq (get-char-code-property char 'general-category)
+ '(Cf Cc Mn))))
+ (when (and nonspacing
+ (equal char prev))
+ (throw 'found "Two identical nonspacing characters in a row"))
+ (setq nonspace-count (if nonspacing
+ (1+ nonspace-count)
+ 0))
+ (when (> nonspace-count 4)
+ (throw 'found
+ "Excessive number of nonspacing characters in a row"))
+ (setq prev char)))
+ string)
+ nil)))
+
+(defun textsec-email-suspicious-p (email)
+ "Say whether EMAIL looks suspicious.
+If it isn't, nil is returned. If it is, a string explaining the
+problem is returned."
+ (pcase-let* ((`(,address . ,name) (mail-header-parse-address email t))
+ (`(,local ,domain) (split-string address "@")))
+ (or
+ (textsec-domain-suspicious-p domain)
+ (textsec-local-address-suspicious-p local)
+ (textsec-name-suspicious-p name))))
+
(provide 'textsec)
;;; textsec.el ends here
(should-not (textsec-domain-suspicious-p "foo.org"))
(should (textsec-domain-suspicious-p "f\N{LEFT-TO-RIGHT ISOLATE}oo.org")))
+(ert-deftest test-suspicious-local ()
+ (should-not (textsec-local-address-suspicious-p "larsi"))
+ (should (textsec-local-address-suspicious-p ".larsi"))
+ (should (textsec-local-address-suspicious-p "larsi."))
+ (should-not (textsec-local-address-suspicious-p "la.rsi"))
+ (should (textsec-local-address-suspicious-p "lar..si"))
+
+ (should-not (textsec-local-address-suspicious-p "LÅRSI"))
+ (should (textsec-local-address-suspicious-p "LÅRSI"))
+
+ (should (textsec-local-address-suspicious-p "larsi8৪")))
+
+(ert-deftest test-suspicious-name ()
+ (should-not (textsec-name-suspicious-p "Lars Ingebrigtsen"))
+ (should (textsec-name-suspicious-p "LÅRS INGEBRIGTSEN"))
+ (should-not (textsec-name-suspicious-p "LÅRS INGEBRIGTSEN"))
+
+ (should (textsec-name-suspicious-p
+ "Lars Ingebrigtsen\N{LEFT-TO-RIGHT ISOLATE}"))
+ (should-not (textsec-name-suspicious-p
+ "Lars Ingebrigtsen\N{LEFT-TO-RIGHT MARK}"))
+
+ (should (textsec-name-suspicious-p
+ "\N{LEFT-TO-RIGHT MARK}\N{LEFT-TO-RIGHT MARK}Lars Ingebrigtsen"))
+ (should-not (textsec-name-suspicious-p
+ "\N{LEFT-TO-RIGHT MARK}\N{RIGHT-TO-LEFT MARK}Lars Ingebrigtsen"))
+ (should (textsec-name-suspicious-p
+ "\N{LEFT-TO-RIGHT MARK}\N{RIGHT-TO-LEFT MARK}\N{LEFT-TO-RIGHT MARK}\N{RIGHT-TO-LEFT MARK}\N{LEFT-TO-RIGHT MARK}Lars Ingebrigtsen")))
+
+(ert-deftest test-suspicious-email ()
+ (should-not
+ (textsec-email-suspicious-p "Lars Ingebrigtsen <larsi@gnus.org>"))
+ (should
+ (textsec-email-suspicious-p "LÅrs Ingebrigtsen <larsi@gnus.org>"))
+ (should
+ (textsec-email-suspicious-p "Lars Ingebrigtsen <.larsi@gnus.org>"))
+ (should
+ (textsec-email-suspicious-p "Lars Ingebrigtsen <larsi@gn\N{LEFT-TO-RIGHT ISOLATE}us.org>")))
+
;;; textsec-tests.el ends here