git.eshelyaron.com Git - emacs.git/commitdiff
Mark whole-script confusables as suspicious domains
author: Lars Ingebrigtsen <larsi@gnus.org>
Wed, 19 Jan 2022 14:52:20 +0000 (15:52 +0100)
committer: Lars Ingebrigtsen <larsi@gnus.org>
Wed, 19 Jan 2022 14:52:20 +0000 (15:52 +0100)
* lisp/international/textsec.el (textsec-domain-suspicious-p):
Consider domain names that are whole-script confusables with ASCII
to be suspicious.  (I think this is what the Unicode standard is
recommending, but I'm not 100% sure.)

lisp/international/textsec.el
test/lisp/international/textsec-tests.el

index d861685f0b1322eee219e2a8cc30e93410222a96..63860d22508a1486c9a60d9a63dfcc44fb6a3e20 100644 (file)
@@ -245,8 +245,14 @@ or use certain other unusual mixtures of characters."
          (throw 'found (format "Disallowed character: `%s' (#x%x)"
                                (string char) char))))
      domain)
+    ;; Does IDNA allow it?
     (unless (puny-highly-restrictive-domain-p domain)
-      (throw 'found (format "%s is not highly-restrictive" domain)))
+      (throw 'found (format "`%s' is not highly-restrictive" domain)))
+    ;; Check whether any segment of the domain name is confusable with
+    ;; an ASCII-only segment.
+    (dolist (elem (split-string domain "\\."))
+      (when (textsec-ascii-confusable-p elem)
+        (throw 'found (format "`%s' is confusable with ASCII" elem))))
     nil))
 
 (defun textsec-local-address-suspicious-p (local)
index b68bce1dc70f9d7e3e6eb9dcb8f5ca507700e9bb..8385c116f4ffa1fafcb4ca325dea4126492ff92d 100644 (file)
 (ert-deftest test-suspiction-domain ()
   (should (textsec-domain-suspicious-p "foo/bar.org"))
   (should-not (textsec-domain-suspicious-p "foo.org"))
-  (should (textsec-domain-suspicious-p "f\N{LEFT-TO-RIGHT ISOLATE}oo.org")))
+  (should (textsec-domain-suspicious-p "f\N{LEFT-TO-RIGHT ISOLATE}oo.org"))
+
+  (should (textsec-domain-suspicious-p "Сгсе.ru"))
+  (should-not (textsec-domain-suspicious-p "фСгсе.ru")))
 
 (ert-deftest test-suspicious-local ()
   (should-not (textsec-local-address-suspicious-p "larsi"))