From 4eebf528fca6f6f16168c4f76a653353f3598a35 Mon Sep 17 00:00:00 2001
From: Lars Ingebrigtsen
Date: Tue, 18 Jan 2022 10:24:32 +0100
Subject: [PATCH] Add textsec predicates for different types of confusability

* lisp/international/textsec.el (textsec-resolved-script-set)
(textsec-single-script-confusable-p)
(textsec-mixed-script-confusable-p)
(textsec-whole-script-confusable-p): New functions.
---
 lisp/international/textsec.el            | 30 ++++++++++++++++++++++++
 test/lisp/international/textsec-tests.el | 19 ++++++++++++++-
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/lisp/international/textsec.el b/lisp/international/textsec.el
index 304d69cb894..8095733e097 100644
--- a/lisp/international/textsec.el
+++ b/lisp/international/textsec.el
@@ -192,6 +192,36 @@ This algorithm is described in:
                          (string char)))
                    (ucs-normalize-NFD-string string)))))
 
+(defun textsec-resolved-script-set (string)
+  "Return the resolved script set for STRING.
+This is the minimal covering script set for STRING, but is nil if
+STRING isn't a single script string."
+  (and (textsec-single-script-p string)
+       (textsec-covering-scripts string)))
+
+(defun textsec-single-script-confusable-p (string1 string2)
+  "Say whether STRING1 and STRING2 are single script confusables."
+  (and (equal (textsec-unconfuse-string string1)
+              (textsec-unconfuse-string string2))
+       ;; And they have to have at least one resolved script in
+       ;; common.
+       (seq-intersection (textsec-resolved-script-set string1)
+                         (textsec-resolved-script-set string2))))
+
+(defun textsec-mixed-script-confusable-p (string1 string2)
+  "Say whether STRING1 and STRING2 are mixed script confusables."
+  (and (equal (textsec-unconfuse-string string1)
+              (textsec-unconfuse-string string2))
+       ;; And they have no resolved scripts in common.
+       (null (seq-intersection (textsec-resolved-script-set string1)
+                               (textsec-resolved-script-set string2)))))
+
+(defun textsec-whole-script-confusable-p (string1 string2)
+  "Say whether STRING1 and STRING2 are whole script confusables."
+  (and (textsec-mixed-script-confusable-p string1 string2)
+       (textsec-single-script-p string1)
+       (textsec-single-script-p string2)))
+
 (provide 'textsec)
 
 ;;; textsec.el ends here
diff --git a/test/lisp/international/textsec-tests.el b/test/lisp/international/textsec-tests.el
index 50106bb955e..15b6b21b348 100644
--- a/test/lisp/international/textsec-tests.el
+++ b/test/lisp/international/textsec-tests.el
@@ -86,11 +86,28 @@
   (should-not (textsec-mixed-numbers-p "8foo8"))
   (should (textsec-mixed-numbers-p "8foo৪")))
 
+(ert-deftest test-resolved ()
+  (should (equal (textsec-resolved-script-set "ljeto")
+                 '(latin)))
+  (should-not (textsec-resolved-script-set "Сirсlе")))
+
 (ert-deftest test-confusable ()
   (should (equal (textsec-unconfuse-string "ǉeto") "ljeto"))
   (should (textsec-ascii-confusable-p "ǉeto"))
   (should-not (textsec-ascii-confusable-p "ljeto"))
   (should (equal (textsec-unconfuse-string "～") "〜"))
-  (should-not (textsec-ascii-confusable-p "～")))
+  (should-not (textsec-ascii-confusable-p "～"))
+
+  (should (textsec-single-script-confusable-p "ǉeto" "ljeto"))
+  (should-not (textsec-single-script-confusable-p "paypal" "pаypаl"))
+  (should-not (textsec-single-script-confusable-p "scope" "ѕсоре"))
+
+  (should-not (textsec-mixed-script-confusable-p "ǉeto" "ljeto"))
+  (should (textsec-mixed-script-confusable-p "paypal" "pаypаl"))
+  (should (textsec-mixed-script-confusable-p "scope" "ѕсоре"))
+
+  (should-not (textsec-whole-script-confusable-p "ǉeto" "ljeto"))
+  (should-not (textsec-whole-script-confusable-p "paypal" "pаypаl"))
+  (should (textsec-whole-script-confusable-p "scope" "ѕсоре")))
 
 ;;; textsec-tests.el ends here
-- 
2.39.2