git.eshelyaron.com Git - emacs.git/commitdiff
Add textsec-restriction-level function
author Lars Ingebrigtsen <larsi@gnus.org>
Mon, 17 Jan 2022 15:24:17 +0000 (16:24 +0100)
committer Lars Ingebrigtsen <larsi@gnus.org>
Mon, 17 Jan 2022 15:24:17 +0000 (16:24 +0100)
* lisp/international/textsec.el (textsec-restriction-level): New
function.

lisp/international/textsec.el
test/lisp/international/textsec-tests.el

index 884425d49227cea64921710827d12ab4ac317009..fc809d52c1de658d7b203baaf609bf838de578f5 100644 (file)
@@ -97,6 +97,58 @@ Not that a string may have several different minimal cover sets."
       (setq set (seq-union set (seq-difference s set))))
     (sort (delq 'common (delq 'inherited set)) #'string<)))
 
+(defun textsec-restriction-level (string)
+  "Say what restriction level STRING qualifies for.
+Levels are (in order of restrictiveness) `ascii-only',
+`single-script', `highly-restrictive', `moderately-restrictive',
+`minimally-restrictive' and `unrestricted'.
+
+The return value is one of those symbols; the levels are tested
+from most to least restrictive and the first one STRING satisfies
+is returned.  `minimally-restrictive' is not detected yet, so it
+is never returned."
+  (let ((scripts (textsec-covering-scripts string)))
+    (cond
+     ;; Use `string-match-p' so the caller's match data is left alone.
+     ((string-match-p "\\`[[:ascii:]]+\\'" string)
+      'ascii-only)
+     ((textsec-single-script-p string)
+      'single-script)
+     ;; Every covering script falls within one of these three
+     ;; Latin + CJK combinations.
+     ((or (null (seq-difference scripts '(latin han hiragana katakana)))
+          (null (seq-difference scripts '(latin han bopomofo)))
+          (null (seq-difference scripts '(latin han hangul))))
+      'highly-restrictive)
+     ((and (= (length scripts) 2)
+           (memq 'latin scripts)
+           (seq-intersection scripts
+                             '(arabic
+                               armenian
+                               bengali
+                               bopomofo
+                               devanagari
+                               ethiopic
+                               georgian
+                               gujarati
+                               gurmukhi
+                               hangul
+                               han
+                               hebrew
+                               hiragana
+                               katakana
+                               kannada
+                               khmer
+                               lao
+                               malayalam
+                               myanmar
+                               oriya
+                               sinhala
+                               tamil
+                               telugu
+                               thaana
+                               thai
+                               tibetan)))
+      ;; The string is covered by Latin and exactly one other
+      ;; Recommended script, excluding Cyrillic and Greek.
+      'moderately-restrictive)
+     ;; Fixme `minimally-restrictive' -- needs well-formedness criteria
+     ;; and Identifier Profile.
+     (t
+      'unrestricted))))
+
 (provide 'textsec)
 
 ;;; textsec.el ends here
index c80b2ba0fdf7af9c108bcdff143195c838e2625d..7c56229e983aaa40e5cf253cd345d3005c0e63d6 100644 (file)
   (should (equal (textsec-covering-scripts "〆切")
                  '(han))))
 
+(ert-deftest test-restriction-level ()
+  "Check one representative string per level `textsec-restriction-level' returns."
+  ;; Pure ASCII.
+  (should (eq (textsec-restriction-level "foo")
+              'ascii-only))
+  ;; ASCII "C" followed by non-ASCII mathematical letter forms.
+  (should (eq (textsec-restriction-level "C𝗂𝗋𝖼𝗅𝖾")
+              'single-script))
+  ;; Han mixed with Latin.
+  (should (eq (textsec-restriction-level "切foo")
+              'highly-restrictive))
+  ;; Armenian mixed with Latin.
+  (should (eq (textsec-restriction-level "հfoo")
+              'moderately-restrictive))
+  ;; Cyrillic look-alikes mixed with Latin.
+  (should (eq (textsec-restriction-level "Сirсlе")
+              'unrestricted)))
+
 ;;; textsec-tests.el ends here