git.eshelyaron.com Git - emacs.git/commitdiff
Speed up Unicode normalisation tests by a factor of 5
author Mattias Engdegård <mattiase@acm.org>
Sat, 3 Dec 2022 18:19:28 +0000 (19:19 +0100)
committer Mattias Engdegård <mattiase@acm.org>
Sat, 3 Dec 2022 19:47:41 +0000 (20:47 +0100)
After this change, ucs-normalize-tests are still very slow but
somewhat less disastrously so (from 100 to 20 min on this machine).

* test/lisp/international/ucs-normalize-tests.el
(ucs-normalize-tests--normalization-equal-p)
(ucs-normalize-tests--normalization-chareq-p)
(ucs-normalize-tests--rule1-holds-p)
(ucs-normalize-tests--rule2-holds-p)
(ucs-normalize-tests--part1-rule2):
Run only over the Unicode code space.
Hoist `with-current-buffer` to reduce overhead.

test/lisp/international/ucs-normalize-tests.el

index 9e359d5022f21661ae8a8334d5b31c869411e912..8d7ac5eb8b1d867eeedaa1211ecd11fea0688554 100644 (file)
@@ -59,7 +59,7 @@ And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity."
                       (NFD . ucs-normalize-NFD-region)
                       (NFKC . ucs-normalize-NFKC-region)
                       (NFKD . ucs-normalize-NFKD-region))))
-    `(with-current-buffer ucs-normalize-tests--norm-buf
+    `(progn
        (erase-buffer)
        (insert ,str)
        (,(cdr (assq norm norm-alist)) (point-min) (point-max))
@@ -74,7 +74,7 @@ And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity."
                       (NFD . ucs-normalize-NFD-region)
                       (NFKC . ucs-normalize-NFKC-region)
                       (NFKD . ucs-normalize-NFKD-region))))
-    `(with-current-buffer ucs-normalize-tests--norm-buf
+    `(progn
        (erase-buffer)
        (insert ,char)
        (,(cdr (assq norm norm-alist)) (point-min) (point-max))
@@ -90,36 +90,37 @@ The following invariants must be true for all conformant implementations..."
     ;; See `ucs-normalize-tests--rule2-holds-p'.
     (aset ucs-normalize-tests--chars-part1
           (aref source 0) 1))
-  (and
-   ;; c2 ==  toNFC(c1) ==  toNFC(c2) ==  toNFC(c3)
-   (ucs-normalize-tests--normalization-equal-p NFC source nfc)
-   (ucs-normalize-tests--normalization-equal-p NFC nfc nfc)
-   (ucs-normalize-tests--normalization-equal-p NFC nfd nfc)
-   ;; c4 ==  toNFC(c4) ==  toNFC(c5)
-   (ucs-normalize-tests--normalization-equal-p NFC nfkc nfkc)
-   (ucs-normalize-tests--normalization-equal-p NFC nfkd nfkc)
-
-   ;; c3 ==  toNFD(c1) ==  toNFD(c2) ==  toNFD(c3)
-   (ucs-normalize-tests--normalization-equal-p NFD source nfd)
-   (ucs-normalize-tests--normalization-equal-p NFD nfc nfd)
-   (ucs-normalize-tests--normalization-equal-p NFD nfd nfd)
-   ;; c5 ==  toNFD(c4) ==  toNFD(c5)
-   (ucs-normalize-tests--normalization-equal-p NFD nfkc nfkd)
-   (ucs-normalize-tests--normalization-equal-p NFD nfkd nfkd)
-
-   ;; c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == toNFKC(c5)
-   (ucs-normalize-tests--normalization-equal-p NFKC source nfkc)
-   (ucs-normalize-tests--normalization-equal-p NFKC nfc nfkc)
-   (ucs-normalize-tests--normalization-equal-p NFKC nfd nfkc)
-   (ucs-normalize-tests--normalization-equal-p NFKC nfkc nfkc)
-   (ucs-normalize-tests--normalization-equal-p NFKC nfkd nfkc)
-
-   ;; c5 == toNFKD(c1) == toNFKD(c2) == toNFKD(c3) == toNFKD(c4) == toNFKD(c5)
-   (ucs-normalize-tests--normalization-equal-p NFKD source nfkd)
-   (ucs-normalize-tests--normalization-equal-p NFKD nfc nfkd)
-   (ucs-normalize-tests--normalization-equal-p NFKD nfd nfkd)
-   (ucs-normalize-tests--normalization-equal-p NFKD nfkc nfkd)
-   (ucs-normalize-tests--normalization-equal-p NFKD nfkd nfkd)))
+  (with-current-buffer ucs-normalize-tests--norm-buf
+    (and
+     ;; c2 ==  toNFC(c1) ==  toNFC(c2) ==  toNFC(c3)
+     (ucs-normalize-tests--normalization-equal-p NFC source nfc)
+     (ucs-normalize-tests--normalization-equal-p NFC nfc nfc)
+     (ucs-normalize-tests--normalization-equal-p NFC nfd nfc)
+     ;; c4 ==  toNFC(c4) ==  toNFC(c5)
+     (ucs-normalize-tests--normalization-equal-p NFC nfkc nfkc)
+     (ucs-normalize-tests--normalization-equal-p NFC nfkd nfkc)
+
+     ;; c3 ==  toNFD(c1) ==  toNFD(c2) ==  toNFD(c3)
+     (ucs-normalize-tests--normalization-equal-p NFD source nfd)
+     (ucs-normalize-tests--normalization-equal-p NFD nfc nfd)
+     (ucs-normalize-tests--normalization-equal-p NFD nfd nfd)
+     ;; c5 ==  toNFD(c4) ==  toNFD(c5)
+     (ucs-normalize-tests--normalization-equal-p NFD nfkc nfkd)
+     (ucs-normalize-tests--normalization-equal-p NFD nfkd nfkd)
+
+     ;; c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == toNFKC(c5)
+     (ucs-normalize-tests--normalization-equal-p NFKC source nfkc)
+     (ucs-normalize-tests--normalization-equal-p NFKC nfc nfkc)
+     (ucs-normalize-tests--normalization-equal-p NFKC nfd nfkc)
+     (ucs-normalize-tests--normalization-equal-p NFKC nfkc nfkc)
+     (ucs-normalize-tests--normalization-equal-p NFKC nfkd nfkc)
+
+     ;; c5 == toNFKD(c1) == toNFKD(c2) == toNFKD(c3) == toNFKD(c4) == toNFKD(c5)
+     (ucs-normalize-tests--normalization-equal-p NFKD source nfkd)
+     (ucs-normalize-tests--normalization-equal-p NFKD nfc nfkd)
+     (ucs-normalize-tests--normalization-equal-p NFKD nfd nfkd)
+     (ucs-normalize-tests--normalization-equal-p NFKD nfkc nfkd)
+     (ucs-normalize-tests--normalization-equal-p NFKD nfkd nfkd))))
 
 (defsubst ucs-normalize-tests--rule2-holds-p (X)
  "Check 2nd conformance rule.
@@ -127,7 +128,9 @@ For every code point X assigned in this version of Unicode that
 is not specifically listed in Part 1, the following invariants
 must be true for all conformant implementations:
 
-  X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)"
+  X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)
+
+Must be called with `ucs-normalize-tests--norm-buf' as current buffer."
  (and (ucs-normalize-tests--normalization-chareq-p NFC X X)
       (ucs-normalize-tests--normalization-chareq-p NFD X X)
       (ucs-normalize-tests--normalization-chareq-p NFKC X X)
@@ -230,20 +233,23 @@ must be true for all conformant implementations:
 
 (defun ucs-normalize-tests--part1-rule2 (chars-part1)
   (let ((reporter (make-progress-reporter "UCS Normalize Test Part1, rule 2"
-                                          0 (max-char)))
-        (failed-chars nil))
-    (map-char-table
-     (lambda (char-range listed-in-part)
-       (unless (eq listed-in-part 1)
-         (if (characterp char-range)
-             (progn (unless (ucs-normalize-tests--rule2-holds-p char-range)
-                      (push char-range failed-chars))
-                    (progress-reporter-update reporter char-range))
-           (cl-loop for char from (car char-range) to (cdr char-range)
-                    unless (ucs-normalize-tests--rule2-holds-p char)
-                    do (push char failed-chars)
-                    do (progress-reporter-update reporter char)))))
-     chars-part1)
+                                          0 (max-char t)))
+        (failed-chars nil)
+        (unicode-max (max-char t)))
+    (with-current-buffer ucs-normalize-tests--norm-buf
+      (map-char-table
+       (lambda (char-range listed-in-part)
+         (unless (eq listed-in-part 1)
+           (if (characterp char-range)
+               (progn (unless (ucs-normalize-tests--rule2-holds-p char-range)
+                        (push char-range failed-chars))
+                      (progress-reporter-update reporter char-range))
+             (cl-loop for char from (car char-range) to (min (cdr char-range)
+                                                             unicode-max)
+                      unless (ucs-normalize-tests--rule2-holds-p char)
+                      do (push char failed-chars)
+                      do (progress-reporter-update reporter char)))))
+       chars-part1))
     (progress-reporter-done reporter)
     failed-chars))