git.eshelyaron.com Git - emacs.git/commitdiff
Fix idna-mapping-table following Unicode 16 changes
author Robert Pluim <rpluim@gmail.com>
Tue, 17 Sep 2024 13:19:01 +0000 (15:19 +0200)
committer Eshel Yaron <me@eshelyaron.com>
Wed, 18 Sep 2024 10:48:55 +0000 (12:48 +0200)
The latest version of UTS #46 in Unicode 16 has changed the way it
indicates which codepoints are invalid in domain names, causing
'idna-mapping-table' to contain incorrect information, which then breaks
'textsec-domain-suspicious-p' and our test suite.  (Bug#73312)

* admin/unidata/unidata-gen.el (unidata-gen-idna-mapping): Check the
IDNA validity field in "IdnaMappingTable.txt" in addition to checking
the status field, as the latter can now be 'valid' for disallowed
codepoints.

(cherry picked from commit 7d365a2d72d8e656262205827cc5fdf423c3a41f)

admin/unidata/unidata-gen.el

index 7be03fe63afbe01607d13f737b2a2473d9c565ff..71ea7bddb849eaba93ab1f55e6c097c5a8c04b96 100644 (file)
@@ -1598,15 +1598,21 @@ same directory."))
   (let ((map (make-char-table nil)))
     (with-temp-buffer
       (unidata-gen--insert-file "IdnaMappingTable.txt")
-      (while (re-search-forward "^\\([0-9A-F]+\\)\\(?:\\.\\.\\([0-9A-F]+\\)\\)? +; +\\([^ ]+\\) +\\(?:; +\\([ 0-9A-F]+\\)\\)?"
+      (while (re-search-forward "^\\([0-9A-F]+\\)\\(?:\\.\\.\\([0-9A-F]+\\)\\)? +; +\\([^ ]+\\) +\\(?:; +\\([ 0-9A-F]+\\)\\)?\\(?:; \\(NV8\\|XV8\\)\\)?"
                                 nil t)
         (let ((start (match-string 1))
               (end (match-string 2))
               (status (match-string 3))
-              (mapped (match-string 4)))
+              (mapped (match-string 4))
+              (idna-status (match-string 5)))
           ;; Make reading the file slightly faster by using `t'
           ;; instead of `disallowed' all over the place.
-          (when (string-match-p "\\`disallowed" status)
+          (when (or (string-match-p "\\`disallowed" status)
+                    ;; UTS #46 messed us about with "status = valid" for
+                    ;; invalid characters, so we need to check for "NV8" or
+                    ;; "XV8".
+                    (string= idna-status "NV8")
+                    (string= idna-status "XV8"))
             (setq status "t"))
           (unless (or (equal status "valid")
                       (equal status "deviation"))