From beebd2a85eeab5b977ca2de8ad32784f9d8bdd51 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Mattias=20Engdeg=C3=A5rd?= Date: Wed, 9 Dec 2020 13:27:16 +0100 Subject: [PATCH] =?utf8?q?Recognise=20=C3=9F=20properly=20as=20a=20lower-c?= =?utf8?q?ase=20letter=20(bug#11309)?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit ß was incorrectly treated as a caseless character and thus not matched by the regexp [[:lower:]] (or, in case-folding mode, [[:upper:]]). The reason is that the upcase table maps it to itself, which can be remedied by mapping it to ẞ (U+1E9E) instead. Doing so does not affect upcasing since the special-uppercase property maps it to SS. * lisp/international/characters.el (tbl): Map ß to ẞ in the upcase table. * test/src/regex-emacs-tests.el (regexp-eszett): Uncomment previously failing tests. Add checks to make sure that case transformations remain valid. --- lisp/international/characters.el | 9 ++++++++- test/src/regex-emacs-tests.el | 15 +++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/lisp/international/characters.el b/lisp/international/characters.el index 0b6920cf180..5f610ddf670 100644 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@ -759,7 +759,14 @@ with L, LRE, or LRO Unicode bidi character type.") (funcall map-unicode-property 'uppercase (lambda (lc uc) (aset up lc uc) (aset up uc uc))) (funcall map-unicode-property 'lowercase - (lambda (uc lc) (aset down uc lc) (aset down lc lc)))))) + (lambda (uc lc) (aset down uc lc) (aset down lc lc))) + + ;; Override the Unicode uppercase property for ß, since we are + ;; using our case tables for determining the case of a + ;; character (see uppercasep and lowercasep in buffer.h). + ;; The special-uppercase property of ß ensures that it is + ;; still upcased to SS per the usual convention. 
+ (aset up ?ß ?ẞ)))) ;; Clear out the extra slots so that they will be recomputed from the main ;; (downcase) table and upcase table. Since we’re side-stepping the usual diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el index 576630aa5af..34d4067db47 100644 --- a/test/src/regex-emacs-tests.el +++ b/test/src/regex-emacs-tests.el @@ -834,6 +834,13 @@ This evaluates the TESTS test cases from glibc." (ert-deftest regexp-eszett () "Test matching of ß and ẞ." + ;; Sanity checks. + (should (equal (upcase "ß") "SS")) + (should (equal (downcase "ß") "ß")) + (should (equal (capitalize "ß") "Ss")) ; undeutsch... + (should (equal (upcase "ẞ") "ẞ")) + (should (equal (downcase "ẞ") "ß")) + (should (equal (capitalize "ẞ") "ẞ")) ;; ß is a lower-case letter (Ll); ẞ is an upper-case letter (Lu). (let ((case-fold-search nil)) (should (equal (string-match "ß" "ß") 0)) @@ -842,8 +849,8 @@ This evaluates the TESTS test cases from glibc." (should (equal (string-match "ẞ" "ẞ") 0)) (should (equal (string-match "[[:alpha:]]" "ß") 0)) ;; bug#11309 - ;;(should (equal (string-match "[[:lower:]]" "ß") 0)) - ;;(should (equal (string-match "[[:upper:]]" "ß") nil)) + (should (equal (string-match "[[:lower:]]" "ß") 0)) + (should (equal (string-match "[[:upper:]]" "ß") nil)) (should (equal (string-match "[[:alpha:]]" "ẞ") 0)) (should (equal (string-match "[[:lower:]]" "ẞ") nil)) (should (equal (string-match "[[:upper:]]" "ẞ") 0))) @@ -854,8 +861,8 @@ This evaluates the TESTS test cases from glibc." 
(should (equal (string-match "ẞ" "ẞ") 0)) (should (equal (string-match "[[:alpha:]]" "ß") 0)) ;; bug#11309 - ;;(should (equal (string-match "[[:lower:]]" "ß") 0)) - ;;(should (equal (string-match "[[:upper:]]" "ß") 0)) + (should (equal (string-match "[[:lower:]]" "ß") 0)) + (should (equal (string-match "[[:upper:]]" "ß") 0)) (should (equal (string-match "[[:alpha:]]" "ẞ") 0)) (should (equal (string-match "[[:lower:]]" "ẞ") 0)) (should (equal (string-match "[[:upper:]]" "ẞ") 0)))) -- 2.39.2