From: Kenichi Handa
Date: Sat, 12 Dec 2009 02:20:34 +0000 (+0000)
Subject: (indian-compose-regexp): New function.
X-Git-Tag: emacs-pretest-23.1.91~119
X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=3ff3655cac0932fa5932aa2fe35f4a9b37b73a51;p=emacs.git

(indian-compose-regexp): New function.
(malayalam-composable-pattern): Fix the pattern.
(composition-function-table): Set malayalam-composable-pattern for
Malayalam characters.
---

diff --git a/lisp/ChangeLog b/lisp/ChangeLog
index 0afa5ecf453..c01c77b9c92 100644
--- a/lisp/ChangeLog
+++ b/lisp/ChangeLog
@@ -1,3 +1,10 @@
+2009-12-12  Kenichi Handa
+
+	* language/indian.el (indian-compose-regexp): New function.
+	(malayalam-composable-pattern): Fix the pattern.
+	(composition-function-table): Set malayalam-composable-pattern for
+	Malayalam characters.
+
 2009-12-11  Chong Yidong
 
 	* progmodes/bug-reference.el (bug-reference-map): Bind mouse-2
diff --git a/lisp/language/indian.el b/lisp/language/indian.el
index 841ec26eb9c..a4eee0643b8 100644
--- a/lisp/language/indian.el
+++ b/lisp/language/indian.el
@@ -129,6 +129,15 @@ environment."))
 South Indian language Malayalam is supported in this language environment."))
  '("Indian"))
 
+(defun indian-compose-regexp (regexp table)
+  "Replace mnemonic characters in REGEXP according to TABLE.
+TABLE is an alist of (MNEMONIC-STRING . REPLACEMENT-STRING).
+Mnemonics are matched case-sensitively; return the expanded regexp."
+  (let ((case-fold-search nil))
+    (dolist (elt table)
+      (setq regexp (replace-regexp-in-string (car elt) (cdr elt) regexp t t)))
+    regexp))
+
 (defconst devanagari-composable-pattern
   (concat
    "\\([अ-औॠॡ][ँं]?\\)\\|[ः।]"
@@ -156,12 +165,27 @@ South Indian language Malayalam is supported in this language environment."))
   "Regexp matching a composable sequence of Kannada characters.")
 
 (defconst malayalam-composable-pattern
-  (concat
-   "\\([അ-ഔ][ം]?\\)\\|ഃ"
-   "\\|\\("
-   "\\(?:\\(?:[ക-ഹ]്\\)?\\(?:[ക-ഹ]്\\)?\\(?:[ക-ഹ]്\\)?[ക-ഹ]്\\)?"
-   "[ക-ഹ]\\(?:്\\|[ാ-ൃെേൈൊൊോൌ]?[ം്]?\\)?"
-   "\\)")
+  (let ((table '(("V" . "[\u0D05-\u0D14\u0D60-\u0D61]") ; independent vowel
+                 ("C" . "[\u0D15-\u0D39]")              ; consonant
+                 ("m" . "[\u0D46-\u0D48\u0D4A-\u0D4C]") ; prebase matra
+                 ("p" . "[\u0D3E-\u0D44\u0D57]")        ; postbase matra
+                 ("b" . "[\u0D62-\u0D63]")              ; belowbase matra
+                 ("a" . "[\u0D02-\u0D03]")              ; abovebase sign
+                 ("H" . "്")                             ; virama sign
+                 ("N" . "\u200D")                       ; ZWJ
+                 ("J" . "\u200C")                       ; ZWNJ
+                 ("X" . "[\u0D00-\u0D7F]"))))           ; all coverage
+    (indian-compose-regexp
+     (concat
+      ;; consonant-based syllables
+      "\\(CJ?HJ?\\)*C\\(H[NJ]?\\|m?b?p?a?\\)\\|"
+      ;; syllables with an independent vowel (only mnemonics from TABLE)
+      "V\\(J?HC\\)?m?b?p?a?\\|"
+      ;; special consonant form
+      "JHC\\|"
+      ;; any other singleton characters
+      "X")
+     table))
   "Regexp matching a composable sequence of Malayalam characters.")
 
 (let ((script-regexp-alist
@@ -173,7 +197,7 @@ South Indian language Malayalam is supported in this language environment."))
 	 (tamil . "[\xB80-\xBFF\x200C\x200D]+")
 	 (telugu . "[\xC00-\xC7F\x200C\x200D]+")
 	 (kannada . "[\xC80-\xCFF\x200C\x200D]+")
-	 (malayalam . "[\xD00-\xD7F\x200C\x200D]+"))))
+	 (malayalam . ,malayalam-composable-pattern))))
   (map-char-table
    #'(lambda (key val)
        (let ((slot (assq val script-regexp-alist)))