Mostly rewritten.

author Kenichi Handa <handa@m17n.org>

Wed, 15 Dec 1999 00:47:53 +0000 (00:47 +0000)

committer Kenichi Handa <handa@m17n.org>

Wed, 15 Dec 1999 00:47:53 +0000 (00:47 +0000)
author Kenichi Handa <handa@m17n.org>
Wed, 15 Dec 1999 00:47:53 +0000 (00:47 +0000)
committer Kenichi Handa <handa@m17n.org>
Wed, 15 Dec 1999 00:47:53 +0000 (00:47 +0000)
diff --git a/lisp/language/devan-util.el b/lisp/language/devan-util.el

index a9badda617c3feb08db960cf47ce33a4959a7fed..05ba7f948d8f9ad975265e499b96fd27febf89d5 100644 (file)
--- a/lisp/language/devan-util.el
+++ b/lisp/language/devan-util.el
@@ -49,58 +49,64 @@
  ;;; Basic functions.
  
  ;;;###autoload
-(defun indian-to-devanagari (ch)
-  "Convert IS 13194 characters to Devanagari basic characters."
-  (let ((charcodes (split-char ch)))
+(defun indian-to-devanagari (char)
+  "Convert IS 13194 character CHAR to Devanagari basic characters.
+If CHAR is not IS 13194, return CHAR as is."
+  (let ((charcodes (split-char char)))
      (if (eq (car charcodes) 'indian-is13194)
         (make-char 'indian-2-column ?\x21 (nth 1 charcodes))
-      ch)))
+      char)))
  
  ;;;###autoload
-(defun devanagari-to-indian (ch)
-  "Convert Devanagari basic characters to IS 13194 characters."
-  (let* ((charcodes (split-char ch))
-        (charset (car charcodes))
-        (code-h  (car (cdr charcodes))))
+(defun devanagari-to-indian (char)
+  "Convert Devanagari basic character CHAR to IS 13194 characters.
+If CHAR is not Devanagari basic character, return CHAR as is."
+  (let ((charcodes (split-char char)))
      (if (and (eq (car charcodes) 'indian-2-column)
              (= (nth 1 charcodes) ?\x21))
         (make-char 'indian-is13194 (nth 2 charcodes))
-      ch)))
+      char)))
  
  ;;;###autoload
  (defun indian-to-devanagari-region (from to)
-  "Convert IS 13194 characters in region to Devanagari basic characters."
+  "Convert IS 13194 characters in region to Devanagari basic characters.
+When called from a program, expects two arguments,
+positions (integers or markers) specifying the region."
    (interactive "r")
-  (save-restriction 
-    (narrow-to-region from to)
-    (goto-char (point-min))
-;   (while (re-search-forward "\\cd" nil t)
-    (while (re-search-forward "." nil t)
-      (let* ((devanagari-char (indian-to-devanagari (preceding-char))))
-       (delete-char -1)
-       (insert devanagari-char)))))
+  (save-excursion
+    (goto-char from)
+    (while (< (point) to)
+      (let ((char (following-char)))
+       (if (eq (char-charset char) 'indian-is13194)
+           (progn
+             (delete-char 1)
+             (insert (indian-to-devanagari char)))
+         (forward-char 1))))))
  
  ;;;###autoload
  (defun devanagari-to-indian-region (from to)
-  "Convert Devanagari basic characters in region to Indian characters."
+  "Convert Devanagari basic characters in region to Indian characters.
+When called from a program, expects two arguments,
+positions (integers or markers) specifying the region."
    (interactive "r")
-  (save-restriction
-    (narrow-to-region from to)
-    (goto-char (point-min))
-;   (while (re-search-forward "\\cD" nil t) ; Devanagari Character Code.
-    (while (re-search-forward "." nil t) 
-      (let* ((indian-char (devanagari-to-indian (preceding-char))))
-       (delete-char -1)
-       (insert indian-char)))))
+  (save-excursion
+    (goto-char from)
+    (while (< (point) to)
+      (let ((char (following-char)))
+       (if (eq (char-charset char) 'indian-2-column)
+           (progn
+             (delete-char -1)
+             (insert (devanagari-to-indian char)))
+         (forward-char 1))))))
  
  ;;;###autoload
-(defun indian-to-devanagari-string (str)
-  "Convert Indian String to Devanagari Basic Character String."
-  (let* ((len (length str))
+(defun indian-to-devanagari-string (string)
+  "Convert Indian characters in STRING to Devanagari Basic characters."
+  (let* ((len (length string))
          (i 0)
          (vec (make-vector len 0)))
      (while (< i len)
-      (aset vec i (indian-to-devanagari (aref str i)))
+      (aset vec i (indian-to-devanagari (aref string i)))
        (setq i (1+ i)))
      (concat vec)))
  
@@ -256,12 +262,12 @@ of '\e$(5!*!&!'\e(B' and nukta sign.")
  ;; Finally, convert 2-column glyphs to 1-column glyph
  ;; if such a glyph exist.
  ;;
-;; => \e$(6![\e(B (ml.mr) \e$(6!X\e(B / \e$(6!D\e(B (ml.mr) \e$(6"F\e(B (mr ml) \e$(6!\\e(B
+;; => \e$(6!X\e(B (ml.mr) \e$(6![\e(B / \e$(6!D\e(B (ml.mr) \e$(6"F\e(B (mr ml) \e$(6!\\e(B
  ;;
  ;; Compose the glyph.
  ;;
-;; => \e2\e$(6!X@![\e(B\e1/\e2\e$(6!D@"FP!\\e(B\e1
-;; => \e2\e$(6!X@![\e(B\e1\e2\e$(6!D@"FP!\\e(B\e1
+;; => \e4\e$(6!Xt%![\e0!X![\e1\e(B/\e4\e$(6!Dt%"Fv#!\\e0!D"F!\\e1\e(B
+;; => \e4\e$(6!Xt%![\e0!X![\e1\e4!Dt%"Fv#!\\e0!D"F!\\e1\e(B
  ;;
  
  ;;
@@ -269,7 +275,7 @@ of '\e$(5!*!&!'\e(B' and nukta sign.")
  ;; 
  ;;
  ;; IMPORTANT:  
-;;        There may be many rules which you many want to be suppressed.
+;;        There may be many rules that you many want to suppress.
  ;;        In that case, please comment out that rule.
  ;;
  ;;        RULES WILL BE EVALUATED FROM FIRST TO LAST.
@@ -277,7 +283,7 @@ of '\e$(5!*!&!'\e(B' and nukta sign.")
  ;;
  ;; TO DO: 
  ;;        Prepare multiple specific list of rules for each languages
-;;        which adopts Devanagari script.
+;;        that adopt Devanagari script.
  ;;
  
  (defconst devanagari-char-to-glyph-rules
@@ -558,20 +564,18 @@ of '\e$(5!*!&!'\e(B' and nukta sign.")
  ;; glyphs-to-characters conversion.
  ;;
  
-(defun max-match-len (regexp-str)
-  "Return the possible length of matched string of given regexp.
-Only [...] pattern of regexp is recognized.
-The last character of inside of [....] is used for its length."
-  (let ((dest-str regexp-str))
-    (while (string-match "\\[\\([^\]]\\)+\\]" dest-str)
-      (setq dest-str 
-           (concat (substring dest-str 0 (match-beginning 0))
-                   (substring dest-str (match-beginning 1) (match-end 1))
-                   (substring dest-str (match-end 0)))))
-    (length dest-str)))
-
-;; Return t iff LIST1 and LIST2 has a same member.
-(defun rule-intersection (list1 list2)
+(defun max-match-len (regexp)
+  "Return the maximum length of text that can match the pattern REGEXP.
+Only [...] pattern of regexp is recognized."
+  (let ((len 0)
+       (index 0))
+    (while (string-match "\\[\\([^\]]\\)+\\]" regexp index)
+      (setq len (+ len (- (match-beginning 0) index) 1)
+           index (match-end 0)))
+    len))
+
+;; Return t iff at least one member appears in both LIST1 and LIST2.
+(defun intersecting-p (list1 list2)
    (let ((found nil))
      (while (and list1 (not found))
        (if (memq (car list1) list2)
@@ -579,28 +583,32 @@ The last character of inside of [....] is used for its length."
         (setq list1 (cdr list1))))
      found))
  
-(defun string-conversion-by-rule (src-str symbol &rest specs)
-  "Convert string SRC-STR to a new string according to
-the rules described in the each character's SYMBOL property.  The
-rules are described in the forms of '((regexp str <specs>) ...), and
-the character sequence in the string which matches to 'regexp' are
-replaced with str.  If SPECS are not specified, only rules with no
-<specs> would be applied.  If SPECS are specified, then rules with no
-<specs> specified and rules with <spec> matches with SPECS would be
-applied.  Rules are tested in the order of the list, thus more
-specific rules should be placed in front of less important rules.  No
-composite character is supported, thus such must be converted by
-decompose-char before applying to this function.  If rule is given in
-the forms of regexp '...\\(...\\)...', then inside the parenthesis is
-the subject of the match.  Otherwise, the entire expression is the
-subject of the match."
+(defun string-conversion-by-rule (source symbol &rest specs)
+  "Convert string SOURCE by rules stored in SYMBOL property of each character.
+The remaining arguments forms a list SPECS that restricts applicable rules.
+
+The rules has the form ((REGEXP STR RULE-SPEC ...) ...).
+Each character sequence in STRING that matches REGEXP is
+replaced by STR.
+
+If SPECS is nil, only rules with no RULE-SPECs is applied.  Otherwise
+rules with no RULE-SPECS and rules that have at least one member of
+SPECS in RULE-SPECs is applied.
+
+Rules are tested in the order of the list, thus more specific rules
+should be placed in front of less specific rules.
+
+If rule is given in the forms of regexp '...\\(...\\)...', a character
+sequence that matches the pattern inside of the parenthesis is the
+subject of the match.  Otherwise, the entire expression is the subject
+of the match."
    (let ((pos 0) 
         (dst-str ""))
-    (while (< pos (length src-str))
+    (while (< pos (length source))
        (let ((found nil)
             (rules (get-char-code-property 
                     (string-to-char 
-                    (substring src-str pos)) symbol)))
+                    (substring source pos)) symbol)))
         (while rules
           (let* ((rule (car rules))
                  (regexp (car rule))
@@ -608,7 +616,7 @@ subject of the match."
                  (rule-specs (cdr (cdr rule)))
                  search-pos)
             (if (not (or (null rule-specs)
-                        (rule-intersection specs rule-specs)))
+                        (intersecting-p specs rule-specs)))
                 (setq rules (cdr rules))
               (if (null (string-match "\\\\(.+\\\\)" regexp))
                   (progn
@@ -619,7 +627,7 @@ subject of the match."
                                                     (string-match "^[^\\\\]*" regexp)
                                                     (match-end 0))))))
               (if (< search-pos 0) (setq search-pos 0))
-             (if (string-match regexp src-str search-pos)
+             (if (string-match regexp source search-pos)
                   (if (= (match-beginning 1) pos)
                       (progn
                         (setq dst-str (concat dst-str replace-str))
@@ -631,7 +639,7 @@ subject of the match."
                 (setq rules (cdr rules))))))
         ;; proceed to next position
         (if (not found)
-           (setq dst-str (concat dst-str (substring src-str pos (1+ pos)))
+           (setq dst-str (concat dst-str (substring source pos (1+ pos)))
                   pos (1+ pos)))))
      dst-str))
  
@@ -641,12 +649,12 @@ subject of the match."
  ;;
  
  ;;;###autoload
-(defun char-to-glyph-devanagari (src-str &rest langs)
-  "Convert Devanagari characters in the string to Devanagari glyphs.  
+(defun char-to-glyph-devanagari (string &rest langs)
+  "Convert Devanagari characters in STRING to Devanagari glyphs.  
  Ligatures and special rules are processed."
    (apply 
     'string-conversion-by-rule 
-   (append (list src-str 'char-to-glyph) langs)))
+   (append (list string 'char-to-glyph) langs)))
  
  ;; Example:
  ;;(char-to-glyph-devanagari "\e$(5!X![!F!h!D!\\e(B") => "\e$(5!X!["F!D!\\e(B"
@@ -656,7 +664,7 @@ Ligatures and special rules are processed."
  ;; Phase 2: Compose Glyphs to form One Glyph.
  ;;
  
-;; Each list consist of glyph, application-priority and application-direction.
+;; Each list consists of glyph, application-priority and application-direction.
  ;;
  ;; Glyphs will be ordered from low priority number to high priority number.
  ;; If application-priority is omitted, it is assumed to be 0.
@@ -1044,21 +1052,22 @@ Ligatures and special rules are processed."
  ;; Determine composition priority and rule of the array of Glyphs.
  ;; Sort the glyphs with their priority.
  
-(defun devanagari-reorder-glyphs-for-composition (glyph-alist)
-  (let* ((pos 0)
-        (ordered-glyphs '()))
-    (while (< pos (length glyph-alist))
-      (let* ((glyph (aref glyph-alist pos)))
+(defun devanagari-reorder-glyphs-for-composition (string start end)
+  (let ((pos start)
+       (ordered-glyphs nil))
+    (while (< pos end)
+      (let ((glyph (aref string pos)))
         (setq pos (1+ pos))
         (setq ordered-glyphs 
-             (append ordered-glyphs (list (assq glyph devanagari-composition-rules))))))
+             (append ordered-glyphs
+                     (list (assq glyph devanagari-composition-rules))))))
      (sort ordered-glyphs '(lambda (x y) (< (car (cdr x)) (car (cdr y)))))))
  
-;;(devanagari-compose-to-one-glyph "\e$(5"5!X![\e(B") => "\e2\e$(6!XP"5@![\e(B\e1"
+! ;;(devanagari-compose-to-one-glyph "\e$(5"5!X![\e(B") => "\e4\e$(6!Xv#"5t%![\e0!X"5![\e1\e(B"
  
  (defun devanagari-compose-to-one-glyph (devanagari-string)
    (let* ((o-glyph-list (devanagari-reorder-glyphs-for-composition
-                       (string-to-vector devanagari-string)))
+                       devanagari-string 0 (length devanagari-string)))
          ;; List of glyphs to be composed.
          (cmp-glyph-list (list (car (car o-glyph-list)))) 
          (o-glyph-list (cdr o-glyph-list)))
@@ -1077,11 +1086,31 @@ Ligatures and special rules are processed."
      (if (= (length cmp-glyph-list) 1) (char-to-string (car cmp-glyph-list))
        (apply 'compose-chars cmp-glyph-list))))
  
+(defun devanagari-composition-component (string &optional start end)
+  (or start (setq start 0))
+  (or end (setq end (length string)))
+  (let* ((o-glyph-list (devanagari-reorder-glyphs-for-composition
+                       string start end))
+        ;; List of glyphs to be composed.
+        (cmp-glyph-list (list (car (car o-glyph-list)))))
+    (setq o-glyph-list (cdr o-glyph-list))
+    (while o-glyph-list
+      (let* ((o-glyph (car o-glyph-list))
+            (glyph (if (< 2 (length o-glyph))
+                       ;; default composition
+                       (list (car (cdr (cdr o-glyph))) (car o-glyph))
+                     ;; composition with a specified rule
+                     (list '(mr . ml) (car o-glyph)))))
+       (setq o-glyph-list (cdr o-glyph-list))
+       (setq cmp-glyph-list (append cmp-glyph-list glyph))))
+    ;; Convert glyphs to 1-column width if possible.
+    (devanagari-wide-to-narrow cmp-glyph-list)))
+
  ;; Utility function for Phase 2.5
-;; Check whether given glyph is a Devanagari vertical modifier or not.
+
+;; Check whether GLYPH is a Devanagari vertical modifier or not.
  ;; If it is a vertical modifier, whether it should be 1-column shape or not
  ;; depends on previous non-vertical modifier.
-   ; return nil if it is not vertical modifier.
  (defun devanagari-vertical-modifier-p (glyph)
    (string-match (char-to-string glyph)
                 "[\e$(5!"!]!^!_!`!a!b!c!h!i"p"q"r#K#L#M\e(B]"))
@@ -1092,12 +1121,13 @@ Ligatures and special rules are processed."
                 "[\e$(5![\e(B]"))
  
  (defun devanagari-wide-to-narrow-char (char)
-  "Return the corresponding narrow character if it exists."
+  "Convert Devanagari character CHAR to the corresponding narrow character.
+If there's no corresponding narrow character, return CHAR as is."
    (let ((narrow (cdr (assq char devanagari-1-column-char))))
-    (if narrow narrow char)))
+    (or narrow char)))
  
  ;;
-;;    Phase 2.5  Convert Appropriate Character to 1-column shape.
+;;    Phase 2.5  Convert appropriate character to 1-column shape.
  ;;
  ;; This is temporary and should be removed out when Emacs supports 
  ;; variable width characters.
@@ -1121,7 +1151,8 @@ Ligatures and special rules are processed."
      (cond ((null src-list) '())
           ; not glyph code
           ((not (numberp glyph)) 
-          (cons glyph (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph)))
+          (cons glyph
+                (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph)))
           ; glyphs to be processed regardless of the value of "2-col-glyph"
           ((devanagari-non-vertical-modifier-p glyph)
            (cons (devanagari-wide-to-narrow-char glyph)
@@ -1132,7 +1163,8 @@ Ligatures and special rules are processed."
                (cons glyph
                      (devanagari-wide-to-narrow-iter (cdr src-list) t))
                (cons (devanagari-wide-to-narrow-char glyph)
-                    (devanagari-wide-to-narrow-iter (cdr src-list) 2-col-glyph))))
+                    (devanagari-wide-to-narrow-iter (cdr src-list)
+                                                    2-col-glyph))))
           ; normal glyph
           (t
            (if (cdr (assq glyph devanagari-1-column-char))
@@ -1147,65 +1179,18 @@ Ligatures and special rules are processed."
  ;; 
  
  ;;
-;; Decomposition of composite font.
+;; Decomposition of composite sequence.
  ;;
  
-(defun devanagari-normalize-narrow-glyph (charlist)
-  (let ((wide-char (car (rassoc (car charlist) devanagari-1-column-char))))
-    (if (null charlist) nil
-      (cons (if (null wide-char) (car charlist) wide-char)
-           (devanagari-normalize-narrow-glyph (cdr charlist))))))
-
-(defvar devanagari-decomposition-rules
-  '(
-    (?\e$(5"p\e(B -10)
-    )
-  )
-
-(defun devanagari-reorder-glyphs-for-decomposition (glyphlist)
-  "This function re-orders glyph list for decomposition."
-  (sort glyphlist 
-       '(lambda (x y) 
-          (let ((xx (nth 1 (assoc x devanagari-decomposition-rules)))
-                (yy (nth 1 (assoc y devanagari-decomposition-rules))))
-            (if (null xx) (setq xx 0))
-            (if (null yy) (setq yy 0))
-            (< xx yy)))))
-
-(defun devanagari-decompose-char (glyph)
-  "This function decomposes one Devanagari composite glyph to 
-   basic Devanagari characters as a string."
-  (let ((glyphlist 
-        (if (eq (car (split-char glyph)) 'composition) 
-            (string-to-list (decompose-composite-char glyph))
-          (list glyph))))
-    (setq glyphlist (devanagari-normalize-narrow-glyph glyphlist))
-    (setq glyphlist (devanagari-reorder-glyphs-for-decomposition glyphlist))
-    (string-conversion-by-rule 
-     (mapconcat 'char-to-string glyphlist "") 'glyph-to-char)))
-
  ;;;###autoload
  (defun devanagari-decompose-string (str)
-  "Decompose Devanagari glyph string STR to basic Devanagari character string."
-  (let ((len (length str))
-       (i 0)
-       (dst ""))
-    (while (< i len)
-      (setq dst (concat dst (devanagari-decompose-char (aref str i)))
-           i (1+ i)))
-    dst))
+  "Decompose Devanagari string STR"
+  (decompose-string (copy-sequence str)))
  
  ;;;###autoload
  (defun devanagari-decompose-region (from to)
    (interactive "r")
-  (save-restriction
-    (narrow-to-region from to)
-    (goto-char (point-min))
-    (while (re-search-forward "." nil t)
-      (let* ((match-b (match-beginning 0)) (match-e (match-end 0))
-            (decmps (devanagari-decompose-string (buffer-substring match-b match-e))))
-       (delete-char -1)
-       (insert decmps)))))
+  (decompose-region from to))
  
  ;;;
  ;;; Composition
@@ -1213,37 +1198,34 @@ Ligatures and special rules are processed."
  
  ;;;###autoload
  (defun devanagari-compose-string (str &rest langs)
-  (let ((len (length str))
-       (src (devanagari-decompose-string str)) (dst "") rest match-b match-e)
-    (while (string-match devanagari-composite-glyph-unit src)
-      (setq match-b (match-beginning 0) match-e (match-end 0))
-      (setq dst 
-           (concat dst 
-                   (substring src 0 match-b)
-                   (devanagari-compose-to-one-glyph 
-                    (apply 
-                     'char-to-glyph-devanagari
-                     (cons (substring src match-b match-e)
-                           langs)))))
-      (setq src (substring src match-e)))
-    (setq dst (concat dst src))
-    dst))
+  (setq str (copy-sequence str))
+  (let ((idx 0)
+       rest match-b match-e)
+    (while (string-match devanagari-composite-glyph-unit str idx)
+      (let* ((match-b (match-beginning 0))
+            (match-e (match-end 0))
+            (cmps (devanagari-composition-component
+                   (apply 
+                    'char-to-glyph-devanagari
+                    (cons (substring str match-b match-e) langs)))))
+       (compose-string str match-b match-e cmps)
+       (setq idx match-e))))
+  str)
  
  ;;;###autoload
  (defun devanagari-compose-region (from to &rest langs)
    (interactive "r")
-  (save-restriction
-    (narrow-to-region from to)
-    (goto-char (point-min))
-    (while (re-search-forward devanagari-composite-glyph-unit nil t)
-      (let* ((match-b (match-beginning 0)) (match-e (match-end 0))
-            (cmps (devanagari-compose-to-one-glyph
-                   (apply 
-                    'char-to-glyph-devanagari
-                    (cons (buffer-substring match-b match-e)
-                          langs)))))
-       (delete-region match-b match-e)
-       (insert cmps)))))
+  (save-excursion
+    (save-restriction
+      (narrow-to-region from to)
+      (goto-char (point-min))
+      (while (re-search-forward devanagari-composite-glyph-unit nil t)
+       (let* ((match-b (match-beginning 0)) (match-e (match-end 0))
+              (cmps (devanagari-composition-component
+                     (apply 
+                      'char-to-glyph-devanagari
+                      (cons (buffer-substring match-b match-e) langs)))))
+         (compose-region match-b match-e cmps))))))
  
  ;; For pre-write and post-read conversion
  
@@ -1260,20 +1242,18 @@ Ligatures and special rules are processed."
  
  ;;;###autoload
  (defun in-is13194-devanagari-post-read-conversion (len)
-  (let ((pos (point))
-       (buffer-modified-p (buffer-modified-p)))
-    (prog1
-       (devanagari-compose-from-is13194-region pos (+ pos len))
-      (set-buffer-modified-p buffer-modified-p))))
+  (let ((pos (point)))
+    (devanagari-compose-from-is13194-region pos (+ pos len))))
  
  ;;;###autoload
  (defun devanagari-decompose-to-is13194-region (from to)
    "Decompose Devanagari characters in the region to IS 13194 characters."
    (interactive "r")
-  (save-restriction
-    (narrow-to-region from to)
-    (devanagari-decompose-region (point-min) (point-max))
-    (devanagari-to-indian-region (point-min) (point-max))))
+  (save-excursion
+    (save-restriction
+      (narrow-to-region from to)
+      (devanagari-decompose-region (point-min) (point-max))
+      (devanagari-to-indian-region (point-min) (point-max)))))
  
  ;;;###autoload
  (defun in-is13194-devanagari-pre-write-conversion (from to)
@@ -1304,8 +1284,6 @@ Ligatures and special rules are processed."
      (indian-decode-itrans-region (point-min) (point-max))
      (devanagari-compose-from-is13194-region (point-min) (point-max))))
  
-;; Test comment.
-
  ;;
  (provide 'devan-util)
  
diff --git a/lisp/language/lao-util.el b/lisp/language/lao-util.el

index cb78b7fc06751d1f4deebc49321a28e077d9d1e6..b12e63ae13d01155a4fa226ff88fc2245fdb1bb1 100644 (file)
--- a/lisp/language/lao-util.el
+++ b/lisp/language/lao-util.el
@@ -30,6 +30,14 @@
    (interactive)
    (set-language-environment "Lao"))
  
+;; Setting information of Thai characters.
+
+(defconst lao-category-table (make-category-table))
+(define-category ?c "Lao consonant" lao-category-table)
+(define-category ?s "Lao semi-vowel" lao-category-table)
+(define-category ?v "Lao upper/lower vowel" lao-category-table)
+(define-category ?t "Lao tone" lao-category-table)
+
  (let ((l '((?\e(1!\e(B consonant "LETTER KOR  KAI'" "CHICKEN")
            (?\e(1"\e(B consonant "LETTER KHOR KHAI'" "EGG")
            (?\e(1#\e(B invalid nil)
@@ -127,11 +135,375 @@
            ))
        elm)
    (while l
-    (setq elm (car l))
-    (put-char-code-property (car elm) 'phonetic-type (car (cdr elm)))
-    (put-char-code-property (car elm) 'name (nth 2 elm))
-    (put-char-code-property (car elm) 'meaning (nth 3 elm))
-    (setq l (cdr l))))
+    (setq elm (car l) l (cdr l))
+    (let ((char (car elm))
+         (ptype (nth 1 elm)))
+      (cond ((eq ptype 'consonant)
+            (modify-category-entry char ?c lao-category-table))
+           ((memq ptype '(vowel-upper vowel-lower))
+            (modify-category-entry char ?v lao-category-table))
+           ((eq ptype 'semivowel-lower)
+            (modify-category-entry char ?s lao-category-table))
+           ((eq ptype 'tone)
+            (modify-category-entry char ?t lao-category-table)))
+      (put-char-code-property char 'phonetic-type ptype)
+      (put-char-code-property char 'name (nth 2 elm))
+      (put-char-code-property char 'meaning (nth 3 elm)))))
+
+;; The general composing rules are as follows:
+;;
+;;                          T
+;;       V        T         V                  T
+;; CV -> C, CT -> C, CVT -> C, Cv -> C, CvT -> C
+;;                                   v         v
+;;                             T
+;;        V         T          V                   T
+;; CsV -> C, CsT -> C, CsVT -> C, Csv -> C, CvT -> C
+;;        s         s          s         s         s
+;;                                       v         v
+
+
+;; where C: consonant, V: vowel upper, v: vowel lower,
+;;       T: tone mark, s: semivowel lower
+
+(defvar lao-composition-pattern
+  "\\cc\\(\\ct\\|\\cv\\ct?\\|\\cs\\(\\ct\\|\\cv\\ct?\\)?\\)"
+  "Regular expression matching a Lao composite sequence.")
+
+;;;###autoload
+(defun lao-compose-string (str)
+  (with-category-table lao-category-table
+   (let ((idx 0))
+     (while (setq idx (string-match lao-composition-pattern str idx))
+       (compose-string str idx (match-end 0))
+       (setq idx (match-end 0))))
+   str))
+
+;;; LRT: Lao <-> Roman Transcription
+
+;; Upper vowels and tone-marks are put on the letter.
+;; Semi-vowel-sign-lo and lower vowels are put under the letter.
+
+(defconst lao-transcription-consonant-alist
+  (sort '(;; single consonants
+         ("k" . "\e(1!\e(B")
+         ("kh" . "\e(1"\e(B")
+         ("qh" . "\e(1$\e(B")
+         ("ng" . "\e(1'\e(B")
+         ("j" . "\e(1(\e(B")
+         ("s" . "\e(1J\e(B")
+         ("x" . "\e(1*\e(B")
+         ("y" . "\e(1-\e(B")
+         ("d" . "\e(14\e(B")
+         ("t" . "\e(15\e(B")
+         ("th" . "\e(16\e(B")
+         ("dh" . "\e(17\e(B")
+         ("n" . "\e(19\e(B")
+         ("b" . "\e(1:\e(B")
+         ("p" . "\e(1;\e(B")
+         ("hp" . "\e(1<\e(B")
+         ("fh" . "\e(1=\e(B")
+         ("ph" . "\e(1>\e(B")
+         ("f" . "\e(1?\e(B")
+         ("m" . "\e(1A\e(B")
+         ("gn" . "\e(1B\e(B")
+         ("l" . "\e(1E\e(B")
+         ("r" . "\e(1C\e(B")
+         ("v" . "\e(1G\e(B")
+         ("w" . "\e(1G\e(B")
+         ("hh" . "\e(1K\e(B")
+         ("O" . "\e(1M\e(B")
+         ("h" . "\e(1N\e(B")
+         ("nh" . "\e(1|\e(B")
+         ("mh" . "\e(1}\e(B")
+         ("lh" . ["\e(1K\\e(B"])
+         ;; double consonants
+         ("ngh" . ["\e(1K'\e(B"])
+         ("yh" . ["\e(1K]\e(B"])
+         ("wh" . ["\e(1KG\e(B"])
+         ("hl" . ["\e(1KE\e(B"])
+         ("hy" . ["\e(1K-\e(B"])
+         ("hn" . ["\e(1K9\e(B"])
+         ("hm" . ["\e(1KA\e(B"])
+         )
+       (function (lambda (x y) (> (length (car x)) (length (car y)))))))
+
+(defconst lao-transcription-semi-vowel-alist
+  '(("r" . "\e(1\\e(B")))
+
+(defconst lao-transcription-vowel-alist
+  (sort '(("a" . "\e(1P\e(B")
+         ("ar" . "\e(1R\e(B")
+         ("i" . "\e(1T\e(B")
+         ("ii" . "\e(1U\e(B")
+         ("eu" . "\e(1V\e(B")
+         ("ur" . "\e(1W\e(B")
+         ("u" . "\e(1X\e(B")
+         ("uu" . "\e(1Y\e(B")
+         ("e" . ["\e(1`P\e(B"])
+         ("ee" . "\e(1`\e(B")
+         ("ae" . ["\e(1aP\e(B"])
+         ("aa" . "\e(1a\e(B")
+         ("o" . ["\e(1bP\e(B"])
+         ("oo" . "\e(1b\e(B")
+         ("oe" . ["\e(1`RP\e(B"])
+         ("or" . "\e(1m\e(B")
+         ("er" . ["\e(1`T\e(B"])
+         ("ir" . ["\e(1`U\e(B"])
+         ("ua" . ["\e(1[GP\e(B"])
+         ("uaa" . ["\e(1[G\e(B"])
+         ("ie" . ["\e(1`Q]P\e(B"])
+         ("ia" . ["\e(1`Q]\e(B"])
+         ("ea" . ["\e(1`VM\e(B"])
+         ("eaa" . ["\e(1`WM\e(B"])
+         ("ai" . "\e(1d\e(B")
+         ("ei" . "\e(1c\e(B")
+         ("ao" . ["\e(1`[R\e(B"])
+         ("aM" . "\e(1S\e(B"))
+       (function (lambda (x y) (> (length (car x)) (length (car y)))))))
+
+;; Maa-sakod is put at the tail.
+(defconst lao-transcription-maa-sakod-alist
+  '(("k" . "\e(1!\e(B")
+    ("g" . "\e(1'\e(B")
+    ("y" . "\e(1-\e(B")
+    ("d" . "\e(14\e(B")
+    ("n" . "\e(19\e(B")
+    ("b" . "\e(1:\e(B")
+    ("m" . "\e(1A\e(B")
+    ("v" . "\e(1G\e(B")
+    ("w" . "\e(1G\e(B")
+    ))
+
+(defconst lao-transcription-tone-alist
+  '(("'" . "\e(1h\e(B")
+    ("\"" . "\e(1i\e(B")
+    ("^" . "\e(1j\e(B")
+    ("+" . "\e(1k\e(B")
+    ("~" . "\e(1l\e(B")))
+
+(defconst lao-transcription-punctuation-alist
+  '(("\\0" . "\e(1p\e(B")
+    ("\\1" . "\e(1q\e(B")
+    ("\\2" . "\e(1r\e(B")
+    ("\\3" . "\e(1s\e(B")
+    ("\\4" . "\e(1t\e(B")
+    ("\\5" . "\e(1u\e(B")
+    ("\\6" . "\e(1v\e(B")
+    ("\\7" . "\e(1w\e(B")
+    ("\\8" . "\e(1x\e(B")
+    ("\\9" . "\e(1y\e(B")
+    ("\\\\" . "\e(1f\e(B")
+    ("\\$" . "\e(1O\e(B")))
+
+(defconst lao-transcription-pattern
+  (concat
+   "\\("
+   (mapconcat 'car lao-transcription-consonant-alist "\\|")
+   "\\)\\("
+   (mapconcat 'car lao-transcription-semi-vowel-alist "\\|")
+   "\\)?\\(\\("
+   (mapconcat 'car lao-transcription-vowel-alist "\\|")
+   "\\)\\("
+   (mapconcat 'car lao-transcription-maa-sakod-alist "\\|")
+   "\\)?\\("
+   (mapconcat (lambda (x) (regexp-quote (car x)))
+             lao-transcription-tone-alist "\\|")
+   "\\)?\\)?\\|"
+   (mapconcat (lambda (x) (regexp-quote (car x)))
+             lao-transcription-punctuation-alist "\\|")
+   )
+  "Regexp of Roman transcription pattern for one Lao syllable.")
+
+(defconst lao-transcription-pattern
+  (concat
+   "\\("
+   (regexp-opt (mapcar 'car lao-transcription-consonant-alist))
+   "\\)\\("
+   (regexp-opt (mapcar 'car lao-transcription-semi-vowel-alist))
+   "\\)?\\(\\("
+   (regexp-opt (mapcar 'car lao-transcription-vowel-alist))
+   "\\)\\("
+   (regexp-opt (mapcar 'car lao-transcription-maa-sakod-alist))
+   "\\)?\\("
+   (regexp-opt (mapcar 'car lao-transcription-tone-alist))
+   "\\)?\\)?\\|"
+   (regexp-opt (mapcar 'car lao-transcription-punctuation-alist))
+   )
+  "Regexp of Roman transcription pattern for one Lao syllable.")
+
+(defconst lao-vowel-reordering-rule
+  '(("\e(1P\e(B" (0 ?\e(1P\e(B) (0 ?\e(1Q\e(B))
+    ("\e(1R\e(B" (0 ?\e(1R\e(B))
+    ("\e(1T\e(B" (0 ?\e(1U\e(B))
+    ("\e(1U\e(B" (0 ?\e(1U\e(B))
+    ("\e(1V\e(B" (0 ?\e(1V\e(B))
+    ("\e(1W\e(B" (0 ?\e(1W\e(B))
+    ("\e(1X\e(B" (0 ?\e(1X\e(B))
+    ("\e(1Y\e(B" (0 ?\e(1Y\e(B))
+    ("\e(1`P\e(B" (?\e(1`\e(B 0 ?\e(1P\e(B) (?\e(1`\e(B 0 ?\e(1Q\e(B))
+    ("\e(1`\e(B" (?\e(1`\e(B 0))
+    ("\e(1aP\e(B" (?\e(1a\e(B 0 ?\e(1P\e(B) (?\e(1a\e(B 0 ?\e(1Q\e(B))
+    ("\e(1a\e(B" (?\e(1a\e(B 0))
+    ("\e(1bP\e(B" (?\e(1b\e(B 0 ?\e(1P\e(B) (0 ?\e(1[\e(B) (?\e(1-\e(B ?\e(1b\e(B 0 ?\e(1Q\e(B) (?\e(1G\e(B ?\e(1b\e(B 0 ?\e(1Q\e(B))
+    ("\e(1b\e(B" (?\e(1b\e(B 0))
+    ("\e(1`RP\e(B" (?\e(1`\e(B 0 ?\e(1R\e(B ?\e(1P\e(B) (0 ?\e(1Q\e(B ?\e(1M\e(B))
+    ("\e(1m\e(B" (0 ?\e(1m\e(B) (0 ?\e(1M\e(B))
+    ("\e(1`T\e(B" (?\e(1`\e(B 0 ?\e(1T\e(B))
+    ("\e(1`U\e(B" (?\e(1`\e(B 0 ?\e(1U\e(B))
+    ("\e(1[GP\e(B" (0 ?\e(1[\e(B ?\e(1G\e(B ?\e(1P\e(B) (0 ?\e(1Q\e(B ?\e(1G\e(B))
+    ("\e(1[G\e(B" (0 ?\e(1[\e(B ?\e(1G\e(B) (0 ?\e(1G\e(B))
+    ("\e(1`Q]P\e(B" (?\e(1`\e(B 0 ?\e(1Q\e(B ?\e(1]\e(B ?\e(1P\e(B) (0 ?\e(1Q\e(B ?\e(1]\e(B))
+    ("\e(1`Q]\e(B" (?\e(1`\e(B 0 ?\e(1Q\e(B ?\e(1]\e(B) (0 ?\e(1]\e(B))
+    ("\e(1`VM\e(B" (?\e(1`\e(B 0 ?\e(1V\e(B ?\e(1M\e(B))
+    ("\e(1`WM\e(B" (?\e(1`\e(B 0 ?\e(1W\e(B ?\e(1M\e(B))
+    ("\e(1d\e(B" (?\e(1d\e(B 0))
+    ("\e(1c\e(B" (?\e(1c\e(B 0))
+    ("\e(1`[R\e(B" (?\e(1`\e(B 0 ?\e(1[\e(B ?\e(1R\e(B))
+    ("\e(1S\e(B" (0 ?\e(1S\e(B)))
+  "Alist of Lao vowel string vs the corresponding re-ordering rule.
+Each element has this form:
+       (VOWEL NO-MAA-SAKOD-RULE WITH-MAA-SAKOD-RULE (MAA-SAKOD-0 RULE-0) ...)
+
+VOWEL is a vowel string (e.g. \"\e(1`Q]P\e(B\").
+
+NO-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL following a
+consonant.  It is a list vowel characters or 0.  The element 0
+indicate the place to embed a consonant.
+
+Optional WITH-MAA-SAKOD-RULE is a rule to re-order and modify VOWEL
+follwoing a consonant and preceding a maa-sakod character.  If it is
+nil, NO-MAA-SAKOD-RULE is used.  The maa-sakod character is alwasy
+appended at the tail.
+
+For instance, rule `(\"\e(1`WM\e(B\" (?\e(1`\e(B t ?\e(1W\e(B ?\e(1M\e(B))' tells that this vowel
+string following a consonant `\e(1!\e(B' should be re-ordered as \"\e(1`!WM\e(B\".
+
+Optional (MAA-SAKOD-n RULE-n) are rules specially applied to maa-sakod
+character MAA-SAKOD-n.")
+
+;;;###autoload
+(defun lao-transcribe-single-roman-syllable-to-lao (from to &optional str)
+  "Transcribe a Romanized Lao syllable in the region FROM and TO to Lao string.
+Only the first syllable is transcribed.
+The value has the form: (START END LAO-STRING), where
+START and END are the beggining and end positions of the Roman Lao syllable,
+LAO-STRING is the Lao character transcription of it.
+
+Optional 3rd arg STR, if non-nil, is a string to search for Roman Lao
+syllable.  In that case, FROM and TO are indexes to STR."
+  (if str
+      (if (setq from (string-match lao-transcription-pattern str from))
+         (progn
+           (if (>= from to)
+               (setq from nil)
+             (setq to (match-end 0)))))
+    (save-excursion
+      (goto-char from)
+      (if (setq to (re-search-forward lao-transcription-pattern to t))
+         (setq from (match-beginning 0))
+       (setq from nil))))
+  (if from
+      (let* ((consonant (match-string 1 str))
+            (semivowel (match-string 3 str))
+            (vowel (match-string 5 str))
+            (maa-sakod (match-string 8 str))
+            (tone (match-string 9 str))
+            lao-consonant lao-semivowel lao-vowel lao-maa-sakod lao-tone
+            clen cidx)
+       (setq to (match-end 0))
+       (if (not consonant)
+           (setq str (cdr (assoc (match-string 0 str)
+                                 lao-transcription-punctuation-alist)))
+         (setq lao-consonant
+               (cdr (assoc consonant lao-transcription-consonant-alist)))
+         (if (vectorp lao-consonant)
+             (setq lao-consonant (aref lao-consonant 0)))
+         (setq clen (length lao-consonant))
+         (if semivowel
+             ;; Include semivowel in STR.
+             (setq lao-semivowel
+                   (cdr (assoc semivowel lao-transcription-semi-vowel-alist))
+                   str (if (= clen 1)
+                           (concat lao-consonant lao-semivowel)
+                         (concat (substring lao-consonant 0 1) lao-semivowel
+                                 (substring lao-consonant 1))))
+           (setq str lao-consonant))
+         (if vowel
+             (let (rule)
+               (setq lao-vowel
+                     (cdr (assoc vowel lao-transcription-vowel-alist)))
+               (if (vectorp lao-vowel)
+                   (setq lao-vowel (aref lao-vowel 0)))
+               (setq rule (assoc lao-vowel lao-vowel-reordering-rule))
+               (if (null maa-sakod)
+                   (setq rule (nth 1 rule))
+                 (setq lao-maa-sakod
+                       (cdr (assoc maa-sakod lao-transcription-maa-sakod-alist))
+                       rule
+                       (or (cdr (assq (aref lao-maa-sakod 0) (nthcdr 2 rule)))
+                           (nth 2 rule)
+                           (nth 1 rule))))
+               (or rule
+                   (error "Lao vowel %S has no re-ordering rule" lao-vowel))
+               (setq lao-consonant str str "")
+               (while rule
+                 (if (= (car rule) 0)
+                     (setq str (concat str lao-consonant)
+                           cidx (length str))
+                   (setq str (concat str (list (car rule)))))
+                 (setq rule (cdr rule)))
+               (or cidx
+                   (error "Lao vowel %S has malformed re-ordering rule" vowel))
+               ;; Set CIDX to after upper or lower vowel if any.
+               (let ((len (length str)))
+                 (while (and (< cidx len)
+                             (memq (get-char-code-property (aref str cidx)
+                                                           'phonetic-type)
+                                   '(vowel-lower vowel-upper)))
+                   (setq cidx (1+ cidx))))
+               (if lao-maa-sakod
+                   (setq str (concat str lao-maa-sakod)))
+               (if tone
+                   (setq lao-tone
+                         (cdr (assoc tone lao-transcription-tone-alist))
+                         str (concat (substring str 0 cidx) lao-tone
+                                     (substring str cidx)))))))
+       (list from to (lao-compose-string str)))))
+
+;;;###autoload
+(defun lao-transcribe-roman-to-lao-string (str)
+  "Transcribe Romanized Lao string STR to Lao character string."
+  (let ((from 0)
+       (to (length str))
+       (lao-str "")
+       val)
+    (while (setq val (lao-transcribe-single-roman-syllable-to-lao from to str))
+      (let ((start (car val))
+           (end (nth 1 val))
+           (lao (nth 2 val)))
+       (if (> start from)
+           (setq lao-str (concat lao-str (substring str from start) lao))
+         (setq lao-str (concat lao-str lao)))
+       (setq from end)))
+    (if (< from to)
+       (concat lao-str (substring str from to))
+      lao-str)))
+
+;;;###autoload
+(defun lao-composition-function (from to pattern &optional string)
+  "Compose Lao text in the region FROM and TO.
+The text matches the regular expression PATTERN.
+Optional 4th argument STRING, if non-nil, is a string containing text
+to compose.
+
+The return value is number of composed characters."
+  (if (< (1+ from) to)
+      (prog1 (- to from)
+       (if string
+           (compose-string from to)
+         (compose-region from to))
+       (- to from))))
  
  ;;
  (provide 'lao-util)
author	Kenichi Handa <handa@m17n.org>
	Wed, 15 Dec 1999 00:47:53 +0000 (00:47 +0000)
committer	Kenichi Handa <handa@m17n.org>
	Wed, 15 Dec 1999 00:47:53 +0000 (00:47 +0000)
lisp/language/devan-util.el		patch \| blob \| history
lisp/language/lao-util.el		patch \| blob \| history