;; History:
;; 1996.10.18 written by KAWABATA, Taichi <kawabata@is.s.u-tokyo.ac.jp>
+;; 1997.1.20 fixed some bugs.
;; Devanagari script composition rules and related programs.
;; Syllable ::= Cons-Vowel-Syllable | Vowel-Syllable
;; Vowel-Syllable ::= V[D]
;; Cons-Vowel-Syllable ::= [Cons-Syllable] Full-Cons [M] [D]
-;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] Pure-Cons
+;; Cons-Syllable ::= [Pure-Cons] [Pure-Cons] [Pure-Cons] Pure-Cons
;; Pure-Cons ::= Full-Cons H
;; Full-Cons ::= C [N]
;; C - Consonant (\e$(5!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A!B!C!D!E\e(B
;; \e$(5!F!G!H!I!J!K!L!M!N!O!P!Q!R!S!T!U!V!W!X\e(B)
;; N - Nukta (\e$(5!i\e(B)
-;; H - Halant(\e$(5!h\e(B)
-;; V - Vowel (\e$(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2\e(B)
-;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu, Visarg (\e$(5!!!"\e(B)
-;; M - Matra (\e$(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g\e(B)
+;; H - Halant(\e$(5!h\e(B) or Virama
+;; V - Vowel (\e$(5!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2#&#'#*\e(B)
+;; ("\e$(5#&#'#*\e(B" can be obtained by IS13194 vowels with nukta.)
+;; D - Vowel Modifiers, i.e. Anuswar, Chandrabindu, Visarga (\e$(5!!!"!#\e(B)
+;; M - Matra (\e$(5!Z![!\!]!^!_!`!a!b!c!d!e!f!g#K#L#M\e(B)
+;; ("\e$(5#K#L#M\e(B" can be obtained by IS13194 matras with nukta.)
;; In Emacs, one syllable of an Indian language is considered to be one
;; composite glyph. If we expand the above expression, it would be:
-;; [[C [N] H] [C [N] H] C [N] H] C [N] [M] [D] | V [D]
+;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] [D] | V [D]
;; Therefore, in the worst case, the consonant syllable will consist of
;; the following characters.
-;; C N H C N H C N H C N M D
+;; C N H C N H C N H C N H C N M D
+;; An example is the Sanskrit word "kaurtsnya", in which five consecutive
+;; consonants appear.
;; On the other hand, incomplete consonant syllable before inputting
;; base consonant must satisfy the following condition:
-;; [C [N] H] [C [N] H] C [N] H
+;; [C [N] H] [C [N] H] [C [N] H] C [N] H
;; This is acceptable BEFORE proper consonant-syllable is input. The
;; string which doesn't match with the above expression is invalid and
;; A third case can be considered: an acceptable syllable to which
;; no more codes can be added.
-;; [[C [N] H] [C [N] H] C [N] H] C [N] [M] D
+;; [[C [N] H] [C [N] H] [C [N] H] C [N] H] C [N] [M] D
;; However, to make editing possible even in this condition, we will
;; not consider this case.
(defconst devanagari-cons-syllable-examine
- "\\(\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?[\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?[\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?[\e$(5!Z\e(B-\e$(5!g\e(B]?[\e$(5!!!"\e(B]?"
+ "\\(\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?[\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?[\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\\([\e$(5!Z\e(B-\e$(5!g#K#L#M\e(B]\\|\\(\e$(5!_!i\e(B\\)\\|\\(\e$(5![!i\e(B\\)\\|\\(\e$(5!\!i\e(B\\)\\)?[\e$(5!!!"!#\e(B]?"
"Regexp matching to one Devanagari consonant syllable.")
(defconst devanagari-cons-syllable-incomplete-examine
- "\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?[\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B$"
+ "\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?\\([\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B\\)?[\e$(5!3\e(B-\e$(5!X\e(B]\e$(5!i\e(B?\e$(5!h\e(B$"
"Regexp matching to one Devanagari incomplete consonant syllable.")
(defconst devanagari-vowel-syllable-examine
- "[\e$(5!$\e(B-\e$(5!2\e(B][\e$(5!!!"!#\e(B]?"
+ "\\([\e$(5!$\e(B-\e$(5!2#&#'#*\e(B]\\|\\(\e$(5!*!i\e(B\\)\\|\\(\e$(5!&!i\e(B\\)\\|\\(\e$(5!'!i\e(B\\)\\)[\e$(5!!!"!#\e(B]?"
"Regexp matching to one Devanagari vowel syllable.")
(defconst devanagari-digit-viram-examine
(defconst devanagari-other-sign-examine
- "[\e$(5!!!j\e(B]\e$(5!i\e(B")
+ "\\([\e$(5!!!j\e(B]\e$(5!i\e(B\\)\\|\\([\e$(5#!#J\e(B]\\)")
(defconst devanagari-composite-glyph-unit-examine
(concat "\\(" devanagari-cons-syllable-incomplete-examine
;; ("[^\e$(5!h\e(B]\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"p\e(B")
("^\\(\e$(5!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"p\e(B")
+ ;; Half Form Ligature
+ ;; Here is the half-form ligature which has higher priority than
+ ;; the common ligature rules listed below.
+ ;; special forms.
+ ("\\(\e$(5!3!h!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"l\e(B")
+ ("\\(\e$(5!:!h!<!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"m\e(B")
+ ;; Ordinary forms.
+ ("\\(\e$(5!B!h!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"c\e(B")
+ ("\\(\e$(5!F!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"k\e(B")
;; If "r" is preceded by the vowel-suppressed consonant
;; (especially those with vertical line), it will be written as
;; slanted line below the preceding consonant character. Some of
("\\(\e$(5!:!i!h!O\e(B\\)" . "\e$(5"!\e(B")
("\\(\e$(5!I!i!h!O\e(B\\)" . "\e$(5""\e(B")
("\\(\e$(5!3!h!O\e(B\\)" . "\e$(5"#\e(B")
- ("\\(\e$(5!:!h!O\e(B\\)" . "\e$(5"$\e(B")
+ ("\\(\e$(5!5!h!O\e(B\\)" . "\e$(5"$\e(B")
("\\(\e$(5!B!h!O\e(B\\)" . "\e$(5"%\e(B")
("\\(\e$(5!H!h!O\e(B\\)" . "\e$(5"&\e(B")
("\\(\e$(5!I!h!O\e(B\\)" . "\e$(5"'\e(B")
- ("\\(\e$(5!U!h!O\e(B\\)" . "\e$(5"(\e(B")
- ("\\(\e$(5!W!h!O\e(B\\)" . "\e$(5")\e(B")
+ ("\\(\e$(5!U!h!O\e(B\\)" . "\e$(5")\e(B")
+ ;; Special Rules
+ ;; In the following case, "\e$(5!<!h!:\e(B" ligature does not occur.
+ ("\\(\e$(5!<!h\e(B\\)\e$(5!:!h!<!h\e(B" . "\e$(5"<\e(B")
;; Ligature Rules
("\\(\e$(5!3!h!B!h!O!h!M\e(B\\)" . "\e$(5$!\e(B")
("\\(\e$(5!8!h!<\e(B\\)" . "\e$(5$B\e(B")
("\\(\e$(5!9!h!M\e(B\\)" . "\e$(5$C\e(B")
("\\(\e$(5!:!h!O\e(B\\)" . "\e$(5$D\e(B")
- ("\\(\e$(5!:!h!h\e(B\\)" . "\e$(5$E\e(B")
+ ("\\(\e$(5!:!h!<\e(B\\)" . "\e$(5$E\e(B")
("\\(\e$(5!<!h!8\e(B\\)" . "\e$(5$F\e(B")
("\\(\e$(5!<!h!:\e(B\\)" . "\e$(5$G\e(B")
("\\(\e$(5!=!h!3\e(B\\)" . "\e$(5$H\e(B")
;; connection which is not listed here has not been examined yet.
;; I don't know what to do with them.
- ;; special forms
- ("\\(\e$(5!3!h!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"l\e(B")
- ("\\(\e$(5!:!h!<!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"m\e(B")
;; ordinary forms
("\\(\e$(5!5!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"`\e(B")
("\\(\e$(5!6!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"a\e(B")
;; ("\\(\e$(5!<!h!8!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"c\e(B") ; Mistake, must check later.
- ("\\(\e$(5!B!h!B!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"c\e(B")
("\\(\e$(5!B!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"d\e(B")
("\\(\e$(5!E!h!F!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"e\e(B")
("\\(\e$(5!E!h!O!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"f\e(B")
;; have the vertical line (such as "\e$(5!?\e(B"), "\e$(5"r\e(B" is put beneath the
;; consonant.
- ;; ("cons-not-yet-listed-up\\(\e$(5!h!O\e(B\\)" . "\e$(5"q\e(B")
("[\e$(5!7!9!=!>!?!@!D!O!P!R!S!X\e(B]\\(\e$(5!h!O\e(B\\)" . "\e$(5"r\e(B")
+ ("\\(\e$(5!J!h!O\e(B\\)" . "\e$(5!J"r\e(B") ; Protect from Half form conversion.
+ ("\\(\e$(5!E!h!O\e(B\\)" . "\e$(5!E"r\e(B") ; Will be replaced with precomposed font.
+ ("\\(\e$(5!6!h!O\e(B\\)" . "\e$(5!6"r\e(B")
+ ("\\(\e$(5!K!h!O\e(B\\)" . "\e$(5!K"r\e(B")
+ ("\\(\e$(5!T!h!O\e(B\\)" . "\e$(5!T"r\e(B")
+ ("\\(\e$(5!L!h!O\e(B\\)" . "\e$(5!L"r\e(B")
+ ("\\(\e$(5!7!h!5!h!O\e(B\\)" . "\e$(5$;"r\e(B") ; Ggr
+ ("\\(\e$(5!7!h!3!h!O\e(B\\)" . "\e$(5$9"r\e(B") ; Gkr
("\e$(5!?!i\e(B\\(\e$(5!h!O\e(B\\)" . "\e$(5"r\e(B")
("\e$(5!@!i\e(B\\(\e$(5!h!O\e(B\\)" . "\e$(5"r\e(B")
("\\(\e$(5!&!i\e(B\\)" . "\e$(5#&\e(B")
("\\(\e$(5!'!i\e(B\\)" . "\e$(5#'\e(B")
("\\(\e$(5!*!i\e(B\\)" . "\e$(5#*\e(B")
+ ("\\(\e$(5![!i\e(B\\)" . "\e$(5#L\e(B")
+ ("\\(\e$(5!\!i\e(B\\)" . "\e$(5#M\e(B")
+ ("\\(\e$(5!_!i\e(B\\)" . "\e$(5#K\e(B")
("\\(\e$(5!3!i\e(B\\)" . "\e$(5#3\e(B")
("\\(\e$(5!4!i\e(B\\)" . "\e$(5#4\e(B")
("\\(\e$(5!5!i\e(B\\)" . "\e$(5#5\e(B")
("\\(\e$(5!U!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"U\e(B")
("\\(\e$(5!V!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"V\e(B")
("\\(\e$(5!W!h\e(B\\)[\e$(5!3\e(B-\e$(5!X\e(B]" . "\e$(5"W\e(B")
+ ;; Special rule for "rR"
+ ("\\(\e$(5!O!_\e(B\\)" . "\e$(5!*"p\e(B")
+ ;; If everything fails, "y" will connect to the front consonant.
+ ("\\(\e$(5!h!M\e(B\\)" . "\e$(5"]\e(B")
"Alist of regexps of Devanagari character sequences vs composed characters.")
;; Glyphs will be ordered from low priority number to high priority number.
;; If application-priority is omitted, it is assumed to be 0.
;; If application-direction is omitted, it is assumed to be '(mr . ml).
+;; Priority
+;; Base Glyphs = {\e$(5!h!i\e(B} = Misc >
+;; {\e$(5"p"q"r\e(B} > Matras > {\e$(5!!!"!#\e(B}
+;; Question Halant and '\e$(5"q"r\e(B' priority problem.
(defconst devanagari-composition-rules
- '((?\e$(5!!\e(B 60 (tr . br))
- (?\e$(5!"\e(B 60 (tr . br))
- (?\e$(5!#\e(B 60)
+ '((?\e$(5!!\e(B 70 (tr . br))
+ (?\e$(5!"\e(B 70 (mr . mr))
+ (?\e$(5!#\e(B 70)
(?\e$(5!$\e(B 0)
(?\e$(5!%\e(B 0)
(?\e$(5!&\e(B 0)
(?\e$(5!]\e(B 40 (bc . tc))
(?\e$(5!^\e(B 40 (bc . tc))
(?\e$(5!_\e(B 40 (bc . tc))
- (?\e$(5!`\e(B 40 (tc . bc))
- (?\e$(5!a\e(B 40 (tc . bc))
- (?\e$(5!b\e(B 40 (tc . bc))
- (?\e$(5!c\e(B 40 (tc . bc))
+ (?\e$(5!`\e(B 40 (mr . mr)) ; (tc . bc)
+ (?\e$(5!a\e(B 40 (mr . mr))
+ (?\e$(5!b\e(B 40 (mr . mr))
+ (?\e$(5!c\e(B 40 (mr . mr))
(?\e$(5!d\e(B 40)
(?\e$(5!e\e(B 40)
(?\e$(5!f\e(B 40)
(?\e$(5!g\e(B 40)
- (?\e$(5!h\e(B 0 (br . tr)) ; Halant's special treatment.
- (?\e$(5!i\e(B 0 (br . tr)) ; Nukta's special treatment.
+ (?\e$(5!h\e(B 0 (br . tr))
+ (?\e$(5!i\e(B 0 (br . tr))
(?\e$(5!j\e(B 0)
(nil 0)
(nil 0)
(?\e$(5"m\e(B 0)
(?\e$(5"n\e(B 0)
(?\e$(5"o\e(B 0)
- (?\e$(5"p\e(B 20 (tr . br))
- (?\e$(5"q\e(B 20 (br . tr))
- (?\e$(5"r\e(B 20 (br . tr))
+ (?\e$(5"p\e(B 30 (mr . mr))
+ (?\e$(5"q\e(B 30 (br . tr))
+ (?\e$(5"r\e(B 30 (br . tr))
(?\e$(5"s\e(B 0)
(?\e$(5"t\e(B 0)
(?\e$(5"u\e(B 0)
(?\e$(5#H\e(B 0)
(?\e$(5#I\e(B 0)
(?\e$(5#J\e(B 0)
- (?\e$(5#K\e(B 0)
- (?\e$(5#L\e(B 0)
- (?\e$(5#M\e(B 0)
+ (?\e$(5#K\e(B 40 (bc . tc))
+ (?\e$(5#L\e(B 40 (bc . tc))
+ (?\e$(5#M\e(B 40 (bc . tc))
(?\e$(5#N\e(B 0)
(?\e$(5#O\e(B 0)
(?\e$(5#P\e(B 0)
;; Determine composition priority and rule of the array of Glyphs.
;; Sort the glyphs with their priority.
-;; Example:
-;;(devanagari-reorder-glyph-for-composition '[?\e$(5"5\e(B ?\e$(5!X\e(B ?\e$(5![\e(B])
-;; => ((446680 0) (446773 0) (446683 50 (ml . mr)))
(defun devanagari-reorder-glyph-for-composition (glyph-alist)
(let* ((pos 0)
(ordered-glyphs '()))
(if (= (length cmp-glyph-list) 1) (char-to-string (car cmp-glyph-list))
(apply 'compose-chars cmp-glyph-list))))
+;; Utility function for Phase 2.5.
+;; Check whether GLYPH is one of the Devanagari vertical modifiers listed below.
+;; Whether a vertical modifier is then converted to a 1-column shape depends
+;; on the wide-p state that devanagari-wide-to-narrow-iter carries over from earlier glyphs.
+ ; Returns non-nil (a match position) for a vertical modifier, nil otherwise.
+(defun devanagari-vertical-modifier-p (glyph)
+ (string-match (char-to-string glyph) ; NOTE(review): glyph is passed as the REGEXP and the list below as the STRING, so this is a substring search of the list text -- confirm intended.
+ "[\e$(5!]!^!_!`!a!b!c!h!i"p"q"r#K#L#M\e(B]"))
+(defun devanagari-non-vertical-modifier-p (glyph) ; Non-nil (a match position) if GLYPH occurs in the modifier list below, nil otherwise.
+ (string-match (char-to-string glyph) ; NOTE(review): glyph is passed as the REGEXP and the list below as the STRING -- confirm intended.
+ "[\e$(5!Z![!\!d!e!f!g\e(B]"))
;; Phase 2.5 Convert Appropriate Character to 1-column shape.
;; with 2 column base-glyph.
;; Execution Examples
-;;(devanagari-wide-to-narrow '(446680 446773 (ml . mr) 446683))
-;;(devanagari-wide-to-narrow '(?\e$(5!6\e(B (ml . ml) 446773 (tc . mr) 446683))
+;;(devanagari-wide-to-narrow '(?\e$(5!3\e(B (ml . ml) ?\e$(5!a\e(B))
+;;(devanagari-wide-to-narrow '(?\e$(5!F\e(B (ml . ml) ?\e$(5!a\e(B))
+;(defun devanagari-wide-to-narrow (src-list)
+; (if (null src-list) '()
+; (cons
+; (if (and (numberp (car src-list))
+; (cdr (assq (car src-list) devanagari-1-column-char)))
+; (cdr (assq (car src-list) devanagari-1-column-char))
+; (car src-list))
+; (devanagari-wide-to-narrow (cdr src-list)))))
(defun devanagari-wide-to-narrow (src-list)
- (if (null src-list) '()
- (cons
- (if (and (numberp (car src-list))
- (cdr (assq (car src-list) devanagari-1-column-char)))
- (cdr (assq (car src-list) devanagari-1-column-char))
- (car src-list))
- (devanagari-wide-to-narrow (cdr src-list)))))
-;; Make this function obsolete temporary Because now Emacs supports
-;; attaching 1 column character at the center 2 column char. However,
-;; there are still problems attempting to attach Halant or Nukta sign
-;; at the non-vowel consonant. This problem can not be solved until
-;; Emacs supports attaching the glyph at `temporary-preserved metric'.
-(defun devanagari-wide-to-narrow-old (src-list)
- (if (null src-list) (progn (error "devanagari-wide-to-narrow error") nil)
- (let* ((base-glyph (cdr (assq (car src-list) devanagari-1-column-char)))
- (wide-base-glyph nil)
- (apply-glyph-list (cdr src-list)))
- (if (null base-glyph)
- (progn
- (setq wide-base-glyph t)
- (setq base-glyph (car src-list))))
- (cons base-glyph
- (devanagari-wide-to-narrow-iter apply-glyph-list wide-base-glyph))
- )))
-;; Convert apply-glyph-list from 2-column to 1-column.
-;; wide-base-glyph is t when base-glyph is 2-column.
-;; When apply-glyph is put at the top or bottom of 2-column base-glyph,
-;; they must be 2-column glyph, too. Otherwise, they will be
-;; converted to 1-column glyph if possible.
-(defun devanagari-wide-to-narrow-iter (apply-glyph-list wide-base-glyph)
- (if (< (length apply-glyph-list) 2) '()
- (let* ((apply-dir (car apply-glyph-list))
- (apply-glyph (car (cdr apply-glyph-list)))
- (apply-rest (cdr (cdr apply-glyph-list)))
- (put-t-or-b (member (car apply-dir) '(tl tc tr bl bc br)))
- (narrow-glyph (cdr (assq apply-glyph devanagari-1-column-char))))
- (append
- (list apply-dir
- (if (or (and wide-base-glyph put-t-or-b)
- (null narrow-glyph))
- apply-glyph narrow-glyph))
- (devanagari-wide-to-narrow-iter apply-rest wide-base-glyph)))))
+ (devanagari-wide-to-narrow-iter src-list t))
+(defun devanagari-wide-to-narrow-iter (src-list wide-p) ; Recursively rebuild SRC-LIST, swapping glyphs for their devanagari-1-column-char entries; WIDE-P is the state carried from earlier glyphs.
+ (let ((glyph (car src-list)))
+ (cond ((null src-list) '()) ; end of list: nothing left to convert
+ ; Not a glyph code (any non-number element): copy it through and leave wide-p unchanged.
+ ((not (numberp glyph))
+ (cons glyph (devanagari-wide-to-narrow-iter (cdr src-list) wide-p)))
+ ; Vertical modifier: use the 1-column entry only when wide-p is nil and an entry exists; wide-p becomes nil if replaced, t if kept.
+ ((devanagari-vertical-modifier-p glyph)
+ (if (and (null wide-p)
+ (cdr (assq glyph devanagari-1-column-char)))
+ (cons (cdr (assq glyph devanagari-1-column-char))
+ (devanagari-wide-to-narrow-iter (cdr src-list) nil))
+ (cons glyph
+ (devanagari-wide-to-narrow-iter (cdr src-list) t))))
+ ; Non-vertical modifier: use the 1-column entry whenever one exists; wide-p is passed on unchanged either way.
+ ((devanagari-non-vertical-modifier-p glyph)
+ (if (cdr (assq glyph devanagari-1-column-char))
+ (cons (cdr (assq glyph devanagari-1-column-char))
+ (devanagari-wide-to-narrow-iter (cdr src-list) wide-p))
+ (cons glyph
+ (devanagari-wide-to-narrow-iter (cdr src-list) wide-p))))
+ ; Any other glyph: use the 1-column entry if one exists (wide-p becomes nil), otherwise keep the glyph (wide-p becomes t).
+ (t
+ (if (cdr (assq glyph devanagari-1-column-char))
+ (cons (cdr (assq glyph devanagari-1-column-char))
+ (devanagari-wide-to-narrow-iter (cdr src-list) nil))
+ (cons glyph
+ (devanagari-wide-to-narrow-iter (cdr src-list) t)))))))
;; Summary