(features tibet-util)
(documentation . t)
(sample-text .
-"Tibetan (\e2\e$(7"70"]\e(B\e1\e$(7"2\e$(8!;\e(B\e2\e$(7%P`"Q\e(B\e1\e$(7"2\e$(8!;\e(B) \e2\e$(7#RP#SP#S\e(B\e1\e$(7!>"7\e(B\e2\e$(7$P`"Q\e(B\e1\e$(8!;\e(B\e2\e$(7"E0"S\e(B\e1\e$(7"G\e$(8!;\e$(7"7\e(B\e2\e$(7"20"[\e(B\e1\e$(8!;\e(B\e2\e$(7"D0"[\e(B\e1\e$(7"#"G!>\e(B\e2\e$(7"I0"]0"_\e(B\e1\e$(8!;\e(B\e2\e$(7"9`"Q\e(B\e1\e$(8!;\e(B\e2\e$(7"/0"S\e(B\e1\e$(8!;\e(B\e2\e$(7"5`"Q\e(B\e1\e2\e$(7#2`#90"[\e(B\e1\e$(8!;\e(B\e2\e$(7"H`#A`"U0"c\e(B\e1\e$(7!>\e(B")))
+"Tibetan (\e4\e$(7"7r'"]\e0"7"]\e1"2\e$(8!;\e4\e$(7%Px!"Q\e0%P"Q\e1"2\e$(8!;\e(B) \e4\e$(7#Rv##Sv##S\e0#R#S#S\e1!>"7\e4$Px!"Q\e0$P"Q\e1\e$(8!;\e4\e$(7"Er'"S\e0"E"S\e1"G\e$(8!;\e$(7"7\e4"2r'"[\e0"2"[\e1\e$(8!;\e4\e$(7"Dr'"[\e0"D"[\e1"#"G!>\e4"Ir'"]r'"_\e0"I"]"_\e1\e$(8!;\e4\e$(7"9x!"Q\e0"9"Q\e1\e$(8!;\e4\e$(7"/r'"S\e0"/"S\e1\e$(8!;\e4\e$(7"5x!"Q\e0"5"Q\e1\e4#2x!#9r'"[\e0#2#9"[\e1\e$(8!;\e4\e$(7"Hx!#Ax!"Ur'"c\e0"H#A"U"c\e1!>\e(B")))
+;; `\e$(7"A\e(B' is included in the pattern for subjoined consonants because we
+;; treat it specially in tibetan-add-components.
+;;
+;; The regexp is four character classes in sequence:
+;;   1. exactly one character from the first range (presumably the base
+;;      consonants -- their alist is not visible here, confirm);
+;;   2. zero or more characters from the subjoined-consonant range, which
+;;      also admits the extra character mentioned above;
+;;   3. an optional character from the class that contains the vowel
+;;      signs of the vowel table (e.g. those transcribed E, O, I and ,);
+;;   4. an optional character from the class that contains the signs
+;;      listed in tibetan-modifier-transcription-alist.
+(defconst tibetan-composable-pattern
+ "[\e$(7"!\e(B-\e$(7"J\e(B][\e$(7"A#!\e(B-\e$(7#J\e(B]*[\e$(7"Q\e(B-\e$(7"^"a"e\e(B]?[\e$(7"_"c"d"g\e(B-\e$(7"l!I!e!g\e(B]?"
+ "Regexp matching a composable sequence of Tibetan characters.")
+
+;; Register a function to compose Tibetan characters.
+;; The table slot indexed by (make-char 'tibetan) is set to a one-element
+;; list pairing tibetan-composable-pattern with the symbol
+;; tibetan-composition-function.  That function is not defined in the
+;; part of the file shown here -- presumably it lives in tibet-util;
+;; confirm before relying on it.
+(aset composition-function-table (make-char 'tibetan)
+ (list (cons tibetan-composable-pattern 'tibetan-composition-function)))
+
;;;
;;; Definitions of conversion data.
;;;
("E" . "\e$(7"\\e(B")
("O" . "\e$(7"^\e(B")
("I" . "\e$(7"a\e(B")
- ("M" . "\e$(7"_\e(B")
- ("~" . "\e$(7"c\e(B") ; not specified in Ext.wylie
- ("`" . "\e$(7"d\e(B") ; idem.
("," . "\e$(7"e\e(B") ; idem.
- ("v" . "\e$(7"g\e(B") ; idem.
- ("V" . "\e$(7"h\e(B") ; idem.
- ("x" . "\e$(7"i\e(B") ; idem.
- ("X" . "\e$(7"j\e(B") ; idem.
- ("q" . "\e$(7"k\e(B") ; idem.
- ("Q" . "\e$(7"l\e(B") ; idem.
- ("_o" . "\e$(7!g\e(B") ; idem.
- ("_O" . "\e$(7!e\e(B") ; idem.
- ("_/" . "\e$(7!I\e(B") ; idem.
))
+;; Alist of (TRANSCRIPTION . TIBETAN-STRING) pairs for modifier signs.
+;; Keys are case-sensitive ("x" and "X", "v" and "V", "q" and "Q" map to
+;; different signs) and three of them are two characters long ("_/",
+;; "_o", "_O").  Every key is also one of the alternatives collected
+;; into tibetan-regexp, and every value is a member of the last
+;; character class of tibetan-composable-pattern.
+(defconst tibetan-modifier-transcription-alist
+ '(("M" . "\e$(7"_\e(B")
+ ("~" . "\e$(7"c\e(B")
+ ("`" . "\e$(7"d\e(B")
+ ("x" . "\e$(7"i\e(B")
+ ("X" . "\e$(7"j\e(B")
+ ("v" . "\e$(7"g\e(B")
+ ("V" . "\e$(7"h\e(B")
+ ("q" . "\e$(7"k\e(B")
+ ("Q" . "\e$(7"l\e(B")
+ ("_/" . "\e$(7!I\e(B")
+ ("_o" . "\e$(7!g\e(B")
+ ("_O" . "\e$(7!e\e(B")))
+
(defconst tibetan-precomposed-transcription-alist
'(("phyw" . "\e$(7$G\e(B")
("tshw" . "\e$(7$)\e(B")
("sm" . "\e$(7%Y\e(B")))
+;; Alist of (TRANSCRIPTION . TIBETAN-STRING) pairs for subjoined
+;; consonants; every transcription starts with "+".
+;; The literal list is passed through `sort' with a predicate that
+;; compares (length (car x)), so entries end up ordered by descending
+;; transcription length: a longer key such as "+dzh" comes before its
+;; prefixes "+dz" and "+d".  Presumably this is so that the longer
+;; alternative is tried first once the keys are joined with \\| in
+;; tibetan-regexp -- confirm against the code that uses that regexp.
 (defconst tibetan-subjoined-transcription-alist
- '(("+k" . "\e$(7#!\e(B")
- ("+kh" . "\e$(7#"\e(B")
- ("+g" . "\e$(7##\e(B")
- ("+gh" . "\e$(7#$\e(B")
- ("+ng" . "\e$(7#%\e(B")
- ("+c" . "\e$(7#&\e(B")
- ("+ch" . "\e$(7#'\e(B")
- ("+j" . "\e$(7#(\e(B")
- ("+ny" . "\e$(7#*\e(B")
- ("+T" . "\e$(7#+\e(B")
- ("+TH" . "\e$(7#,\e(B")
- ("+D" . "\e$(7#-\e(B")
- ("+DH" . "\e$(7#.\e(B")
- ("+N" . "\e$(7#/\e(B")
- ("+t" . "\e$(7#0\e(B")
- ("+th" . "\e$(7#1\e(B")
- ("+d" . "\e$(7#2\e(B")
- ("+dh" . "\e$(7#3\e(B")
- ("+n" . "\e$(7#4\e(B")
- ("+p" . "\e$(7#5\e(B")
- ("+ph" . "\e$(7#6\e(B")
- ("+b" . "\e$(7#7\e(B")
- ("+bh" . "\e$(7#8\e(B")
- ("+m" . "\e$(7#9\e(B")
- ("+ts" . "\e$(7#:\e(B")
- ("+tsh" . "\e$(7#;\e(B")
- ("+dz" . "\e$(7#<\e(B")
- ("+dzh" . "\e$(7#=\e(B")
- ("+w" . "\e$(7#>\e(B")
- ("+zh" . "\e$(7#?\e(B")
- ("+z" . "\e$(7#@\e(B")
- ("+'" . "\e$(7#A\e(B")
- ("+y" . "\e$(7#B\e(B")
- ("+r" . "\e$(7#C\e(B")
- ("+l" . "\e$(7#D\e(B")
- ("+sh" . "\e$(7#E\e(B")
- ("+SH" . "\e$(7#F\e(B")
- ("+s" . "\e$(7#G\e(B")
- ("+h" . "\e$(7#H\e(B")
- ("+A" . "\e$(7#I\e(B")
- ("+kSH" . "\e$(7#J\e(B")
- ("R" . "\e$(7#P\e(B")))
+ (sort '(("+k" . "\e$(7#!\e(B")
+ ("+kh" . "\e$(7#"\e(B")
+ ("+g" . "\e$(7##\e(B")
+ ("+gh" . "\e$(7#$\e(B")
+ ("+ng" . "\e$(7#%\e(B")
+ ("+c" . "\e$(7#&\e(B")
+ ("+ch" . "\e$(7#'\e(B")
+ ("+j" . "\e$(7#(\e(B")
+ ("+ny" . "\e$(7#*\e(B")
+ ("+T" . "\e$(7#+\e(B")
+ ("+TH" . "\e$(7#,\e(B")
+ ("+D" . "\e$(7#-\e(B")
+ ("+DH" . "\e$(7#.\e(B")
+ ("+N" . "\e$(7#/\e(B")
+ ("+t" . "\e$(7#0\e(B")
+ ("+th" . "\e$(7#1\e(B")
+ ("+d" . "\e$(7#2\e(B")
+ ("+dh" . "\e$(7#3\e(B")
+ ("+n" . "\e$(7#4\e(B")
+ ("+p" . "\e$(7#5\e(B")
+ ("+ph" . "\e$(7#6\e(B")
+ ("+b" . "\e$(7#7\e(B")
+ ("+bh" . "\e$(7#8\e(B")
+ ("+m" . "\e$(7#9\e(B")
+ ("+ts" . "\e$(7#:\e(B")
+ ("+tsh" . "\e$(7#;\e(B")
+ ("+dz" . "\e$(7#<\e(B")
+ ("+dzh" . "\e$(7#=\e(B")
+ ("+w" . "\e$(7#>\e(B")
+ ("+zh" . "\e$(7#?\e(B")
+ ("+z" . "\e$(7#@\e(B")
+ ("+'" . "\e$(7#A\e(B")
+ ("+y" . "\e$(7#B\e(B")
+ ("+r" . "\e$(7#C\e(B")
+ ("+l" . "\e$(7#D\e(B")
+ ("+sh" . "\e$(7#E\e(B")
+ ("+SH" . "\e$(7#F\e(B")
+ ("+s" . "\e$(7#G\e(B")
+ ("+h" . "\e$(7#H\e(B")
+ ("+A" . "\e$(7#I\e(B")
+ ("+kSH" . "\e$(7#J\e(B")
+ ("+R" . "\e$(7#P\e(B"))
+ (lambda (x y) (> (length (car x)) (length (car y))))))
;;;
;;; alist for Tibetan base consonant <-> subjoined consonant conversion.
;;; (includes some punctuation conversion rules)
;;;
+;; Alist of (COMPONENT-SEQUENCE . PRECOMPOSED-CHARACTER) pairs.  In the
+;; entries visible here each car is a string of two or three Tibetan
+;; characters and each cdr is a one-character string.
+;; The list is backquoted, but none of the visible entries contains a
+;; `,' form, so for them it evaluates exactly like a quoted list.
 (defconst tibetan-precomposition-rule-alist
- '(("\e$(7"6#B#>\e(B" . "\e$(7$G\e(B")
+ `(("\e$(7"6#B#>\e(B" . "\e$(7$G\e(B")
 ("\e$(7"##C#>\e(B" . "\e$(7$_\e(B")
 ("\e$(7";#>\e(B" . "\e$(7$)\e(B")
 ("\e$(7"C#:#>\e(B" . "\e$(7%.\e(B")
 ("\e$(7"G#4\e(B" . "\e$(7%V\e(B")
 ("\e$(7"G#5\e(B" . "\e$(7%W\e(B")
 ("\e$(7"G#7\e(B" . "\e$(7%X\e(B")
- ("\e$(7"G#9\e(B" . "\e$(7%Y\e(B")
- ("\e$(7!=\e(B" . "\e$(8!=\e(B") ; 2 col <-> 1 col
+ ("\e$(7"G#9\e(B" . "\e$(7%Y\e(B")))
+
+;; Alist of (GLYPH . REPLACEMENT) string pairs, in two groups:
+;;  - the leading entries pair a character designated with \e$(7 with the
+;;    same code designated with \e$(8 (the "2 col <-> 1 col" pairs);
+;;  - the trailing entries are built at load time: each `,(compose-string
+;;    ...)' call receives a string, the range 0..length of that string,
+;;    and a vector that alternates characters with rule pairs such as
+;;    (Br . Bl), and its result becomes the cdr of the entry.
+;; The backquote on the list is what makes those `,' forms evaluate.
+(defconst tibetan-obsolete-glyphs
+ `(("\e$(7!=\e(B" . "\e$(8!=\e(B") ; 2 col <-> 1 col
 ("\e$(7!?\e(B" . "\e$(8!?\e(B")
 ("\e$(7!@\e(B" . "\e$(8!@\e(B")
 ("\e$(7!A\e(B" . "\e$(8!A\e(B")
 ("\e$(7"`\e(B" . "\e$(8"`\e(B")
 ("\e$(7!;\e(B" . "\e$(8!;\e(B")
 ("\e$(7!D\e(B" . "\e$(8!D\e(B")
- ("\e$(7!>\e(B \e$(7!>\e(B" . "\e2\e$(7!>P\e(B P\e$(7!>\e(B\e1") ; Yes this is dirty. But ...
- ("\e$(7!4!5!5\e(B" . "\e2\e$(7#RP#SP#SP#S\e(B\e1")
- ("\e$(7!4!5\e(B" . "\e2\e$(7#RP#SP#S\e(B\e1")
- ("\e$(7!6\e(B" . "\e2\e$(7#RP#S_!I\e(B\e1")
- ("\e$(7!4\e(B" . "\e2\e$(7#RP#S\e(B\e1")))
-
-(defvar tibetan-regexp
- (let ((l (append tibetan-consonant-transcription-alist
- tibetan-vowel-transcription-alist
- tibetan-subjoined-transcription-alist))
- temp)
- (setq temp "\\(")
- (setq temp (concat temp (car (car l))))
- (setq l (cdr l))
+ ;; Yes these are dirty. But ...
+ ("\e$(7!>\e(B \e$(7!>\e(B" . ,(compose-string "\e$(7!>\e(B \e$(7!>\e(B" 0 3 [?\e$(7!>\e(B (Br . Bl) ? (Br . Bl) ?\e$(7!>\e(B]))
+ ("\e$(7!4!5!5\e(B" . ,(compose-string
+ "\e$(7#R#S#S#S\e(B" 0 4
+ [?\e$(7#R\e(B (Br . Bl) ?\e$(7#S\e(B (Br . Bl) ?\e$(7#S\e(B (Br . Bl) ?\e$(7#S\e(B]))
+ ("\e$(7!4!5\e(B" . ,(compose-string "\e$(7#R#S#S\e(B" 0 3 [?\e$(7#R\e(B (Br . Bl) ?\e$(7#S\e(B (Br . Bl) ?\e$(7#S\e(B]))
+ ("\e$(7!6\e(B" . ,(compose-string "\e$(7#R#S!I\e(B" 0 3 [?\e$(7#R\e(B (Br . Bl) ?\e$(7#S\e(B (br . tr) ?\e$(7!I\e(B]))
+ ("\e$(7!4\e(B" . ,(compose-string "\e$(7#R#S\e(B" 0 2 [?\e$(7#R\e(B (Br . Bl) ?\e$(7#S\e(B]))))
+
+;; Construction: the five transcription alists are walked in the order
+;; listed below and the car of every entry is pushed onto `pattern',
+;; followed by a separator, so `pattern' is built in reverse.  `(cdr
+;; pattern)' drops the separator pushed after the very last key, and
+;; `nreverse' restores source order before everything is concatenated.
+;; The result is a bare KEY\|KEY\|... alternation: it has no enclosing
+;; group and no anchor.
+;; NOTE(review): keys are inserted without `regexp-quote', so they are
+;; read as regexp syntax.  Keys such as "+k" therefore depend on `+'
+;; being taken literally at the start of an alternative -- confirm, and
+;; check any key added later for regexp special characters.
+(defconst tibetan-regexp
+ (let ((l (list tibetan-precomposed-transcription-alist
+ tibetan-consonant-transcription-alist
+ tibetan-vowel-transcription-alist
+ tibetan-modifier-transcription-alist
+ tibetan-subjoined-transcription-alist))
+ (separator "\\|")
+ tail pattern)
 (while l
- (setq temp (concat temp "\\|" (car (car l))))
- (setq l (cdr l)))
- (concat temp "\\)$"))
- "Regexp string to match a romanized Tibetan character component, i.e.,
-base and subjoined consonant, vowel and vowel modifier. The result of matching
-is to be used for indexing alists at conversion from a roman transcription to
-the corresponding Tibetan character.")
+ (setq tail (car l) l (cdr l))
+ (while tail
+ (setq pattern (cons separator (cons (car (car tail)) pattern))
+ tail (cdr tail))))
+ (apply 'concat (nreverse (cdr pattern))))
+ "Regexp matching a Tibetan transcription of a composable Tibetan sequence.
+The result of matching is to be used for indexing alists at conversion
+from a roman transcription to the corresponding Tibetan character.")
(defvar tibetan-precomposed-regexp
(let ((l tibetan-precomposed-transcription-alist)