From 6f2cdcd11202a6976a399bed4c071b9ac9ce254f Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Mon, 12 Jul 2010 14:25:46 +0900 Subject: [PATCH] Improve Hebrew rendering. --- lisp/ChangeLog | 8 ++ lisp/language/hebrew.el | 178 +++++++++++++++++++++++++++++++++++----- src/ChangeLog | 3 + src/Makefile.in | 4 +- 4 files changed, 169 insertions(+), 24 deletions(-) diff --git a/lisp/ChangeLog b/lisp/ChangeLog index e988b6bdef4..3d8648c86b6 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,11 @@ +2010-07-12 Kenichi Handa + + * language/hebrew.el: Remove no-byte-compile declaration. Change + coding: tag to utf-8. Register hebrew-shape-gstring in + composition-function-table for 3-character looking back. + (hebrew-font-get-precomposed): New function. + (hebrew-shape-gstring): Utilize precomposed glyphs if available. + 2010-07-11 Chong Yidong * mouse.el (mouse-drag-track): Handle select-active-regions diff --git a/lisp/language/hebrew.el b/lisp/language/hebrew.el index f024251c60b..89a22d2d238 100644 --- a/lisp/language/hebrew.el +++ b/lisp/language/hebrew.el @@ -1,4 +1,4 @@ -;;; hebrew.el --- support for Hebrew -*- coding: iso-2022-7bit; no-byte-compile: t -*- +;;; hebrew.el --- support for Hebrew -*- coding: utf-8 -*- ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 ;; Free Software Foundation, Inc. @@ -59,7 +59,7 @@ (nonascii-translation . iso-8859-8) (input-method . "hebrew") (unibyte-display . hebrew-iso-8bit) - (sample-text . "Hebrew ,Hylem(B") + (sample-text . "Hebrew שלום") (documentation . "Bidirectional editing is supported."))) (set-language-info-alist @@ -85,33 +85,167 @@ Bidirectional editing is supported."))) :mime-charset 'cp862) (define-coding-system-alias 'ibm862 'cp862) -;; Composition function for hebrew. +;; Return a nested alist of Hebrew character sequences vs the +;; corresponding glyph of FONT-OBJECT. +(defun hebrew-font-get-precomposed (font-object) + (let ((precomposed (font-get font-object 'hebrew-precomposed)) + ;; Vector of Hebrew precomposed charaters. + (chars [#xFB2A #xFB2B #xFB2C #xFB2D #xFB2E #xFB2F #xFB30 #xFB31 + #xFB32 #xFB33 #xFB34 #xFB35 #xFB36 #xFB38 #xFB39 #xFB3A + #xFB3B #xFB3C #xFB3E #xFB40 #xFB41 #xFB43 #xFB44 #xFB46 + #xFB47 #xFB48 #xFB49 #xFB4A #xFB4B #xFB4C #xFB4D #xFB4E]) + ;; Vector of decomposition character sequences corresponding + ;; to the above vector. + (decomposed + [[#x05E9 #x05C1] + [#x05E9 #x05C2] + [#x05E9 #x05BC #x05C1] + [#x05E9 #x05BC #x05C2] + [#x05D0 #x05B7] + [#x05D0 #x05B8] + [#x05D0 #x05BC] + [#x05D1 #x05BC] + [#x05D2 #x05BC] + [#x05D3 #x05BC] + [#x05D4 #x05BC] + [#x05D5 #x05BC] + [#x05D6 #x05BC] + [#x05D8 #x05BC] + [#x05D9 #x05BC] + [#x05DA #x05BC] + [#x05DB #x05BC] + [#x05DC #x05BC] + [#x05DE #x05BC] + [#x05E0 #x05BC] + [#x05E1 #x05BC] + [#x05E3 #x05BC] + [#x05E4 #x05BC] + [#x05E6 #x05BC] + [#x05E7 #x05BC] + [#x05E8 #x05BC] + [#x05E9 #x05BC] + [#x05EA #x05BC] + [#x05D5 #x05B9] + [#x05D1 #x05BF] + [#x05DB #x05BF] + [#x05E4 #x05BF]])) + (unless precomposed + (setq precomposed (list t)) + (let ((gvec (font-get-glyphs font-object 0 (length chars) chars))) + (dotimes (i (length chars)) + (if (aref gvec i) + (set-nested-alist (aref decomposed i) (aref gvec i) + precomposed)))) + ;; Cache the result in FONT-OBJECT's property. + (font-put font-object 'hebrew-precomposed precomposed)) + precomposed)) + +;; Composition function for hebrew. GSTRING is made of a Hebrew base +;; character followed by Hebrew diacritical marks, or is made of +;; single Hebrew diacritical mark. Adjust GSTRING to display that +;; sequence properly. The basic strategy is: +;; +;; (1) If there's single diacritical, add padding space to the left +;; and right of the glyph. +;; +;; (2) If the font has OpenType features for Hebrew, ask the OTF +;; driver the whole work. +;; +;; (3) If the font has precomposed glyphs, use them as far as +;; possible. Adjust the remaining glyphs artificially. + (defun hebrew-shape-gstring (gstring) - (setq gstring (font-shape-gstring gstring)) - (let ((header (lgstring-header gstring)) - (nchars (lgstring-char-len gstring)) - (nglyphs (lgstring-glyph-len gstring)) - (base-width (lglyph-width (lgstring-glyph gstring 0)))) - (while (and (> nglyphs 1) - (not (lgstring-glyph gstring (1- nglyphs)))) - (setq nglyphs (1- nglyphs))) - (while (> nglyphs 1) - (setq nglyphs (1- nglyphs)) - (let* ((glyph (lgstring-glyph gstring nglyphs)) - (adjust (and glyph (lglyph-adjustment glyph)))) - (if adjust - (setq nglyphs 0) - (if (>= (lglyph-lbearing glyph) 0) - (lglyph-set-adjustment glyph (- base-width) 0 0)))))) - gstring) + (let* ((font (lgstring-font gstring)) + (otf (font-get font :otf)) + (nchars (lgstring-char-len gstring)) + header nglyphs base-width glyph precomposed val idx) + (cond + ((= nchars 1) + ;; Independent diacritical mark. Add padding space to left or + ;; right so that the glyph doesn't overlap with the surrounding + ;; chars. + (setq glyph (lgstring-glyph gstring 0)) + (let ((width (lglyph-width glyph)) + bearing) + (if (< (setq bearing (lglyph-lbearing glyph)) 0) + (lglyph-set-adjustment glyph bearing 0 (- width bearing))) + (if (> (setq bearing (lglyph-rbearing glyph)) width) + (lglyph-set-adjustment glyph 0 0 bearing)))) + + ((or (assq 'hebr (car otf)) (assq 'hebr (cdr otf))) + ;; FONT has OpenType features for Hebrew. + (font-shape-gstring gstring)) + + (t + ;; FONT doesn't have OpenType features for Hebrew. + ;; Try a precomposed glyph. + ;; Now GSTRING is in this form: + ;; [[FONT CHAR1 CHAR2 ... CHARn] nil GLYPH1 GLYPH2 ... GLYPHn nil ...] + (setq precomposed (hebrew-font-get-precomposed font) + header (lgstring-header gstring) + val (lookup-nested-alist header precomposed nil 1)) + (if (and (consp val) (vectorp (car val))) + ;; All characters can be displayed by a single precomposed glyph. + ;; Reform GSTRING to [HEADER nil PRECOMPOSED-GLYPH nil ...] + (let ((glyph (copy-sequence (car val)))) + (lglyph-set-from-to glyph 0 (1- nchars)) + (lgstring-set-glyph gstring 0 glyph) + (lgstring-set-glyph gstring 1 nil)) + (if (and (integerp val) (> val 2) + (setq glyph (lookup-nested-alist header precomposed val 1)) + (consp glyph) (vectorp (car glyph))) + ;; The first (1- VAL) characters can be displayed by a + ;; precomposed glyph. Provided that VAL is 3, the first + ;; two glyphs should be replaced by the precomposed glyph. + ;; In that case, reform GSTRING to: + ;; [HEADER nil PRECOMPOSED-GLYPH GLYPH3 ... GLYPHn nil ...] + (let* ((ncmp (1- val)) ; number of composed glyphs + (diff (1- ncmp))) ; number of reduced glyphs + (setq glyph (copy-sequence (car glyph))) + (lglyph-set-from-to glyph 0 (1- nchars)) + (lgstring-set-glyph gstring 0 glyph) + (setq idx ncmp) + (while (< idx nchars) + (setq glyph (lgstring-glyph gstring idx)) + (lglyph-set-from-to glyph 0 (1- nchars)) + (lgstring-set-glyph gstring (- idx diff) glyph) + (setq idx (1+ idx))) + (lgstring-set-glyph gstring (- idx diff) nil) + (setq idx (- ncmp diff) + nglyphs (- nchars diff))) + (setq glyph (lgstring-glyph gstring 0)) + (lglyph-set-from-to glyph 0 (1- nchars)) + (setq idx 1 nglyphs nchars)) + ;; Now IDX is an index to the first non-precomposed glyph. + ;; Adjust positions of the remaining glyphs artificially. + (setq base-width (lglyph-width (lgstring-glyph gstring 0))) + (while (< idx nglyphs) + (setq glyph (lgstring-glyph gstring idx)) + (lglyph-set-from-to glyph 0 (1- nchars)) + (if (>= (lglyph-lbearing glyph) (lglyph-width glyph)) + ;; It seems that this glyph is designed to be rendered + ;; before the base glyph. + (lglyph-set-adjustment glyph (- base-width) 0 0) + (if (>= (lglyph-lbearing glyph) 0) + ;; Align the horizontal center of this glyph to the + ;; horizontal center of the base glyph. + (let ((width (- (lglyph-rbearing glyph) + (lglyph-lbearing glyph)))) + (lglyph-set-adjustment glyph + (- (/ (- base-width width) 2) + (lglyph-lbearing glyph) + base-width) 0 0)))) + (setq idx (1+ idx)))))) + gstring)) (let ((pattern1 "[\u05D0-\u05F2][\u0591-\u05BF\u05C1-\u05C5\u05C7]+") (pattern2 "[\u05D0-\u05F2]\u200D[\u0591-\u05BF\u05C1-\u05C5\u05C7]+")) (set-char-table-range composition-function-table '(#x591 . #x5C7) - (list (vector pattern2 2 'hebrew-shape-gstring) + (list (vector pattern2 3 'hebrew-shape-gstring) + (vector pattern2 2 'hebrew-shape-gstring) (vector pattern1 1 'hebrew-shape-gstring) - ["[\u0591-\u05C7]" 0 font-shape-gstring])) + [nil 0 hebrew-shape-gstring])) (set-char-table-range composition-function-table #x5C0 nil) (set-char-table-range diff --git a/src/ChangeLog b/src/ChangeLog index 5acf42608e4..c7e5c5c3ef2 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,5 +1,8 @@ 2010-07-12 Kenichi Handa + * Makefile.in (lisp): Change hebrew.el to hebrew.elc. + (shortlisp): Likewise. + * font.h (enum font_property_index): New member FONT_ENTITY_INDEX. * font.c (font_open_entity): Record ENTITY in FONT_OBJECT's slot diff --git a/src/Makefile.in b/src/Makefile.in index 933ec98a8d5..6a60b1d1ffe 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -426,7 +426,7 @@ lisp= \ ${lispsource}language/slovak.el \ ${lispsource}language/romanian.el \ ${lispsource}language/greek.el \ - ${lispsource}language/hebrew.el \ + ${lispsource}language/hebrew.elc \ ${lispsource}language/japanese.el \ ${lispsource}language/korean.el \ ${lispsource}language/lao.el \ @@ -517,7 +517,7 @@ shortlisp= \ ../lisp/language/slovak.el \ ../lisp/language/romanian.el \ ../lisp/language/greek.el \ - ../lisp/language/hebrew.el \ + ../lisp/language/hebrew.elc \ ../lisp/language/japanese.el \ ../lisp/language/korean.el \ ../lisp/language/lao.el \ -- 2.39.2