From: Miles Bader Date: Tue, 20 Jun 2006 07:35:06 +0000 (+0000) Subject: Merge from emacs--devo--0 X-Git-Tag: emacs-pretest-23.0.90~8295^2~862 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=7ffefb0856637762050ca248013d2b2f1cf7554e;p=emacs.git Merge from emacs--devo--0 Patches applied: * emacs--devo--0 (patch 314-319) - Update from CVS - Merge from gnus--rel--5.10 * gnus--rel--5.10 (patch 107) - Update from CVS Revision: emacs@sv.gnu.org/emacs--unicode--0--patch-78 --- 7ffefb0856637762050ca248013d2b2f1cf7554e diff --cc lisp/international/characters.el index 9b2b244d6ea,0b7c223c258..26d6cd93439 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@@ -890,210 -1339,11 +890,213 @@@ (setq l (cdr l)))) -(setq utf-translate-cjk-mode saved-utf-translate-cjk-mode) -(makunbound 'saved-utf-translate-cjk-mode) +;; CJK double width characters. +(let ((l '((#x1100 . #x11FF) + (#x2E80 . #x9FAF) + (#xAC00 . #xD7AF) + (#xF900 . #xFAFF) + (#xFE30 . #xFE4F) + (#xFF00 . #xFF5F) + (#xFFE0 . #xFFEF) + (#x20000 . #x2AFFF) + (#x2F800 . #x2FFFF)))) + (dolist (elt l) + (set-char-table-range char-width-table + (cons (car elt) (cdr elt)) + 2))) +;; Fixme: Doing this affects non-CJK characters through unification, +;; but presumably CJK users expect those characters to be +;; double-width when using these charsets. 
;; (map-charset-chars
;;  #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
;;  'japanese-jisx0208)
;; (map-charset-chars
;;  #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
;;  'japanese-jisx0212)
;; (map-charset-chars
;;  #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
;;  'japanese-jisx0213-1)
;; (map-charset-chars
;;  #'(lambda (range ignore) (set-char-table-range char-width-table range 2))
;;  'japanese-jisx0213-2)
;; (map-charset-chars
;;  (lambda (range ignore) (set-char-table-range char-width-table range 2))
;;  'korean-ksc5601)

;; Other double width
;; Every character range of each charset listed here gets width 2 in
;; `char-width-table'.  The charsets are processed in the listed order.
(dolist (wide-charset '(ethiopic tibetan indian-2-column arabic-2-column))
  (map-charset-chars
   (lambda (range _arg)
     (set-char-table-range char-width-table range 2))
   wide-charset))

;; Optimize the tables modified above, in this order: case, width,
;; category, syntax.
(dolist (tbl (list (standard-case-table)
                   char-width-table
                   (standard-category-table)
                   (standard-syntax-table)))
  (optimize-char-table tbl))

;; The Unicode blocks actually extend past some of these ranges with
;; undefined codepoints.
;; Fill `char-script-table' from a list of (FROM TO SCRIPT) entries:
;; each entry assigns SCRIPT to the code-point range FROM..TO.  The
;; distinct script symbols, in order of first appearance, are stored
;; in extra slot 0 of the table.
(let ((seen-scripts nil))
  (dolist (entry
           '((#x0000 #x007F latin)
             (#x00A0 #x036F latin)
             (#x0370 #x03E1 greek)
             (#x03E2 #x03EF coptic)
             (#x03F0 #x03F3 greek)
             (#x0400 #x04FF cyrillic)
             (#x0530 #x058F armenian)
             (#x0590 #x05FF hebrew)
             (#x0600 #x06FF arabic)
             (#x0700 #x074F syriac)
             (#x0780 #x07BF thaana)
             (#x0900 #x097F devanagari)
             (#x0980 #x09FF bengali)
             (#x0A00 #x0A7F gurmukhi)
             (#x0A80 #x0AFF gujarati)
             (#x0B00 #x0B7F oriya)
             (#x0B80 #x0BFF tamil)
             (#x0C00 #x0C7F telugu)
             (#x0C80 #x0CFF kannada)
             (#x0D00 #x0D7F malayalam)
             (#x0D80 #x0DFF sinhala)
             (#x0E00 #x0E5F thai)
             (#x0E80 #x0EDF lao)
             (#x0F00 #x0FFF tibetan)
             (#x1000 #x105F myanmar)
             (#x10A0 #x10FF georgian)
             (#x1100 #x11FF hangul)
             (#x1200 #x137F ethiopic)
             (#x13A0 #x13FF cherokee)
             (#x1400 #x167F canadian-aboriginal)
             (#x1680 #x169F ogham)
             (#x16A0 #x16FF runic)
             (#x1780 #x17FF khmer)
             (#x1800 #x18AF mongolian)
             (#x1E00 #x1EFF latin)
             (#x1F00 #x1FFF greek)
             (#x2000 #x27FF symbol)
             (#x2800 #x28FF braille)
             (#x2E80 #x2FDF han)
             (#x2FF0 #x2FFF ideographic-description)
             (#x3000 #x303F cjk-misc)
             (#x3040 #x30FF kana)
             (#x3100 #x312F bopomofo)
             (#x3130 #x318F hangul)
             (#x3190 #x319F kanbun)
             (#x31A0 #x31BF bopomofo)
             (#x3400 #x9FAF han)
             (#xA000 #xA4CF yi)
             (#xAC00 #xD7AF hangul)
             (#xF900 #xFAFF han)
             (#xFB1D #xFB4F hebrew)
             (#xFB50 #xFDFF arabic)
             (#xFE70 #xFEFC arabic)
             (#xFF00 #xFF5F cjk-misc)
             (#xFF61 #xFF9F kana)
             (#xFFE0 #xFFE6 cjk-misc)
             (#x20000 #x2AFFF han)
             (#x2F800 #x2FFFF han)))
    (let ((range-start (car entry))
          (range-end (nth 1 entry))
          (script-name (nth 2 entry)))
      (set-char-table-range char-script-table
                            (cons range-start range-end)
                            script-name)
      ;; Record each script once; the list is built in reverse and
      ;; flipped below.
      (unless (memq script-name seen-scripts)
        (setq seen-scripts (cons script-name seen-scripts)))))
  (set-char-table-extra-slot char-script-table 0 (nreverse seen-scripts)))

;; Every character of the `tibetan' charset is given the `tibetan'
;; script, whatever the range table above assigned to it.
(map-charset-chars
 (lambda (range _arg)
   (set-char-table-range char-script-table range 'tibetan))
 'tibetan)


;;; Setting word boundary.
(defun next-word-boundary-han (pos limit)
  "Return the boundary of the Han word at POS, searching toward LIMIT.
If POS is not greater than LIMIT, scan forward from POS over a run of
category-C characters and then over a following run of category-H
characters, and return the position reached.  A run that is absent
is skipped, so the result is POS itself when neither run is present.
Otherwise scan backward: decrease POS while it is greater than LIMIT
and the character before it has script `han' in `char-script-table',
and return the resulting position."
  (if (<= pos limit)
      (save-excursion
        (goto-char pos)
        ;; Move only when the match succeeds.  After a failed
        ;; `looking-at', `match-end' reflects an earlier match (or is
        ;; nil), so using it unconditionally could jump to an
        ;; unrelated position or signal an error.
        (if (looking-at "\\cC+")
            (goto-char (match-end 0)))
        (if (looking-at "\\cH+")
            (goto-char (match-end 0)))
        (point))
    (while (and (> pos limit)
                (eq (aref char-script-table (char-after (1- pos))) 'han))
      (setq pos (1- pos)))
    pos))

(defun next-word-boundary-kana (pos limit)
  "Return the boundary of the kana word at POS, searching toward LIMIT.
If POS is not greater than LIMIT, scan forward from POS over, in
order, a run of category-K characters, a run of category-H
characters and a run of category-k characters, skipping any run that
is absent, and return the position reached.
Otherwise scan backward.  If the character at POS has category K or
k, decrease POS while the character before it has category K or k.
If not, decrease POS over preceding category-H characters; then, if
the character before them has category C, K or A, step over it and
over the preceding run of characters of that same category.  The
backward loops stop once POS is no longer greater than LIMIT.
Return the resulting position."
  (if (<= pos limit)
      (save-excursion
        (goto-char pos)
        (if (looking-at "\\cK+")
            (goto-char (match-end 0)))
        (if (looking-at "\\cH+")
            (goto-char (match-end 0)))
        (if (looking-at "\\ck+")
            (goto-char (match-end 0)))
        (point))
    (let ((category-set (char-category-set (char-after pos)))
          category)
      (if (or (aref category-set ?K) (aref category-set ?k))
          ;; The `setq' inside `and' only captures the category set of
          ;; the preceding character for the test that follows.
          (while (and (> pos limit)
                      (setq category-set
                            (char-category-set (char-after (1- pos))))
                      (or (aref category-set ?K) (aref category-set ?k)))
            (setq pos (1- pos)))
        (while (and (> pos limit)
                    (aref (setq category-set
                                (char-category-set (char-after (1- pos)))) ?H))
          (setq pos (1- pos)))
        ;; CATEGORY-SET now describes the last character examined by
        ;; the loop above.
        (setq category (cond ((aref category-set ?C) ?C)
                             ((aref category-set ?K) ?K)
                             ((aref category-set ?A) ?A)))
        ;; NOTE(review): if the loop above stopped because POS reached
        ;; LIMIT, CATEGORY-SET was not refreshed, and the step below
        ;; can move POS one position before LIMIT -- confirm whether
        ;; callers rely on the result never being less than LIMIT.
        (when category
          (setq pos (1- pos))
          (while (and (> pos limit)
                      (aref (char-category-set (char-after (1- pos)))
                            category))
            (setq pos (1- pos)))))
      pos)))

;; Register the two functions above in
;; `find-word-boundary-function-table' for every entry of
;; `char-script-table' whose script is `han' or `kana'.
(map-char-table
 #'(lambda (char script)
     (cond ((eq script 'han)
            (set-char-table-range find-word-boundary-function-table
                                  char #'next-word-boundary-han))
           ((eq script 'kana)
            (set-char-table-range find-word-boundary-function-table
                                  char #'next-word-boundary-kana))))
 char-script-table)

(setq word-combining-categories
      '((?l . ?l)
        (?C . ?C)                       ; Chinese - Chinese
        (?C . ?H)                       ; Chinese - Hiragana
        (?C . ?K)))                     ; Chinese - Katakana

(setq word-separating-categories	;  (2-byte character sets)
      '((?A . ?K)			; Alpha numeric - Katakana
        (?A . ?C)			; Alpha numeric - Chinese
        (?H . ?A)			; Hiragana - Alpha numeric
        (?H . ?K)			; Hiragana - Katakana
        (?H . ?C)			; Hiragana - Chinese
        (?K . ?A)			; Katakana - Alpha numeric
        (?K . ?C)			; Katakana - Chinese
        (?C . ?A)			; Chinese - Alpha numeric
        (?C . ?K)			; Chinese - Katakana
        ))

;;; Local Variables:
;;; coding: utf-8-emacs
;;; End:

;;; arch-tag: 85889c35-9f4d-4912-9bf5-82de31b0d42d