From: Kenichi Handa Date: Mon, 8 Sep 2003 12:53:41 +0000 (+0000) Subject: *** empty log message *** X-Git-Tag: emacs-pretest-23.0.90~8295^2~1864 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=8f924df7df019cce90537647de2627581043b5c4;p=emacs.git *** empty log message *** --- 8f924df7df019cce90537647de2627581043b5c4 diff --cc admin/ChangeLog.22 index 00000000000,00000000000..ee129d9c119 new file mode 100644 --- /dev/null +++ b/admin/ChangeLog.22 @@@ -1,0 -1,0 +1,16 @@@ ++2003-08-16 Kenichi Handa ++ ++ * charsets: New directory for scripts to generate charset map ++ files in ../etc/charsets/. ++ ++ * charsets/Makefile, charsets/mapconv, charsets/compact.awk, ++ charsets/big5.awk charsets/gb18030-2.awk, charsets/gb18030-4.awk, ++ cahrsets/kuten.awk: New files. ++ ++;; Local Variables: ++;; coding: iso-2022-7bit-unix ++;; End: ++ ++ Copyright (C) 2003 Free Software Foundation, Inc. ++ Copying and distribution of this file, with or without modification, ++ are permitted provided the copyright notice and this notice are preserved. diff --cc admin/README index 33d79de3f0a,bf3663aab38..439d7874a5b --- a/admin/README +++ b/admin/README @@@ -11,9 -11,7 +11,14 @@@ alloc-color.c a utility program that a dense colormaps (PseudoColor). build-configs build Emacs in various configurations check-doc-strings check doc strings against documentation +cus-test.el tests for custom types and load problems diff-tar-files show files added/removed between two tar files make-emacs build Emacs in various ways +make-tarball.txt instructions to create pretest or release tarballs quick-install-emacs install emacs quickly (`incrementally') revdiff get CVS diffs of files ++ ++Brief description of sub-directories: ++ ++charsets scripts for generating charset map files ++ in ../etc/charsets diff --cc etc/ChangeLog.22 index 00000000000,00000000000..065c356c201 new file mode 100644 --- /dev/null +++ b/etc/ChangeLog.22 @@@ -1,0 -1,0 +1,40 @@@ ++2003-09-08 Kenichi Handa ++ ++ * charsets: New directory for charset mapping tables. ++ ++ * charsets/README: New file. ++ ++ * charsets/*.map: New files. ++ ++2003-04-12 Kenichi Handa ++ ++ * HELLO: Change "Hindi" to more common characters. ++ ++2003-01-10 Kenichi Handa ++ ++ * HELLO: Fix upcase and downcase for several languages. Change ++ the two German lines into one. Change "Nederlangs" to ++ "Dutch (Nederlands)". Add original language names to several ++ entries. ++ ++2003-01-06 Kenichi Handa ++ ++ * TUTORIAL.es: Add local variable coding: latin-1. ++ ++2002-10-30 Kenichi Handa ++ ++ * HELLO: Change indian-2-column chars of Hindi line to Unicode ++ chars. ++ ++2002-05-27 Dave Love ++ ++ * HELLO: Add pseudo-maths example. ++ ++;; Local Variables: ++;; coding: iso-2022-7bit ++;; End: ++ ++ Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2001, 2002 ++ Free Software Foundation, Inc. ++ Copying and distribution of this file, with or without modification, ++ are permitted provided the copyright notice and this notice are preserved. diff --cc etc/HELLO index 21b4a3cacf8,f05107390d1..7b8f13fcd42 --- a/etc/HELLO +++ b/etc/HELLO @@@ -2,39 -2,37 +2,39 @@@ This is a list of ways to say hello in Its purpose is to illustrate a number of scripts. --------------------------------------------------------- -Amharic ($(3"c!(B +Amharic ($(3"c!(B - Arabic (38R(47d(3T!JSa(4W(3W(B + Arabic (,GIqjHQYdG(B) ,GecjdY(B ,GeGdqSdG(B Czech (,Bh(Besky) Dobr,B}(B den - Danish (Dansk) Hej, Goddag + Danish (dansk) Hej, Goddag + Dutch (Nederlands) Hallo, Dag English Hello Esperanto Saluton (E,C6(Bo,C~(Ban,Cx(Bo ,Cf(Biu,C<(Ba,C}(Bde) - Estonian Tere, Tervist + Estonian (eesti keel) Tere, Tervist FORTRAN PROGRAM -Finnish (suomi) Hei +Finnish (Suomi) Hei - French (Fran,Ag(Bais) Bonjour, Salut - German (Deutsch Nord) Guten Tag - German (Deutsch S,A|(Bd) Gr,A|_(B Gott - Greek (,FEkkgmij\(B) ,FCei\(B ,Fsar(B - Hebrew ,Hylem(B - Hindi (4$,4!}t%"+0$,15y55B14$,4!.v#"Yv#"20$,15f6 1(B) 4$,4!8v#")0$,15h14$,4!hv#")0$,15n14$,4!zv#!)v#")v#"D0$,15x6-5d6'1(B, 4$,4!8v#")0$,15h14$,4!hv#")0$,15n14$,4!zv# ev#"Rv#")0$,15x6-5U5~14$,4!nv#"W0$,15p1(B 4$,4 J0$,16D1(B - Italian (Italiano) Ciao, Buon giorno - Lao ((1>RJRERG(B) (1JP:R-04U1(B, 0(1"m1c0Ki1b*!04U1(B - Malayalam (4$,46A0$,1@N14$,46E0$,1@R14$,46Bv#6M0$,1@O@^14$,46Fv#6W0$,1@S@"1(B) 4$,46<0$,1@H14$,46A0$,1@N14$,46Kv#6Vv#6)v#6M0$,1@X@m@5@^14$,46Cv#6W0$,1@P@"1(B - Maltese (Malti) Bon,Cu(Bu, Sa,C11(Ba - Nederlands, Vlaams Hallo, Dag - Norwegian (Norsk) Hei, God dag - Polish Dzie,Bq(B dobry! Cze,B6f(B! - Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B! + French (fran,Ag(Bais) Bonjour, Salut + German (Deutsch) Guten Tag, Gr,A|_(B Gott + Greek (,Fekkgmij\(B) ,FCei\(B ,Fsar(B + Hebrew (,Hzixar(B) ,Hylem(B + Hindi ($,15y55B5f6 (B) $,15h5n5x6-5d6'(B, $,15h5n5x6-5U5~5p(B $,16D(B + Italian (italiano) Ciao, Buon giorno -Lao((1>RJRERG(B) (1JP:R-4U(B, (1"mcKib*!4U(B ++Lao ((1>RJRERG(B) (1JP:R-4U(B, (1"mcKib*!4U(B ++Malayalam ($,1@N@R@O@^@S@"(B) $,1@H@N@X@m@5@^@P@"(B + Maltese (il-Malti) Bon,Cu(Bu, Sa,C11(Ba + Mathematics $,1x (B p $,1x((B world $,1s"(B hello p $,2!a(B + Norwegian (norsk) Hei, God dag -Polish (j,Bj(Bzyk polski) Dzie,Bq(B dobry! Cze,B6f(B! ++Polish (j,Bj(Bzyk polski) Dzie,Bq(B dobry! Cze,B6f(B! + Russian (,L`caaZXY(B) ,L7T`PRabRcYbU(B! Slovak (slovensky) Dobr,B}(B de,Br(B Slovenian (sloven,B9h(Bina) Pozdravljeni! - Spanish (Espa,Aq(Bol) ,A!(BHola! - Swedish (Svenska) Hej, Goddag - Tamil (4$,4*N0$,1(B + Spanish (espa,Aq(Bol) ,A!(BHola! + Swedish (svenska) Hej, Goddag ++Tamil ($,1(B Tigrigna ($(3"8#r!N"^(B) $(3!Q!,!<"8(B Turkish (T,M|(Brk,Mg(Be) Merhaba - Vietnamese (Ti,1*(Bng Vi,1.(Bt) Ch,1`(Bo b,1U(Bn + Vietnamese (ti,1*(Bng Vi,1.(Bt) Ch,1`(Bo b,1U(Bn Japanese ($BF|K\8l(B) $B$3$s$K$A$O(B, (I:]FAJ(B Chinese ($AVPND(B,$AFUM(;0(B,$A::So(B) $ADc:C(B diff --cc leim/ChangeLog.22 index 00000000000,00000000000..2fcd45430df new file mode 100644 --- /dev/null +++ b/leim/ChangeLog.22 @@@ -1,0 -1,0 +1,98 @@@ ++2003-02-13 Dave Love ++ ++ * quail/latin-post.el ("turkish-latin-3-postfix"): Make it ++ just an alias for turkish-postfix. ++ ++ * quail/latin-alt.el ("turkish-latin-3-alt-postfix"): Make it ++ just an alias for turkish-alt-postfix. ++ ++ * quail/cyrillic.el (ukrainian-computer): Fix duplicate `\'. ++ ++2002-11-07 Kenichi Handa ++ ++ * quail/thai.el: Don't require thai-util. ++ (quail-thai-update-translation): Function deleted. ++ (thai-generate-quail-map): Changed to a macro that directly calls ++ quail-define-rules. ++ ("thai-kesmanee", "thai-pattachote"): Don't use ++ UPDATE-TRANSLATION-FUNCTION. ++ ++ * quail/indian.el (quail-indian-preceding-char-position): Function ++ deleted. ++ (quail-indian-update-preceding-char): Variable deleted. ++ (quail-indian-update-translation): Function deleted. ++ (quail-define-indian-trans-package): Don't call ++ quail-define-package with quail-indian-update-translation. ++ (quail-define-inscript-package): Likewise. ++ ++2002-10-06 Dave Love ++ ++ * quail/indian.el (quail-indian-preceding-char-position) ++ (quail-indian-update-translation, quail-define-inscript-package): ++ Use characterp, not char-valid-p. ++ ++2002-07-30 Dave Love ++ ++ * quail/welsh.el ("welsh"): Doc fix. ++ ++ * quail/cyrillic.el: Reinstate some commented-out redundancies. ++ ("russian-typewriter"): Renamed from cyrillic-typewriter. Make ++ cyrillic-jcuken effectively an alias for it. ++ ("russian-computer"): New. ++ ("bulgarian-phonetic"): Renamed from bulgarian-pho. ++ ("bulgarian-bds"): Renamed from bulgarian-standard. ++ ++2002-07-01 Dave Love ++ ++ * quail/indian.el: Update from head. ++ ++2002-06-27 Dave Love ++ ++ * ja-dic/ja-dic.el: Add coding tag. ++ ++2002-06-24 Dave Love ++ ++ * latin-post.el: Recoded to utf-8. ++ ("latin-postfix"): New method. ++ ++ * latin-alt.el: Recoded to utf-8. ++ ("latin-alt-postfix"): New method. ++ ++ * quail/latin-pre.el: Recoded to utf-8. ++ ("latin-1-prefix", "latin-8-prefix", "latin-9-prefix"): Add nbsp. ++ ("latin-3-prefix"): Remove bogus Latin-3 characters and ~o -> ,Cu(B, ++ ~O -> ,CU(B. ++ ("latin-prefix"): New method. ++ ++ * quail/uni-input.el (utf-8-ccl-encode): Deleted. ++ (ucs-input-method): Modified. ++ ++2002-06-10 Dave Love ++ ++ * quail/hanja3.el, quail/hanja.el, makefile.nt, Makefile.in: ++ * quail/vntelex.el: Update from trunk. ++ ++ * quail/cyrillic.el: Doc fixes. ++ ("cyrillic-beylorussian"): Commented-out. ++ ("cyrillic-translit-bulgarian"): Deleted. ++ ("cyrillic-ukrainian"): Fix `q', `Q', `W', `w' bindings. ++ ("ukrainian-computer", "belarusian", "bulgarian-standard"): New. ++ ("bulgarian-pho"): Add ,A'(B, ,Lp(B, ,LN(B. ++ ++2002-05-31 Kenichi Handa ++ ++ * quail/indian.el: Replace commented-out lines with a condition ++ that is always false. The same change by Eli on 2002-04-19 in ++ the HEAD trunk. ++ ++2002-05-22 Kenichi Handa ++ ++ * Makefile.in (RUN-EMACS): Add LC_ALL=C. ++ ++;; Local Variables: ++;; coding: iso-2022-7bit-unix ++;; End: ++ ++ Copyright (C) 2002 Free Software Foundation, Inc. ++ Copying and distribution of this file, with or without modification, ++ are permitted provided the copyright notice and this notice are preserved. diff --cc leim/Makefile.in index 57dc5a0741e,515805e3f63..80c78bf52ef --- a/leim/Makefile.in +++ b/leim/Makefile.in @@@ -1,6 -1,6 +1,9 @@@ # Makefile for leim subdirectory in GNU Emacs. # Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. --# Licensed to the Free Software Foundation. ++# Licensed to the Free Software Foundation. ++# Copyright (C) 2003 ++# National Institute of Advanced Industrial Science and Technology (AIST) ++# Registration Number H13PRO009 # This file is part of GNU Emacs. @@@ -42,7 -42,7 +45,7 @@@ BUILT-EMACS = ${dot}${dot}/src/emac buildlisppath=${srcdir}/${dot}${dot}/lisp # How to run Emacs. - RUN-EMACS = EMACSLOADPATH=$(buildlisppath) \ -RUN-EMACS = EMACSLOADPATH=$(buildlisppath) LC_ALL=C\ ++RUN-EMACS = EMACSLOADPATH=$(buildlisppath) LC_ALL=C \ ${BUILT-EMACS} -batch --no-init-file --no-site-file --multibyte # Subdirectories to be made if ${srcdir} is different from the current diff --cc leim/quail/cyrillic.el index e6412020236,388dac0bb91..47a6352bcc7 --- a/leim/quail/cyrillic.el +++ b/leim/quail/cyrillic.el @@@ -613,11 -599,9 +613,11 @@@ ;; happily with this little change. [See "ukrainian-computer" below.] ;; Fixme: add GHE_WITH_UPTURN. -(quail-define-package +(quail-define-package "cyrillic-ukrainian" "Ukrainian" ",L6(BU" nil - ",L$'5@B7(B-,L&.(B UKRAINIAN (ISO 8859-5 encoding) - ",L$'5@B7(B-,L&.(B UKRAINIAN" ++ ",L$'5@B7(B-,L&.(B UKRAINIAN + +Sorry, but 'ghe with upturn' is not included in ISO 8859-5." nil t t t t nil nil nil nil nil t) ;; 1! 2" 3# 4$ 5% 6& 7' 8( 9) 0= /? +* <> @@@ -833,9 -817,9 +833,9 @@@ ;; Alexander Mikhailian says this is of limited use. It has been ;; popular among emigrants or foreigners who have to type in Cyrillic ;; (mostly Russian) from time to time. -(quail-define-package +(quail-define-package "cyrillic-yawerty" "Cyrillic" ",L6O(B" nil - ",LO25@BK(B Roman transcription (ISO 8859-5 encoding) - ",LO25@BK(B Roman transcription. ++ ",LO25@BK(B Roman transcription This layout is based on Roman transcription by phonemic resemblance. When preceded by a '/', the second and the third rows (number key row) change diff --cc leim/quail/indian.el index 2d2e284844c,3e0eabf36c5..de164d33dc4 --- a/leim/quail/indian.el +++ b/leim/quail/indian.el @@@ -33,90 -33,24 +33,9 @@@ ;;; Code: (require 'quail) - (require 'devan-util) (require 'ind-util) - - (defun quail-indian-preceding-char-position (position) - "Return the position of preceding composite character." - (let (prec-composed) - (if (char-valid-p (char-before position)) ;; range o.k. - (if (setq prec-composed (find-composition (1- position))) - (car prec-composed) - (1- position)) - nil))) - - (defvar quail-indian-update-preceding-char nil) - (make-variable-frame-local 'quail-indian-update-preceding-char) - - ;; Input value :: - ;; CONTROL-FLAG is integer `n' - ;; quail-current-key :: keyboard input. - ;; Only first `n' can be translated. - ;; quail-current-str :: corresonding string. - ;; jobs :: (1) put last (len-n) char to unrread-command-event. - ;; (2) put translated string to quail-current-str. - ;; - ;; CONTROL-FLAG is t (terminate) or nil (proceed the translation) - ;; quail-current-key :: keyboard input. - ;; quail-current-str :: corresponding string. - ;; jobs :: (1) put modified translated string to quail-current-str. - ;; - ;; When non-nil value is returned from quail-translation-update-function, - ;; the quail-current-str is split to characters and put into event queue, - ;; with `compose-last-char' event with composition info at the end. - - (defun quail-indian-update-translation (control-flag) - ;; make quail-current-str string when possible. - (if (char-valid-p quail-current-str) - (setq quail-current-str (char-to-string quail-current-str))) - ;(message "\n input control-flag=%s, str=%s, key=%s q-ind-upd-prec-char=%s" - ; control-flag quail-current-str quail-current-key - ; quail-indian-update-preceding-char) - ;; reset quail-indian-update-preceding-char if it's initial. - (if (= (overlay-start quail-overlay) (overlay-end quail-overlay)) - (setq quail-indian-update-preceding-char nil)) - ;; Check the preceding character of the quail region. If the - ;; preceding character can be composed with quail-current-str, then - ;; grab that preceding character into the quail-current-str and - ;; remove that char from the region. - (let* (prec-char-position composition-regexp - prec-char-str candidate-str match-pos match-end) - (when (and quail-current-str - (null quail-indian-update-preceding-char) - (null input-method-use-echo-area) - (null input-method-exit-on-first-char) - (setq prec-char-position - (quail-indian-preceding-char-position - (overlay-start quail-overlay))) - (setq composition-regexp - (if prec-char-position - (caar (elt composition-function-table - (char-after prec-char-position))))) - (setq prec-char-str - (buffer-substring prec-char-position - (overlay-start quail-overlay)) - candidate-str (concat prec-char-str quail-current-str) - match-pos (string-match composition-regexp candidate-str) - match-end (match-end 0)) - (> match-end (length prec-char-str))) - (setq quail-indian-update-preceding-char prec-char-str) - (delete-region prec-char-position - (overlay-start quail-overlay)))) - (setq quail-current-str - (indian-compose-string - (concat quail-indian-update-preceding-char - quail-current-str))) - (if (numberp control-flag) - (setq unread-command-events - (string-to-list - (substring quail-current-key control-flag)))) - (when control-flag - (setq quail-indian-update-preceding-char nil)) - ;(message "output control-flag=%s, str=%s, key=%s q-ind-upd-prec-char=%s" - ; control-flag quail-current-str quail-current-key - ; quail-indian-update-preceding-char) - control-flag) + (require 'devan-util) -;;; update function - -;; CONTROL-FLAG is integer (n) -;; quail-current-key :: keyboard input. -;; Only first n can be translated. -;; quail-current-string :: corresonding string. Translated when last -;; time CONTROL-FLAG is nil. -;; todo :: (1) put last (len-n) char to unrread-command-event. -;; (2) put translated string to quail-current-string. -;; -;; CONTROL-FLAG is t (terminate) or nil (proceed the translation) -;; quail-current-key :: keyboard input. -;; quail-current-string :: corresponding string. Created by database. -;; todo :: (1) put modified translated string to quail-current-string. - ;;; ;;; Input by transliteration ;;; diff --cc leim/quail/latin-alt.el index ea0e1ba4448,28bc71dbd94..87c961e03c9 --- a/leim/quail/latin-alt.el +++ b/leim/quail/latin-alt.el @@@ -1406,25 -1406,24 +1406,24 @@@ Doubling the postfix separates the lett ) (quail-define-package - "turkish-latin-3-alt-postfix" "Turkish" "TR3<<" t - "Turkish (T,A|(Brk,Ag(Be) input method with postfix modifiers. + "turkish-alt-postfix" "Turkish" "TR«" t + "Turkish (Türkçe) input method with postfix modifiers. - This is for those who use Latin-3 (ISO-8859-3) for Turkish. If you - use Latin-5 (ISO-8859-9), you should use \"turkish-alt-postfix\" instead. -turkish-latin-3-alt-postfix is an obsolete alisa for turkish-alt-postfix. ++turkish-latin-3-alt-postfix is an obsolete alias for turkish-alt-postfix. - Note for I, ,C9(B, ,C)(B, i. + Note for I, ı, Ä°, i. - A^ -> ,CB(B - C` -> ,CG(B - G^ -> ,C+(B + A^ ->  + C` -> Ç + G^ -> Ğ I -> I - i -> ,C9(B - I/ -> ,C)(B + i -> ı + I/ -> Ä° i/ -> i - O\" -> ,CV(B - S` -> ,C*(B - U\" -> ,C\(B - U^ -> ,C[(B + O\" -> Ö + S` -> Ş + U\" -> Ü + U^ -> Û Doubling the postfix separates the letter and postfix: e.g. a^^ -> a^ " nil t nil nil nil nil nil nil nil nil t) @@@ -1467,69 -1466,12 +1466,11 @@@ ("u^^" ["u^"]) ) - (quail-define-package - "turkish-alt-postfix" "Turkish" "TR,A+(B" t - "Turkish (T,A|(Brk,Ag(Be) input method with postfix modifiers. - - This is for those who use Latin-5 (ISO-8859-9) for Turkish. If you - use Latin-3 (ISO-8859-3), you should use - \"turkish-latin-3-alt-postfix\" instead. - - Note for I, ,M}(B, ,M](B, i. - - A^ -> ,MB(B - C` -> ,MG(B - G^ -> ,MP(B - I -> I - i -> ,M}(B - I/ -> ,M](B - i/ -> i - O\" -> ,MV(B - S` -> ,M^(B - U\" -> ,M\(B - U^ -> ,M[(B - - Doubling the postfix separates the letter and postfix: e.g. a^^ -> a^ - " nil t nil nil nil nil nil nil nil nil t) - - (quail-define-rules - ("A^" ?,MB(B) - ("a^" ?,Mb(B) - ("C`" ?,MG(B) - ("c`" ?,Mg(B) - ("G^" ?,MP(B) - ("g^" ?,Mp(B) - ("I/" ?,M](B) - ("i" ?,M}(B) - ("i/" ?i) - ("O\"" ?,MV(B) - ("o\"" ?,Cv(B) - ("S`" ?,M^(B) - ("s`" ?,M~(B) - ("U\"" ?,M\(B) - ("u\"" ?,M|(B) - ("U^" ?,M[(B) - ("u^" ?,M{(B) - - ("A^^" ["A^"]) - ("a^^" ["a^"]) - ("C``" ["C`"]) - ("c``" ["c`"]) - ("G^^" ["G^"]) - ("g^^" ["g^"]) - ("I//" ["I/"]) - ("i" ["i"]) - ("i//" ["i/"]) - ("O\"\"" ["O\""]) - ("o\"\"" ["o\""]) - ("S``" ["S`"]) - ("s``" ["s`"]) - ("U\"\"" ["U\""]) - ("u\"\"" ["u\""]) - ("U^^" ["U^"]) - ("u^^" ["u^"]) - ) + ;; Backwards compatibility. + (push (cons "turkish-latin-3-alt-postfix" + (cdr (assoc "turkish-alt-postfix" quail-package-alist))) + quail-package-alist) - ;; Dutch Quail input method derived from the one in Yudit by Roman ;; Czyborra. (quail-define-package @@@ -1551,73 -1493,73 +1492,73 @@@ Caters for French and Turkish as well a ------------+---------+---------- | prefix | ------------+---------+---------- - diaeresis | \" | \"a -> ,Ad(B + diaeresis | \" | \"a -> ä - + Doubling the postfix separates the letter and postfix: e.g. a'' -> a' " nil t nil nil nil nil nil nil nil nil t) (quail-define-rules - ("fl." ?$,1!R(B) ;; LATIN SMALL LETTER F WITH HOOK (florin currency symbol) - ("eur." ?$,1tL(B) ;; EURO SIGN - ;; $,1r|(BThe 25th letter of the Dutch alphabet.$,1r}(B - ("ij" ?$,1 S(B) ;; LATIN SMALL LIGATURE IJ - ("IJ" ?$,1 R(B) ;; LATIN CAPITAL LIGATURE IJ - ;; $,1r|(BTrema on the second letter of vowel pair.$,1r}(B Yudit uses `:', not `"'. - ("\"a" ?,Ad(B) ;; LATIN SMALL LETTER A WITH DIAERESIS - ("\"e" ?,Ak(B) ;; LATIN SMALL LETTER E WITH DIAERESIS - ("\"i" ?,Ao(B) ;; LATIN SMALL LETTER I WITH DIAERESIS - ("\"o" ?,Av(B) ;; LATIN SMALL LETTER O WITH DIAERESIS - ("\"u" ?,A|(B) ;; LATIN SMALL LETTER U WITH DIAERESIS - ("\"A" ?,AD(B) ;; LATIN CAPITAL LETTER A WITH DIAERESIS - ("\"E" ?,AK(B) ;; LATIN CAPITAL LETTER E WITH DIAERESIS - ("\"I" ?,AO(B) ;; LATIN CAPITAL LETTER I WITH DIAERESIS - ("\"O" ?,AV(B) ;; LATIN CAPITAL LETTER O WITH DIAERESIS - ("\"U" ?,A\(B) ;; LATIN CAPITAL LETTER U WITH DIAERESIS - ;; $,1r|(BAcute, marking emphasis on long vowels$,1r}(B: - ("a'" ?,Aa(B) ;; LATIN SMALL LETTER A WITH ACUTE - ("e'" ?,Ai(B) ;; LATIN SMALL LETTER E WITH ACUTE - ("i'" ?,Am(B) ;; LATIN SMALL LETTER I WITH ACUTE - ("o'" ?,As(B) ;; LATIN SMALL LETTER O WITH ACUTE - ("u'" ?,Az(B) ;; LATIN SMALL LETTER U WITH ACUTE - ("A'" ?,AA(B) ;; LATIN CAPITAL LETTER A WITH ACUTE - ("E'" ?,AI(B) ;; LATIN CAPITAL LETTER E WITH ACUTE - ("I'" ?,AM(B) ;; LATIN CAPITAL LETTER I WITH ACUTE - ("O'" ?,AS(B) ;; LATIN CAPITAL LETTER O WITH ACUTE - ("U'" ?,AZ(B) ;; LATIN CAPITAL LETTER U WITH ACUTE - ;; $,1r|(BGrave, marking emphasis on short vowels$,1r}(B: - ("a`" ?,A`(B) ;; LATIN SMALL LETTER A WITH GRAVE - ("e`" ?,Ah(B) ;; LATIN SMALL LETTER E WITH GRAVE - ("i`" ?,Al(B) ;; LATIN SMALL LETTER I WITH GRAVE - ("o`" ?,Ar(B) ;; LATIN SMALL LETTER O WITH GRAVE - ("u`" ?,Ay(B) ;; LATIN SMALL LETTER U WITH GRAVE - ("A`" ?,A@(B) ;; LATIN CAPITAL LETTER A WITH GRAVE - ("E`" ?,AH(B) ;; LATIN CAPITAL LETTER E WITH GRAVE - ("I`" ?,AL(B) ;; LATIN CAPITAL LETTER I WITH GRAVE - ("O`" ?,AR(B) ;; LATIN CAPITAL LETTER O WITH GRAVE - ("U`" ?,AY(B) ;; LATIN CAPITAL LETTER U WITH GRAVE - ;; $,1r|(BCater for the use of many French words and use of the circumflex - ;; in Frisian.$,1r}(B Yudit used `;' for cedilla. - ("c," ?,Ag(B) ;; LATIN SMALL LETTER C WITH CEDILLA - ("C," ?,AG(B) ;; LATIN CAPITAL LETTER C WITH CEDILLA - ("a^" ?,Ab(B) ;; LATIN SMALL LETTER A WITH CIRCUMFLEX - ("e^" ?,Aj(B) ;; LATIN SMALL LETTER E WITH CIRCUMFLEX - ("i^" ?,An(B) ;; LATIN SMALL LETTER I WITH CIRCUMFLEX - ("o^" ?,At(B) ;; LATIN SMALL LETTER O WITH CIRCUMFLEX - ("u^" ?,A{(B) ;; LATIN SMALL LETTER U WITH CIRCUMFLEX - ("A^" ?,AB(B) ;; LATIN CAPITAL LETTER A WITH CIRCUMFLEX - ("E^" ?,AJ(B) ;; LATIN CAPITAL LETTER E WITH CIRCUMFLEX - ("I^" ?,AN(B) ;; LATIN CAPITAL LETTER I WITH CIRCUMFLEX - ("O^" ?,AT(B) ;; LATIN CAPITAL LETTER O WITH CIRCUMFLEX - ("U^" ?,A[(B) ;; LATIN CAPITAL LETTER U WITH CIRCUMFLEX - ;; $,1r|(BFollow the example of the Dutch POSIX locale, using ISO-8859-9 to - ;; cater to the many Turks in Dutch society.$,1r}(B Perhaps German methods + ("fl." ?ƒ) ;; LATIN SMALL LETTER F WITH HOOK (florin currency symbol) + ("eur." ?€) ;; EURO SIGN + ;; “The 25th letter of the Dutch alphabet.” - ("ij" ?ij) ;; LATIN SMALL LIGATURE IJ - ("IJ" ?IJ) ;; LATIN CAPITAL LIGATURE IJ ++ ("ij" ?ij) ;; LATIN SMALL LIGATURE IJ ++ ("IJ" ?IJ) ;; LATIN CAPITAL LIGATURE IJ + ;; “Trema on the second letter of vowel pair.” Yudit uses `:', not `"'. + ("\"a" ?ä) ;; LATIN SMALL LETTER A WITH DIAERESIS + ("\"e" ?ë) ;; LATIN SMALL LETTER E WITH DIAERESIS + ("\"i" ?ï) ;; LATIN SMALL LETTER I WITH DIAERESIS + ("\"o" ?ö) ;; LATIN SMALL LETTER O WITH DIAERESIS + ("\"u" ?ü) ;; LATIN SMALL LETTER U WITH DIAERESIS + ("\"A" ?Ä) ;; LATIN CAPITAL LETTER A WITH DIAERESIS + ("\"E" ?Ë) ;; LATIN CAPITAL LETTER E WITH DIAERESIS + ("\"I" ?Ï) ;; LATIN CAPITAL LETTER I WITH DIAERESIS + ("\"O" ?Ö) ;; LATIN CAPITAL LETTER O WITH DIAERESIS + ("\"U" ?Ü) ;; LATIN CAPITAL LETTER U WITH DIAERESIS + ;; “Acute, marking emphasis on long vowels”: + ("a'" ?á) ;; LATIN SMALL LETTER A WITH ACUTE + ("e'" ?é) ;; LATIN SMALL LETTER E WITH ACUTE + ("i'" ?í) ;; LATIN SMALL LETTER I WITH ACUTE + ("o'" ?ó) ;; LATIN SMALL LETTER O WITH ACUTE + ("u'" ?ú) ;; LATIN SMALL LETTER U WITH ACUTE + ("A'" ?Á) ;; LATIN CAPITAL LETTER A WITH ACUTE + ("E'" ?É) ;; LATIN CAPITAL LETTER E WITH ACUTE + ("I'" ?Í) ;; LATIN CAPITAL LETTER I WITH ACUTE + ("O'" ?Ó) ;; LATIN CAPITAL LETTER O WITH ACUTE + ("U'" ?Ú) ;; LATIN CAPITAL LETTER U WITH ACUTE + ;; “Grave, marking emphasis on short vowels”: + ("a`" ?à) ;; LATIN SMALL LETTER A WITH GRAVE + ("e`" ?è) ;; LATIN SMALL LETTER E WITH GRAVE + ("i`" ?ì) ;; LATIN SMALL LETTER I WITH GRAVE + ("o`" ?ò) ;; LATIN SMALL LETTER O WITH GRAVE + ("u`" ?ù) ;; LATIN SMALL LETTER U WITH GRAVE + ("A`" ?À) ;; LATIN CAPITAL LETTER A WITH GRAVE + ("E`" ?È) ;; LATIN CAPITAL LETTER E WITH GRAVE + ("I`" ?Ì) ;; LATIN CAPITAL LETTER I WITH GRAVE + ("O`" ?Ò) ;; LATIN CAPITAL LETTER O WITH GRAVE + ("U`" ?Ù) ;; LATIN CAPITAL LETTER U WITH GRAVE + ;; “Cater for the use of many French words and use of the circumflex + ;; in Frisian.” Yudit used `;' for cedilla. + ("c," ?ç) ;; LATIN SMALL LETTER C WITH CEDILLA + ("C," ?Ç) ;; LATIN CAPITAL LETTER C WITH CEDILLA + ("a^" ?â) ;; LATIN SMALL LETTER A WITH CIRCUMFLEX + ("e^" ?ê) ;; LATIN SMALL LETTER E WITH CIRCUMFLEX + ("i^" ?î) ;; LATIN SMALL LETTER I WITH CIRCUMFLEX + ("o^" ?ô) ;; LATIN SMALL LETTER O WITH CIRCUMFLEX + ("u^" ?û) ;; LATIN SMALL LETTER U WITH CIRCUMFLEX + ("A^" ?Â) ;; LATIN CAPITAL LETTER A WITH CIRCUMFLEX + ("E^" ?Ê) ;; LATIN CAPITAL LETTER E WITH CIRCUMFLEX + ("I^" ?Î) ;; LATIN CAPITAL LETTER I WITH CIRCUMFLEX + ("O^" ?Ô) ;; LATIN CAPITAL LETTER O WITH CIRCUMFLEX + ("U^" ?Û) ;; LATIN CAPITAL LETTER U WITH CIRCUMFLEX + ;; “Follow the example of the Dutch POSIX locale, using ISO-8859-9 to + ;; cater to the many Turks in Dutch society.” Perhaps German methods ;; should do so too. Follow turkish-alt-postfix here. - ("i/" ?$,1 Q(B) ;; LATIN SMALL LETTER I WITH NO DOT - ("s," ?$,1 (B) ;; LATIN SMALL LETTER S WITH CEDILLA - ("g^" ?$,1 ?(B) ;; LATIN SMALL LETTER G WITH BREVE - ("I/" ?$,1 P(B) ;; LATIN CAPITAL LETTER I WITH DOT ABOVE - ("S," ?$,1 ~(B) ;; LATIN CAPITAL LETTER S WITH CEDILLA - ("G^" ?$,1 >(B) ;; LATIN CAPITAL LETTER G WITH BREVE + ("i/" ?ı) ;; LATIN SMALL LETTER I WITH NO DOT + ("s," ?ş) ;; LATIN SMALL LETTER S WITH CEDILLA + ("g^" ?ğ) ;; LATIN SMALL LETTER G WITH BREVE + ("I/" ?Ä°) ;; LATIN CAPITAL LETTER I WITH DOT ABOVE + ("S," ?Ş) ;; LATIN CAPITAL LETTER S WITH CEDILLA + ("G^" ?Ğ) ;; LATIN CAPITAL LETTER G WITH BREVE ) ;; Originally from Yudit, discussed with Albertas Agejevas diff --cc leim/quail/latin-post.el index b99f4062c34,6cfd0203ff4..7859d11f28d --- a/leim/quail/latin-post.el +++ b/leim/quail/latin-post.el @@@ -1,10 -1,10 +1,13 @@@ - ;;; latin-post.el --- Quail packages for inputting various European characters -*-coding: iso-2022-7bit;-*- + ;;; latin-post.el --- Quail packages for inputting various European characters -*-coding: utf-8;-*- ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. -;; Copyright (C) 2002 Free Software Foundation, Inc. ++;; Licensed to the Free Software Foundation. +;; Copyright (C) 2001, 2002 Free Software Foundation. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 - ;; Keywords: multilingual, input method, latin + ;; Keywords: multilingual, input method, latin, i18n ;; This file is part of GNU Emacs. @@@ -2520,23 -2459,4 +2463,23 @@@ Doubling the postfix separates the lett ("z~~" ["z~"]) ) +;; Derived from Slovenian.kmap from Yudit +;; attributed as: 2001-11-11 Roman Maurer +(quail-define-package + "slovenian" "Slovenian" "Sl" t + "Slovenian postfix input." + nil t t t nil nil nil nil nil nil t) + +(quail-define-rules - ("C<" ?,BH(B) - ("C'" ?,BF(B) - ("D;" ?,BP(B) - ("S<" ?,B)(B) - ("Z<" ?,B.(B) - ("c<" ?,Bh(B) - ("c'" ?,Bf(B) - ("d;" ?,Bp(B) - ("s<" ?,B9(B) - ("z<" ?,B>(B)) ++ ("C<" ?Č) ++ ("C'" ?Ć) ++ ("D;" ?Đ) ++ ("S<" ?Å ) ++ ("Z<" ?Ž) ++ ("c<" ?č) ++ ("c'" ?ć) ++ ("d;" ?đ) ++ ("s<" ?Å¡) ++ ("z<" ?ž)) + ;;; latin-post.el ends here diff --cc leim/quail/latin-pre.el index 0b47f8fd50c,d86b50d76ab..023dbd44461 --- a/leim/quail/latin-pre.el +++ b/leim/quail/latin-pre.el @@@ -69,111 -63,111 +69,111 @@@ " nil t nil nil nil nil nil nil nil nil t) (quail-define-rules - ("'A" ?,AA(B) - ("'E" ?,AI(B) - ("'I" ?,AM(B) - ("'O" ?,AS(B) - ("'U" ?,AZ(B) - ("'Y" ?,A](B) - ("'a" ?,Aa(B) - ("'e" ?,Ai(B) - ("'i" ?,Am(B) - ("'o" ?,As(B) - ("'u" ?,Az(B) - ("'y" ?,A}(B) - ("''" ?,A4(B) + ("'A" ?Á) + ("'E" ?É) + ("'I" ?Í) + ("'O" ?Ó) + ("'U" ?Ú) + ("'Y" ?Ý) + ("'a" ?á) + ("'e" ?é) + ("'i" ?í) + ("'o" ?ó) + ("'u" ?ú) + ("'y" ?ý) + ("''" ?´) ("' " ?') - ("`A" ?,A@(B) - ("`E" ?,AH(B) - ("`I" ?,AL(B) - ("`O" ?,AR(B) - ("`U" ?,AY(B) - ("`a" ?,A`(B) - ("`e" ?,Ah(B) - ("`i" ?,Al(B) - ("`o" ?,Ar(B) - ("`u" ?,Ay(B) + ("`A" ?À) + ("`E" ?È) + ("`I" ?Ì) + ("`O" ?Ò) + ("`U" ?Ù) + ("`a" ?à) + ("`e" ?è) + ("`i" ?ì) + ("`o" ?ò) + ("`u" ?ù) ("``" ?`) ("` " ?`) - ("^A" ?,AB(B) - ("^E" ?,AJ(B) - ("^I" ?,AN(B) - ("^O" ?,AT(B) - ("^U" ?,A[(B) - ("^a" ?,Ab(B) - ("^e" ?,Aj(B) - ("^i" ?,An(B) - ("^o" ?,At(B) - ("^u" ?,A{(B) + ("^A" ?Â) + ("^E" ?Ê) + ("^I" ?Î) + ("^O" ?Ô) + ("^U" ?Û) + ("^a" ?â) + ("^e" ?ê) + ("^i" ?î) + ("^o" ?ô) + ("^u" ?û) ("^^" ?^) ("^ " ?^) - ("\"A" ?,AD(B) - ("\"E" ?,AK(B) - ("\"I" ?,AO(B) - ("\"O" ?,AV(B) - ("\"U" ?,A\(B) - ("\"a" ?,Ad(B) - ("\"e" ?,Ak(B) - ("\"i" ?,Ao(B) - ("\"o" ?,Av(B) - ("\"s" ?,A_(B) - ("\"u" ?,A|(B) - ("\"y" ?,A(B) - ("\"\"" ?,A((B) + ("\"A" ?Ä) + ("\"E" ?Ë) + ("\"I" ?Ï) + ("\"O" ?Ö) + ("\"U" ?Ü) + ("\"a" ?ä) + ("\"e" ?ë) + ("\"i" ?ï) + ("\"o" ?ö) + ("\"s" ?ß) + ("\"u" ?ü) + ("\"y" ?ÿ) + ("\"\"" ?¨) ("\" " ?\") - ("~A" ?,AC(B) - ("~C" ?,AG(B) - ("~D" ?,AP(B) - ("~N" ?,AQ(B) - ("~O" ?,AU(B) - ("~T" ?,A^(B) - ("~a" ?,Ac(B) - ("~c" ?,Ag(B) - ("~d" ?,Ap(B) - ("~n" ?,Aq(B) - ("~o" ?,Au(B) - ("~t" ?,A~(B) - ("~>" ?\,A;(B) - ("~<" ?\,A+(B) - ("~!" ?,A!(B) - ("~?" ?,A?(B) - ("~~" ?,A8(B) + ("~A" ?Ã) + ("~C" ?Ç) + ("~D" ?Ð) + ("~N" ?Ñ) + ("~O" ?Õ) + ("~T" ?Þ) + ("~a" ?ã) + ("~c" ?ç) + ("~d" ?ð) + ("~n" ?ñ) + ("~o" ?õ) + ("~t" ?þ) + ("~>" ?\») + ("~<" ?\«) + ("~!" ?¡) + ("~?" ?¿) + ("~~" ?¸) ("~ " ?~) - ("/A" ?,AE(B) - ("/E" ?,AF(B) - ("/O" ?,AX(B) - ("/a" ?,Ae(B) - ("/e" ?,Af(B) - ("/o" ?,Ax(B) - ("//" ?,A0(B) + ("/A" ?Å) + ("/E" ?Æ) + ("/O" ?Ø) + ("/a" ?Ã¥) + ("/e" ?æ) + ("/o" ?ø) + ("//" ?°) ("/ " ?/) - ("_o" ?,A:(B) - ("_a" ?,A*(B) - ("_ " ?,A (B) + ("_o" ?º) + ("_a" ?ª) ++ ("_ " ? ) ;; Symbols added by Roland Smith - ("_+" ?,A1(B) - ("_y" ?,A%(B) - ("_:" ?,Aw(B) - ("/c" ?,A"(B) - ("/\\" ?,AW(B) - ("/2" ?,A=(B) - ("/4" ?,A<(B) - ("/3" ?,A>(B) - ("~s" ?,A'(B) - ("~p" ?,A6(B) - ("~x" ?,A$(B) - ("~." ?,A7(B) - ("~$" ?,A#(B) - ("~u" ?,A5(B) - ("^r" ?,A.(B) - ("^c" ?,A)(B) - ("^1" ?,A9(B) - ("^2" ?,A2(B) - ("^3" ?,A3(B) - ("~-" ?,A-(B) - ("~|" ?,A&(B) - ("/=" ?,A,(B) - ("~=" ?,A/(B) + ("_+" ?±) + ("_y" ?Â¥) + ("_:" ?÷) + ("/c" ?¢) + ("/\\" ?×) + ("/2" ?½) + ("/4" ?¼) + ("/3" ?¾) + ("~s" ?§) + ("~p" ?¶) + ("~x" ?¤) + ("~." ?·) + ("~$" ?£) + ("~u" ?µ) + ("^r" ?®) + ("^c" ?©) + ("^1" ?¹) + ("^2" ?²) + ("^3" ?³) + ("~-" ?­) + ("~|" ?¦) + ("/=" ?¬) + ("~=" ?¯) - ("_ " ? ) ; nbsp ) (quail-define-package @@@ -590,111 -584,113 +590,111 @@@ Key translation rules are effect | prefix | examples ------------+--------+---------- - acute | ' | 'a -> ,Ca(B '' -> ?,C4(B - grave | ` | `a -> ,C`(B - circumflex | ^ | ^a -> ,Cb(B - diaeresis | \" | \"a -> ,Cd(B \"\" -> ,C((B - cedilla | ~ | ~c -> ,Cg(B ~s -> ,C:(B ~~ -> ,C8(B - dot above | / . | /g -> ,Cu(B .o -> ,Cu(B - misc | \" ~ / | \"s -> ,C_(B ~g -> ,C;(B ~u -> ,C}(B /h -> ,C1(B /i -> ,C9(B - symbol | ~ | ~` -> ,C"(B /# -> ,C#(B /$ -> ,C$(B // -> ,C0(B + acute | ' | 'a -> á '' -> ?´ + grave | ` | `a -> à + circumflex | ^ | ^a -> â + diaeresis | \" | \"a -> ä \"\" -> ¨ + cedilla | ~ | ~c -> ç ~s -> ş ~~ -> ¸ - dot above | ~ / . | ~o -> Ä¡ /o -> Ä¡ .o -> Ä¡ ++ dot above | / . | /g -> Ä¡ .o -> Ä¡ + misc | \" ~ / | \"s -> ß ~g -> ğ ~u -> Å­ /h -> ħ /i -> ı + symbol | ~ | ~` -> ˘ /# -> £ /$ -> ¤ // -> ° " nil t nil nil nil nil nil nil nil nil t) (quail-define-rules - ("'A" ?,CA(B) - ("'E" ?,CI(B) - ("'I" ?,CM(B) - ("'O" ?,CS(B) - ("'U" ?,CZ(B) - ("'a" ?,Ca(B) - ("'e" ?,Ci(B) - ("'i" ?,Cm(B) - ("'o" ?,Cs(B) - ("'u" ?,Cz(B) - ("''" ?,C4(B) + ("'A" ?Á) + ("'E" ?É) + ("'I" ?Í) + ("'O" ?Ó) + ("'U" ?Ú) + ("'a" ?á) + ("'e" ?é) + ("'i" ?í) + ("'o" ?ó) + ("'u" ?ú) + ("''" ?´) ("' " ?') - ("`A" ?,C@(B) - ("`E" ?,CH(B) - ("`I" ?,CL(B) - ("`O" ?,CR(B) - ("`U" ?,CY(B) - ("`a" ?,C`(B) - ("`e" ?,Ch(B) - ("`i" ?,Cl(B) - ("`o" ?,Cr(B) - ("`u" ?,Cy(B) + ("`A" ?À) + ("`E" ?È) + ("`I" ?Ì) + ("`O" ?Ò) + ("`U" ?Ù) + ("`a" ?à) + ("`e" ?è) + ("`i" ?ì) + ("`o" ?ò) + ("`u" ?ù) ("``" ?`) ("` " ?`) - ("^A" ?,CB(B) - ("^C" ?,CF(B) - ("^E" ?,CJ(B) - ("^G" ?,CX(B) - ("^H" ?,C&(B) - ("^I" ?,CN(B) - ("^J" ?,C,(B) - ("^O" ?,CT(B) - ("^S" ?,C^(B) - ("^U" ?,C[(B) - ("^a" ?,Cb(B) - ("^c" ?,Cf(B) - ("^e" ?,Cj(B) - ("^g" ?,Cx(B) - ("^h" ?,C6(B) - ("^i" ?,Cn(B) - ("^j" ?,C<(B) - ("^o" ?,Ct(B) - ("^s" ?,C~(B) - ("^u" ?,C{(B) + ("^A" ?Â) + ("^C" ?Ĉ) + ("^E" ?Ê) + ("^G" ?Ĝ) + ("^H" ?Ĥ) + ("^I" ?Î) + ("^J" ?Ä´) + ("^O" ?Ô) + ("^S" ?Ŝ) + ("^U" ?Û) + ("^a" ?â) + ("^c" ?ĉ) + ("^e" ?ê) + ("^g" ?ĝ) + ("^h" ?Ä¥) + ("^i" ?î) + ("^j" ?ĵ) + ("^o" ?ô) + ("^s" ?ŝ) + ("^u" ?û) ("^^" ?^) ("^ " ?^) - ("\"A" ?,CD(B) - ("\"E" ?,CK(B) - ("\"I" ?,CO(B) - ("\"O" ?,CV(B) - ("\"U" ?,C\(B) - ("\"a" ?,Cd(B) - ("\"e" ?,Ck(B) - ("\"i" ?,Co(B) - ("\"o" ?,Cv(B) - ("\"u" ?,C|(B) - ("\"s" ?,C_(B) - ("\"\"" ?,C((B) + ("\"A" ?Ä) + ("\"E" ?Ë) + ("\"I" ?Ï) + ("\"O" ?Ö) + ("\"U" ?Ü) + ("\"a" ?ä) + ("\"e" ?ë) + ("\"i" ?ï) + ("\"o" ?ö) + ("\"u" ?ü) + ("\"s" ?ß) + ("\"\"" ?¨) ("\" " ?\") - ("~C" ?,CG(B) - ("~N" ?,CQ(B) - ("~c" ?,Cg(B) - ("~n" ?,Cq(B) - ("~S" ?,C*(B) - ("~s" ?,C:(B) - ("~G" ?,C+(B) - ("~g" ?,C;(B) - ("~U" ?,C](B) - ("~u" ?,C}(B) - ("~`" ?,C"(B) - ("~~" ?,C8(B) + ("~C" ?Ç) + ("~N" ?Ñ) - ("~O" ?Õ) + ("~c" ?ç) + ("~n" ?ñ) - ("~o" ?õ) + ("~S" ?Ş) + ("~s" ?ş) + ("~G" ?Ğ) + ("~g" ?ğ) + ("~U" ?Ŭ) + ("~u" ?Å­) + ("~`" ?˘) + ("~~" ?¸) ("~ " ?~) - ("/C" ?,CE(B) - ("/G" ?,CU(B) - ("/H" ?,C!(B) - ("/I" ?,C)(B) - ("/Z" ?,C/(B) - ("/c" ?,Ce(B) - ("/g" ?,Cu(B) - ("/h" ?,C1(B) - ("/i" ?,C9(B) - ("/z" ?,C?(B) - ("/." ?,C(B) - ("/#" ?,C#(B) - ("/$" ?,C$(B) - ("//" ?,C0(B) + ("/C" ?Ċ) + ("/G" ?Ä ) + ("/H" ?Ħ) + ("/I" ?Ä°) + ("/Z" ?Å») + ("/c" ?ċ) + ("/g" ?Ä¡) + ("/h" ?ħ) + ("/i" ?ı) + ("/z" ?ż) + ("/." ?˙) + ("/#" ?£) + ("/$" ?¤) + ("//" ?°) ("/ " ?/) - (".C" ?,CE(B) - (".G" ?,CU(B) - (".I" ?,C)(B) - (".Z" ?,C/(B) - (".c" ?,Ce(B) - (".g" ?,Cu(B) - (".z" ?,C?(B) + (".C" ?Ċ) + (".G" ?Ä ) + (".I" ?Ä°) + (".Z" ?Å») + (".c" ?ċ) + (".g" ?Ä¡) + (".z" ?ż) ) @@@ -705,25 -701,24 +705,25 @@@ For example, the character named `aogon nil t t t nil nil nil nil nil nil t) (quail-define-rules + ("//" ?/) - ("/a" ?,B1(B) - ("/c" ?,Bf(B) - ("/e" ?,Bj(B) - ("/l" ?,B3(B) - ("/n" ?,Bq(B) - ("/o" ?,Bs(B) - ("/s" ?,B6(B) - ("/x" ?,B<(B) - ("/z" ?,B?(B) - ("/A" ?,B!(B) - ("/C" ?,BF(B) - ("/E" ?,BJ(B) - ("/L" ?,B#(B) - ("/N" ?,BQ(B) - ("/O" ?,BS(B) - ("/S" ?,B&(B) - ("/X" ?,B,(B) - ("/Z" ?,B/(B)) + ("/a" ?ą) + ("/c" ?ć) + ("/e" ?ę) + ("/l" ?ł) + ("/n" ?ń) - ("/o" ?ó) ++ ("/o" ?ó) + ("/s" ?ś) + ("/x" ?ź) + ("/z" ?ż) + ("/A" ?Ą) + ("/C" ?Ć) + ("/E" ?Ę) + ("/L" ?Ł) + ("/N" ?Ń) - ("/O" ?Ó) ++ ("/O" ?Ó) + ("/S" ?Ś) + ("/X" ?Ź) + ("/Z" ?Å»)) (quail-define-package "latin-9-prefix" "Latin-9" "0>" t @@@ -750,110 -745,111 +750,110 @@@ " nil t nil nil nil nil nil nil nil nil t) (quail-define-rules - ("'A" ?,bA(B) - ("'E" ?,bI(B) - ("'I" ?,bM(B) - ("'O" ?,bS(B) - ("'U" ?,bZ(B) - ("'Y" ?,b](B) - ("'a" ?,ba(B) - ("'e" ?,bi(B) - ("'i" ?,bm(B) - ("'o" ?,bs(B) - ("'u" ?,bz(B) - ("'y" ?,b}(B) + ("'A" ?Á) + ("'E" ?É) + ("'I" ?Í) + ("'O" ?Ó) + ("'U" ?Ú) + ("'Y" ?Ý) + ("'a" ?á) + ("'e" ?é) + ("'i" ?í) + ("'o" ?ó) + ("'u" ?ú) + ("'y" ?ý) ("' " ?') - ("`A" ?,b@(B) - ("`E" ?,bH(B) - ("`I" ?,bL(B) - ("`O" ?,bR(B) - ("`U" ?,bY(B) - ("`a" ?,b`(B) - ("`e" ?,bh(B) - ("`i" ?,bl(B) - ("`o" ?,br(B) - ("`u" ?,by(B) + ("`A" ?À) + ("`E" ?È) + ("`I" ?Ì) + ("`O" ?Ò) + ("`U" ?Ù) + ("`a" ?à) + ("`e" ?è) + ("`i" ?ì) + ("`o" ?ò) + ("`u" ?ù) ("``" ?`) ("` " ?`) - ("^A" ?,bB(B) - ("^E" ?,bJ(B) - ("^I" ?,bN(B) - ("^O" ?,bT(B) - ("^U" ?,b[(B) - ("^a" ?,bb(B) - ("^e" ?,bj(B) - ("^i" ?,bn(B) - ("^o" ?,bt(B) - ("^u" ?,b{(B) + ("^A" ?Â) + ("^E" ?Ê) + ("^I" ?Î) + ("^O" ?Ô) + ("^U" ?Û) + ("^a" ?â) + ("^e" ?ê) + ("^i" ?î) + ("^o" ?ô) + ("^u" ?û) ("^^" ?^) ("^ " ?^) - ("\"A" ?,bD(B) - ("\"E" ?,bK(B) - ("\"I" ?,bO(B) - ("\"O" ?,bV(B) - ("\"U" ?,b\(B) - ("\"a" ?,bd(B) - ("\"e" ?,bk(B) - ("\"i" ?,bo(B) - ("\"o" ?,bv(B) - ("\"s" ?,b_(B) - ("\"u" ?,b|(B) - ("\"y" ?,b(B) + ("\"A" ?Ä) + ("\"E" ?Ë) + ("\"I" ?Ï) + ("\"O" ?Ö) + ("\"U" ?Ü) + ("\"a" ?ä) + ("\"e" ?ë) + ("\"i" ?ï) + ("\"o" ?ö) + ("\"s" ?ß) + ("\"u" ?ü) + ("\"y" ?ÿ) ("\" " ?\") - ("~A" ?,bC(B) - ("~C" ?,bG(B) - ("~D" ?,bP(B) - ("~N" ?,bQ(B) - ("~O" ?,bU(B) - ("~S" ?,b&(B) - ("~T" ?,b^(B) - ("~Z" ?,b4(B) - ("~a" ?,bc(B) - ("~c" ?,bg(B) - ("~d" ?,bp(B) - ("~n" ?,bq(B) - ("~o" ?,bu(B) - ("~s" ?,b((B) - ("~t" ?,b~(B) - ("~z" ?,b8(B) - ("~>" ?\,b;(B) - ("~<" ?\,b+(B) - ("~!" ?,b!(B) - ("~?" ?,b?(B) + ("~A" ?Ã) + ("~C" ?Ç) + ("~D" ?Ð) + ("~N" ?Ñ) + ("~O" ?Õ) + ("~S" ?Å ) + ("~T" ?Þ) + ("~Z" ?Ž) + ("~a" ?ã) + ("~c" ?ç) + ("~d" ?ð) + ("~n" ?ñ) + ("~o" ?õ) + ("~s" ?Å¡) + ("~t" ?þ) + ("~z" ?ž) + ("~>" ?\») + ("~<" ?\«) + ("~!" ?¡) + ("~?" ?¿) ("~ " ?~) - ("/A" ?,bE(B) - ("/E" ?,bF(B) - ("/O" ?,bX(B) - ("/a" ?,be(B) - ("/e" ?,bf(B) - ("/o" ?,bx(B) - ("//" ?,b0(B) - ("~_" ? ) + ("/A" ?Å) + ("/E" ?Æ) + ("/O" ?Ø) + ("/a" ?Ã¥) + ("/e" ?æ) + ("/o" ?ø) + ("//" ?°) ("/ " ?/) - ("_o" ?,b:(B) - ("_a" ?,b*(B) - ("_+" ?,b1(B) - ("_y" ?,b%(B) - ("_:" ?,bw(B) - ("/c" ?,b"(B) - ("/\\" ?,bW(B) - ("/o" ?,b=(B) ; clash with ,bx(B, but ,bf(B uses / - ("/O" ?,b<(B) - ("\"Y" ?,b>(B) - ("~s" ?,b'(B) - ("~p" ?,b6(B) + ("_o" ?º) + ("_a" ?ª) + ("_+" ?±) + ("_y" ?Â¥) + ("_:" ?÷) + ("/c" ?¢) + ("/\\" ?×) + ("/o" ?œ) ; clash with ø, but æ uses / + ("/O" ?Œ) + ("\"Y" ?Ÿ) + ("~s" ?§) + ("~p" ?¶) ;; Is this the best option for Euro entry? - ("~e" ?,b$(B) - ("~." ?,b7(B) - ("~$" ?,b#(B) - ("~u" ?,b5(B) - ("^r" ?,b.(B) - ("^c" ?,b)(B) - ("^1" ?,b9(B) - ("^2" ?,b2(B) - ("^3" ?,b3(B) - ("~-" ?,b-(B) - ("~=" ?,b/(B) - ("/=" ?,b,(B)) + ("~e" ?€) + ("~." ?·) + ("~$" ?£) + ("~u" ?µ) + ("^r" ?®) + ("^c" ?©) + ("^1" ?¹) + ("^2" ?²) + ("^3" ?³) + ("~-" ?­) + ("~=" ?¯) + ("/=" ?¬)) ;; Latin-8 was done by an Englishman -- Johnny Celt should take a ;; squint at it. @@@ -864,123 -860,125 +864,123 @@@ effect | prefix | examples ------------+--------+---------- - acute | ' | 'a -> ,_a(B - grave | ` | `a -> ,_`(B - circumflex | ^ | ^w -> ,_p(B - diaeresis | \" | \"a -> ,_d(B - dot above | . | .b -> ,_"(B - tilde | ~ | ~a -> ,_c(B - cedilla | ~ | ~c -> ,_g(B - misc | \" ~ / | \"s -> ,__(B /a -> ,_e(B /e -> ,_f(B /o -> ,_x(B - | ~ | ~s -> ,_'(B ~$ -> ,_#(B ~p -> ,_6(B - symbol | ^ | ^r -> ,_.(B ^c -> ,_)(B + acute | ' | 'a -> á + grave | ` | `a -> à + circumflex | ^ | ^w -> ŵ + diaeresis | \" | \"a -> ä + dot above | . | .b -> ḃ + tilde | ~ | ~a -> ã + cedilla | ~ | ~c -> ç + misc | \" ~ / | \"s -> ß /a -> Ã¥ /e -> æ /o -> ø + | ~ | ~s -> § ~$ -> £ ~p -> ¶ + symbol | ^ | ^r -> ® ^c -> © " nil t nil nil nil nil nil nil nil nil t) -;; Basically following Latin-1 plus dottiness from Latin-3. +;; Basically following Latin-1, plus dottiness from Latin-3. (quail-define-rules - (".B" ?,_!(B) - (".b" ?,_"(B) - (".c" ?,_%(B) - (".C" ?,_$(B) - (".D" ?,_&(B) - (".d" ?,_+(B) - (".f" ?,_1(B) - (".F" ?,_0(B) - (".g" ?,_3(B) - (".G" ?,_2(B) - (".m" ?,_5(B) - (".M" ?,_4(B) - (".p" ?,_9(B) - (".P" ?,_7(B) - (".s" ?,_?(B) - (".S" ?,_;(B) - (".t" ?,_w(B) - (".T" ?,_W(B) - ("'A" ?,_A(B) - ("'E" ?,_I(B) - ("'I" ?,_M(B) - ("'O" ?,_S(B) - ("'U" ?,_Z(B) - ("'Y" ?,_](B) - ("'W" ?,_*(B) - ("'a" ?,_a(B) - ("'e" ?,_i(B) - ("'i" ?,_m(B) - ("'o" ?,_s(B) - ("'u" ?,_z(B) - ("'w" ?,_:(B) - ("'y" ?,_}(B) + (".B" ?Ḃ) + (".b" ?ḃ) + (".c" ?ċ) + (".C" ?Ċ) + (".D" ?Ḋ) + (".d" ?ḋ) + (".f" ?ḟ) + (".F" ?Ḟ) + (".g" ?Ä¡) + (".G" ?Ä ) + (".m" ?ṁ) + (".M" ?Ṁ) + (".p" ?ṗ) + (".P" ?Ṗ) + (".s" ?ṡ) + (".S" ?á¹ ) + (".t" ?ṫ) + (".T" ?Ṫ) + ("'A" ?Á) + ("'E" ?É) + ("'I" ?Í) + ("'O" ?Ó) + ("'U" ?Ú) + ("'Y" ?Ý) + ("'W" ?Ẃ) + ("'a" ?á) + ("'e" ?é) + ("'i" ?í) + ("'o" ?ó) + ("'u" ?ú) + ("'w" ?ẃ) + ("'y" ?ý) ("' " ?') - ("`A" ?,_@(B) - ("`E" ?,_H(B) - ("`I" ?,_L(B) - ("`O" ?,_R(B) - ("`U" ?,_Y(B) - ("`W" ?,_((B) - ("`Y" ?,_,(B) - ("`a" ?,_`(B) - ("`e" ?,_h(B) - ("`i" ?,_l(B) - ("`o" ?,_r(B) - ("`u" ?,_y(B) - ("`w" ?,_8(B) - ("`y" ?,_<(B) + ("`A" ?À) + ("`E" ?È) + ("`I" ?Ì) + ("`O" ?Ò) + ("`U" ?Ù) + ("`W" ?Ẁ) + ("`Y" ?Ỳ) + ("`a" ?à) + ("`e" ?è) + ("`i" ?ì) + ("`o" ?ò) + ("`u" ?ù) + ("`w" ?ẁ) + ("`y" ?ỳ) ("``" ?`) ("` " ?`) - ("^A" ?,_B(B) - ("^E" ?,_J(B) - ("^I" ?,_N(B) - ("^O" ?,_T(B) - ("^U" ?,_[(B) - ("^a" ?,_b(B) - ("^e" ?,_j(B) - ("^i" ?,_n(B) - ("^o" ?,_t(B) - ("^u" ?,_{(B) - ("^w" ?,_p(B) - ("^W" ?,_P(B) - ("^y" ?,_~(B) - ("^Y" ?,_^(B) + ("^A" ?Â) + ("^E" ?Ê) + ("^I" ?Î) + ("^O" ?Ô) + ("^U" ?Û) + ("^a" ?â) + ("^e" ?ê) + ("^i" ?î) + ("^o" ?ô) + ("^u" ?û) + ("^w" ?ŵ) + ("^W" ?Å´) + ("^y" ?Å·) + ("^Y" ?Ŷ) ("^^" ?^) ("^ " ?^) - ("\"A" ?,_D(B) - ("\"E" ?,_K(B) - ("\"I" ?,_O(B) - ("\"O" ?,_V(B) - ("\"U" ?,_\(B) - ("\"a" ?,_d(B) - ("\"e" ?,_k(B) - ("\"i" ?,_o(B) - ("\"o" ?,_v(B) - ("\"s" ?,__(B) - ("\"u" ?,_|(B) - ("\"w" ?,_>(B) - ("\"W" ?,_=(B) - ("\"y" ?,_(B) - ("\"Y" ?,_/(B) + ("\"A" ?Ä) + ("\"E" ?Ë) + ("\"I" ?Ï) + ("\"O" ?Ö) + ("\"U" ?Ü) + ("\"a" ?ä) + ("\"e" ?ë) + ("\"i" ?ï) + ("\"o" ?ö) + ("\"s" ?ß) + ("\"u" ?ü) + ("\"w" ?ẅ) + ("\"W" ?Ẅ) + ("\"y" ?ÿ) + ("\"Y" ?Ÿ) ("\" " ?\") - ("~A" ?,_C(B) - ("~C" ?,_G(B) - ("~N" ?,_Q(B) - ("~O" ?,_U(B) - ("~a" ?,_c(B) - ("~c" ?,_g(B) - ("~n" ?,_q(B) - ("~o" ?,_u(B) + ("~A" ?Ã) + ("~C" ?Ç) + ("~N" ?Ñ) + ("~O" ?Õ) + ("~a" ?ã) + ("~c" ?ç) + ("~n" ?ñ) + ("~o" ?õ) ("~ " ?~) - ("/A" ?,_E(B) - ("/E" ?,_F(B) - ("/O" ?,_X(B) - ("/a" ?,_e(B) - ("/e" ?,_f(B) - ("/o" ?,_x(B) - ("~_" ? ) + ("/A" ?Å) + ("/E" ?Æ) + ("/O" ?Ø) + ("/a" ?Ã¥) + ("/e" ?æ) + ("/o" ?ø) ("/ " ?/) - ("~p" ?,_6(B) - ("~s" ?,_'(B) - ("~$" ?,_#(B) - ("^r" ?,_.(B) - ("^c" ?,_)(B)) + ("~p" ?¶) + ("~s" ?§) + ("~$" ?£) + ("^r" ?®) + ("^c" ?©)) - (quail-define-package "latin-prefix" "Latin" "L>" t "Latin characters input method with prefix modifiers. @@@ -1006,181 -1004,182 +1006,181 @@@ of characters from a single Latin-N cha (quail-define-rules ("' " ?') - ("''" ?,A4(B) - ("'A" ?,AA(B) - ("'E" ?,AI(B) - ("'I" ?,AM(B) - ("'O" ?,AS(B) - ("'U" ?,AZ(B) - ("'W" ?$,1nb(B) - ("'Y" ?,A](B) - ("'a" ?,Aa(B) - ("'e" ?,Ai(B) - ("'i" ?,Am(B) - ("'o" ?,As(B) - ("'u" ?,Az(B) - ("'w" ?$,1nc(B) - ("'y" ?,A}(B) - (".B" ?$,1mB(B) - (".C" ?$,1 *(B) - (".D" ?$,1mJ(B) - (".F" ?$,1m^(B) - (".G" ?$,1 @(B) - (".I" ?$,1 P(B) - (".M" ?$,1n (B) - (".P" ?$,1n6(B) - (".S" ?$,1n@(B) - (".T" ?$,1nJ(B) - (".Z" ?$,1!;(B) - (".b" ?$,1mC(B) - (".c" ?$,1 +(B) - (".d" ?$,1mK(B) - (".f" ?$,1m_(B) - (".g" ?$,1 A(B) - (".m" ?$,1n!(B) - (".p" ?$,1n7(B) - (".s" ?$,1nA(B) - (".t" ?$,1nK(B) - (".z" ?$,1!<(B) + ("''" ?´) + ("'A" ?Á) + ("'E" ?É) + ("'I" ?Í) + ("'O" ?Ó) + ("'U" ?Ú) + ("'W" ?Ẃ) + ("'Y" ?Ý) + ("'a" ?á) + ("'e" ?é) + ("'i" ?í) + ("'o" ?ó) + ("'u" ?ú) + ("'w" ?ẃ) + ("'y" ?ý) + (".B" ?Ḃ) + (".C" ?Ċ) + (".D" ?Ḋ) + (".F" ?Ḟ) + (".G" ?Ä ) + (".I" ?Ä°) + (".M" ?Ṁ) + (".P" ?Ṗ) + (".S" ?á¹ ) + (".T" ?Ṫ) + (".Z" ?Å») + (".b" ?ḃ) + (".c" ?ċ) + (".d" ?ḋ) + (".f" ?ḟ) + (".g" ?Ä¡) + (".m" ?ṁ) + (".p" ?ṗ) + (".s" ?ṡ) + (".t" ?ṫ) + (".z" ?ż) ("/ " ?/) - ("/#" ?,A#(B) - ("/$" ?,A$(B) - ("/." ?$,1$y(B) - ("//" ?,A0(B) - ("/2" ?,A=(B) - ("/3" ?,A>(B) - ("/4" ?,A<(B) - ("/=" ?,A,(B) - ("/A" ?,AE(B) - ("/C" ?$,1 *(B) - ("/E" ?,AF(B) - ("/G" ?$,1 @(B) - ("/H" ?$,1 F(B) - ("/I" ?$,1 P(B) - ("/O" ?,AX(B) - ("/O" ?$,1 r(B) - ("/Z" ?$,1!;(B) - ("/\\" ?,AW(B) - ("/a" ?,Ae(B) - ("/c" ?,A"(B) - ("/c" ?$,1 +(B) - ("/e" ?,Af(B) - ("/g" ?$,1 A(B) - ("/h" ?$,1 G(B) - ("/i" ?$,1 Q(B) - ("/o" ?,Ax(B) - ("/o" ?$,1 s(B) - ("/z" ?$,1!<(B) + ("/#" ?£) + ("/$" ?¤) + ("/." ?˙) + ("//" ?°) + ("/2" ?½) + ("/3" ?¾) + ("/4" ?¼) + ("/=" ?¬) + ("/A" ?Å) + ("/C" ?Ċ) + ("/E" ?Æ) + ("/G" ?Ä ) + ("/H" ?Ħ) + ("/I" ?Ä°) + ("/O" ?Ø) + ("/O" ?Œ) + ("/Z" ?Å») + ("/\\" ?×) + ("/a" ?Ã¥) + ("/c" ?¢) + ("/c" ?ċ) + ("/e" ?æ) + ("/g" ?Ä¡) + ("/h" ?ħ) + ("/i" ?ı) + ("/o" ?ø) + ("/o" ?œ) + ("/z" ?ż) ("\" " ?\") - ("\"A" ?,AD(B) - ("\"E" ?,AK(B) - ("\"I" ?,AO(B) - ("\"O" ?,AV(B) - ("\"U" ?,A\(B) - ("\"W" ?$,1nd(B) - ("\"Y" ?$,1!8(B) - ("\"\"" ?,A((B) - ("\"a" ?,Ad(B) - ("\"e" ?,Ak(B) - ("\"i" ?,Ao(B) - ("\"o" ?,Av(B) - ("\"s" ?,A_(B) - ("\"u" ?,A|(B) - ("\"w" ?$,1ne(B) - ("\"y" ?,A(B) + ("\"A" ?Ä) + ("\"E" ?Ë) + ("\"I" ?Ï) + ("\"O" ?Ö) + ("\"U" ?Ü) + ("\"W" ?Ẅ) + ("\"Y" ?Ÿ) + ("\"\"" ?¨) + ("\"a" ?ä) + ("\"e" ?ë) + ("\"i" ?ï) + ("\"o" ?ö) + ("\"s" ?ß) + ("\"u" ?ü) + ("\"w" ?ẅ) + ("\"y" ?ÿ) ("^ " ?^) - ("^1" ?,A9(B) - ("^2" ?,A2(B) - ("^3" ?,A3(B) - ("^A" ?,AB(B) - ("^C" ?$,1 ((B) - ("^E" ?,AJ(B) - ("^G" ?$,1 <(B) - ("^H" ?$,1 D(B) - ("^I" ?,AN(B) - ("^J" ?$,1 T(B) - ("^O" ?,AT(B) - ("^S" ?$,1 |(B) - ("^U" ?,A[(B) - ("^W" ?$,1!4(B) - ("^Y" ?$,1!6(B) + ("^1" ?¹) + ("^2" ?²) + ("^3" ?³) + ("^A" ?Â) + ("^C" ?Ĉ) + ("^E" ?Ê) + ("^G" ?Ĝ) + ("^H" ?Ĥ) + ("^I" ?Î) + ("^J" ?Ä´) + ("^O" ?Ô) + ("^S" ?Ŝ) + ("^U" ?Û) + ("^W" ?Å´) + ("^Y" ?Ŷ) ("^^" ?^) - ("^a" ?,Ab(B) - ("^c" ?,A)(B) - ("^c" ?$,1 )(B) - ("^e" ?,Aj(B) - ("^g" ?$,1 =(B) - ("^h" ?$,1 E(B) - ("^i" ?,An(B) - ("^j" ?$,1 U(B) - ("^o" ?,At(B) - ("^r" ?,A.(B) - ("^s" ?$,1 }(B) - ("^u" ?,A{(B) - ("^w" ?$,1!5(B) - ("^y" ?$,1!7(B) - ("_+" ?,A1(B) - ("_:" ?,Aw(B) - ("_a" ?,A*(B) - ("_o" ?,A:(B) - ("_y" ?,A%(B) - ("_ " ?,A (B) + ("^a" ?â) + ("^c" ?©) + ("^c" ?ĉ) + ("^e" ?ê) + ("^g" ?ĝ) + ("^h" ?Ä¥) + ("^i" ?î) + ("^j" ?ĵ) + ("^o" ?ô) + ("^r" ?®) + ("^s" ?ŝ) + ("^u" ?û) + ("^w" ?ŵ) + ("^y" ?Å·) + ("_+" ?±) + ("_:" ?÷) + ("_a" ?ª) + ("_o" ?º) + ("_y" ?Â¥) + ("_ " ? ) ("` " ?`) - ("`A" ?,A@(B) - ("`E" ?,AH(B) - ("`I" ?,AL(B) - ("`O" ?,AR(B) - ("`U" ?,AY(B) - ("`W" ?$,1n`(B) - ("`Y" ?$,1or(B) + ("`A" ?À) + ("`E" ?È) + ("`I" ?Ì) + ("`O" ?Ò) + ("`U" ?Ù) + ("`W" ?Ẁ) + ("`Y" ?Ỳ) ("``" ?`) - ("`a" ?,A`(B) - ("`e" ?,Ah(B) - ("`i" ?,Al(B) - ("`o" ?,Ar(B) - ("`u" ?,Ay(B) - ("`w" ?$,1na(B) - ("`y" ?$,1os(B) + ("`a" ?à) + ("`e" ?è) + ("`i" ?ì) + ("`o" ?ò) + ("`u" ?ù) + ("`w" ?ẁ) + ("`y" ?ỳ) ("~ " ?~) - ("~!" ?,A!(B) - ("~$" ?,A#(B) - ("~-" ?,A-(B) - ("~." ?,A7(B) - ("~<" ?\,A+(B) - ("~=" ?,A/(B) - ("~>" ?\,A;(B) - ("~?" ?,A?(B) - ("~A" ?,AC(B) - ("~C" ?,AG(B) - ("~D" ?,AP(B) - ("~G" ?$,1 >(B) - ("~N" ?,AQ(B) - ("~O" ?,AU(B) - ("~O" ?$,1 @(B) - ("~S" ?$,1 ~(B) - ("~S" ?$,1! (B) - ("~T" ?,A^(B) - ("~U" ?$,1!,(B) - ("~Z" ?$,1!=(B) - ("~`" ?$,1$x(B) - ("~a" ?,Ac(B) - ("~c" ?,Ag(B) - ("~d" ?,Ap(B) - ("~e" ?$,1tL(B) - ("~g" ?$,1 ?(B) - ("~n" ?,Aq(B) - ("~o" ?,Au(B) - ("~o" ?$,1 A(B) - ("~p" ?,A6(B) - ("~s" ?,A'(B) - ("~s" ?$,1 (B) - ("~s" ?$,1!!(B) - ("~t" ?,A~(B) - ("~u" ?,A5(B) - ("~u" ?$,1!-(B) - ("~x" ?,A$(B) - ("~z" ?$,1!>(B) - ("~|" ?,A&(B) - ("~~" ?,A8(B) + ("~!" ?¡) + ("~$" ?£) + ("~-" ?­) + ("~." ?·) + ("~<" ?\«) + ("~=" ?¯) + ("~>" ?\») + ("~?" ?¿) + ("~A" ?Ã) + ("~C" ?Ç) + ("~D" ?Ð) + ("~G" ?Ğ) + ("~N" ?Ñ) + ("~O" ?Õ) + ("~O" ?Ä ) + ("~S" ?Ş) + ("~S" ?Å ) + ("~T" ?Þ) + ("~U" ?Ŭ) + ("~Z" ?Ž) + ("~`" ?˘) + ("~a" ?ã) + ("~c" ?ç) + ("~d" ?ð) + ("~e" ?€) + ("~g" ?ğ) + ("~n" ?ñ) + ("~o" ?õ) + ("~o" ?Ä¡) + ("~p" ?¶) + ("~s" ?§) + ("~s" ?ş) + ("~s" ?Å¡) + ("~t" ?þ) + ("~u" ?µ) + ("~u" ?Å­) + ("~x" ?¤) + ("~z" ?ž) + ("~|" ?¦) + ("~~" ?¸) ) - ;;; latin-pre.el ends here diff --cc leim/quail/thai.el index a21d3c5fa14,c86d80ba688..a3fec13d1c6 --- a/leim/quail/thai.el +++ b/leim/quail/thai.el @@@ -27,48 -27,18 +27,17 @@@ ;;; Code: (require 'quail) - (require 'thai-util) - - (defun quail-thai-update-translation (control-flag) - (if (integerp control-flag) - ;; Non-composable character typed. - (setq quail-current-str - (buffer-substring (overlay-start quail-overlay) - (overlay-end quail-overlay)) - unread-command-events - (string-to-list - (substring quail-current-key control-flag))) - (setq quail-current-str - (compose-string (quail-lookup-map-and-concat quail-current-key)))) - control-flag) - - (defun thai-generate-quail-map (translation-table) - (let ((i 0) - consonant vowel tone voweltone others) - ;; Categorize Thai characters into one of above. - (while (< i 128) - (let ((trans (aref translation-table i)) - ptype) - (if (eq trans 0) - nil - (if (> (length trans) 1) - (setq ptype 'voweltone - trans (vector (compose-string trans))) - (setq ptype (get-char-code-property (aref trans 0) 'phonetic-type)) - (cond ((memq ptype '(vowel-upper vowel-lower)) - (setq ptype 'vowel)) - ((not (memq ptype '(consonant tone))) - (setq ptype 'others)))) - (set ptype (cons (cons (char-to-string i) trans) - (symbol-value ptype))))) - (setq i (1+ i))) - - (quail-map-from-table - '((base-state (consonant . vt-state) - vowel tone voweltone others) - (vt-state (vowel . t-state) - voweltone tone) - (t-state tone))))) + - + (defmacro thai-generate-quail-map (translation-table) + (let (map) + (dotimes (i (length translation-table)) + (let ((trans (aref translation-table i))) + (when (not (eq trans 0)) + (if (> (length trans) 1) + (setq trans (vector trans)) + (setq trans (aref trans 0))) + (setq map (cons (list (char-to-string i) trans) map))))) + `(quail-define-rules ,@map))) ;; Thai Kesmanee keyboard support. @@@ -80,52 -50,51 +49,50 @@@ The difference from the ordinal Thai ke ',T_(B' and ',To(B' are assigned to '\\' and '|' respectively, ',T#(B' and ',T%(B' are assigned to '`' and '~' respectively, Don't know where to assign characters ',Tz(B' and ',T{(B'." - nil t t t t nil nil nil 'quail-thai-update-translation nil t) - - (quail-install-map - (thai-generate-quail-map - [ - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ; control codes - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ; control codes - 0 "#" "." ",Tr(B" ",Ts(B" ",Tt(B" ",TQi(B" ",T'(B" ; SPC .. ' - ",Tv(B" ",Tw(B" ",Tu(B" ",Ty(B" ",TA(B" ",T"(B" ",Tc(B" ",T=(B" ; ( .. / - ",T((B" ",TE(B" "/" "_" ",T@(B" ",T6(B" ",TX(B" ",TV(B" ; 0 .. 7 - ",T$(B" ",T5(B" ",T+(B" ",TG(B" ",T2(B" ",T*(B" ",TL(B" ",TF(B" ; 8 .. ? - ",Tq(B" ",TD(B" ",TZ(B" ",T)(B" ",T/(B" ",T.(B" ",Tb(B" ",T,(B" ; @ .. G - ",Tg(B" ",T3(B" ",Tk(B" ",TI(B" ",TH(B" ",Tn(B" ",Tl(B" ",TO(B" ; H .. O - ",T-(B" ",Tp(B" ",T1(B" ",T&(B" ",T8(B" ",Tj(B" ",TN(B" "\"" ; P .. W - ")" ",Tm(B" "(" ",T:(B" ",T_(B" ",TE(B" ",TY(B" ",Tx(B" ; X .. _ - ",T#(B" ",T?(B" ",TT(B" ",Ta(B" ",T!(B" ",TS(B" ",T4(B" ",T`(B" ; ` .. g - ",Ti(B" ",TC(B" ",Th(B" ",TR(B" ",TJ(B" ",T7(B" ",TW(B" ",T9(B" ; h .. o - ",TB(B" ",Tf(B" ",T>(B" ",TK(B" ",TP(B" ",TU(B" ",TM(B" ",Td(B" ; p .. w - ",T;(B" ",TQ(B" ",T<(B" ",T0(B" ",To(B" "," ",T%(B" 0 ; x .. DEL - ])) - + nil t t t t nil nil nil nil nil t) + + (thai-generate-quail-map + [ + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ; control codes + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ; control codes - 0 "#" ",TF(B" ",Tr(B" ",Ts(B" ",Tt(B" ",TQi(B" ",T'(B" ; SPC .. ' ++ 0 "#" "." ",Tr(B" ",Ts(B" ",Tt(B" ",TQi(B" ",T'(B" ; SPC .. ' + ",Tv(B" ",Tw(B" ",Tu(B" ",Ty(B" ",TA(B" ",T"(B" ",Tc(B" ",T=(B" ; ( .. / + ",T((B" ",TE(B" "/" "_" ",T@(B" ",T6(B" ",TX(B" ",TV(B" ; 0 .. 7 + ",T$(B" ",T5(B" ",T+(B" ",TG(B" ",T2(B" ",T*(B" ",TL(B" ",TF(B" ; 8 .. ? + ",Tq(B" ",TD(B" ",TZ(B" ",T)(B" ",T/(B" ",T.(B" ",Tb(B" ",T,(B" ; @ .. G + ",Tg(B" ",T3(B" ",Tk(B" ",TI(B" ",TH(B" ",Tn(B" ",Tl(B" ",TO(B" ; H .. O + ",T-(B" ",Tp(B" ",T1(B" ",T&(B" ",T8(B" ",Tj(B" ",TN(B" "\"" ; P .. W + ")" ",Tm(B" "(" ",T:(B" ",T_(B" ",TE(B" ",TY(B" ",Tx(B" ; X .. _ + ",T#(B" ",T?(B" ",TT(B" ",Ta(B" ",T!(B" ",TS(B" ",T4(B" ",T`(B" ; ` .. g + ",Ti(B" ",TC(B" ",Th(B" ",TR(B" ",TJ(B" ",T7(B" ",TW(B" ",T9(B" ; h .. o + ",TB(B" ",Tf(B" ",T>(B" ",TK(B" ",TP(B" ",TU(B" ",TM(B" ",Td(B" ; p .. w - ",T;(B" ",TQ(B" ",T<(B" ",T0(B" ",To(B" "." ",T%(B" 0 ; x .. DEL ++ ",T;(B" ",TQ(B" ",T<(B" ",T0(B" ",To(B" "," ",T%(B" 0 ; x .. DEL + ]) - ;; Thai Pattachote keyboard support. (quail-define-package "thai-pattachote" "Thai" ",T!;(B>" t "Thai Pattachote input method with TIS620 keyboard layout" - nil t t t t nil nil nil 'quail-thai-update-translation nil t) - - (quail-install-map - (thai-generate-quail-map - [ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ; control codes - 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ; control codes - 0 "+" ",T1(B" "/" "," "?" "_" ",T"(B" ; SPC .. ' - "(" ")" "." "%" ",TP(B" ",Tq(B" ",T((B" ",T>(B" ; ( .. / - ",Tp(B" "=" ",Tr(B" ",Ts(B" ",Tt(B" ",Tu(B" ",TY(B" ",Tw(B" ; 0 .. 7 - ",Tx(B" ",Ty(B" ",T&(B" ",Td(B" ",T?(B" ",Tv(B" ",T2(B" ",TL(B" ; 8 .. ? - "\"" ",Tk(B" ",TQ(B" ",T0(B" ",TS(B" ",Tf(B" ",T3(B" ",Tl(B" ; @ .. G - ",TW(B" ",T+(B" ",T<(B" ",T*(B" ",Tb(B" ",TN(B" ",TH(B" ",T6(B" ; H .. O - ",T2(B" ",Tj(B" ",T-(B" ",T8(B" ",TI(B" ",T=(B" ",T@(B" ",TD(B" ; P .. W - ",T.(B" ",TV(B" ",T.(B" ",Tc(B" ",TZ(B" ",T2(B" ",TX(B" "-" ; X .. _ - ",T#(B" ",Ti(B" ",TT(B" ",TE(B" ",T'(B" ",TB(B" ",T!(B" ",TQ(B" ; ` .. g - ",TU(B" ",TA(B" ",TR(B" ",T9(B" ",T`(B" ",TJ(B" ",T$(B" ",TG(B" ; h .. o - ",Ta(B" ",Tg(B" ",TM(B" ",T7(B" ",TC(B" ",T4(B" ",TK(B" ",T5(B" ; p .. w - ",T;(B" ",Th(B" ",T:(B" ",TO(B" ",Tm(B" ",TF(B" ",T%(B" 0 ; x .. DEL - ])) + nil t t t t nil nil nil nil nil t) + + (thai-generate-quail-map + [ + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ; control codes + 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ; control codes - 0 "+" ",T1(B" "/" "," "?" "_" ",T"(B" ; SPC .. ' ++ 0 "+" ",T1(B" "/" "," "?" "_" ",T"(B" ; SPC .. ' + "(" ")" "." "%" ",TP(B" ",Tq(B" ",T((B" ",T>(B" ; ( .. / + ",Tp(B" "=" ",Tr(B" ",Ts(B" ",Tt(B" ",Tu(B" ",TY(B" ",Tw(B" ; 0 .. 7 + ",Tx(B" ",Ty(B" ",T&(B" ",Td(B" ",T?(B" ",Tv(B" ",T2(B" ",TL(B" ; 8 .. ? + "\"" ",Tk(B" ",TQ(B" ",T0(B" ",TS(B" ",Tf(B" ",T3(B" ",Tl(B" ; @ .. G + ",TW(B" ",T+(B" ",T<(B" ",T*(B" ",Tb(B" ",TN(B" ",TH(B" ",T6(B" ; H .. O + ",T2(B" ",Tj(B" ",T-(B" ",T8(B" ",TI(B" ",T=(B" ",T@(B" ",TD(B" ; P .. W + ",T.(B" ",TV(B" ",T.(B" ",Tc(B" ",TZ(B" ",T2(B" ",TX(B" "-" ; X .. _ + ",T#(B" ",Ti(B" ",TT(B" ",TE(B" ",T'(B" ",TB(B" ",T!(B" ",TQ(B" ; ` .. g + ",TU(B" ",TA(B" ",TR(B" ",T9(B" ",T`(B" ",TJ(B" ",T$(B" ",TG(B" ; h .. o + ",Ta(B" ",Tg(B" ",TM(B" ",T7(B" ",TC(B" ",T4(B" ",TK(B" ",T5(B" ; p .. w + ",T;(B" ",Th(B" ",T:(B" ",TO(B" ",Tm(B" ",TF(B" ",T%(B" 0 ; x .. DEL + ]) ;;; thai.el ends here diff --cc leim/quail/welsh.el index 41bb0905330,bbd81843b34..d7807dc1628 --- a/leim/quail/welsh.el +++ b/leim/quail/welsh.el @@@ -34,7 -32,7 +32,7 @@@ (quail-define-package "welsh" "Welsh" "$,1!4(B" t - "Welsh postfix input method, using Unicode" - "Welsh postfix input method." ++ "Welsh postfix input method" nil t nil nil nil nil nil nil nil nil t) (quail-define-rules diff --cc lisp/ChangeLog.22 index 00000000000,00000000000..c5a9b1b6e87 new file mode 100644 --- /dev/null +++ b/lisp/ChangeLog.22 @@@ -1,0 -1,0 +1,1504 @@@ ++2003-06-23 Kenichi Handa ++ ++ * international/mule-conf.el (utf-16-be): Make it an alias of ++ utf-16be-with-signature. ++ (utf-16-le): Make it an alias of utf-16le-with-signature. ++ (utf-16-be-with-signature, utf-16-le-with-signature): Remove these ++ coding system aliases. ++ ++ * international/utf-7.el (utf-7-decode, utf-7-encode): Use ++ utf-16be, not utf-16-be. ++ ++2003-06-06 Kenichi Handa ++ ++ * international/mule.el (coding-system-category): New function. ++ (set-coding-priority): Re-written. ++ (make-translation-table): Re-written. ++ ++ * international/mule-cmds.el (universal-coding-system-argument): ++ Check the coding system type `undecided', not `t'. ++ (sort-coding-systems): Fix for iso-2022 coding systems. ++ (find-multibyte-characters): Fix for eight-bit chars. ++ (set-language-environment): Set charset priorities according to ++ the charsets supported by the coding systems of higher priorities. ++ ++2003-06-05 Kenichi Handa ++ ++ * font-lock.el ++ (font-lock-match-c-style-declaration-item-and-skip-to-next): ++ Check (match-end 2) before goto there. ++ ++2003-05-30 Kenichi Handa ++ ++ * international/mule.el (ctext-non-standard-designations-alist): ++ Exclude entries for iso8859-1[03456]. Fix docstring. ++ (ctext-pre-write-conversion): Make it work for the case that FROM ++ is a string. Don't do unnecessary save-restriction and ++ narrow-to-region. ++ ++2003-05-28 Kenichi Handa ++ ++ * international/mule.el (ctext-non-standard-encodings-alist): ++ Renamed from non-standard-icccm-encodings-alist. ++ (ctext-non-standard-encodings-regexp): New variable ++ (ctext-post-read-conversion): Full rewrite. ++ (ctext-non-standard-designations-alist): Renamed from ++ non-standard-designations-alist. ++ (ctext-pre-write-conversion): Full rewrite. ++ (define-coding-system): Doc fix (escape '"' by '\'). ++ ++2003-05-24 Dave Love ++ ++ * international/mule-conf.el (utf-16le, utf-16be-16be) ++ (utf-16le-with-signature, utf-16be-with-signature, utf-16): ++ Define :mime-text-unsuitable. `utf-16{b,l}e...' renamed from ++ `-16-{b,l}e-' and aliases defined for the old names. ++ ++ * international/mule.el (define-coding-system): Doc fix. ++ ++2003-05-08 Dave Love ++ ++ * international/utf-7.el: New file. ++ ++ * international/mule-conf.el (utf-7): New. ++ ++2003-05-06 Kenichi Handa ++ ++ * international/mule-conf.el (utf-16-be) ++ (utf-16-be-with-signature): Cancel the previous change. ++ (utf-16): Add :endinan 'big. ++ ++ * international/mule.el (define-coding-system): Fix docstring ++ about :bom and :endian. ++ ++2003-04-30 Dave Love ++ ++ * mule-conf.el (utf-16-be) ++ (utf-16-be-with-signature): Comment-out :endian. ++ ++ * mule-diag.el (describe-character-set): Fix ++ printing dimensions. Use `,AW(B', not `x'. ++ ++2003-04-12 Kenichi Handa ++ ++ * international/fontset.el: Register CDAC fonts in ++ face-font-rescale-alist instead of face-resizing-fonts. ++ ++2003-03-28 Kenichi Handa ++ ++ * international/mule-conf.el (utf-16-le): Renamed from ++ utf-16-le-nosig. ++ (utf-16-be): Renamed from utf-16-be-nosig. ++ (utf-16-le-with-signature): Renamed from utf-16-le. ++ (utf-16-be-with-signature): Renamed from utf-16-be. ++ (utf-16): Set :bom to (utf-16-le-with-signature . ++ utf-16-be-with-signature). ++ ++2003-02-12 Dave Love ++ ++ * language/georgian.el (georgian-ps, georgian-academy): New coding ++ system. ++ ++ * language/chinese.el (chinese-big5-hkscs): New coding system. ++ (big5-hkscs, cn-big5-hkscs): New aliases. ++ ++ * international/mule-conf.el (big5-hkscs, georgian-academy): New ++ charset. ++ (ibm1047): Doc fix. ++ (pt154): Add mime-charset. ++ (ptcp154, cp154): New aliases. ++ ++2003-02-04 Dave Love ++ ++ * international/mule.el (charset-iso-final-char) ++ (define-coding-system): Doc fix. ++ ++2003-01-30 Kenichi Handa ++ ++ * international/ccl.el (define-ccl-program): Fset charset-id ++ to charset-id-internal temporarily. ++ ++2003-01-21 Kenichi Handa ++ ++ * international/mule-diag.el (list-block-of-chars): Use ++ decode-char instead of make-char. ++ ++2003-01-10 Kenichi Handa ++ ++ * international/fontset.el: Enable the default fontset to use ++ unicode fonts for ASCII characters. ++ (x-decompose-font-name): Don't try to resolve PATTERN by ++ x-resolve-font-name. ++ (x-complement-fontset-spec): Never prepend an ASCII font. ++ (create-fontset-from-fontset-spec): If a fontset of the same name ++ already exists, override it instead of signalling an error. Don't ++ turn `ascii' into `latin'. Don't update fontset-alias-alist here. ++ ++ * international/mule-conf.el (unicode-bmp): Delete duplicated ++ definition. Give it :code-offset 0. ++ ++ * international/mule-diag.el (print-fontset-element): New ++ function. ++ (print-fontset): Use print-fontset-element to print the elements ++ of a fontset. Use it also to print fonts fallen back to the ++ default fontsets. ++ ++ * term/x-win.el: Delete the code to create a fontset from the X ++ resource "Font". ++ ++2003-01-09 Kenichi Handa ++ ++ * international/mule-diag.el (print-fontset): Insert proper ++ newline and indentation spaces. ++ ++2003-01-07 Dave Love ++ ++ * emacs-lisp/byte-opt.el (side-effect-free-fns): Add langinfo, ++ multibyte-char-to-unibyte. ++ ++ * emacs-lisp/cl-macs.el (cl-make-type-test): Use characterp. ++ ++ * international/mule-cmds.el (find-coding-systems-for-charsets): ++ Use coding-system-type, coding-system-charset-list. ++ (standard-keyboard-coding-systems): Deleted. ++ (set-locale-environment): Maybe set keyboard coding system. Avoid ++ typical warning about codeset in C locale. ++ ++2003-01-06 Kenichi Handa ++ ++ * international/fontset.el (x-complement-fontset-spec): If a ++ fontname doesn't conform to XLFD format, try to get XLFD name by ++ x-list-fonts. ++ ++2002-12-08 Dave Love ++ ++ * international/mule-conf.el (mik, pt154): New charsets. ++ ++ * language/cyrillic.el (mik, pt154): New coding systems. ++ ++2002-12-05 Dave Love ++ ++ * international/characters.el: Move parts around so that syntax ++ definitions aren't overwritten wrongly. ++ ++ * international/mule-diag.el (unicode-data): Fix treatment of ++ numeric-value fields. ++ ++2002-12-01 Dave Love ++ ++ * progmodes/cap-words.el: New file. ++ ++2002-11-07 Kenichi Handa ++ ++ The following changes are to make character composition happen ++ automatically on displaying. ++ ++ * composite.el: Remove all autoload cookies. ++ (composition-function-table): Variable declaration moved from ++ composite.c. Format changed. ++ (save-buffer-state): Copied from font-lock.el. ++ (auto-composition-chunk-size): New variable. ++ (auto-compose-chars): New function. Set ++ auto-composition-function to it. ++ (toggle-auto-composition): New function. ++ ++ * international/characters.el: Make all chararacters in the ++ charset tibetan to tibetan script. ++ ++ * international/mule-conf.el (tibetan): Fix :code-space property. ++ (tibetan-1-column): Delete :superset property, add :code-offset ++ property. ++ ++ * international/quail.el (quail-input-string-to-events): Don't ++ generate compose-chars-after events. ++ ++ * language/devanagari.el: Register devanagari-composable-function ++ in composition-function-table for Devanagari characters. ++ ++ * language/devan-util.el: Register devanagari-composable-pattern ++ in indian-composable-pattern. ++ (devanagari-composition-function): Add autoload cookie. Change ++ arguments to conform to composition-function-table. ++ ++ * language/european.el: Register combining characters in ++ composition-function-table. ++ (diacritic-composition-function): Change arguments to conform to ++ composition-function-table. ++ ++ * language/indian.el (indian-composable-pattern): New variable. ++ ++ * language/ind-util.el (indian-compose-region): Don't call ++ compose-chars-after, instead call a function registered in ++ composition-function-table. ++ ++ * language/lao.el: Register lao-composition-function in ++ composition-function-table for Lao vowels and tones. ++ (lao): Don't use lao-post-read-conversion. ++ ++ * language/lao-util.el (lao-post-read-conversion): Delete this ++ function. ++ (lao-composition-function): Change arguments to conform to ++ composition-function-table. ++ ++ * language/thai.el: Register thai-composition-function in ++ composition-function-table for Thai vowels and tones. ++ (thai-tis620): Don't use thai-post-read-conversion. ++ ++ * language/thai-util.el (thai-post-read-conversion): Delete this ++ function. ++ (thai-composition-function): Change arguments to conform to ++ composition-function-table. ++ ++ * language/tibetan.el: Register tibetan-composition-function in ++ composition-function-table for Tibetan characters. ++ ++ * language/tibet-util.el (tibetan-composition-function): Change ++ arguments to conform to composition-function-table. ++ ++ * loadup.el: Load composite. ++ ++2002-11-01 Dave Love ++ ++ * international/mule-diag.el (unicode-data): Find unicodedata-file ++ literally. ++ ++2002-10-31 Dave Love ++ ++ * international/fontset.el: Doc fixes. ++ ("fontset-default"): Add iso8859-15. ++ ++2002-10-30 Kenichi Handa ++ ++ * international/fontset.el ("fontset-default"): Add an entry for ++ `indian-is13194' charset. ++ ++ * international/mule-conf.el (indian-is13194): Delete :unify-map ++ property. Don't unify this charset. ++ (indian-2-column): Delete :superset property, add :code-offset ++ property. ++ ++ * language/devan-util.el: Delete the superfluous autoload cookie ++ near the head of the file. ++ (devanagari-compose-region): Add autoload cookie. ++ ++ * language/ind-util.el (indian-2-column-to-ucs-region): Add ++ autoload cookie. ++ ++2002-10-29 Kenichi Handa ++ ++ * international/fontset.el ("fontset-default"): Add an entry for ++ `devanagari' script and `malayalam-glyph' charset. ++ (face-resizing-fonts): Register CDAC fonts. ++ ++ * international/mule-conf.el (malayalam-glyph): New charset. ++ ++2002-10-28 Dave Love ++ ++ * term/x-win.el: Fix typo in loop setting x-keysym-table. ++ ++2002-10-27 Dave Love ++ ++ * international/mule-conf.el (utf-8-emacs): Delete :mime-charset. ++ ++ * term/x-win.el: Remove some ancient compatibility code. Populate ++ x-keysym-table. ++ ++ * international/mule-cmds.el (wid-edit): Require when compiling. ++ (set-locale-environment): Check locale against coding system ++ aliases too. ++ ++2002-10-25 Kenichi Handa ++ ++ * language/indian.el (in-is13194-devanagari): Fix ++ :post-read-conversion and :pre-write-conversion. ++ ++2002-10-16 Dave Love ++ ++ * international/mule-cmds.el (charset): Supply doc, :tag. ++ ++2002-10-16 Kenichi Handa ++ ++ * emacs-lisp/bytecomp.el (byte-compile-lapcode): Be sure to ++ return a unibyte string. ++ ++2002-10-15 Kenichi Handa ++ ++ * international/mule-cmds.el (set-locale-environment): Delete ++ superfluous ')'s at the tail. ++ ++2002-10-14 Kenichi Handa ++ ++ * international/mule.el (define-coding-system): Fix typo; ++ "docode" -> "decode". ++ ++2002-10-14 Dave Love ++ ++ * emacs-lisp/byte-opt.el (side-effect-free-fns): Add ++ string-make-unibyte string-make-multibyte string-to-multibyte ++ string-as-multibyte string-as-unibyte. ++ ++2002-10-11 Dave Love ++ ++ * language/english.el (ibm1047, cp1047): New. ++ ++ * international/mule-conf.el (alternativnyj): Use new table. ++ (cp866): Define standalone, not as alias. ++ (ibm866): Change alias. ++ (ibm1047): New. ++ ++ * language/cyrillic.el (cyrillic-alternativnyj): Remove ++ mime-charset. Use alternativnj charset. ++ (cp866): Remove alias. Define standalone. ++ ++2002-10-09 Dave Love ++ ++ * language/european.el ("Latin-2", "Latin-5"): Doc fix. ++ ++ * language/cyrillic.el ("Bulgarian"): Fix input method and ++ charset. ++ ("Tajik", "Belarusian"): Fix charset. ++ ++ * international/mule-cmds.el (describe-language-environment): Be ++ case-insensitive when looking for input methods. ++ (locale-name-match): Doc fix. ++ (locale-charset-match-p): New. ++ (set-locale-environment): Warn if coding system doesn't agree ++ with system locale. ++ ++2002-10-09 Kenichi Handa ++ ++ * international/mule-cmds.el (select-safe-coding-system): If ++ :mime-charset property of a coding system is not equal to that ++ coding system, don't show that :mime-charset name. ++ ++2002-10-06 Dave Love ++ ++ * international/quail.el (quail-help): Fix underlining. ++ ++ * language/chinese.el (chinese-gb18030, gb18030) ++ ("Chinese-GB18030"): New. ++ ++ * international/mule-cmds.el (locale-language-names): Add GB18030. ++ ++ * international/swedish.el: Don't require latin-1. ++ ++2002-10-05 Dave Love ++ ++ * language/czech.el ("Czech"): ++ * language/slovak.el ("Slovak"): ++ * language/romanian.el ("Romanian"): Remove unibyte-syntax property. ++ ++ * language/greek.el ("Greek"): Remove unibyte-syntax property. ++ (cp737): New coding system. ++ ++ * language/european.el: Remove unibyte-syntax properties. ++ Augment some coding-system properties. ++ ++ * international/mule-cmds.el (set-language-environment): Check ++ for charset property of environment. ++ (language-info-alist): Doc fix. ++ ++ * Makefile.in (DONTCOMPILE): Remove latin-N.el. ++ ++ * international/characters.el: Add Latin-1. ++ ++ * international/latin-1, international/latin-2, international/latin-3: ++ * international/latin-4, international/latin-5, international/latin-8: ++ * international/latin-9: Deleted. ++ ++ * loadup.el: Remove international/latin-N.el. ++ ++ * case-table.el (set-case-syntax-charset, set-case-syntax-1) ++ (set-case-syntax-delims, set-case-syntax-pair, set-case-syntax): ++ Undo last changes. ++ ++ * international/mule-diag.el (unicode-data): Check that ++ `unicodedata-file' exists. ++ ++2002-10-05 Kenichi Handa ++ ++ * international/fontset.el (charset-script-alist): Fix script ++ names. ++ ++2002-10-01 Kenichi Handa ++ ++ * international/latin-1.el: Bind set-case-syntax-charset to ++ iso-8859-1. Never provide latin-1. ++ ++ * international/latin-2.el: Bind set-case-syntax-charset to ++ iso-8859-2. Never provide latin-2. ++ ++ * international/latin-3.el: Bind set-case-syntax-charset to ++ iso-8859-3. Never provide latin-3. ++ ++ * international/latin-4.el: Bind set-case-syntax-charset to ++ iso-8859-4. Never provide latin-4. ++ ++ * international/latin-5.el: Bind set-case-syntax-charset to ++ iso-8859-9. Never provide latin-5. ++ ++ * international/latin-8.el: Bind set-case-syntax-charset to ++ iso-8859-14. Never provide latin-8. ++ ++ * international/latin-9.el: Bind set-case-syntax-charset to ++ iso-8859-15. Never provide latin-9. ++ ++ * case-table.el (set-case-syntax-set-multibyte): This variable ++ deleted. ++ (set-case-syntax-charset): New variable. ++ (set-case-syntax-1): New function. ++ (set-case-syntax-delims, set-case-syntax-pair, set-case-syntax): ++ Call set-case-syntax-1 on arguments. ++ ++ * international/mule-cmds.el (set-language-environment): Delete ++ unnecessary setup of syntax/case for unibyte case. ++ ++2002-09-27 Kenichi Handa ++ ++ * international/fontset.el (generate-fontset-menu): Exclude the ++ default fontset. ++ ++ * faces.el (describe-face): Describe :fontset property too. ++ ++2002-09-25 Dave Love ++ ++ * international/characters.el: Fix last change. ++ ++2002-09-23 Dave Love ++ ++ * international/characters.el: Don't set char width of CJK ++ charsets generally. Add some non-word syntax cases. ++ (korean-ksc5601): Fix ranges with symbol syntax. ++ ++2002-09-14 Dave Love ++ ++ * Makefile.in (DONTCOMPILE): Add indian.el. ++ ++ * language/ind-util.el: ++ (ucs-bengali-to-is13194-alist, ucs-assamese-to-is13194-alist) ++ (ucs-gurmukhi-to-is13194-alist, ucs-gujarati-to-is13194-alist) ++ (ucs-oriya-to-is13194-alist, ucs-tamil-to-is13194-alist) ++ (ucs-telugu-to-is13194-alist, ucs-malayalam-to-is13194-alist))): ++ Remove declarations and let-bind them in re-written top-level loop ++ over scripts, including ucs-devanagari-to-is13194-alist. ++ ++2002-09-14 Dave Love ++ ++ * international/mule-cmds.el (charset): Move. ++ (language-info-custom-alist): Move. Alter :set. ++ (find-coding-systems-for-charsets): Check charset type before ++ getting :charset-list. ++ ++ * language/ind-util.el (iscii-to-ucs-region): Fix typo. ++ ++2002-09-12 Kenichi Handa ++ ++ * international/mule-conf.el (emacs-mule, iso-2022-7bit) ++ (iso-2022-7bit-ss2, iso-2022-7bit-lock, iso-2022-8bit-ss2) ++ (compound-text, ctext-no-compositions): Cancel provious change. ++ ++2002-09-09 Dave Love ++ ++ * wid-edit.el (widget-string-complete): New. ++ (widget-coding-system-prompt-value-history): Deleted. ++ (coding-system): Use coding-system-value-history. ++ ++ * international/mule-cmds.el (charset): New widget. ++ (language-info-custom-alist): Use it. ++ (default-input-method): Modify :type. ++ ++2002-09-08 Dave Love ++ ++ * language/ind-util.el (ucs-devanagari-to-is13194-alist) ++ (indian-glyph-char, indian-char-glyph): Deleted. ++ (is13194-default-repertory): Renamed from ++ is13194-default-repartory, ++ (iscii-to-ucs-region): Hoist evals from loop. ++ ++ * language/devan-util.el (dev-charseq): Avoid indian-glyph-char. ++ ++ * language/indian.el (indian-script-table) ++ (ccl-encode-indian-glyph-font): Deleted. ++ ++ * international/mule-conf.el (emacs-mule, iso-2022-7bit) ++ (iso-2022-7bit-ss2, iso-2022-7bit-lock, iso-2022-8bit-ss2) ++ (compound-text, ctext-no-compositions): Remove :charset-list. ++ ++ * international/mule-cmds.el (language-info-custom-alist): New. ++ (input-method-activate-hook, input-method-inactivate-hook) ++ (input-method-after-insert-chunk-hook) ++ (input-method-use-echo-area, set-language-environment-hook) ++ (exit-language-environment-hook): Customize. ++ (find-coding-systems-for-charsets): Rewritten. ++ (default-input-method): Add :link. ++ ++2002-09-08 Dave Love ++ ++ * international/mule-conf.el (eight-bit): Add :docstring, ++ :short-name properties. ++ (cp851): Doc fix. ++ (unicode-bmp): New. ++ ++ * case-table.el (set-case-syntax-pair): Remove check on byte ++ lengths. ++ ++ * language/european.el (cp858): New. ++ ("Turkish"): Add special case rules. ++ ++2002-09-05 Dave Love ++ ++ * international/characters.el: Make $(D*s(B and $(D+s(B a case pair. ++ ++2002-09-03 Kenichi Handa ++ ++ * international/mule-conf.el: Don't define the charset iso-8859-1 ++ here, just setup its properties. ++ ++2002-08-21 Kenichi Handa ++ ++ * international/mule-conf.el (utf-8): Give :mime-charset property. ++ ++2002-08-20 Kenichi Handa ++ ++ * international/characters.el: Remove duplicated case setting for ++ Cyrillic and Greek letters. Don't setup printable-chars here. ++ ++ * case-table.el (describe-buffer-case-table): Handle the case ++ that KEY is a cons within map-char-table. ++ ++2002-08-19 Kenichi Handa ++ ++ * international/characters.el: Fix categories ?A and ?C. Treat ++ ASCII characters as `latin' script. ++ ++2002-08-18 Kenichi Handa ++ ++ * international/fontset.el (fontset-plain-name): If the fontset ++ name doesn't ends with "-fontset-*", use family name as the first ++ part of the plain name. ++ (create-fontset-from-ascii-font): If "fontset-startup" is not yet ++ created, use that name for the fontset. Fix arguments to ++ subst-char-in-string. ++ ++2002-08-18 Dave Love ++ ++ * term.el (term-char-mode): Remove generic-character-list code. ++ ++ * ruler-mode.el (ruler-mode-character-validate): Use characterp. ++ ++ * wid-edit.el (character): Use characterp. ++ ++ * international/mule-diag.el (describe-coding-system): Add utf-16 ++ case. ++ ++ * language/viet-util.el (viet-encode-viscii-char): Use encode-char. ++ ++ * language/cyrillic.el ("Tajik", "Bulgarian", "Belarusian"): Add ++ charset. ++ ("Ukrainian"): New. ++ ++ * language/georgian.el (georgian-ps): New coding system. ++ ++2002-08-15 Kenichi Handa ++ ++ * international/mule-cmds.el (reset-language-environment): Don't ++ set nonascii-translation-table and nonascii-insert-offset. Call ++ set-unibyte-charset, not set-primary-charset. ++ (nonascii-translation-table, nonascii-insert-offset): Declare ++ these variable as obsolete ones. ++ (set-language-environment): Call set-unibyte-charset, not ++ set-primary-charset. Call set-charset-priority with `charset' ++ info of the language environment. ++ ++2002-08-08 Kenichi Handa ++ ++ * tar-mode.el (tar-extract): Fix previous change. ++ ++2002-08-02 Kenichi Handa ++ ++ * international/characters.el (next-word-boundary-han): Don't ++ treat katakana following han characters as a part of a word. ++ ++2002-08-01 Kenichi Handa ++ ++ * international/characters.el: Call map-charset-chars on big5 ++ (not chinese-big5-1/2) to set categories `c', `C', and `|'. ++ (next-word-boundary-han): New function. Register it in ++ next-word-boundary-function-table. ++ (next-word-boundary-kana): Likewise. ++ ++2002-07-31 Kenichi Handa ++ ++ * arc-mode.el (archive-file-name-coding-system): New variable. ++ Make it permanent-local. ++ (byte-after, bref, insert-unibyte): New function. Change most of ++ char-after, aref, insert to them respectively. ++ (archive-mode): Set archive-file-name-coding-system. ++ (archive-summarize): Don't change the buffer's multibyteness. ++ (archive-extract): Inherit archive-file-name-coding-system from ++ archive-superior-buffer. Bind coding-system-for-write to ++ archive-file-name-coding-system. ++ (archive-*-write-file-member): Encode ENAME by ++ archive-file-name-coding-system. Bind coding-system-for-write to ++ no-conversion. ++ (archive-rename-entry): Encode the filename by ++ archive-file-name-coding-system. ++ (archive-mode-revert): Don't change the buffer's multibyteness. ++ (archive-arc-summarize, archive-lzh-summarize, ++ archive-zoo-summarize): Don't change the buffer's multibyteness. ++ Decode filenames by archive-file-name-coding-system. ++ (archive-arc-rename-entry, archive-zip-chmod-entry): Don't change ++ the buffer's multibyteness. ++ ++ * tar-mode.el (tar-file-name-coding-system): New variable. Make ++ it permanent-local.p ++ (tar-header-block-tokenize): Decode filename and linkname by ++ tar-file-name-coding-system. ++ (tar-header-block-checksum): Call multibyte-char-to-unibyte to get ++ the byte value of eight-bit chars. ++ (tar-summarize-buffer): Call set-buffer-multibyte with METHOD ++ `to'. Delete unnecessary call of position-bytes. ++ (tar-mode): Set tar-file-name-coding-system. Delete unnecessary ++ call of position-bytes. ++ (tar-extract): Simplified by calling decode-coding-region with ++ DESTINATION argument. Don't toggle multibyteness of tar buffer. ++ (tar-copy): Don't toggle multibyteness of tar buffer. ++ (tar-expunge): Likewise. ++ (tar-clear-modification-flags): Delete unnecessary call of ++ position-bytes. ++ (tar-rename-entry): Call tar-alter-one-field with encoded new ++ name. ++ (tar-alter-one-field): Don't toggle multibyteness of tar buffer. ++ Convert new-data-string by string-to-multibyte before inserting ++ it. ++ (tar-subfile-save-buffer): Don't toggle multibyteness of tar ++ buffer. Simplified by calling encoding-coding-region with ++ DESTINATION argument. ++ (tar-mode-write-file): Delete unnecessary call of ++ byte-to-position. ++ ++2002-07-30 Dave Love ++ ++ * international/titdic-cnv.el (quail-cxterm-package-ext-info): Doc ++ fix. ++ ++ * emacs-lisp/copyright.el (copyright-regexp): Remove redundancy. ++ ++2002-07-26 Kenichi Handa ++ ++ * international/characters.el: Setup char-script-table. ++ ++ * international/fontset.el: Setup the default fontset by the new ++ script based way. ++ (x-complement-fontset-spec): Change the format of arg FONTLIST to ++ an alist of charsets vs font name lists. ++ (charset-script-alist): New variable. ++ (create-fontset-from-fontset-spec): Allow script name in ++ FONTSET-SPEC. If charset is specified in FONTSET-SPEC, change it ++ to the corresponding script name. ++ (create-fontset-from-ascii-font): Slightly tuned. ++ ++ * international/mule-conf.el (devanagari-glyph): New charset. ++ Unify these charsets: korean-ksc5601, ipa, tibetan, ethiopic, ++ japanese-jisx0208, japanese-jisx0212, japanese-jisx0213-1, ++ japanese-jisx0213-2. ++ ++ * international/mule-diag.el (print-fontset): Use describe-vector ++ to handle a char table returned by fontset-info. ++ ++ * language/indian.el: Don't register ccl-encode-indian-glyph-font ++ and ccl-encode-unicode-font in font-ccl-encoder-alist. ++ ++2002-07-18 Dave Love ++ ++ * startup.el (fancy-splash-head): Warn about status. ++ (fancy-splash-tail): Copyright up-date. ++ ++2002-07-17 Kenichi Handa ++ ++ * international/mule-util.el (detect-coding-with-priority): Fix ++ the place of using `,' marker in backguote form. ++ ++2002-07-12 Dave Love ++ ++ * international/mule-conf.el (mule-unicode-e000-ffff): Set ++ :max-code. ++ ++ * international/mule-util.el (with-coding-priority): Fix. ++ ++ * international/mule.el (set-coding-priority): Fix obsolete spec. ++ ++2002-07-05 Dave Love ++ ++ * international/mule-cmds.el (leim-list-header): Add coding tag. ++ ++2002-07-05 Dave Love ++ ++ * international/mule-cmds.el (find-coding-systems-region) ++ (language-info-alist, reset-language-environment, princ-list): Doc ++ fix. ++ (input-method-verbose-flag): Fix :type. ++ (set-locale-environment): Revert last change. ++ ++2002-07-03 Dave Love ++ ++ * international/mule.el (set-char-table-default): Make obsolete. ++ ++2002-07-01 Dave Love ++ ++ * language/devanagari.el ("Devanagari"): Fix coding-system, ++ coding-priority. ++ ++ * international/mule-diag.el (describe-char-after): Modify display ++ list processing. ++ (unicodedata-file, unicodedata-find): New. ++ ++2002-06-29 Dave Love ++ ++ * emacs-lisp/bytecomp.el (batch-byte-compile-if-not-done): Add ++ autoload cookie. ++ ++ * international/ja-dic-cnv.el (skkdic-convert): Add coding tag. ++ ++2002-06-28 Dave Love ++ ++ * international/mule-util.el (detect-coding-with-priority): ++ Rewritten. ++ ++2002-06-27 Dave Love ++ ++ * composite.el (compose-string): Doc fix. ++ ++ * language/georgian.el ("Georgian"): Delete code-pages feature, ++ add nonascii-translation. ++ ++ * language/european.el ("German", "French", "Spanish"): Add ++ latin-9 as alternative coding system. ++ ("Slovenian", "Polish"): Add windows-1250 as alternative coding ++ system. ++ ("Dutch"): Add latin-9 as alternative coding system. Add input ++ method. ++ ("Turkish"): Add windows-1254 and latin-3 as alternative coding ++ systems. ++ ("Lithuanian", "Latvian"): Add windows-1257 as alternative coding ++ system. ++ ++ * emacs-lisp/byte-opt.el (side-effect-free-fns): Add decode-char, ++ encode-coding-char. ++ (side-effect-and-error-free-fns): Add charsetp, max-char, ++ primary-charset. ++ ++ * simple.el: Don't require cl when compiling. ++ ++ * Makefile.in (DONTCOMPILE): Fix duplicates. ++ ++ * emacs-lisp/bytecomp.el (byte-compile-insert-header): Fix typo. ++ ++ * international/mule.el: Doc fixes. ++ (load-with-code-conversion): Fix coding-system-type test. ++ ++ * international/mule-cmds.el (sort-coding-systems): Comment out ++ iso-2022 case. ++ (encoded-string-description): Fix coding-system-type test. ++ ++2002-06-24 Dave Love ++ ++ * international/characters.el: Make korean-ksc5601 double-width. ++ ++ * international/mule.el (auto-coding-regexp-alist): Recognize ++ Emacs 20/1 byte-compiled files. ++ ++ * international/mule-conf.el (file-coding-system-alist): Change ++ .elc to utf-8-emacs. ++ ++ * emacs-lisp/bytecomp.el (byte-recompile-directory) ++ (batch-byte-recompile-directory): Update from trunk. ++ (byte-compile-fix-header): Adjust for use of utf-8-emacs. ++ (byte-compile-insert-header): Update magic number. Add ballast. ++ (map-char-table): Use byte-compile-funarg-2. ++ ++2002-06-16 Dave Love ++ ++ * international/mule-cmds.el (set-locale-environment): Use ++ locale-codeset. ++ ++2002-06-15 Dave Love ++ ++ * language/cyrillic.el ("Belarusian"): Doc fix. ++ (cp1125, koi8-t): Doc fix. ++ ++ * international/mule-cmds.el (find-multibyte-characters): Doc fix. ++ (find-multibyte-characters): Don't test for charset `unknown'. ++ (locale-language-names): Change or add: be, bs, cy, mk, ru.koi8, ++ ru, sr_YU, tg, wa, zh.gbk. ++ (locale-language-names): Change sp to Cyrillic. ++ (locale-charset-language-names): Match @euro after utf-8. ++ ++2002-06-13 Dave Love ++ ++ * language/chinese.el (chinese-gbk, gbk, cp936, windows-936): New ++ coding systems. ++ ("Chinese-GBK"): New environment. ++ ++2002-06-12 Dave Love ++ ++ * language/chinese.el (chinese-gbk, gbk, cp936, windows-936): New ++ coding systems. ++ ("Chinese-GBK"): New environment. ++ ++ * language/thai-util.el: Remove redundant unicodes in ++ category-setting. ++ ++ * language/lao-util.el: Remove redundant unicodes in ++ category-setting. ++ ++ * international/mule-conf.el (japanese-jisx0213-1) ++ (japanese-jisx0213-2): Add unify-map. ++ ++2002-06-12 Kenichi Handa ++ ++ * international/quail.el (quail-update-leim-list-file): Force ++ writing the file by iso-2022-7bit. ++ ++ * international/titdic-cnv.el (miscdic-convert): Force writing the ++ file by iso-2022-7bit. ++ ++ * international/mule.el (define-charset): Change the attribute ++ :parents to :subset or :superset. ++ ++ * international/mule-conf.el: Adjusted for the change of ++ define-charset (:parent -> :subset or :superset). ++ ++ * international/characters.el: Fix the last change. ++ ++2002-06-11 Dave Love ++ ++ * language/cyril-util.el (cyrillic-encode-koi8-r-char) ++ (cyrillic-encode-alternativnyj-char): Fixed. ++ (standard-display-cyrillic-translit): Remove redundant unicodes. ++ ++2002-06-10 Dave Love ++ ++ * language/cyrillic.el ("Tajik", "Bulgarian", "Belarusian"): New ++ environments. ++ ++ * international/mule-conf.el (control-1): New charset. ++ (eight-bit-control, eight-bit-graphic): Redefined to use raw-bytes ++ space. ++ (korean-ksc5601): Redefine to use code-offset and unify-map. ++ ++2002-06-07 Dave Love ++ ++ * international/mule-conf.el (tcvn-5712): New charset. ++ ++ * language/vietnamese.el (vietnamese-tcvn, tcvn): New coding ++ system. ++ ("Vietnamese"): Doc fix. ++ ++2002-06-06 Dave Love ++ ++ * international/mule-conf.el (windows-936): New alias. ++ ++ * cus-start.el: Add scalable-fonts-allowed. ++ ++ * international/characters.el: Reinstate various CJK syntax and ++ category setup. Remove obsolete syntax setting in Greek section. ++ Optimize the char tables. ++ ++ * language/thai.el ("Thai"): Add cp874, iso-8859-11 coding ++ systems. ++ ++ * language/vietnamese.el ("Vietnamese"): Add windows-1258 coding ++ system. ++ ++ * language/greek.el ("Greek"): Add windows-1253, cp851, cp869 ++ coding systems. ++ ++ * language/romanian.el ("Romanian"): Add iso-latin-10 coding ++ system. ++ ++2002-06-03 Kenichi Handa ++ ++ * international/characters.el (printable-chars): Setup correctly. ++ ++2002-06-02 Dave Love ++ ++ * cus-start.el: Add selection-coding-system. ++ ++ * language/hebrew.el ("Hebrew"): Add windows-1255, cp862 coding ++ systems. ++ ++ * language/china-util.el (post-read-decode-hz) ++ (pre-write-encode-hz): Moved from chinese.el. ++ (big5-to-flat-code, flat-code-to-big5, euc-to-flat-code) ++ (flat-code-to-euc, expand-euc-big5-alist, big5-to-cns): Deleted. ++ ++ * language/chinese.el (post-read-decode-hz) ++ (pre-write-encode-hz): Moved to china-util.el. ++ ++ * case-table.el (set-case-syntax-pair): Test again for equal byte ++ lengths of the pair. ++ ++ * international/characters.el: Add Vietnamese category to ++ equivalent unicodes. ++ ++2002-05-31 Dave Love ++ ++ * international/mule-conf.el (chinese-sisheng, ipa) ++ (indian-is13194, tibetan, ethiopic): Add :unify-map. ++ ++ * international/mule-diag.el (describe-character-set): Account for ++ more than two dimensions of possibly different size. ++ (describe-current-coding-system): Add selection-coding-system. ++ ++ * international/mule-cmds.el (unify-8859-on-encoding-mode) ++ (unify-8859-on-decoding-mode): Moved from mule.el. ++ ++ * international/mule.el (unify-8859-on-encoding-mode) ++ (unify-8859-on-decoding-mode): Remove :init-value. ++ (charset-chars): Add optional dimension arg. ++ (unify-8859-on-encoding-mode, unify-8859-on-decoding-mode): Moved ++ to mule-cmds.el. ++ ++2002-05-31 Kenichi Handa ++ ++ * international/fontset.el: Change registry for japanese-jisx0208 ++ to "JISX0208*" in the default fontset. ++ (font-encoding-alist): Add an entry for "JISX0208.1983" and ++ "ISO10646.indian-1". ++ ++2002-05-30 Kenichi Handa ++ ++ * language/japanese.el (japanese-iso-7bit-1978-irv): Add ++ `designation' to :flags, `ascii' to :charset-list. ++ ++ * international/mule.el (define-charset): New args :min-code and ++ :max-code. ++ (coding-system-iso-2022-flags): Add use-roman, use-oldjis. ++ ++ * international/mule-conf.el (chinese-gbk): Change :code-offset to ++ #x160000. ++ (gb18030-2-byte, gb18030-4-byte-bmp, gb18030-4-byte-smp, ++ gb18030-4-byte-ext-1, gb18030-4-byte-ext-2, gb18030): New ++ charsets. ++ ++2002-05-29 Dave Love ++ ++ * language/thai.el (iso-8859-11): New coding system. ++ ++ * international/mule-conf.el: Doc fixes. ++ (iso-8859-11): New. ++ ++2002-05-28 Dave Love ++ ++ * cus-start.el (scalable-fonts-allowed): Add. ++ ++2002-05-27 Dave Love ++ ++ * international/mule.el (unify-8859-on-encoding-mode) ++ (unify-8859-on-decoding-mode): Dummy versions. ++ ++ * international/ucs-tables.el: Removed. ++ ++ * Makefile.in (DONTCOMPILE): Add language/chinese.el, ++ language/japanese.el. ++ ++ * international/mule-conf.el: Doc fixes. ++ (cp936): New alias. ++ (cp720, cp858): New charsets. ++ ++ * mail/sendmail.el (mail-recover-1, mail-recover): Use utf-8-emacs ++ coding system, not emacs-mule. ++ ++ * files.el (revert-buffer, recover-file): Likewise. ++ ++ * desktop.el (desktop-save): Likewise. ++ ++2002-05-26 Dave Love ++ ++ * international/mule.el (with-category-table): Use make-symbol. ++ (coding-system-list): Use coding-system-aliases. ++ (make-translation-table): Don't deal with generic characters. ++ ++ * international/mule-util.el: (coding-system-post-read-conversion) ++ (coding-system-pre-write-conversion) ++ (coding-system-translation-table-for-decode) ++ (coding-system-translation-table-for-encode): Get the right ++ properties. ++ (with-coding-priority): New macro. ++ (detect-coding-with-language-environment): Use it. ++ (coding-system-equal): Use coding-system-plist. ++ ++ * encoded-kb.el: (encoded-kbd-setup-keymap) : ++ Use :valid property. ++ ++ * mule-cmds.el (encode-coding-char): Use ++ find-coding-systems-string. Don't use make-char. ++ (describe-language-environment): Use coding-system-aliases. ++ (prefer-coding-system): Doc fix. ++ ++ * mule-diag.el (describe-current-coding-system): Fix aliases ++ listing. ++ (print-iso-2022-flags): Deleted. ++ (print-designation): Partly re-written. ++ (describe-coding-system): Deal with iso-2022 designations, flags. ++ Fix shift_jis case. ++ (describe-char-after): Use characterp. Print explicit unicode. ++ Remove some obsolete code. ++ (print-coding-system-briefly): Fix printing aliases. ++ (print-coding-system): Use coding-system-aliases. ++ (mule-diag): Don't list coding categories. ++ ++ * mule-conf.el: Doc fixes. Remove redundant ++ :long-name properties. Re-order charset priorities. ++ (binary): New alias. ++ (iso-8859-16): Fix nickname. ++ (define-iso-single-byte-charset): Un-define after use. ++ ++2002-05-26 Dave Love ++ ++ * international/mule.el (coding-system-iso-2022-flags): Revert ++ last change. ++ ++2002-05-25 Dave Love ++ ++ * international/characters.el: Additional double width ++ specifications. ++ ++ * international/mule-diag.el (print-coding-system): Incomplete ++ updates. ++ (describe-character-set): List more properties. ++ (print-fontset): Fix case of vector font-spec. ++ (describe-current-coding-system): Fix iso-7, iso-7-else. ++ ++ * international/mule-conf.el (ibm866): Fix alias. ++ (iso-8859-16): Fix nickname. ++ ++ * international/mule.el (coding-system-iso-2022-flags): Add ++ use-roman, use-oldjis. ++ ++2002-05-23 Dave Love ++ ++ * language/ind-util.el: Avoid decode-char. ++ (indian--puthash-char, indian--puthash-c, indian--puthash-cv): ++ Use characterp, not char-valid-p. ++ ++ * language/devan-util.el: Add coding tag, avoid decode-char. ++ ++ * international/titdic-cnv.el: Add coding tag. ++ (tit-process-header): Add coding tag to output. ++ ++ * language/thai.el (cp874, ibm874): New coding systems. ++ ++ * emacs-lisp/byte-opt.el : Add ++ character-p. ++ ++ * language/european.el (cp852, ibm852, cp857, ibm857, cp860) ++ (ibm860, cp861, ibm861, cp863, cp865, ibm865, cp437, ibm437): New ++ coding systems. ++ : Avoid decode-char. ++ ++ * language/hebrew.el (cp862, ibm862): New coding systems. ++ ++ * language/greek.el (cp851, ibm851, cp869, ibm869): New coding ++ systems. ++ ++ * language/cyrillic.el (cp855, ibm855): New coding systems. ++ ++ * international/mule-conf.el: Avoid decode-char in top-level code ++ for self-inserting multibyte chars. ++ (cp437, cp737, cp775, cp851, cp852, cp855, cp857, cp855, cp857) ++ (cp860, cp861, cp862, cp863, cp864, cp865, cp869, cp874): New ++ charsets. ++ ++ * international/characters.el: Various simplifications and ++ additions. ++ ++2002-05-22 Dave Love ++ ++ * international/mule-conf.el (code-pages): Provide, for ++ compatibility. ++ ++ * international/code-pages.el: Removed. ++ ++2002-05-22 Kenichi Handa ++ ++ * Makefile.in: Cancel the 2002-05-17 change of mine. ++ ++ * international/fontset.el: Add setting for unicode font at the ++ end of the default fontset. ++ ++2002-05-21 Dave Love ++ ++ * international/mule-conf.el (adobe-standard-encoding, symbol): ++ Adjust :code-space. ++ (ibm850): Add :ascii-compatible-p. ++ ++2002-05-21 Kenichi Handa ++ ++ * international/mule-conf.el (vietnamese-viscii-lower) ++ (vietnamese-viscii-upper): Supply them :code-offset, then unify by ++ mapping table. ++ ++2002-05-20 Dave Love ++ ++ * cus-start.el: Add charset-map-directory. ++ ++ * international/fontset.el (font-encoding-alist): Add ++ adobe-symbol. ++ ++ * format.el (format-alist): Remove ibm, mac, hp entries (available ++ natively). ++ ++ * language/romanian.el (iso-latin-10): Add :mime-charset. ++ ("Romanian"): Add doc. ++ ++ * international/mule-conf.el (iso-8859-10, symbol) ++ (adobe-standard-encoding, ibm850): New charsets. ++ (iso-8859-13): Fix IR number, final char. ++ (file-coding-system-alist): Add .xml. ++ ++ * language/european.el (iso-latin-6, iso-8859-10, latin-6) ++ (iso-latin-7, iso-8859-13, latin-7, hp-roman8, roman8) ++ (adobe-standard-encoding, cp850, ibm850): New coding systems. ++ ("Latin-6", "Latin-7"): New language environments. ++ ++2002-05-19 Dave Love ++ ++ * Makefile.in (DONTCOMPILE): Remove ucs-tables, utf-8, ++ utf-8-subst; add vietnamese, cyrillic, czech. ++ ++ * language/romanian.el (iso-latin-10): New coding system. ++ ++ * international/mule-conf.el (emacs-mule): Set :charset-list, ++ consistent with coding-system-charset-list doc. ++ (iso-8859-16): New charset. ++ ++ * international/mule-diag.el (describe-coding-system): Avoid error ++ for iso-2022, emacs-mule. ++ ++2002-05-17 Dave Love ++ ++ * international/codepage.el: Most of code removed or commented ++ out. ++ (codepage-setup): Re-written as trivial, obsolete function. ++ ++ * language/utf-8-lang.el ("UTF-8"): Use utf-8, not mule-utf-8. ++ Remove setup function. ++ ++ * language/english.el (ebcdic-us, ebcdic-uk): New coding systems. ++ ++ * Makefile.in (DONTCOMPILE): Add language/vietnamese.el, ++ language/cyrillic.el, language/czech.el. ++ ++ * language/vietnamese.el (windows-1258, cp1258): New coding ++ systems. ++ (font-ccl-encoder-alist): Remove viscii, vscii. ++ ++ * language/hebrew.el (windows-1255, cp1255): New coding systems. ++ ++ * language/european.el (windows-1254, cp1254, windows-1257) ++ (cp1257, next, iso-latin-7, iso-8859-13, latin-7): New coding ++ systems. ++ ("Latin-7", "Lithuanian", "Latvian"): Don't require code-pages. ++ ++ * language/greek.el (windows-1253, cp1253): New coding systems. ++ ++ * international/mule-conf.el (ebcdic-us, ebcdic-uk): Change map ++ file name. ++ (windows-1253, windows-1254, windows-1255, windows-1256) ++ (windows-1257, windows-1258, next): New charsets. ++ ++ * international/utf-8.el, international/utf-8-subst.el: Removed. ++ ++ * international/mule.el: Doc fixes. ++ (charset-list, generic-char-p, set-coding-priority): Make ++ obsolete. ++ (coding-system-get): Try to convert old-style symbol to keyword. ++ (define-charset): Purecopy strings in property list. ++ (define-coding-system): Purecopy docstring. ++ ++ * international/mule-diag.el (list-character-sets-2): Avoid ++ charset-bytes. ++ (list-iso-charset-chars, list-non-iso-charset-chars): Deleted. ++ (list-block-of-chars): Re-written. ++ (describe-character-set): Show more properties. ++ (describe-char-after): Correct codepoint display. ++ (print-coding-system): Use symbolic types. ++ ++2002-05-17 Kenichi Handa ++ ++ * Makefile.in: Be sure also to run emacs with LANG=C. ++ ++2002-05-16 Dave Love ++ ++ * international/mule-diag.el: Doc fixes. ++ (sort-charset-list, charset-multibyte-form-string): Removed. ++ (list-character-sets, list-character-sets-1) ++ (list-character-sets-2): Re-written. ++ (non-iso-charset-alist): Set to nil and made obsolete. ++ (decode-codepage-char): Re-written and made obsolete. ++ (read-charset): Don't use non-iso-charset-alist. ++ (describe-coding-system): Use keyword properties. ++ (describe-character-set): Re-written. ++ ++ * international/mule-conf.el (koi8-u, koi8-t, georgian-ps) ++ (windows-1250, windows-1251, windows-1252, cp1125, ebcdic-us) ++ (ebcdic-uk): New charsets. ++ ++ * language/cyrillic.el (koi8-u, koi8-t, windows-1251, cp1125): New ++ coding systems. ++ ++ * language/european.el (windows-1252): New coding system. ++ ++2002-05-16 Kenichi Handa ++ ++ * Makefile.in: By sure to run emacs with LC_ALL=C. ++ ++ * international/encoded-kb.el (encoded-kbd-handle-8bit): Call ++ encoded-kbd-self-insert-iso2022-8bit with argument 1. ++ (encoded-kbd-self-insert-charset): New function. ++ (encoded-kbd-setup-keymap): Handle a coding-system of type ++ charset. ++ (encoded-kbd-mode): Likewise. ++ ++2002-05-15 Dave Love ++ ++ * international/mule-cmds.el (set-locale-environment): Comment out ++ set-keyboard-coding-system stuff. ++ ++2002-05-14 Kenichi Handa ++ ++ * international/mule.el (define-coding-system): Docstring ++ fixed (sjis->shift-jis). ++ ++ * international/mule-conf.el: Use decode-char instead of ++ make-char. ++ ++ * international/encoded-kb.el ++ (encoded-kbd-self-insert-iso2022-8bit): New arg ARG. Directly ++ call self-insert-command. This is a temporary workaround to ++ make it work with latin-1. ++ (encoded-kbd-mode): Change `sjis' to `shift-jis'. ++ ++ * international/characters.el: Setup char-width-table for CJK ++ characters. ++ ++2002-05-15 Kenichi Handa ++ ++ * international/ja-dic-utl.el (skkdic-jisx0208-hiragana-block): ++ Fix the initial value to specify character range of ++ japanese-jisx0208, not unicode. ++ ++ * international/characters.el: Set syntax/category for ++ japanese-jisx0208. ++ ++2002-05-14 Dave Love ++ ++ * mail/sendmail.el (sendmail-send-it): Use :mime-charset, not ++ mime-charset. ++ ++ * language/vietnamese.el, language/korean.el, language/japanese.el: ++ * language/hebrew.el, language/greek.el, language/chinese.el: ++ Fix :mime-charset properties. ++ ++ * language/cyrillic.el (cyrillic-iso-8bit, cyrillic-koi8) ++ (cyrillic-alternativnyj): Fix :mime-charset. ++ (cp878, cp866): New alias. ++ ++ * language/european.el: Fix mime-charset properties. ++ ++ * international/mule-cmds.el (sort-coding-systems) ++ (select-safe-coding-system, select-message-coding-system): Use ++ :mime-charset, not 'mime-charset. ++ ++ * international/mule-conf.el: Add various :mime-charset ++ properties. ++ ++2002-05-13 Dave Love ++ ++ * international/encoded-kb.el (encoded-kbd-self-insert-ccl): Fix ++ getting decoder. ++ (encoded-kbd-mode): Fix code for coding system type and ++ designations. ++ ++ * international/mule-conf.el: Speling fixes. ++ ++2002-05-13 Kenichi Handa ++ ++ * international/fontset.el (fontset-plain-name): Handle the case ++ that size, weight, slant are not specified in the fontset name. ++ ++2002-05-10 Yong Lu ++ ++ * language/greek.el (greek-iso-8bit): Fix typo. ++ ++2002-05-10 Kenichi Handa ++ ++ * language/chinese.el (chinese-big5): Change :coding-type to ++ `charset'. ++ ++2002-05-09 Kenichi Handa ++ ++ * international/characters.el: Change encoding to utf-8-emacs. ++ Remove apparent duplicate codes. ++ ++2002-05-07 Kenichi Handa ++ ++ * international/fontset.el: Change the registry for ++ chinese-gb2312 and add the registry for chinese-gbk. ++ ++ * international/mule.el (charset-chars): Fix typo. ++ ++ * international/mule-conf.el (chinese-gbk): New charset. ++ ++ * international/titdic-cnv.el (titdic-convert): Read into a ++ unibyte buffer then make the buffer multibyte. ++ ++2002-03-07 Kenichi Handa ++ ++ * international/mule-conf.el (utf-16-le): Use :bom attribute ++ instead of :signature. ++ (utf-16-be): Likewise. ++ ++2002-03-05 Kenichi Handa ++ ++ * language/czech.el ("Czech"): Set `iso-8859-2' for ++ `nonascii-translation'. ++ ++ * language/romanian.el ("Romanian"): Likewise. ++ ++ * language/slovak.el ("Slovak"): Likewise. ++ ++ * international/characters.el: Optimize calls of ++ modify-category-entry. ++ ++2002-03-01 Kenichi Handa ++ ++ * bindings.el: Don't bind multibyte characters to ++ self-insert-command here. It's done in mule-conf.el ++ ++ * case-table.el (set-case-syntax-offset): Variable deleted. ++ (set-case-syntax-1): Function deleted. Callers changed. ++ (set-case-syntax-delims): Don't check byte length of characters. ++ ++ * isearch.el (isearch-mode-map): Call set-char-table-range to bind ++ characters to isearch-printing-char. ++ ++ * loadup.el: Don't load "international/utf-8". Don't call ++ update-coding-systems-internal. Bind coding-system-for-write to ++ `utf-8' while writing fns-XXX.el. Call clear-charset-maps before ++ dumping. ++ ++ * version.el (emacs-version): Set to "22.0.0". ++ ++ * emacs-lisp/regexp-opt.el (regexp-opt-charset): Adjusted for the ++ change of map-char-table. ++ ++ * eshell/esh-mode.el: Adjusted for the change of map-char-table. ++ ++ * international/characters.el: Adjusted for the deletion of ++ generic characters. ++ ++ * international/code-pages.el (cp-make-translation-table, ++ cp-valid-codes, cp-fix-safe-chars): Deleted. Caller changed. ++ (cp-make-coding-system): Call define-coding-system. ++ ++ * international/fontset.el: Mostly re-written. ++ ++ * international/ja-dic-cnv.el (skkdic-get-kana-compact-codes): ++ Call encode-char instead of split-char. ++ ++ * international/ja-dic-utl.el (skkdic-jisx0208-hiragana-block): ++ Value changed. ++ (skkdic-lookup-key): Call encode-char instead of split-char. ++ ++ * international/titdic-cnv.el (titdic-convert): Bind ++ coding-system-for-write to 'iso-2022-7bit. Don't work on unibyte ++ buffer. ++ ++ * international/latin-1.el: Don't bind set-case-syntax-offset. ++ ++ * international/latin-2.el: Don't bind set-case-syntax-offset. ++ ++ * international/latin-3.el: Don't bind set-case-syntax-offset. ++ ++ * international/latin-4.el: Don't bind set-case-syntax-offset. ++ ++ * international/latin-5.el: Don't bind set-case-syntax-offset. ++ ++ * international/latin-8.el: Don't bind set-case-syntax-offset. ++ ++ * international/latin-9.el: Don't bind set-case-syntax-offset. ++ ++ * international/mule-cmds.el: Don't use coding category. Call ++ set-coding-system-priority instead of set-coding-priority. ++ (sort-coding-systems): Call coding-system-priority-list to get the ++ most preferred one. ++ (select-safe-coding-system): Likewise. ++ (reset-language-environment): Order of coding system priority ++ changed. Set primary charset to iso-8859-1. ++ (set-language-environment-coding-systems): Call ++ set-coding-system-priority instead of set-coding-priority. ++ (get-charset-property, put-charset-property): Moved to mule.el. ++ ++ * international/mule-conf.el: Fully Re-written. ++ ++ * international/mule-diag.el (print-designation): Arguments ++ changed. ++ (print-iso-2022-flags): New function. ++ (describe-coding-system): Adjusted for the new structure of coding ++ system. ++ (describe-current-coding-system): Likewise. ++ ++ * international/mule.el (char-valid-p): Make it an alias of ++ characterp. ++ (define-charset): Fully re-designed. ++ (charset-quoted-standard-p): Deleted. ++ (charsetp): Moved to charset.c. ++ (charset-info, charset-id, charset-bytes, charset-width, ++ charset-directioin, charset-iso-graphic-plane, ++ charset-reverse-charset): Deleted. ++ (charset-dimension, charset-chars, charset-iso-final-char, ++ charset-description, charset-short-name, charset-long-name): Call ++ charset-plist instead of charset-info. ++ (charset-plist, set-charset-plist): Moved to charset.c. ++ (get-charset-property, put-charset-property): Moved from ++ mule-cmds.el. Call charset-plist and set-charset-plist. ++ (make-char): Deleted. ++ (generic-char-p): Make it always return nil. ++ (decode-char, encode-char): Moved to charset.c. ++ (coding-spec-XXX-idx): Variables deleted. ++ (coding-system-iso-2022-flags): New variable. ++ (define-coding-system): New function. ++ (transform-make-coding-system-args, make-coding-system): Deleted. ++ (set-coding-priority): Make it obsolete. ++ (after-insert-file-set-buffer-file-coding-system): Adjusted for ++ the new coding system structure. ++ (find-new-buffer-file-coding-system): Likewise. ++ ++ * language/chinese.el, language/cyrillic.el, language/european.el, ++ language/greek.el, language/hebrew.el, language/indian.el, ++ language/japanese.el, language/korean.el, language/lao.el, ++ language/thai.el, language/tibetan.el, language/vietnamese.el: ++ Call define-coding-system instead of make-coding-system. All CCL ++ program deleted. ++ ++ * textmodes/sgml-mode.el (sgml-mode-map): Use encode-char instead ++ of make-char. ++ (sgml-char-names-table): Iteration limit fixed. ++ ++ * term/mac-win.el: Deleted unnecessary calls of set-fontset-font. ++ (ccl-encode-mac-roman-font): Deleted. ++ ++ * Makefile.in (DONTCOMPILE): Add latin1-disp.el, ucs-tables.el, ++ utf-8.el, and utf-8-subst.el as they can't be bytecompiled ++ currently. This is just a temporary workaround. ++ ++;; Local Variables: ++;; coding: iso-2022-7bit ++;; End: ++ ++ Copyright (C) 2002 Free Software Foundation, Inc. ++ Copying and distribution of this file, with or without modification, ++ are permitted provided the copyright notice and this notice are preserved. diff --cc lisp/Makefile.in index c09597b6d03,345cd7f8641..d1f5b43b762 --- a/lisp/Makefile.in +++ b/lisp/Makefile.in @@@ -143,19 -136,21 +141,19 @@@ setwins_almost=subdirs=`find $$wd -typ doit: -cus-load.el: +$(lisp)/cus-load.el: touch $@ -custom-deps: cus-load.el doit - wd=$(lisp); $(setwins); \ +custom-deps: $(lisp)/cus-load.el doit + wd=$(lisp); $(setwins_almost); \ echo Directories: $$wins; \ - $(EMACS) $(EMACSOPT) -l cus-dep --eval '(setq generated-custom-dependencies-file "$(lisp)/cus-load.el")' -f custom-make-dependencies $$wins - LC_ALL=C $(EMACS) $(EMACSOPT) -l cus-dep -f custom-make-dependencies $$wins ++ LC_ALL=C $(EMACS) $(EMACSOPT) -l cus-dep --eval '(setq generated-custom-dependencies-file "$(lisp)/cus-load.el")' -f custom-make-dependencies $$wins -finder-inf.el: - echo "(provide 'finder-inf)" >> $@ -finder-data: finder-inf.el doit - wd=$(lisp); $(finder_setwins); \ +finder-data: doit + wd=$(lisp); $(setwins_almost); \ echo Directories: $$wins; \ - $(EMACS) $(EMACSOPT) -l finder --eval '(setq generated-finder-keywords-file "$(lisp)/finder-inf.el")' -f finder-compile-keywords-make-dist $$wins - LC_ALL=C $(EMACS) $(EMACSOPT) -l finder -f finder-compile-keywords-make-dist $$wins ++ LC_ALL=C $(EMACS) $(EMACSOPT) -l finder --eval '(setq generated-finder-keywords-file "$(lisp)/finder-inf.el")' -f finder-compile-keywords-make-dist $$wins -loaddefs.el: +$(lisp)/loaddefs.el: echo ";;; loaddefs.el --- automatically extracted autoloads" >> $@ echo ";;" >> $@; echo ";;; Code:" >> $@ echo " " >> $@ @@@ -165,12 -160,12 +163,12 @@@ echo ";;; no-update-autoloads: t" >> $@ echo ";;; End:" >> $@ echo ";;; loaddefs.el ends here" >> $@ -autoloads: loaddefs.el doit +autoloads: $(lisp)/loaddefs.el doit wd=$(lisp); $(setwins); \ echo Directories: $$wins; \ - $(EMACS) $(EMACSOPT) -l autoload --eval '(setq generated-autoload-file "$(lisp)/loaddefs.el")' -f batch-update-autoloads $$wins + LC_ALL=C $(EMACS) $(EMACSOPT) -l autoload --eval '(setq generated-autoload-file "$(lisp)/loaddefs.el")' -f batch-update-autoloads $$wins -subdirs.el: +$(lisp)/subdirs.el: $(MAKE) $(MFLAGS) update-subdirs update-subdirs: doit wd=$(lisp); $(setwins); \ diff --cc lisp/arc-mode.el index 563b71f940e,69b00ec575b..9730a72ff72 --- a/lisp/arc-mode.el +++ b/lisp/arc-mode.el @@@ -364,6 -350,21 +368,18 @@@ Each descriptor is a vector of the for ;; ------------------------------------------------------------------------- ;; Section: Support functions. + (eval-when-compile + (defsubst byte-after (pos) + "Like char-after but an eight-bit char is converted to unibyte." + (multibyte-char-to-unibyte (char-after pos))) - (defsubst bref (string idx) - "Like aref but an eight-bit char is converted to unibyte." - (multibyte-char-to-unibyte (aref string idx))) + (defsubst insert-unibyte (&rest args) + "Like insert but don't make unibyte string and eight-bit char multibyte." + (dolist (elt args) + (if (integerp elt) + (insert (if (< elt 128) elt (decode-char 'eight-bit elt))) + (insert (string-to-multibyte elt))))) + ) + (defsubst archive-name (suffix) (intern (concat "archive-" (symbol-name archive-subtype) "-" suffix))) @@@ -374,6 -375,6 +390,7 @@@ in which case a second argument, length (if (stringp str) (setq len (length str)) (setq str (buffer-substring str (+ str len)))) ++ (setq str (string-as-unibyte str)) (let ((result 0) (i 0)) (while (< i len) @@@ -578,6 -580,12 +595,12 @@@ archive (make-local-variable 'archive-file-list-start) (make-local-variable 'archive-file-list-end) (make-local-variable 'archive-file-name-indent) + (setq archive-file-name-coding-system + (or file-name-coding-system + default-file-name-coding-system + locale-coding-system)) + (if default-enable-multibyte-characters - (set-buffer-multibyte t 'to)) ++ (set-buffer-multibyte 'to)) (archive-summarize nil) (setq buffer-read-only t)))) @@@ -1413,90 -1422,53 +1436,89 @@@ This doesn't recover lost files, it jus (maxlen 8) files visual) - (while (progn (goto-char p) + (while (progn (goto-char p) ;beginning of a base header. (looking-at "\\(.\\|\n\\)\\(.\\|\n\\)-l[hz][0-9ds]-")) - (let* ((hsize (char-after p)) ;size of the base header (level 0 and 1) - (let* ((hsize (byte-after p)) - (csize (archive-l-e (+ p 7) 4)) - (ucsize (archive-l-e (+ p 11) 4)) - (modtime (archive-l-e (+ p 15) 2)) - (moddate (archive-l-e (+ p 17) 2)) - (hdrlvl (byte-after (+ p 20))) - (fnlen (byte-after (+ p 21))) - (efnname (let ((str (buffer-substring (+ p 22) (+ p 22 fnlen)))) ++ (let* ((hsize (byte-after p)) ;size of the base header (level 0 and 1) + (csize (archive-l-e (+ p 7) 4)) ;size of a compressed file to follow (level 0 and 2), + ;size of extended headers + the compressed file to follow (level 1). + (ucsize (archive-l-e (+ p 11) 4)) ;size of an uncompressed file. + (time1 (archive-l-e (+ p 15) 2)) ;date/time (MSDOS format in level 0, 1 headers + (time2 (archive-l-e (+ p 17) 2)) ;and UNIX format in level 2 header.) - (hdrlvl (char-after (+ p 20))) ;header level ++ (hdrlvl (byte-after (+ p 20))) ;header level + thsize ;total header size (base + extensions) + fnlen efnname fiddle ifnname width p2 creator + neh ;beginning of next extension header (level 1 and 2) + mode modestr uid gid text dir prname + gname uname modtime moddate) + (if (= hdrlvl 3) (error "can't handle lzh level 3 header type")) + (when (or (= hdrlvl 0) (= hdrlvl 1)) - (setq fnlen (char-after (+ p 21))) ;filename length ++ (setq fnlen (byte-after (+ p 21))) ;filename length + (setq efnname (let ((str (buffer-substring (+ p 22) (+ p 22 fnlen)))) ;filename from offset 22 - (if file-name-coding-system - (decode-coding-string str file-name-coding-system) - (string-as-multibyte str)))) + (decode-coding-string + str archive-file-name-coding-system))) - (fiddle (string= efnname (upcase efnname))) - (ifnname (if fiddle (downcase efnname) efnname)) - (width (string-width ifnname)) - (p2 (+ p 22 fnlen)) - (creator (if (>= (- hsize fnlen) 24) (byte-after (+ p2 2)) 0)) - mode modestr uid gid text path prname - ) - (if (= hdrlvl 0) - (setq mode (if (= creator ?U) (archive-l-e (+ p2 8) 2) ?\666) - uid (if (= creator ?U) (archive-l-e (+ p2 10) 2)) - gid (if (= creator ?U) (archive-l-e (+ p2 12) 2))) - (if (= creator ?U) - (let* ((p3 (+ p2 3)) - (hsize (archive-l-e p3 2)) - (etype (byte-after (+ p3 2)))) - (while (not (= hsize 0)) + (setq p2 (+ p 22 fnlen))) ; + (if (= hdrlvl 1) + (progn ;specific to level 1 header - (setq creator (if (>= (- hsize fnlen) 24) (char-after (+ p2 2)) 0)) ++ (setq creator (if (>= (- hsize fnlen) 24) (byte-after (+ p2 2)) 0)) + (setq neh (+ p2 3))) + (if (= hdrlvl 2) + (progn ;specific to level 2 header - (setq creator (char-after (+ p 23)) ) ++ (setq creator (byte-after (+ p 23)) ) + (setq neh (+ p 24))))) + (if neh ;if level 1 or 2 we expect extension headers to follow + (let* ((ehsize (archive-l-e neh 2)) ;size of the extension header - (etype (char-after (+ neh 2)))) ;extension type ++ (etype (byte-after (+ neh 2)))) ;extension type + (while (not (= ehsize 0)) (cond - ((= etype 2) (let ((i (+ p3 3))) - (while (< i (+ p3 hsize)) - (setq path (concat path + ((= etype 1) ;file name + (let ((i (+ neh 3))) + (while (< i (+ neh ehsize)) - (setq efnname (concat efnname (char-to-string (char-after i)))) ++ (setq efnname (concat efnname (char-to-string (byte-after i)))) + (setq i (1+ i))))) + ((= etype 2) ;directory name + (let ((i (+ neh 3))) + (while (< i (+ neh ehsize)) + (setq dir (concat dir - (if (= (char-after i) + (if (= (byte-after i) 255) "/" (char-to-string - (byte-after i))))) + (char-after i))))) (setq i (1+ i))))) - ((= etype 80) (setq mode (archive-l-e (+ p3 3) 2))) - ((= etype 81) (progn (setq uid (archive-l-e (+ p3 3) 2)) - (setq gid (archive-l-e (+ p3 5) 2)))) + ((= etype 80) ;Unix file permission + (setq mode (archive-l-e (+ neh 3) 2))) + ((= etype 81) ;UNIX file group/user ID + (progn (setq uid (archive-l-e (+ neh 3) 2)) + (setq gid (archive-l-e (+ neh 5) 2)))) + ((= etype 82) ;UNIX file group name + (let ((i (+ neh 3))) + (while (< i (+ neh ehsize)) + (setq gname (concat gname (char-to-string (char-after i)))) + (setq i (1+ i))))) + ((= etype 83) ;UNIX file user name + (let ((i (+ neh 3))) + (while (< i (+ neh ehsize)) + (setq uname (concat uname (char-to-string (char-after i)))) + (setq i (1+ i))))) ) - (setq p3 (+ p3 hsize)) - (setq hsize (archive-l-e p3 2)) - (setq etype (byte-after (+ p3 2))))))) - (setq prname (if path (concat path ifnname) ifnname)) + (setq neh (+ neh ehsize)) + (setq ehsize (archive-l-e neh 2)) - (setq etype (char-after (+ neh 2)))) ++ (setq etype (byte-after (+ neh 2)))) + ;;get total header size for level 1 and 2 headers + (setq thsize (- neh p)))) + (if (= hdrlvl 0) ;total header size + (setq thsize hsize)) + (setq fiddle (if efnname (string= efnname (upcase efnname)))) + (setq ifnname (if fiddle (downcase efnname) efnname)) + (setq prname (if dir (concat dir ifnname) ifnname)) + (setq width (if prname (string-width prname) 0)) (setq modestr (if mode (archive-int-to-mode mode) "??????????")) + (setq moddate (if (= hdrlvl 2) + (archive-unixdate time1 time2) ;level 2 header in UNIX format + (archive-dosdate time2))) ;level 0 and 1 header in DOS format + (setq modtime (if (= hdrlvl 2) + (archive-unixtime time1 time2) + (archive-dostime time1))) (setq text (if archive-alternate-display (format " %8d %5S %5S %s" ucsize @@@ -1516,14 -1488,9 +1538,13 @@@ (length text)) visual) files (cons (vector prname ifnname fiddle mode (1- p)) - files) - p (+ p hsize 2 csize)))) + files)) + (cond ((= hdrlvl 1) + (setq p (+ p hsize 2 csize))) + ((or (= hdrlvl 2) (= hdrlvl 0)) + (setq p (+ p thsize 2 csize)))) + )) (goto-char (point-min)) - (set-buffer-multibyte default-enable-multibyte-characters) (let ((dash (concat (if archive-alternate-display "- -------- ----- ----- " "- ---------- -------- ----------- -------- ") diff --cc lisp/case-table.el index f83123bf19f,1897ef0aec1..f54d0b75c55 --- a/lisp/case-table.el +++ b/lisp/case-table.el @@@ -25,19 -25,8 +25,15 @@@ ;;; Commentary: +;; Written by: +;; TN/ETX/TX/UMG Howard Gayle UUCP : seismo!enea!erix!howard +;; Telefonaktiebolaget L M Ericsson Phone: +46 8 719 55 65 +;; Ericsson Telecom Telex: 14910 ERIC S +;; S-126 25 Stockholm FAX : +46 8 719 64 82 +;; Sweden + ;;; Code: - (defvar set-case-syntax-offset 0) - - (defvar set-case-syntax-set-multibyte nil) - (defun describe-buffer-case-table () "Describe the case table of the current buffer." (interactive) diff --cc lisp/composite.el index 1f279cd6a17,18c1cc02eee..6df3b690071 --- a/lisp/composite.el +++ b/lisp/composite.el @@@ -364,20 -351,124 +355,123 @@@ after a sequence character events. (compose-region (- (point) chars) (point) (nth 2 args)) (compose-chars-after (- (point) chars) (point)))))) - ;;;###autoload(global-set-key [compose-last-chars] 'compose-last-chars) + (global-set-key [compose-last-chars] 'compose-last-chars) + + ;;; Automatic character composition. + + (defvar composition-function-table + (make-char-table nil) + "Char table of functions for automatic character composition. + For each character that has to be composed automatically with + preceding and/or following characters, this char table contains + a function to call to compose that character. + + Each function is called with two arguments, POS and STRING. + + If STRING is nil, POS is a position in the current buffer, and the + function has to compose a character at POS with surrounding characters + in the current buffer. + + Otherwise, STRING is a string, and POS is an index to the string. In + this case, the function has to compose a character at POS with + surrounding characters in the string. + + See also the command `toggle-auto-composition'.") + + ;; Copied from font-lock.el. + (eval-when-compile + ;; + ;; We don't do this at the top-level as we only use non-autoloaded macros. + (require 'cl) + ;; + ;; Borrowed from lazy-lock.el. + ;; We use this to preserve or protect things when modifying text properties. + (defmacro save-buffer-state (varlist &rest body) + "Bind variables according to VARLIST and eval BODY restoring buffer state." + `(let* ,(append varlist + '((modified (buffer-modified-p)) (buffer-undo-list t) + (inhibit-read-only t) (inhibit-point-motion-hooks t) + (inhibit-modification-hooks t) + deactivate-mark buffer-file-name buffer-file-truename)) + ,@body + (unless modified + (restore-buffer-modified-p nil)))) + (put 'save-buffer-state 'lisp-indent-function 1) + ;; Fixme: This makes bootstrapping fails by this error. + ;; Symbol's function definition is void: eval-defun + ;;(def-edebug-spec save-buffer-state let) + ) + + (defvar auto-composition-chunk-size 500 + "*Automatic composition chunks of this many characters, or smaller.") + + (defun auto-compose-chars (pos string) + "Compose characters after the buffer position POS. + If STRING is non-nil, it is a string, and POS is an index to the string. + In that case, compose characters in the string. + + This function is the default value of `auto-composition-function' (which see)." + (save-buffer-state nil + (save-excursion + (save-restriction + (save-match-data + (let* ((start pos) + (end (if string (length string) (point-max))) + (limit (next-single-property-change pos 'auto-composed string + end)) + (lines 0) + ch func newpos) + (if (> (- limit start) auto-composition-chunk-size) + (setq limit (+ start auto-composition-chunk-size))) + (while (and (< pos end) + (setq ch (if string (aref string pos) + (char-after pos))) + (or (< pos limit) + (/= ch ?\n))) + (setq func (aref composition-function-table ch)) + (if (fboundp func) + (setq newpos (funcall func pos string) + pos (if (and (integerp newpos) (> newpos pos)) + newpos + (1+ pos))) + (setq pos (1+ pos)))) + (if (< pos limit) + (setq pos (1+ pos))) + (put-text-property start pos 'auto-composed t string))))))) + + (setq auto-composition-function 'auto-compose-chars) + + (defun toggle-auto-composition (&optional arg) + "Change whether automatic character composition is enabled in this buffer. + With arg, enable it iff arg is positive." + (interactive "P") + (let ((enable (if (null arg) (not auto-composition-function) + (> (prefix-numeric-value arg) 0)))) + (if enable + (kill-local-variable 'auto-composition-function) + (make-local-variable 'auto-composition-function) + (setq auto-composition-function nil) + (save-buffer-state nil + (save-restriction + (widen) + (decompose-region (point-min) (point-max))))) + + (save-buffer-state nil + (save-restriction + (widen) + (put-text-property (point-min) (point-max) 'auto-composed nil))))) - ;;; The following codes are only for backward compatibility with Emacs -;;; 20.4 and the earlier. +;;; 20.4 and earlier. - ;;;###autoload (defun decompose-composite-char (char &optional type with-composition-rule) "Convert CHAR to string. -This is only for backward compatibility with Emacs 20.4 and the earlier. If optional 2nd arg TYPE is non-nil, it is `string', `list', or -`vector'. In this case, CHAR is converted string, list of CHAR, or -vector of CHAR respectively." +`vector'. In this case, CHAR is converted to string, list of CHAR, or +vector of CHAR respectively. +Optional 3rd arg WITH-COMPOSITION-RULE is ignored." (cond ((or (null type) (eq type 'string)) (char-to-string char)) ((eq type 'list) (list char)) (t (vector char)))) diff --cc lisp/descr-text.el index 229e1b57e3f,00000000000..c65043f86cf mode 100644,000000..100644 --- a/lisp/descr-text.el +++ b/lisp/descr-text.el @@@ -1,586 -1,0 +1,576 @@@ +;;; descr-text.el --- describe text mode + +;; Copyright (c) 1994, 1995, 1996, 2001, 02, 03 Free Software Foundation, Inc. + +;; Author: Boris Goldowsky +;; Keywords: faces + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;;; Describe-Text Mode. + +;;; Code: + +(eval-when-compile (require 'button)) + +(defun describe-text-done () + "Delete the current window or bury the current buffer." + (interactive) + (if (> (count-windows) 1) + (delete-window) + (bury-buffer))) + +(defvar describe-text-mode-map + (let ((map (make-sparse-keymap))) + (set-keymap-parent map widget-keymap) + map) + "Keymap for `describe-text-mode'.") + +(defcustom describe-text-mode-hook nil + "List of hook functions ran by `describe-text-mode'." + :type 'hook + :group 'facemenu) + +(defun describe-text-mode () + "Major mode for buffers created by `describe-char'. + +\\{describe-text-mode-map} +Entry to this mode calls the value of `describe-text-mode-hook' +if that value is non-nil." + (kill-all-local-variables) + (setq major-mode 'describe-text-mode + mode-name "Describe-Text") + (use-local-map describe-text-mode-map) + (widget-setup) + (add-hook 'change-major-mode-hook 'font-lock-defontify nil t) + (run-hooks 'describe-text-mode-hook)) + +;;; Describe-Text Utilities. + +(defun describe-text-widget (widget) + "Insert text to describe WIDGET in the current buffer." + (widget-create 'link + :notify `(lambda (&rest ignore) + (widget-browse ',widget)) + (format "%S" (if (symbolp widget) + widget + (car widget)))) + (widget-insert " ") + (widget-create 'info-link :tag "widget" "(widget)Top")) + +(defun describe-text-sexp (sexp) + "Insert a short description of SEXP in the current buffer." + (let ((pp (condition-case signal + (pp-to-string sexp) + (error (prin1-to-string signal))))) + (when (string-match "\n\\'" pp) + (setq pp (substring pp 0 (1- (length pp))))) + (if (cond ((string-match "\n" pp) + nil) + ((> (length pp) (- (window-width) (current-column))) + nil) + (t t)) + (widget-insert pp) + (widget-create 'push-button + :tag "show" + :action (lambda (widget &optional event) + (with-output-to-temp-buffer + "*Pp Eval Output*" + (princ (widget-get widget :value)))) + pp)))) + +(defun describe-property-list (properties) + "Insert a description of PROPERTIES in the current buffer. +PROPERTIES should be a list of overlay or text properties. +The `category' property is made into a widget button that call +`describe-text-category' when pushed." + ;; Sort the properties by the size of their value. + (dolist (elt (sort (let ((ret nil) + (key nil) + (val nil) + (len nil)) + (while properties + (setq key (pop properties) + val (pop properties) + len 0) + (unless (or (eq key 'category) + (widgetp val)) + (setq val (pp-to-string val) + len (length val))) + (push (list key val len) ret)) + ret) + (lambda (a b) + (< (nth 2 a) + (nth 2 b))))) + (let ((key (nth 0 elt)) + (value (nth 1 elt))) + (widget-insert (propertize (format " %-20s " key) + 'font-lock-face 'italic)) + (cond ((eq key 'category) + (widget-create 'link + :notify `(lambda (&rest ignore) + (describe-text-category ',value)) + (format "%S" value))) + ((widgetp value) + (describe-text-widget value)) + (t + (widget-insert value)))) + (widget-insert "\n"))) + +;;; Describe-Text Commands. + +(defun describe-text-category (category) + "Describe a text property category." + (interactive "S") + (save-excursion + (with-output-to-temp-buffer "*Help*" + (set-buffer standard-output) + (widget-insert "Category " (format "%S" category) ":\n\n") + (describe-property-list (symbol-plist category)) + (describe-text-mode) + (goto-char (point-min))))) + +;;;###autoload +(defun describe-text-properties (pos &optional output-buffer) + "Describe widgets, buttons, overlays and text properties at POS. +Interactively, describe them for the character after point. +If optional second argument OUTPUT-BUFFER is non-nil, +insert the output into that buffer, and don't initialize or clear it +otherwise." + (interactive "d") + (if (>= pos (point-max)) + (error "No character follows specified position")) + (if output-buffer + (describe-text-properties-1 pos output-buffer) + (if (not (or (text-properties-at pos) (overlays-at pos))) + (message "This is plain text.") + (let ((buffer (current-buffer))) + (when (eq buffer (get-buffer "*Help*")) + (error "Can't do self inspection")) + (save-excursion + (with-output-to-temp-buffer "*Help*" + (set-buffer standard-output) + (setq output-buffer (current-buffer)) + (widget-insert "Text content at position " (format "%d" pos) ":\n\n") + (with-current-buffer buffer + (describe-text-properties-1 pos output-buffer)) + (describe-text-mode) + (goto-char (point-min)))))))) + +(defun describe-text-properties-1 (pos output-buffer) + (let* ((properties (text-properties-at pos)) + (overlays (overlays-at pos)) + overlay + (wid-field (get-char-property pos 'field)) + (wid-button (get-char-property pos 'button)) + (wid-doc (get-char-property pos 'widget-doc)) + ;; If button.el is not loaded, we have no buttons in the text. + (button (and (fboundp 'button-at) (button-at pos))) + (button-type (and button (button-type button))) + (button-label (and button (button-label button))) + (widget (or wid-field wid-button wid-doc))) + (with-current-buffer output-buffer + ;; Widgets + (when (widgetp widget) + (newline) + (widget-insert (cond (wid-field "This is an editable text area") + (wid-button "This is an active area") + (wid-doc "This is documentation text"))) + (widget-insert " of a ") + (describe-text-widget widget) + (widget-insert ".\n\n")) + ;; Buttons + (when (and button (not (widgetp wid-button))) + (newline) + (widget-insert "Here is a " (format "%S" button-type) + " button labeled `" button-label "'.\n\n")) + ;; Overlays + (when overlays + (newline) + (if (eq (length overlays) 1) + (widget-insert "There is an overlay here:\n") + (widget-insert "There are " (format "%d" (length overlays)) + " overlays here:\n")) + (dolist (overlay overlays) + (widget-insert " From " (format "%d" (overlay-start overlay)) + " to " (format "%d" (overlay-end overlay)) "\n") + (describe-property-list (overlay-properties overlay))) + (widget-insert "\n")) + ;; Text properties + (when properties + (newline) + (widget-insert "There are text properties here:\n") + (describe-property-list properties))))) + +;;; We cannot use the UnicodeData.txt file as such; it is not free. +;;; We can turn that info a different format and release the result +;;; as free data. When that is done, we could reinstate the code below. +;;; For the mean time, here is a dummy placeholder. +;;; -- rms +(defun describe-char-unicode-data (char) nil) + +;;; (defcustom describe-char-unicodedata-file nil +;;; "Location of Unicode data file. +;;; This is the UnicodeData.txt file from the Unicode consortium, used for +;;; diagnostics. If it is non-nil `describe-char-after' will print data +;;; looked up from it. This facility is mostly of use to people doing +;;; multilingual development. + +;;; This is a fairly large file, not typically present on GNU systems. At +;;; the time of writing it is at +;;; ." +;;; :group 'mule +;;; :version "21.5" +;;; :type '(choice (const :tag "None" nil) +;;; file)) + +;;; ;; We could convert the unidata file into a Lispy form once-for-all +;;; ;; and distribute it for loading on demand. It might be made more +;;; ;; space-efficient by splitting strings word-wise and replacing them +;;; ;; with lists of symbols interned in a private obarray, e.g. +;;; ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A). + +;;; ;; Fixme: Check whether this needs updating for Unicode 4. +;;; (defun describe-char-unicode-data (char) +;;; "Return a list of Unicode data for unicode CHAR. +;;; Each element is a list of a property description and the property value. +;;; The list is null if CHAR isn't found in `describe-char-unicodedata-file'." +;;; (when describe-char-unicodedata-file +;;; (unless (file-exists-p describe-char-unicodedata-file) +;;; (error "`unicodedata-file' %s not found" describe-char-unicodedata-file)) +;;; (save-excursion +;;; ;; Find file in fundamental mode to avoid, e.g. flyspell turned +;;; ;; on for .txt. Don't use RAWFILE arg in case of DOS line endings. +;;; (set-buffer (let ((auto-mode-alist)) +;;; (find-file-noselect describe-char-unicodedata-file))) +;;; (goto-char (point-min)) +;;; (let ((hex (format "%04X" char)) +;;; found first last) +;;; (if (re-search-forward (concat "^" hex) nil t) +;;; (setq found t) +;;; ;; It's not listed explicitly. Look for ranges, e.g. CJK +;;; ;; ideographs, and check whether it's in one of them. +;;; (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) +;;; (>= char (setq first +;;; (string-to-number (match-string 1) 16))) +;;; (progn +;;; (forward-line 1) +;;; (looking-at "^\\([^;]+\\);[^;]+Last>;") +;;; (> char +;;; (setq last +;;; (string-to-number (match-string 1) 16)))))) +;;; (if (and (>= char first) +;;; (<= char last)) +;;; (setq found t))) +;;; (if found +;;; (let ((fields (mapcar (lambda (elt) +;;; (if (> (length elt) 0) +;;; elt)) +;;; (cdr (split-string +;;; (buffer-substring +;;; (line-beginning-position) +;;; (line-end-position)) +;;; ";"))))) +;;; ;; The length depends on whether the last field was empty. +;;; (unless (or (= 13 (length fields)) +;;; (= 14 (length fields))) +;;; (error "Invalid contents in %s" describe-char-unicodedata-file)) +;;; ;; The field names and values lists are slightly +;;; ;; modified from Mule-UCS unidata.el. +;;; (list +;;; (list "Name" (let ((name (nth 0 fields))) +;;; ;; Check for <..., First>, <..., Last> +;;; (if (string-match "\\`\\(<[^,]+\\)," name) +;;; (concat (match-string 1 name) ">") +;;; name))) +;;; (list "Category" +;;; (cdr (assoc +;;; (nth 1 fields) +;;; '(("Lu" . "uppercase letter") +;;; ("Ll" . "lowercase letter") +;;; ("Lt" . "titlecase letter") +;;; ("Mn" . "non-spacing mark") +;;; ("Mc" . "spacing-combining mark") +;;; ("Me" . "enclosing mark") +;;; ("Nd" . "decimal digit") +;;; ("Nl" . "letter number") +;;; ("No" . "other number") +;;; ("Zs" . "space separator") +;;; ("Zl" . "line separator") +;;; ("Zp" . "paragraph separator") +;;; ("Cc" . "other control") +;;; ("Cf" . "other format") +;;; ("Cs" . "surrogate") +;;; ("Co" . "private use") +;;; ("Cn" . "not assigned") +;;; ("Lm" . "modifier letter") +;;; ("Lo" . "other letter") +;;; ("Pc" . "connector punctuation") +;;; ("Pd" . "dash punctuation") +;;; ("Ps" . "open punctuation") +;;; ("Pe" . "close punctuation") +;;; ("Pi" . "initial-quotation punctuation") +;;; ("Pf" . "final-quotation punctuation") +;;; ("Po" . "other punctuation") +;;; ("Sm" . "math symbol") +;;; ("Sc" . "currency symbol") +;;; ("Sk" . "modifier symbol") +;;; ("So" . "other symbol"))))) +;;; (list "Combining class" +;;; (cdr (assoc +;;; (string-to-number (nth 2 fields)) +;;; '((0 . "Spacing") +;;; (1 . "Overlays and interior") +;;; (7 . "Nuktas") +;;; (8 . "Hiragana/Katakana voicing marks") +;;; (9 . "Viramas") +;;; (10 . "Start of fixed position classes") +;;; (199 . "End of fixed position classes") +;;; (200 . "Below left attached") +;;; (202 . "Below attached") +;;; (204 . "Below right attached") +;;; (208 . "Left attached (reordrant around \ +;;; single base character)") +;;; (210 . "Right attached") +;;; (212 . "Above left attached") +;;; (214 . "Above attached") +;;; (216 . "Above right attached") +;;; (218 . "Below left") +;;; (220 . "Below") +;;; (222 . "Below right") +;;; (224 . "Left (reordrant around single base \ +;;; character)") +;;; (226 . "Right") +;;; (228 . "Above left") +;;; (230 . "Above") +;;; (232 . "Above right") +;;; (233 . "Double below") +;;; (234 . "Double above") +;;; (240 . "Below (iota subscript)"))))) +;;; (list "Bidi category" +;;; (cdr (assoc +;;; (nth 3 fields) +;;; '(("L" . "Left-to-Right") +;;; ("LRE" . "Left-to-Right Embedding") +;;; ("LRO" . "Left-to-Right Override") +;;; ("R" . "Right-to-Left") +;;; ("AL" . "Right-to-Left Arabic") +;;; ("RLE" . "Right-to-Left Embedding") +;;; ("RLO" . "Right-to-Left Override") +;;; ("PDF" . "Pop Directional Format") +;;; ("EN" . "European Number") +;;; ("ES" . "European Number Separator") +;;; ("ET" . "European Number Terminator") +;;; ("AN" . "Arabic Number") +;;; ("CS" . "Common Number Separator") +;;; ("NSM" . "Non-Spacing Mark") +;;; ("BN" . "Boundary Neutral") +;;; ("B" . "Paragraph Separator") +;;; ("S" . "Segment Separator") +;;; ("WS" . "Whitespace") +;;; ("ON" . "Other Neutrals"))))) +;;; (list +;;; "Decomposition" +;;; (if (nth 4 fields) +;;; (let* ((parts (split-string (nth 4 fields))) +;;; (info (car parts))) +;;; (if (string-match "\\`<\\(.+\\)>\\'" info) +;;; (setq info (match-string 1 info)) +;;; (setq info nil)) +;;; (if info (setq parts (cdr parts))) +;;; ;; Maybe printing ? for unrepresentable unicodes +;;; ;; here and below should be changed? +;;; (setq parts (mapconcat +;;; (lambda (arg) +;;; (string (or (decode-char +;;; 'ucs +;;; (string-to-number arg 16)) +;;; ??))) +;;; parts " ")) +;;; (concat info parts)))) +;;; (list "Decimal digit value" +;;; (nth 5 fields)) +;;; (list "Digit value" +;;; (nth 6 fields)) +;;; (list "Numeric value" +;;; (nth 7 fields)) +;;; (list "Mirrored" +;;; (if (equal "Y" (nth 8 fields)) +;;; "yes")) +;;; (list "Old name" (nth 9 fields)) +;;; (list "ISO 10646 comment" (nth 10 fields)) +;;; (list "Uppercase" (and (nth 11 fields) +;;; (string (or (decode-char +;;; 'ucs +;;; (string-to-number +;;; (nth 11 fields) 16)) +;;; ??)))) +;;; (list "Lowercase" (and (nth 12 fields) +;;; (string (or (decode-char +;;; 'ucs +;;; (string-to-number +;;; (nth 12 fields) 16)) +;;; ??)))) +;;; (list "Titlecase" (and (nth 13 fields) +;;; (string (or (decode-char +;;; 'ucs +;;; (string-to-number +;;; (nth 13 fields) 16)) +;;; ??))))))))))) + +;;;###autoload +(defun describe-char (pos) + "Describe the character after POS (interactively, the character after point). +The information includes character code, charset and code points in it, +syntax, category, how the character is encoded in a file, +character composition information (if relevant), +as well as widgets, buttons, overlays, and text properties." + (interactive "d") + (if (>= pos (point-max)) + (error "No character follows specified position")) + (let* ((char (char-after pos)) + (charset (char-charset char)) + (buffer (current-buffer)) + (composition (find-composition pos nil nil t)) + (composed (if composition (buffer-substring (car composition) + (nth 1 composition)))) + (multibyte-p enable-multibyte-characters) - item-list max-width unicode) - (if (eq charset 'unknown) ++ item-list max-width) ++ (if (eq charset 'eight-bit) + (setq item-list + `(("character" - ,(format "%s (0%o, %d, 0x%x) -- invalid character code" - (if (< char 256) - (single-key-description char) - (char-to-string char)) - char char char)))) ++ ,(format "%s (0%o, %d, 0x%x) -- raw byte 0x%x" ++ (char-to-string char) char char char ++ (multibyte-char-to-unibyte char))))) + - (if (or (< char 256) - (memq 'mule-utf-8 (find-coding-systems-region pos (1+ pos))) - (get-char-property pos 'untranslated-utf-8)) - (setq unicode (or (get-char-property pos 'untranslated-utf-8) - (encode-char char 'ucs)))) + (setq item-list + `(("character" - ,(format "%s (0%o, %d, 0x%x%s)" (if (< char 256) ++ ,(format "%s (0%o, %d, 0x%x)" (if (< char 256) + (single-key-description char) + (char-to-string char)) - char char char - (if unicode - (format ", U+%04X" unicode) - ""))) - ("charset" ++ char char char)) ++ ("preferred charset" + ,(symbol-name charset) + ,(format "(%s)" (charset-description charset))) + ("code point" - ,(let ((split (split-char char))) - (if (= (charset-dimension charset) 1) - (format "%d" (nth 1 split)) - (format "%d %d" (nth 1 split) (nth 2 split))))) ++ ,(let ((split (split-char char))) ++ (mapconcat #'number-to-string (cdr split) " "))) + ("syntax" + ,(let ((syntax (syntax-after pos))) + (with-temp-buffer + (internal-describe-syntax-value syntax) + (buffer-string)))) + ("category" + ,@(let ((category-set (char-category-set char))) + (if (not category-set) + '("-- none --") + (mapcar #'(lambda (x) (format "%c:%s " + x (category-docstring x))) + (category-set-mnemonics category-set))))) + ,@(let ((props (aref char-code-property-table char)) + ps) + (when props + (while props + (push (format "%s:" (pop props)) ps) + (push (format "%s;" (pop props)) ps)) + (list (cons "Properties" (nreverse ps))))) + ("buffer code" + ,(encoded-string-description + (string-as-unibyte (char-to-string char)) nil)) + ("file code" + ,@(let* ((coding buffer-file-coding-system) + (encoded (encode-coding-char char coding))) + (if encoded + (list (encoded-string-description encoded coding) + (format "(encoded by coding system %S)" coding)) + (list "not encodable by coding system" + (symbol-name coding))))) + ,(if (display-graphic-p (selected-frame)) + (list "font" (or (internal-char-font pos) + "-- none --")) + (list "terminal code" + (let* ((coding (terminal-coding-system)) + (encoded (encode-coding-char char coding))) + (if encoded + (encoded-string-description encoded coding) + "not encodable")))) - ,@(let ((unicodedata (and unicode - (describe-char-unicode-data unicode)))) ++ ,@(let ((unicodedata (unicode-data char))) + (if unicodedata + (cons (list "Unicode data" " ") unicodedata)))))) - (setq max-width (apply #'max (mapcar #'(lambda (x) (length (car x))) ++ (setq max-width (apply #'max (mapcar #'(lambda (x) ++ (if (cadr x) ++ (length (car x)) ++ 0)) + item-list))) + (when (eq (current-buffer) (get-buffer "*Help*")) + (error "Can't describe char in Help buffer")) + (with-output-to-temp-buffer "*Help*" + (with-current-buffer standard-output + (set-buffer-multibyte multibyte-p) + (let ((formatter (format "%%%ds:" max-width))) + (dolist (elt item-list) + (when (cadr elt) + (insert (format formatter (car elt))) + (dolist (clm (cdr elt)) + (when (>= (+ (current-column) + (or (string-match "\n" clm) + (string-width clm)) 1) + (frame-width)) + (insert "\n") + (indent-to (1+ max-width))) + (insert " " clm)) + (insert "\n")))) + (when composition + (insert "\nComposed with the " + (cond + ((eq pos (car composition)) "following ") + ((eq (1+ pos) (cadr composition)) "preceding ") + (t "")) + "character(s) `" + (cond + ((eq pos (car composition)) (substring composed 1)) + ((eq (1+ pos) (cadr composition)) (substring composed 0 -1)) + (t (concat (substring composed 0 (- pos (car composition))) + "' and `" + (substring composed (- (1+ pos) (car composition)))))) + + "' to form `" composed "'") + (if (nth 3 composition) + (insert ".\n") + (insert "\nby the rule (" + (mapconcat (lambda (x) + (format (if (consp x) "%S" "?%c") x)) + (nth 2 composition) + " ") + ").\n" + "See the variable `reference-point-alist' for " + "the meaning of the rule.\n"))) + + (let ((output (current-buffer))) + (with-current-buffer buffer + (describe-text-properties pos output)) + (describe-text-mode)))))) + +(defalias 'describe-char-after 'describe-char) +(make-obsolete 'describe-char-after 'describe-char "21.5") + +(provide 'descr-text) + +;;; descr-text.el ends here diff --cc lisp/desktop.el index a5a898e2cd5,28f6498dc9f..41884f787d5 --- a/lisp/desktop.el +++ b/lisp/desktop.el @@@ -629,40 -521,36 +629,40 @@@ DIRNAME must be the directory in which (set-buffer buf) (erase-buffer) - (insert ";; -*- coding: utf-8-emacs; -*-\n" - desktop-header - ";; Created " (current-time-string) "\n" - ";; Emacs version " emacs-version "\n\n" - ";; Global section:\n") + (insert - ";; -*- coding: emacs-mule; -*-\n" ++ ";; -*- coding: utf-8-emacs; -*-\n" + desktop-header + ";; Created " (current-time-string) "\n" + ";; Desktop file format version " desktop-file-version "\n" + ";; Emacs version " emacs-version "\n\n" + ";; Global section:\n") (mapcar (function desktop-outvar) desktop-globals-to-save) (if (memq 'kill-ring desktop-globals-to-save) - (insert "(setq kill-ring-yank-pointer (nthcdr " - (int-to-string - (- (length kill-ring) (length kill-ring-yank-pointer))) - " kill-ring))\n")) + (insert + "(setq kill-ring-yank-pointer (nthcdr " + (int-to-string (- (length kill-ring) (length kill-ring-yank-pointer))) + " kill-ring))\n")) - (insert "\n;; Buffer section:\n") + (insert "\n;; Buffer section -- buffers listed in same order as in buffer list:\n") (mapcar - (function (lambda (l) - (if (apply 'desktop-save-buffer-p l) - (progn - (insert desktop-create-buffer-form) - (mapcar - (function (lambda (e) - (insert "\n " - (desktop-value-to-string e)))) - l) - (insert ")\n\n"))))) - info) + (function + (lambda (l) + (if (apply 'desktop-save-buffer-p l) + (progn + (insert "(desktop-create-buffer " desktop-file-version) + (mapcar + (function + (lambda (e) + (insert "\n " (desktop-value-to-string e)))) + l) + (insert ")\n\n"))))) + info) (setq default-directory dirname) - (if (file-exists-p filename) (delete-file filename)) + (when (file-exists-p filename) (delete-file filename)) - (let ((coding-system-for-write 'emacs-mule)) + (let ((coding-system-for-write 'utf-8-emacs)) - (write-region (point-min) (point-max) filename nil 'nomessage)))) + (write-region (point-min) (point-max) filename nil 'nomessage)))) (setq desktop-dirname dirname)) + ;; ---------------------------------------------------------------------------- (defun desktop-remove () "Delete the Desktop file and inactivate the desktop system." diff --cc lisp/emacs-lisp/byte-opt.el index c02e8b02dea,dc95be958bf..47cbec1fbc9 --- a/lisp/emacs-lisp/byte-opt.el +++ b/lisp/emacs-lisp/byte-opt.el @@@ -1207,51 -1148,41 +1207,54 @@@ '(% * + - / /= 1+ 1- < <= = > >= abs acos append aref ash asin atan assoc assq boundp buffer-file-name buffer-local-variables buffer-modified-p - buffer-substring + buffer-substring byte-code-function-p capitalize car-less-than-car car cdr ceiling char-after char-before - concat coordinates-in-window-p - char-width copy-marker cos count-lines - decode-char default-boundp default-value documentation downcase - elt encode-char exp expt fboundp featurep + char-equal char-to-string char-width + compare-strings concat coordinates-in-window-p + copy-alist copy-sequence copy-marker cos count-lines ++ decdoe-char + decode-time default-boundp default-value documentation downcase - elt exp expt encode-time error-message-string ++ elt encode-char exp expt encode-time error-message-string + fboundp fceiling featurep ffloor file-directory-p file-exists-p file-locked-p file-name-absolute-p file-newer-than-file-p file-readable-p file-symlink-p file-writable-p - float floor format frame-visible-p + float float-time floor format format-time-string frame-visible-p + fround ftruncate get gethash get-buffer get-buffer-window getenv get-file-buffer hash-table-count - int-to-string + int-to-string intern-soft keymap-parent length local-variable-if-set-p local-variable-p log log10 logand - logb logior lognot logxor lsh + logb logior lognot logxor lsh langinfo + make-list make-string make-symbol marker-buffer max member memq min mod multibyte-char-to-unibyte next-window nth nthcdr number-to-string - parse-colon-path prefix-numeric-value previous-window propertize - radians-to-degrees rassq regexp-quote reverse round + parse-colon-path plist-get plist-member + prefix-numeric-value previous-window prin1-to-string propertize + radians-to-degrees rassq rassoc read-from-string regexp-quote + region-beginning region-end reverse round sin sqrt string string< string= string-equal string-lessp string-to-char - string-to-int string-to-number substring symbol-function symbol-plist - symbol-value string-make-unibyte string-make-multibyte - string-to-multibyte string-as-multibyte string-as-unibyte - tan unibyte-char-to-multibyte upcase user-variable-p vconcat + string-to-int string-to-number substring sxhash symbol-function + symbol-name symbol-plist symbol-value string-make-unibyte + string-make-multibyte string-as-multibyte string-as-unibyte ++ string-to-multibyte + tan truncate + unibyte-char-to-multibyte upcase user-full-name + user-login-name user-original-login-name user-variable-p + vconcat window-buffer window-dedicated-p window-edges window-height window-hscroll window-minibuffer-p window-width zerop)) (side-effect-and-error-free-fns '(arrayp atom - bobp bolp buffer-end buffer-list buffer-size buffer-string bufferp + bobp bolp bool-vector-p + buffer-end buffer-list buffer-size buffer-string bufferp - car-safe case-table-p cdr-safe char-or-string-p commandp cons consp + car-safe case-table-p cdr-safe char-or-string-p characterp + charsetp commandp cons consp current-buffer current-global-map current-indentation - current-local-map current-minor-mode-maps - dot dot-marker eobp eolp eq equal eventp + current-local-map current-minor-mode-maps current-time + current-time-string current-time-zone + eobp eolp eq equal eventp floatp following-char framep get-largest-window get-lru-window hash-table-p @@@ -1259,20 -1190,21 +1262,22 @@@ invocation-directory invocation-name keymapp line-beginning-position line-end-position list listp - make-marker mark mark-marker markerp memory-limit minibuffer-window - make-marker mark mark-marker markerp max-char memory-limit minibuffer-window ++ make-marker mark mark-marker markerp max-char ++ memory-limit minibuffer-window mouse-movement-p natnump nlistp not null number-or-marker-p numberp one-window-p overlayp - point point-marker point-min point-max preceding-char processp + point point-marker point-min point-max preceding-char primary-charset + processp recent-keys recursion-depth - selected-frame selected-window sequencep stringp subrp symbolp - standard-case-table standard-syntax-table syntax-table-p + safe-length selected-frame selected-window sequencep + standard-case-table standard-syntax-table stringp subrp symbolp + syntax-table syntax-table-p this-command-keys this-command-keys-vector this-single-command-keys this-single-command-raw-keys - user-full-name user-login-name user-original-login-name user-real-login-name user-real-uid user-uid vector vectorp visible-frame-list - window-configuration-p window-live-p windowp))) + wholenump window-configuration-p window-live-p windowp))) (while side-effect-free-fns (put (car side-effect-free-fns) 'side-effect-free t) (setq side-effect-free-fns (cdr side-effect-free-fns))) diff --cc lisp/emacs-lisp/bytecomp.el index 43ce86921e8,fdb955b490a..b9c864a6792 --- a/lisp/emacs-lisp/bytecomp.el +++ b/lisp/emacs-lisp/bytecomp.el @@@ -1797,7 -1603,7 +1797,7 @@@ With argument, insert value in current (set-buffer outbuffer) (goto-char 1) ;; The magic number of .elc files is ";ELC", or 0x3B454C43. After - ;; that is the file-format version number (18, 19 or 20) as a - ;; that is the file-format version number (18, 19, 20 or 22) as a ++ ;; that is the file-format version number (18, 19, 20, or 22) as a ;; byte, followed by some nulls. The primary motivation for doing ;; this is to get some binary characters up in the first line of ;; the file so that `diff' will simply say "Binary files differ" @@@ -3235,9 -2978,9 +3235,11 @@@ If FORM is a lambda or a macro, byte-co (byte-defop-compiler-1 mapatoms byte-compile-funarg) (byte-defop-compiler-1 mapconcat byte-compile-funarg) (byte-defop-compiler-1 mapc byte-compile-funarg) -(byte-defop-compiler-1 sort byte-compile-funarg-2) +(byte-defop-compiler-1 maphash byte-compile-funarg) +(byte-defop-compiler-1 map-char-table byte-compile-funarg) + (byte-defop-compiler-1 map-char-table byte-compile-funarg-2) + ;; map-charset-chars should be funarg but has optional third arg +(byte-defop-compiler-1 sort byte-compile-funarg-2) (byte-defop-compiler-1 let) (byte-defop-compiler-1 let*) diff --cc lisp/emacs-lisp/copyright.el index c2ad007e3a8,9411c8be8ec..43b32e42d2e --- a/lisp/emacs-lisp/copyright.el +++ b/lisp/emacs-lisp/copyright.el @@@ -42,14 -40,12 +42,9 @@@ A value of nil means to search whole bu :type '(choice (integer :tag "Limit") (const :tag "No limit"))) --;; Would it be cleaner to specify Latin-1 coding for this file, --;; and not use both unibyte and multibyte copyright symbol characters? -- - ;; The character classes include the unibyte (C) sign, - ;; the Latin-1 version, and the Latin-9 version. (defcustom copyright-regexp - "\\([©Ž©]\\|@copyright{}\\|[Cc]opyright\\s *:?\\s *\\(?:(C)\\)?\ - \\|[Cc]opyright\\s *:?\\s *[©Ž©]\\)\ - "\\(©\\|@copyright{}\\|[Cc]opyright\\s *:?\\s *\\(?:(C)\\)?\ ++ "\\(©\|@copyright{}\\|[Cc]opyright\\s *:?\\s *\\(?:(C)\\)?\ + \\|[Cc]opyright\\s *:?\\s *©\\)\ \\s *\\([1-9]\\([-0-9, ';\n\t]\\|\\s<\\|\\s>\\)*[0-9]+\\)" "*What your copyright notice looks like. The second \\( \\) construct must match the years." diff --cc lisp/eshell/esh-mode.el index d4df95ea0f7,97219d85532..9a7a52ee72d --- a/lisp/eshell/esh-mode.el +++ b/lisp/eshell/esh-mode.el @@@ -280,8 -280,11 +280,11 @@@ This is used by `eshell-watch-for-passw (map-char-table (function (lambda (key val) - (and (>= key 256) - (/= (char-syntax key) ?w) + (and (if (consp key) - (and (>= (car key) 128) - (/= (char-syntax (car key)) ?w)) ++ (and (>= (car key) 128) ++ (/= (char-syntax (car key)) ?w)) + (and (>= key 256) + (/= (char-syntax key) ?w))) (modify-syntax-entry key "_ " eshell-mode-syntax-table)))) (standard-syntax-table))))) diff --cc lisp/files.el index 5ec82a39206,d1dedf0e3a4..f4e62a71d38 --- a/lisp/files.el +++ b/lisp/files.el @@@ -3533,21 -3071,11 +3533,21 @@@ non-nil, it is called instead of reread (let ((coding-system-for-read ;; Auto-saved file shoule be read without ;; any code conversion. - (if auto-save-p 'emacs-mule-unix - (if auto-save-p 'utf-8-emacs-unix - coding-system-for-read))) ++ (if auto-save-p 'utf-8-emacs + (or coding-system-for-read + buffer-file-coding-system)))) + ;; This force after-insert-file-set-coding + ;; (called from insert-file-contents) to set + ;; buffer-file-coding-system to a proper value. + (kill-local-variable 'buffer-file-coding-system) + ;; Note that this preserves point in an intelligent way. - (insert-file-contents file-name (not auto-save-p) - nil nil t)))) + (if preserve-modes + (let ((buffer-file-format buffer-file-format)) + (insert-file-contents file-name (not auto-save-p) + nil nil t)) + (insert-file-contents file-name (not auto-save-p) + nil nil t))))) ;; Recompute the truename in case changes in symlinks ;; have changed the truename. (setq buffer-file-truename diff --cc lisp/gnus/ChangeLog.22 index 00000000000,00000000000..78f35a703c1 new file mode 100644 --- /dev/null +++ b/lisp/gnus/ChangeLog.22 @@@ -1,0 -1,0 +1,108 @@@ ++2002-10-16 Dave Love ++ ++ * mm-bodies.el (mm-encode-body): Doc fix. ++ ++ * qp.el (quoted-printable-encode-region): Fix non-Emacs 22 case. ++ ++2002-10-09 Dave Love ++ ++ * qp.el (quoted-printable-encode-region): Fix non-multibyte search ++ for Emacs 22. ++ ++2002-10-07 Dave Love ++ ++ * qp.el (quoted-printable-encode-region): Go to start of range ++ before searching. ++ (quoted-printable-encode-region): Use multibyte-char-to-unibyte. ++ ++2002-09-05 Dave Love ++ ++ * qp.el (quoted-printable-decode-region): Use mm-insert-byte. ++ ++ * mm-util.el (mm-hack-charsets, mm-iso-8859-15-compatible) ++ (mm-iso-8859-x-to-15-table, mm-iso-8859-x-to-15-region): Deleted. ++ (mm-find-mime-charset-region): Remove hack-charsets stuff. ++ (mm-insert-byte): New. ++ ++ * rfc2047.el (message-posting-charset): defvar when compiling. ++ (rfc2047-header-encoding-alist): Add `address-mime' part. ++ (rfc2047-charset-encoding-alist): Use B for iso-8859-7. Doc fix. ++ (rfc2047-q-encoding-alist): Augment header list. ++ (rfc2047-encodable-p): Use mm-find-mime-charset-region. ++ (rfc2047-special-chars, rfc2047-non-special-chars): New. ++ (rfc2047-dissect-region, rfc2047-encode-region, rfc2047-encode): ++ Rewritten to avoid charset stuff and to take account of rfc2822 ++ tokens. ++ (rfc2047-encode-message-header): Don't include header name field ++ in encoding. Add `address-mime' case and bind ++ rfc2047-special-chars for `mime' case. ++ ++2002-08-21 Kenichi Handa ++ ++ * qp.el (quoted-printable-decode-region): Insert bytes by ++ `insert-byte'. ++ ++2002-08-18 Dave Love ++ ++ * rfc2047.el (rfc2047-encode): Fix last change. ++ ++2002-07-30 Dave Love ++ ++ * rfc2047.el (rfc2047-charset-encoding-alist): Use B for Hebrew. ++ Doc fix. ++ ++ * gnus-start.el (gnus-read-newsrc-el-file): Don't bind ++ coding-system-for-read. ++ (gnus-gnus-to-quick-newsrc-format): Insert coding cookie. ++ ++2002-07-09 Dave Love ++ ++ * mm-util.el (mm-find-mime-charset-region): Fix :mime-charset ++ part. ++ ++2002-06-26 Dave Love ++ ++ * rfc2047.el (rfc2047-encodable-p): Avoid mm-find-charset-region. ++ (rfc2047-dissect-region): Don't record charsets. ++ (rfc2047-encode): Remove arg CHARSET. ++ (rfc2047-encode-region): Change rfc2047-encode call. ++ ++2002-06-24 Dave Love ++ ++ * mm-util.el (mm-mule4-p, mm-enable-multibyte-mule4) ++ (mm-disable-multibyte-mule4) ++ (mm-with-unibyte-current-buffer-mule4): Deleted. ++ ++ * gnus-sum.el (gnus-summary-mode, gnus-summary-display-article) ++ (gnus-summary-select-article, gnus-summary-edit-article): Use ++ mm-{en,dis}able-multibyte, not mm-{en,dis}able-multibyte-mule4. ++ ++ * message.el (message-forward-make-body): Use ++ mm-{en,dis}able-multibyte, not mm-{en,dis}able-multibyte-mule4. ++ ++ * qp.el (quoted-printable-encode-region): Avoid ++ find-charset-region. ++ ++ * mm-bodies.el (mm-body-7-or-8): Don't special-case Mule. ++ (mm-encode-body): Just call mm-encode-coding-region in encoding ++ case. ++ ++2002-05-27 Dave Love ++ ++ * mm-util.el (mm-auto-save-coding-system): Prefer utf-8-emacs ++ coding system to emacs-mule. ++ ++2002-05-14 Dave Love ++ ++ * mm-util.el (mm-mime-mule-charset-alist) ++ (mm-mule-charset-to-mime-charset, mm-charset-to-coding-system) ++ (mm-mime-charset, mm-find-mime-charset-region): Look for ++ `:mime-charset' property of coding systems before `mime-charset'. ++ ++;; Local Variables: ++;; coding: iso-2022-7bit ++;; End: ++ ++ Copyright (C) 2002 Free Software Foundation, Inc. ++ Copying and distribution of this file, with or without modification, ++ are permitted provided the copyright notice and this notice are preserved. diff --cc lisp/gnus/gnus-sum.el index 024230c5074,07b1d33414f..e1c9ddd23aa --- a/lisp/gnus/gnus-sum.el +++ b/lisp/gnus/gnus-sum.el @@@ -1,5 -1,5 +1,5 @@@ ;;; gnus-sum.el --- summary mode commands for Gnus - ;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 -;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002 ++;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003 ;; Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen diff --cc lisp/gnus/mm-util.el index f80560e63c1,9c0e34f2d86..4ad91852432 --- a/lisp/gnus/mm-util.el +++ b/lisp/gnus/mm-util.el @@@ -521,9 -462,11 +471,11 @@@ charset, and a longer list means no app (setq systems nil charsets (list cs)))))) charsets)) + ;; Fixme: won't work for unibyte Emacs 22: + ;; Otherwise we're not multibyte, XEmacs or a single coding ;; system won't cover it. - (setq charsets + (setq charsets (mm-delete-duplicates (mapcar 'mm-mime-charset (delq 'ascii diff --cc lisp/international/characters.el index 68a1fa9ead1,4fe00017b4e..bd353c53b01 --- a/lisp/international/characters.el +++ b/lisp/international/characters.el @@@ -1,8 -1,11 +1,11 @@@ ;;; characters.el --- set syntax and category for multibyte characters ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. -;; Copyright (C) 2001, 2002 ++;; Copyright (C) 2003 + ;; National Institute of Advanced Industrial Science and Technology (AIST) + ;; Registration Number H13PRO009 ;; Keywords: multibyte character, character set, syntax, category @@@ -25,12 -28,12 +28,6 @@@ ;;; Commentary: --;; This file contains multibyte characters. Save this file always in --;; the coding system `iso-2022-7bit'. -- --;; This file does not define the syntax for Latin-N character sets; --;; those are defined by the files latin-N.el. -- ;;; Code: ;;; Predefined categories. @@@ -230,399 -146,59 +140,62 @@@ ;; Chinese character set (CNS11643) - (let ((cns-list '(chinese-cns11643-1 - chinese-cns11643-2 - chinese-cns11643-3 - chinese-cns11643-4 - chinese-cns11643-5 - chinese-cns11643-6 - chinese-cns11643-7)) - generic-char) - (while cns-list - (setq generic-char (make-char (car cns-list))) - ;; (modify-syntax-entry generic-char "w") - (modify-category-entry generic-char ?c) - (modify-category-entry generic-char ?C) - (modify-category-entry generic-char ?|) - (setq cns-list (cdr cns-list)))) - - ;; Cyrillic character set (ISO-8859-5) - - (modify-category-entry (make-char 'cyrillic-iso8859-5) ?y) - - (modify-syntax-entry (make-char 'cyrillic-iso8859-5 160) " ") - (modify-syntax-entry ?,L-(B ".") - (modify-syntax-entry ?,Lp(B ".") - (modify-syntax-entry ?,L}(B ".") - (let ((tbl (standard-case-table))) - (set-case-syntax-pair ?,L!(B ?,Lq(B tbl) - (set-case-syntax-pair ?,L"(B ?,Lr(B tbl) - (set-case-syntax-pair ?,L#(B ?,Ls(B tbl) - (set-case-syntax-pair ?,L$(B ?,Lt(B tbl) - (set-case-syntax-pair ?,L%(B ?,Lu(B tbl) - (set-case-syntax-pair ?,L&(B ?,Lv(B tbl) - (set-case-syntax-pair ?,L'(B ?,Lw(B tbl) - (set-case-syntax-pair ?,L((B ?,Lx(B tbl) - (set-case-syntax-pair ?,L)(B ?,Ly(B tbl) - (set-case-syntax-pair ?,L*(B ?,Lz(B tbl) - (set-case-syntax-pair ?,L+(B ?,L{(B tbl) - (set-case-syntax-pair ?,L,(B ?,L|(B tbl) - (set-case-syntax-pair ?,L.(B ?,L~(B tbl) - (set-case-syntax-pair ?,L/(B ?,L(B tbl) - (set-case-syntax-pair ?,L0(B ?,LP(B tbl) - (set-case-syntax-pair ?,L1(B ?,LQ(B tbl) - (set-case-syntax-pair ?,L2(B ?,LR(B tbl) - (set-case-syntax-pair ?,L3(B ?,LS(B tbl) - (set-case-syntax-pair ?,L4(B ?,LT(B tbl) - (set-case-syntax-pair ?,L5(B ?,LU(B tbl) - (set-case-syntax-pair ?,L6(B ?,LV(B tbl) - (set-case-syntax-pair ?,L7(B ?,LW(B tbl) - (set-case-syntax-pair ?,L8(B ?,LX(B tbl) - (set-case-syntax-pair ?,L9(B ?,LY(B tbl) - (set-case-syntax-pair ?,L:(B ?,LZ(B tbl) - (set-case-syntax-pair ?,L;(B ?,L[(B tbl) - (set-case-syntax-pair ?,L<(B ?,L\(B tbl) - (set-case-syntax-pair ?,L=(B ?,L](B tbl) - (set-case-syntax-pair ?,L>(B ?,L^(B tbl) - (set-case-syntax-pair ?,L?(B ?,L_(B tbl) - (set-case-syntax-pair ?,L@(B ?,L`(B tbl) - (set-case-syntax-pair ?,LA(B ?,La(B tbl) - (set-case-syntax-pair ?,LB(B ?,Lb(B tbl) - (set-case-syntax-pair ?,LC(B ?,Lc(B tbl) - (set-case-syntax-pair ?,LD(B ?,Ld(B tbl) - (set-case-syntax-pair ?,LE(B ?,Le(B tbl) - (set-case-syntax-pair ?,LF(B ?,Lf(B tbl) - (set-case-syntax-pair ?,LG(B ?,Lg(B tbl) - (set-case-syntax-pair ?,LH(B ?,Lh(B tbl) - (set-case-syntax-pair ?,LI(B ?,Li(B tbl) - (set-case-syntax-pair ?,LJ(B ?,Lj(B tbl) - (set-case-syntax-pair ?,LK(B ?,Lk(B tbl) - (set-case-syntax-pair ?,LL(B ?,Ll(B tbl) - (set-case-syntax-pair ?,LM(B ?,Lm(B tbl) - (set-case-syntax-pair ?,LN(B ?,Ln(B tbl) - (set-case-syntax-pair ?,LO(B ?,Lo(B tbl) - (set-case-syntax-pair ?$,1(!(B ?$,1(q(B tbl) - (set-case-syntax-pair ?$,1("(B ?$,1(r(B tbl) - (set-case-syntax-pair ?$,1(#(B ?$,1(s(B tbl) - (set-case-syntax-pair ?$,1($(B ?$,1(t(B tbl) - (set-case-syntax-pair ?$,1(%(B ?$,1(u(B tbl) - (set-case-syntax-pair ?$,1(&(B ?$,1(v(B tbl) - (set-case-syntax-pair ?$,1('(B ?$,1(w(B tbl) - (set-case-syntax-pair ?$,1(((B ?$,1(x(B tbl) - (set-case-syntax-pair ?$,1()(B ?$,1(y(B tbl) - (set-case-syntax-pair ?$,1(*(B ?$,1(z(B tbl) - (set-case-syntax-pair ?$,1(+(B ?$,1({(B tbl) - (set-case-syntax-pair ?$,1(,(B ?$,1(|(B tbl) - (set-case-syntax-pair ?$,1(.(B ?$,1(~(B tbl) - (set-case-syntax-pair ?$,1(/(B ?$,1((B tbl) - (set-case-syntax-pair ?$,1(0(B ?$,1(P(B tbl) - (set-case-syntax-pair ?$,1(1(B ?$,1(Q(B tbl) - (set-case-syntax-pair ?$,1(2(B ?$,1(R(B tbl) - (set-case-syntax-pair ?$,1(3(B ?$,1(S(B tbl) - (set-case-syntax-pair ?$,1(4(B ?$,1(T(B tbl) - (set-case-syntax-pair ?$,1(5(B ?$,1(U(B tbl) - (set-case-syntax-pair ?$,1(6(B ?$,1(V(B tbl) - (set-case-syntax-pair ?$,1(7(B ?$,1(W(B tbl) - (set-case-syntax-pair ?$,1(8(B ?$,1(X(B tbl) - (set-case-syntax-pair ?$,1(9(B ?$,1(Y(B tbl) - (set-case-syntax-pair ?$,1(:(B ?$,1(Z(B tbl) - (set-case-syntax-pair ?$,1(;(B ?$,1([(B tbl) - (set-case-syntax-pair ?$,1(<(B ?$,1(\(B tbl) - (set-case-syntax-pair ?$,1(=(B ?$,1(](B tbl) - (set-case-syntax-pair ?$,1(>(B ?$,1(^(B tbl) - (set-case-syntax-pair ?$,1(?(B ?$,1(_(B tbl) - (set-case-syntax-pair ?$,1(@(B ?$,1(`(B tbl) - (set-case-syntax-pair ?$,1(A(B ?$,1(a(B tbl) - (set-case-syntax-pair ?$,1(B(B ?$,1(b(B tbl) - (set-case-syntax-pair ?$,1(C(B ?$,1(c(B tbl) - (set-case-syntax-pair ?$,1(D(B ?$,1(d(B tbl) - (set-case-syntax-pair ?$,1(E(B ?$,1(e(B tbl) - (set-case-syntax-pair ?$,1(F(B ?$,1(f(B tbl) - (set-case-syntax-pair ?$,1(G(B ?$,1(g(B tbl) - (set-case-syntax-pair ?$,1(H(B ?$,1(h(B tbl) - (set-case-syntax-pair ?$,1(I(B ?$,1(i(B tbl) - (set-case-syntax-pair ?$,1(J(B ?$,1(j(B tbl) - (set-case-syntax-pair ?$,1(K(B ?$,1(k(B tbl) - (set-case-syntax-pair ?$,1(L(B ?$,1(l(B tbl) - (set-case-syntax-pair ?$,1(M(B ?$,1(m(B tbl) - (set-case-syntax-pair ?$,1(N(B ?$,1(n(B tbl) - (set-case-syntax-pair ?$,1(O(B ?$,1(o(B tbl)) - - ;; Devanagari character set - - ;;; Commented out since the categories appear not to be used anywhere - ;;; and word syntax is the default. - ;; (let ((deflist '(;; chars syntax category - ;; ("$(5!!!"!#(B" "w" ?7) ; vowel-modifying diacritical mark - ;; ; chandrabindu, anuswar, visarga - ;; ("$(5!$(B-$(5!2(B" "w" ?1) ; independent vowel - ;; ("$(5!3(B-$(5!X(B" "w" ?0) ; consonant - ;; ("$(5!Z(B-$(5!g(B" "w" ?8) ; matra - ;; ("$(5!q(B-$(5!z(B" "w" ?6) ; digit - ;; ;; Unicode equivalents - ;; ("$,15A5B5C(B" "w" ?7) ; vowel-modifying diacritical mark - ;; ; chandrabindu, anuswar, visarga - ;; ("$,15E(B-$,15M(B" "w" ?1) ; independent vowel - ;; ("$,15U(B-$,15y(B" "w" ?0) ; consonant - ;; ("$,15~(B-$,16)(B" "w" ?8) ; matra - ;; ("$,16F(B-$,16O(B" "w" ?6) ; digit - ;; )) - ;; elm chars len syntax category to ch i) - ;; (while deflist - ;; (setq elm (car deflist)) - ;; (setq chars (car elm) - ;; len (length chars) - ;; syntax (nth 1 elm) - ;; category (nth 2 elm) - ;; i 0) - ;; (while (< i len) - ;; (if (= (aref chars i) ?-) - ;; (setq i (1+ i) - ;; to (aref chars i)) - ;; (setq ch (aref chars i) - ;; to ch)) - ;; (while (<= ch to) - ;; (modify-syntax-entry ch syntax) - ;; (modify-category-entry ch category) - ;; (setq ch (1+ ch))) - ;; (setq i (1+ i))) - ;; (setq deflist (cdr deflist)))) - - ;; Ethiopic character set - - (modify-category-entry (make-char 'ethiopic) ?e) - ;; (modify-syntax-entry (make-char 'ethiopic) "w") - (dotimes (i (1+ (- #x137c #x1200))) - (modify-category-entry (decode-char 'ucs (+ #x1200 i)) ?e)) - (let ((chars '(?$(3$h(B ?$(3$i(B ?$(3$j(B ?$(3$k(B ?$(3$l(B ?$(3$m(B ?$(3$n(B ?$(3$o(B ?$(3%i(B ?$(3%t(B ?$(3%u(B ?$(3%v(B ?$(3%w(B ?$(3%x(B - ;; Unicode equivalents of the above: - ?$,1Q!(B ?$,1Q"(B ?$,1Q#(B ?$,1Q$(B ?$,1Q%(B ?$,1Q&(B ?$,1Q'(B ?$,1Q((B ?$,3op(B ?$,3o{(B ?$,3o|(B ?$,3o}(B ?$,3o~(B ?$,3o(B))) - (while chars - (modify-syntax-entry (car chars) ".") - (setq chars (cdr chars)))) - - ;; Greek character set (ISO-8859-7) - - (modify-category-entry (make-char 'greek-iso8859-7) ?g) - (let ((c #x370)) - (while (<= c #x3ff) - (modify-category-entry (decode-char 'ucs c) ?g) - (setq c (1+ c)))) - - ;; (let ((c 182)) - ;; (while (< c 255) - ;; (modify-syntax-entry (make-char 'greek-iso8859-7 c) "w") - ;; (setq c (1+ c)))) - ;; (modify-syntax-entry (make-char 'greek-iso8859-7 160) "w") ; NBSP - (modify-syntax-entry ?,F7(B ".") - (modify-syntax-entry ?,F;(B ".") - (modify-syntax-entry ?,F=(B ".") - (let ((tbl (standard-case-table))) - ;; Fixme: non-letter syntax copied from latin-1, but that's dubious - ;; in several cases. - (set-case-syntax ?,F!(B "." tbl) - (set-case-syntax ?,F"(B "." tbl) - (set-case-syntax ?,F&(B "." tbl) - (set-case-syntax ?,F&(B "_" tbl) - (set-case-syntax ?,F'(B "." tbl) - (set-case-syntax ?,F)(B "_" tbl) - (set-case-syntax ?,F+(B "." tbl) - (set-case-syntax ?,F,(B "_" tbl) - (set-case-syntax ?,F-(B "_" tbl) - (set-case-syntax ?,F/(B "." tbl) - (set-case-syntax ?,F0(B "_" tbl) - (set-case-syntax ?,F1(B "_" tbl) - ;; (set-case-syntax ?,F7(B "_" tbl) - ;; (set-case-syntax ?,F=(B "_" tbl) - (set-case-syntax-pair ?,FA(B ?,Fa(B tbl) - (set-case-syntax-pair ?,FB(B ?,Fb(B tbl) - (set-case-syntax-pair ?,FC(B ?,Fc(B tbl) - (set-case-syntax-pair ?,FD(B ?,Fd(B tbl) - (set-case-syntax-pair ?,FE(B ?,Fe(B tbl) - (set-case-syntax-pair ?,FF(B ?,Ff(B tbl) - (set-case-syntax-pair ?,FG(B ?,Fg(B tbl) - (set-case-syntax-pair ?,FH(B ?,Fh(B tbl) - (set-case-syntax-pair ?,FI(B ?,Fi(B tbl) - (set-case-syntax-pair ?,FJ(B ?,Fj(B tbl) - (set-case-syntax-pair ?,FK(B ?,Fk(B tbl) - (set-case-syntax-pair ?,FL(B ?,Fl(B tbl) - (set-case-syntax-pair ?,FM(B ?,Fm(B tbl) - (set-case-syntax-pair ?,FN(B ?,Fn(B tbl) - (set-case-syntax-pair ?,FO(B ?,Fo(B tbl) - (set-case-syntax-pair ?,FP(B ?,Fp(B tbl) - (set-case-syntax-pair ?,FQ(B ?,Fq(B tbl) - (set-case-syntax-pair ?,FS(B ?,Fs(B tbl) - (set-case-syntax-pair ?,FT(B ?,Ft(B tbl) - (set-case-syntax-pair ?,FU(B ?,Fu(B tbl) - (set-case-syntax-pair ?,FV(B ?,Fv(B tbl) - (set-case-syntax-pair ?,FW(B ?,Fw(B tbl) - (set-case-syntax-pair ?,FX(B ?,Fx(B tbl) - (set-case-syntax-pair ?,FY(B ?,Fy(B tbl) - (set-case-syntax-pair ?,FZ(B ?,Fz(B tbl) - (set-case-syntax-pair ?,F[(B ?,F{(B tbl) - (set-case-syntax-pair ?,F?(B ?,F~(B tbl) - (set-case-syntax-pair ?,F>(B ?,F}(B tbl) - (set-case-syntax-pair ?,F<(B ?,F|(B tbl) - (set-case-syntax-pair ?,F6(B ?,F\(B tbl) - (set-case-syntax-pair ?,F8(B ?,F](B tbl) - (set-case-syntax-pair ?,F9(B ?,F^(B tbl) - (set-case-syntax-pair ?,F:(B ?,F_(B tbl) - ;; Unicode equivalents - (set-case-syntax-pair ?$,1&q(B ?$,1'1(B tbl) - (set-case-syntax-pair ?$,1&r(B ?$,1'2(B tbl) - (set-case-syntax-pair ?$,1&s(B ?$,1'3(B tbl) - (set-case-syntax-pair ?$,1&t(B ?$,1'4(B tbl) - (set-case-syntax-pair ?$,1&u(B ?$,1'5(B tbl) - (set-case-syntax-pair ?$,1&v(B ?$,1'6(B tbl) - (set-case-syntax-pair ?$,1&w(B ?$,1'7(B tbl) - (set-case-syntax-pair ?$,1&x(B ?$,1'8(B tbl) - (set-case-syntax-pair ?$,1&y(B ?$,1'9(B tbl) - (set-case-syntax-pair ?$,1&z(B ?$,1':(B tbl) - (set-case-syntax-pair ?$,1&{(B ?$,1';(B tbl) - (set-case-syntax-pair ?$,1&|(B ?$,1'<(B tbl) - (set-case-syntax-pair ?$,1&}(B ?$,1'=(B tbl) - (set-case-syntax-pair ?$,1&~(B ?$,1'>(B tbl) - (set-case-syntax-pair ?$,1&(B ?$,1'?(B tbl) - (set-case-syntax-pair ?$,1' (B ?$,1'@(B tbl) - (set-case-syntax-pair ?$,1'!(B ?$,1'A(B tbl) - (set-case-syntax-pair ?$,1'#(B ?$,1'C(B tbl) - (set-case-syntax-pair ?$,1'$(B ?$,1'D(B tbl) - (set-case-syntax-pair ?$,1'%(B ?$,1'E(B tbl) - (set-case-syntax-pair ?$,1'&(B ?$,1'F(B tbl) - (set-case-syntax-pair ?$,1''(B ?$,1'G(B tbl) - (set-case-syntax-pair ?$,1'((B ?$,1'H(B tbl) - (set-case-syntax-pair ?$,1')(B ?$,1'I(B tbl) - (set-case-syntax-pair ?$,1'*(B ?$,1'J(B tbl) - (set-case-syntax-pair ?$,1'+(B ?$,1'K(B tbl) - (set-case-syntax-pair ?$,1&o(B ?$,1'N(B tbl) - (set-case-syntax-pair ?$,1&n(B ?$,1'M(B tbl) - (set-case-syntax-pair ?$,1&l(B ?$,1'L(B tbl) - (set-case-syntax-pair ?$,1&f(B ?$,1',(B tbl) - (set-case-syntax-pair ?$,1&h(B ?$,1'-(B tbl) - (set-case-syntax-pair ?$,1&i(B ?$,1'.(B tbl) - (set-case-syntax-pair ?$,1&j(B ?$,1'/(B tbl)) + (dolist (c '(chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3 + chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6 + chinese-cns11643-7)) + (map-charset-chars #'modify-category-entry c ?c) + (if (eq c 'chinese-cns11643-1) + (map-charset-chars #'modify-category-entry c ?C #x4421 #x7E7E) + (map-charset-chars #'modify-category-entry c ?C)) + (map-charset-chars #'modify-category-entry c ?|)) - ;; Hebrew character set (ISO-8859-8) -;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212) ++;; Japanese character set (JISX0201, JISX0208, JISX0212, JISX0213) - (modify-category-entry (make-char 'hebrew-iso8859-8) ?w) - (let ((c #x591)) - (while (<= c #x5f4) - (modify-category-entry (decode-char 'ucs c) ?w) - (setq c (1+ c)))) - - (modify-syntax-entry (make-char 'hebrew-iso8859-8 208) ".") ; PASEQ - (modify-syntax-entry (make-char 'hebrew-iso8859-8 211) ".") ; SOF PASUQ - (modify-syntax-entry (decode-char 'ucs #x5be) ".") ; MAQAF - (modify-syntax-entry (decode-char 'ucs #x5c0) ".") ; PASEQ - (modify-syntax-entry (decode-char 'ucs #x5c3) ".") ; SOF PASUQ - (modify-syntax-entry (decode-char 'ucs #x5f3) ".") ; GERESH - (modify-syntax-entry (decode-char 'ucs #x5f4) ".") ; GERSHAYIM - - ;; (let ((c 224)) - ;; (while (< c 251) - ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 c) "w") - ;; (setq c (1+ c)))) - ;; (modify-syntax-entry (make-char 'hebrew-iso8859-8 160) "w") ; NBSP + (map-charset-chars #'modify-category-entry 'katakana-jisx0201 ?k) - ;; Indian character set (IS 13194 and other Emacs original Indian charsets) + (map-charset-chars #'modify-category-entry 'latin-jisx0201 ?r) - (modify-category-entry (make-char 'indian-is13194) ?i) - (modify-category-entry (make-char 'indian-2-column) ?I) - (modify-category-entry (make-char 'indian-glyph) ?I) - ;; Unicode Devanagari block - (let ((c #x901)) - (while (<= c #x970) - (modify-category-entry (decode-char 'ucs c) ?i) - (setq c (1+ c)))) - - (let ((l '(;; RANGE CATEGORY MEANINGS - (#x01 #x03 ?7) ; vowel modifier - (#x05 #x14 ?1) ; base vowel - (#x15 #x39 ?0) ; consonants - (#x3e #x4d ?8) ; vowel modifier - (#x51 #x54 ?4) ; stress/tone mark - (#x58 #x5f ?0) ; consonants - (#x60 #x61 ?1) ; base vowel - (#x62 #x63 ?8) ; vowel modifier - (#x66 #x6f ?6) ; digits - ))) - (dolist (elt1 '(#x900 #x980 #xa00 #xa80 #xb00 #xb80 #xc00 #xc80 #xd00)) - (dolist (elt2 l) - (let* ((from (car elt2)) - (counts (1+ (- (nth 1 elt2) from))) - (category (nth 2 elt2))) - (dotimes (i counts) - (modify-category-entry (decode-char 'ucs (+ elt1 from i)) - category)))))) - - ;; Japanese character set (JISX0201-kana, JISX0201-roman, JISX0208, JISX0212) - - (modify-category-entry (make-char 'katakana-jisx0201) ?k) - (modify-category-entry (make-char 'katakana-jisx0201) ?j) - (modify-category-entry (make-char 'latin-jisx0201) ?r) - (modify-category-entry (make-char 'japanese-jisx0208) ?j) - (modify-category-entry (make-char 'japanese-jisx0212) ?j) - (modify-category-entry (make-char 'katakana-jisx0201) ?\|) - (modify-category-entry (make-char 'japanese-jisx0208) ?\|) - (modify-category-entry (make-char 'japanese-jisx0212) ?\|) -(dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212)) ++(dolist (l '(katakana-jisx0201 japanese-jisx0208 japanese-jisx0212 ++ japanese-jisx0213-1 japanese-jisx0213-2)) + (map-charset-chars #'modify-category-entry l ?j) - (map-charset-chars #'modify-category-entry l ?\|)) ++ (if (eq l 'japanese-jisx0213-1) ++ (map-charset-chars #'modify-category-entry l ?\| #x2E21 #x7E7F) ++ (map-charset-chars #'modify-category-entry l ?\|))) ;; Unicode equivalents of JISX0201-kana - (let ((c #xff61)) - (while (<= c #xff9f) - (modify-category-entry (decode-char 'ucs c) ?k) - (modify-category-entry (decode-char 'ucs c) ?j) - (modify-category-entry (decode-char 'ucs c) ?\|) - (setq c (1+ c)))) + (let ((range '(#xff61 . #xff9f))) + (modify-category-entry range ?k) + (modify-category-entry range ?j) + (modify-category-entry range ?\|)) ;; Katakana block - (let ((c #x30a0)) - (while (<= c #x30ff) - ;; ?K is double width, ?k isn't specified - (modify-category-entry (decode-char 'ucs c) ?k) - (modify-category-entry (decode-char 'ucs c) ?j) - (modify-category-entry (decode-char 'ucs c) ?\|) - (setq c (1+ c)))) + (let ((range '(#x30a0 . #x30ff))) + ;; ?K is double width, ?k isn't specified + (modify-category-entry range ?K) + (modify-category-entry range ?\|)) ;; Hiragana block - (let ((c #x3040)) - (while (<= c #x309f) - ;; ?H is actually defined to be double width - (modify-category-entry (decode-char 'ucs c) ?H) - ;;(modify-category-entry (decode-char 'ucs c) ?j) - (modify-category-entry (decode-char 'ucs c) ?\|) - (setq c (1+ c)))) -(let ((range '(#x3040 . #x309f))) ++(let ((range '(#x3040 . #x309d))) + ;; ?H is actually defined to be double width + ;;(modify-category-entry range ?H) + ;;(modify-category-entry range ?\|) + ) ;; JISX0208 - ;; (modify-syntax-entry (make-char 'japanese-jisx0208) "w") - (modify-syntax-entry (make-char 'japanese-jisx0208 33) "_") - (modify-syntax-entry (make-char 'japanese-jisx0208 34) "_") - (modify-syntax-entry (make-char 'japanese-jisx0208 40) "_") - (let ((chars '(?$B!<(B ?$B!+(B ?$B!,(B ?$B!3(B ?$B!4(B ?$B!5(B ?$B!6(B ?$B!7(B ?$B!8(B ?$B!9(B ?$B!:(B ?$B!;(B))) - (while chars - (modify-syntax-entry (car chars) "w") - (setq chars (cdr chars)))) - (modify-syntax-entry ?\$B!J(B "($B!K(B") - (modify-syntax-entry ?\$B!N(B "($B!O(B") - (modify-syntax-entry ?\$B!P(B "($B!Q(B") - (modify-syntax-entry ?\$B!V(B "($B!W(B") - (modify-syntax-entry ?\$B!X(B "($B!Y(B") - (modify-syntax-entry ?\$B!K(B ")$B!J(B") - (modify-syntax-entry ?\$B!O(B ")$B!N(B") - (modify-syntax-entry ?\$B!Q(B ")$B!P(B") - (modify-syntax-entry ?\$B!W(B ")$B!V(B") - (modify-syntax-entry ?\$B!Y(B ")$B!X(B") - - (modify-category-entry (make-char 'japanese-jisx0208 35) ?A) - (modify-category-entry (make-char 'japanese-jisx0208 36) ?H) - (modify-category-entry (make-char 'japanese-jisx0208 37) ?K) - (modify-category-entry (make-char 'japanese-jisx0208 38) ?G) - (modify-category-entry (make-char 'japanese-jisx0208 39) ?Y) - (let ((row 48)) - (while (< row 127) - (modify-category-entry (make-char 'japanese-jisx0208 row) ?C) - (setq row (1+ row)))) - (modify-category-entry ?$B!<(B ?K) - (let ((chars '(?$B!+(B ?$B!,(B))) + (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2121 #x227E) + (map-charset-chars #'modify-syntax-entry 'japanese-jisx0208 "_" #x2821 #x287E) + (let ((chars '(?ー ?゛ ?゜ ?ヽ ?ヾ ?ゝ ?ゞ ?〃 ?仝 ?々 ?〆 ?〇))) + (dolist (elt chars) + (modify-syntax-entry (car chars) "w"))) + + (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?A #x2321 #x237E) + (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?H #x2421 #x247E) + (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?K #x2521 #x257E) + (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?G #x2621 #x267E) + (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?Y #x2721 #x277E) + (map-charset-chars #'modify-category-entry 'japanese-jisx0208 ?C #x3021 #x7E7E) + (modify-category-entry ?ー ?K) + (let ((chars '(?゛ ?゜))) (while chars (modify-category-entry (car chars) ?K) (modify-category-entry (car chars) ?H) @@@ -654,63 -224,98 +221,84 @@@ ;; Korean character set (KSC5601) - ;; (modify-syntax-entry (make-char 'korean-ksc5601) "w") - (modify-syntax-entry (make-char 'korean-ksc5601 33) "_") - (modify-syntax-entry (make-char 'korean-ksc5601 34) "_") - (modify-syntax-entry (make-char 'korean-ksc5601 38) "_") - (modify-syntax-entry (make-char 'korean-ksc5601 39) "_") - (modify-syntax-entry (make-char 'korean-ksc5601 40) "_") - (modify-syntax-entry (make-char 'korean-ksc5601 41) "_") - - (modify-category-entry (make-char 'korean-ksc5601) ?h) - (modify-category-entry (make-char 'korean-ksc5601 35) ?A) - (modify-category-entry (make-char 'korean-ksc5601 37) ?G) - (modify-category-entry (make-char 'korean-ksc5601 42) ?H) - (modify-category-entry (make-char 'korean-ksc5601 43) ?K) - (modify-category-entry (make-char 'korean-ksc5601 44) ?Y) - - ;; Latin character set (latin-1,2,3,4,5,8,9) - - (modify-category-entry (make-char 'latin-iso8859-1) ?l) - (modify-category-entry (make-char 'latin-iso8859-2) ?l) - (modify-category-entry (make-char 'latin-iso8859-3) ?l) - (modify-category-entry (make-char 'latin-iso8859-4) ?l) - (modify-category-entry (make-char 'latin-iso8859-9) ?l) - (modify-category-entry (make-char 'latin-iso8859-14) ?l) - (modify-category-entry (make-char 'latin-iso8859-15) ?l) - - (modify-category-entry (make-char 'latin-iso8859-1 160) ?\ ) - (modify-category-entry (make-char 'latin-iso8859-2 160) ?\ ) - (modify-category-entry (make-char 'latin-iso8859-3 160) ?\ ) - (modify-category-entry (make-char 'latin-iso8859-4 160) ?\ ) - (modify-category-entry (make-char 'latin-iso8859-9 160) ?\ ) - (modify-category-entry (make-char 'latin-iso8859-14 160) ?\ ) - (modify-category-entry (make-char 'latin-iso8859-15 160) ?\ ) + (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?h) + + (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2121 #x227E) + (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2621 #x277E) + (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2830 #x287E) + (map-charset-chars #'modify-syntax-entry 'korean-ksc5601 "_" #x2930 #x297E) + (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2330 #x2339) + (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2341 #x235A) + (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?A #x2361 #x237A) + (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?G #x2521 #x257E) + (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?H #x2A21 #x2A7E) + (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?K #x2B21 #x2B7E) + (map-charset-chars #'modify-category-entry 'korean-ksc5601 ?Y #x2C21 #x2C7E) + + ;; These are in more than one charset. -(modify-syntax-entry ?\( "()") -(modify-syntax-entry ?\ï¼» "(ï¼½") -(modify-syntax-entry ?\{ "(}") -(modify-syntax-entry ?\「 "(」") -(modify-syntax-entry ?\『 "(』") -(modify-syntax-entry ?\) ")(") -(modify-syntax-entry ?\ï¼½ ")ï¼»") -(modify-syntax-entry ?\} "){") -(modify-syntax-entry ?\」 ")「") -(modify-syntax-entry ?\』 ")『") - -(modify-syntax-entry ?\〔 "(〕") -(modify-syntax-entry ?\〈 "(〉") -(modify-syntax-entry ?\《 "(》") -(modify-syntax-entry ?\〖 "(〗") -(modify-syntax-entry ?\【 "(】") -(modify-syntax-entry ?\〕 ")〔") -(modify-syntax-entry ?\〉 ")〈") -(modify-syntax-entry ?\》 ")《") -(modify-syntax-entry ?\〗 ")〖") -(modify-syntax-entry ?\】 ")【") -(modify-syntax-entry ?\〚 "(〛") -(modify-syntax-entry ?\〛 ")〚") ++(let ((parens (concat "〈〉《》「」『』【】〔〕〖〗〘〙〚〛" ++ "︵︶︷︸︹︺︻︼︽︾︿﹀﹁﹂﹃﹄" ++ "()[]{}")) ++ open close) ++ (dotimes (i (/ (length parens) 2)) ++ (setq open (aref parens (* i 2)) ++ close (aref parens (1+ (* i 2)))) ++ (modify-syntax-entry open (format "(%c" close)) ++ (modify-syntax-entry close (format ")%c" open)))) - ;; Lao character set + ;; Arabic character set - (modify-category-entry (make-char 'lao) ?o) - (dotimes (i (1+ (- #xeff #xe80))) - (modify-category-entry (decode-char 'ucs (+ i #xe80)) ?o)) + (let ((charsets '(arabic-iso8859-6 + arabic-digit + arabic-1-column + arabic-2-column))) + (while charsets + (map-charset-chars #'modify-category-entry (car charsets) ?b) + (setq charsets (cdr charsets)))) + (modify-category-entry '(#x600 . #x6ff) ?b) + (modify-category-entry '(#xfb50 . #xfdff) ?b) + (modify-category-entry '(#xfe70 . #xfefe) ?b) - (let ((deflist '(;; chars syntax category - ("(1!(B-(1N(B" "w" ?0) ; consonant - ("(1PRS]`(B-(1d(B" "w" ?1) ; vowel base - ("(1QT(B-(1W[m(B" "w" ?2) ; vowel upper - ("(1XY(B" "w" ?3) ; vowel lower - ("(1h(B-(1l(B" "w" ?4) ; tone mark - ("(1\(B" "w" ?9) ; semivowel lower - ("(1p(B-(1y(B" "w" ?6) ; digit - ("(1Of(B" "_" ?5) ; symbol - ;; Unicode equivalents - ("$,1D!(B-$,1DN(B" "w" ?0) ; consonant - ("$,1DPDRDSD]D`(B-$,1Dd(B" "w" ?1) ; vowel base - ("$,1DQDT(B-$,1DWD[Dm(B" "w" ?2) ; vowel upper - ("$,1DXDY(B" "w" ?3) ; vowel lower - ("$,1Dh(B-$,1Dk(B" "w" ?4) ; tone mark - ("$,1D\D](B" "w" ?9) ; semivowel lower - ("$,1Dp(B-$,1Dy(B" "w" ?6) ; digit - ("$,1DODf(B" "_" ?5) ; symbol + ;; Cyrillic character set (ISO-8859-5) + + (modify-syntax-entry ?№ ".") + + ;; Ethiopic character set + + (modify-category-entry '(#x1200 . #x137b) ?e) + (let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨ ?ö ‡€ ?ö ‡‹ ?ö ‡Œ ?ö ‡ ?ö ‡Ž ?ö ‡))) + (while chars + (modify-syntax-entry (car chars) ".") + (setq chars (cdr chars)))) + (map-charset-chars #'modify-category-entry 'ethiopic ?e) + + ;; Hebrew character set (ISO-8859-8) + + (modify-syntax-entry #x5be ".") ; MAQAF + (modify-syntax-entry #x5c0 ".") ; PASEQ + (modify-syntax-entry #x5c3 ".") ; SOF PASUQ + (modify-syntax-entry #x5f3 ".") ; GERESH + (modify-syntax-entry #x5f4 ".") ; GERSHAYIM + + ;; Indian character set (IS 13194 and other Emacs original Indian charsets) + + (modify-category-entry '(#x901 . #x970) ?i) + (map-charset-chars #'modify-category-entry 'indian-is13194 ?i) + (map-charset-chars #'modify-category-entry 'indian-2-column ?i) + + ;; Lao character set + + (modify-category-entry '(#xe80 . #xeff) ?o) + (map-charset-chars #'modify-category-entry 'lao ?o) + + (let ((deflist '(("ກ-ຮ" "w" ?0) ; consonant + ("ະາຳຽເ-ໄ" "w" ?1) ; vowel base + ("ັິ-ືົໍ" "w" ?2) ; vowel upper + ("ຸູ" "w" ?3) ; vowel lower - ("່-໋" "w" ?4) ; tone mark ++ ("່-໋" "w" ?4) ; tone mark + ("ຼຽ" "w" ?9) ; semivowel lower + ("໐-໙" "w" ?6) ; digit + ("ຯໆ" "_" ?5) ; symbol )) elm chars len syntax category to ch i) (while deflist @@@ -736,26 -341,17 +324,17 @@@ ;; Thai character set (TIS620) - (modify-category-entry (make-char 'thai-tis620) ?t) - (dotimes (i (1+ (- #xe7f #xe00))) - (modify-category-entry (decode-char 'ucs (+ i #xe00)) ?t)) + (modify-category-entry '(#xe00 . #xe7f) ?t) + (map-charset-chars #'modify-category-entry 'thai-tis620 ?t) (let ((deflist '(;; chars syntax category - (",T!(B-,TCEG(B-,TN(B" "w" ?0) ; consonant - (",TDFPRS`(B-,Te(B" "w" ?1) ; vowel base - (",TQT(B-,TWgn(B" "w" ?2) ; vowel upper - (",TX(B-,TZ(B" "w" ?3) ; vowel lower - (",Th(B-,Tm(B" "w" ?4) ; tone mark - (",Tp(B-,Ty(B" "w" ?6) ; digit - (",TOf_oz{(B" "_" ?5) ; symbol - ;; Unicode equivalents - ("$,1Ba(B-$,1C#C%C'(B-$,1C.(B" "w" ?0) ; consonant - ("$,1C$C&C0C2C3C@(B-$,1CE(B" "w" ?1) ; vowel base - ("$,1C1C4(B-$,1C7CGCN(B" "w" ?2) ; vowel upper - ("$,1C8(B-$,1C:(B" "w" ?3) ; vowel lower - ("$,1CH(B-$,1CM(B" "w" ?4) ; tone mark - ("$,1CP(B-$,1CY(B" "w" ?6) ; digit - ("$,1C/CFC?COCZC[(B" "_" ?5) ; symbol + ("ก-รลว-ฮ" "w" ?0) ; consonant + ("ฤฦะาำเ-ๅ" "w" ?1) ; vowel base + ("ัิ-ื็๎" "w" ?2) ; vowel upper + ("ุ-ฺ" "w" ?3) ; vowel lower - ("่-ํ" "w" ?4) ; tone mark ++ ("่-ํ" "w" ?4) ; tone mark + ("๐-๙" "w" ?6) ; digit + ("ฯๆ฿๏๚๛" "_" ?5) ; symbol )) elm chars len syntax category to ch i) (while deflist @@@ -781,38 -377,25 +360,26 @@@ ;; Tibetan character set - (modify-category-entry (make-char 'tibetan) ?q) - (modify-category-entry (make-char 'tibetan-1-column) ?q) - (dotimes (i (1+ (- #xfff #xf00))) - (modify-category-entry (decode-char 'ucs (+ i #xf00)) ?q)) + (modify-category-entry '(#xf00 . #xfff) ?q) + (map-charset-chars #'modify-category-entry 'tibetan ?q) + (map-charset-chars #'modify-category-entry 'tibetan-1-column ?q) (let ((deflist '(;; chars syntax category - ("4$(7"!0"!1(B-4$(7"J0"J14"K0"K1(B" "w" ?0) ; consonant - ("$(7#!(B-$(7#J#K#L#M!"!#(B" "w" ?0) ; - ("$(7$!(B-$(7$e(B" "w" ?0) ; - ("$(7%!(B-$(7%u(B" "w" ?0) ; - ("$(7"S"["\"]"^"a(B" "w" ?2) ; upper vowel - ("$(7"_"c"d"g"h"i"j"k"l(B" "w" ?2) ; upper modifier - ("$(7!I"Q"R"U"e!e!g(B" "w" ?3) ; lowel vowel/modifier - ("$(7!P(B-$(7!Y!Z(B-$(7!c(B" "w" ?6) ; digit - ("$(7!;!=(B-$(7!B!D"`(B" "." ?|) ; line-break char - ("$(8!;!=!?!@!A!D"`(B" "." ?|) ; - ("$(7!8!;!=(B-$(7!B!D"`!m!d(B" "." ?>) ; prohibition - ("$(8!;!=!?!@!A!D"`(B" "." ?>) ; - ("$(7!0(B-$(7!:!l#R#S"f(B" "." ?<) ; prohibition - ("$(7!C!E(B-$(7!H!J(B-$(7!O!f!h(B-$(7!k!n!o#O#P(B-$(7#`(B" "." ?q) ; others - - ;; Unicode version (not complete) - ("$,1F (B-$,1FIFJ(B" "w" ?0) ; consonant - ("$,1Fp(B-$,1G9G:G;G<(B" "w" ?0) ; - ("$,1FRFZF[F\F]F`(B" "w" ?2) ; upper vowel - ("$,1F^FbFcFfFgFhFiFjFk(B" "w" ?2) ; upper modifier - ("$,1EYFPFQFTFdEuEw(B" "w" ?3) ; lowel vowel/modifier - ("$,1E`(B-$,1EiEj(B-$,1Es(B" "w" ?6) ; digit - ("$,1EKEM(B-$,1ERETF_(B" "." ?|) ; line-break char - ("$,1EHEKEM(B-$,1ERETF_E}Et(B" "." ?>) ; prohibition - ("$,1E@(B-$,1EJE|GAGBFe(B" "." ?<) ; prohibition - ("$,1ESEU(B-$,1EXEZ(B-$,1E_EvEx(B-$,1E{E~EG>G?(B-$,1GO(B" "." ?q) ; others + ("ཀ-ཀྵཪ" "w" ?0) ; consonant + ("ྐ-ྐྵྺྻྼö€ö€‚" "w" ?0) ; + ("ö„š-ö…ž" "w" ?0) ; + ("ö…¸-ö‡Œ" "w" ?0) ; + ("ིེཻོཽྀ" "w" ?2) ; upper vowel + ("ཾྂྃ྆྇ྈྉྊྋ" "w" ?2) ; upper modifier + ("༙ö‚Žà½±à½´à¾„༵༷" "w" ?3) ; lowel vowel/modifier ++ ("཰" "w" ?3) ; invisible vowel a + ("༠-༩༪-༳" "w" ?6) ; digit + ("་།-༒༔ཿ" "." ?|) ; line-break char + ("་།༏༐༑༔ཿ" "." ?|) ; + ("༈་།-༒༔ཿ༽༴" "." ?>) ; prohibition + ("་།༏༐༑༔ཿ" "." ?>) ; + ("ༀ-༊༼࿁࿂྅" "." ?<) ; prohibition + ("༓༕-༘༚-༟༶༸-༻༾༿྾྿-࿏" "." ?q) ; others )) elm chars len syntax category to ch i) (while deflist @@@ -1076,14 -685,13 +669,13 @@@ (or (and (>= c #x0460) (<= c #x0480)) (and (>= c #x048c) (<= c #x04be)) (and (>= c #x04d0) (<= c #x04f4))) - (set-case-syntax-pair - (decode-char 'ucs c) (decode-char 'ucs (1+ c)) tbl)) - (set-case-syntax-pair c (1+ c) tbl)) ++ (set-case-syntax-pair c (1+ c) tbl)) (setq c (1+ c))) - (set-case-syntax-pair ?$,1*!(B ?$,1*"(B tbl) - (set-case-syntax-pair ?$,1*#(B ?$,1*$(B tbl) - (set-case-syntax-pair ?$,1*'(B ?$,1*((B tbl) - (set-case-syntax-pair ?$,1*+(B ?$,1*,(B tbl) - (set-case-syntax-pair ?$,1*X(B ?$,1*Y(B tbl) + (set-case-syntax-pair ?Ӂ ?ӂ tbl) + (set-case-syntax-pair ?Ӄ ?ӄ tbl) + (set-case-syntax-pair ?Ӈ ?ӈ tbl) + (set-case-syntax-pair ?Ӌ ?ӌ tbl) + (set-case-syntax-pair ?Ó¸ ?Ó¹ tbl) ;; general punctuation (setq c #x2000) @@@ -1211,12 -817,209 +801,211 @@@ japanese-jisx0208 japanese-jisx0212 chinese-gb2312 chinese-big5-1 chinese-big5-2))) (while l - (aset auto-fill-chars (make-char (car l)) t) + ;;(aset auto-fill-chars (make-char (car l)) t) (put-charset-property (car l) 'nospace-between-words t) (setq l (cdr l)))) - + + + ;; CJK double width characters. + (let ((l '((#x1100 . #x11FF) + (#x2E80 . #x9FAF) + (#xAC00 . #xD7AF) + (#xF900 . #xFAFF) + (#xFE30 . #xFE4F) + (#xFF00 . #xFF5F) - (#xFFE0 . #xFFEF)))) ++ (#xFFE0 . #xFFEF) ++ (#x20000 . #x2AFFF) ++ (#x2F800 . #x2FFFF)))) + (dolist (elt l) + (set-char-table-range char-width-table + (cons (car elt) (cdr elt)) + 2))) + ;; Fixme: Doing this affects non-CJK characters through unification, + ;; but presumably CJK users expect those characters to be + ;; double-width when using these charsets. + ;; (map-charset-chars + ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2)) + ;; 'japanese-jisx0208) + ;; (map-charset-chars + ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2)) + ;; 'japanese-jisx0212) + ;; (map-charset-chars + ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2)) + ;; 'japanese-jisx0213-1) + ;; (map-charset-chars + ;; #'(lambda (range ignore) (set-char-table-range char-width-table range 2)) + ;; 'japanese-jisx0213-2) + ;; (map-charset-chars + ;; (lambda (range ignore) (set-char-table-range char-width-table range 2)) + ;; 'korean-ksc5601) + + ;; Other double width + (map-charset-chars + (lambda (range ignore) (set-char-table-range char-width-table range 2)) + 'ethiopic) + (map-charset-chars + (lambda (range ignore) (set-char-table-range char-width-table range 2)) + 'tibetan) + (map-charset-chars + (lambda (range ignore) (set-char-table-range char-width-table range 2)) + 'indian-2-column) + (map-charset-chars + (lambda (range ignore) (set-char-table-range char-width-table range 2)) + 'arabic-2-column) + + (optimize-char-table (standard-case-table)) + (optimize-char-table char-width-table) + (optimize-char-table (standard-category-table)) + (optimize-char-table (standard-syntax-table)) + + ;; The Unicode blocks actually extend past some of these ranges with + ;; undefined codepoints. + (let ((script-list nil)) + (dolist + (elt + '((#x0000 #x007F latin) + (#x00A0 #x036F latin) + (#x0370 #x03E1 greek) + (#x03E2 #x03EF coptic) + (#x03F0 #x03F3 greek) + (#x0400 #x04FF cyrillic) + (#x0530 #x058F armenian) + (#x0590 #x05FF hebrew) + (#x0600 #x06FF arabic) + (#x0700 #x074F syriac) + (#x0780 #x07BF thaana) + (#x0900 #x097F devanagari) + (#x0980 #x09FF bengali) + (#x0A00 #x0A7F gurmukhi) + (#x0A80 #x0AFF gujarati) + (#x0B00 #x0B7F oriya) + (#x0B80 #x0BFF tamil) + (#x0C00 #x0C7F telugu) + (#x0C80 #x0CFF kannada) + (#x0D00 #x0D7F malayalam) + (#x0D80 #x0DFF sinhala) + (#x0E00 #x0E5F thai) + (#x0E80 #x0EDF lao) + (#x0F00 #x0FFF tibetan) + (#x1000 #x105F myanmar) + (#x10A0 #x10FF georgian) + (#x1100 #x11FF hangul) + (#x1200 #x137F ethiopic) + (#x13A0 #x13FF cherokee) + (#x1400 #x167F canadian-aboriginal) + (#x1680 #x169F ogham) + (#x16A0 #x16FF runic) + (#x1780 #x17FF khmer) + (#x1800 #x18AF mongolian) + (#x1E00 #x1EFF latin) + (#x1F00 #x1FFF greek) + (#x20A0 #x20AF currency) + (#x2800 #x28FF braille) + (#x2E80 #x2FDF han) + (#x2FF0 #x2FFF ideographic-description) + (#x3000 #x303F cjk-misc) + (#x3040 #x30FF kana) + (#x3100 #x312F bopomofo) + (#x3130 #x318F hangul) + (#x3190 #x319F kanbun) + (#x31A0 #x31BF bopomofo) + (#x3400 #x9FAF han) + (#xA000 #xA4CF yi) + (#xAC00 #xD7AF hangul) + (#xF900 #xFA5F han) + (#xFB1D #xFB4F hebrew) + (#xFB50 #xFDFF arabic) + (#xFE70 #xFEFC arabic) + (#xFF00 #xFF5F cjk-misc) + (#xFF61 #xFF9F kana) + (#xFFE0 #xFFE6 cjk-misc) + (#x20000 #x2AFFF han) + (#x2F800 #x2FFFF han))) + (set-char-table-range char-script-table + (cons (car elt) (nth 1 elt)) (nth 2 elt)) + (or (memq (nth 2 elt) script-list) + (setq script-list (cons (nth 2 elt) script-list)))) + (set-char-table-extra-slot char-script-table 0 (nreverse script-list))) + -(map-charset-chars ++(map-charset-chars + #'(lambda (range ignore) + (set-char-table-range char-script-table range 'tibetan)) + 'tibetan) + + + ;;; Setting word boundary. + + (defun next-word-boundary-han (pos limit) + (if (<= pos limit) + (save-excursion + (goto-char pos) + (looking-at "\\cC+") + (goto-char (match-end 0)) + (if (looking-at "\\cH+") + (goto-char (match-end 0))) + (point)) + (while (and (> pos limit) + (eq (aref char-script-table (char-after (1- pos))) 'han)) + (setq pos (1- pos))) + pos)) + + (defun next-word-boundary-kana (pos limit) + (if (<= pos limit) + (save-excursion + (goto-char pos) + (if (looking-at "\\cK+") + (goto-char (match-end 0))) + (if (looking-at "\\cH+") + (goto-char (match-end 0))) + (point)) + (let ((category-set (char-category-set (char-after pos))) + category) + (if (aref category-set ?K) + (while (and (> pos limit) + (aref (char-category-set (char-after (1- pos))) ?K)) + (setq pos (1- pos))) + (while (and (> pos limit) - (aref (setq category-set ++ (aref (setq category-set + (char-category-set (char-after (1- pos)))) ?H)) + (setq pos (1- pos))) + (setq category (cond ((aref category-set ?C) ?C) + ((aref category-set ?K) ?K) + ((aref category-set ?A) ?A))) + (when category + (setq pos (1- pos)) + (while (and (> pos limit) + (aref (char-category-set (char-after (1- pos))) + category)) + (setq pos (1- pos))))) + pos))) + + (map-char-table + #'(lambda (char script) + (cond ((eq script 'han) - (set-char-table-range next-word-boundary-function-table ++ (set-char-table-range find-word-boundary-function-table + char #'next-word-boundary-han)) + ((eq script 'kana) - (set-char-table-range next-word-boundary-function-table ++ (set-char-table-range find-word-boundary-function-table + char #'next-word-boundary-kana)))) + char-script-table) + + (setq word-combining-categories + '((?l . ?l))) + + (setq word-separating-categories ; (2-byte character sets) + '((?A . ?K) ; Alpha numeric - Katakana + (?A . ?C) ; Alpha numeric - Chinese + (?H . ?A) ; Hiragana - Alpha numeric + (?H . ?K) ; Hiragana - Katakana + (?H . ?C) ; Hiragana - Chinese + (?K . ?A) ; Katakana - Alpha numeric + (?K . ?C) ; Katakana - Chinese + (?C . ?A) ; Chinese - Alpha numeric + (?C . ?K) ; Chinese - Katakana + )) + ;;; Local Variables: - ;;; coding: iso-2022-7bit + ;;; coding: utf-8-emacs ;;; End: ;;; characters.el ends here diff --cc lisp/international/encoded-kb.el index 6d8982b4934,e66c9f0009e..146b17f9e5c --- a/lisp/international/encoded-kb.el +++ b/lisp/international/encoded-kb.el @@@ -1,7 -1,8 +1,11 @@@ ;;; encoded-kb.el --- handler to input multibyte characters encoded somehow ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. + ;; Copyright (C) 2002 Free Software Foundation, Inc. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 ;; This file is part of GNU Emacs. diff --cc lisp/international/fontset.el index 90e98dba8f5,d2f83f62f19..d0ededdc6d8 --- a/lisp/international/fontset.el +++ b/lisp/international/fontset.el @@@ -1,10 -1,10 +1,13 @@@ ;;; fontset.el --- commands for handling fontset ;; Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. ;; Copyright (C) 2001 Free Software Foundation, Inc. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 - ;; Keywords: mule, multilingual, fontset + ;; Keywords: mule, i18n, fontset ;; This file is part of GNU Emacs. @@@ -27,194 -27,327 +30,347 @@@ ;;; Code: + ;; Setup font-encoding-alist for all known encodings. + + (setq font-encoding-alist + '(("iso8859-1$" . iso-8859-1) + ("iso8859-2$" . iso-8859-2) + ("iso8859-3$" . iso-8859-3) + ("iso8859-4$" . iso-8859-4) + ("iso8859-5$" . iso-8859-5) + ("iso8859-6$" . iso-8859-6) + ("iso8859-7$" . iso-8859-7) + ("iso8859-8$" . iso-8859-8) + ("iso8859-9$" . iso-8859-9) + ("iso8859-10$" . iso-8859-10) + ("iso8859-11$" . iso-8859-11) + ("iso8859-13$" . iso-8859-13) + ("iso8859-14$" . iso-8859-14) + ("iso8859-15$" . iso-8859-15) + ("gb2312.1980" . chinese-gb2312) + ("jisx0208.1978" . japanese-jisx0208-1978) + ("jisx0208" . japanese-jisx0208) + ("jisx0201" . jisx0201) + ("jisx0212" . japanese-jisx0212) + ("ksc5601.1987" . korean-ksc5601) + ("cns11643.1992.*1" . chinese-cns11643-1) + ("cns11643.1992.*2" . chinese-cns11643-2) + ("cns11643.1992.*3" . chinese-cns11643-3) + ("cns11643.1992.*4" . chinese-cns11643-4) + ("cns11643.1992.*5" . chinese-cns11643-5) + ("cns11643.1992.*6" . chinese-cns11643-6) + ("cns11643.1992.*7" . chinese-cns11643-7) + ("big5" . big5) + ("sisheng_cwnn" . chinese-sisheng) + ("viscii" . viscii) + ("tis620" . tis620-2533) + ("mulearabic-0" . arabic-digit) + ("mulearabic-1" . arabic-1-column) + ("mulearabic-2" . arabic-2-column) + ("muleipa" . ipa) + ("ethiopic-unicode" . ethiopic) + ("is13194-devanagari" . indian-is13194) - ("devanagari-cdac" . devanagari-glyph) -;; These would be necessary for supporting the complete set of Indian -;; scripts. See also mule-conf.el. -;; ("sanskrit-cdac" . sanskrit-glyph) -;; ("bengali-cdac" . bengali-glyph) -;; ("assamese-cdac" . assamese-glyph) -;; ("punjabi-cdac" . punjabi-glyph) -;; ("gujarati-cdac" . gujarati-glyph) -;; ("oriya-cdac" . oriya-glyph) -;; ("tamil-cdac" . tamil-glyph) -;; ("telugu-cdac" . telugu-glyph) -;; ("kannada-cdac" . kannada-glyph) -;; ("malayalam-cdac" . malayalam-glyph) ++ ("Devanagari-CDAC" . devanagari-cdac) ++ ("Sanskrit-CDAC" . sanskrit-cdac) ++ ("Bengali-CDAC" . bengali-cdac) ++ ("Assamese-CDAC" . assamese-cdac) ++ ("Punjabi-CDAC" . punjabi-cdac) ++ ("Gujarati-CDAC" . gujarati-cdac) ++ ("Oriya-CDAC" . oriya-cdac) ++ ("Tamil-CDAC" . tamil-cdac) ++ ("Telugu-CDAC" . telugu-cdac) ++ ("Kannada-CDAC" . kannada-cdac) ++ ("Malayalam-CDAC" . malayalam-cdac) ++ ("Devanagari-Akruti" . devanagari-akruti) ++ ("Bengali-Akruti" . bengali-akruti) ++ ("Punjabi-Akruti" . punjabi-akruti) ++ ("Gujarati-Akruti" . gujarati-akruti) ++ ("Oriya-Akruti" . oriya-akruti) ++ ("Tamil-Akruti" . tamil-akruti) ++ ("Telugu-Akruti" . telugu-akruti) ++ ("Kannada-Akruti" . kannada-akruti) ++ ("Malayalam-Akruti" . malayalam-akruti) + ("muleindian-2" . indian-2-column) + ("muleindian-1" . indian-1-column) + ("mulelao-1" . mule-lao) + ("muletibetan-2" . tibetan) + ("muletibetan-1" . tibetan-1-column) + ("jisx0213.2000-1" . japanese-jisx0213-1) + ("jisx0213.2000-2" . japanese-jisx0213-2) + ("abobe-symbol" . symbol) + ("iso10646-1" . (unicode . nil)) + ("iso10646.indian-1" . (unicode . nil)))) + + ;; Set standard fontname specification of characters in the default - ;; fontset to find an appropriate font for each charset. This is used - ;; to generate a font name for a fontset if the fontset doesn't - ;; specify a font name for a specific character. The specification - ;; has the form (FAMILY . REGISTRY). FAMILY may be nil, in which - ;; case, the family name of default face is used. If REGISTRY + ;; fontset to find an appropriate font for each script/charset. The + ;; specification has the form ((SCRIPT FONT-SPEC ...) ...), where + ;; FONT-SPEC is: + ;; a vector [ FAMILY WEIGHT SLANT ADSTYLE REGISTRY ], + ;; or a cons (FAMILY . REGISTRY), + ;; or a string FONT-NAME. -;; ++;; + ;; FAMILY, WEIGHT, SLANT, and ADSTYLE may be nil, in which case, the + ;; the corresponding name of default face is used. If REGISTRY ;; contains a character `-', the string before that is embedded in ;; `CHARSET_REGISTRY' field, and the string after that is embedded in ;; `CHARSET_ENCODING' field. If it does not contain `-', the whole ;; string is embedded in `CHARSET_REGISTRY' field, and a wild card - ;; character `*' is embedded in `CHARSET_ENCODING' field. The - ;; REGISTRY for ASCII characters are predefined as "ISO8859-1". + ;; character `*' is embedded in `CHARSET_ENCODING' field. + ;; + ;; SCRIPT is a symbol that appears as an element of the char table + ;; `char-script-table'. SCRIPT may be a charset specifying the range + ;; of characters. -(new-fontset - "fontset-default" - '( ;; for each script - (latin (nil . "ISO8859-1") - (nil . "ISO8859-2") - (nil . "ISO8859-3") - (nil . "ISO8859-4") - (nil . "ISO8859-9") - (nil . "ISO8859-10") - (nil . "ISO8859-13") - (nil . "ISO8859-14") - (nil . "ISO8859-15") - (nil . "VISCII1.1-1")) - - (thai (nil . "TIS620*") - (nil . "ISO8859-11")) - - (devanagari (nil . "iso10646.indian-1")) - - (lao (nil . "MuleLao-1")) - - ;; both for script and charset. - (tibetan (nil . "muletibetan-2")) - - ;; both for script and charset. - (ethiopic (nil . "ethiopic-unicode")) - - (greek (nil . "ISO8859-7")) - - (cyrillic (nil . "ISO8859-5")) - - (arabic (nil . "MuleArabic-0") - (nil . "MuleArabic-1") - (nil . "MuleArabic-2") - (nil . "ISO8859-6")) - - (hebrew (nil . "ISO8859-8")) - - (kana (nil . "JISX0208*") - (nil . "GB2312.1980-0") - (nil . "KSC5601.1987*") - (nil . "JISX0201*")) - - (bopomofo (nil . "sisheng_cwnn-0")) - - (han (nil . "GB2312.1980-0") - (nil . "JISX0208*") - (nil . "JISX0212*") - (nil . "big5*") - (nil . "KSC5601.1987*") - (nil . "CNS11643.1992-1") - (nil . "CNS11643.1992-2") - (nil . "CNS11643.1992-3") - (nil . "CNS11643.1992-4") - (nil . "CNS11643.1992-5") - (nil . "CNS11643.1992-6") - (nil . "CNS11643.1992-7") - (nil . "gbk-0") - (nil . "JISX0213.2000-1") - (nil . "JISX0213.2000-2")) - - (cjk-misc (nil . "GB2312.1980-0") - (nil . "JISX0208*") - (nil . "JISX0212*") - (nil . "big5*") - (nil . "KSC5601.1987*") - (nil . "CNS11643.1992-1") - (nil . "CNS11643.1992-2") - (nil . "CNS11643.1992-3") - (nil . "CNS11643.1992-4") - (nil . "CNS11643.1992-5") - (nil . "CNS11643.1992-6") - (nil . "CNS11643.1992-7") - (nil . "gbk-0") - (nil . "JISX0213.2000-1") - (nil . "JISX0213.2000-2")) - - (hangul (nil . "KSC5601.1987-0")) - - ;; for each charset - (ascii (nil . "ISO8859-1")) - (arabic-digit ("*" . "MuleArabic-0")) - (arabic-1-column ("*" . "MuleArabic-1")) - (arabic-2-column ("*" . "MuleArabic-2")) - (indian-is13194 (nil . "is13194-devanagari")) - (indian-1-column ("*" . "muleindian-2")) - (devanagari-glyph ("altsys-dv_ttsurekh" . "devanagari-cdac")) -;; These would be necessary for supporting the complete set of Indian -;; scripts. - ;; fixme: family name -;; (sanskrit-glyph ("*" . "sanskrit-cdac")) -;; (bengali-glyph ("*" . "bengali-cdac")) -;; (assamese-glyph ("*" . "assamese-cdac")) -;; (punjabi-glyph ("*" . "punjabi-cdac")) -;; (gujarati-glyph ("*" . "gujarati-cdac")) -;; (oriya-glyph ("*" . "oriya-cdac")) -;; (tamil-glyph ("*" . "tamil-cdac")) -;; (telugu-glyph ("*" . "telugu-cdac")) -;; (kannada-glyph ("*" . "kannada-cdac")) - (malayalam-glyph ("altsys-ml_ttkarthika" . "malayalam-cdac")) - (ipa (nil . "MuleIPA-1")) - )) - -;; Append Unicode fonts. -;; This may find fonts with more variants (bold, italic) but which don't cover -;; many characters. -(set-fontset-font "fontset-default" '(0 . #xFFFF) - '(nil . "iso10646-1") nil 'append) -;; These may find fonts that cover many characters but with fewer variants. -(set-fontset-font "fontset-default" '(0 . #xFFFF) - '("gnu-unifont" . "iso10646-1") nil 'append) -(set-fontset-font "fontset-default" '(0 . #xFFFF) - '("mutt-clearlyu" . "iso10646-1") nil 'append) +(defun setup-default-fontset () + "Setup the default fontset." - (dolist (elt - `((latin-iso8859-1 . (nil . "ISO8859-1")) - (latin-iso8859-2 . (nil . "ISO8859-2")) - (latin-iso8859-3 . (nil . "ISO8859-3")) - (latin-iso8859-4 . (nil . "ISO8859-4")) - (thai-tis620 . ("*" . "TIS620")) - (greek-iso8859-7 . ("*" . "ISO8859-7")) - (arabic-iso8859-6 . ("*" . "ISO8859-6")) - (hebrew-iso8859-8 . ("*" . "ISO8859-8")) - (katakana-jisx0201 . ("*" . "JISX0201")) - (latin-jisx0201 . (nil . "JISX0201")) - (cyrillic-iso8859-5 . ("*" . "ISO8859-5")) - (latin-iso8859-9 . (nil . "ISO8859-9")) - (japanese-jisx0208-1978 . ("*" . "JISX0208.1978")) - (chinese-gb2312 . ("*" . "GB2312.1980")) - (japanese-jisx0208 . ("*" . "JISX0208.1990")) - (korean-ksc5601 . ("*" . "KSC5601.1989")) - (japanese-jisx0212 . ("*" . "JISX0212")) - (chinese-cns11643-1 . ("*" . "CNS11643.1992-1")) - (chinese-cns11643-2 . ("*" . "CNS11643.1992-2")) - (chinese-cns11643-3 . ("*" . "CNS11643.1992-3")) - (chinese-cns11643-4 . ("*" . "CNS11643.1992-4")) - (chinese-cns11643-5 . ("*" . "CNS11643.1992-5")) - (chinese-cns11643-6 . ("*" . "CNS11643.1992-6")) - (chinese-cns11643-7 . ("*" . "CNS11643.1992-7")) - (chinese-big5-1 . ("*" . "Big5")) - (chinese-big5-2 . ("*" . "Big5")) - (chinese-sisheng . (nil . "sisheng_cwnn")) - (vietnamese-viscii-lower . (nil . "VISCII1.1")) - (vietnamese-viscii-upper . (nil . "VISCII1.1")) - (arabic-digit . ("*" . "MuleArabic-0")) - (arabic-1-column . ("*" . "MuleArabic-1")) - (arabic-2-column . ("*" . "MuleArabic-2")) - (ipa . (nil . "MuleIPA")) - (ethiopic . ("*" . "Ethiopic-Unicode")) - (ascii-right-to-left . (nil . "ISO8859-1")) - (indian-is13194 . ("*" . "IS13194-Devanagari")) - (indian-2-column . ("*" . "MuleIndian-2")) - (lao . ("*" . "MuleLao-1")) - (tibetan . ("proportional" . "MuleTibetan-2")) - (tibetan-1-column . ("*" . "MuleTibetan-1")) - (latin-iso8859-14 . (nil . "ISO8859-14")) - (latin-iso8859-15 . (nil . "ISO8859-15")) - (mule-unicode-0100-24ff . (nil . "ISO10646-1")) - (mule-unicode-2500-33ff . (nil . "ISO10646-1")) - (mule-unicode-e000-ffff . (nil . "ISO10646-1")) - (japanese-jisx0213-1 . ("*" . "JISX0213.2000-1")) - (japanese-jisx0213-2 . ("*" . "JISX0213.2000-2")) - ;; unicode - ((,(decode-char 'ucs #x0900) . ,(decode-char 'ucs #x097F)) - . ("*" . "ISO10646.indian-1")) - ;; Indian CDAC - (,(indian-font-char-range 'cdac:dv-ttsurekh) - . ("*" . "Devanagari-CDAC")) - (,(indian-font-char-range 'cdac:sd-ttsurekh) - . ("*" . "Sanskrit-CDAC")) - (,(indian-font-char-range 'cdac:bn-ttdurga) - . ("*" . "Bengali-CDAC")) - (,(indian-font-char-range 'cdac:as-ttdurga) - . ("*" . "Assamese-CDAC")) - (,(indian-font-char-range 'cdac:pn-ttamar) - . ("*" . "Punjabi-CDAC")) - (,(indian-font-char-range 'cdac:gj-ttavantika) - . ("*" . "Gujarati-CDAC")) - (,(indian-font-char-range 'cdac:or-ttsarala) - . ("*" . "Oriya-CDAC")) - (,(indian-font-char-range 'cdac:tm-ttvalluvar) - . ("*" . "Tamil-CDAC")) - (,(indian-font-char-range 'cdac:tl-tthemalatha) - . ("*" . "Telugu-CDAC")) - (,(indian-font-char-range 'cdac:kn-ttuma) - . ("*" . "Kannada-CDAC")) - (,(indian-font-char-range 'cdac:ml-ttkarthika) - . ("*" . "Malayalam-CDAC")) - ;; Indian AKRUTI - (,(indian-font-char-range 'akruti:dev) - . ("*" . "Devanagari-Akruti")) - (,(indian-font-char-range 'akruti:bng) - . ("*" . "Bengali-Akruti")) - (,(indian-font-char-range 'akruti:pnj) - . ("*" . "Punjabi-Akruti")) - (,(indian-font-char-range 'akruti:guj) - . ("*" . "Gujarati-Akruti")) - (,(indian-font-char-range 'akruti:ori) - . ("*" . "Oriay-Akruti")) - (,(indian-font-char-range 'akruti:tml) - . ("*" . "Tamil-Akruti")) - (,(indian-font-char-range 'akruti:tlg) - . ("*" . "Telugu-Akruti")) - (,(indian-font-char-range 'akruti:knd) - . ("*" . "Kannada-Akruti")) - (,(indian-font-char-range 'akruti:mal) - . ("*" . "Malayalam-Akruti")) - )) - (set-fontset-font "fontset-default" (car elt) (cdr elt)))) - - ;; Set arguments in `font-encoding-alist' (which see). - (defun set-font-encoding (pattern charset encoding) ++ (new-fontset ++ "fontset-default" ++ '(;; for each script ++ (latin (nil . "ISO8859-1") ++ (nil . "ISO8859-2") ++ (nil . "ISO8859-3") ++ (nil . "ISO8859-4") ++ (nil . "ISO8859-9") ++ (nil . "ISO8859-10") ++ (nil . "ISO8859-13") ++ (nil . "ISO8859-14") ++ (nil . "ISO8859-15") ++ (nil . "VISCII1.1-1")) ++ ++ (thai (nil . "TIS620*") ++ (nil . "ISO8859-11")) ++ ++ (devanagari (nil . "iso10646.indian-1")) ++ ++ (lao (nil . "MuleLao-1")) ++ ++ ;; both for script and charset. ++ (tibetan (nil . "muletibetan-2")) ++ ++ ;; both for script and charset. ++ (ethiopic (nil . "ethiopic-unicode")) ++ ++ (greek (nil . "ISO8859-7")) ++ ++ (cyrillic (nil . "ISO8859-5")) ++ ++ (arabic (nil . "MuleArabic-0") ++ (nil . "MuleArabic-1") ++ (nil . "MuleArabic-2") ++ (nil . "ISO8859-6")) ++ ++ (hebrew (nil . "ISO8859-8")) ++ ++ (kana (nil . "JISX0208*") ++ (nil . "GB2312.1980-0") ++ (nil . "KSC5601.1987*") ++ (nil . "JISX0201*")) ++ ++ (bopomofo (nil . "sisheng_cwnn-0")) ++ ++ (han (nil . "GB2312.1980-0") ++ (nil . "JISX0208*") ++ (nil . "JISX0212*") ++ (nil . "big5*") ++ (nil . "KSC5601.1987*") ++ (nil . "CNS11643.1992-1") ++ (nil . "CNS11643.1992-2") ++ (nil . "CNS11643.1992-3") ++ (nil . "CNS11643.1992-4") ++ (nil . "CNS11643.1992-5") ++ (nil . "CNS11643.1992-6") ++ (nil . "CNS11643.1992-7") ++ (nil . "gbk-0") ++ (nil . "JISX0213.2000-1") ++ (nil . "JISX0213.2000-2")) ++ ++ (cjk-misc (nil . "GB2312.1980-0") ++ (nil . "JISX0208*") ++ (nil . "JISX0212*") ++ (nil . "big5*") ++ (nil . "KSC5601.1987*") ++ (nil . "CNS11643.1992-1") ++ (nil . "CNS11643.1992-2") ++ (nil . "CNS11643.1992-3") ++ (nil . "CNS11643.1992-4") ++ (nil . "CNS11643.1992-5") ++ (nil . "CNS11643.1992-6") ++ (nil . "CNS11643.1992-7") ++ (nil . "gbk-0") ++ (nil . "JISX0213.2000-1") ++ (nil . "JISX0213.2000-2")) ++ ++ (hangul (nil . "KSC5601.1987-0")) ++ ++ ;; for each charset ++ (ascii (nil . "ISO8859-1")) ++ (arabic-digit ("*" . "MuleArabic-0")) ++ (arabic-1-column ("*" . "MuleArabic-1")) ++ (arabic-2-column ("*" . "MuleArabic-2")) ++ (indian-is13194 (nil . "is13194-devanagari")) ++ (indian-1-column ("*" . "muleindian-2")) ++ ;; Indian CDAC ++ (devanagari-cdac (nil . "Devanagari-CDAC")) ++ (sanskrit-cdac (nil . "Sanskrit-CDAC")) ++ (bengali-cdac (nil . "Bengali-CDAC")) ++ (assamese-cdac (nil . "Assamese-CDAC")) ++ (punjabi-cdac (nil . "Punjabi-CDAC")) ++ (gujarati-cdac (nil . "Gujarati-CDAC")) ++ (oriya-cdac (nil . "Oriya-CDAC")) ++ (tamil-cdac (nil . "Tamil-CDAC")) ++ (telugu-cdac (nil . "Telugu-CDAC")) ++ (kannada-cdac (nil . "Kannada-CDAC")) ++ (malayalam-cdac (nil . "Malayalam-CDAC")) ++ ;; Indian AKRUTI ++ (devanagari-akruti (nil . "Devanagari-Akruti")) ++ (bengali-akruti (nil . "Bengali-Akruti")) ++ (punjabi-akruti (nil . "Punjabi-Akruti")) ++ (gujarati-akruti (nil . "Gujarati-Akruti")) ++ (oriya-akruti (nil . "Oriya-Akruti")) ++ (tamil-akruti (nil . "Tamil-Akruti")) ++ (telugu-akruti (nil . "Telugu-Akruti")) ++ (kannada-akruti (nil . "Kannada-Akruti")) ++ (malayalam-akruti (nil . "Malayalam-Akruti")) ++ ;;(devanagari-glyph ("altsys-dv_ttsurekh" . "devanagari-cdac")) ++ ;;(malayalam-glyph ("altsys-ml_ttkarthika" . "malayalam-cdac")) ++ (ipa (nil . "MuleIPA-1")) ++ )) ++ ++ ;; Append Unicode fonts. ++ ;; This may find fonts with more variants (bold, italic) but which ++ ;; don't cover many characters. ++ (set-fontset-font "fontset-default" '(0 . #xFFFF) ++ '(nil . "iso10646-1") nil 'append) ++ ;; These may find fonts that cover many characters but with fewer ++ ;; variants. ++ (set-fontset-font "fontset-default" '(0 . #xFFFF) ++ '("gnu-unifont" . "iso10646-1") nil 'append) ++ (set-fontset-font "fontset-default" '(0 . #xFFFF) ++ '("mutt-clearlyu" . "iso10646-1") nil 'append)) + + ;; These are the registered registries/encodings from + ;; ftp://ftp.x.org/pub/DOCS/registry 2001/06/01 + + ;; Name Reference + ;; ---- --------- + ;; "DEC" [27] + ;; registry prefix + ;; "DEC.CNS11643.1986-2" [53] + ;; CNS11643 2-plane using the encoding + ;; suggested in that standard + ;; "DEC.DTSCS.1990-2" [54] + ;; DEC Taiwan Supplemental Character Set + ;; "fujitsu.u90x01.1991-0" [87] + ;; "fujitsu.u90x03.1991-0" [87] + ;; "GB2312.1980-0" [39],[12] + ;; China (PRC) Hanzi, GL encoding + ;; "GB2312.1980-1" [39] + ;; (deprecated) + ;; China (PRC) Hanzi, GR encoding + ;; "HP-Arabic8" [36] + ;; HPARABIC8 8-bit character set + ;; "HP-East8" [36] + ;; HPEAST8 8-bit character set + ;; "HP-Greek8" [36] + ;; HPGREEK8 8-bit character set + ;; "HP-Hebrew8" [36] + ;; HPHEBREW8 8-bit character set + ;; "HP-Japanese15" [36] + ;; HPJAPAN15 15-bit characer set, + ;; modified from industry defacto + ;; standard Shift-JIS + ;; "HP-Kana8" [36] + ;; HPKANA8 8-bit character set + ;; "HP-Korean15" [36] + ;; HPKOREAN15 15-bit character set + ;; "HP-Roman8" [36] + ;; HPROMAN8 8-bit character set + ;; "HP-SChinese15" [36] + ;; HPSCHINA15 15-bit character set for + ;; support of Simplified Chinese + ;; "HP-TChinese15" [36] + ;; HPTCHINA15 15-bit character set for + ;; support of Traditional Chinese + ;; "HP-Turkish8" [36] + ;; HPTURKISH8 8-bit character set + ;; "IPSYS" [59] + ;; registry prefix + ;; "IPSYS.IE-1" [59] + ;; "ISO2022""-" [44] + ;; "ISO646.1991-IRV" [107] + ;; ISO 646 International Reference Version + ;; "ISO8859-1" [15],[12] + ;; ISO Latin alphabet No. 1 + ;; "ISO8859-2" [15],[12] + ;; ISO Latin alphabet No. 2 + ;; "ISO8859-3" [15],[12] + ;; ISO Latin alphabet No. 3 + ;; "ISO8859-4" [15],[12] + ;; ISO Latin alphabet No. 4 + ;; "ISO8859-5" [15],[12] + ;; ISO Latin/Cyrillic alphabet + ;; "ISO8859-6" [15],[12] + ;; ISO Latin/Arabic alphabet + ;; "ISO8859-7" [15],[12] + ;; ISO Latin/Greek alphabet + ;; "ISO8859-8" [15],[12] + ;; ISO Latin/Hebrew alphabet + ;; "ISO8859-9" [15],[12] + ;; ISO Latin alphabet No. 5 + ;; "ISO8859-10" [15],[12] + ;; ISO Latin alphabet No. 6 + ;; "ISO8859-13" [15],[12] + ;; ISO Latin alphabet No. 7 + ;; "ISO8859-14" [15],[12] + ;; ISO Latin alphabet No. 8 + ;; "ISO8859-15" [15],[12] + ;; ISO Latin alphabet No. 9 + ;; "FCD8859-15" [7] + ;; (deprecated) + ;; ISO Latin alphabet No. 9, Final Committee Draft + ;; "ISO10646-1" [133] + ;; Unicode Universal Multiple-Octet Coded Character Set + ;; "ISO10646-MES" [133] + ;; (deprecated) + ;; Unicode Minimum European Subset + ;; "JISX0201.1976-0" [38],[12] + ;; 8-Bit Alphanumeric-Katakana Code + ;; "JISX0208.1983-0" [40],[12] + ;; Japanese Graphic Character Set, + ;; GL encoding + ;; "JISX0208.1990-0" [71] + ;; Japanese Graphic Character Set, + ;; GL encoding + ;; "JISX0208.1983-1" [40] + ;; (deprecated) + ;; Japanese Graphic Character Set, + ;; GR encoding + ;; "JISX0212.1990-0" [72] + ;; Supplementary Japanese Graphic Character Set, + ;; GL encoding + ;; "KOI8-R" [119] + ;; Cyrillic alphabet + ;; "KSC5601.1987-0" [41],[12] + ;; Korean Graphic Character Set, + ;; GL encoding + ;; "KSC5601.1987-1" [41] + ;; (deprecated) + ;; Korean Graphic Character Set, + ;; GR encoding + ;; "omron_CNS11643-0" [45] + ;; "omron_CNS11643-1" [45] + ;; "omron_BIG5-0" [45] + ;; "omron_BIG5-1" [45] + ;; "wn.tamil.1993" [103] + + (defun set-font-encoding (pattern charset) + "Set arguments in `font-encoding-alist' (which see)." (let ((slot (assoc pattern font-encoding-alist))) (if slot - (let ((place (assq charset (cdr slot)))) - (if place - (setcdr place encoding) - (setcdr slot (cons (cons charset encoding) (cdr slot))))) + (setcdr slot charset) (setq font-encoding-alist - (cons (list pattern (cons charset encoding)) font-encoding-alist))) - )) - - ;; Allow display of arbitrary characters with an iso-10646-encoded - ;; (`Unicode') font. - (define-translation-table 'ucs-mule-to-mule-unicode - ucs-mule-to-mule-unicode) - (define-translation-hash-table 'ucs-mule-cjk-to-unicode - ucs-mule-cjk-to-unicode) - - (define-ccl-program ccl-encode-unicode-font - `(0 - ;; r0: charset-id - ;; r1: 1st position code - ;; r2: 2nd position code (if r0 is 2D charset) - ((if (r0 == ,(charset-id 'ascii)) - ((r2 = r1) - (r1 = 0)) - ;; At first, try to get a Unicode code point directly. - ((if (r2 >= 0) - ;; This is a 2D charset. - (r1 = ((r1 << 7) | r2))) - (lookup-character ucs-mule-cjk-to-unicode r0 r1) - (if r7 - ;; We got it! - ((r1 = (r0 >> 8)) - (r2 = (r0 & #xFF))) - ;; Look for a translation for non-ASCII chars. - ((translate-character ucs-mule-to-mule-unicode r0 r1) - (if (r0 == ,(charset-id 'latin-iso8859-1)) - ((r2 = (r1 + 128)) - (r1 = 0)) - ((r2 = (r1 & #x7F)) - (r1 >>= 7) - (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) - ((r1 *= 96) - (r1 += r2) - (r1 += ,(- #x100 (* 32 96) 32)) - (r1 >8= 0) - (r2 = r7)) - (if (r0 == ,(charset-id 'mule-unicode-2500-33ff)) - ((r1 *= 96) - (r1 += r2) - (r1 += ,(- #x2500 (* 32 96) 32)) - (r1 >8= 0) - (r2 = r7)) - (if (r0 == ,(charset-id 'mule-unicode-e000-ffff)) - ((r1 *= 96) - (r1 += r2) - (r1 += ,(- #xe000 (* 32 96) 32)) - (r1 >8= 0) - (r2 = r7)) - ;; No way, use the glyph for U+FFFD. - ((r1 = #xFF) - (r2 = #xFD))))))))))))) - "Encode characters for display with iso10646 font. - Translate through the translation-hash-table named - `ucs-mule-cjk-to-unicode' and the translation-table named - `ucs-mule-to-mule-unicode' initially.") - - ;; Use the above CCL encoder for Unicode fonts. Please note that the - ;; regexp is not simply "ISO10646-1" because there exists, for - ;; instance, the following Devanagari Unicode fonts: - ;; -misc-fixed-medium-r-normal--24-240-72-72-c-120-iso10646.indian-1 - ;; -sibal-devanagari-medium-r-normal--24-240-75-75-P--iso10646-dev - (setq font-ccl-encoder-alist - (cons '("ISO10646.*-*" . ccl-encode-unicode-font) - font-ccl-encoder-alist)) + (cons (cons pattern charset) font-encoding-alist))))) ;; Setting for suppressing XLoadQueryFont on big fonts. (setq x-pixel-size-width-font-regexp @@@ -483,30 -513,83 +536,82 @@@ Done when `mouse-set-font' is called. (slant (aref xlfd-fields xlfd-regexp-slant-subnum)) (swidth (aref xlfd-fields xlfd-regexp-swidth-subnum)) (size (aref xlfd-fields xlfd-regexp-pixelsize-subnum)) - (charset (aref xlfd-fields xlfd-regexp-registry-subnum)) - (nickname (aref xlfd-fields xlfd-regexp-encoding-subnum)) + (nickname (aref xlfd-fields xlfd-regexp-registry-subnum)) name) - (if (not (string= "fontset" charset)) - fontset - (if (> (string-to-int size) 0) - (setq name (format "%s: %s-dot" nickname size)) - (setq name nickname)) - (cond ((string-match "^medium$" weight) - (setq name (concat name " " "medium"))) - ((string-match "^bold$\\|^demibold$" weight) - (setq name (concat name " " weight)))) - (cond ((string-match "^i$" slant) - (setq name (concat name " " "italic"))) - ((string-match "^o$" slant) - (setq name (concat name " " "slant"))) - ((string-match "^ri$" slant) - (setq name (concat name " " "reverse italic"))) - ((string-match "^ro$" slant) - (setq name (concat name " " "reverse slant")))) - name)) + (if (not (string-match "^fontset-\\(.*\\)$" nickname)) + (setq nickname family) + (setq nickname (match-string 1 nickname))) + (if (and size (> (string-to-int size) 0)) + (setq name (format "%s: %s-dot" nickname size)) + (setq name nickname)) + (and weight + (cond ((string-match "^medium$" weight) + (setq name (concat name " " "medium"))) + ((string-match "^bold$\\|^demibold$" weight) + (setq name (concat name " " weight))))) + (and slant + (cond ((string-match "^i$" slant) + (setq name (concat name " " "italic"))) + ((string-match "^o$" slant) + (setq name (concat name " " "slant"))) + ((string-match "^ri$" slant) + (setq name (concat name " " "reverse italic"))) + ((string-match "^ro$" slant) + (setq name (concat name " " "reverse slant"))))) + name) fontset))) + (defvar charset-script-alist + '((ascii . latin) + (latin-iso8859-1 . latin) + (latin-iso8859-2 . latin) + (latin-iso8859-3 . latin) + (latin-iso8859-4 . latin) + (latin-iso8859-9 . latin) + (latin-iso8859-10 . latin) + (latin-iso8859-13 . latin) + (latin-iso8859-14 . latin) + (latin-iso8859-15 . latin) + (latin-iso8859-16 . latin) + (latin-jisx0201 . latin) + (thai-tis620 . thai) + (cyrillic-iso8859-5 . cyrillic) + (arabic-iso8859-6 . arabic) + (greek-iso8859-7 . latin) + (hebrew-iso8859-8 . latin) + (katakana-jisx0201 . kana) + (chinese-gb2312 . han) + (chinese-big5-1 . han) + (chinese-big5-2 . han) + (chinese-cns11643-1 . han) + (chinese-cns11643-2 . han) + (chinese-cns11643-3 . han) + (chinese-cns11643-4 . han) + (chinese-cns11643-5 . han) + (chinese-cns11643-6 . han) + (chinese-cns11643-7 . han) + (japanese-jisx0208 . han) + (japanese-jisx0208-1978 . han) + (japanese-jisx0212 . han) + (japanese-jisx0213-1 . han) + (japanese-jisx0213-2 . han) + (korean-ksc5601 . hangul) + (chinese-sisheng . bopomofo) + (vietnamese-viscii-lower . latin) + (vietnamese-viscii-upper . latin) + (arabic-digit . arabic) + (arabic-1-column . arabic) + (arabic-2-column . arabic) + (indian-is13194 . devanagari) + (indian-glyph . devanagari) + (indian-1-column . devanagari) + (indian-2-column . devanagari) + (tibetan-1-column . tibetan)) + "Alist of charsets vs the corresponding most appropriate scripts. + + This alist is used by the function `create-fontset-from-fontset-spec' + to map charsets to scripts.") -;;;###autoload (defun create-fontset-from-fontset-spec (fontset-spec &optional style-variant noerror) "Create a fontset from fontset specification string FONTSET-SPEC. diff --cc lisp/international/mule-cmds.el index 3d2372b4f75,e08a0c39a5b..c3545347335 --- a/lisp/international/mule-cmds.el +++ b/lisp/international/mule-cmds.el @@@ -1,9 -1,13 +1,12 @@@ ;;; mule-cmds.el --- commands for mulitilingual environment - -;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. -;; Licensed to the Free Software Foundation. -;; Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc. -;; Copyright (C) 2001, 2002 +;; Copyright (C) 1995, 2003 Electrotechnical Laboratory, JAPAN. - ;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. +;; Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc. ++;; Copyright (C) 2003 + ;; National Institute of Advanced Industrial Science and Technology (AIST) + ;; Registration Number H13PRO009 - ;; Keywords: mule, multilingual + ;; Keywords: mule, i18n ;; This file is part of GNU Emacs. @@@ -26,7 -30,9 +29,9 @@@ ;;; Code: - (eval-when-compile (defvar dos-codepage)) -(eval-when-compile ++(eval-when-compile + (defvar dos-codepage) + (require 'wid-edit)) ;;; MULE related key bindings and menus. @@@ -259,21 -248,21 +264,21 @@@ wrong, use this command again to toggl ;; We have to decode the file in any environment. (let ((default-enable-multibyte-characters t) (coding-system-for-read 'iso-2022-7bit)) - (find-file-read-only (expand-file-name "HELLO" data-directory)))) + (view-file (expand-file-name "HELLO" data-directory)))) -(defun universal-coding-system-argument () +(defun universal-coding-system-argument (coding-system) "Execute an I/O command using the specified coding system." - (interactive) - (let* ((default (and buffer-file-coding-system + (interactive + (let ((default (and buffer-file-coding-system (not (eq (coding-system-type buffer-file-coding-system) - t)) + 'undecided)) - buffer-file-coding-system)) - (coding-system (read-coding-system - (if default - (format "Coding system for following command (default, %s): " default) - "Coding system for following command: ") - default)) - (keyseq (read-key-sequence + buffer-file-coding-system))) + (list (read-coding-system + (if default + (format "Coding system for following command (default, %s): " default) + "Coding system for following command: ") + default)))) + (let* ((keyseq (read-key-sequence (format "Command to execute with %s:" coding-system))) (cmd (key-binding keyseq)) prefix) @@@ -364,9 -340,7 +369,7 @@@ system, and Emacs automatically sets th startup. A coding system that requires automatic detection of text - encoding (e.g. undecided, unix) can't be preferred. - - See also `coding-category-list' and `coding-system-category'." -encoding (e.g. undecided, unix) can't be preferred.." +++encoding (e.g. undecided, unix) can't be preferred.." (interactive "zPrefer coding system: ") (if (not (and coding-system (coding-system-p coding-system))) (error "Invalid coding system `%s'" coding-system)) @@@ -412,63 -380,42 +409,66 @@@ If the variable `sort-coding-systems-pr non-nil, it is used to sort CODINGS in the different way than above." (if sort-coding-systems-predicate (sort codings sort-coding-systems-predicate) - (let* ((from-categories (mapcar #'(lambda (x) (symbol-value x)) - coding-category-list)) - (most-preferred (car from-categories)) - (let* ((most-preferred (coding-system-priority-list t)) ++ (let* ((from-priority (coding-system-priority-list)) ++ (most-preferred (car from-priority)) (lang-preferred (get-language-info current-language-environment 'coding-system)) (func (function (lambda (x) (let ((base (coding-system-base x))) - (+ (if (eq base most-preferred) 64 0) - (let ((mime (coding-system-get base :mime-charset))) + ;; We calculate the priority number 0..255 by + ;; using the 8 bits PMMLCEII as this: + ;; P: 1 iff most preferred. + ;; MM: greater than 0 iff mime-charset. + ;; L: 1 iff one of the current lang. env.'s codings. + ;; C: 1 iff one of codings listed in the category list. + ;; E: 1 iff not XXX-with-esc + ;; II: if iso-2022 based, 0..3, else 1. + (logior + (lsh (if (eq base most-preferred) 1 0) 7) + (lsh - (let ((mime (coding-system-get base 'mime-charset))) ++ (let ((mime (coding-system-get base :mime-charset))) + ;; Prefer coding systems corresponding to a + ;; MIME charset. (if mime - (if (string-match "^x-" (symbol-name mime)) - 16 32) + ;; Lower utf-16 priority so that we + ;; normally prefer utf-8 to it, and put + ;; x-ctext below that. + (cond ((string-match "utf-16" + (symbol-name mime)) + 2) + ((string-match "^x-" (symbol-name mime)) + 1) + (t 3)) 0)) - (if (memq base lang-preferred) 8 0) - (if (string-match "-with-esc$" (symbol-name base)) - 0 4) - (if (eq (coding-system-type base) 'iso-2022) - (let ((category (coding-system-category base))) - ;; For ISO based coding systems, prefer - ;; one that doesn't use designation nor - ;; locking/single shifting. + 5) + (lsh (if (memq base lang-preferred) 1 0) 4) - (lsh (if (memq base from-categories) 1 0) 3) ++ (lsh (if (memq base from-priority) 1 0) 3) + (lsh (if (string-match "-with-esc\\'" + (symbol-name base)) + 0 1) 2) - (if (eq (coding-system-type base) 2) - ;; For ISO based coding systems, prefer - ;; one that doesn't use escape sequences. - (let ((flags (coding-system-flags base))) - (if (or (consp (aref flags 0)) - (consp (aref flags 1)) - (consp (aref flags 2)) - (consp (aref flags 3))) - (if (or (aref flags 8) (aref flags 9)) - 0 - 1) - 2)) - 1))))))) ++ (if (eq (coding-system-type base) 'iso-2022) ++ (let ((category (coding-system-category base))) ++ ;; For ISO based coding systems, prefer ++ ;; one that doesn't use designation nor ++ ;; locking/single shifting. + (cond + ((or (eq category 'coding-category-iso-8-1) + (eq category 'coding-category-iso-8-2)) + 2) + ((or (eq category 'coding-category-iso-7-tight) + (eq category 'coding-category-iso-7)) + 1) + (t + 0))) + 1) + )))))) (sort codings (function (lambda (x y) (> (funcall func x) (funcall func y)))))))) (defun find-coding-systems-region (from to) "Return a list of proper coding systems to encode a text between FROM and TO. ++ If FROM is a string, find coding systems in that instead of the buffer. All coding systems in the list can safely encode any multibyte characters in the text. @@@ -565,48 -518,9 +571,28 @@@ Optional 4th arg EXCLUDE is a list of c (setcar (cdr slot) (1+ count)) (if (or (not maxcount) (< count maxcount)) (nconc slot (list char))))) - (setq chars (cons (list charset 1 char) chars))))) - (setq idx (1+ idx)))) - (save-excursion - (goto-char from) - (while (re-search-forward "[^\000-\177]" to t) - (setq char (preceding-char) - charset (char-charset char)) - (if (eq charset 'unknown) - (setq char (match-string 0))) - (if (or (memq charset '(unknown eight-bit-control eight-bit-graphic)) - (not (or (eq excludes t) (memq charset excludes)))) - (let ((slot (assq charset chars))) - (if slot - (if (not (member char (nthcdr 2 slot))) - (let ((count (nth 1 slot))) - (setcar (cdr slot) (1+ count)) - (if (or (not maxcount) (< count maxcount)) - (nconc slot (list char))))) - (setq chars (cons (list charset 1 char) chars)))))))) + (setq chars (cons (list charset 1 char) chars))))))))) (nreverse chars))) - +(defun search-unencodable-char (coding-system) + "Search forward from point for a character that is not encodable. +It asks which coding system to check. +If such a character is found, set point after that character. +Otherwise, don't move point. + +When called from a program, the value is a position of the found character, +or nil if all characters are encodable." + (interactive + (list (let ((default (or buffer-file-coding-system 'us-ascii))) + (read-coding-system + (format "Coding-system (default, %s): " default) + default)))) + (let ((pos (unencodable-char-position (point) (point-max) coding-system))) + (if pos + (goto-char (1+ pos)) + (message "All following characters are encodable by %s" coding-system)) + pos)) + - (defvar last-coding-system-specified nil "Most recent coding system explicitly specified by the user when asked. This variable is set whenever Emacs asks the user which coding system @@@ -666,50 -565,33 +652,48 @@@ and TO is ignored. (not (listp default-coding-system))) (setq default-coding-system (list default-coding-system))) - ;; Change elements of the list to (coding . base-coding). - (setq default-coding-system - (mapcar (function (lambda (x) (cons x (coding-system-base x)))) - default-coding-system)) - - ;; If buffer-file-coding-system is not nil nor undecided, append it - ;; to the defaults. - (if buffer-file-coding-system - (let ((base (coding-system-base buffer-file-coding-system))) - (or (eq base 'undecided) - (assq buffer-file-coding-system default-coding-system) - (rassq base default-coding-system) - (setq default-coding-system - (append default-coding-system - (list (cons buffer-file-coding-system base))))))) - - ;; If the most preferred coding system has the property mime-charset, - ;; append it to the defaults. - (let ((preferred (coding-system-priority-list t)) - base) - (and (coding-system-p preferred) - (setq base (coding-system-base preferred)) - (coding-system-get preferred :mime-charset) - (not (assq preferred default-coding-system)) - (not (rassq base default-coding-system)) - (setq default-coding-system - (append default-coding-system (list (cons preferred base)))))) + (let ((no-other-defaults nil)) + (if (eq (car default-coding-system) t) + (setq no-other-defaults t + default-coding-system (cdr default-coding-system))) + + ;; Change elements of the list to (coding . base-coding). + (setq default-coding-system + (mapcar (function (lambda (x) (cons x (coding-system-base x)))) + default-coding-system)) + + ;; From now on, the list of defaults is reversed. + (setq default-coding-system (nreverse default-coding-system)) + + (unless no-other-defaults + ;; If buffer-file-coding-system is not nil nor undecided, append it + ;; to the defaults. + (if buffer-file-coding-system + (let ((base (coding-system-base buffer-file-coding-system))) + (or (eq base 'undecided) + (rassq base default-coding-system) + (push (cons buffer-file-coding-system base) + default-coding-system)))) + + ;; If default-buffer-file-coding-system is not nil nor undecided, + ;; append it to the defaults. + (if default-buffer-file-coding-system + (let ((base (coding-system-base default-buffer-file-coding-system))) + (or (eq base 'undecided) + (rassq base default-coding-system) + (push (cons default-buffer-file-coding-system base) + default-coding-system)))) + + ;; If the most preferred coding system has the property mime-charset, + ;; append it to the defaults. - (let ((tail coding-category-list) - preferred base) - (while (and tail (not (setq preferred (symbol-value (car tail))))) - (setq tail (cdr tail))) ++ (let ((preferred (coding-system-priority-list t)) ++ base) + (and (coding-system-p preferred) + (setq base (coding-system-base preferred)) - (coding-system-get preferred 'mime-charset) ++ (coding-system-get preferred :mime-charset) + (not (rassq base default-coding-system)) + (push (cons preferred base) + default-coding-system))))) (if select-safe-coding-system-accept-default-p (setq accept-default-p select-safe-coding-system-accept-default-p)) @@@ -1496,10 -1256,14 +1485,14 @@@ just activated. "Normal hook run just after an input method is inactivated. The variable `current-input-method' still keeps the input method name - just inactivated.") + just inactivated." + :type 'hook + :group 'mule) - (defvar input-method-after-insert-chunk-hook nil - "Normal hook run just after an input method insert some chunk of text.") + (defcustom input-method-after-insert-chunk-hook nil - "Normal hook run just after an input method inserts some chunk of text." ++ "Normal hook run just after an input method insert some chunk of text." + :type 'hook + :group 'mule) (defvar input-method-exit-on-first-char nil "This flag controls when an input method returns. @@@ -1610,72 -1366,26 +1598,45 @@@ The default status is as follows ;; This function formerly set default-enable-multibyte-characters to t, ;; but that is incorrect. It should not alter the unibyte/multibyte choice. - (setq coding-category-iso-7-tight 'iso-2022-jp - coding-category-iso-7 'iso-2022-7bit - coding-category-iso-8-1 'iso-latin-1 - coding-category-iso-8-2 'iso-latin-1 - coding-category-iso-7-else 'iso-2022-7bit-lock - coding-category-iso-8-else 'iso-2022-8bit-ss2 - coding-category-emacs-mule 'emacs-mule - coding-category-raw-text 'raw-text - coding-category-sjis 'japanese-shift-jis - coding-category-big5 'chinese-big5 - coding-category-utf-16-be 'mule-utf-16be-with-signature - coding-category-utf-16-le 'mule-utf-16le-with-signature - coding-category-utf-8 'mule-utf-8 - coding-category-ccl nil - coding-category-binary 'no-conversion) - - (set-coding-priority - '(coding-category-iso-8-1 - coding-category-iso-8-2 - coding-category-utf-8 - coding-category-utf-16-be - coding-category-utf-16-le - coding-category-iso-7-tight - coding-category-iso-7 - coding-category-iso-7-else - coding-category-iso-8-else - coding-category-emacs-mule - coding-category-raw-text - coding-category-sjis - coding-category-big5 - coding-category-ccl - coding-category-binary)) - - (update-coding-systems-internal) + (set-coding-system-priority + 'utf-8 + 'iso-2022-7bit + 'iso-latin-1 + 'iso-2022-7bit-lock + 'iso-2022-8bit-ss2 + 'emacs-mule + 'raw-text) - + (set-default-coding-systems nil) (setq default-sendmail-coding-system 'iso-latin-1) - (setq default-process-coding-system '(undecided . iso-latin-1)) + (setq default-file-name-coding-system 'iso-latin-1) + ;; Preserve eol-type from existing default-process-coding-systems. + ;; On non-unix-like systems in particular, these may have been set + ;; carefully by the user, or by the startup code, to deal with the + ;; users shell appropriately, so should not be altered by changing + ;; language environment. + (let ((output-coding + ;; When bootstrapping, coding-systems are not defined yet, so + ;; we need to catch the error from check-coding-system. + (condition-case nil + (coding-system-change-text-conversion + (car default-process-coding-system) 'undecided) + (coding-system-error 'undecided))) + (input-coding + (condition-case nil + (coding-system-change-text-conversion + (cdr default-process-coding-system) 'iso-latin-1) + (coding-system-error 'iso-latin-1)))) + (setq default-process-coding-system + (cons output-coding input-coding))) ;; Don't alter the terminal and keyboard coding systems here. ;; The terminal still supports the same coding system ;; that it supported a minute ago. -;;; (set-terminal-coding-system-internal nil) -;;; (set-keyboard-coding-system-internal nil) + ;; (set-terminal-coding-system-internal nil) + ;; (set-keyboard-coding-system-internal nil) - (setq nonascii-translation-table nil - nonascii-insert-offset 0)) + (set-unibyte-charset 'iso-8859-1)) (reset-language-environment) @@@ -2115,11 -1852,10 +2115,11 @@@ of `buffer-file-coding-system' set by t ; zh_HK/BIG5-HKSCS \ ("zh.*[._]big5" . "Chinese-BIG5") - ("zh.*[._]gbk" . nil) ; Solaris 2.7; has gbk-0 as well as GB 2312.1980-0 + ("zh.*[._].gb18030" . "Chinese-GB18030") ; zh_CN.GB18030/GB18030 in glibc + ("zh.*[._].gbk" . "Chinese-GBK") + ;; glibc has zh_TW.EUC-TW, with zh_TW defaulting to Big5 - ("zh_tw" . "Chinese-CNS") + ("zh_tw" . "Chinese-CNS") ; glibc uses big5 + ("zh_tw[._]euc-tw" . "Chinese-EUC-TW") ("zh" . "Chinese-GB") ; zu Zulu @@@ -2299,50 -2043,21 +2309,47 @@@ See also `locale-charset-language-names (when coding-system (prefer-coding-system coding-system) - (setq locale-coding-system coding-system)))) + (setq locale-coding-system coding-system)) + (when (get-language-info current-language-environment 'coding-priority) - (let ((codeset (langinfo 'codeset)) - (coding-system (car (coding-system-priority-list)))) - (when codeset - (let ((cs (coding-system-aliases coding-system)) - result) - (while (and cs (not result)) - (setq result - (locale-charset-match-p (symbol-name (pop cs)) - (langinfo 'codeset)))) - (unless result ++ (let ((codeset (locale-info 'codeset)) ++ (coding-system (car (coding-system-priority-list)))) ++ (when codeset ++ (let ((cs (coding-system-aliases coding-system)) ++ result) ++ (while (and cs (not result)) ++ (setq result ++ (locale-charset-match-p (symbol-name (pop cs)) ++ (locale-info 'codeset)))) ++ (unless result + (message "Warning: Default coding system `%s' disagrees with -system codeset `%s' for this locale." coding-system codeset)))))))))) ++system codeset `%s' for this locale." coding-system codeset)))))))) + + ;; Default to A4 paper if we're not in a C, POSIX or US locale. + ;; (See comments in Flocale_info.) + (let ((locale locale) + (paper (locale-info 'paper))) + (if paper + ;; This will always be null at the time of writing. + (cond + ((equal paper '(216 279)) + (setq ps-paper-type 'letter)) + ((equal paper '(210 297)) + (setq ps-paper-type 'a4))) + (let ((vars '("LC_ALL" "LC_PAPER" "LANG"))) + (while (and vars (= 0 (length locale))) + (setq locale (getenv (pop vars))))) + (when locale + ;; As of glibc 2.2.5, these are the only US Letter locales, + ;; and the rest are A4. + (setq ps-paper-type + (or (locale-name-match locale '(("c$" . letter) + ("posix$" . letter) + (".._us" . letter) + (".._pr" . letter) + (".._ca" . letter))) + 'a4)))))) + nil) - ;;; Charset property - - (defun get-charset-property (charset propname) - "Return the value of CHARSET's PROPNAME property. - This is the last value stored with - (put-charset-property CHARSET PROPNAME VALUE)." - (and (not (eq charset 'composition)) - (plist-get (charset-plist charset) propname))) - - (defun put-charset-property (charset propname value) - "Store CHARSETS's PROPNAME property with value VALUE. - It can be retrieved with `(get-charset-property CHARSET PROPNAME)'." - (or (eq charset 'composition) - (set-charset-plist charset - (plist-put (charset-plist charset) propname value)))) - ;;; Character code property (put 'char-code-property-table 'char-table-extra-slots 0) diff --cc lisp/international/mule-conf.el index 04bc47d3de2,38f6690114e..121e2bdf9dc --- a/lisp/international/mule-conf.el +++ b/lisp/international/mule-conf.el @@@ -1,9 -1,13 +1,13 @@@ - ;;; mule-conf.el --- configure multilingual environment -*- no-byte-compile: t -*- + ;;; mule-conf.el --- configure multilingual environment ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. -;; Copyright (C) 2001, 2002 ++;; Licensed to the Free Software Foundation. ++;; Copyright (C) 2002, 2003 Free Software Foundation, Inc. ++;; Copyright (C) 2003 + ;; National Institute of Advanced Industrial Science and Technology (AIST) + ;; Registration Number H13PRO009 -;; Copyright (C) 2002, 2003 Free Software Foundation, Inc. - ;; Keywords: mule, multilingual, character set, coding system + ;; Keywords: i18n, mule, multilingual, character set, coding system ;; This file is part of GNU Emacs. @@@ -28,135 -34,728 +34,735 @@@ ;;; Code: + ;;; Remarks + + ;; The ISO-IR registry is at http://www.itscj.ipsj.or.jp/ISO-IR/. + ;; Standards docs equivalent to iso-2022 and iso-8859 are at + ;; http://www.ecma.ch/. + + ;; FWIW, http://www.microsoft.com/globaldev/ lists the following for + ;; MS Windows, which are presumably the only charsets we really need + ;; to worry about on such systems: + ;; `OEM codepages': 437, 720, 737, 775, 850, 852, 855, 857, 858, 862, 866 + ;; `Windows codepages': 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, + ;; 1258, 874, 932, 936, 949, 950 + ;;; Definitions of character sets. - ;; Basic (official) character sets. These character sets are treated - ;; efficiently with respect to buffer memory. - - ;; Syntax: - ;; (define-charset CHARSET-ID CHARSET - ;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE - ;; SHORT-NAME LONG-NAME DESCRIPTION ]) - ;; ASCII charset is defined in src/charset.c as below. - ;; (define-charset 0 ascii - ;; [1 94 1 0 ?B 0 "ASCII" "ASCII" "ASCII (ISO646 IRV)"]) - - ;; 1-byte charsets. Valid range of CHARSET-ID is 128..143. - - ;; CHARSET-ID 128 is not used. - - (define-charset 129 'latin-iso8859-1 - [1 96 1 0 ?A 1 "RHP of Latin-1" "RHP of Latin-1 (ISO 8859-1): ISO-IR-100" - "Right-Hand Part of Latin Alphabet 1 (ISO/IEC 8859-1): ISO-IR-100."]) - (define-charset 130 'latin-iso8859-2 - [1 96 1 0 ?B 1 "RHP of Latin-2" "RHP of Latin-2 (ISO 8859-2): ISO-IR-101" - "Right-Hand Part of Latin Alphabet 2 (ISO/IEC 8859-2): ISO-IR-101."]) - (define-charset 131 'latin-iso8859-3 - [1 96 1 0 ?C 1 "RHP of Latin-3" "RHP of Latin-3 (ISO 8859-3): ISO-IR-109" - "Right-Hand Part of Latin Alphabet 3 (ISO/IEC 8859-3): ISO-IR-109."]) - (define-charset 132 'latin-iso8859-4 - [1 96 1 0 ?D 1 "RHP of Latin-4" "RHP of Latin-4 (ISO 8859-4): ISO-IR-110" - "Right-Hand Part of Latin Alphabet 4 (ISO/IEC 8859-4): ISO-IR-110."]) - (define-charset 133 'thai-tis620 - [1 96 1 0 ?T 1 "RHP of TIS620" "RHP of Thai (TIS620): ISO-IR-166" - "Right-Hand Part of TIS620.2533 (Thai): ISO-IR-166."]) - (define-charset 134 'greek-iso8859-7 - [1 96 1 0 ?F 1 "RHP of ISO8859/7" "RHP of Greek (ISO 8859-7): ISO-IR-126" - "Right-Hand Part of Latin/Greek Alphabet (ISO/IEC 8859-7): ISO-IR-126."]) - (define-charset 135 'arabic-iso8859-6 - [1 96 1 1 ?G 1 "RHP of ISO8859/6" "RHP of Arabic (ISO 8859-6): ISO-IR-127" - "Right-Hand Part of Latin/Arabic Alphabet (ISO/IEC 8859-6): ISO-IR-127."]) - (define-charset 136 'hebrew-iso8859-8 - [1 96 1 1 ?H 1 "RHP of ISO8859/8" "RHP of Hebrew (ISO 8859-8): ISO-IR-138" - "Right-Hand Part of Latin/Hebrew Alphabet (ISO/IEC 8859-8): ISO-IR-138."]) - (define-charset 137 'katakana-jisx0201 - [1 94 1 0 ?I 1 "JISX0201 Katakana" "Japanese Katakana (JISX0201.1976)" - "Katakana Part of JISX0201.1976."]) - (define-charset 138 'latin-jisx0201 - [1 94 1 0 ?J 0 "JISX0201 Roman" "Japanese Roman (JISX0201.1976)" - "Roman Part of JISX0201.1976."]) - - ;; CHARSET-ID is not used 139. - - (define-charset 140 'cyrillic-iso8859-5 - [1 96 1 0 ?L 1 "RHP of ISO8859/5" "RHP of Cyrillic (ISO 8859-5): ISO-IR-144" - "Right-Hand Part of Latin/Cyrillic Alphabet (ISO/IEC 8859-5): ISO-IR-144."]) - (define-charset 141 'latin-iso8859-9 - [1 96 1 0 ?M 1 "RHP of Latin-5" "RHP of Latin-5 (ISO 8859-9): ISO-IR-148" - "Right-Hand Part of Latin Alphabet 5 (ISO/IEC 8859-9): ISO-IR-148."]) - (define-charset 142 'latin-iso8859-15 - [1 96 1 0 ?b 1 "RHP of Latin-9" "RHP of Latin-9 (ISO 8859-15): ISO-IR-203" - "Right-Hand Part of Latin Alphabet 9 (ISO/IEC 8859-15): ISO-IR-203."]) - (define-charset 143 'latin-iso8859-14 - [1 96 1 0 ?_ 1 "RHP of Latin-8" "RHP of Latin-8 (ISO 8859-14): ISO-IR-199" - "Right-Hand Part of Latin Alphabet 8 (ISO/IEC 8859-14): ISO-IR-199."]) - - ;; 2-byte charsets. Valid range of CHARSET-ID is 144..153. - - (define-charset 144 'japanese-jisx0208-1978 - [2 94 2 0 ?@ 0 "JISX0208.1978" "JISX0208.1978 (Japanese): ISO-IR-42" - "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42."]) - (define-charset 145 'chinese-gb2312 - [2 94 2 0 ?A 0 "GB2312" "GB2312: ISO-IR-58" - "GB2312 Chinese simplified: ISO-IR-58."]) - (define-charset 146 'japanese-jisx0208 - [2 94 2 0 ?B 0 "JISX0208" "JISX0208.1983/1990 (Japanese): ISO-IR-87" - "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87."]) - (define-charset 147 'korean-ksc5601 - [2 94 2 0 ?C 0 "KSC5601" "KSC5601 (Korean): ISO-IR-149" - "KSC5601 Korean Hangul and Hanja: ISO-IR-149."]) - (define-charset 148 'japanese-jisx0212 - [2 94 2 0 ?D 0 "JISX0212" "JISX0212 (Japanese): ISO-IR-159" - "JISX0212 Japanese supplement: ISO-IR-159."]) - (define-charset 149 'chinese-cns11643-1 - [2 94 2 0 ?G 0 "CNS11643-1" "CNS11643-1 (Chinese traditional): ISO-IR-171" - "CNS11643 Plane 1 Chinese traditional: ISO-IR-171."]) - (define-charset 150 'chinese-cns11643-2 - [2 94 2 0 ?H 0 "CNS11643-2" "CNS11643-2 (Chinese traditional): ISO-IR-172" - "CNS11643 Plane 2 Chinese traditional: ISO-IR-172."]) - (define-charset 151 'japanese-jisx0213-1 - [2 94 2 0 ?O 0 "JISX0213-1" "JISX0213-1" "JISX0213 Plane 1 (Japanese)"]) - (define-charset 152 'chinese-big5-1 - [2 94 2 0 ?0 0 "Big5 (Level-1)" "Big5 (Level-1) A141-C67F" - "Frequently used part (A141-C67F) of Big5 (Chinese traditional)."]) - (define-charset 153 'chinese-big5-2 - [2 94 2 0 ?1 0 "Big5 (Level-2)" "Big5 (Level-2) C940-FEFE" - "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)."]) - - ;; Additional (private) character sets. These character sets are - ;; treated less space-efficiently in the buffer. - - ;; Syntax: - ;; (define-charset CHARSET-ID CHARSET - ;; [ DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE - ;; SHORT-NAME LONG-NAME DESCRIPTION ]) - - ;; ISO-2022 allows a use of character sets not registered in ISO with - ;; final characters `0' (0x30) through `?' (0x3F). Among them, Emacs - ;; reserves `0' through `9' to support several private character sets. - ;; The remaining final characters `:' through `?' are for users. - - ;; 1-byte 1-column charsets. Valid range of CHARSET-ID is 160..223. - - (define-charset 160 'chinese-sisheng - [1 94 1 0 ?0 0 "SiSheng" "SiSheng (PinYin/ZhuYin)" - "Sisheng characters (vowels with tone marks) for Pinyin/Zhuyin."]) - - ;; IPA characters for phonetic symbols. - (define-charset 161 'ipa - [1 96 1 0 ?0 1 "IPA" "IPA" - "IPA (International Phonetic Association) characters."]) - - ;; Vietnamese VISCII. VISCII is 1-byte character set which contains - ;; more than 96 characters. Since Emacs can't handle it as one - ;; character set, it is divided into two: lower case letters and upper - ;; case letters. - (define-charset 162 'vietnamese-viscii-lower - [1 96 1 0 ?1 1 "VISCII lower" "VISCII lower-case" - "Vietnamese VISCII1.1 lower-case characters."]) - (define-charset 163 'vietnamese-viscii-upper - [1 96 1 0 ?2 1 "VISCII upper" "VISCII upper-case" - "Vietnamese VISCII1.1 upper-case characters."]) + ;; The charsets `ascii', `unicode' and `eight-bit' are already defined + ;; in charset.c as below: + ;; + ;; (define-charset 'ascii + ;; "" + ;; :dimension 1 + ;; :code-space [0 127] + ;; :iso-final-char ?B + ;; :ascii-compatible-p t + ;; :emacs-mule-id 0 + ;; :code-offset 0) + ;; + ;; (define-charset 'unicode + ;; "" + ;; :dimension 3 + ;; :code-space [0 255 0 255 0 16] + ;; :ascii-compatible-p t + ;; :code-offset 0) + ;; + ;; (define-charset 'eight-bit + ;; "" + ;; :dimension 1 + ;; :code-space [128 255] + ;; :code-offset #x3FFF80) + ;; + ;; We now set :docstring, :short-name, and :long-name properties. + + (put-charset-property + 'ascii :docstring "ASCII (ISO646 IRV)") + (put-charset-property + 'ascii :short-name "ASCII") + (put-charset-property + 'ascii :long-name "ASCII (ISO646 IRV)") + (put-charset-property + 'iso-8859-1 :docstring "Latin-1 (ISO/IEC 8859-1)") + (put-charset-property + 'iso-8859-1 :short-name "Latin-1") + (put-charset-property + 'iso-8859-1 :long-name "Latin-1") + (put-charset-property + 'unicode :docstring "Unicode (ISO10646)") + (put-charset-property + 'unicode :short-name "Unicode") + (put-charset-property + 'unicode :long-name "Unicode (ISO10646)") + (put-charset-property 'eight-bit :docstring "Raw bytes 0-255") + (put-charset-property 'eight-bit :short-name "Raw bytes") + + (define-charset-alias 'ucs 'unicode) + + (define-charset 'emacs + "Full Emacs characters" + :ascii-compatible-p t + :code-space [ 0 255 0 255 0 63 ] + :code-offset 0 + :supplementary-p t) + + (define-charset 'latin-iso8859-1 + "Right-Hand Part of ISO/IEC 8859/1 (Latin-1): ISO-IR-100" + :short-name "RHP of Latin-1" + :long-name "RHP of ISO/IEC 8859/1 (Latin-1): ISO-IR-100" + :iso-final-char ?A + :emacs-mule-id 129 + :code-space [32 127] + :code-offset 160) + + ;; Name perhaps not ideal, but is XEmacs-compatible. + (define-charset 'control-1 + "8-bit control code (0x80..0x9F)" + :short-name "8-bit control code" + :code-space [128 159] + :code-offset 128) + + (define-charset 'eight-bit-control + "Raw bytes in the range 0x80..0x9F (usually produced from invalid encodings)" + :short-name "Raw bytes 0x80..0x9F" + :code-space [128 159] + :code-offset #x3FFF80) ; see character.h + + (define-charset 'eight-bit-graphic + "Raw bytes in the range 0xA0..0xFF (usually produced from invalid encodings)" + :short-name "Raw bytes 0xA0..0xFF" + :code-space [160 255] + :code-offset #x3FFFA0) ; see character.h + + (defmacro define-iso-single-byte-charset (symbol iso-symbol name nickname + iso-ir iso-final + emacs-mule-id map) + `(progn + (define-charset ,symbol + ,name + :short-name ,nickname + :long-name ,name + :ascii-compatible-p t + :code-space [0 255] + :map ,map) + (if ,iso-symbol + (define-charset ,iso-symbol + (if ,iso-ir + (format "Right-Hand Part of %s (%s): ISO-IR-%d" + ,name ,nickname ,iso-ir) + (format "Right-Hand Part of %s (%s)" ,name ,nickname)) + :short-name (format "RHP of %s" ,name) + :long-name (format "RHP of %s (%s)" ,name ,nickname) + :iso-final-char ,iso-final + :emacs-mule-id ,emacs-mule-id + :code-space [32 127] + :subset (list ,symbol 160 255 -128))))) + + (define-iso-single-byte-charset 'iso-8859-2 'latin-iso8859-2 + "ISO/IEC 8859/2" "Latin-2" 101 ?B 130 "8859-2") + + (define-iso-single-byte-charset 'iso-8859-3 'latin-iso8859-3 + "ISO/IEC 8859/3" "Latin-3" 109 ?C 131 "8859-3") + + (define-iso-single-byte-charset 'iso-8859-4 'latin-iso8859-4 + "ISO/IEC 8859/4" "Latin-4" 110 ?D 132 "8859-4") + + (define-iso-single-byte-charset 'iso-8859-5 'cyrillic-iso8859-5 + "ISO/IEC 8859/5" "Latin/Cyrillic" 144 ?L 140 "8859-5") + + (define-iso-single-byte-charset 'iso-8859-6 'arabic-iso8859-6 + "ISO/IEC 8859/6" "Latin/Arabic" 127 ?G 135 "8859-6") + + (define-iso-single-byte-charset 'iso-8859-7 'greek-iso8859-7 + "ISO/IEC 8859/7" "Latin/Greek" 126 ?F 134 "8859-7") + + (define-iso-single-byte-charset 'iso-8859-8 'hebrew-iso8859-8 + "ISO/IEC 8859/8" "Latin/Hebrew" 138 ?H 136 "8859-8") + + (define-iso-single-byte-charset 'iso-8859-9 'latin-iso8859-9 + "ISO/IEC 8859/9" "Latin-5" 148 ?M 141 "8859-9") + + (define-iso-single-byte-charset 'iso-8859-10 'latin-iso8859-10 + "ISO/IEC 8859/10" "Latin-6" 157 ?V nil "8859-10") + + ;; http://www.nectec.or.th/it-standards/iso8859-11/ + ;; http://www.cwi.nl/~dik/english/codes/8859.html says this is tis-620 + ;; plus nbsp + (define-iso-single-byte-charset 'iso-8859-11 'thai-iso8859-11 + "ISO/IEC 8859/11" "Latin/Thai" 166 ?T nil "8859-11") + + ;; 8859-12 doesn't (yet?) exist. + + (define-iso-single-byte-charset 'iso-8859-13 'latin-iso8859-13 + "ISO/IEC 8859/13" "Latin-7" 179 ?Y nil "8859-13") + + (define-iso-single-byte-charset 'iso-8859-14 'latin-iso8859-14 + "ISO/IEC 8859/14" "Latin-8" 199 ?_ 143 "8859-14") + + (define-iso-single-byte-charset 'iso-8859-15 'latin-iso8859-15 + "ISO/IEC 8859/15" "Latin-9" 203 ?b 142 "8859-15") + + (define-iso-single-byte-charset 'iso-8859-16 'latin-iso8859-16 + "ISO/IEC 8859/16" "Latin-10" 226 ?f nil "8859-16") + + ;; No point in keeping it around. + (fmakunbound 'define-iso-single-byte-charset) + + ;; Can this be shared with 8859-11? + ;; N.b. not all of these are defined unicodes. + (define-charset 'thai-tis620 + "TIS620.2533" + :short-name "TIS620.2533" + :iso-final-char ?T + :emacs-mule-id 133 + :code-space [32 127] + :code-offset #x0E00) + + ;; Fixme: doc for this, c.f. above + (define-charset 'tis620-2533 + "TIS620.2533" + :short-name "TIS620.2533" + :ascii-compatible-p t + :code-space [0 255] + :superset '(ascii eight-bit-control (thai-tis620 . 128))) + + (define-charset 'jisx0201 + "JISX0201" + :short-name "JISX0201" - :code-space [33 254] - :map "jisx0201") ++ :code-space [0 #xDF] ++ :map "JISX0201") + + (define-charset 'latin-jisx0201 + "Roman Part of JISX0201.1976" + :short-name "JISX0201 Roman" + :long-name "Japanese Roman (JISX0201.1976)" + :iso-final-char ?J + :emacs-mule-id 138 + :code-space [33 126] + :subset '(jisx0201 33 126 0)) + + (define-charset 'katakana-jisx0201 + "Katakana Part of JISX0201.1976" + :short-name "JISX0201 Katakana" + :long-name "Japanese Katakana (JISX0201.1976)" + :iso-final-char ?I + :emacs-mule-id 137 + :code-space [33 126] + :subset '(jisx0201 161 254 -128)) + + (define-charset 'chinese-gb2312 + "GB2312 Chinese simplified: ISO-IR-58" + :short-name "GB2312" + :long-name "GB2312: ISO-IR-58" + :iso-final-char ?A + :emacs-mule-id 145 + :code-space [33 126 33 126] + :code-offset #x110000 - :unify-map "gb2312-1980") ++ :unify-map "GB2312") + + (define-charset 'chinese-gbk + "GBK Chinese simplified." + :short-name "GBK" + :code-space [#x40 #xFE #x81 #xFE] + :code-offset #x160000 - :unify-map "gbk") ++ :unify-map "GBK") + (define-charset-alias 'cp936 'chinese-gbk) + (define-charset-alias 'windows-936 'chinese-gbk) + + (define-charset 'chinese-cns11643-1 + "CNS11643 Plane 1 Chinese traditional: ISO-IR-171" + :short-name "CNS11643-1" + :long-name "CNS11643-1 (Chinese traditional): ISO-IR-171" + :iso-final-char ?G + :emacs-mule-id 149 + :code-space [33 126 33 126] + :code-offset #x114000 - :unify-map "cns11643-1") ++ :unify-map "CNS-1") + + (define-charset 'chinese-cns11643-2 + "CNS11643 Plane 2 Chinese traditional: ISO-IR-172" + :short-name "CNS11643-2" + :long-name "CNS11643-2 (Chinese traditional): ISO-IR-172" + :iso-final-char ?H + :emacs-mule-id 150 + :code-space [33 126 33 126] + :code-offset #x118000 - :unify-map "cns11643-2") ++ :unify-map "CNS-2") + + (define-charset 'chinese-cns11643-3 + "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183" + :short-name "CNS11643-3" + :long-name "CNS11643-3 (Chinese traditional): ISO-IR-183" + :iso-final-char ?I + :code-space [33 126 33 126] + :emacs-mule-id 246 - :code-offset #x11C000) ++ :code-offset #x11C000 ++ :unify-map "CNS-3") + + (define-charset 'chinese-cns11643-4 + "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184" + :short-name "CNS11643-4" + :long-name "CNS11643-4 (Chinese traditional): ISO-IR-184" + :iso-final-char ?J + :emacs-mule-id 247 + :code-space [33 126 33 126] - :code-offset #x120000) ++ :code-offset #x120000 ++ :unify-map "CNS-4") + + (define-charset 'chinese-cns11643-5 + "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185" + :short-name "CNS11643-5" + :long-name "CNS11643-5 (Chinese traditional): ISO-IR-185" + :iso-final-char ?K + :emacs-mule-id 248 + :code-space [33 126 33 126] - :code-offset #x124000) ++ :code-offset #x124000 ++ :unify-map "CNS-5") + + (define-charset 'chinese-cns11643-6 + "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186" + :short-name "CNS11643-6" + :long-name "CNS11643-6 (Chinese traditional): ISO-IR-186" + :iso-final-char ?L + :emacs-mule-id 249 + :code-space [33 126 33 126] - :code-offset #x128000) ++ :code-offset #x128000 ++ :unify-map "CNS-6") + + (define-charset 'chinese-cns11643-7 + "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187" + :short-name "CNS11643-7" + :long-name "CNS11643-7 (Chinese traditional): ISO-IR-187" + :iso-final-char ?M + :emacs-mule-id 250 + :code-space [33 126 33 126] - :code-offset #x12C000) ++ :code-offset #x12C000 ++ :unify-map "CNS-7") + + (define-charset 'big5 + "Big5 (Chinese traditional)" + :short-name "Big5" + :code-space [#x40 #xFE #xA1 #xFE] + :code-offset #x130000 - :unify-map "big5") ++ :unify-map "BIG5") + ;; Fixme: AKA cp950 according to + ;; . Is + ;; that correct? + + (define-charset 'chinese-big5-1 + "Frequently used part (A141-C67E) of Big5 (Chinese traditional)" + :short-name "Big5 (Level-1)" + :long-name "Big5 (Level-1) A141-C67F" + :iso-final-char ?0 + :emacs-mule-id 152 + :code-space [#x21 #x7E #x21 #x7E] + :code-offset #x135000 - :unify-map "big5-1") ++ :unify-map "BIG5-1") + + (define-charset 'chinese-big5-2 + "Less frequently used part (C940-FEFE) of Big5 (Chinese traditional)" + :short-name "Big5 (Level-2)" + :long-name "Big5 (Level-2) C940-FEFE" + :iso-final-char ?1 + :emacs-mule-id 153 + :code-space [#x21 #x7E #x21 #x7E] + :code-offset #x137800 - :unify-map "big5-2") ++ :unify-map "BIG5-2") + + (define-charset 'japanese-jisx0208 + "JISX0208.1983/1990 Japanese Kanji: ISO-IR-87" + :short-name "JISX0208" + :long-name "JISX0208.1983/1990 (Japanese): ISO-IR-87" + :iso-final-char ?B + :emacs-mule-id 146 + :code-space [33 126 33 126] + :code-offset #x140000 - :unify-map "jisx0208-1990") ++ :unify-map "JISX0208") + + (define-charset 'japanese-jisx0208-1978 + "JISX0208.1978 Japanese Kanji (so called \"old JIS\"): ISO-IR-42" + :short-name "JISX0208.1978" - :long-name "JISX0208.1978 (Japanese): ISO-IR-42" ++ :long-name "JISX0208.1978 (JISC6226.1978): ISO-IR-42" + :iso-final-char ?@ + :emacs-mule-id 144 + :code-space [33 126 33 126] + :code-offset #x144000 - :unify-map "jisx0208-1978") ++ :unify-map "JISC6226") + + (define-charset 'japanese-jisx0212 + "JISX0212 Japanese supplement: ISO-IR-159" + :short-name "JISX0212" + :long-name "JISX0212 (Japanese): ISO-IR-159" + :iso-final-char ?D + :emacs-mule-id 148 + :code-space [33 126 33 126] + :code-offset #x148000 - :unify-map "jisx0212-1990") ++ :unify-map "JISX0212") + + ;; Note that jisx0213 contains characters not in Unicode (3.2?). It's + ;; arguable whether it should have a unify-map. + (define-charset 'japanese-jisx0213-1 + "JISX0213 Plane 1 (Japanese)" + :short-name "JISX0213-1" + :iso-final-char ?O + :emacs-mule-id 151 - :unify-map "jisx0213-1" ++ :unify-map "JISX2131" + :code-space [33 126 33 126] + :code-offset #x14C000) + + (define-charset 'japanese-jisx0213-2 + "JISX0213 Plane 2 (Japanese)" + :short-name "JISX0213-2" + :iso-final-char ?P + :emacs-mule-id 254 - :unify-map "jisx0213-2" ++ :unify-map "JISX2132" + :code-space [33 126 33 126] + :code-offset #x150000) + + (define-charset 'korean-ksc5601 + "KSC5601 Korean Hangul and Hanja: ISO-IR-149" + :short-name "KSC5601" + :long-name "KSC5601 (Korean): ISO-IR-149" + :iso-final-char ?C + :emacs-mule-id 147 + :code-space [33 126 33 126] + :code-offset #x279f94 ; ... #x27c217 - :unify-map "ksc5601-1987") ++ :unify-map "KSC5601") + + (define-charset 'big5-hkscs + "Big5-HKSCS (Chinese traditional, Hong Kong supplement)" + :short-name "Big5" + :code-space [#x40 #xFE #xA1 #xFE] + :code-offset #x27c218 ; ... #x280839 - :unify-map "big5-hkscs") ++ :unify-map "BIG5-HKSCS") + + ;; Fixme: Korean cp949/UHC + + (define-charset 'chinese-sisheng + "SiSheng characters for PinYin/ZhuYin" + :short-name "SiSheng" + :long-name "SiSheng (PinYin/ZhuYin)" + :iso-final-char ?0 + :emacs-mule-id 160 + :code-space [33 126] - :unify-map "sisheng" ++ :unify-map "MULE-sisheng" + :code-offset #x200000) + + ;; A subset of the 1989 version of IPA. It consists of the consonant + ;; signs used in English, French, German and Italian, and all vowels + ;; signs in the table. [says old MULE doc] + (define-charset 'ipa + "IPA (International Phonetic Association)" + :short-name "IPA" + :iso-final-char ?0 + :emacs-mule-id 161 - :unify-map "ipa" ++ :unify-map "MULE-ipa" + :code-space [32 127] + :code-offset #x200080) + + (define-charset 'viscii + "VISCII1.1" + :short-name "VISCII" + :long-name "VISCII 1.1" + :code-space [0 255] - :map "viscii") ++ :map "VISCII") + + (define-charset 'vietnamese-viscii-lower + "VISCII1.1 lower-case" + :short-name "VISCII lower" + :long-name "VISCII lower-case" + :iso-final-char ?1 + :emacs-mule-id 162 + :code-space [32 127] + :code-offset #x200200 - :unify-map "viscii-lower") ++ :unify-map "MULE-lviscii") + + (define-charset 'vietnamese-viscii-upper + "VISCII1.1 upper-case" + :short-name "VISCII upper" + :long-name "VISCII upper-case" + :iso-final-char ?2 + :emacs-mule-id 163 + :code-space [32 127] + :code-offset #x200280 - :unify-map "viscii-upper") ++ :unify-map "MULE-uviscii") + + (define-charset 'vscii - "VSCII1.1" ++ "VSCII1.1 (TCVN-5712 VN1)" + :short-name "VSCII" + :code-space [0 255] - :map "vscii") ++ :map "VSCII") ++ ++(define-charset-alias 'tcvn-5712 'vscii) + + ;; Fixme: see note in tcvn.map about combining characters -(define-charset 'tcvn-5712 - "TCVN-5712" ++(define-charset 'vscii-2 ++ "VSCII-2 (TCVN-5712 VN2)" + :code-space [0 255] - :map "tcvn") ++ :map "VSCII-2") + + (define-charset 'koi8-r + "KOI8-R" + :short-name "KOI8-R" + :ascii-compatible-p t + :code-space [0 255] - :map "koi8-r") ++ :map "KOI8-R") + + (define-charset-alias 'koi8 'koi8-r) + + (define-charset 'alternativnyj + "ALTERNATIVNYJ" + :short-name "alternativnyj" + :ascii-compatible-p t + :code-space [0 255] - :map "alternativnyj") ++ :map "ALTERNATIVNYJ") + + (define-charset 'cp866 + "CP866" + :short-name "cp866" + :ascii-compatible-p t + :code-space [0 255] - :map "ibm866") ++ :map "IBM866") + (define-charset-alias 'ibm866 'cp866) + + (define-charset 'koi8-u + "KOI8-U" + :short-name "KOI8-U" + :ascii-compatible-p t + :code-space [0 255] - :map "koi8-u") ++ :map "KOI8-U") + + (define-charset 'koi8-t + "KOI8-T" + :short-name "KOI8-T" + :ascii-compatible-p t + :code-space [0 255] - :map "koi8-t") ++ :map "KOI8-T") + + (define-charset 'georgian-ps + "GEORGIAN-PS" + :short-name "GEORGIAN-PS" + :ascii-compatible-p t + :code-space [0 255] - :map "georgian-ps") ++ :map "KA-PS") + + (define-charset 'georgian-academy + "GEORGIAN-ACADEMY" + :short-name "GEORGIAN-ACADEMY" + :ascii-compatible-p t + :code-space [0 255] - :map "georgian-academy") ++ :map "KA-ACADEMY") + + (define-charset 'windows-1250 + "WINDOWS-1250 (Central Europe)" + :short-name "WINDOWS-1250" + :ascii-compatible-p t + :code-space [0 255] - :map "windows-1250") ++ :map "CP1250") + (define-charset-alias 'cp1250 'windows-1250) + + (define-charset 'windows-1251 + "WINDOWS-1251 (Cyrillic)" + :short-name "WINDOWS-1251" + :ascii-compatible-p t + :code-space [0 255] - :map "windows-1251") ++ :map "CP1251") + (define-charset-alias 'cp1251 'windows-1251) + + (define-charset 'windows-1252 + "WINDOWS-1252 (Latin I)" + :short-name "WINDOWS-1252" + :ascii-compatible-p t + :code-space [0 255] - :map "windows-1252") ++ :map "CP1252") + (define-charset-alias 'cp1252 'windows-1252) + + (define-charset 'windows-1253 + "WINDOWS-1253 (Greek)" + :short-name "WINDOWS-1253" + :ascii-compatible-p t + :code-space [0 255] - :map "windows-1253") ++ :map "CP1253") + (define-charset-alias 'cp1253 'windows-1253) + + (define-charset 'windows-1254 + "WINDOWS-1254 (Turkish)" + :short-name "WINDOWS-1254" + :ascii-compatible-p t + :code-space [0 255] - :map "windows-1254") ++ :map "CP1254") + (define-charset-alias 'cp1254 'windows-1254) + + (define-charset 'windows-1255 + "WINDOWS-1255 (Hebrew)" + :short-name "WINDOWS-1255" + :ascii-compatible-p t + :code-space [0 255] - :map "windows-1255") ++ :map "CP1255") + (define-charset-alias 'cp1255 'windows-1255) + + (define-charset 'windows-1256 + "WINDOWS-1256 (Arabic)" + :short-name "WINDOWS-1256" + :ascii-compatible-p t + :code-space [0 255] - :map "windows-1256") ++ :map "CP1256") + (define-charset-alias 'cp1256 'windows-1256) + + (define-charset 'windows-1257 + "WINDOWS-1257 (Baltic)" + :short-name "WINDOWS-1257" + :ascii-compatible-p t + :code-space [0 255] - :map "windows-1257") ++ :map "CP1257") + (define-charset-alias 'cp1257 'windows-1257) + + (define-charset 'windows-1258 + "WINDOWS-1258 (Viet Nam)" + :short-name "WINDOWS-1258" + :ascii-compatible-p t + :code-space [0 255] - :map "windows-1258") ++ :map "CP1258") + (define-charset-alias 'cp1258 'windows-1258) + + (define-charset 'next + "NEXT" + :short-name "NEXT" + :ascii-compatible-p t + :code-space [0 255] - :map "next") ++ :map "NEXTSTEP") + + (define-charset 'cp1125 + "CP1125" + :short-name "CP1125" + :code-space [0 255] - :map "cp1125") ++ :map "CP1125") + (define-charset-alias 'ruscii 'cp1125) + ;; Original name for cp1125, says Serhii Hlodin + (define-charset-alias 'cp866u 'cp1125) + + ;; Fixme: C.f. iconv, http://czyborra.com/charsets/codepages.html + ;; shows this as not ASCII comptaible, with various graphics in + ;; 0x01-0x1F. + (define-charset 'cp437 + "CP437 (MS-DOS United States, Australia, New Zealand, South Africa)" + :short-name "CP437" + :code-space [0 255] + :ascii-compatible-p t - :map "cp437") ++ :map "IBM437") + + (define-charset 'cp720 + "CP720 (Arabic)" + :short-name "CP720" + :code-space [0 255] + :ascii-compatible-p t - :map "cp720") ++ :map "CP720") + + (define-charset 'cp737 + "CP737 (PC Greek)" + :short-name "CP737" + :code-space [0 255] + :ascii-compatible-p t - :map "cp737") ++ :map "CP737") + + (define-charset 'cp775 + "CP775 (PC Baltic)" + :short-name "CP775" + :code-space [0 255] + :ascii-compatible-p t - :map "cp775") ++ :map "CP775") + + (define-charset 'cp851 + "CP851 (Greek)" + :short-name "CP851" + :code-space [0 255] + :ascii-compatible-p t - :map "cp851") ++ :map "IBM851") + + (define-charset 'cp852 + "CP852 (MS-DOS Latin-2)" + :short-name "CP852" + :code-space [0 255] + :ascii-compatible-p t - :map "cp852") ++ :map "IBM852") + + (define-charset 'cp855 + "CP855 (IBM Cyrillic)" + :short-name "CP855" + :code-space [0 255] + :ascii-compatible-p t - :map "cp855") ++ :map "IBM855") + + (define-charset 'cp857 + "CP857 (IBM Turkish)" + :short-name "CP857" + :code-space [0 255] + :ascii-compatible-p t - :map "cp857") ++ :map "IBM857") + + (define-charset 'cp858 + "CP858 (Multilingual Latin I + Euro)" + :short-name "CP858" + :code-space [0 255] + :ascii-compatible-p t - :map "cp858") ++ :map "CP858") + (define-charset-alias 'cp00858 'cp858) ; IANA has IBM00858/CP00858 + + (define-charset 'cp860 + "CP860 (MS-DOS Portuguese)" + :short-name "CP860" + :code-space [0 255] + :ascii-compatible-p t - :map "cp860") ++ :map "IBM860") + + (define-charset 'cp861 + "CP861 (MS-DOS Icelandic)" + :short-name "CP861" + :code-space [0 255] + :ascii-compatible-p t - :map "cp861") ++ :map "IBM861") + + (define-charset 'cp862 + "CP862 (PC Hebrew)" + :short-name "CP862" + :code-space [0 255] + :ascii-compatible-p t - :map "cp862") ++ :map "IBM862") + + (define-charset 'cp863 + "CP863 (MS-DOS Canadian French)" + :short-name "CP863" + :code-space [0 255] + :ascii-compatible-p t - :map "cp863") ++ :map "IBM863") + + (define-charset 'cp864 + "CP864 (PC Arabic)" + :short-name "CP864" + :code-space [0 255] + :ascii-compatible-p t - :map "cp864") ++ :map "IBM864") + + (define-charset 'cp865 + "CP865 (MS-DOS Nordic)" + :short-name "CP865" + :code-space [0 255] + :ascii-compatible-p t - :map "cp865") ++ :map "IBM865") + + (define-charset 'cp869 + "CP869 (IBM Modern Greek)" + :short-name "CP869" + :code-space [0 255] + :ascii-compatible-p t - :map "cp869") ++ :map "IBM869") + + (define-charset 'cp874 + "CP874 (IBM Thai)" + :short-name "CP874" + :code-space [0 255] + :ascii-compatible-p t - :map "cp874") ++ :map "IBM874") ;; For Arabic, we need three different types of character sets. ;; Digits are of direction left-to-right and of width 1-column. @@@ -192,85 -809,279 +816,279 @@@ ;; not assigned. They are automatically converted to each Indian ;; script which IS-13194 supports. - (define-charset 225 'indian-is13194 - [1 94 2 0 ?5 1 "IS 13194" "Indian IS 13194" - "Generic Indian character set for data exchange with IS 13194."]) - - ;; CHARSET-IDs 226..239 are not used. - - (define-charset 240 'indian-glyph - [2 96 1 0 ?4 0 "Indian glyph" "Indian glyph" - "Glyphs for Indian characters."]) - ;; 240 used to be [2 94 1 0 ?6 0 "Indian 1-col" "Indian 1 Column"] - - ;; 2-byte 1-column charsets. Valid range of CHARSET-ID is 240..244. + (define-charset 'indian-is13194 + "Generic Indian charset for data exchange with IS 13194" + :short-name "IS 13194" + :long-name "Indian IS 13194" + :iso-final-char ?5 + :emacs-mule-id 225 + :code-space [33 126] + :code-offset #x180000) + -(define-charset 'devanagari-glyph - "Glyphs for Devanagari script. Subset of `indian-glyph'." - :short-name "Devanagari glyph" - :code-space [0 255] - :code-offset #x180100) - -(define-charset 'malayalam-glyph - "Glyphs for Malayalam script. Subset of `indian-glyph'." - :short-name "Malayalam glyph" - :code-space [0 255] - :code-offset #x180200) - -;; These would be necessary for supporting the complete set of Indian -;; scripts. See also fontset.el. - -;; (let ((i 0)) -;; (dolist (script '(sanskrit bengali tamil telugu assamese -;; oriya kannada malayalam gujarati punjabi)) -;; (define-charset (intern (concat (symbol-name script) "-glyph")) -;; (concat "Glyphs for " (capitalize (symbol-name script)) -;; " script. Subset of `indian-glyph'.") -;; :short-name (concat (capitalize (symbol-name script)) " glyph") -;; :code-space [0 255] -;; :code-offset (+ #x180100 (* 256 i))) -;; (setq i (1+ i)))) ++(let ((code-offset #x180100)) ++ (dolist (script '(devanagari sanskrit bengali tamil telugu assamese ++ oriya kannada malayalam gujarati punjabi)) ++ (define-charset (intern (format "%s-cdac" script)) ++ (format "Glyphs of %s script for CDAC font. Subset of `indian-glyph'." ++ (capitalize (symbol-name script))) ++ :short-name (format "CDAC %s glyphs" (capitalize (symbol-name script))) ++ :code-space [0 255] ++ :code-offset code-offset) ++ (setq code-offset (+ code-offset #x100))) ++ ++ (dolist (script '(devanagari bengali punjabi gujarati ++ oriya tamil telugu kannada malayalam)) ++ (define-charset (intern (format "%s-akruti" script)) ++ (format "Glyphs of %s script for AKRUTI font. Subset of `indian-glyph'." ++ (capitalize (symbol-name script))) ++ :short-name (format "AKRUTI %s glyphs" (capitalize (symbol-name script))) ++ :code-space [0 255] ++ :code-offset code-offset) ++ (setq code-offset (+ code-offset #x100)))) + + (define-charset 'indian-glyph + "Glyphs for Indian characters." + :short-name "Indian glyph" + :iso-final-char ?4 + :emacs-mule-id 240 + :code-space [32 127 32 127] + :code-offset #x180100) ;; Actual Glyph for 1-column width. - (define-charset 241 'tibetan-1-column - [2 94 1 0 ?8 0 "Tibetan 1-col" "Tibetan 1 column" - "Tibetan 1-column glyphs."]) - - ;; Subsets of Unicode. - - (define-charset 242 'mule-unicode-2500-33ff - [2 96 1 0 ?2 0 "Unicode subset 2" "Unicode subset (U+2500..U+33FF)" - "Unicode characters of the range U+2500..U+33FF."]) - - (define-charset 243 'mule-unicode-e000-ffff - [2 96 1 0 ?3 0 "Unicode subset 3" "Unicode subset (U+E000+FFFF)" - "Unicode characters of the range U+E000..U+FFFF."]) - - (define-charset 244 'mule-unicode-0100-24ff - [2 96 1 0 ?1 0 "Unicode subset" "Unicode subset (U+0100..U+24FF)" - "Unicode characters of the range U+0100..U+24FF."]) - - ;; 2-byte 2-column charsets. Valid range of CHARSET-ID is 245..254. - - ;; Ethiopic characters (Amharic and Tigrigna). - (define-charset 245 'ethiopic - [2 94 2 0 ?3 0 "Ethiopic" "Ethiopic characters" - "Ethiopic characters."]) - - ;; Chinese CNS11643 Plane3 thru Plane7. Although these are official - ;; character sets, the use is rare and don't have to be treated - ;; space-efficiently in the buffer. - (define-charset 246 'chinese-cns11643-3 - [2 94 2 0 ?I 0 "CNS11643-3" "CNS11643-3 (Chinese traditional): ISO-IR-183" - "CNS11643 Plane 3 Chinese Traditional: ISO-IR-183."]) - (define-charset 247 'chinese-cns11643-4 - [2 94 2 0 ?J 0 "CNS11643-4" "CNS11643-4 (Chinese traditional): ISO-IR-184" - "CNS11643 Plane 4 Chinese Traditional: ISO-IR-184."]) - (define-charset 248 'chinese-cns11643-5 - [2 94 2 0 ?K 0 "CNS11643-5" "CNS11643-5 (Chinese traditional): ISO-IR-185" - "CNS11643 Plane 5 Chinese Traditional: ISO-IR-185."]) - (define-charset 249 'chinese-cns11643-6 - [2 94 2 0 ?L 0 "CNS11643-6" "CNS11643-6 (Chinese traditional): ISO-IR-186" - "CNS11643 Plane 6 Chinese Traditional: ISO-IR-186."]) - (define-charset 250 'chinese-cns11643-7 - [2 94 2 0 ?M 0 "CNS11643-7" "CNS11643-7 (Chinese traditional): ISO-IR-187" - "CNS11643 Plane 7 Chinese Traditional: ISO-IR-187."]) + (define-charset 'indian-1-column + "Indian charset for 1-column width glyphs." + :short-name "Indian 1-col" + :long-name "Indian 1 Column" + :iso-final-char ?6 + :emacs-mule-id 240 + :code-space [33 126 33 126] + :code-offset #x184000) ;; Actual Glyph for 2-column width. - (define-charset 251 'indian-2-column - [2 94 2 0 ?5 0 "Indian 2-col" "Indian 2 Column" - "Indian character set for 2-column width glyphs."]) - ;; old indian-1-column characters will be translated to indian-2-column. - (declare-equiv-charset 2 94 ?6 'indian-2-column) - - ;; Tibetan script. - (define-charset 252 'tibetan - [2 94 2 0 ?7 0 "Tibetan 2-col" "Tibetan 2 column" - "Tibetan 2-column width glyphs."]) - - ;; CHARSET-ID 253 is not used. - - ;; JISX0213 Plane 2 - (define-charset 254 'japanese-jisx0213-2 - [2 94 2 0 ?P 0 "JISX0213-2" "JISX0213-2" - "JISX0213 Plane 2 (Japanese)."]) + (define-charset 'indian-2-column + "Indian charset for 2-column width glyphs." + :short-name "Indian 2-col" + :long-name "Indian 2 Column" + :iso-final-char ?5 + :emacs-mule-id 251 + :code-space [33 126 33 126] + :code-offset #x184000) + + (define-charset 'tibetan + "Tibetan characters" + :iso-final-char ?7 + :short-name "Tibetan 2-col" + :long-name "Tibetan 2 column" + :iso-final-char ?7 + :emacs-mule-id 252 - :unify-map "tibetan" ++ :unify-map "MULE-tibetan" + :code-space [33 126 33 37] + :code-offset #x190000) + + (define-charset 'tibetan-1-column + "Tibetan 1 column glyph" + :short-name "Tibetan 1-col" + :long-name "Tibetan 1 column" + :iso-final-char ?8 + :emacs-mule-id 241 + :code-space [33 126 33 37] + :code-offset #x190000) - ;; Tell C code charset ID's of several charsets. - (setup-special-charsets) + ;; Subsets of Unicode. + (define-charset 'mule-unicode-2500-33ff + "Unicode characters of the range U+2500..U+33FF." + :short-name "Unicode subset 2" + :long-name "Unicode subset (U+2500..U+33FF)" + :iso-final-char ?2 + :emacs-mule-id 242 + :code-space [#x20 #x7f #x20 #x47] + :code-offset #x2500) + + (define-charset 'mule-unicode-e000-ffff + "Unicode characters of the range U+E000..U+FFFF." + :short-name "Unicode subset 3" + :long-name "Unicode subset (U+E000+FFFF)" + :iso-final-char ?3 + :emacs-mule-id 243 + :code-space [#x20 #x7F #x20 #x75] + :code-offset #xE000 + :max-code 30015) ; U+FFFF + + (define-charset 'mule-unicode-0100-24ff + "Unicode characters of the range U+0100..U+24FF." + :short-name "Unicode subset" + :long-name "Unicode subset (U+0100..U+24FF)" + :iso-final-char ?1 + :emacs-mule-id 244 + :code-space [#x20 #x7F #x20 #x7F] + :code-offset #x100) + + (define-charset 'unicode-bmp + "Unicode Basic Multilingual Plane" + :short-name "Unicode BMP" + :code-space [0 255 0 255] + :code-offset 0) + + (define-charset 'ethiopic + "Ethiopic characters for Amharic and Tigrigna." + :short-name "Ethiopic" + :long-name "Ethiopic characters" + :iso-final-char ?3 + :emacs-mule-id 245 - :unify-map "ethiopic" ++ :unify-map "MULE-ethiopic" + :code-space [33 126 33 126] + :code-offset #x1A0000) + + (define-charset 'mac-roman + "Mac Roman charset" + :short-name "Mac Roman" + :ascii-compatible-p t + :code-space [0 255] - :map "mac-roman") ++ :map "MACINTOSH") + + ;; Fixme: modern EBCDIC variants, e.g. IBM00924? + (define-charset 'ebcdic-us + "US version of EBCDIC" + :short-name "EBCDIC-US" + :code-space [0 255] + :mime-charset 'ebcdic-us - :map "ebcdic-us") ++ :map "EBCDICUS") + + (define-charset 'ebcdic-uk + "UK version of EBCDIC" + :short-name "EBCDIC-UK" + :code-space [0 255] + :mime-charset 'ebcdic-uk - :map "ebcdic-uk") ++ :map "EBCDICUK") + + (define-charset 'ibm1047 + ;; Says groff: + "IBM1047, `EBCDIC Latin 1/Open Systems' used by OS/390 Unix." + :short-name "IBM1047" + :code-space [0 255] + :mime-charset 'ibm1047 - :map "ibm1047") ++ :map "IBM1047") + (define-charset-alias 'cp1047 'ibm1047) + + (define-charset 'hp-roman8 + "Encoding used by Hewlet-Packard printer software" + :short-name "HP-ROMAN8" + :ascii-compatible-p t + :code-space [0 255] - :map "hp-roman8") ++ :map "HP-ROMAN8") + + ;; To make a coding system with this, a pre-write-conversion should + ;; account for the commented-out multi-valued code points in + ;; stdenc.map. + (define-charset 'adobe-standard-encoding + "Adobe `standard encoding' used in PostScript" + :short-name "ADOBE-STANDARD-ENCODING" + :code-space [#x20 255] + :map "stdenc") + + (define-charset 'symbol + "Adobe symbol encoding used in PostScript" + :short-name "ADOBE-SYMBOL" + :code-space [#x20 255] + :map "symbol") + + (define-charset 'ibm850 + "DOS codepage 850 (Latin-1)" + :short-name "IBM850" + :ascii-compatible-p t + :code-space [0 255] - :map "ibm850") ++ :map "IBM850") + (define-charset-alias 'cp850 'ibm850) + + (define-charset 'mik + "Bulgarian DOS codepage" + :short-name "MIK" + :ascii-compatible-p t + :code-space [0 255] - :map "mik") ++ :map "MIK") + -(define-charset 'pt154 ++(define-charset 'ptcp154 + "`Paratype' codepage (Asian Cyrillic)" + :short-name "PT154" + :ascii-compatible-p t + :code-space [0 255] + :mime-charset 'pt154 - :map "pt154") -(define-charset-alias 'ptcp154 'pt154) -(define-charset-alias 'cp154 'pt154) ++ :map "PTCP154") ++(define-charset-alias 'pt154 'ptcp154) ++(define-charset-alias 'cp154 'ptcp154) + + (define-charset 'gb18030-2-byte + "GB18030 2-byte (0x814E..0xFEFE)" + :code-space [#x40 #xFE #x81 #xFE] + :supplementary-p t - :map "gb18030-2") ++ :map "GB180302") + + (define-charset 'gb18030-4-byte-bmp + "GB18030 4-byte for BMP (0x81308130-0x8431A439)" + :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x81 #x84] + :supplementary-p t - :map "gb18030-4") ++ :map "GB180304") + + (define-charset 'gb18030-4-byte-smp + "GB18030 4-byte for SMP (0x90308130-0xE3329A35)" + :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x90 #xE3] + :min-code '(#x9030 . #x8130) + :max-code '(#xE332 . #x9A35) + :supplementary-p t + :code-offset #x10000) + + (define-charset 'gb18030-4-byte-ext-1 + "GB18030 4-byte (0x8431A530-0x8F39FE39)" + :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #x84 #x8F] + :min-code '(#x8431 . #xA530) + :max-code '(#x8F39 . #xFE39) + :supplementary-p t + :code-offset #x200000 ; ... #x22484B + ) + + (define-charset 'gb18030-4-byte-ext-2 + "GB18030 4-byte (0xE3329A36-0xFE39FE39)" + :code-space [#x30 #x39 #x81 #xFE #x30 #x39 #xE3 #xFE] + :min-code '(#xE332 . #x9A36) + :max-code '(#xFE39 . #xFE39) + :supplementary-p t + :code-offset #X22484C ; ... #x279f93 + ) + + (define-charset 'gb18030 + "GB18030" + :code-space [#x00 #xFF #x00 #xFE #x00 #xFE #x00 #xFE] + :min-code 0 + :max-code '(#xFE39 . #xFE39) + :superset '(ascii gb18030-2-byte + gb18030-4-byte-bmp gb18030-4-byte-smp + gb18030-4-byte-ext-1 gb18030-4-byte-ext-2)) + + (unify-charset 'chinese-gb2312) + (unify-charset 'chinese-gbk) + (unify-charset 'chinese-cns11643-1) + (unify-charset 'chinese-cns11643-2) ++(unify-charset 'chinese-cns11643-3) ++(unify-charset 'chinese-cns11643-4) ++(unify-charset 'chinese-cns11643-5) ++(unify-charset 'chinese-cns11643-6) ++(unify-charset 'chinese-cns11643-7) + (unify-charset 'big5) + (unify-charset 'chinese-big5-1) + (unify-charset 'chinese-big5-2) ++(unify-charset 'big5-hkscs) ++(unify-charset 'korean-ksc5601) + (unify-charset 'vietnamese-viscii-lower) + (unify-charset 'vietnamese-viscii-upper) -(unify-charset 'big5-hkscs) + (unify-charset 'chinese-sisheng) -(unify-charset 'korean-ksc5601) + (unify-charset 'ipa) + (unify-charset 'tibetan) + (unify-charset 'ethiopic) -;; (unify-charset 'japanese-jisx0208-1978) ++(unify-charset 'japanese-jisx0208-1978) + (unify-charset 'japanese-jisx0208) + (unify-charset 'japanese-jisx0212) + (unify-charset 'japanese-jisx0213-1) + (unify-charset 'japanese-jisx0213-2) ;; These are tables for translating characters on decoding and @@@ -317,37 -1109,8 +1116,8 @@@ (define-coding-system-alias 'binary 'no-conversion) - (put 'undecided 'coding-system - (vector t ?- "No conversion on encoding, automatic conversion on decoding" - (list 'alias-coding-systems '(undecided) - 'safe-charsets '(ascii)) - nil)) - (setq coding-system-list (cons 'undecided coding-system-list)) - (setq coding-system-alist (cons '("undecided") coding-system-alist)) - (put 'undecided 'eol-type - (make-subsidiary-coding-system 'undecided)) - - (define-coding-system-alias 'unix 'undecided-unix) - (define-coding-system-alias 'dos 'undecided-dos) - (define-coding-system-alias 'mac 'undecided-mac) - - ;; Coding systems not specific to each language environment. - - (make-coding-system - 'emacs-mule 0 ?= - "Emacs internal format used in buffer and string. - - Encoding text with this coding system produces the actual byte - sequence of the text in buffers and strings. An exception is made for - eight-bit-control characters. Each of them is encoded into a single - byte." - nil - '((safe-charsets . t) - (composition . t))) - - (make-coding-system - 'raw-text 5 ?t - "Raw text, which means text contains random 8-bit codes. + (define-coding-system 'raw-text - "Raw text, which means text contains random 8-bit codes. ++ "Raw text, which means text contains random 8-bit codes. Encoding text with this coding system produces the actual byte sequence of the text in buffers and strings. An exception is made for eight-bit-control characters. Each of them is encoded into a single @@@ -356,72 -1119,176 +1126,183 @@@ byte When you visit a file with this coding, the file is read into a unibyte buffer as is (except for EOL format), thus each byte of a file is treated as a character." - nil - '((safe-charsets . t))) - - (make-coding-system - 'iso-2022-7bit 2 ?J - "ISO 2022 based 7-bit encoding using only G0" - '((ascii t) nil nil nil - short ascii-eol ascii-cntl seven) - '((safe-charsets . t) - (composition . t))) - - (make-coding-system - 'iso-2022-7bit-ss2 2 ?$ - "ISO 2022 based 7-bit encoding using SS2 for 96-charset" - '((ascii t) nil t nil - short ascii-eol ascii-cntl seven nil single-shift) - '((safe-charsets . t) - (composition . t))) - - (make-coding-system - 'iso-2022-7bit-lock 2 ?& - "ISO-2022 coding system using Locking-Shift for 96-charset" - '((ascii t) t nil nil - nil ascii-eol ascii-cntl seven locking-shift) - '((safe-charsets . t) - (composition . t))) - :coding-type 'raw-text - :mnemonic ?t) ++ :coding-type 'raw-text ++ :for-unibyte t ++ :mnemonic ?t) + ++(define-coding-system 'no-conversion-multibyte ++ "Like `no-conversion' but don't read a file into a unibyte buffer." ++ :coding-type 'raw-text ++ :eol-type 'unix ++ :mnemonic ?=) ++ + (define-coding-system 'undecided + "No conversion on encoding, automatic conversion on decoding." + :coding-type 'undecided + :mnemonic ?- + :charset-list '(ascii)) - (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock) + (define-coding-system-alias 'unix 'undecided-unix) + (define-coding-system-alias 'dos 'undecided-dos) + (define-coding-system-alias 'mac 'undecided-mac) - (make-coding-system - 'iso-2022-7bit-lock-ss2 2 ?i - "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN" - '((ascii t) - (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 t) - (nil chinese-cns11643-2) - (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 - chinese-cns11643-6 chinese-cns11643-7) - short ascii-eol ascii-cntl seven locking-shift single-shift nil nil nil - init-bol) - '((safe-charsets ascii japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201 - korean-ksc5601 chinese-gb2312 chinese-cns11643-1 - chinese-cns11643-2 chinese-cns11643-3 chinese-cns11643-4 - chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7) - (composition . t))) + (define-coding-system 'iso-latin-1 + "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)." + :coding-type 'charset + :mnemonic ?1 + :charset-list '(iso-8859-1) + :mime-charset 'iso-8859-1) - (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2) + (define-coding-system-alias 'iso-8859-1 'iso-latin-1) + (define-coding-system-alias 'latin-1 'iso-latin-1) - (make-coding-system - 'iso-2022-8bit-ss2 2 ?@ - "ISO 2022 based 8-bit encoding using SS2 for 96-charset" - '((ascii t) nil t nil - nil ascii-eol ascii-cntl nil nil single-shift) - '((safe-charsets . t) - (composition . t))) + ;; Coding systems not specific to each language environment. - (make-coding-system - 'compound-text 2 ?x - "Compound text based generic encoding for decoding unknown messages. + (define-coding-system 'emacs-mule + "Emacs 21 internal format used in buffer and string." + :coding-type 'emacs-mule + :charset-list 'emacs-mule + :mnemonic ?M) + + (define-coding-system 'utf-8 + "UTF-8." + :coding-type 'utf-8 + :mnemonic ?U + :charset-list '(unicode) + :mime-charset 'utf-8) + + (define-coding-system-alias 'mule-utf-8 'utf-8) + + (define-coding-system 'utf-8-emacs + "Support for all Emacs characters (including non-Unicode characters)." + :coding-type 'utf-8 + :mnemonic ?U + :charset-list '(emacs)) + + (define-coding-system 'utf-16le + "UTF-16LE (little endian, no signature (BOM))." + :coding-type 'utf-16 + :mnemonic ?U + :charset-list '(unicode) + :endian 'little + :mime-text-unsuitable t + :mime-charset 'utf-16le) + + (define-coding-system 'utf-16be + "UTF-16BE (big endian, no signature (BOM))." + :coding-type 'utf-16 + :mnemonic ?U + :charset-list '(unicode) + :endian 'big + :mime-text-unsuitable t + :mime-charset 'utf-16be) + + (define-coding-system 'utf-16le-with-signature + "UTF-16 (little endian, with signature (BOM))." + :coding-type 'utf-16 + :mnemonic ?U + :charset-list '(unicode) + :bom t + :endian 'little + :mime-text-unsuitable t + :mime-charset 'utf-16) + + (define-coding-system 'utf-16be-with-signature + "UTF-16 (big endian, with signature)." + :coding-type 'utf-16 + :mnemonic ?U + :charset-list '(unicode) + :bom t + :endian 'big + :mime-text-unsuitable t + :mime-charset 'utf-16) + + (define-coding-system 'utf-16 + "UTF-16 (detect endian on decoding, use big endian on encoding with BOM)." + :coding-type 'utf-16 + :mnemonic ?U + :charset-list '(unicode) + :bom '(utf-16le-with-signature . utf-16be-with-signature) + :endian 'big + :mime-text-unsuitable t + :mime-charset 'utf-16) + + ;; Backwards compatibility (old names, also used by Mule-UCS). We + ;; prefer the MIME names. + (define-coding-system-alias 'utf-16-le 'utf-16le-with-signature) + (define-coding-system-alias 'utf-16-be 'utf-16be-with-signature) + + + (define-coding-system 'iso-2022-7bit + "ISO 2022 based 7-bit encoding using only G0." + :coding-type 'iso-2022 + :mnemonic ?J + :charset-list 'iso-2022 + :designation [(ascii t) nil nil nil] + :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation composition)) + + (define-coding-system 'iso-2022-7bit-ss2 + "ISO 2022 based 7-bit encoding using SS2 for 96-charset." + :coding-type 'iso-2022 + :mnemonic ?$ + :charset-list 'iso-2022 + :designation [(ascii 94) nil (nil 96) nil] + :flags '(short ascii-at-eol ascii-at-cntl 7-bit + designation single-shift composition)) + + (define-coding-system 'iso-2022-7bit-lock + "ISO-2022 coding system using Locking-Shift for 96-charset." + :coding-type 'iso-2022 + :mnemonic ?& + :charset-list 'iso-2022 + :designation [(ascii 94) (nil 96) nil nil] + :flags '(ascii-at-eol ascii-at-cntl 7-bit + designation locking-shift composition)) - This coding system does not support extended segments." - '((ascii t) (latin-iso8859-1 katakana-jisx0201 t) t t - nil ascii-eol ascii-cntl nil locking-shift single-shift nil nil nil - init-bol nil nil) - '((safe-charsets . t) - (mime-charset . x-ctext) - (composition . t))) + (define-coding-system-alias 'iso-2022-int-1 'iso-2022-7bit-lock) + + (define-coding-system 'iso-2022-7bit-lock-ss2 + "Mixture of ISO-2022-JP, ISO-2022-KR, and ISO-2022-CN." + :coding-type 'iso-2022 + :mnemonic ?i + :charset-list '(ascii + japanese-jisx0208 japanese-jisx0208-1978 latin-jisx0201 + korean-ksc5601 + chinese-gb2312 + chinese-cns11643-1 chinese-cns11643-2 chinese-cns11643-3 + chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6 + chinese-cns11643-7) + :designation [(ascii 94) + (nil korean-ksc5601 chinese-gb2312 chinese-cns11643-1 96) + (nil chinese-cns11643-2) + (nil chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 + chinese-cns11643-6 chinese-cns11643-7)] + :flags '(short ascii-at-eol ascii-at-cntl 7-bit locking-shift + single-shift init-bol)) + + (define-coding-system-alias 'iso-2022-cjk 'iso-2022-7bit-lock-ss2) + + (define-coding-system 'iso-2022-8bit-ss2 + "ISO 2022 based 8-bit encoding using SS2 for 96-charset." + :coding-type 'iso-2022 + :mnemonic ?@ + :charset-list 'iso-2022 + :designation [(ascii 94) nil (nil 96) nil] + :flags '(ascii-at-eol ascii-at-cntl designation single-shift composition)) + + (define-coding-system 'compound-text + "Compound text based generic encoding for decoding unknown messages. + -This coding system does not support ICCCM Extended Segments." ++This coding system does not support extended segments of CTEXT." + :coding-type 'iso-2022 + :mnemonic ?x + :charset-list 'iso-2022 + :designation [(ascii 94) (latin-iso8859-1 katakana-jisx0201 96) nil nil] + :flags '(ascii-at-eol ascii-at-cntl + designation locking-shift single-shift composition) + ;; Fixme: this isn't a valid MIME charset and has to be + ;; special-cased elsewhere -- fx + :mime-charset 'x-ctext) (define-coding-system-alias 'x-ctext 'compound-text) (define-coding-system-alias 'ctext 'compound-text) @@@ -456,33 -1324,31 +1338,31 @@@ for decoding and encoding files, proces (define-coding-system-alias 'ctext-with-extensions 'compound-text-with-extensions) - (make-coding-system - 'iso-safe 2 ?- - "Encode ASCII asis and encode non-ASCII characters to `?'." - '(ascii nil nil nil - nil ascii-eol ascii-cntl nil nil nil nil nil nil nil nil t) - '((safe-charsets ascii))) - - (define-coding-system-alias - 'us-ascii 'iso-safe) - - (make-coding-system - 'iso-latin-1 2 ?1 - "ISO 2022 based 8-bit encoding for Latin-1 (MIME:ISO-8859-1)." - '(ascii latin-iso8859-1 nil nil - nil nil nil nil nil nil nil nil nil nil nil t t) - '((safe-charsets ascii latin-iso8859-1) - (mime-charset . iso-8859-1))) - - (define-coding-system-alias 'iso-8859-1 'iso-latin-1) - (define-coding-system-alias 'latin-1 'iso-latin-1) - - ;; Use iso-safe for terminal output if some other coding system is not + (define-coding-system 'us-ascii - "Convert all characters but ASCII to `?'." ++ "Encode ASCII as-is and encode non-ASCII characters to `?'." + :coding-type 'charset + :mnemonic ?- + :charset-list '(ascii) + :default-char ?? + :mime-charset 'us-ascii) + + (define-coding-system-alias 'iso-safe 'us-ascii) + + (define-coding-system 'utf-7 + "UTF-7 encoding of Unicode (RFC 2152)." + :coding-type 'utf-8 + :mnemonic ?U + :mime-charset 'utf-7 + :charset-list '(unicode) + :pre-write-conversion 'utf-7-pre-write-conversion + :post-read-conversion 'utf-7-post-read-conversion) + + ;; Use us-ascii for terminal output if some other coding system is not ;; specified explicitly. - (set-safe-terminal-coding-system-internal 'iso-safe) + (set-safe-terminal-coding-system-internal 'us-ascii) ;; The other coding-systems are defined in each language specific - ;; section of languages.el. + ;; files under lisp/language. ;; Normally, set coding system to `undecided' before reading a file. ;; Compiled Emacs Lisp files (*.elc) are not decoded at all, @@@ -499,7 -1370,6 +1384,7 @@@ ;; the beginning of a doc string, work. ("\\(\\`\\|/\\)loaddefs.el\\'" . (raw-text . raw-text-unix)) ("\\.tar\\'" . (no-conversion . no-conversion)) - ( "\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system) ++ ( "\\.po[tx]?\\'\\|\\.po\\." . po-find-file-coding-system) ("" . (undecided . nil)))) @@@ -546,13 -1388,26 +1403,31 @@@ ;;; Miscellaneous settings. + + ;; Make all multibyte characters self-insert. + (set-char-table-range (nth 1 global-map) + (cons 128 (max-char)) + 'self-insert-command) + +(aset latin-extra-code-table ?\221 t) (aset latin-extra-code-table ?\222 t) +(aset latin-extra-code-table ?\223 t) +(aset latin-extra-code-table ?\224 t) +(aset latin-extra-code-table ?\225 t) +(aset latin-extra-code-table ?\226 t) - (update-coding-systems-internal) + ;; Move least specific charsets to end of priority list + + (apply #'set-charset-priority + (delq 'unicode (delq 'emacs (charset-priority-list)))) + + ;; The old code-pages library is obsoleted by coding systems based on + ;; the charsets defined in this file but might be required by user + ;; code. + (provide 'code-pages) + + ;; Local variables: + ;; no-byte-compile: t + ;; End: ;;; mule-conf.el ends here diff --cc lisp/international/mule-diag.el index 6f03ff8aeea,6eaa618e67a..b4a1d3d4cd1 --- a/lisp/international/mule-diag.el +++ b/lisp/international/mule-diag.el @@@ -1,8 -1,11 +1,11 @@@ ;;; mule-diag.el --- show diagnosis of multilingual environment (Mule) ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. -;; Copyright (C) 2001, 2002 ++;; Copyright (C) 2003 + ;; National Institute of Advanced Industrial Science and Technology (AIST) + ;; Registration Number H13PRO009 ;; Keywords: multilingual, charset, coding system, fontset, diagnosis, i18n @@@ -61,73 -58,7 +58,6 @@@ 'help-function #'list-charset-chars 'help-echo "mouse-2, RET: show table of characters for this character set") - ;;;###autoload - (defvar non-iso-charset-alist - `((mac-roman - (ascii latin-iso8859-1 mule-unicode-2500-33ff - mule-unicode-0100-24ff mule-unicode-e000-ffff) - mac-roman-decoder - ((0 255))) - (viscii - (ascii vietnamese-viscii-lower vietnamese-viscii-upper) - viet-viscii-nonascii-translation-table - ((0 255))) - (vietnamese-tcvn - (ascii vietnamese-viscii-lower vietnamese-viscii-upper) - viet-tcvn-nonascii-translation-table - ((0 255))) - (koi8-r - (ascii cyrillic-iso8859-5) - cyrillic-koi8-r-nonascii-translation-table - ((32 255))) - (alternativnyj - (ascii cyrillic-iso8859-5) - cyrillic-alternativnyj-nonascii-translation-table - ((32 255))) - (koi8-u - (ascii cyrillic-iso8859-5 mule-unicode-0100-24ff) - cyrillic-koi8-u-nonascii-translation-table - ((32 255))) - (big5 - (ascii chinese-big5-1 chinese-big5-2) - decode-big5-char - ((32 127) - ((?\xA1 ?\xFE) . (?\x40 ?\x7E ?\xA1 ?\xFE)))) - (sjis - (ascii katakana-jisx0201 japanese-jisx0208) - decode-sjis-char - ((32 127 ?\xA1 ?\xDF) - ((?\x81 ?\x9F ?\xE0 ?\xEF) . (?\x40 ?\x7E ?\x80 ?\xFC))))) - "Alist of charset names vs the corresponding information. - This is mis-named for historical reasons. The charsets are actually - non-built-in ones. They correspond to Emacs coding systems, not Emacs - charsets, i.e. what Emacs can read (or write) by mapping to (or - from) Emacs internal charsets that typically correspond to a limited - set of ISO charsets. - - Each element has the following format: - (CHARSET CHARSET-LIST TRANSLATION-METHOD [ CODE-RANGE ]) - - CHARSET is the name (symbol) of the charset. - - CHARSET-LIST is a list of Emacs charsets into which characters of - CHARSET are mapped. - - TRANSLATION-METHOD is a translation table (symbol) to translate a - character code of CHARSET to the corresponding Emacs character - code. It can also be a function to call with one argument, a - character code in CHARSET. - - CODE-RANGE specifies the valid code ranges of CHARSET. - It is a list of RANGEs, where each RANGE is of the form: - (FROM1 TO1 FROM2 TO2 ...) - or - ((FROM1-1 TO1-1 FROM1-2 TO1-2 ...) . (FROM2-1 TO2-1 FROM2-2 TO2-2 ...)) - In the first form, valid codes are between FROM1 and TO1, or FROM2 and - TO2, or... - The second form is used for 2-byte codes. The car part is the ranges - of the first byte, and the cdr part is the ranges of the second byte.") -- ;;;###autoload (defun list-character-sets (arg) "Display a list of all character sets. @@@ -215,55 -113,23 +113,28 @@@ but still shows the full information. (goto-char (point-min)) (re-search-forward "[0-9][0-9][0-9]") (beginning-of-line) - (delete-region (point) (point-max)) - (list-character-sets-1 sort-key))))) + (let ((pos (point))) + (search-forward "----------") + (beginning-of-line) + (save-restriction + (narrow-to-region pos (point)) + (delete-region (point-min) (point-max)) + (list-character-sets-1 sort-key))))))) - (defun charset-multibyte-form-string (charset) - (let ((info (charset-info charset))) - (cond ((eq charset 'ascii) - "xx") - ((eq charset 'eight-bit-control) - (format "%2X Xx" (aref info 6))) - ((eq charset 'eight-bit-graphic) - "XX") - (t - (let ((str (format "%2X" (aref info 6)))) - (if (> (aref info 7) 0) - (setq str (format "%s %2X" - str (aref info 7)))) - (setq str (concat str " XX")) - (if (> (aref info 2) 1) - (setq str (concat str " XX"))) - str))))) - - ;; Insert a list of character sets sorted by SORT-KEY. SORT-KEY - ;; should be one of `id', `name', and `iso-spec'. If SORT-KEY is nil, - ;; it defaults to `id'. - (defun list-character-sets-1 (sort-key) + "Insert a list of character sets sorted by SORT-KEY. + SORT-KEY should be `name' or `iso-spec' (default `name')." (or sort-key - (setq sort-key 'id)) - (let ((tail (charset-list)) - charset-info-list elt charset info sort-func) - (while tail - (setq charset (car tail) tail (cdr tail) - info (charset-info charset)) - + (setq sort-key 'name)) + (let ((tail charset-list) + charset-info-list charset sort-func) + (dolist (charset charset-list) ;; Generate a list that contains all information to display. - (setq charset-info-list - (cons (list (charset-id charset) ; ID-NUM - charset ; CHARSET-NAME - (charset-multibyte-form-string charset); MULTIBYTE-FORM - (aref info 2) ; DIMENSION - (aref info 3) ; CHARS - (aref info 8) ; FINAL-CHAR - ) - charset-info-list))) + (push (list charset + (charset-dimension charset) + (charset-chars charset) + (charset-iso-final-char charset)) + charset-info-list)) ;; Determine a predicate for `sort' by SORT-KEY. (setq sort-func @@@ -287,21 -151,19 +156,17 @@@ (setq charset-info-list (sort charset-info-list sort-func)) ;; Insert information of character sets. - (while charset-info-list - (setq elt (car charset-info-list) - charset-info-list (cdr charset-info-list)) - (insert (format "%03d(%02X)" (car elt) (car elt))) ; ID-NUM - (indent-to 8) - (insert-text-button (symbol-name (nth 1 elt)) + (dolist (elt charset-info-list) + (insert-text-button (symbol-name (car elt)) :type 'list-charset-chars - 'help-args (list (nth 1 elt))) + 'help-args (list (car elt))) (goto-char (point-max)) (insert "\t") - (indent-to 40) - (insert (nth 2 elt)) ; MULTIBYTE-FORM - (indent-to 56) - (insert (format "%d %2d " (nth 3 elt) (nth 4 elt)) ; DIMENSION and CHARS - (if (< (nth 5 elt) 0) "none" (nth 5 elt))) ; FINAL-CHAR - ;; (indent-to 40) - ;; (insert (nth 2 elt)) ; MULTIBYTE-FORM + (indent-to 48) + (insert (format "%d %3d " (nth 1 elt) (nth 2 elt)) ; DIMENSION and CHARS + (if (< (nth 3 elt) 0) + "none" + (nth 3 elt))) ; FINAL-CHAR (insert "\n")))) @@@ -397,180 -230,282 +233,147 @@@ detailed meanings of these arguments. (if (> (length charset) 0) (intern charset)))) ++;; Vector of 16 space-only strings. Nth string has display property ++;; '(space :align-to COL) when COL is the column number to align the ++;; Nth character in a row. Used by `list-block-of-chars'. ++ ++(defconst stretches-for-character-list ++ (let ((stretches (make-vector 16 nil))) ++ (dotimes (i 16) ++ (aset stretches i ++ (propertize " " 'display `(space :align-to ,(+ 6 (* i 4)))))) ++ stretches) ++ "For internal use only.") ;; List characters of the range MIN and MAX of CHARSET. If dimension ;; of CHARSET is two (i.e. 2-byte charset), ROW is the first byte ;; (block index) of the characters, and MIN and MAX are the second ;; bytes of the characters. If the dimension is one, ROW should be 0. --;; For a non-ISO charset, CHARSET is a translation table (symbol) or a --;; function to get Emacs' character codes that corresponds to the --;; characters to list. (defun list-block-of-chars (charset row min max) (let (i ch) -- (insert-char ?- (+ 4 (* 3 16))) -- (insert "\n ") ++ (insert-char ?- (+ 5 (* 4 16))) ++ (insert "\n ") (setq i 0) (while (< i 16) -- (insert (format "%3X" i)) ++ (insert (format "%4X" i)) (setq i (1+ i))) (setq i (* (/ min 16) 16)) (while (<= i max) (if (= (% i 16) 0) -- (insert (format "\n%3Xx" (/ (+ (* row 256) i) 16)))) -- (setq ch (cond ((< i min) -- 32) -- ((charsetp charset) - (if (= row 0) - (make-char charset i) - (make-char charset row i))) - (or (decode-char charset (+ (* row 256) i)) - 32)) ; gap in mapping -- ((and (symbolp charset) (get charset 'translation-table)) -- (aref (get charset 'translation-table) i)) -- (t (funcall charset (+ (* row 256) i))))) -- (if (and (char-table-p charset) -- (or (< ch 32) (and (>= ch 127) (<= ch 255)))) -- ;; Don't insert a control code. -- (setq ch 32)) -- (unless ch (setq ch 32)) -- (if (eq ch ?\t) -- ;; Make it visible. -- (setq ch (propertize "\t" 'display "^I"))) -- ;; This doesn't DTRT. Maybe it's better to insert "^J" and not -- ;; worry about the buffer contents not being correct. --;;; (if (eq ch ?\n) --;;; (setq ch (propertize "\n" 'display "^J"))) -- (indent-to (+ (* (% i 16) 3) 6)) -- (insert ch) ++ (insert (format "\n%4Xx" (/ (+ (* row 256) i) 16)))) ++ (setq ch (if (< i min) ++ 32 ++ (or (decode-char charset (+ (* row 256) i)) ++ 32))) ; gap in mapping ++ ;; Don't insert a control code. ++ (if (or (< ch 32) (= ch 127)) ++ (setq ch (single-key-description ch)) ++ (if (and (>= ch 128) (< ch 160)) ++ (setq ch (format "%02Xh" ch)))) ++ (insert (aref stretches-for-character-list (% i 16)) ch) (setq i (1+ i)))) (insert "\n")) - (defun list-iso-charset-chars (charset) - (let ((dim (charset-dimension charset)) - (chars (charset-chars charset)) - (plane (charset-iso-graphic-plane charset)) - min max) - (insert (format "Characters in the coded character set %s.\n" charset)) - - (cond ((eq charset 'eight-bit-control) - (setq min 128 max 159)) - ((eq charset 'eight-bit-graphic) - (setq min 160 max 255)) - (t - (if (= chars 94) - (setq min 33 max 126) - (setq min 32 max 127)) - (or (= plane 0) - (setq min (+ min 128) max (+ max 128))))) - - (if (= dim 1) - (list-block-of-chars charset 0 min max) - (let ((i min)) - (while (<= i max) - (list-block-of-chars charset i min max) - (setq i (1+ i))))))) - - (defun list-non-iso-charset-chars (charset) - "List all characters in non-built-in coded character set CHARSET." - (let* ((slot (assq charset non-iso-charset-alist)) - (charsets (nth 1 slot)) - (translate-method (nth 2 slot)) - (ranges (nth 3 slot)) - range) - (or slot - (error "Unknown character set: %s" charset)) - (insert (format "Characters in the coded character set %s.\n" charset)) - (if charsets - (insert "They are mapped to: " - (mapconcat #'symbol-name charsets ", ") - "\n")) - (while ranges - (setq range (pop ranges)) - (if (integerp (car range)) - ;; The form of RANGES is (FROM1 TO1 FROM2 TO2 ...). - (if (and (not (functionp translate-method)) - (< (car (last range)) 256)) - ;; Do it all in one block to avoid the listing being - ;; broken up at gaps in the range. Don't do that for - ;; function translate-method, since not all codes in - ;; that range may be valid. - (list-block-of-chars translate-method - 0 (car range) (car (last range))) - (while range - (list-block-of-chars translate-method - 0 (car range) (nth 1 range)) - (setq range (nthcdr 2 range)))) - ;; The form of RANGES is ((FROM1-1 TO1-1 ...) . (FROM2-1 TO2-1 ...)). - (let ((row-range (car range)) - row row-max - col-range col col-max) - (while row-range - (setq row (car row-range) row-max (nth 1 row-range) - row-range (nthcdr 2 row-range)) - (while (<= row row-max) - (setq col-range (cdr range)) - (while col-range - (setq col (car col-range) col-max (nth 1 col-range) - col-range (nthcdr 2 col-range)) - (list-block-of-chars translate-method row col col-max)) - (setq row (1+ row))))))))) - - ;;;###autoload (defun list-charset-chars (charset) - "Display a list of characters in the specified character set. - This can list both Emacs `official' (ISO standard) charsets and the - characters encoded by various Emacs coding systems which correspond to - PC `codepages' and other coded character sets. See `non-iso-charset-alist'." + "Display a list of characters in character set CHARSET." (interactive (list (read-charset "Character set: "))) - (with-output-to-temp-buffer "*Help*" + (with-output-to-temp-buffer "*Character List*" (with-current-buffer standard-output + (setq mode-line-format (copy-sequence mode-line-format)) + (let ((slot (memq 'mode-line-buffer-identification mode-line-format))) + (if slot + (setcdr slot + (cons (format " (%s)" charset) + (cdr slot))))) (setq indent-tabs-mode nil) (set-buffer-multibyte t) - (cond ((charsetp charset) - (list-iso-charset-chars charset)) - ((assq charset non-iso-charset-alist) - (list-non-iso-charset-chars charset)) - (t - (error "Invalid character set %s" charset)))))) + (unless (charsetp charset) + (error "Invalid character set %s" charset)) + (let ((dim (charset-dimension charset)) + (chars (charset-chars charset)) + ;; (plane (charset-iso-graphic-plane charset)) + (plane 1) + (range (plist-get (charset-plist charset) :code-space)) + min max min2 max2) + (if (> dim 2) + (error "Can only list 1- and 2-dimensional charsets")) + (insert (format "Characters in the coded character set %s.\n" charset)) + (setq min (aref range 0) + max (aref range 1)) + (if (= dim 1) + (list-block-of-chars charset 0 min max) + (setq min2 (aref range 2) + max2 (aref range 3)) + (let ((i min2)) + (while (<= i max2) + (list-block-of-chars charset i min max) + (setq i (1+ i))))))))) ;;;###autoload (defun describe-character-set (charset) "Display information about built-in character set CHARSET." - (interactive (list (let ((non-iso-charset-alist nil)) - (read-charset "Charset: ")))) + (interactive (list (read-charset "Charset: "))) (or (charsetp charset) (error "Invalid charset: %S" charset)) - (let ((info (charset-info charset))) - (help-setup-xref (list #'describe-character-set charset) (interactive-p)) - (with-output-to-temp-buffer (help-buffer) - (with-current-buffer standard-output - (insert "Character set: " (symbol-name charset) - (format " (ID:%d)\n\n" (aref info 0))) - (insert (aref info 13) "\n\n") ; description - (insert "Number of contained characters: " - (if (= (aref info 2) 1) - (format "%d\n" (aref info 3)) - (format "%dx%d\n" (aref info 3) (aref info 3)))) - (insert "Final char of ISO2022 designation sequence: ") - (if (>= (aref info 8) 0) - (insert (format "`%c'\n" (aref info 8))) - (insert "not assigned\n")) - (insert (format "Width (how many columns on screen): %d\n" - (aref info 4))) - (insert (format "Internal multibyte sequence: %s\n" - (charset-multibyte-form-string charset))) - (let ((coding (plist-get (aref info 14) 'preferred-coding-system))) - (when coding - (insert (format "Preferred coding system: %s\n" coding)) - (search-backward (symbol-name coding)) - (help-xref-button 0 'help-coding-system coding))))))) + (help-setup-xref (list #'describe-character-set charset) (interactive-p)) + (with-output-to-temp-buffer (help-buffer) + (with-current-buffer standard-output + (insert "Character set: " (symbol-name charset)) + (let ((name (get-charset-property charset :name))) + (if (not (eq name charset)) + (insert " (alias of " (symbol-name name) ?\)))) + (insert "\n\n" (charset-description charset) "\n\n") + (insert "Number of contained characters: ") + (dotimes (i (charset-dimension charset)) + (unless (= i 0) + (insert ?×)) + (insert (format "%d" (charset-chars charset (1+ i))))) + (insert ?\n) + (let ((char (charset-iso-final-char charset))) + (when (> char 0) + (insert "Final char of ISO2022 designation sequence: ") + (insert (format "`%c'\n" char)))) + (insert (format "Width (how many columns on screen): %d\n" + (aref char-width-table (make-char charset)))) + (let (aliases) + (dolist (c charset-list) + (if (and (not (eq c charset)) + (eq charset (get-charset-property c :name))) + (push c aliases))) + (if aliases + (insert "Aliases: " (mapconcat #'symbol-name aliases ", ") ?\n))) + + (dolist (elt `((:ascii-compatible-p "ASCII compatible." nil) + (:map "Map file: " identity) + (:unify-map "Unification map file: " identity) + (:invalid-code + nil + ,(lambda (c) + (format "Invalid character: %c (code %d)" c c))) + (:emacs-mule-id "Id in emacs-mule coding system: " + number-to-string) + (:parents "Parents: " + (lambda (parents) + (mapconcat ,(lambda (elt) + (format "%s" elt)) + parents + ", "))) + (:code-space "Code space: " ,(lambda (c) + (format "%s" c))) + (:code-offset "Code offset: " number-to-string) + (:iso-revision-number "ISO revision number: " + number-to-string) + (:supplementary-p + "Used only as a parent of some other charset." nil))) + (let ((val (get-charset-property charset (car elt)))) + (when val + (if (cadr elt) (insert (cadr elt))) + (if (nth 2 elt) + (insert (funcall (nth 2 elt) val))) + (insert ?\n))))))) - -;;;###autoload -(defun describe-char-after (&optional pos) - "Display information about the character at POS in the current buffer. -POS defaults to point. -The information includes character code, charset and code points in it, -syntax, category, how the character is encoded in a file, -which font is being used for displaying the character." - (interactive) - (or pos - (setq pos (point))) - (if (>= pos (point-max)) - (error "No character at point")) - (let* ((char (char-after pos)) - (charset (char-charset char)) - (props (text-properties-at pos)) - (composition (find-composition (point) nil nil t)) - (composed (if composition (buffer-substring (car composition) - (nth 1 composition)))) - (multibyte-p enable-multibyte-characters) - item-list max-width) - (if (not (characterp char)) - (setq item-list - `(("character" - ,(format "%s (0%o, %d, 0x%x) -- invalid character code" - (char-to-string char) char char char)))) - (setq item-list - `(("character" - ,(format "%s (0%o, %d, 0x%x%s)" - (if (< char 256) - (single-key-description char) - (char-to-string char)) - char char char - (if (encode-char char 'ucs) - (format ", U+%04X" (encode-char char 'ucs)) - ""))) - ("preferred charset" - ,(symbol-name charset) - ,(format "(%s)" (charset-description charset))) - ("code point" - ,(let ((split (split-char char))) - (mapconcat #'number-to-string (cdr split) " "))) - ("syntax" - ,(let* ((old-table (syntax-table)) - (table (get-char-property (point) 'syntax-table))) - (if (consp table) - (nth 1 (assq (car table) - (mapcar #'cdr syntax-code-table))) - (unwind-protect - (progn - (if (syntax-table-p table) - (set-syntax-table table)) - (nth 2 (assq (char-syntax char) syntax-code-table))) - (set-syntax-table old-table))))) - ("category" - ,@(let ((category-set (char-category-set char))) - (if (not category-set) - '("-- none --") - (mapcar #'(lambda (x) (format "%c:%s " - x (category-docstring x))) - (category-set-mnemonics category-set))))) - ,@(let ((props (aref char-code-property-table char)) - ps) - (when props - (while props - (push (format "%s:" (pop props)) ps) - (push (format "%s;" (pop props)) ps)) - (list (cons "Properties" (nreverse ps))))) - ("buffer code" - ,(encoded-string-description - (string-as-unibyte (char-to-string char)) nil)) - ("file code" - ,@(let* ((coding buffer-file-coding-system) - (encoded (encode-coding-char char coding))) - (if encoded - (list (encoded-string-description encoded coding) - (format "(encoded by coding system %S)" coding)) - (list "not encodable by coding system" - (symbol-name coding))))) - ,(if (display-graphic-p (selected-frame)) - (list "font" (or (internal-char-font (point)) - "-- none --")) - (list "terminal code" - (let* ((coding (terminal-coding-system)) - (encoded (encode-coding-char char coding))) - (if encoded - (encoded-string-description encoded coding) - "not encodable")))) - ,@(let ((unicodedata (if (encode-char char 'ucs) - (unicode-data char)))) - (if unicodedata - (cons (list "Unicode data" " ") unicodedata)))))) - (setq max-width (apply #'max (mapcar #'(lambda (x) - (if (cadr x) - (length (car x)) - 0)) - item-list))) - (with-output-to-temp-buffer "*Help*" - (save-excursion - (set-buffer standard-output) - (set-buffer-multibyte multibyte-p) - (let ((formatter (format "%%%ds:" max-width))) - (dolist (elt item-list) - (when (cadr elt) - (insert (format formatter (car elt))) - (dolist (clm (cdr elt)) - (when (>= (+ (current-column) - (or (string-match "\n" clm) - (string-width clm)) 1) - (frame-width)) - (insert "\n") - (indent-to (1+ max-width))) - (insert " " clm)) - (insert "\n")))) - (when composition - (insert "\nComposed with the following character(s) " - (mapconcat (lambda (x) (format "`%c'" x)) - (substring composed 1) - ", ") - " to form `" composed "'") - (if (nth 3 composition) - (insert ".\n") - (insert "\nby the rule (" - (mapconcat (lambda (x) - (format (if (consp x) "%S" "?%c") x)) - (nth 2 composition) - " ") - ").\n" - "See the variable `reference-point-alist' for " - "the meaning of the rule.\n"))) - (if props - (insert "\nText properties\n")) - (while props - (insert (format " %s: %s" (car props) (cadr props))) - (setq props (cddr props))) - )))) - ;;; CODING-SYSTEM @@@ -624,66 -561,50 +429,50 @@@ (interactive-p)) (with-output-to-temp-buffer (help-buffer) (print-coding-system-briefly coding-system 'doc-string) - (princ "\n") - (let ((vars (coding-system-get coding-system 'dependency))) - (when vars - (princ "See also the documentation of these customizable variables - which alter the behaviour of this coding system.\n") - (dolist (v vars) - (princ " `") - (princ v) - (princ "'\n")) - (princ "\n"))) - - (princ "Type: ") - (let* ((type (coding-system-type coding-system)) - ;; Fixme: use this - (extra-spec (coding-system-plist coding-system))) + (let ((type (coding-system-type coding-system)) - (flags (coding-system-flags coding-system))) ++ ;; Fixme: use this ++ (extra-spec (coding-system-plist coding-system))) + (princ "Type: ") (princ type) - (cond ((eq type nil) - (princ " (do no conversion)")) - ((eq type t) + (cond ((eq type 'undecided) (princ " (do automatic conversion)")) - ((eq type 0) - (princ " (Emacs internal multibyte form)")) - ((eq type 1) + ((eq type 'utf-8) + (princ " (UTF-8: Emacs internal multibyte form)")) + ((eq type 'utf-16) + ;; (princ " (UTF-16)") + ) + ((eq type 'shift-jis) (princ " (Shift-JIS, MS-KANJI)")) - ((eq type 2) + ((eq type 'iso-2022) (princ " (variant of ISO-2022)\n") (princ "Initial designations:\n") - (print-designation flags) - (princ "Other Form: \n ") - (princ (if (aref flags 4) "short-form" "long-form")) - (if (aref flags 5) (princ ", ASCII@EOL")) - (if (aref flags 6) (princ ", ASCII@CNTL")) - (princ (if (aref flags 7) ", 7-bit" ", 8-bit")) - (if (aref flags 8) (princ ", use-locking-shift")) - (if (aref flags 9) (princ ", use-single-shift")) - (if (aref flags 10) (princ ", use-roman")) - (if (aref flags 11) (princ ", use-old-jis")) - (if (aref flags 12) (princ ", no-ISO6429")) - (if (aref flags 13) (princ ", init-bol")) - (if (aref flags 14) (princ ", designation-bol")) - (if (aref flags 15) (princ ", convert-unsafe")) - (if (aref flags 16) (princ ", accept-latin-extra-code")) - (princ ".")) - ((eq type 3) - (princ " (Big5)")) - ((eq type 4) + (print-designation (coding-system-get coding-system + :designation)) + + (when (coding-system-get coding-system :flags) + (princ "Other specifications: \n ") + (apply #'print-list + (coding-system-get coding-system :flags)))) + ((eq type 'charset) + (princ " (charset)")) + ((eq type 'ccl) (princ " (do conversion by CCL program)")) - ((eq type 5) + ((eq type 'raw-text) (princ " (text with random binary characters)")) - (t (princ ": invalid coding-system.")))) - (princ "\nEOL type: ") - (let ((eol-type (coding-system-eol-type coding-system))) - (cond ((vectorp eol-type) - (princ "Automatic selection from:\n\t") - (princ eol-type) - (princ "\n")) - ((or (null eol-type) (eq eol-type 0)) (princ "LF\n")) - ((eq eol-type 1) (princ "CRLF\n")) - ((eq eol-type 2) (princ "CR\n")) - (t (princ "invalid\n")))) - (let ((postread (coding-system-get coding-system 'post-read-conversion))) + ((eq type 'emacs-mule) + (princ " (Emacs 21 internal encoding)")) + (t (princ ": invalid coding-system."))) + (princ "\nEOL type: ") + (let ((eol-type (coding-system-eol-type coding-system))) + (cond ((vectorp eol-type) + (princ "Automatic selection from:\n\t") + (princ eol-type) + (princ "\n")) + ((or (null eol-type) (eq eol-type 0)) (princ "LF\n")) + ((eq eol-type 1) (princ "CRLF\n")) + ((eq eol-type 2) (princ "CR\n")) + (t (princ "invalid\n"))))) + (let ((postread (coding-system-get coding-system :post-read-conversion))) (when postread (princ "After decoding text normally,") (princ " perform post-conversion using the function: ") @@@ -760,41 -683,23 +551,40 @@@ in place of `..' (coding-system-eol-type-mnemonic (cdr default-process-coding-system)) ))) - ;; Print symbol name and mnemonic letter of CODING-SYSTEM with `princ'. - ;; If DOC-STRING is non-nil, print also the docstring of CODING-SYSTEM. - ;; If DOC-STRING is `tightly', don't print an empty line before the - ;; docstring, and print only the first line of the docstring. - (defun print-coding-system-briefly (coding-system &optional doc-string) - "Print symbol name and mnemonic letter of CODING-SYSTEM with `princ'." ++ "Print symbol name and mnemonic letter of CODING-SYSTEM with `princ'. ++If DOC-STRING is non-nil, print also the docstring of CODING-SYSTEM. ++If DOC-STRING is `tightly', don't print an empty line before the ++docstring, and print only the first line of the docstring." (if (not coding-system) (princ "nil\n") (princ (format "%c -- %s" (coding-system-mnemonic coding-system) coding-system)) - (let ((aliases (coding-system-get coding-system 'alias-coding-systems))) + (let ((aliases (coding-system-aliases coding-system))) - (if (eq coding-system (car aliases)) - (if (cdr aliases) - (princ (format " %S" (cons 'alias: (cdr aliases))))) - (if (memq coding-system aliases) - (princ (format " (alias of %s)" (car aliases)))))) - (princ "\n\n") - (if (and doc-string - (setq doc-string (coding-system-doc-string coding-system))) - (princ (format "%s\n" doc-string))))) + (cond ((eq coding-system (car aliases)) + (if (cdr aliases) + (princ (format " %S" (cons 'alias: (cdr aliases)))))) + ((memq coding-system aliases) + (princ (format " (alias of %s)" (car aliases)))) + (t + (let ((eol-type (coding-system-eol-type coding-system)) + (base-eol-type (coding-system-eol-type (car aliases)))) + (if (and (integerp eol-type) + (vectorp base-eol-type) + (not (eq coding-system (aref base-eol-type eol-type)))) + (princ (format " (alias of %s)" + (aref base-eol-type eol-type)))))))) + (princ "\n") + (or (eq doc-string 'tightly) + (princ "\n")) + (if doc-string + (let ((doc (or (coding-system-doc-string coding-system) ""))) + (when (eq doc-string 'tightly) + (if (string-match "\n" doc) + (setq doc (substring doc 0 (match-beginning 0)))) + (setq doc (concat " " doc))) + (princ (format "%s\n" doc)))))) ;;;###autoload (defun describe-current-coding-system () @@@ -858,12 -758,13 +643,12 @@@ Priority order for recognizing coding s (while categories (setq coding-system (symbol-value (car categories))) (mapcar - (function - (lambda (x) - (if (and (not (eq x coding-system)) + (lambda (x) + (if (and (not (eq x coding-system)) - (coding-system-get x 'no-initial-designation) - (let ((flags (coding-system-flags x))) - (not (or (aref flags 10) (aref flags 11))))) + (let ((flags (coding-system-get :flags))) + (not (or (memq 'use-roman flags) + (memq 'use-oldjis flags))))) - (setq codings (cons x codings))))) + (setq codings (cons x codings)))) (get (car categories) 'coding-systems)) (if codings (let ((max-col (frame-width)) @@@ -1014,11 -915,16 +799,12 @@@ but still contains full information abo ## POST-READ-CONVERSION, PRE-WRITE-CONVERSION = function name to be called ## ")) - (let ((bases (coding-system-list 'base-only)) - coding-system) - (while bases - (setq coding-system (car bases)) - (if (null arg) - (print-coding-system-briefly coding-system 'doc-string) - (print-coding-system coding-system)) - (setq bases (cdr bases))))) + (dolist (coding-system (sort-coding-systems (coding-system-list 'base-only))) + (if (null arg) + (print-coding-system-briefly coding-system 'tightly) + (print-coding-system coding-system)))) + ;; Fixme: delete? ;;;###autoload (defun list-coding-categories () "Display a list of all coding categories." @@@ -1062,76 -968,66 +848,69 @@@ (with-output-to-temp-buffer "*Help*" (describe-font-internal font-info 'verbose))))) - (defun print-fontset (fontset &optional print-fonts) + (defun print-fontset-element (val) + ;; VAL has this format: + ;; ((REQUESTED-FONT-NAME OPENED-FONT-NAME ...) ...) + ;; CHAR RANGE is already inserted. Get character codes from + ;; the current line. + (beginning-of-line) + (let ((from (following-char)) + (to (if (looking-at "[^.]*[.]* ") + (char-after (match-end 0))))) + (if (re-search-forward "[ \t]*$" nil t) + (delete-region (match-beginning 0) (match-end 0))) + + ;; For non-ASCII characters, insert also CODE RANGE. + (if (or (>= from 128) (and to (>= to 128))) + (if to + (insert (format " (#x%02X .. #x%02X)" from to)) + (insert (format " (#x%02X)" from)))) + + ;; Insert a requested font name. + (dolist (elt val) + (let ((requested (car elt))) + (if (stringp requested) + (insert "\n " requested) + (let ((family (aref requested 0)) + (registry (aref requested 5))) + (if (not family) + (setq family "*-*") + (or (string-match "-" family) + (setq family (concat "*-" family)))) + (or (string-match "-" registry) + (= (aref registry (1- (length registry))) ?*) + (setq registry (concat registry "*"))) + (insert "\n -" family + ?- (or (aref requested 1) ?*) ; weight + ?- (or (aref requested 2) ?*) ; slant + "-*-" (or (aref requested 3) ?*) ; width + "-*-" (or (aref requested 4) ?*) ; adstyle + "-*-*-*-*-*-*-" registry)))) + + ;; Insert opened font names (if any). + (if (and (boundp 'print-opened) (symbol-value 'print-opened)) + (dolist (opened (cdr elt)) + (insert "\n\t[" opened "]")))))) + + (defun print-fontset (fontset &optional print-opened) "Print information about FONTSET. +If FONTSET is nil, print information about the default fontset. - If optional arg PRINT-FONTS is non-nil, also print names of all opened + If optional arg PRINT-OPENED is non-nil, also print names of all opened fonts for FONTSET. This function actually inserts the information in the current buffer." + (or fontset + (setq fontset (query-fontset "fontset-default"))) - (let ((tail (aref (fontset-info fontset) 2)) - elt chars font-spec opened prev-charset charset from to) - (beginning-of-line) - (insert "Fontset: " fontset "\n") - (insert "CHARSET or CHAR RANGE") - (indent-to 24) - (insert "FONT NAME\n") - (insert "---------------------") - (indent-to 24) - (insert "---------") - (insert "\n") - (while tail - (setq elt (car tail) tail (cdr tail)) - (setq chars (car elt) font-spec (car (cdr elt)) opened (cdr (cdr elt))) - (if (symbolp chars) - (setq charset chars from nil to nil) - (if (integerp chars) - (setq charset (char-charset chars) from chars to chars) - (setq charset (char-charset (car chars)) - from (car chars) to (cdr chars)))) - (unless (eq charset prev-charset) - (insert (symbol-name charset)) - (if from - (insert "\n"))) - (when from - (let ((split (split-char from))) - (if (and (= (charset-dimension charset) 2) - (= (nth 2 split) 0)) - (setq from - (make-char charset (nth 1 split) - (if (= (charset-chars charset) 94) 33 32)))) - (insert " " from)) - (when (/= from to) - (insert "-") - (let ((split (split-char to))) - (if (and (= (charset-dimension charset) 2) - (= (nth 2 split) 0)) - (setq to - (make-char charset (nth 1 split) - (if (= (charset-chars charset) 94) 126 127)))) - (insert to)))) - (indent-to 24) - (if (stringp font-spec) - (insert font-spec) - (if (car font-spec) - (if (string-match "-" (car font-spec)) - (insert "-" (car font-spec) "-*-") - (insert "-*-" (car font-spec) "-*-")) - (insert "-*-")) - (if (cdr font-spec) - (if (string-match "-" (cdr font-spec)) - (insert (cdr font-spec)) - (insert (cdr font-spec) "-*")) - (insert "*"))) - (insert "\n") - (when print-fonts - (while opened - (indent-to 5) - (insert "[" (car opened) "]\n") - (setq opened (cdr opened)))) - (setq prev-charset charset) - ))) + (beginning-of-line) + (insert "Fontset: " fontset "\n") + (insert (propertize "CHAR RANGE" 'face 'underline) + " (" (propertize "CODE RANGE" 'face 'underline) ")\n") + (insert " " (propertize "FONT NAME" 'face 'underline) + " (" (propertize "REQUESTED" 'face 'underline) + " and [" (propertize "OPENED" 'face 'underline) "])") + (let ((info (fontset-info fontset))) + (describe-vector info 'print-fontset-element) + (insert "\n ------") + (describe-vector (char-table-extra-slot info 0) 'print-fontset-element))) ;;;###autoload (defun describe-fontset (fontset) @@@ -1317,6 -1211,190 +1086,195 @@@ system which uses fontsets). (setq fontsets (cdr fontsets))))) (print-help-return-message)))) ++;;;###autoload + (defcustom unicodedata-file nil + "Location of UnicodeData file. + This is the UnicodeData.txt file from the Unicode consortium, used for + diagnostics. If it is non-nil `describe-char-after' will print data + looked up from it." + :group 'mule + :type '(choice (const :tag "None" nil) + file)) + + ;; We could convert the unidata file into a Lispy form once-for-all + ;; and distribute it for loading on demand. It might be made more + ;; space-efficient by splitting strings word-wise and replacing them + ;; with lists of symbols interned in a private obarray, e.g. + ;; "LATIN SMALL LETTER A" => '(LATIN SMALL LETTER A). ++ ++;;;###autoload + (defun unicode-data (char) + "Return a list of Unicode data for unicode CHAR. + Each element is a list of a property description and the property value. + The list is null if CHAR isn't found in `unicodedata-file'." + (when unicodedata-file + (unless (file-exists-p unicodedata-file) + (error "`unicodedata-file' %s not found" unicodedata-file)) + (save-excursion + (set-buffer (find-file-noselect unicodedata-file nil t)) + (goto-char (point-min)) + (let ((hex (format "%04X" char)) + found first last) + (if (re-search-forward (concat "^" hex) nil t) + (setq found t) + ;; It's not listed explicitly. Look for ranges, e.g. CJK + ;; ideographs, and check whether it's in one of them. + (while (and (re-search-forward "^\\([^;]+\\);[^;]+First>;" nil t) + (>= char (setq first + (string-to-number (match-string 1) 16))) + (progn + (forward-line 1) + (looking-at "^\\([^;]+\\);[^;]+Last>;") + (> char + (setq last + (string-to-number (match-string 1) 16)))))) + (if (and (>= char first) + (<= char last)) + (setq found t))) + (if found + (let ((fields (mapcar (lambda (elt) + (if (> (length elt) 0) + elt)) + (cdr (split-string + (buffer-substring + (line-beginning-position) + (line-end-position)) + ";"))))) + ;; The length depends on whether the last field was empty. + (unless (or (= 13 (length fields)) + (= 14 (length fields))) + (error "Invalid contents in %s" unicodedata-file)) + ;; The field names and values lists are slightly + ;; modified from Mule-UCS unidata.el. + (list + (list "Name" (let ((name (nth 0 fields))) + ;; Check for <..., First>, <..., Last> + (if (string-match "\\`\\(<[^,]+\\)," name) + (concat (match-string 1 name) ">") + name))) + (list "Category" + (cdr (assoc + (nth 1 fields) + '(("Lu" . "uppercase letter") + ("Ll" . "lowercase letter") + ("Lt" . "titlecase letter") + ("Mn" . "non-spacing mark") + ("Mc" . "spacing-combining mark") + ("Me" . "enclosing mark") + ("Nd" . "decimal digit") + ("Nl" . "letter number") + ("No" . "other number") + ("Zs" . "space separator") + ("Zl" . "line separator") + ("Zp" . "paragraph separator") + ("Cc" . "other control") + ("Cf" . "other format") + ("Cs" . "surrogate") + ("Co" . "private use") + ("Cn" . "not assigned") + ("Lm" . "modifier letter") + ("Lo" . "other letter") + ("Pc" . "connector punctuation") + ("Pd" . "dash punctuation") + ("Ps" . "open punctuation") + ("Pe" . "close punctuation") + ("Pi" . "initial-quotation punctuation") + ("Pf" . "final-quotation punctuation") + ("Po" . "other punctuation") + ("Sm" . "math symbol") + ("Sc" . "currency symbol") + ("Sk" . "modifier symbol") + ("So" . "other symbol"))))) + (list "Combining class" + (cdr (assoc + (string-to-number (nth 2 fields)) + '((0 . "Spacing") + (1 . "Overlays and interior") + (7 . "Nuktas") + (8 . "Hiragana/Katakana voicing marks") + (9 . "Viramas") + (10 . "Start of fixed position classes") + (199 . "End of fixed position classes") + (200 . "Below left attached") + (202 . "Below attached") + (204 . "Below right attached") + (208 . "Left attached (reordrant around \ + single base character)") + (210 . "Right attached") + (212 . "Above left attached") + (214 . "Above attached") + (216 . "Above right attached") + (218 . "Below left") + (220 . "Below") + (222 . "Below right") + (224 . "Left (reordrant around single base \ + character)") + (226 . "Right") + (228 . "Above left") + (230 . "Above") + (232 . "Above right") + (233 . "Double below") + (234 . "Double above") + (240 . "Below (iota subscript)"))))) + (list "Bidi category" + (cdr (assoc + (nth 3 fields) + '(("L" . "Left-to-Right") + ("LRE" . "Left-to-Right Embedding") + ("LRO" . "Left-to-Right Override") + ("R" . "Right-to-Left") + ("AL" . "Right-to-Left Arabic") + ("RLE" . "Right-to-Left Embedding") + ("RLO" . "Right-to-Left Override") + ("PDF" . "Pop Directional Format") + ("EN" . "European Number") + ("ES" . "European Number Separator") + ("ET" . "European Number Terminator") + ("AN" . "Arabic Number") + ("CS" . "Common Number Separator") + ("NSM" . "Non-Spacing Mark") + ("BN" . "Boundary Neutral") + ("B" . "Paragraph Separator") + ("S" . "Segment Separator") + ("WS" . "Whitespace") + ("ON" . "Other Neutrals"))))) + (list "Decomposition" + (if (nth 4 fields) + (let* ((parts (split-string (nth 4 fields))) + (info (car parts))) + (if (string-match "\\`<\\(.+\\)>\\'" info) + (setq info (match-string 1 info)) + (setq info nil)) + (if info (setq parts (cdr parts))) + (setq parts (mapconcat + (lambda (arg) + (string (string-to-number arg 16))) + parts " ")) + (concat info parts)))) + (list "Decimal digit value" + (nth 5 fields)) + (list "Digit value" + (nth 6 fields)) + (list "Numeric value" + (nth 7 fields)) + (list "Mirrored" + (if (equal "Y" (nth 8 fields)) + "yes")) + (list "Old name" (nth 9 fields)) + (list "ISO 10646 comment" (nth 10 fields)) + (list "Uppercase" (and (nth 11 fields) + (string (string-to-number + (nth 11 fields) 16)))) + (list "Lowercase" (and (nth 12 fields) + (string (string-to-number + (nth 12 fields) 16)))) + (list "Titlecase" (and (nth 13 fields) + (string (string-to-number + (nth 13 fields) 16))))))))))) + +(provide 'mule-diag) + + ;; Local Variables: + ;; coding: utf-8 + ;; End: ;;; mule-diag.el ends here diff --cc lisp/international/mule-util.el index 9ed3d8a3880,bbe07e99e58..79148510a0a --- a/lisp/international/mule-util.el +++ b/lisp/international/mule-util.el @@@ -1,7 -1,8 +1,12 @@@ ;;; mule-util.el --- utility functions for mulitilingual environment (mule) ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. + ;; Copyright (C) 2002 Free Software Foundation, Inc. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 ++ ;; Keywords: mule, multilingual @@@ -305,13 -245,13 +310,13 @@@ Optional 3rd argument NIL-FOR-TOO-LONG ;;;###autoload (defun coding-system-translation-table-for-decode (coding-system) -- "Return the value of CODING-SYSTEM's `translation-table-for-decode' property." - (coding-system-get coding-system 'translation-table-for-decode)) ++ "Return the value of CODING-SYSTEM's `decode-translation-table' property." + (coding-system-get coding-system :decode-translation-table)) ;;;###autoload (defun coding-system-translation-table-for-encode (coding-system) -- "Return the value of CODING-SYSTEM's `translation-table-for-encode' property." - (coding-system-get coding-system 'translation-table-for-encode)) ++ "Return the value of CODING-SYSTEM's `encode-translation-table' property." + (coding-system-get coding-system :encode-translation-table)) ;;;###autoload (defun coding-system-equal (coding-system-1 coding-system-2) @@@ -326,6 -266,6 +331,21 @@@ or one is an alias of the other. (or (eq eol-type-1 eol-type-2) (and (vectorp eol-type-1) (vectorp eol-type-2))))))) ++;;;###autoload ++(defmacro with-coding-priority (coding-systems &rest body) ++ "Execute BODY like `progn' with CODING-SYSTEMS at the front of priority list. ++CODING-SYSTEMS is a list of coding systems. See ++`set-coding-priority'. This affects the implicit sorting of lists of ++coding sysems returned by operations such as `find-coding-systems-region'." ++ (let ((current (make-symbol "current"))) ++ `(let ((,current (coding-system-priority-list))) ++ (apply #'set-coding-system-priority ,coding-systems) ++ (unwind-protect ++ (progn ,@body) ++ (apply #'set-coding-system-priority ,current))))) ++(put 'with-coding-priority 'lisp-indent-function 1) ++(put 'with-coding-priority 'edebug-form-spec t) ++ ;;;###autoload (defmacro detect-coding-with-priority (from to priority-list) "Detect a coding system of the text between FROM and TO with PRIORITY-LIST. @@@ -351,13 -283,23 +363,9 @@@ The detection takes into account the co language environment LANG-ENV." (let ((coding-priority (get-language-info lang-env 'coding-priority))) (if coding-priority - (detect-coding-with-priority - from to - (mapcar (function (lambda (x) - (cons (coding-system-get x 'coding-category) x))) - coding-priority)) - (detect-coding-region from to)))) + (with-coding-priority coding-priority + (detect-coding-region from to))))) -;;;###autoload -(defmacro with-coding-priority (coding-systems &rest body) - "Execute BODY like `progn' with CODING-SYSTEMS at the front of priority list. -CODING-SYSTEMS is a list of coding systems. See -`set-coding-priority'. This affects the implicit sorting of lists of -coding sysems returned by operations such as `find-coding-systems-region'." - (let ((current (make-symbol "current"))) - `(let ((,current (coding-system-priority-list))) - (apply #'set-coding-system-priority ,coding-systems) - (unwind-protect - (progn ,@body) - (apply #'set-coding-system-priority ,current))))) -(put 'with-coding-priority 'lisp-indent-function 1) -(put 'with-coding-priority 'edebug-form-spec t) (provide 'mule-util) diff --cc lisp/international/mule.el index f37c3704cf2,51ae8832c1c..ec633bfcfaf --- a/lisp/international/mule.el +++ b/lisp/international/mule.el @@@ -1,8 -1,11 +1,11 @@@ - ;;; mule.el --- basic commands for mulitilingual environment + ;;; mule.el --- basic commands for multilingual environment ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. -;; Copyright (C) 2001, 2002 ++;; Copyright (C) 2003 + ;; National Institute of Advanced Industrial Science and Technology (AIST) + ;; Registration Number H13PRO009 ;; Keywords: mule, multilingual, character set, coding system @@@ -27,12 -30,203 +30,203 @@@ ;;; Code: - (defconst mule-version "5.0 (SAKAKI)" "\ -(defconst mule-version "7.0 (SAKAKI)" "\ ++(defconst mule-version "6.0 (HANACHIRUSATO)" "\ Version number and name of this version of MULE (multilingual environment).") - (defconst mule-version-date "1999.12.7" "\ -(defconst mule-version-date "2002.2.28" "\ ++(defconst mule-version-date "2003.9.1" "\ Distribution date of this version of MULE (multilingual environment).") + + + ;;; CHARACTER + (defalias 'char-valid-p 'characterp) + (make-obsolete 'char-valid-p 'characterp "22.1") + + + ;;; CHARSET + + (defun define-charset (name docstring &rest props) + "Define NAME (symbol) as a charset with DOCSTRING. + The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE + may be any symbol. The following have special meanings, and one of + `:code-offset', `:map', `:subset', `:superset' must be specified. + + `:short-name' + + VALUE must be a short string to identify the charset. If omitted, + NAME is used. + + `:long-name' + + VALUE must be a string longer than `:short-name' to identify the + charset. If omitted, the value of the `:short-name' attribute is used. + + `:dimension' + + VALUE must be an integer 0, 1, 2, or 3, specifying the dimension of + code-points of the charsets. If omitted, it is calculated from the + value of the `:code-space' attribute. + + `:code-space' + + VALUE must be a vector of length at most 8 specifying the byte code + range of each dimension in this format: + [ MIN-1 MAX-1 MIN-2 MAX-2 ... ] + where MIN-N is the minimum byte value of Nth dimension of code-point, + MAX-N is the maximum byte value of that. + + `:min-code' + + VALUE must be an integer specifying the mininum code point of the + charset. If omitted, it is calculated from `:code-space'. VALUE may + be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of + the code point and LOW is the least significant 16 bits. + + `:max-code' + + VALUE must be an integer specifying the maxinum code point of the + charset. If omitted, it is calculated from `:code-space'. VALUE may + be a cons (HIGH . LOW), where HIGH is the most significant 16 bits of + the code point and LOW is the least significant 16 bits. + + `:iso-final-char' + + VALUE must be a character in the range 32 to 127 (inclusive) + specifying the final char of the charset for ISO-2022 encoding. If + omitted, the charset can't be encoded by ISO-2022 based + coding-systems. + + `:iso-revision-number' + + VALUE must be an integer in the range 0..63, specifying the revision + number of the charset for ISO-2022 encoding. + + `:emacs-mule-id' + + VALUE must be an integer of 0, 128..255. If omitted, the charset + can't be encoded by coding-systems of type `emacs-mule'. + + `:ascii-compatible-p' + + VALUE must be nil or t (default nil). If VALUE is t, the charset is + compatible with ASCII, i.e. the first 128 code points map to ASCII. + + `:supplementary-p' + + VALUE must be nil or t. If the VALUE is t, the charset is + supplementary, which means it is used only as a parent of some other + charset. + + `:invalid-code' + + VALUE must be a nonnegative integer that can be used as an invalid + code point of the charset. If the minimum code is 0 and the maximum + code is greater than Emacs' maximum integer value, `:invalid-code' + should not be omitted. + + `:code-offset' + + VALUE must be an integer added to the index number of a character to + get the corresponding character code. + + `:map' + + VALUE must be vector or string. + + If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ], + where CODE-n is a code-point of the charset, and CHAR-n is the + corresponding character code. + + If it is a string, it is a name of file that contains the above + information. Each line of the file must be this format: + 0xXXX 0xYYY + where XXX is a hexadecimal representation of CODE-n and YYY is a + hexadecimal representation of CHAR-n. A line starting with `#' is a + comment line. + + `:subset' + + VALUE must be a list: + ( PARENT MIN-CODE MAX-CODE OFFSET ) + PARENT is a parent charset. MIN-CODE and MAX-CODE specify the range + of characters inherited from the parent. OFFSET is an integer value + to add to a code point of the parent charset to get the corresponding + code point of this charset. + + `:superset' + + VALUE must be a list of parent charsets. The charset inherits + characters from them. Each element of the list may be a cons (PARENT + . OFFSET), where PARENT is a parent charset, and OFFSET is an offset + value to add to a code point of PARENT to get the corresponding code + point of this charset. + + `:unify-map' + + VALUE must be vector or string. + + If it is a vector, the format is [ CODE-1 CHAR-1 CODE-2 CHAR-2 ... ], + where CODE-n is a code-point of the charset, and CHAR-n is the + corresponding Unicode character code. + + If it is a string, it is a name of file that contains the above + information. The file format is the same as what described for `:map' + attribute." + (let ((attrs (mapcar 'list '(:dimension + :code-space + :min-code + :max-code + :iso-final-char + :iso-revision-number + :emacs-mule-id + :ascii-compatible-p + :supplementary-p + :invalid-code + :code-offset + :map + :subset + :superset + :unify-map + :plist)))) + + ;; If :dimension is omitted, get the dimension from :code-space. + (let ((dimension (plist-get props :dimension))) + (or dimension + (progn + (setq dimension (/ (length (plist-get props :code-space)) 2)) + (setq props (plist-put props :dimension dimension))))) + + (dolist (slot attrs) + (setcdr slot (plist-get props (car slot)))) + + ;; Make sure that the value of :code-space is a vector of 8 + ;; elements. + (let* ((slot (assq :code-space attrs)) + (val (cdr slot)) + (len (length val))) + (if (< len 8) + (setcdr slot + (vconcat val (make-vector (- 8 len) 0))))) + + ;; Add :name and :docstring properties to PROPS. + (setq props + (cons :name (cons name (cons :docstring (cons docstring props))))) + (or (plist-get props :short-name) + (plist-put props :short-name (symbol-name name))) + (or (plist-get props :long-name) + (plist-put props :long-name (plist-get props :short-name))) + ;; We can probably get a worthwhile amount in purespace. + (setq props + (mapcar (lambda (elt) + (if (stringp elt) + (purecopy elt) + elt)) + props)) + (setcdr (assq :plist attrs) props) + + (apply 'define-charset-internal name (mapcar 'cdr attrs)))) + + (defun load-with-code-conversion (fullname file &optional noerror nomessage) "Execute a file of Lisp code named FILE whose absolute name is FULLNAME. The file contents are decoded before evaluation if necessary. @@@ -77,8 -271,8 +271,8 @@@ Return t if file exists. ;; Otherwise, eval-buffer might try to interpret random ;; binary junk as multibyte characters. (if (and enable-multibyte-characters - (or (eq (coding-system-type last-coding-system-used) 5) - (eq last-coding-system-used 'no-conversion))) - (eq (coding-system-type last-coding-system-used) - 'raw-text)) ++ (or (eq (coding-system-type last-coding-system-used) ++ 'raw-text))) (set-buffer-multibyte nil)) ;; Make `kill-buffer' quiet. (set-buffer-modified-p nil)) @@@ -101,354 -295,383 +295,443 @@@ (message "Loading %s...done" file))) t))) --;; API (Application Program Interface) for charsets. - - (defsubst charset-quoted-standard-p (obj) - "Return t if OBJ is a quoted symbol, and is the name of a standard charset." - (and (listp obj) (eq (car obj) 'quote) - (symbolp (car-safe (cdr obj))) - (let ((vector (get (car-safe (cdr obj)) 'charset))) - (and (vectorp vector) - (< (aref vector 0) 160))))) - - (defsubst charsetp (object) - "T if OBJECT is a charset." - (and (symbolp object) (vectorp (get object 'charset)))) - - (defsubst charset-info (charset) ++(defun charset-info (charset) + "Return a vector of information of CHARSET. ++This function is provided for backward compatibility. + -;;; Charset property +The elements of the vector are: + CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION, + LEADING-CODE-BASE, LEADING-CODE-EXT, + ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE, + REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION, - PLIST, ++ PLIST. +where - CHARSET-ID (integer) is the identification number of the charset. - BYTES (integer) is the length of multi-byte form of a character in - the charset: one of 1, 2, 3, and 4. - DIMENSION (integer) is the number of bytes to represent a character of - the charset: 1 or 2. - CHARS (integer) is the number of characters in a dimension: 94 or 96. - WIDTH (integer) is the number of columns a character in the charset - occupies on the screen: one of 0, 1, and 2. - DIRECTION (integer) is the rendering direction of characters in the - charset when rendering. If 0, render from left to right, else - render from right to left. - LEADING-CODE-BASE (integer) is the base leading-code for the - charset. - LEADING-CODE-EXT (integer) is the extended leading-code for the - charset. All charsets of less than 0xA0 has the value 0. ++CHARSET-ID is always 0. ++BYTES is always 0. ++DIMENSION is the number of bytes of a code-point of the charset: ++ 1, 2, 3, or 4. ++CHARS is the number of characters in a dimension: ++ 94, 96, 128, or 256. ++WIDTH is always 0. ++DIRECTION is always 0. ++LEADING-CODE-BASE is always 0. ++LEADING-CODE-EXT is always 0. +ISO-FINAL-CHAR (character) is the final character of the + corresponding ISO 2022 charset. If the charset is not assigned + any final character, the value is -1. - ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked - while encoding to variants of ISO 2022 coding system, one of the - following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). - If the charset is not assigned any final character, the value is -1. - REVERSE-CHARSET (integer) is the charset which differs only in - LEFT-TO-RIGHT value from the charset. If there's no such a - charset, the value is -1. ++ISO-GRAPHIC-PLANE is always 0. ++REVERSE-CHARSET is always -1. +SHORT-NAME (string) is the short name to refer to the charset. +LONG-NAME (string) is the long name to refer to the charset +DESCRIPTION (string) is the description string of the charset. +PLIST (property list) may contain any type of information a user + want to put and get by functions `put-charset-property' and + `get-charset-property' respectively." - (get charset 'charset)) ++ (vector 0 ++ 0 ++ (charset-dimension charset) ++ (charset-chars charset) ++ 0 ++ 0 ++ 0 ++ 0 ++ (charset-iso-final-char charset) ++ 0 ++ -1 ++ (get-charset-property charset :short-name) ++ (get-charset-property charset :short-name) ++ (charset-description charset) ++ (charset-plist charset))) + +;; It is better not to use backquote in this file, +;; because that makes a bootstrapping problem +;; if you need to recompile all the Lisp files using interpreted code. + - (defmacro charset-id (charset) - "Return charset identification number of CHARSET." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 0) - (list 'aref (list 'charset-info charset) 0))) ++(defun charset-id (charset) ++ "Always return 0. This is provided for backward compatibility." ++ 0) + +(defmacro charset-bytes (charset) - "Return bytes of CHARSET. - See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 1) - (list 'aref (list 'charset-info charset) 1))) - - (defmacro charset-dimension (charset) - "Return dimension of CHARSET. - See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 2) - (list 'aref (list 'charset-info charset) 2))) - - (defmacro charset-chars (charset) - "Return character numbers contained in a dimension of CHARSET. - See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 3) - (list 'aref (list 'charset-info charset) 3))) - - (defmacro charset-width (charset) - "Return width (how many column occupied on a screen) of CHARSET. - See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 4) - (list 'aref (list 'charset-info charset) 4))) - - (defmacro charset-direction (charset) - "Return direction of CHARSET. - See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 5) - (list 'aref (list 'charset-info charset) 5))) - - (defmacro charset-iso-final-char (charset) - "Return final char of CHARSET. - See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 8) - (list 'aref (list 'charset-info charset) 8))) - - (defmacro charset-iso-graphic-plane (charset) - "Return graphic plane of CHARSET. - See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 9) - (list 'aref (list 'charset-info charset) 9))) - - (defmacro charset-reverse-charset (charset) - "Return reverse charset of CHARSET. - See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 10) - (list 'aref (list 'charset-info charset) 10))) ++ "Always return 0. This is provided for backward compatibility." ++ 0) + + (defun get-charset-property (charset propname) + "Return the value of CHARSET's PROPNAME property. + This is the last value stored with + (put-charset-property CHARSET PROPNAME VALUE)." + (plist-get (charset-plist charset) propname)) + + (defun put-charset-property (charset propname value) + "Set CHARSETS's PROPNAME property to value VALUE. + It can be retrieved with `(get-charset-property CHARSET PROPNAME)'." + (set-charset-plist charset + (plist-put (charset-plist charset) propname value))) + - + (defun charset-description (charset) + "Return description string of CHARSET." + (plist-get (charset-plist charset) :docstring)) + + (defun charset-dimension (charset) + "Return dimension of CHARSET." + (plist-get (charset-plist charset) :dimension)) + + (defun charset-chars (charset &optional dimension) + "Return number of characters contained in DIMENSION of CHARSET. + DIMENSION defaults to the first dimension." + (unless dimension (setq dimension 1)) + (let ((code-space (plist-get (charset-plist charset) :code-space))) + (1+ (- (aref code-space (1- (* 2 dimension))) + (aref code-space (- (* 2 dimension) 2)))))) + + (defun charset-iso-final-char (charset) + "Return ISO-2022 final character of CHARSET. + Return -1 if charset isn't an ISO 2022 one." + (or (plist-get (charset-plist charset) :iso-final-char) + -1)) (defmacro charset-short-name (charset) - "Return short name of CHARSET. - See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 11) - (list 'aref (list 'charset-info charset) 11))) + "Return short name of CHARSET." + (plist-get (charset-plist charset) :short-name)) (defmacro charset-long-name (charset) - "Return long name of CHARSET. - See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 12) - (list 'aref (list 'charset-info charset) 12))) - - (defmacro charset-description (charset) - "Return description of CHARSET. - See the function `charset-info' for more detail." - (if (charset-quoted-standard-p charset) - (aref (charset-info (nth 1 charset)) 13) - (list 'aref (list 'charset-info charset) 13))) - - (defmacro charset-plist (charset) - "Return list charset property of CHARSET. - See the function `charset-info' for more detail." - (list 'aref - (if (charset-quoted-standard-p charset) - (charset-info (nth 1 charset)) - (list 'charset-info charset)) - 14)) - - (defun set-charset-plist (charset plist) - "Set CHARSET's property list to PLIST, and return PLIST." - (aset (charset-info charset) 14 plist)) - - (defun make-char (charset &optional code1 code2) - "Return a character of CHARSET whose position codes are CODE1 and CODE2. - CODE1 and CODE2 are optional, but if you don't supply - sufficient position codes, return a generic character which stands for - all characters or group of characters in the character set. - A generic character can be used to index a char table (e.g. syntax-table). - - Such character sets as ascii, eight-bit-control, and eight-bit-graphic - don't have corresponding generic characters. If CHARSET is one of - them and you don't supply CODE1, return the character of the smallest - code in CHARSET. - - If CODE1 or CODE2 are invalid (out of range), this function signals an - error. However, the eighth bit of both CODE1 and CODE2 is zeroed - before they are used to index CHARSET. Thus you may use, say, the - actual ISO 8859 character code rather than subtracting 128, as you - would need to index the corresponding Emacs charset." - (make-char-internal (charset-id charset) code1 code2)) - - (put 'make-char 'byte-compile - (function - (lambda (form) - (let ((charset (nth 1 form))) - (if (charset-quoted-standard-p charset) - (byte-compile-normal-call - (cons 'make-char-internal - (cons (charset-id (nth 1 charset)) (nthcdr 2 form)))) - (byte-compile-normal-call - (cons 'make-char-internal - (cons (list 'charset-id charset) (nthcdr 2 form))))))))) + "Return long name of CHARSET." + (plist-get (charset-plist charset) :long-name)) (defun charset-list () - "Return list of charsets ever defined. + "Return list of all charsets ever defined. This function is provided for backward compatibility. Now we have the variable `charset-list'." charset-list) + (make-obsolete 'charset-list "Use variable `charset-list'" "22.1") - (defsubst generic-char-p (char) - "Return t if and only if CHAR is a generic character. - See also the documentation of `make-char'." - (and (>= char 0400) - (let ((l (split-char char))) - (and (or (= (nth 1 l) 0) (eq (nth 2 l) 0)) - (not (eq (car l) 'composition)))))) - - (defun decode-char (ccs code-point &optional restriction) - "Return character specified by coded character set CCS and CODE-POINT in it. - Return nil if such a character is not supported. - Currently the only supported coded character set is `ucs' (ISO/IEC - 10646: Universal Multi-Octet Coded Character Set), and the result is - translated through the translation-table named - `utf-translation-table-for-decode' or the translation-hash-table named - `utf-subst-table-for-decode'. - - Optional argument RESTRICTION specifies a way to map the pair of CCS - and CODE-POINT to a character. Currently not supported and just ignored." - (cond - ((eq ccs 'ucs) - (or (gethash code-point - (get 'utf-subst-table-for-decode 'translation-hash-table)) - (let ((c (cond - ((< code-point 160) - code-point) - ((< code-point 256) - (make-char 'latin-iso8859-1 code-point)) - ((< code-point #x2500) - (setq code-point (- code-point #x0100)) - (make-char 'mule-unicode-0100-24ff - (+ (/ code-point 96) 32) (+ (% code-point 96) 32))) - ((< code-point #x3400) - (setq code-point (- code-point #x2500)) - (make-char 'mule-unicode-2500-33ff - (+ (/ code-point 96) 32) (+ (% code-point 96) 32))) - ((and (>= code-point #xe000) (< code-point #x10000)) - (setq code-point (- code-point #xe000)) - (make-char 'mule-unicode-e000-ffff - (+ (/ code-point 96) 32) - (+ (% code-point 96) 32)))))) - (when c - (or (aref (get 'utf-translation-table-for-decode - 'translation-table) c) - c))))))) - - (defun encode-char (char ccs &optional restriction) - "Return code-point in coded character set CCS that corresponds to CHAR. - Return nil if CHAR is not included in CCS. - Currently the only supported coded character set is `ucs' (ISO/IEC - 10646: Universal Multi-Octet Coded Character Set), and CHAR is first - translated through the translation-table named - `utf-translation-table-for-encode' or the translation-hash-table named - `utf-subst-table-for-encode'. - - CHAR should be in one of these charsets: - ascii, latin-iso8859-1, mule-unicode-0100-24ff, mule-unicode-2500-33ff, - mule-unicode-e000-ffff, eight-bit-control - Otherwise, return nil. - - Optional argument RESTRICTION specifies a way to map CHAR to a - code-point in CCS. Currently not supported and just ignored." - (let* ((split (split-char char)) - (charset (car split)) - trans) - (cond ((eq ccs 'ucs) - (or (gethash char (get 'utf-subst-table-for-encode - 'translation-hash-table)) - (let ((table (get 'utf-translation-table-for-encode - 'translation-table))) - (setq trans (aref table char)) - (if trans - (setq split (split-char trans) - charset (car split))) - (cond ((eq charset 'ascii) - char) - ((eq charset 'latin-iso8859-1) - (+ (nth 1 split) 128)) - ((eq charset 'mule-unicode-0100-24ff) - (+ #x0100 (+ (* (- (nth 1 split) 32) 96) - (- (nth 2 split) 32)))) - ((eq charset 'mule-unicode-2500-33ff) - (+ #x2500 (+ (* (- (nth 1 split) 32) 96) - (- (nth 2 split) 32)))) - ((eq charset 'mule-unicode-e000-ffff) - (+ #xe000 (+ (* (- (nth 1 split) 32) 96) - (- (nth 2 split) 32)))) - ((eq charset 'eight-bit-control) - char)))))))) - + (defun generic-char-p (char) - "Always return nil. This exists only for backward compatibility." ++ "Always return nil. This is provided for backward compatibility." + nil) + (make-obsolete 'generic-char-p "Generic characters no longer exist" "22.1") ;; Coding system stuff - ;; Coding system is a symbol that has the property `coding-system'. - ;; - ;; The value of the property `coding-system' is a vector of the - ;; following format: - ;; [TYPE MNEMONIC DOC-STRING PLIST FLAGS] - ;; We call this vector as coding-spec. See comments in src/coding.c - ;; for more detail. - - (defconst coding-spec-type-idx 0) - (defconst coding-spec-mnemonic-idx 1) - (defconst coding-spec-doc-string-idx 2) - (defconst coding-spec-plist-idx 3) - (defconst coding-spec-flags-idx 4) - - ;; PLIST is a property list of a coding system. To share PLIST among - ;; alias coding systems, a coding system has PLIST in coding-spec - ;; instead of having it in normal property list of Lisp symbol. - ;; Here's a list of coding system properties currently being used. - ;; - ;; o coding-category - ;; - ;; The value is a coding category the coding system belongs to. The - ;; function `make-coding-system' sets this value automatically - ;; unless its argument PROPERTIES specifies this property. - ;; - ;; o alias-coding-systems - ;; - ;; The value is a list of coding systems of the same alias group. The - ;; first element is the coding system made at first, which we call as - ;; `base coding system'. The function `make-coding-system' sets this - ;; value automatically and `define-coding-system-alias' updates it. - ;; - ;; See the documentation of make-coding-system for the meanings of the - ;; following properties. - ;; - ;; o post-read-conversion - ;; o pre-write-conversion - ;; o translation-table-for-decode - ;; o translation-table-for-encode - ;; o safe-chars - ;; o safe-charsets - ;; o mime-charset - ;; o valid-codes (meaningful only for a coding system based on CCL) - - - (defsubst coding-system-spec (coding-system) - "Return coding-spec of CODING-SYSTEM." - (get (check-coding-system coding-system) 'coding-system)) + ;; Coding system is a symbol that has been defined by the function + ;; `define-coding-system'. - (defun coding-system-type (coding-system) - "Return the coding type of CODING-SYSTEM. - A coding type is an integer value indicating the encoding method - of CODING-SYSTEM. See the function `make-coding-system' for more detail." - (aref (coding-system-spec coding-system) coding-spec-type-idx)) + (defconst coding-system-iso-2022-flags + '(long-form + ascii-at-eol + ascii-at-cntl + 7-bit + locking-shift + single-shift + designation + revision + direction + init-at-bol + designate-at-bol + safe + latin-extra + composition + euc-tw-shift + use-roman + use-oldjis) + "List of symbols that control ISO-2022 encoder/decoder. - (defun coding-system-mnemonic (coding-system) - "Return the mnemonic character of CODING-SYSTEM. - The mnemonic character of a coding system is used in mode line - to indicate the coding system. If the arg is nil, return ?-." - (let ((spec (coding-system-spec coding-system))) - (if spec (aref spec coding-spec-mnemonic-idx) ?-))) + The value of the `:flags' attribute in the argument of the function + `define-coding-system' must be one of them. + + If `long-form' is specified, use a long designation sequence on + encoding for the charsets `japanese-jisx0208-1978', `chinese-gb2312', + and `japanese-jisx0208'. The long designation sequence doesn't + conform to ISO 2022, but is used by such coding systems as + `compound-text'. + + If `ascii-at-eol' is specified, designate ASCII to g0 at end of line + on encoding. + + If `ascii-at-cntl' is specified, designate ASCII to g0 before control + codes and SPC on encoding. + + If `7-bit' is specified, use 7-bit code only on encoding. + + If `locking-shift' is specified, decode locking-shift code correctly + on decoding, and use locking-shift to invoke a graphic element on + encoding. + + If `single-shift' is specified, decode single-shift code correctly on + decoding, and use single-shift to invoke a graphic element on encoding. + + If `designation' is specified, decode designation code correctly on + decoding, and use designation to designate a charset to a graphic + element on encoding. + + If `revision' is specified, produce an escape sequence to specify + revision number of a charset on encoding. Such an escape sequence is + always correctly decoded on decoding. + + If `direction' is specified, decode ISO6429's code for specifying + direction correctly, and produce the code on encoding. + + If `init-at-bol' is specified, on encoding, it is assumed that + invocation and designation statuses are reset at each beginning of + line even if `ascii-at-eol' is not specified; thus no codes for + resetting them are produced. + + If `safe' is specified, on encoding, characters not supported by a + coding are replaced with `?'. + + If `latin-extra' is specified, the code-detection routine assumes that a + code specified in `latin-extra-code-table' (which see) is valid. + + If `composition' is specified, an escape sequence to specify + composition sequence is correctly decoded on decoding, and is produced + on encoding. + + If `euc-tw-shift' is specified, the EUC-TW specific shifting code is + correctly decoded on decoding, and is produced on encoding. + + If `use-roman' is specified, JIS0201-1976-Roman is designated instead + of ASCII. + + If `use-oldjis' is specified, JIS0208-1976 is designated instead of + JIS0208-1983.") + + (defun define-coding-system (name docstring &rest props) + "Define NAME (a symbol) as a coding system with DOCSTRING and attributes. + The remaining arguments must come in pairs ATTRIBUTE VALUE. ATTRIBUTE + may be any symbol. + + The following attributes have special meanings. Those labeled as + \"(required)\", should not be omitted. + + `:mnemonic' (required) + + VALUE is a character to display on mode line for the coding system. + + `:coding-type' (required) + + VALUE must be one of `charset', `utf-8', `utf-16', `iso-2022', + `emacs-mule', `shift-jis', `ccl', `raw-text', `undecided'. + + `:eol-type' + + VALUE is the EOL (end-of-line) format of the coding system. It must be + one of `unix', `dos', `mac'. The symbol `unix' means Unix-like EOL + \(i.e. single LF), `dos' means DOS-like EOL \(i.e. sequence of CR LF), + and `mac' means MAC-like EOL \(i.e. single CR). If omitted, on + decoding by the coding system, Emacs automatically detects the EOL + format of the source text. + + `:charset-list' + + VALUE must be a list of charsets supported by the coding system. On + encoding by the coding system, if a character belongs to multiple + charsets in the list, a charset that comes earlier in the list is + selected. If `:coding-type' is `iso-2022', VALUE may be `iso-2022', + which indicates that the coding system supports all ISO-2022 based + charsets. If `:coding-type' is `emacs-mule', VALUE may be + `emacs-mule', which indicates that the coding system supports all + charsets that have the `:emacs-mule-id' property. + + `:ascii-compatible-p' + + If VALUE is non-nil, the coding system decodes all 7-bit bytes into + the corresponding ASCII characters, and encodes all ASCII characters + back to the corresponding 7-bit bytes. VALUE defaults to nil. + + `:decode-translation-table' + + VALUE must be a translation table to use on decoding. + + `:encode-translation-table' + + VALUE must be a translation table to use on encoding. + + `:post-read-conversion' + + VALUE must be a function to call after some text is inserted and + decoded by the coding system itself and before any functions in + `after-insert-functions' are called. The arguments to this function + are the same as those of a function in `after-insert-file-functions', + i.e. LENGTH of the text to be decoded with point at the head of it, + and the function should leave point unchanged. + + `:pre-write-conversion' + + VALUE must be a function to call after all functions in + `write-region-annotate-functions' and `buffer-file-format' are called, + and before the text is encoded by the coding system itself. The + arguments to this function are the same as those of a function in + `write-region-annotate-functions'. + + `:default-char' + + VALUE must be a character. On encoding, a character not supported by + the coding system is replaced with VALUE. + ++`:for-unibyte' ++ ++VALUE non-nil means that visiting a file with the coding system ++results in a unibyte buffer. ++ + `:eol-type' + + VALUE must be `unix', `dos', `mac'. The symbol `unix' means Unix-like + EOL (LF), `dos' means DOS-like EOL (CRLF), and `mac' means MAC-like + EOL (CR). If omitted, on decoding, the coding system detects EOL + format automatically, and on encoding, uses Unix-like EOL. + + `:mime-charset' + + VALUE must be a symbol whose name is that of a MIME charset converted + to lower case. + + `:mime-text-unsuitable' + + VALUE non-nil means the `:mime-charset' property names a charset which + is unsuitable for the top-level media type \"text\". + + `:flags' + + VALUE must be a list of symbols that control the ISO-2022 converter. + Each must be a member of the list `coding-system-iso-2022-flags' + \(which see). This attribute has a meaning only when `:coding-type' + is `iso-2022'. + + `:designation' + + VALUE must be a vector [G0-USAGE G1-USAGE G2-USAGE G3-USAGE]. + GN-USAGE specifies the usage of graphic register GN as follows. + + If it is nil, no charset can be designated to GN. + + If it is a charset, the charset is initially designated to GN, and + never used by the other charsets. + + If it is a list, the elements must be charsets, nil, 94, or 96. GN + can be used by all the listed charsets. If the list contains 94, any + iso-2022 charset whose code-space ranges are 94 long can be designated + to GN. If the list contains 96, any charsets whose whose ranges are + 96 long can be designated to GN. If the first element is a charset, + that charset is initially designated to GN. + + This attribute has a meaning only when `:coding-type' is `iso-2022'. + + `:bom' + + This attributes specifies whether the coding system uses a `byte order + mark'. VALUE must nil, t, or cons of coding systems whose + `:coding-type' is `utf-16'. + + If the value is nil, on decoding, don't treat the first two-byte as + BOM, and on encoding, don't produce BOM bytes. + + If the value is t, on decoding, skip the first two-byte as BOM, and on + encoding, produce BOM bytes accoding to the value of `:endian'. + + If the value is cons, on decoding, check the first two-byte. If theyq + are 0xFE 0xFF, use the car part coding system of the value. If they + are 0xFF 0xFE, use the car part coding system of the value. + Otherwise, treat them as bytes for a normal character. On encoding, + produce BOM bytes accoding to the value of `:endian'. + + This attribute has a meaning only when `:coding-type' is `utf-16'. + + `:endian' + + VALUE must be `big' or `little' specifying big-endian and + little-endian respectively. The default value is `big'. + + This attribute has a meaning only when `:coding-type' is `utf-16'. + + `:ccl-decoder' + + VALUE is a symbol representing the registered CCL program used for + decoding. This attribute has a meaning only when `:coding-type' is + `ccl'. + + `:ccl-encoder' + + VALUE is a symbol representing the registered CCL program used for + encoding. This attribute has a meaning only when `:coding-type' is + `ccl'." + (let* ((common-attrs (mapcar 'list + '(:mnemonic + :coding-type + :charset-list + :ascii-compatible-p + :decode-translation-table + :encode-translation-table + :post-read-conversion + :pre-write-conversion + :default-char ++ :prefer-unibyte + :plist + :eol-type))) + (coding-type (plist-get props :coding-type)) + (spec-attrs (mapcar 'list + (cond ((eq coding-type 'iso-2022) + '(:initial + :reg-usage + :request + :flags)) + ((eq coding-type 'utf-16) + '(:bom + :endian)) + ((eq coding-type 'ccl) + '(:ccl-decoder + :ccl-encoder + :valids)))))) + + (dolist (slot common-attrs) + (setcdr slot (plist-get props (car slot)))) + + (dolist (slot spec-attrs) + (setcdr slot (plist-get props (car slot)))) + + (if (eq coding-type 'iso-2022) + (let ((designation (plist-get props :designation)) + (flags (plist-get props :flags)) + (initial (make-vector 4 nil)) + (reg-usage (cons 4 4)) + request elt) + (dotimes (i 4) + (setq elt (aref designation i)) + (cond ((charsetp elt) + (aset initial i elt) + (setq request (cons (cons elt i) request))) + ((consp elt) + (aset initial i (car elt)) + (if (charsetp (car elt)) + (setq request (cons (cons (car elt) i) request))) + (dolist (e (cdr elt)) + (cond ((charsetp e) + (setq request (cons (cons e i) request))) + ((eq e 94) + (setcar reg-usage i)) + ((eq e 96) + (setcdr reg-usage i)) + ((eq e t) + (setcar reg-usage i) + (setcdr reg-usage i))))))) + (setcdr (assq :initial spec-attrs) initial) + (setcdr (assq :reg-usage spec-attrs) reg-usage) + (setcdr (assq :request spec-attrs) request) + + ;; Change :flags value from a list to a bit-mask. + (let ((bits 0) + (i 0)) + (dolist (elt coding-system-iso-2022-flags) + (if (memq elt flags) + (setq bits (logior bits (lsh 1 i)))) + (setq i (1+ i))) + (setcdr (assq :flags spec-attrs) bits)))) + + ;; Add :name and :docstring properties to PROPS. + (setq props + (cons :name (cons name (cons :docstring (cons (purecopy docstring) + props))))) + (setcdr (assq :plist common-attrs) props) - + (apply 'define-coding-system-internal + name (mapcar 'cdr (append common-attrs spec-attrs))))) (defun coding-system-doc-string (coding-system) "Return the documentation string for CODING-SYSTEM." @@@ -491,39 -715,6 +775,17 @@@ like `mime-charset' as well as the curr (defalias 'coding-system-parent 'coding-system-base) (make-obsolete 'coding-system-parent 'coding-system-base "20.3") - ;; Coding system also has a property `eol-type'. - ;; - ;; This property indicates how the coding system handles end-of-line - ;; format. The value is integer 0, 1, 2, or a vector of three coding - ;; systems. Each integer value 0, 1, and 2 indicates the format of - ;; end-of-line LF, CRLF, and CR respectively. A vector value - ;; indicates that the format of end-of-line should be detected - ;; automatically. Nth element of the vector is the subsidiary coding - ;; system whose `eol-type' property is N. - - (defun coding-system-eol-type (coding-system) - "Return eol-type of CODING-SYSTEM. - An eol-type is integer 0, 1, 2, or a vector of coding systems. - - Integer values 0, 1, and 2 indicate a format of end-of-line; LF, - CRLF, and CR respectively. - - A vector value indicates that a format of end-of-line should be - detected automatically. Nth element of the vector is the subsidiary - coding system whose eol-type is N." - (get coding-system 'eol-type)) - +(defun coding-system-eol-type-mnemonic (coding-system) + "Return the string indicating end-of-line format of CODING-SYSTEM." + (let* ((eol-type (coding-system-eol-type coding-system)) + (val (cond ((eq eol-type 0) eol-mnemonic-unix) + ((eq eol-type 1) eol-mnemonic-dos) + ((eq eol-type 2) eol-mnemonic-mac) + (t eol-mnemonic-undecided)))) + (if (stringp val) + val + (char-to-string val)))) + (defun coding-system-lessp (x y) (cond ((eq x 'no-conversion) t) ((eq y 'no-conversion) nil) @@@ -574,546 -765,6 +836,206 @@@ formats (e.g. iso-latin-1-unix, koi8-r- (setq tail (cdr tail))))) codings)) - (defun map-charset-chars (func charset) - "Use FUNC to map over all characters in CHARSET for side effects. - FUNC is a function of two args, the start and end (inclusive) of a - character code range. Thus FUNC should iterate over [START, END]." - (let* ((dim (charset-dimension charset)) - (chars (charset-chars charset)) - (start (if (= chars 94) - 33 - 32))) - (if (= dim 1) - (funcall func - (make-char charset start) - (make-char charset (+ start chars -1))) - (dotimes (i chars) - (funcall func - (make-char charset (+ i start) start) - (make-char charset (+ i start) (+ start chars -1))))))) - - (defun register-char-codings (coding-system safe-chars) - "This is an obsolete function. - It exists just for backward compatibility, and it does nothing.") - (make-obsolete 'register-char-codings - "Unnecessary function. Calling it has no effect." - "21.3") - +(defconst char-coding-system-table nil + "This is an obsolete variable. +It exists just for backward compatibility, and the value is always nil.") + - (defun make-subsidiary-coding-system (coding-system) - "Make subsidiary coding systems (eol-type variants) of CODING-SYSTEM." - (let ((coding-spec (coding-system-spec coding-system)) - (subsidiaries (vector (intern (format "%s-unix" coding-system)) - (intern (format "%s-dos" coding-system)) - (intern (format "%s-mac" coding-system)))) - (i 0) - temp) - (while (< i 3) - (put (aref subsidiaries i) 'coding-system coding-spec) - (put (aref subsidiaries i) 'eol-type i) - (add-to-coding-system-list (aref subsidiaries i)) - (setq coding-system-alist - (cons (list (symbol-name (aref subsidiaries i))) - coding-system-alist)) - (setq i (1+ i))) - subsidiaries)) - +(defun transform-make-coding-system-args (name type &optional doc-string props) + "For internal use only. +Transform XEmacs style args for `make-coding-system' to Emacs style. +Value is a list of transformed arguments." + (let ((mnemonic (string-to-char (or (plist-get props 'mnemonic) "?"))) + (eol-type (plist-get props 'eol-type)) + properties tmp) + (cond + ((eq eol-type 'lf) (setq eol-type 'unix)) + ((eq eol-type 'crlf) (setq eol-type 'dos)) + ((eq eol-type 'cr) (setq eol-type 'mac))) + (if (setq tmp (plist-get props 'post-read-conversion)) + (setq properties (plist-put properties 'post-read-conversion tmp))) + (if (setq tmp (plist-get props 'pre-write-conversion)) + (setq properties (plist-put properties 'pre-write-conversion tmp))) + (cond + ((eq type 'shift-jis) + `(,name 1 ,mnemonic ,doc-string () ,properties ,eol-type)) + ((eq type 'iso2022) ; This is not perfect. + (if (plist-get props 'escape-quoted) + (error "escape-quoted is not supported: %S" + `(,name ,type ,doc-string ,props))) + (let ((g0 (plist-get props 'charset-g0)) + (g1 (plist-get props 'charset-g1)) + (g2 (plist-get props 'charset-g2)) + (g3 (plist-get props 'charset-g3)) + (use-roman + (and + (eq (cadr (assoc 'latin-jisx0201 + (plist-get props 'input-charset-conversion))) + 'ascii) + (eq (cadr (assoc 'ascii + (plist-get props 'output-charset-conversion))) + 'latin-jisx0201))) + (use-oldjis + (and + (eq (cadr (assoc 'japanese-jisx0208-1978 + (plist-get props 'input-charset-conversion))) + 'japanese-jisx0208) + (eq (cadr (assoc 'japanese-jisx0208 + (plist-get props 'output-charset-conversion))) + 'japanese-jisx0208-1978)))) + (if (charsetp g0) + (if (plist-get props 'force-g0-on-output) + (setq g0 `(nil ,g0)) + (setq g0 `(,g0 t)))) + (if (charsetp g1) + (if (plist-get props 'force-g1-on-output) + (setq g1 `(nil ,g1)) + (setq g1 `(,g1 t)))) + (if (charsetp g2) + (if (plist-get props 'force-g2-on-output) + (setq g2 `(nil ,g2)) + (setq g2 `(,g2 t)))) + (if (charsetp g3) + (if (plist-get props 'force-g3-on-output) + (setq g3 `(nil ,g3)) + (setq g3 `(,g3 t)))) + `(,name 2 ,mnemonic ,doc-string + (,g0 ,g1 ,g2 ,g3 + ,(plist-get props 'short) + ,(not (plist-get props 'no-ascii-eol)) + ,(not (plist-get props 'no-ascii-cntl)) + ,(plist-get props 'seven) + t + ,(not (plist-get props 'lock-shift)) + ,use-roman + ,use-oldjis + ,(plist-get props 'no-iso6429) + nil nil nil nil) + ,properties ,eol-type))) + ((eq type 'big5) + `(,name 3 ,mnemonic ,doc-string () ,properties ,eol-type)) + ((eq type 'ccl) + `(,name 4 ,mnemonic ,doc-string + (,(plist-get props 'decode) . ,(plist-get props 'encode)) + ,properties ,eol-type)) + (t + (error "unsupported XEmacs style make-coding-style arguments: %S" + `(,name ,type ,doc-string ,props)))))) + +(defun make-coding-system (coding-system type mnemonic doc-string + &optional + flags + properties + eol-type) + "Define a new coding system CODING-SYSTEM (symbol). - Remaining arguments are TYPE, MNEMONIC, DOC-STRING, FLAGS (optional), - and PROPERTIES (optional) which construct a coding-spec of CODING-SYSTEM - in the following format: - [TYPE MNEMONIC DOC-STRING PLIST FLAGS] - - TYPE is an integer value indicating the type of the coding system as follows: - 0: Emacs internal format, - 1: Shift-JIS (or MS-Kanji) used mainly on Japanese PCs, - 2: ISO-2022 including many variants, - 3: Big5 used mainly on Chinese PCs, - 4: private, CCL programs provide encoding/decoding algorithm, - 5: Raw-text, which means that text contains random 8-bit codes. - - MNEMONIC is a character to be displayed on mode line for the coding system. - - DOC-STRING is a documentation string for the coding system. - - FLAGS specifies more detailed information of the coding system as follows: - - If TYPE is 2 (ISO-2022), FLAGS is a list of these elements: - CHARSET0, CHARSET1, CHARSET2, CHARSET3, SHORT-FORM, - ASCII-EOL, ASCII-CNTL, SEVEN, LOCKING-SHIFT, SINGLE-SHIFT, - USE-ROMAN, USE-OLDJIS, NO-ISO6429, INIT-BOL, DESIGNATION-BOL, - SAFE, ACCEPT-LATIN-EXTRA-CODE. - CHARSETn are character sets initially designated to Gn graphic registers. - If CHARSETn is nil, Gn is never used. - If CHARSETn is t, Gn can be used but nothing designated initially. - If CHARSETn is a list of character sets, those character sets are - designated to Gn on output, but nothing designated to Gn initially. - But, character set `ascii' can be designated only to G0. - SHORT-FORM non-nil means use short designation sequence on output. - ASCII-EOL non-nil means designate ASCII to g0 at end of line on output. - ASCII-CNTL non-nil means designate ASCII to g0 before control codes and - SPACE on output. - SEVEN non-nil means use 7-bit code only on output. - LOCKING-SHIFT non-nil means use locking-shift. - SINGLE-SHIFT non-nil means use single-shift. - USE-ROMAN non-nil means designate JIS0201-1976-Roman instead of ASCII. - USE-OLDJIS non-nil means designate JIS0208-1976 instead of JIS0208-1983. - NO-ISO6429 non-nil means not use ISO6429's direction specification. - INIT-BOL non-nil means any designation state is assumed to be reset - to initial at each beginning of line on output. - DESIGNATION-BOL non-nil means designation sequences should be placed - at beginning of line on output. - SAFE non-nil means convert unsafe characters to `?' on output. - Characters not specified in the property `safe-charsets' nor - `safe-chars' are unsafe. - ACCEPT-LATIN-EXTRA-CODE non-nil means code-detection routine accepts - a code specified in `latin-extra-code-table' (which see) as a valid - code of the coding system. - - If TYPE is 4 (private), FLAGS should be a cons of CCL programs, for - decoding and encoding. CCL programs should be specified by their - symbols. - - PROPERTIES is an alist of properties vs the corresponding values. The - following properties are recognized: - - o post-read-conversion - - The value is a function to call after some text is inserted and - decoded by the coding system itself and before any functions in - `after-insert-functions' are called. The argument of this - function is the same as for a function in - `after-insert-file-functions', i.e. LENGTH of the text inserted, - with point at the head of the text to be decoded. - - o pre-write-conversion - - The value is a function to call after all functions in - `write-region-annotate-functions' and `buffer-file-format' are - called, and before the text is encoded by the coding system itself. - The arguments to this function are the same as those of a function - in `write-region-annotate-functions', i.e. FROM and TO, specifying - a region of text. - - o translation-table-for-decode - - The value is a translation table to be applied on decoding. See - the function `make-translation-table' for the format of translation - table. This is not applicable to type 4 (CCL-based) coding systems. - - o translation-table-for-encode - - The value is a translation table to be applied on encoding. This is - not applicable to type 4 (CCL-based) coding systems. - - o safe-chars - - The value is a char table. If a character has non-nil value in it, - the character is safely supported by the coding system. This - overrides the specification of safe-charsets. - - o safe-charsets - - The value is a list of charsets safely supported by the coding - system. The value t means that all charsets Emacs handles are - supported. Even if some charset is not in this list, it doesn't - mean that the charset can't be encoded in the coding system; - it just means that some other receiver of text encoded - in the coding system won't be able to handle that charset. - - o mime-charset - - The value is a symbol whose name is the `MIME-charset' parameter of - the coding system. - - o valid-codes (meaningful only for a coding system based on CCL) - - The value is a list to indicate valid byte ranges of the encoded - file. Each element of the list is an integer or a cons of integer. - In the former case, the integer value is a valid byte code. In the - latter case, the integers specify the range of valid byte codes. - - o composition (meaningful only when TYPE is 0 or 2) - - If the value is non-nil, the coding system preserves composition - information. - - These properties are set in PLIST, a property list. This function - also sets properties `coding-category' and `alias-coding-systems' - automatically. - - EOL-TYPE specifies the EOL type of the coding-system in one of the - following formats: - - o symbol (unix, dos, or mac) - - The symbol `unix' means Unix-like EOL (LF), `dos' means - DOS-like EOL (CRLF), and `mac' means MAC-like EOL (CR). - - o number (0, 1, or 2) - - The number 0, 1, and 2 mean UNIX, DOS, and MAC-like EOL - respectively. - - o vector of coding-systems of length 3 - - The EOL type is detected automatically for the coding system. - And, according to the detected EOL type, one of the coding - systems in the vector is selected. Elements of the vector - corresponds to Unix-like EOL, DOS-like EOL, and Mac-like EOL - in this order. - - Kludgy features for backward compatibility: - - 1. If TYPE is 4 and car or cdr of FLAGS is a vector, the vector is - treated as a compiled CCL code. - - 2. If PROPERTIES is just a list of character sets, the list is set as - a value of `safe-charsets' in PLIST." - ++This function is provided for backward compatibility. ++Use `define-coding-system' instead." + ;; For compatiblity with XEmacs, we check the type of TYPE. If it + ;; is a symbol, perhaps, this function is called with XEmacs-style + ;; arguments. Here, try to transform that kind of arguments to + ;; Emacs style. + (if (symbolp type) + (let ((args (transform-make-coding-system-args coding-system type + mnemonic doc-string))) + (setq coding-system (car args) + type (nth 1 args) + mnemonic (nth 2 args) + doc-string (nth 3 args) + flags (nth 4 args) + properties (nth 5 args) + eol-type (nth 6 args)))) + - ;; Set a value of `coding-system' property. - (let ((coding-spec (make-vector 5 nil)) - (no-initial-designation t) - (no-alternative-designation t) - (accept-latin-extra-code nil) - coding-category) - (if (or (not (integerp type)) (< type 0) (> type 5)) - (error "TYPE argument must be 0..5")) - (if (or (not (integerp mnemonic)) (<= mnemonic ? ) (> mnemonic 127)) - (error "MNEMONIC argument must be an ASCII printable character")) - (aset coding-spec coding-spec-type-idx type) - (aset coding-spec coding-spec-mnemonic-idx mnemonic) - (aset coding-spec coding-spec-doc-string-idx - (purecopy (if (stringp doc-string) doc-string ""))) - (cond ((= type 0) - (setq coding-category 'coding-category-emacs-mule)) - ((= type 1) - (setq coding-category 'coding-category-sjis)) - ((= type 2) ; ISO2022 - (let ((i 0) - (vec (make-vector 32 nil)) - (g1-designation nil) - (fl flags)) - (while (< i 4) - (let ((charset (car fl))) - (if (and no-initial-designation - (> i 0) - (or (charsetp charset) - (and (consp charset) - (charsetp (car charset))))) - (setq no-initial-designation nil)) - (if (charsetp charset) - (if (= i 1) (setq g1-designation charset)) - (if (consp charset) - (let ((tail charset) - elt) - (while tail - (setq elt (car tail)) - (if (eq elt t) - (setq no-alternative-designation nil) - (if (and elt (not (charsetp elt))) - (error "Invalid charset: %s" elt))) - (setq tail (cdr tail))) - (setq g1-designation (car charset))) - (if charset - (if (eq charset t) - (setq no-alternative-designation nil) - (error "Invalid charset: %s" charset))))) - (aset vec i charset)) - (setq fl (cdr fl) i (1+ i))) - (while (and (< i 32) fl) - (aset vec i (car fl)) - (if (and (= i 16) ; ACCEPT-LATIN-EXTRA-CODE - (car fl)) - (setq accept-latin-extra-code t)) - (setq fl (cdr fl) i (1+ i))) - (aset coding-spec 4 vec) - (setq coding-category - (if (aref vec 8) ; Use locking-shift. - (or (and (aref vec 7) 'coding-category-iso-7-else) - 'coding-category-iso-8-else) - (if (aref vec 7) ; 7-bit only. - (if (aref vec 9) ; Use single-shift. - 'coding-category-iso-7-else - (if no-alternative-designation - 'coding-category-iso-7-tight - 'coding-category-iso-7)) - (if (or no-initial-designation - (not no-alternative-designation)) - 'coding-category-iso-8-else - (if (and (charsetp g1-designation) - (= (charset-dimension g1-designation) 2)) - 'coding-category-iso-8-2 - 'coding-category-iso-8-1))))))) - ((= type 3) - (setq coding-category 'coding-category-big5)) - ((= type 4) ; private - (setq coding-category 'coding-category-ccl) - (if (not (consp flags)) - (error "Invalid FLAGS argument for TYPE 4 (CCL)") - (let ((decoder (check-ccl-program - (car flags) - (intern (format "%s-decoder" coding-system)))) - (encoder (check-ccl-program - (cdr flags) - (intern (format "%s-encoder" coding-system))))) - (if (and decoder encoder) - (aset coding-spec 4 (cons decoder encoder)) - (error "Invalid FLAGS argument for TYPE 4 (CCL)"))))) - (t ; i.e. (= type 5) - (setq coding-category 'coding-category-raw-text))) - - (let ((plist (list 'coding-category coding-category - 'alias-coding-systems (list coding-system)))) - (if no-initial-designation - (plist-put plist 'no-initial-designation t)) - (if (and properties - (or (eq properties t) - (not (consp (car properties))))) - ;; In the old version, the arg PROPERTIES is a list to be - ;; set in PLIST as a value of property `safe-charsets'. - (setq properties (list (cons 'safe-charsets properties)))) - ;; In the current version PROPERTIES is a property list. - ;; Reflect it into PLIST one by one while handling safe-chars - ;; specially. - (let ((safe-charsets (cdr (assq 'safe-charsets properties))) - (safe-chars (cdr (assq 'safe-chars properties))) - (l properties) - prop val) - ;; If only safe-charsets is specified, make a char-table from - ;; it, and store that char-table as the value of `safe-chars'. - (if (and (not safe-chars) safe-charsets) - (let (charset) - (if (eq safe-charsets t) - (setq safe-chars t) - (setq safe-chars (make-char-table 'safe-chars)) - (while safe-charsets - (setq charset (car safe-charsets) - safe-charsets (cdr safe-charsets)) - (cond ((eq charset 'ascii)) ; just ignore - ((eq charset 'eight-bit-control) - (let ((i 128)) - (while (< i 160) - (aset safe-chars i t) - (setq i (1+ i))))) - ((eq charset 'eight-bit-graphic) - (let ((i 160)) - (while (< i 256) - (aset safe-chars i t) - (setq i (1+ i))))) - (t - (aset safe-chars (make-char charset) t)))) - (if accept-latin-extra-code - (let ((i 128)) - (while (< i 160) - (if (aref latin-extra-code-table i) - (aset safe-chars i t)) - (setq i (1+ i)))))) - (setq l (cons (cons 'safe-chars safe-chars) l)))) - (while l - (setq prop (car (car l)) val (cdr (car l)) l (cdr l)) - (if (eq prop 'safe-chars) - (progn - (if (and (symbolp val) - (get val 'translation-table)) - (setq safe-chars (get val 'translation-table))) - (setq val safe-chars))) - (plist-put plist prop val))) - ;; The property `coding-category' may have been set differently - ;; through PROPERTIES. - (setq coding-category (plist-get plist 'coding-category)) - (aset coding-spec coding-spec-plist-idx plist)) - (put coding-system 'coding-system coding-spec) - (put coding-category 'coding-systems - (cons coding-system (get coding-category 'coding-systems)))) - - ;; Next, set a value of `eol-type' property. - (if (not eol-type) - ;; If EOL-TYPE is nil, set a vector of subsidiary coding - ;; systems, each corresponds to a coding system for the detected - ;; EOL format. - (setq eol-type (make-subsidiary-coding-system coding-system))) - (setq eol-type - (cond ((or (eq eol-type 'unix) (null eol-type)) - 0) - ((eq eol-type 'dos) - 1) - ((eq eol-type 'mac) - 2) - ((or (and (vectorp eol-type) - (= (length eol-type) 3)) - (and (numberp eol-type) - (and (>= eol-type 0) - (<= eol-type 2)))) - eol-type) ++ (setq type ++ (cond ((eq type 0) 'emacs-mule) ++ ((eq type 1) 'shift-jis) ++ ((eq type 2) 'iso2022) ++ ((eq type 3) 'big5) ++ ((eq type 4) 'ccl) ++ ((eq type 5) 'raw-text) + (t - (error "Invalid EOL-TYPE spec:%S" eol-type)))) - (put coding-system 'eol-type eol-type) - - (define-coding-system-internal coding-system) - - ;; At last, register CODING-SYSTEM in `coding-system-list' and - ;; `coding-system-alist'. - (add-to-coding-system-list coding-system) - (setq coding-system-alist (cons (list (symbol-name coding-system)) - coding-system-alist)) - - ;; For a coding system of cateogory iso-8-1 and iso-8-2, create - ;; XXX-with-esc variants. - (let ((coding-category (coding-system-category coding-system))) - (if (or (eq coding-category 'coding-category-iso-8-1) - (eq coding-category 'coding-category-iso-8-2)) - (let ((esc (intern (concat (symbol-name coding-system) "-with-esc"))) - (doc (format "Same as %s but can handle any charsets by ISO's escape sequences." coding-system)) - (safe-charsets (assq 'safe-charsets properties)) - (mime-charset (assq 'mime-charset properties))) - (if safe-charsets - (setcdr safe-charsets t) - (setq properties (cons (cons 'safe-charsets t) properties))) - (if mime-charset - (setcdr mime-charset nil)) - (make-coding-system esc type mnemonic doc - (if (listp (car flags)) - (cons (append (car flags) '(t)) (cdr flags)) - (cons (list (car flags) t) (cdr flags))) - properties)))) - - coding-system) - - (put 'safe-chars 'char-table-extra-slots 0) - - (defun define-coding-system-alias (alias coding-system) - "Define ALIAS as an alias for coding system CODING-SYSTEM." - (put alias 'coding-system (coding-system-spec coding-system)) - (add-to-coding-system-list alias) - (setq coding-system-alist (cons (list (symbol-name alias)) - coding-system-alist)) - (let ((eol-type (coding-system-eol-type coding-system))) - (if (vectorp eol-type) - (progn - (nconc (coding-system-get alias 'alias-coding-systems) (list alias)) - (put alias 'eol-type (make-subsidiary-coding-system alias))) - (put alias 'eol-type eol-type)))) ++ (error "Invalid coding system type: %s" type)))) ++ ++ (setq properties ++ (let ((plist nil) key) ++ (dolist (elt properties) ++ (setq key (car elt)) ++ (cond ((eq key 'post-read-conversion) ++ (setq key :post-read-conversion)) ++ ((eq key 'pre-write-conversion) ++ (setq key :pre-write-conversion)) ++ ((eq key 'translation-table-for-decode) ++ (setq key :decode-translation-table)) ++ ((eq key 'translation-table-for-encode) ++ (setq key :encode-translation-table)) ++ ((eq key 'safe-charsets) ++ (setq key :charset-list)) ++ ((eq key 'mime-charset) ++ (setq key :mime-charset)) ++ ((eq key 'valid-codes) ++ (setq key :valids))) ++ (setq plist (plist-put plist key (cdr elt)))) ++ plist)) ++ (plist-put properties :mnemonic mnemonic) ++ (plist-put properties :coding-type type) ++ (cond ((eq eol-type 0) (setq eol-type 'unix)) ++ ((eq eol-type 1) (setq eol-type 'dos)) ++ ((eq eol-type 2) (setq eol-type 'mac)) ++ ((vectorp eol-type) (setq eol-type nil))) ++ (plist-put properties :eol-type eol-type) ++ ++ (cond ++ ((eq type 'iso2022) ++ (plist-put properties :flags ++ (list (and (or (consp (nth 0 flags)) ++ (consp (nth 1 flags)) ++ (consp (nth 2 flags)) ++ (consp (nth 3 flags))) 'designation) ++ (or (nth 4 flags) 'long-form) ++ (and (nth 5 flags) 'ascii-at-eol) ++ (and (nth 6 flags) 'ascii-at-cntl) ++ (and (nth 7 flags) '7-bit) ++ (and (nth 8 flags) 'locking-shift) ++ (and (nth 9 flags) 'single-shift) ++ (and (nth 10 flags) 'use-roman) ++ (and (nth 11 flags) 'use-oldjis) ++ (or (nth 12 flags) 'direction) ++ (and (nth 13 flags) 'init-at-bol) ++ (and (nth 14 flags) 'designate-at-bol) ++ (and (nth 15 flags) 'safe) ++ (and (nth 16 flags) 'latin-extra))) ++ (plist-put properties :designation ++ (let ((vec (make-vector 4 nil))) ++ (dotimes (i 4) ++ (let ((spec (nth i flags))) ++ (if (eq spec t) ++ (aset vec i '(94 96)) ++ (if (consp spec) ++ (progn ++ (if (memq t spec) ++ (setq spec (append (delq t spec) '(94 96)))) ++ (aset vec i spec)))))) ++ vec))) ++ ++ ((eq type 'ccl) ++ (plist-put properties :ccl-decoder (car flags)) ++ (plist-put properties :ccl-encoder (cdr flags)))) ++ ++ (apply 'define-coding-system coding-system doc-string properties)) + +(defun merge-coding-systems (first second) + "Fill in any unspecified aspects of coding system FIRST from SECOND. +Return the resulting coding system." + (let ((base (coding-system-base second)) + (eol (coding-system-eol-type second))) + ;; If FIRST doesn't specify text conversion, merge with that of SECOND. + (if (eq (coding-system-base first) 'undecided) + (setq first (coding-system-change-text-conversion first base))) + ;; If FIRST doesn't specify eol conversion, merge with that of SECOND. + (if (and (vectorp (coding-system-eol-type first)) + (numberp eol) (>= eol 0) (<= eol 2)) + (setq first (coding-system-change-eol-conversion + first eol))) + first)) + (defun set-buffer-file-coding-system (coding-system &optional force) "Set the file coding-system of the current buffer to CODING-SYSTEM. This means that when you save the buffer, it will be converted @@@ -1333,28 -966,19 +1247,28 @@@ This alist is used to decode an extene ;; For UTF-8 encoding. "\\(\e%G[^\e]*\e%@\\)"))) +;; Functions to support "Non-Standard Character Set Encodings" defined +;; by the COMPOUND-TEXT spec. +;; We support that by decoding the whole data by `ctext' which just +;; pertains byte sequences belonging to ``extended segment'', then +;; decoding those byte sequences one by one in Lisp. +;; This function also supports "The UTF-8 encoding" described in the +;; section 7 of the documentation fo COMPOUND-TEXT distributed with +;; XFree86. + (defun ctext-post-read-conversion (len) "Decode LEN characters encoded as Compound Text with Extended Segments." + ;; We don't need the following because it is expected that this + ;; function is mainly used for decoding X selection which is not + ;; that big data. + ;;(buffer-disable-undo) ; minimize consing due to insertions and deletions (save-match-data (save-restriction + (narrow-to-region (point) (+ (point) len)) (let ((case-fold-search nil) - (in-workbuf (string= (buffer-name) " *code-converting-work*")) last-coding-system-used pos bytes) - (or in-workbuf - (narrow-to-region (point) (+ (point) len))) (decode-coding-region (point-min) (point-max) 'ctext) - (if in-workbuf - (set-buffer-multibyte t)) (while (re-search-forward ctext-non-standard-encodings-regexp nil 'move) (setq pos (match-beginning 0)) @@@ -1479,9 -1096,10 +1389,10 @@@ text, and convert it in the temporary b ;;; FILE I/O (defcustom auto-coding-alist - '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|sx[dmicw]\\|tar\\|tgz\\)\\'" . no-conversion) - '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|tar\\|tgz\\)\\'" . no-conversion) - ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion)) ++ '(("\\.\\(arc\\|zip\\|lzh\\|zoo\\|jar\\|sx[dmicw]\\|tar\\)\\'" . no-conversion-multibyte) ++ ("\\.tgz\\'" . no-conversion) + ("\\.\\(gz\\|Z\\|bz\\|bz2\\|gpg\\)\\'" . no-conversion) + ("/#[^/]+#\\'" . emacs-mule)) "Alist of filename patterns vs corresponding coding systems. Each element looks like (REGEXP . CODING-SYSTEM). A file whose name matches REGEXP is decoded by CODING-SYSTEM on reading. @@@ -1675,18 -1264,17 +1587,6 @@@ different if the buffer has become unib (modified-p (buffer-modified-p))) (when coding-system (set-buffer-file-coding-system coding-system t) -- (if (and enable-multibyte-characters - (or (eq coding-system 'no-conversion) - (eq (coding-system-type coding-system) 5)) - (or (eq (coding-system-type coding-system) 'raw-text)) -- ;; If buffer was unmodified and the size is the -- ;; same as INSERTED, we must be visiting it. -- (not modified-p) -- (= (buffer-size) inserted)) -- ;; For coding systems no-conversion and raw-text..., -- ;; edit the buffer as unibyte. - (let ((pos-marker (copy-marker (+ (point) inserted)))) - (let ((pos-byte (position-bytes (+ (point) inserted)))) -- (set-buffer-multibyte nil) - (setq inserted (- pos-marker (point))))) - (setq inserted (- pos-byte (position-bytes (point)))))) (set-buffer-modified-p modified-p)))) inserted) @@@ -1955,49 -1493,14 +1828,49 @@@ the table in `translation-table-vector' (put 'with-category-table 'lisp-indent-function 1) -(defmacro with-category-table (category-table &rest body) - "Execute BODY like `progn' with CATEGORY-TABLE the current category table." - (let ((current-category-table (make-symbol "current-category-table"))) - `(let ((,current-category-table (category-table))) - (set-category-table ,category-table) +(defmacro with-category-table (table &rest body) - "Evaluate BODY with category table of current buffer set to TABLE. ++ "Execute BODY like `progn' with CATEGORY-TABLE the current category table. +The category table of the current buffer is saved, BODY is evaluated, +then the saved table is restored, even in case of an abnormal exit. +Value is what BODY returns." + (let ((old-table (make-symbol "old-table")) + (old-buffer (make-symbol "old-buffer"))) + `(let ((,old-table (category-table)) + (,old-buffer (current-buffer))) (unwind-protect - (progn ,@body) - (set-category-table ,current-category-table))))) + (progn + (set-category-table ,table) + ,@body) + (save-current-buffer + (set-buffer ,old-buffer) + (set-category-table ,old-table)))))) + +(defun define-translation-hash-table (symbol table) + "Define SYMBOL as the name of the hash translation TABLE for use in CCL. + +Analogous to `define-translation-table', but updates +`translation-hash-table-vector' and the table is for use in the CCL +`lookup-integer' and `lookup-character' functions." + (unless (and (symbolp symbol) + (hash-table-p table)) + (error "Bad args to define-translation-hash-table")) + (let ((len (length translation-hash-table-vector)) + (id 0) + done) + (put symbol 'translation-hash-table table) + (while (not done) + (if (>= id len) + (setq translation-hash-table-vector + (vconcat translation-hash-table-vector [nil]))) + (let ((slot (aref translation-hash-table-vector id))) + (if (or (not slot) + (eq (car slot) symbol)) + (progn + (aset translation-hash-table-vector id (cons symbol table)) + (setq done t)) + (setq id (1+ id))))) + (put symbol 'translation-hash-table-id id) + id)) ;;; Initialize some variables. @@@ -2007,44 -1510,8 +1880,46 @@@ (setq ignore-relative-composition (make-char-table 'ignore-relative-composition)) + (make-obsolete 'set-char-table-default + "Generic characters no longer exist" "22.1") + +;;; Built-in auto-coding-functions: + +(defun sgml-xml-auto-coding-function (size) + "Determine whether the buffer is XML, and if so, its encoding. +This function is intended to be added to `auto-coding-functions'." + (setq size (+ (point) size)) + (when (re-search-forward "\\`[[:space:]\n]*<\\?xml" size t) + (let ((end (save-excursion + ;; This is a hack. + (re-search-forward "\"\\s-*\\?>" size t)))) + (when end + (if (re-search-forward "encoding=\"\\(.+?\\)\"" end t) + (let* ((match (match-string 1)) + (sym (intern (downcase match)))) + (if (coding-system-p sym) + sym + (message "Warning: unknown coding system \"%s\"" match) + nil)) + 'utf-8))))) + +(defun sgml-html-meta-auto-coding-function (size) + "If the buffer has an HTML meta tag, use it to determine encoding. +This function is intended to be added to `auto-coding-functions'." + (setq size (min (+ (point) size) + ;; Only search forward 10 lines + (save-excursion + (forward-line 10) + (point)))) + (when (and (search-forward "" size t) + (re-search-forward "(B [?O]) - (aset standard-display-table ?$,1(?(B [?P]) - (aset standard-display-table ?$,1(@(B [?R]) - (aset standard-display-table ?$,1(A(B [?S]) - (aset standard-display-table ?$,1(B(B [?T]) - (aset standard-display-table ?$,1(C(B [?U]) - (aset standard-display-table ?$,1(D(B [?F]) - (aset standard-display-table ?$,1(E(B [?K ?h]) - (aset standard-display-table ?$,1(F(B [?T ?s]) - (aset standard-display-table ?$,1(G(B [?C ?h]) - (aset standard-display-table ?$,1(H(B [?S ?h]) - (aset standard-display-table ?$,1(I(B [?S ?c ?h]) - (aset standard-display-table ?$,1(J(B [?~]) - (aset standard-display-table ?$,1(K(B [?Y]) - (aset standard-display-table ?$,1(L(B [?']) - (aset standard-display-table ?$,1(M(B [?E ?']) - (aset standard-display-table ?$,1(N(B [?Y ?u]) - (aset standard-display-table ?$,1(O(B [?Y ?a]) - - (aset standard-display-table ?$,1(t(B [?i ?e]) - (aset standard-display-table ?$,1(w(B [?i]) - (aset standard-display-table ?$,1(~(B [?u]) - (aset standard-display-table ?$,1(r(B [?d ?j]) - (aset standard-display-table ?$,1({(B [?c ?h ?j]) - (aset standard-display-table ?$,1(s(B [?g ?j]) - (aset standard-display-table ?$,1(u(B [?s]) - (aset standard-display-table ?$,1(|(B [?k]) - (aset standard-display-table ?$,1(v(B [?i]) - (aset standard-display-table ?$,1(x(B [?j]) - (aset standard-display-table ?$,1(y(B [?l ?j]) - (aset standard-display-table ?$,1(z(B [?n ?j]) - (aset standard-display-table ?$,1((B [?d ?z]) - - (aset standard-display-table ?$,1($(B [?Y ?e]) - (aset standard-display-table ?$,1('(B [?Y ?i]) - (aset standard-display-table ?$,1(.(B [?U]) - (aset standard-display-table ?$,1("(B [?D ?j]) - (aset standard-display-table ?$,1(+(B [?C ?h ?j]) - (aset standard-display-table ?$,1(#(B [?G ?j]) - (aset standard-display-table ?$,1(%(B [?S]) - (aset standard-display-table ?$,1(,(B [?K]) - (aset standard-display-table ?$,1(&(B [?I]) - (aset standard-display-table ?$,1(((B [?J]) - (aset standard-display-table ?$,1()(B [?L ?j]) - (aset standard-display-table ?$,1(*(B [?N ?j]) - (aset standard-display-table ?$,1(/(B [?D ?j]) - (when (equal cyrillic-language "Bulgarian") - (aset standard-display-table ?,Li(B [?s?h?t]) - (aset standard-display-table ?,LI(B [?S?h?t]) - (aset standard-display-table ?,Ln(B [?i?u]) - (aset standard-display-table ?,LN(B [?I?u]) - (aset standard-display-table ?,Lo(B [?i?a]) - (aset standard-display-table ?,LO(B [?I?a])) + (aset standard-display-table ?,Li(B [?s ?h ?t]) + (aset standard-display-table ?,LI(B [?S ?h ?t]) + (aset standard-display-table ?,Ln(B [?i ?u]) + (aset standard-display-table ?,LN(B [?I ?u]) + (aset standard-display-table ?,Lo(B [?i ?a]) - (aset standard-display-table ?,LO(B [?I ?a]) - ;; Unicode version: - (aset standard-display-table ?$,1(i(B [?s ?h ?t]) - (aset standard-display-table ?$,1(I(B [?S ?h ?t]) - (aset standard-display-table ?$,1(n(B [?i ?u]) - (aset standard-display-table ?$,1(N(B [?I ?u]) - (aset standard-display-table ?$,1(o(B [?i ?a]) - (aset standard-display-table ?$,1(O(B [?I ?a])) ++ (aset standard-display-table ?,LO(B [?I ?a])) (when (equal cyrillic-language "Ukrainian") ; based on the official ; transliteration table @@@ -294,15 -183,8 +183,8 @@@ (aset standard-display-table ?,L8(B [?Y]) (aset standard-display-table ?,LY(B [?i]) (aset standard-display-table ?,L9(B [?Y]) - (aset standard-display-table ?,Ln(B [?i?u]) - (aset standard-display-table ?,Lo(B [?i?a])))) + (aset standard-display-table ?,Ln(B [?i ?u]) - (aset standard-display-table ?,Lo(B [?i ?a]) - ;; Unicode version: - (aset standard-display-table ?$,1(X(B [?y]) - (aset standard-display-table ?$,1(8(B [?Y]) - (aset standard-display-table ?$,1(Y(B [?i]) - (aset standard-display-table ?$,1(9(B [?Y]) - (aset standard-display-table ?$,1(n(B [?i ?u]) - (aset standard-display-table ?$,1(o(B [?i ?a])))) ++ (aset standard-display-table ?,Lo(B [?i ?a])))) ;; (provide 'cyril-util) diff --cc lisp/language/cyrillic.el index 742da4fc5a0,1e9896eb460..f95a5427a12 --- a/lisp/language/cyrillic.el +++ b/lisp/language/cyrillic.el @@@ -1,11 -1,10 +1,14 @@@ ;;; cyrillic.el --- support for Cyrillic -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. -;; Copyright (C) 2002 Free Software Foundation, Inc. ++;; Licensed to the Free Software Foundation. +;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 -;; Keywords: multilingual, Cyrillic +;; Author: Kenichi Handa +;; Keywords: multilingual, Cyrillic, i18n ;; This file is part of GNU Emacs. @@@ -65,375 -45,61 +67,77 @@@ (define-coding-system-alias 'iso-8859-5 'cyrillic-iso-8bit) (set-language-info-alist - "Cyrillic-ISO" '((charset cyrillic-iso8859-5) + "Cyrillic-ISO" '((charset iso-8859-5) (coding-system cyrillic-iso-8bit) (coding-priority cyrillic-iso-8bit) + (input-method . "cyrillic-yawerty") ; fixme - (nonascii-translation . cyrillic-iso8859-5) + (nonascii-translation . iso-8859-5) - (input-method . "cyrillic-yawerty") (unibyte-display . cyrillic-iso-8bit) (features cyril-util) (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") (documentation . "Support for Cyrillic ISO-8859-5.")) '("Cyrillic")) -;; KOI-8 stuff +;; KOI-8R stuff - ;; The mule-unicode portion of this is from - ;; http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT, - ;; which references RFC 1489. - (defvar cyrillic-koi8-r-decode-table - [ - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - ;; 8859-5 plus Unicode - ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B - ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1s"(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B - ?$,2 p(B ?$,2 q(B ?$,2 r(B ?,Lq(B ?$,2 s(B ?$,2 t(B ?$,2 u(B ?$,2 v(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,2 |(B ?$,2 }(B ?$,2 ~(B - ?$,2 (B ?$,2! (B ?$,2!!(B ?,L!(B ?$,2!"(B ?$,2!#(B ?$,2!$(B ?$,2!%(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,2!+(B ?$,2!,(B ?,A)(B - ?,Ln(B ?,LP(B ?,LQ(B ?,Lf(B ?,LT(B ?,LU(B ?,Ld(B ?,LS(B ?,Le(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B - ?,L_(B ?,Lo(B ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,LV(B ?,LR(B ?,Ll(B ?,Lk(B ?,LW(B ?,Lh(B ?,Lm(B ?,Li(B ?,Lg(B ?,Lj(B - ?,LN(B ?,L0(B ?,L1(B ?,LF(B ?,L4(B ?,L5(B ?,LD(B ?,L3(B ?,LE(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B - ?,L?(B ?,LO(B ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,L6(B ?,L2(B ?,LL(B ?,LK(B ?,L7(B ?,LH(B ?,LM(B ?,LI(B ?,LG(B ?,LJ(B - ;; All Unicode: - ;; ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B - ;; ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1s"(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,A (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B - ;; ?$,2 p(B ?$,2 q(B ?$,2 r(B ?$,1(q(B ?$,2 s(B ?$,2 t(B ?$,2 u(B ?$,2 v(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,2 |(B ?$,2 }(B ?$,2 ~(B - ;; ?$,2 (B ?$,2! (B ?$,2!!(B ?$,1(!(B ?$,2!"(B ?$,2!#(B ?$,2!$(B ?$,2!%(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,2!+(B ?$,2!,(B ?,A)(B - ;; ?$,1(n(B ?$,1(P(B ?$,1(Q(B ?$,1(f(B ?$,1(T(B ?$,1(U(B ?$,1(d(B ?$,1(S(B ?$,1(e(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B - ;; ?$,1(_(B ?$,1(o(B ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(V(B ?$,1(R(B ?$,1(l(B ?$,1(k(B ?$,1(W(B ?$,1(h(B ?$,1(m(B ?$,1(i(B ?$,1(g(B ?$,1(j(B - ;; ?$,1(N(B ?$,1(0(B ?$,1(1(B ?$,1(F(B ?$,1(4(B ?$,1(5(B ?$,1(D(B ?$,1(3(B ?$,1(E(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B - ;; ?$,1(?(B ?$,1(O(B ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(6(B ?$,1(2(B ?$,1(L(B ?$,1(K(B ?$,1(7(B ?$,1(H(B ?$,1(M(B ?$,1(I(B ?$,1(G(B ?$,1(J(B - ] - "Cyrillic KOI8-R decoding table.") - - (let ((table (make-translation-table-from-vector - cyrillic-koi8-r-decode-table))) - (define-translation-table 'cyrillic-koi8-r-nonascii-translation-table table) - (define-translation-table 'cyrillic-koi8-r-encode-table - (char-table-extra-slot table 0))) - - ;; No point in keeping it around. (It can't be let-bound, since it's - ;; needed for macro expansion.) - (makunbound 'cyrillic-koi8-r-decode-table) - - (define-ccl-program ccl-decode-koi8 - `(4 - ((loop - (r0 = 0) - (read r1) - (if (r1 < 128) - (write-repeat r1) - ((translate-character cyrillic-koi8-r-nonascii-translation-table r0 r1) - (translate-character ucs-translation-table-for-decode r0 r1) - (write-multibyte-character r0 r1) - (repeat)))))) - "CCL program to decode KOI8-R.") - - (define-ccl-program ccl-encode-koi8 - `(1 - ((loop - (read-multibyte-character r0 r1) - (translate-character cyrillic-koi8-r-encode-table r0 r1) - (if (r0 != ,(charset-id 'ascii)) - (if (r0 != ,(charset-id 'eight-bit-graphic)) - (if (r0 != ,(charset-id 'eight-bit-control)) - (r1 = ??)))) - (write-repeat r1)))) - "CCL program to encode KOI8-R.") - - (defun cyrillic-unify-encoding (table) - "Set up equivalent characters in the encoding TABLE. - This works whether or not the table is Unicode-based or - 8859-5-based. (Only appropriate for Cyrillic.)" - (let ((table (get table 'translation-table))) - (dotimes (i 96) - (let* ((c (make-char 'cyrillic-iso8859-5 (+ i 32))) - (u ; equivalent Unicode char - (cond ((eq c ?,L (B) ?,A (B) - ((eq c ?,L-(B) ?,A-(B) - ((eq c ?,L}(B) ?,A'(B) - (t (decode-char 'ucs (+ #x400 i))))) - (ec (aref table c)) ; encoding of 8859-5 - (uc (aref table u))) ; encoding of Unicode - (unless (memq c '(?,L (B ?,L-(B ?,L}(B)) ; 8859-5 exceptions - (unless uc - (aset table u ec)) - (unless ec - (aset table c uc))))))) - - (cyrillic-unify-encoding 'cyrillic-koi8-r-encode-table) - - (make-coding-system - 'cyrillic-koi8 4 - ;; We used to use ?K. It is true that ?K is more strictly correct, - ;; but it is also used for Korean. - ;; So people who use koi8 for languages other than Russian - ;; will have to forgive us. - ?R "KOI8-R 8-bit encoding for Cyrillic (MIME: KOI8-R)." - '(ccl-decode-koi8 . ccl-encode-koi8) - `((safe-chars . cyrillic-koi8-r-encode-table) - (mime-charset . koi8-r) - (valid-codes (0 . 255)) - (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode))) + (define-coding-system 'cyrillic-koi8 + "KOI8 8-bit encoding for Cyrillic (MIME: KOI8-R)." + :coding-type 'charset + ;; We used to use ?K. It is true that ?K is more strictly correct, + ;; but it is also used for Korean. So people who use koi8 for + ;; languages other than Russian will have to forgive us. + :mnemonic ?R + :charset-list '(koi8) + :mime-charset 'koi8-r) (define-coding-system-alias 'koi8-r 'cyrillic-koi8) (define-coding-system-alias 'koi8 'cyrillic-koi8) (define-coding-system-alias 'cp878 'cyrillic-koi8) - ;; Allow displaying some of KOI & al with an 8859-5-encoded font. We - ;; won't bother about the exceptions when encoding the font, since - ;; NBSP will fall through below and work anyhow, and we'll have - ;; avoided setting the fontset for the other two to 8859-5 -- they're - ;; not in KOI and Alternativnyj anyhow. - (define-ccl-program ccl-encode-8859-5-font - `(0 - ((if (r0 == ,(charset-id 'cyrillic-iso8859-5)) - (r1 += 128) - (if (r0 == ,(charset-id 'mule-unicode-0100-24ff)) - (r1 = (r2 + 128)))))) - "Encode ISO 8859-5 and Cyrillic Unicode chars to 8859-5 font.") - - (add-to-list 'font-ccl-encoder-alist '("iso8859-5" . ccl-encode-8859-5-font)) - - ;; The table is set up later to encode both Unicode and 8859-5. - (define-ccl-program ccl-encode-koi8-font - `(0 - (translate-character cyrillic-koi8-r-encode-table r0 r1)) - "CCL program to encode Cyrillic chars to KOI font.") - - (add-to-list 'font-ccl-encoder-alist '("koi8" . ccl-encode-koi8-font)) - (set-language-info-alist - "Cyrillic-KOI8" `((charset cyrillic-iso8859-5) - (nonascii-translation - . ,(get 'cyrillic-koi8-r-nonascii-translation-table - 'translation-table)) + "Cyrillic-KOI8" `((charset koi8) (coding-system cyrillic-koi8) - (coding-priority cyrillic-koi8) + (coding-priority cyrillic-koi8 cyrillic-iso-8bit) + (nonascii-translation . koi8) - (input-method . "cyrillic-jcuken") + (input-method . "russian-typewriter") (features cyril-util) (unibyte-display . cyrillic-koi8) (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") (documentation . "Support for Cyrillic KOI8-R.")) '("Cyrillic")) +(set-language-info-alist + "Russian" `((charset cyrillic-iso8859-5) + (nonascii-translation + . ,(get 'cyrillic-koi8-r-nonascii-translation-table + 'translation-table)) + (coding-system cyrillic-koi8) + (coding-priority cyrillic-koi8 cyrillic-iso-8bit) + (input-method . "russian-computer") + (features cyril-util) + (unibyte-display . cyrillic-koi8) + (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") + (documentation . "\ +Support for Russian using koi8-r and the russian-computer input method.") + (tutorial . "TUTORIAL.ru")) + '("Cyrillic")) + - - (defvar cyrillic-koi8-u-decode-table - [ - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - ;; All Unicode: - ;; ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B - ;; ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1x9(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B - ;; ?$,2 p(B ?$,2 q(B ?$,2 r(B ?$,1(q(B ?$,1(t(B ?$,2 t(B ?$,1(v(B ?$,1(w(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,1)Q(B ?$,2 }(B ?$,2 ~(B - ;; ?$,2 (B ?$,2! (B ?$,2!!(B ?$,1(!(B ?$,1($(B ?$,2!#(B ?$,1(&(B ?$,1('(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,1)P(B ?$,2!,(B ?,A)(B - ;; ?$,1(n(B ?$,1(P(B ?$,1(Q(B ?$,1(f(B ?$,1(T(B ?$,1(U(B ?$,1(d(B ?$,1(S(B ?$,1(e(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B - ;; ?$,1(_(B ?$,1(o(B ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(V(B ?$,1(R(B ?$,1(l(B ?$,1(k(B ?$,1(W(B ?$,1(h(B ?$,1(m(B ?$,1(i(B ?$,1(g(B ?$,1(j(B - ;; ?$,1(N(B ?$,1(0(B ?$,1(1(B ?$,1(F(B ?$,1(4(B ?$,1(5(B ?$,1(D(B ?$,1(3(B ?$,1(E(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B - ;; ?$,1(?(B ?$,1(O(B ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(6(B ?$,1(2(B ?$,1(L(B ?$,1(K(B ?$,1(7(B ?$,1(H(B ?$,1(M(B ?$,1(I(B ?$,1(G(B ?$,1(J(B - ;; 8859-5 plus Unicode: - ?$,2 (B ?$,2 "(B ?$,2 ,(B ?$,2 0(B ?$,2 4(B ?$,2 8(B ?$,2 <(B ?$,2 D(B ?$,2 L(B ?$,2 T(B ?$,2 \(B ?$,2!@(B ?$,2!D(B ?$,2!H(B ?$,2!L(B ?$,2!P(B - ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,1{ (B ?$,2!`(B ?$,1x9(B ?$,1x:(B ?$,1xh(B ?$,1y$(B ?$,1y%(B ?,L (B ?$,1{!(B ?,A0(B ?,A2(B ?,A7(B ?,Aw(B - ?$,2 p(B ?$,2 q(B ?$,2 r(B ?,Lq(B ?,Lt(B ?$,2 t(B ?,Lv(B ?,Lw(B ?$,2 w(B ?$,2 x(B ?$,2 y(B ?$,2 z(B ?$,2 {(B ?$,1)Q(B ?$,2 }(B ?$,2 ~(B - ?$,2 (B ?$,2! (B ?$,2!!(B ?,L!(B ?,L$(B ?$,2!#(B ?,L&(B ?,L'(B ?$,2!&(B ?$,2!'(B ?$,2!((B ?$,2!)(B ?$,2!*(B ?$,1)P(B ?$,2!,(B ?,A)(B - ?,Ln(B ?,LP(B ?,LQ(B ?,Lf(B ?,LT(B ?,LU(B ?,Ld(B ?,LS(B ?,Le(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B - ?,L_(B ?,Lo(B ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,LV(B ?,LR(B ?,Ll(B ?,Lk(B ?,LW(B ?,Lh(B ?,Lm(B ?,Li(B ?,Lg(B ?,Lj(B - ?,LN(B ?,L0(B ?,L1(B ?,LF(B ?,L4(B ?,L5(B ?,LD(B ?,L3(B ?,LE(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B - ?,L?(B ?,LO(B ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,L6(B ?,L2(B ?,LL(B ?,LK(B ?,L7(B ?,LH(B ?,LM(B ?,LI(B ?,LG(B ?,LJ(B - ] - "Cyrillic KOI8-U decoding table.") - - (let ((table (make-translation-table-from-vector - cyrillic-koi8-u-decode-table))) - (define-translation-table 'cyrillic-koi8-u-nonascii-translation-table table) - (define-translation-table 'cyrillic-koi8-u-encode-table - (char-table-extra-slot table 0))) - - (makunbound 'cyrillic-koi8-u-decode-table) - - (define-ccl-program ccl-decode-koi8-u - `(4 - ((loop - (r0 = 0) - (read r1) - (if (r1 < 128) - (write-repeat r1) - ((translate-character cyrillic-koi8-u-nonascii-translation-table r0 r1) - (translate-character ucs-translation-table-for-decode r0 r1) - (write-multibyte-character r0 r1) - (repeat)))))) - "CCL program to decode KOI8-U.") - - (define-ccl-program ccl-encode-koi8-u - `(1 - ((loop - (read-multibyte-character r0 r1) - (translate-character cyrillic-koi8-u-encode-table r0 r1) - (if (r0 != ,(charset-id 'ascii)) - (if (r0 != ,(charset-id 'eight-bit-graphic)) - (if (r0 != ,(charset-id 'eight-bit-control)) - (r1 = ??)))) - (write-repeat r1)))) - "CCL program to encode KOI8-U.") - - (cyrillic-unify-encoding 'cyrillic-koi8-u-encode-table) - - (make-coding-system - 'koi8-u 4 - ?U "KOI8-U 8-bit encoding for Cyrillic (MIME: KOI8-U)" - '(ccl-decode-koi8-u . ccl-encode-koi8-u) - `((safe-chars . cyrillic-koi8-u-encode-table) - (mime-charset . koi8-u) - (valid-codes (0 . 255)) - (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode))) - - (define-ccl-program ccl-encode-koi8-u-font - `(0 - (translate-character cyrillic-koi8-u-encode-table r0 r1)) - "CCL program to encode Cyrillic chars to KOI-U font.") - - (add-to-list 'font-ccl-encoder-alist '("koi8-u" . ccl-encode-koi8-u-font)) - - (set-language-info-alist - "Ukrainian" `((coding-system koi8-u) - (coding-priority koi8-u) - (nonascii-translation - . ,(get 'cyrillic-koi8-u-nonascii-translation-table - 'translation-table)) - (input-method . "ukrainian-computer") - (features code-pages) - (documentation - . "Support for Ukrainian with KOI8-U character set.")) - '("Cyrillic")) - ;;; ALTERNATIVNYJ stuff - ;; Fixme: It's unclear what's the correct table. I've found - ;; statements both that it's the same as cp866 and somewhat different, - ;; but nothing that looks really definitive. - (defvar cyrillic-alternativnyj-decode-table - [ - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 - 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 - 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 - 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 - 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 - 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 - ;; ?$,1(0(B ?$,1(1(B ?$,1(2(B ?$,1(3(B ?$,1(4(B ?$,1(5(B ?$,1(6(B ?$,1(7(B ?$,1(8(B ?$,1(9(B ?$,1(:(B ?$,1(;(B ?$,1(<(B ?$,1(=(B ?$,1(>(B ?$,1(?(B - ;; ?$,1(@(B ?$,1(A(B ?$,1(B(B ?$,1(C(B ?$,1(D(B ?$,1(E(B ?$,1(F(B ?$,1(G(B ?$,1(H(B ?$,1(I(B ?$,1(J(B ?$,1(K(B ?$,1(L(B ?$,1(M(B ?$,1(N(B ?$,1(O(B - ;; ?$,1(P(B ?$,1(Q(B ?$,1(R(B ?$,1(S(B ?$,1(T(B ?$,1(U(B ?$,1(V(B ?$,1(W(B ?$,1(X(B ?$,1(Y(B ?$,1(Z(B ?$,1([(B ?$,1(\(B ?$,1(](B ?$,1(^(B ?$,1(_(B - ;; ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,2 "(B ?$,2 D(B ?$,2!!(B ?$,2!"(B ?$,2 v(B ?$,2 u(B ?$,2!#(B ?$,2 q(B ?$,2 w(B ?$,2 }(B ?$,2 |(B ?$,2 {(B ?$,2 0(B - ;; ?$,2 4(B ?$,2 T(B ?$,2 L(B ?$,2 <(B ?$,2 (B ?$,2 \(B ?$,2 ~(B ?$,2 (B ?$,2 z(B ?$,2 t(B ?$,2!)(B ?$,2!&(B ?$,2! (B ?$,2 p(B ?$,2!,(B ?$,2!'(B - ;; ?$,2!((B ?$,2!$(B ?$,2!%(B ?$,2 y(B ?$,2 x(B ?$,2 r(B ?$,2 s(B ?$,2!+(B ?$,2!*(B ?$,2 8(B ?$,2 ,(B ?$,2!H(B ?$,2!D(B ?$,2!L(B ?$,2!P(B ?$,2!@(B - ;; ?$,1(`(B ?$,1(a(B ?$,1(b(B ?$,1(c(B ?$,1(d(B ?$,1(e(B ?$,1(f(B ?$,1(g(B ?$,1(h(B ?$,1(i(B ?$,1(j(B ?$,1(k(B ?$,1(l(B ?$,1(m(B ?$,1(n(B ?$,1(o(B - ;; ?$,1(!(B ?$,1(q(B ?$,1ry(B ?$,1rx(B ?$,1%A(B ?$,1%@(B ?$,1s:(B ?$,1s9(B ?$,1vq(B ?$,1vs(B ?,A1(B ?,Aw(B ?$,1uV(B ?,A$(B ?$,2!`(B ?,A (B ; - ;; 8859+Unicode - ?,L0(B ?,L1(B ?,L2(B ?,L3(B ?,L4(B ?,L5(B ?,L6(B ?,L7(B ?,L8(B ?,L9(B ?,L:(B ?,L;(B ?,L<(B ?,L=(B ?,L>(B ?,L?(B - ?,L@(B ?,LA(B ?,LB(B ?,LC(B ?,LD(B ?,LE(B ?,LF(B ?,LG(B ?,LH(B ?,LI(B ?,LJ(B ?,LK(B ?,LL(B ?,LM(B ?,LN(B ?,LO(B - ?,LP(B ?,LQ(B ?,LR(B ?,LS(B ?,LT(B ?,LU(B ?,LV(B ?,LW(B ?,LX(B ?,LY(B ?,LZ(B ?,L[(B ?,L\(B ?,L](B ?,L^(B ?,L_(B - ?$,2!Q(B ?$,2!R(B ?$,2!S(B ?$,2 "(B ?$,2 D(B ?$,2!!(B ?$,2!"(B ?$,2 v(B ?$,2 u(B ?$,2!#(B ?$,2 q(B ?$,2 w(B ?$,2 }(B ?$,2 |(B ?$,2 {(B ?$,2 0(B - ?$,2 4(B ?$,2 T(B ?$,2 L(B ?$,2 <(B ?$,2 (B ?$,2 \(B ?$,2 ~(B ?$,2 (B ?$,2 z(B ?$,2 t(B ?$,2!)(B ?$,2!&(B ?$,2! (B ?$,2 p(B ?$,2!,(B ?$,2!'(B - ?$,2!((B ?$,2!$(B ?$,2!%(B ?$,2 y(B ?$,2 x(B ?$,2 r(B ?$,2 s(B ?$,2!+(B ?$,2!*(B ?$,2 8(B ?$,2 ,(B ?$,2!H(B ?$,2!D(B ?$,2!L(B ?$,2!P(B ?$,2!@(B - ?,L`(B ?,La(B ?,Lb(B ?,Lc(B ?,Ld(B ?,Le(B ?,Lf(B ?,Lg(B ?,Lh(B ?,Li(B ?,Lj(B ?,Lk(B ?,Ll(B ?,Lm(B ?,Ln(B ?,Lo(B - ;; Taken from http://www.cyrillic.com/ref/cyrillic/koi-8alt.html - ;; with guesses for the Unicodes of the glyphs in the absence of a - ;; table. - ?,L!(B ?,Lq(B ?$,1ry(B ?$,1rx(B ?$,1%A(B ?$,1%@(B ?$,1s:(B ?$,1s9(B ?$,1vq(B ?$,1vs(B ?,A1(B ?,Aw(B ?,Lp(B ?,A$(B ?$,2!`(B ?,L (B] - "Cyrillic ALTERNATIVNYJ decoding table.") - - (let ((table (make-translation-table-from-vector - cyrillic-alternativnyj-decode-table))) - (define-translation-table 'cyrillic-alternativnyj-nonascii-translation-table - table) - (define-translation-table 'cyrillic-alternativnyj-encode-table - (char-table-extra-slot table 0))) - - (makunbound 'cyrillic-alternativnyj-decode-table) - - (define-ccl-program ccl-decode-alternativnyj - `(4 - ((loop - (r0 = 0) - (read r1) - (if (r1 < 128) - (write-repeat r1) - ((translate-character cyrillic-alternativnyj-nonascii-translation-table - r0 r1) - (translate-character ucs-translation-table-for-decode r0 r1) - (write-multibyte-character r0 r1) - (repeat)))))) - "CCL program to decode Alternativnyj.") - - (define-ccl-program ccl-encode-alternativnyj - `(1 - ((loop - (read-multibyte-character r0 r1) - (translate-character cyrillic-alternativnyj-encode-table r0 r1) - (if (r0 != ,(charset-id 'ascii)) - (if (r0 != ,(charset-id 'eight-bit-graphic)) - (if (r0 != ,(charset-id 'eight-bit-control)) - (r1 = ??)))) - (write-repeat r1)))) - "CCL program to encode Alternativnyj.") - - (cyrillic-unify-encoding 'cyrillic-alternativnyj-encode-table) - - (make-coding-system - 'cyrillic-alternativnyj 4 ?A - "ALTERNATIVNYJ 8-bit encoding for Cyrillic." - '(ccl-decode-alternativnyj . ccl-encode-alternativnyj) - `((safe-chars . cyrillic-alternativnyj-encode-table) - (valid-codes (0 . 255)) - (dependency unify-8859-on-encoding-mode unify-8859-on-decoding-mode))) + (define-coding-system 'cyrillic-alternativnyj + "ALTERNATIVNYJ 8-bit encoding for Cyrillic." + :coding-type 'charset + :mnemonic ?A + :charset-list '(alternativnyj)) (define-coding-system-alias 'alternativnyj 'cyrillic-alternativnyj) - (define-ccl-program ccl-encode-alternativnyj-font - `(0 - (translate-character cyrillic-alternativnyj-encode-table r0 r1)) - "CCL program to encode Cyrillic chars to Alternativnyj font.") - - (add-to-list 'font-ccl-encoder-alist - '("alternativnyj" . ccl-encode-alternativnyj-font)) - (set-language-info-alist - "Cyrillic-ALT" `((charset cyrillic-iso8859-5) - (nonascii-translation - . ,(get 'cyrillic-alternativnyj-nonascii-translation-table - 'translation-table)) + "Cyrillic-ALT" `((charset alternativnyj) (coding-system cyrillic-alternativnyj) (coding-priority cyrillic-alternativnyj) + (nonascii-translation . alternativnyj) - (input-method . "cyrillic-jcuken") + (input-method . "russian-typewriter") (features cyril-util) (unibyte-display . cyrillic-alternativnyj) (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!") diff --cc lisp/language/devan-util.el index ccbaf36e64c,f2f7873b33e..4e3fbc9a257 --- a/lisp/language/devan-util.el +++ b/lisp/language/devan-util.el @@@ -51,9 -49,9 +49,14 @@@ (defconst devanagari-consonant "[$,15U(B-$,15y68(B-$,16?(B]") ++ ;;("$,16B(B" . nil) ++ ;;("$,16A(B" . nil) ++ ;;("$,16C(B" . nil) ++ ++ (defconst devanagari-composable-pattern (concat - "\\([$,15E(B-$,15T6@6A(B][$,15A5B(B]?\\)\\|$,15C(B" + "\\([$,15E(B-$,15T6@6A(B][$,15A5B(B]?\\)\\|[$,15C6D(B]" "\\|\\(" "\\(?:\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?\\(?:[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?[$,15U(B-$,15y68(B-$,16?(B]$,16-(B\\)?" "[$,15U(B-$,15y68(B-$,16?(B]\\(?:$,16-(B\\|[$,15~(B-$,16-6B6C(B]?[$,15B5A(B]?\\)?" @@@ -84,11 -89,11 +94,6 @@@ (set-buffer-modified-p buffer-modified-p) (- (point-max) (point-min)))))) --(defun devanagari-range (from to) -- "Make the list of the integers of range FROM to TO." -- (let (result) -- (while (<= from to) (setq result (cons to result) to (1- to))) result)) -- (defun devanagari-regexp-of-hashtbl-keys (hashtbl) "Return a regular expression that matches all keys in hashtable HASHTBL." (let ((max-specpdl-size 1000)) @@@ -99,21 -104,19 +104,18 @@@ dummy) (function (lambda (x y) (> (length x) (length y)))))))) - (defun devanagari-composition-function (from to pattern &optional string) - "Compose Devanagari characters in REGION, or STRING if specified. - Assume that the REGION or STRING must fully match the composable - PATTERN regexp." - (if string (devanagari-compose-syllable-string string) - (devanagari-compose-syllable-region from to)) - (- to from)) -- - ;; Register a function to compose Devanagari characters. - (mapc - (function (lambda (ucs) - (aset composition-function-table (decode-char 'ucs ucs) - (list (cons devanagari-composable-pattern - 'devanagari-composition-function))))) - (nconc '(#x0903) (devanagari-range #x0905 #x0939) (devanagari-range #x0958 #x0961))) + ;;;###autoload + (defun devanagari-composition-function (pos &optional string) + "Compose Devanagari characters after the position POS. + If STRING is not nil, it is a string, and POS is an index to the string. + In this case, compose characters after POS of the string." + (if string + ;; Not yet implemented. + nil + (goto-char pos) + (if (looking-at devanagari-composable-pattern) + (prog1 (match-end 0) + (devanagari-compose-syllable-region pos (match-end 0)))))) ;; Notes on conversion steps. @@@ -490,11 -493,11 +492,10 @@@ preferred rule from the sanskrit fonts. (defvar dev-glyph-glyph-2-regexp (devanagari-regexp-of-hashtbl-keys dev-glyph-glyph-2-hash)) -- (defun dev-charseq (from &optional to) (if (null to) (setq to from)) - (mapcar (function (lambda (x) (indian-glyph-char x 'devanagari))) - (devanagari-range from to))) - (devanagari-range (make-char 'devanagari-glyph from) - (make-char 'devanagari-glyph to))) ++ (number-sequence (decode-char 'devanagari-cdac from) ++ (decode-char 'devanagari-cdac to))) (defvar dev-glyph-cvn (append @@@ -564,84 -566,84 +565,89 @@@ (defun devanagari-compose-syllable-region (from to) "Compose devanagari syllable in region FROM to TO." (let ((glyph-str nil) (cons-num 0) glyph-str-list - (last-halant nil) (preceding-r nil) (last-modifier nil) - (last-char (char-before to)) match-str - glyph-block split-pos) + (last-halant nil) (preceding-r nil) (last-modifier nil) + (last-char (char-before to)) match-str + glyph-block split-pos) (save-excursion (save-restriction - ;;; *** char-to-glyph conversion *** - ;; Special rule 1. -- Last halant must be preserved. - (if (eq last-char ?$,16-(B) - (progn - (setq last-halant t) - (narrow-to-region from (1- to))) - (narrow-to-region from to) - ;; note if the last char is modifier. - (if (or (eq last-char ?$,15A(B) (eq last-char ?$,15B(B)) - (setq last-modifier t))) - (goto-char (point-min)) - ;; Special rule 2. -- preceding "r halant" must be modifier. - (when (looking-at "$,15p6-(B.") - (setq preceding-r t) - (goto-char (+ 2 (point)))) - ;; translate the rest characters into glyphs - (while (re-search-forward dev-char-glyph-regexp nil t) - (setq match-str (match-string 0)) - (setq glyph-str - (concat glyph-str - (gethash match-str dev-char-glyph-hash))) - ;; count the number of consonant-glyhs. - (if (string-match devanagari-consonant match-str) - (setq cons-num (1+ cons-num)))) - ;; preceding-r must be attached before the anuswar if exists. - (if preceding-r - (if last-modifier - (setq glyph-str (concat (substring glyph-str 0 -1) - "$,4"'(B" (substring glyph-str -1))) - (setq glyph-str (concat glyph-str "$,4"'(B")))) - (if last-halant (setq glyph-str (concat glyph-str "$,4""(B"))) - ;;; *** glyph-to-glyph conversion *** - (when (string-match dev-glyph-glyph-regexp glyph-str) - (setq glyph-str - (replace-match (gethash (match-string 0 glyph-str) - dev-glyph-glyph-hash) - nil t glyph-str)) - (if (and (> cons-num 1) - (string-match dev-glyph-glyph-2-regexp glyph-str)) - (setq glyph-str - (replace-match (gethash (match-string 0 glyph-str) - dev-glyph-glyph-2-hash) - nil t glyph-str)))) - ;;; *** glyph reordering *** - (while (setq split-pos (string-match "$,4""(B\\|.$" glyph-str)) - (setq glyph-block (substring glyph-str 0 (1+ split-pos))) - (setq glyph-str (substring glyph-str (1+ split-pos))) - (setq - glyph-block - (if (string-match dev-glyph-right-modifier-regexp glyph-block) - (sort (string-to-list glyph-block) - (function (lambda (x y) - (< (get-char-code-property x 'composition-order) - (get-char-code-property y 'composition-order))))) - (sort (string-to-list glyph-block) - (function (lambda (x y) - (let ((xo (get-char-code-property x 'composition-order)) - (yo (get-char-code-property y 'composition-order))) - (if (= xo 2) nil (if (= yo 2) t (< xo yo))))))))) - (setq glyph-str-list (nconc glyph-str-list glyph-block))) - ;; concatenate and attach reference-points. - (setq glyph-str - (cdr - (apply - 'nconc - (mapcar - (function (lambda (x) - (list - (or (get-char-code-property x 'reference-point) - '(5 . 3) ;; default reference point. - ) - x))) - glyph-str-list)))))) + ;;; *** char-to-glyph conversion *** + ;; Special rule 1. -- Last halant must be preserved. + (if (eq last-char ?$,16-(B) + (progn + (setq last-halant t) + (narrow-to-region from (1- to))) + (narrow-to-region from to) + ;; note if the last char is modifier. + (if (or (eq last-char ?$,15A(B) (eq last-char ?$,15B(B)) + (setq last-modifier t))) + (goto-char (point-min)) + ;; Special rule 2. -- preceding "r halant" must be modifier. + (when (looking-at "$,15p6-(B.") + (setq preceding-r t) + (goto-char (+ 2 (point)))) + ;; translate the rest characters into glyphs - (while (re-search-forward dev-char-glyph-regexp nil t) - (setq match-str (match-string 0)) - (setq glyph-str - (concat glyph-str - (gethash match-str dev-char-glyph-hash))) - ;; count the number of consonant-glyhs. - (if (string-match devanagari-consonant match-str) - (setq cons-num (1+ cons-num)))) ++ (while (not (eobp)) ++ (if (looking-at dev-char-glyph-regexp) ++ (let ((end (match-end 0))) ++ (setq match-str (match-string 0) ++ glyph-str ++ (concat glyph-str ++ (gethash match-str dev-char-glyph-hash))) ++ ;; count the number of consonant-glyhs. ++ (if (string-match devanagari-consonant match-str) ++ (setq cons-num (1+ cons-num))) ++ (goto-char end)) ++ (setq glyph-str (concat glyph-str (string (following-char)))) ++ (forward-char 1))) + ;; preceding-r must be attached before the anuswar if exists. + (if preceding-r + (if last-modifier + (setq glyph-str (concat (substring glyph-str 0 -1) + "$,4"'(B" (substring glyph-str -1))) + (setq glyph-str (concat glyph-str "$,4"'(B")))) + (if last-halant (setq glyph-str (concat glyph-str "$,4""(B"))) + ;;; *** glyph-to-glyph conversion *** + (when (string-match dev-glyph-glyph-regexp glyph-str) + (setq glyph-str + (replace-match (gethash (match-string 0 glyph-str) + dev-glyph-glyph-hash) + nil t glyph-str)) + (if (and (> cons-num 1) + (string-match dev-glyph-glyph-2-regexp glyph-str)) + (setq glyph-str + (replace-match (gethash (match-string 0 glyph-str) + dev-glyph-glyph-2-hash) + nil t glyph-str)))) + ;;; *** glyph reordering *** + (while (setq split-pos (string-match "$,4""(B\\|.$" glyph-str)) + (setq glyph-block (substring glyph-str 0 (1+ split-pos))) + (setq glyph-str (substring glyph-str (1+ split-pos))) + (setq + glyph-block + (if (string-match dev-glyph-right-modifier-regexp glyph-block) + (sort (string-to-list glyph-block) + (function (lambda (x y) + (< (get-char-code-property x 'composition-order) + (get-char-code-property y 'composition-order))))) + (sort (string-to-list glyph-block) + (function (lambda (x y) + (let ((xo (get-char-code-property x 'composition-order)) + (yo (get-char-code-property y 'composition-order))) + (if (= xo 2) nil (if (= yo 2) t (< xo yo))))))))) + (setq glyph-str-list (nconc glyph-str-list glyph-block))) + ;; concatenate and attach reference-points. + (setq glyph-str + (cdr + (apply + 'nconc + (mapcar + (function (lambda (x) + (list + (or (get-char-code-property x 'reference-point) + '(5 . 3) ;; default reference point. + ) + x))) + glyph-str-list)))))) (compose-region from to glyph-str))) (provide 'devan-util) diff --cc lisp/language/english.el index af7dbafdb71,3cc93a313f5..342dea6da98 --- a/lisp/language/english.el +++ b/lisp/language/english.el @@@ -1,7 -1,8 +1,10 @@@ -;;; english.el --- support for English +;;; english.el --- support for English -*- no-byte-compile: t -*- ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. -;; Copyright (C) 2002 Free Software Foundation, Inc. ++;; Licensed to the Free Software Foundation. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 ;; Keywords: multibyte character, character set, syntax, category diff --cc lisp/language/european.el index 7c8d728523c,ef51d3eeac9..e56c5f49df4 --- a/lisp/language/european.el +++ b/lisp/language/european.el @@@ -1,8 -1,8 +1,11 @@@ ;;; european.el --- support for European languages -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1995, 1997, 2001 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. -;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. ++;; Licensed to the Free Software Foundation. +;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 ;; Keywords: multilingual, European @@@ -92,7 -88,7 +93,8 @@@ character set which supports the follow and Swedish. We also have specific language environments for the following languages: For Czech, \"Czech\". + For Croatian, \"Croatian\". + For Polish, \"Polish\". For Romanian, \"Romanian\". For Slovak, \"Slovak\".")) '("European")) @@@ -241,29 -274,127 +280,130 @@@ addition of the Euro sign and some addi Latin-9 is sometimes nicknamed `Latin-0'.")) '("European")) -(define-coding-system 'iso-latin-7 - "ISO 2022 based 8-bit encoding for Latin-7 (MIME:ISO-8859-13)." - :coding-type 'charset - ;; `0' for `Latin-0' - :mnemonic ?* - :charset-list '(iso-8859-13) - :mime-charset 'iso-8859-13) - -(define-coding-system-alias 'iso-8859-13 'iso-latin-7) -(define-coding-system-alias 'latin-7 'iso-latin-7) - + (define-coding-system 'windows-1250 + "windows-1250 (Central European) encoding (MIME: WINDOWS-1250)" + :coding-type 'charset + :mnemonic ?* + :charset-list '(windows-1250) + :mime-charset 'windows-1250) + (define-coding-system-alias 'cp1250 'windows-1250) + + (define-coding-system 'windows-1252 + "windows-1252 (Western European) encoding (MIME: WINDOWS-1252)" + :coding-type 'charset + :mnemonic ?* + :charset-list '(windows-1252) + :mime-charset 'windows-1252) + (define-coding-system-alias 'cp1252 'windows-1252) + + (define-coding-system 'windows-1254 + "windows-1254 (Turkish) encoding (MIME: WINDOWS-1254)" + :coding-type 'charset + :mnemonic ?* + :charset-list '(windows-1254) + :mime-charset 'windows-1254) + (define-coding-system-alias 'cp1254 'windows-1254) + + (define-coding-system 'windows-1257 + "windows-1257 (Baltic) encoding (MIME: WINDOWS-1257)" + :coding-type 'charset + :mnemonic ?* + :charset-list '(windows-1257) + :mime-charset 'windows-1257) + (define-coding-system-alias 'cp1257 'windows-1257) + + (define-coding-system 'cp850 + "DOS codepage 850 (Western European)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp850) + :mime-charset 'cp850) + (define-coding-system-alias 'ibm850 'cp850) + + (define-coding-system 'cp852 + "DOS codepage 852 (Slavic)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp852) + :mime-charset 'cp852) + (define-coding-system-alias 'ibm852 'cp852) + + (define-coding-system 'cp857 + "DOS codepage 857 (Turkish)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp857) + :mime-charset 'cp857) + (define-coding-system-alias 'ibm857 'cp857) + + (define-coding-system 'cp858 + "Codepage 858 (Multilingual Latin I + Euro)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp858) + :mime-charset 'cp858) + + (define-coding-system 'cp860 + "DOS codepage 860 (Portuguese)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp860) + :mime-charset 'cp860) + (define-coding-system-alias 'ibm860 'cp860) + + (define-coding-system 'cp861 + "DOS codepage 861 (Icelandic)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp861) + :mime-charset 'cp861) + (define-coding-system-alias 'ibm861 'cp861) + + (define-coding-system 'cp863 + "DOS codepage 863 (French Canadian)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp863) + :mime-charset 'cp863) + (define-coding-system-alias 'ibm863 'cp863) + + (define-coding-system 'cp865 + "DOS codepage 865 (Norwegian/Danish)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp865) + :mime-charset 'cp865) + (define-coding-system-alias 'ibm865 'cp865) + + (define-coding-system 'cp437 + "DOS codepage 437" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp437) + :mime-charset 'cp437) + (define-coding-system-alias 'ibm437 'cp437) + +(set-language-info-alist + "Dutch" '((tutorial . "TUTORIAL.nl") - (charset ascii latin-iso8859-1) ++ (charset iso-8859-1) + (coding-system iso-latin-1 iso-latin-9) + (coding-priority iso-latin-1) - (nonascii-translation . latin-iso8859-1) - (unibyte-syntax . "latin-1") ++ (nonascii-translation . iso-8859-1) + (unibyte-display . iso-latin-1) + (input-method . "dutch") + (sample-text . "Er is een aantal manieren waarop je dit kan doen") + (documentation . "\ +This language environment is almost the same as Latin-1, +but it selects the Dutch tutorial and input method.")) + '("European")) + (set-language-info-alist "German" '((tutorial . "TUTORIAL.de") - (charset ascii latin-iso8859-1) + (charset iso-8859-1) (coding-system iso-latin-1 iso-latin-9) (coding-priority iso-latin-1) + (nonascii-translation . iso-8859-1) (input-method . "german-postfix") - (nonascii-translation . latin-iso8859-1) - (unibyte-syntax . "latin-1") (unibyte-display . iso-latin-1) (sample-text . "\ German (Deutsch Nord) Guten Tag @@@ -286,32 -416,15 +425,30 @@@ Additionally, it selects the German tut (sample-text . "French (Fran,Ag(Bais) Bonjour, Salut") (documentation . "\ This language environment is almost the same as Latin-1, -but it selects the French tutorial.")) +but it selects the French tutorial and input method.")) + '("European")) + +(set-language-info-alist + "Italian" '((tutorial . "TUTORIAL.it") - (charset ascii latin-iso8859-1) ++ (charset iso-8859-1) + (coding-system iso-latin-1 iso-latin-9) + (coding-priority iso-latin-1) - (nonascii-translation . latin-iso8859-1) - (unibyte-syntax . "latin-1") ++ (nonascii-translation . iso-8859-1) + (unibyte-display . iso-latin-1) + (input-method . "italian-postfix") + (sample-text . "Salve, ciao!") + (documentation . "\ +This language environment is almost the same as Latin-1, +but sets the default input method to \"italian-postfix\". +Additionally, it selects the Italian tutorial.")) '("European")) (set-language-info-alist - "Slovenian" '((charset . (ascii latin-iso8859-2)) - (coding-system . (iso-8859-2)) + "Slovenian" '((charset iso-8859-2) + (coding-system . (iso-8859-2 windows-1250)) (coding-priority . (iso-8859-2)) - (nonascii-translation . latin-iso8859-2) + (nonascii-translation . iso-8859-2) - (input-method . "latin-2-postfix") + (input-method . "slovenian") - (unibyte-syntax . "latin-2") (unibyte-display . iso-8859-2) (tutorial . "TUTORIAL.sl") (sample-text . ",B.(Belimo vam uspe,B9(Ben dan!") @@@ -322,12 -433,11 +459,11 @@@ but it selects the Slovenian tutorial a (set-language-info-alist "Spanish" '((tutorial . "TUTORIAL.es") - (charset ascii latin-iso8859-1) + (charset iso-8859-1) (coding-system iso-latin-1 iso-latin-9) (coding-priority iso-latin-1) - (nonascii-translation . iso-8859-1) (input-method . "spanish-postfix") - (nonascii-translation . latin-iso8859-1) - (unibyte-syntax . "latin-1") ++ (nonascii-translation . iso-8859-1) (unibyte-display . iso-latin-1) (sample-text . "Spanish (Espa,Aq(Bol) ,A!(BHola!") (documentation . "\ @@@ -342,30 -466,39 +478,39 @@@ and it selects the Spanish tutorial.") ;; "Latin-3" language environment. (set-language-info-alist - "Turkish" '((charset ascii latin-iso8859-9) - (coding-system iso-latin-5 iso-latin-3) + "Turkish" '((charset iso-8859-9) + (coding-system iso-latin-5 windows-1254 iso-latin-3) (coding-priority iso-latin-5) - (nonascii-translation . latin-iso8859-9) - (unibyte-syntax . "latin-5") + (nonascii-translation . iso-8859-9) (unibyte-display . iso-latin-5) (input-method . "turkish-postfix") - (sample-text . "Turkish (T,A|(Brk,Ag(Be) Merhaba") + (sample-text . "Turkish (T,M|(Brk,Mg(Be) Merhaba") - (documentation . t))) + (setup-function + . (lambda () + (set-case-syntax-pair ?I ?,C9(B (standard-case-table)) + (set-case-syntax-pair ?,C)(B ?i (standard-case-table)))) + (exit-function + . (lambda () + (set-case-syntax-pair ?I ?i (standard-case-table)) + (set-case-syntax ?,C9(B "w" (standard-case-table)) + (set-case-syntax ?,C)(B "w" (standard-case-table)))) + (documentation . "Support for Turkish. + Differs from the Latin-5 environment in using the `turkish-postfix' input + method and applying Turkish case rules for the characters i, I, ,C9(B, ,C)(B."))) ;; Polish ISO 8859-2 environment. ;; Maintainer: Wlodek Bzyl ;; Keywords: multilingual, Polish (set-language-info-alist - "Polish" '((charset . (ascii latin-iso8859-2)) - (coding-system . (iso-8859-2)) - "Polish" '((charset . (iso-8859-2)) - (coding-system . (iso-8859-2 windows-1250)) -- (coding-priority . (iso-8859-2)) - (nonascii-translation . iso-8859-2) ++ "Polish" '((charset iso-8859-2) ++ (coding-system iso-8859-2 windows-1250) ++ (coding-priority iso-8859-2) (input-method . "polish-slash") - (nonascii-translation . latin-iso8859-2) - (unibyte-syntax . "latin-2") ++ (nonascii-translation . iso-8859-2) (unibyte-display . iso-8859-2) (tutorial . "TUTORIAL.pl") - (sample-text . "P,As(Bjd,B<(B, ki,Bq(B-,B?(Be t,Bj(B chmurno,B6f(B w g,B31(Bb flaszy") + (sample-text . "P,Bs(Bjd,B<(B, ki,Bq(B-,B?(Be t,Bj(B chmurno,B6f(B w g,B31(Bb flaszy") (documentation . t)) '("European")) @@@ -389,10 -513,8 +534,8 @@@ (set-language-info-alist "Latin-7" `((coding-system latin-7) (coding-priority latin-7) - (nonascii-translation . ,(get 'decode-iso-latin-7 - 'translation-table)) + (nonascii-translation . iso-8859-13) - ;; Fixme: input-method + (input-method . "latin-prefix") - (features code-pages) (documentation . "Support for Latin-7, e.g. Latvian, Lithuanian.")) '("European")) @@@ -413,220 -534,76 +555,97 @@@ (documentation . "Support for Latvian.")) '("European")) +(set-language-info-alist + "Swedish" '((tutorial . "TUTORIAL.sv") - (charset ascii latin-iso8859-1) ++ (charset iso-8859-1) + (coding-system iso-latin-1) + (coding-priority iso-latin-1) - (nonascii-translation . latin-iso8859-1) - (unibyte-syntax . "latin-1") ++ (nonascii-translation . iso-8859-1) + (unibyte-display . iso-latin-1) + (sample-text . "Goddag Hej") + (documentation . "Support for Swedish")) + '("European")) + +(set-language-info-alist - "Croatian" '((charset . (ascii latin-iso8859-2)) - (coding-system . (iso-8859-2)) - (coding-priority . (iso-8859-2)) ++ "Croatian" '((charset iso-8859-2) ++ (coding-system iso-8859-2) ++ (coding-priority iso-8859-2) + (input-method . "croatian") - (nonascii-translation . latin-iso8859-2) - (unibyte-syntax . "latin-2") ++ (nonascii-translation . iso-8859-2) + (unibyte-display . iso-8859-2) + (documentation . "Support for Croatian with Latin-2 encoding.")) + '("European")) ++ + + (define-coding-system 'mac-roman + "Mac Roman Encoding (MIME:MACINTOSH)." + :coding-type 'charset + :mnemonic ?M + :charset-list '(mac-roman) + :mime-charset 'macintosh) + + (define-coding-system 'next + "NeXTstep encoding" + :coding-type 'charset + :mnemonic ?* + :charset-list '(next) + :mime-charset 'next) + + (define-coding-system 'hp-roman8 + "Hewlet-Packard roman-8 encoding (MIME:ROMAN-8)" + :coding-type 'charset + :mnemonic ?* + :charset-list '(hp-roman8) + :mime-charset 'hp-roman8) + (define-coding-system-alias 'roman8 'hp-roman8) + + (define-coding-system 'adobe-standard-encoding + "Adobe `standard' encoding for PostScript" + :coding-type 'charset + :mnemonic ?* + :charset-list '(adobe-standard-encoding) + :mime-charset 'adobe-standard-encoding) + - ;; Definitions for the Mac Roman character sets and coding system. - ;; The Mac Roman encoding uses all 128 code points in the range 128 to - ;; 255 for actual characters. Emacs decodes them to one of the - ;; following character sets. - ;; ascii, latin-iso8859-1, mule-unicode-0100-24ff, - ;; mule-unicode-2500-33ff, mule-unicode-e000-ffff - - (let - ((encoding-vector (make-vector 256 nil)) - (i 0) - (vec ;; mac-roman (128..255) -> UCS mapping - [ #x00C4 ;; 128:LATIN CAPITAL LETTER A WITH DIAERESIS - #x00C5 ;; 129:LATIN CAPITAL LETTER A WITH RING ABOVE - #x00C7 ;; 130:LATIN CAPITAL LETTER C WITH CEDILLA - #x00C9 ;; 131:LATIN CAPITAL LETTER E WITH ACUTE - #x00D1 ;; 132:LATIN CAPITAL LETTER N WITH TILDE - #x00D6 ;; 133:LATIN CAPITAL LETTER O WITH DIAERESIS - #x00DC ;; 134:LATIN CAPITAL LETTER U WITH DIAERESIS - #x00E1 ;; 135:LATIN SMALL LETTER A WITH ACUTE - #x00E0 ;; 136:LATIN SMALL LETTER A WITH GRAVE - #x00E2 ;; 137:LATIN SMALL LETTER A WITH CIRCUMFLEX - #x00E4 ;; 138:LATIN SMALL LETTER A WITH DIAERESIS - #x00E3 ;; 139:LATIN SMALL LETTER A WITH TILDE - #x00E5 ;; 140:LATIN SMALL LETTER A WITH RING ABOVE - #x00E7 ;; 141:LATIN SMALL LETTER C WITH CEDILLA - #x00E9 ;; 142:LATIN SMALL LETTER E WITH ACUTE - #x00E8 ;; 143:LATIN SMALL LETTER E WITH GRAVE - #x00EA ;; 144:LATIN SMALL LETTER E WITH CIRCUMFLEX - #x00EB ;; 145:LATIN SMALL LETTER E WITH DIAERESIS - #x00ED ;; 146:LATIN SMALL LETTER I WITH ACUTE - #x00EC ;; 147:LATIN SMALL LETTER I WITH GRAVE - #x00EE ;; 148:LATIN SMALL LETTER I WITH CIRCUMFLEX - #x00EF ;; 149:LATIN SMALL LETTER I WITH DIAERESIS - #x00F1 ;; 150:LATIN SMALL LETTER N WITH TILDE - #x00F3 ;; 151:LATIN SMALL LETTER O WITH ACUTE - #x00F2 ;; 152:LATIN SMALL LETTER O WITH GRAVE - #x00F4 ;; 153:LATIN SMALL LETTER O WITH CIRCUMFLEX - #x00F6 ;; 154:LATIN SMALL LETTER O WITH DIAERESIS - #x00F5 ;; 155:LATIN SMALL LETTER O WITH TILDE - #x00FA ;; 156:LATIN SMALL LETTER U WITH ACUTE - #x00F9 ;; 157:LATIN SMALL LETTER U WITH GRAVE - #x00FB ;; 158:LATIN SMALL LETTER U WITH CIRCUMFLEX - #x00FC ;; 159:LATIN SMALL LETTER U WITH DIAERESIS - #x2020 ;; 160:DAGGER - #x00B0 ;; 161:DEGREE SIGN - #x00A2 ;; 162:CENT SIGN - #x00A3 ;; 163:POUND SIGN - #x00A7 ;; 164:SECTION SIGN - #x2022 ;; 165:BULLET - #x00B6 ;; 166:PILCROW SIGN - #x00DF ;; 167:LATIN SMALL LETTER SHARP S - #x00AE ;; 168:REGISTERED SIGN - #x00A9 ;; 169:COPYRIGHT SIGN - #x2122 ;; 170:TRADE MARK SIGN - #x00B4 ;; 171:ACUTE ACCENT - #x00A8 ;; 172:DIAERESIS - #x2260 ;; 173:NOT EQUAL TO - #x00C6 ;; 174:LATIN CAPITAL LETTER AE - #x00D8 ;; 175:LATIN CAPITAL LETTER O WITH STROKE - #x221E ;; 176:INFINITY - #x00B1 ;; 177:PLUS-MINUS SIGN - #x2264 ;; 178:LESS-THAN OR EQUAL TO - #x2265 ;; 179:GREATER-THAN OR EQUAL TO - #x00A5 ;; 180:YEN SIGN - #x00B5 ;; 181:MICRO SIGN - #x2202 ;; 182:PARTIAL DIFFERENTIAL - #x2211 ;; 183:N-ARY SUMMATION - #x220F ;; 184:N-ARY PRODUCT - #x03C0 ;; 185:GREEK SMALL LETTER PI - #x222B ;; 186:INTEGRAL - #x00AA ;; 187:FEMININE ORDINAL INDICATOR - #x00BA ;; 188:MASCULINE ORDINAL INDICATOR - #x03A9 ;; 189:GREEK CAPITAL LETTER OMEGA - #x00E6 ;; 190:LATIN SMALL LETTER AE - #x00F8 ;; 191:LATIN SMALL LETTER O WITH STROKE - #x00BF ;; 192:INVERTED QUESTION MARK - #x00A1 ;; 193:INVERTED EXCLAMATION MARK - #x00AC ;; 194:NOT SIGN - #x221A ;; 195:SQUARE ROOT - #x0192 ;; 196:LATIN SMALL LETTER F WITH HOOK - #x2248 ;; 197:ALMOST EQUAL TO - #x2206 ;; 198:INCREMENT - #x00AB ;; 199:LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - #x00BB ;; 200:RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - #x2026 ;; 201:HORIZONTAL ELLIPSIS - #x00A0 ;; 202:NO-BREAK SPACE - #x00C0 ;; 203:LATIN CAPITAL LETTER A WITH GRAVE - #x00C3 ;; 204:LATIN CAPITAL LETTER A WITH TILDE - #x00D5 ;; 205:LATIN CAPITAL LETTER O WITH TILDE - #x0152 ;; 206:LATIN CAPITAL LIGATURE OE - #x0153 ;; 207:LATIN SMALL LIGATURE OE - #x2013 ;; 208:EN DASH - #x2014 ;; 209:EM DASH - #x201C ;; 210:LEFT DOUBLE QUOTATION MARK - #x201D ;; 211:RIGHT DOUBLE QUOTATION MARK - #x2018 ;; 212:LEFT SINGLE QUOTATION MARK - #x2019 ;; 213:RIGHT SINGLE QUOTATION MARK - #x00F7 ;; 214:DIVISION SIGN - #x25CA ;; 215:LOZENGE - #x00FF ;; 216:LATIN SMALL LETTER Y WITH DIAERESIS - #x0178 ;; 217:LATIN CAPITAL LETTER Y WITH DIAERESIS - #x2044 ;; 218:FRACTION SLASH - #x20AC ;; 219:EURO SIGN - #x2039 ;; 220:SINGLE LEFT-POINTING ANGLE QUOTATION MARK - #x203A ;; 221:SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - #xFB01 ;; 222:LATIN SMALL LIGATURE FI - #xFB02 ;; 223:LATIN SMALL LIGATURE FL - #x2021 ;; 224:DOUBLE DAGGER - #x00B7 ;; 225:MIDDLE DOT - #x201A ;; 226:SINGLE LOW-9 QUOTATION MARK - #x201E ;; 227:DOUBLE LOW-9 QUOTATION MARK - #x2030 ;; 228:PER MILLE SIGN - #x00C2 ;; 229:LATIN CAPITAL LETTER A WITH CIRCUMFLEX - #x00CA ;; 230:LATIN CAPITAL LETTER E WITH CIRCUMFLEX - #x00C1 ;; 231:LATIN CAPITAL LETTER A WITH ACUTE - #x00CB ;; 232:LATIN CAPITAL LETTER E WITH DIAERESIS - #x00C8 ;; 233:LATIN CAPITAL LETTER E WITH GRAVE - #x00CD ;; 234:LATIN CAPITAL LETTER I WITH ACUTE - #x00CE ;; 235:LATIN CAPITAL LETTER I WITH CIRCUMFLEX - #x00CF ;; 236:LATIN CAPITAL LETTER I WITH DIAERESIS - #x00CC ;; 237:LATIN CAPITAL LETTER I WITH GRAVE - #x00D3 ;; 238:LATIN CAPITAL LETTER O WITH ACUTE - #x00D4 ;; 239:LATIN CAPITAL LETTER O WITH CIRCUMFLEX - #xF8FF ;; 240:Apple logo - #x00D2 ;; 241:LATIN CAPITAL LETTER O WITH GRAVE - #x00DA ;; 242:LATIN CAPITAL LETTER U WITH ACUTE - #x00DB ;; 243:LATIN CAPITAL LETTER U WITH CIRCUMFLEX - #x00D9 ;; 244:LATIN CAPITAL LETTER U WITH GRAVE - #x0131 ;; 245:LATIN SMALL LETTER DOTLESS I - #x02C6 ;; 246:MODIFIER LETTER CIRCUMFLEX ACCENT - #x02DC ;; 247:SMALL TILDE - #x00AF ;; 248:MACRON - #x02D8 ;; 249:BREVE - #x02D9 ;; 250:DOT ABOVE - #x02DA ;; 251:RING ABOVE - #x00B8 ;; 252:CEDILLA - #x02DD ;; 253:DOUBLE ACUTE ACCENT - #x02DB ;; 254:OGONEK - #x02C7 ;; 255:CARON - ]) - translation-table) - (while (< i 128) - (aset encoding-vector i i) - (setq i (1+ i))) - (while (< i 256) - (aset encoding-vector i - (decode-char 'ucs (aref vec (- i 128)))) - (setq i (1+ i))) - (setq translation-table - (make-translation-table-from-vector encoding-vector)) - (define-translation-table 'mac-roman-decoder translation-table) - (define-translation-table 'mac-roman-encoder - (char-table-extra-slot translation-table 0))) - - (define-ccl-program decode-mac-roman - `(4 - ((loop - (read r1) - (if (r1 < 128) ;; ASCII - (r0 = ,(charset-id 'ascii)) - (if (r1 < 160) - (r0 = ,(charset-id 'eight-bit-control)) - (r0 = ,(charset-id 'eight-bit-graphic)))) - (translate-character mac-roman-decoder r0 r1) - (write-multibyte-character r0 r1) - (repeat)))) - "CCL program to decode Mac Roman") - - (define-ccl-program encode-mac-roman - `(1 - ((loop - (read-multibyte-character r0 r1) - (translate-character ucs-mule-to-mule-unicode r0 r1) - (translate-character mac-roman-encoder r0 r1) - (if (r0 != ,(charset-id 'ascii)) - (if (r0 != ,(charset-id 'eight-bit-graphic)) - (if (r0 != ,(charset-id 'eight-bit-control)) - (r1 = ??)))) - (write-repeat r1)))) - "CCL program to encode Mac Roman") - - (make-coding-system - 'mac-roman 4 ?M - "Mac Roman Encoding (MIME:MACINTOSH)." - '(decode-mac-roman . encode-mac-roman) - (list (cons 'safe-chars (get 'mac-roman-encoder 'translation-table)) - '(valid-codes (0 . 255)) - '(mime-charset . macintosh))) ; per IANA, rfc1345 + ;; For automatic composing of diacritics and combining marks. + (dolist (range '( ;; combining diacritical marks + (#x0300 #x0314 (tc . bc)) + (#x0315 (tr . bl)) + (#x0316 #x0319 (bc . tc)) + (#x031A (tr . cl)) + (#x031B #x0320 (bc . tc)) + (#x0321 (Br . tr)) + (#x0322 (Br . tl)) + (#x0323 #x0333 (bc . tc)) + (#x0334 #x0338 (Bc . Bc)) + (#x0339 #x033C (bc . tc)) + (#x033D #x033F (tc . bc)) + (#x0340 (tl . bc)) + (#x0341 (tr . bc)) + (#x0342 #x0344 (tc . bc)) + (#x0345 (bc . tc)) + (#x0346 (tc . bc)) + (#x0347 #x0349 (bc . tc)) + (#x034A #x034C (tc . bc)) + (#x034D #x034E (bc . tc)) + ;; combining diacritical marks for symbols + (#x20D0 #x20D1 (tc . bc)) + (#x20D2 #x20D3 (Bc . Bc)) + (#x20D4 #x20D7 (tc . bc)) + (#x20D8 #x20DA (Bc . Bc)) + (#x20DB #x20DC (tc . bc)) + (#x20DD #x20E0 (Bc . Bc)) + (#x20E1 (tc . bc)) + (#x20E2 #x20E3 (Bc . Bc)))) + (let* ((from (car range)) + (to (if (= (length range) 3) + (nth 1 range) + from)) + (composition (car (last range)))) + (while (<= from to) + (put-char-code-property from 'diacritic-composition composition) + (aset composition-function-table from 'diacritic-composition-function) + (setq from (1+ from))))) (defconst diacritic-composition-pattern "\\C^\\c^+") diff --cc lisp/language/georgian.el index 027c361c00b,70f3f932b6c..f38529d20aa --- a/lisp/language/georgian.el +++ b/lisp/language/georgian.el @@@ -1,6 -1,6 +1,6 @@@ -;;; georgian.el --- language support for Georgian +;;; georgian.el --- language support for Georgian -*- no-byte-compile: t -*- - ;; Copyright (C) 2001 Free Software Foundation, Inc. + ;; Copyright (C) 2001, 2003 Free Software Foundation, Inc. ;; Author: Dave Love ;; Keywords: i18n diff --cc lisp/language/greek.el index b8843960723,d183d0c617e..6061ed203c8 --- a/lisp/language/greek.el +++ b/lisp/language/greek.el @@@ -1,7 -1,8 +1,11 @@@ -;;; greek.el --- support for Greek +;;; greek.el --- support for Greek -*- no-byte-compile: t -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. + ;; Copyright (C) 2002 Free Software Foundation, Inc. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 ;; Keywords: multilingual, Greek @@@ -38,13 -38,43 +41,43 @@@ (define-coding-system-alias 'iso-8859-7 'greek-iso-8bit) + (define-coding-system 'windows-1253 + "windows-1253 encoding for Greek" + :coding-type 'charset + :mnemonic ?g + :charset-list '(windows-1253) + :mime-charset 'windows-1253) + (define-coding-system-alias 'cp1253 'windows-1253) + + (define-coding-system 'cp737 + "Codepage 737 (PC Greek)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp737) + :mime-charset 'cp737) + + (define-coding-system 'cp851 + "DOS codepage 851 (Greek)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp851) + :mime-charset 'cp851) + (define-coding-system-alias 'ibm851 'cp851) + + (define-coding-system 'cp869 + "DOS codepage 869 (Greek)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp869) + :mime-charset 'cp869) + (define-coding-system-alias 'ibm869 'cp869) + (set-language-info-alist - "Greek" '((charset . (greek-iso8859-7)) - (coding-system . (greek-iso-8bit)) + "Greek" '((charset iso-8859-7) - (coding-system . (greek-iso-8bit windows-1253 cp851 cp869)) ++ (coding-system greek-iso-8bit windows-1253 cp851 cp869) (coding-priority greek-iso-8bit) - (nonascii-translation . greek-iso8859-7) + (nonascii-translation . iso-8859-7) (input-method . "greek") - (unibyte-display . greek-iso-8bit) (documentation . t))) (provide 'greek) diff --cc lisp/language/hebrew.el index 2bc79ff810d,585d2f4196b..871ec1b223e --- a/lisp/language/hebrew.el +++ b/lisp/language/hebrew.el @@@ -1,8 -1,8 +1,11 @@@ -;;; hebrew.el --- support for Hebrew -*- coding: iso-2022-7bit; -*- +;;; hebrew.el --- support for Hebrew -*- coding: iso-2022-7bit; no-byte-compile: t -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. - ;; Copyright (C) 2001 Free Software Foundation, Inc. ++;; Licensed to the Free Software Foundation. + ;; Copyright (C) 2001, 2002 Free Software Foundation, Inc. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 ;; Keywords: multilingual, Hebrew @@@ -49,10 -48,10 +51,10 @@@ (define-coding-system-alias 'iso-8859-8-i 'hebrew-iso-8bit) (set-language-info-alist - "Hebrew" '((charset . (hebrew-iso8859-8)) - "Hebrew" '((charset . iso-8859-8) ++ "Hebrew" '((charset iso-8859-8) (coding-priority hebrew-iso-8bit) - (coding-system . (hebrew-iso-8bit)) - (nonascii-translation . hebrew-iso8859-8) + (coding-system hebrew-iso-8bit windows-1255 cp862) + (nonascii-translation . iso-8859-8) (input-method . "hebrew") (unibyte-display . hebrew-iso-8bit) (sample-text . "Hebrew ,Hylem(B") diff --cc lisp/language/ind-util.el index 60008cce48c,7a95388f81b..862ebf39e84 --- a/lisp/language/ind-util.el +++ b/lisp/language/ind-util.el @@@ -407,10 -213,12 +407,10 @@@ FUNCTION will be called 15 times. ;; trans-char -- nil / string / list of strings (when (and char trans-char) (if (stringp trans-char) (setq trans-char (list trans-char))) - (if (char-valid-p char) (setq char (char-to-string char))) + (if (characterp char) (setq char (char-to-string char))) (puthash char (car trans-char) encode-hash) - (mapc - (lambda (trans) - (puthash trans char decode-hash)) - trans-char)))) + (dolist (trans trans-char) + (puthash trans char decode-hash))))) (defun indian--map (f l1 l2) (while l1 @@@ -574,209 -347,201 +574,201 @@@ ;;; IS 13194 utilities -;; The following provide conversion between IS 13194 (ISCII) and UCS. - -(defvar is13194-default-repertory 'devanagari) - -(defvar is13194-repertory-to-ucs-script - `((DEF ?\x40 ,is13194-default-repertory) - (RMN ?\x41 ,is13194-default-repertory) - (DEV ?\x42 devanagari) - (BNG ?\x43 bengali) - (TML ?\x44 tamil) - (TLG ?\x45 telugu) - (ASM ?\x46 bengali) - (ORI ?\x47 oriya) - (KND ?\x48 kannada) - (MLM ?\x49 malayalam) - (GJR ?\x4a gujarati) - (PNJ ?\x4b gurmukhi))) - -;; for guiding find-variable function. -(defvar is13194-to-ucs-devanagari-hashtbl nil) -(defvar is13194-to-ucs-devanagari-regexp nil) -(defvar is13194-to-ucs-bengali-hashtbl nil) -(defvar is13194-to-ucs-bengali-regexp nil) -(defvar is13194-to-ucs-assamese-hashtbl nil) -(defvar is13194-to-ucs-assamese-regexp nil) -(defvar is13194-to-ucs-gurmukhi-hashtbl nil) -(defvar is13194-to-ucs-gurmukhi-regexp nil) -(defvar is13194-to-ucs-gujarati-hashtbl nil) -(defvar is13194-to-ucs-gujarati-regexp nil) -(defvar is13194-to-ucs-oriya-hashtbl nil) -(defvar is13194-to-ucs-oriya-regexp nil) -(defvar is13194-to-ucs-tamil-hashtbl nil) -(defvar is13194-to-ucs-tamil-regexp nil) -(defvar is13194-to-ucs-telugu-hashtbl nil) -(defvar is13194-to-ucs-telugu-regexp nil) -(defvar is13194-to-ucs-malayalam-hashtbl nil) -(defvar is13194-to-ucs-malayalam-regexp nil) +;; The followings provide conversion between IS 13194 (ISCII) and UCS. - (defvar ucs-devanagari-to-is13194-alist - '(;;Unicode vs IS13194 ;; only Devanagari is supported now. - (?\x0900 . "[U+0900]") - (?\x0901 . "(5!(B") - (?\x0902 . "(5"(B") - (?\x0903 . "(5#(B") - (?\x0904 . "[U+0904]") - (?\x0905 . "(5$(B") - (?\x0906 . "(5%(B") - (?\x0907 . "(5&(B") - (?\x0908 . "(5'(B") - (?\x0909 . "(5((B") - (?\x090a . "(5)(B") - (?\x090b . "(5*(B") - (?\x090c . "(5&i(B") - (?\x090d . "(5.(B") - (?\x090e . "(5+(B") - (?\x090f . "(5,(B") - (?\x0910 . "(5-(B") - (?\x0911 . "(52(B") - (?\x0912 . "(5/(B") - (?\x0913 . "(50(B") - (?\x0914 . "(51(B") - (?\x0915 . "(53(B") - (?\x0916 . "(54(B") - (?\x0917 . "(55(B") - (?\x0918 . "(56(B") - (?\x0919 . "(57(B") - (?\x091a . "(58(B") - (?\x091b . "(59(B") - (?\x091c . "(5:(B") - (?\x091d . "(5;(B") - (?\x091e . "(5<(B") - (?\x091f . "(5=(B") - (?\x0920 . "(5>(B") - (?\x0921 . "(5?(B") - (?\x0922 . "(5@(B") - (?\x0923 . "(5A(B") - (?\x0924 . "(5B(B") - (?\x0925 . "(5C(B") - (?\x0926 . "(5D(B") - (?\x0927 . "(5E(B") - (?\x0928 . "(5F(B") - (?\x0929 . "(5G(B") - (?\x092a . "(5H(B") - (?\x092b . "(5I(B") - (?\x092c . "(5J(B") - (?\x092d . "(5K(B") - (?\x092e . "(5L(B") - (?\x092f . "(5M(B") - (?\x0930 . "(5O(B") - (?\x0931 . "(5P(B") - (?\x0932 . "(5Q(B") - (?\x0933 . "(5R(B") - (?\x0934 . "(5S(B") - (?\x0935 . "(5T(B") - (?\x0936 . "(5U(B") - (?\x0937 . "(5V(B") - (?\x0938 . "(5W(B") - (?\x0939 . "(5X(B") - (?\x093a . "[U+093a]") - (?\x093b . "[U+093b]") - (?\x093c . "(5i(B") - (?\x093d . "(5ji(B") - (?\x093e . "(5Z(B") - (?\x093f . "(5[(B") - (?\x0940 . "(5\(B") - (?\x0941 . "(5](B") - (?\x0942 . "(5^(B") - (?\x0943 . "(5_(B") - (?\x0944 . "(5_i(B") - (?\x0945 . "(5c(B") - (?\x0946 . "(5`(B") - (?\x0947 . "(5a(B") - (?\x0948 . "(5b(B") - (?\x0949 . "(5g(B") - (?\x094a . "(5d(B") - (?\x094b . "(5e(B") - (?\x094c . "(5f(B") - (?\x094d . "(5h(B") - (?\x094e . "[U+094e]") - (?\x094f . "[U+094f]") - (?\x0950 . "(5!i(B") - (?\x0951 . "(5p5(B") - (?\x0952 . "(5p8(B") - (?\x0953 . "[DEVANAGARI GRAVE ACCENT]") - (?\x0954 . "[DEVANAGARI ACUTE ACCENT]") - (?\x0955 . "[U+0955]") - (?\x0956 . "[U+0956]") - (?\x0957 . "[U+0957]") - (?\x0958 . "(53i(B") - (?\x0959 . "(54i(B") - (?\x095a . "(55i(B") - (?\x095b . "(5:i(B") - (?\x095c . "(5?i(B") - (?\x095d . "(5@i(B") - (?\x095e . "(5Ii(B") - (?\x095f . "(5N(B") - (?\x0960 . "(5*i(B") - (?\x0961 . "(5'i(B") - (?\x0962 . "(5[i(B") - (?\x0963 . "(5ei(B") - (?\x0964 . "(5j(B") - (?\x0965 . "(5jj(B") - (?\x0966 . "(5q(B") - (?\x0967 . "(5r(B") - (?\x0968 . "(5s(B") - (?\x0969 . "(5t(B") - (?\x096a . "(5u(B") - (?\x096b . "(5v(B") - (?\x096c . "(5w(B") - (?\x096d . "(5x(B") - (?\x096e . "(5y(B") - (?\x096f . "(5z(B") - (?\x0970 . "[U+0970]") - (?\x0971 . "[U+0971]") - (?\x0972 . "[U+0972]") - (?\x0973 . "[U+0973]") - (?\x0974 . "[U+0974]") - (?\x0975 . "[U+0975]") - (?\x0976 . "[U+0976]") - (?\x0977 . "[U+0977]") - (?\x0978 . "[U+0978]") - (?\x0979 . "[U+0979]") - (?\x097a . "[U+097a]") - (?\x097b . "[U+097b]") - (?\x097c . "[U+097c]") - (?\x097d . "[U+097d]") - (?\x097e . "[U+097e]") - (?\x097f . "[U+097f]"))) - - (defvar ucs-bengali-to-is13194-alist nil) - (defvar ucs-assamese-to-is13194-alist nil) - (defvar ucs-gurmukhi-to-is13194-alist nil) - (defvar ucs-gujarati-to-is13194-alist nil) - (defvar ucs-oriya-to-is13194-alist nil) - (defvar ucs-tamil-to-is13194-alist nil) - (defvar ucs-telugu-to-is13194-alist nil) - (defvar ucs-malayalam-to-is13194-alist nil) - - (defvar is13194-default-repartory 'devanagari) + (let - ;;Unicode vs IS13194. Only Devanagari is supported currently. ++ ;;Unicode vs IS13194 ;; only Devanagari is supported now. + ((ucs-devanagari-to-is13194-alist + '((?\x0900 . "[U+0900]") + (?\x0901 . "(5!(B") + (?\x0902 . "(5"(B") + (?\x0903 . "(5#(B") + (?\x0904 . "[U+0904]") + (?\x0905 . "(5$(B") + (?\x0906 . "(5%(B") + (?\x0907 . "(5&(B") + (?\x0908 . "(5'(B") + (?\x0909 . "(5((B") + (?\x090a . "(5)(B") + (?\x090b . "(5*(B") + (?\x090c . "(5&i(B") + (?\x090d . "(5.(B") + (?\x090e . "(5+(B") + (?\x090f . "(5,(B") + (?\x0910 . "(5-(B") + (?\x0911 . "(52(B") + (?\x0912 . "(5/(B") + (?\x0913 . "(50(B") + (?\x0914 . "(51(B") + (?\x0915 . "(53(B") + (?\x0916 . "(54(B") + (?\x0917 . "(55(B") + (?\x0918 . "(56(B") + (?\x0919 . "(57(B") + (?\x091a . "(58(B") + (?\x091b . "(59(B") + (?\x091c . "(5:(B") + (?\x091d . "(5;(B") + (?\x091e . "(5<(B") + (?\x091f . "(5=(B") + (?\x0920 . "(5>(B") + (?\x0921 . "(5?(B") + (?\x0922 . "(5@(B") + (?\x0923 . "(5A(B") + (?\x0924 . "(5B(B") + (?\x0925 . "(5C(B") + (?\x0926 . "(5D(B") + (?\x0927 . "(5E(B") + (?\x0928 . "(5F(B") + (?\x0929 . "(5G(B") + (?\x092a . "(5H(B") + (?\x092b . "(5I(B") + (?\x092c . "(5J(B") + (?\x092d . "(5K(B") + (?\x092e . "(5L(B") + (?\x092f . "(5M(B") + (?\x0930 . "(5O(B") + (?\x0931 . "(5P(B") + (?\x0932 . "(5Q(B") + (?\x0933 . "(5R(B") + (?\x0934 . "(5S(B") + (?\x0935 . "(5T(B") + (?\x0936 . "(5U(B") + (?\x0937 . "(5V(B") + (?\x0938 . "(5W(B") + (?\x0939 . "(5X(B") + (?\x093a . "[U+093a]") + (?\x093b . "[U+093b]") + (?\x093c . "(5i(B") + (?\x093d . "(5ji(B") + (?\x093e . "(5Z(B") + (?\x093f . "(5[(B") + (?\x0940 . "(5\(B") + (?\x0941 . "(5](B") + (?\x0942 . "(5^(B") + (?\x0943 . "(5_(B") + (?\x0944 . "(5_i(B") + (?\x0945 . "(5c(B") + (?\x0946 . "(5`(B") + (?\x0947 . "(5a(B") + (?\x0948 . "(5b(B") + (?\x0949 . "(5g(B") + (?\x094a . "(5d(B") + (?\x094b . "(5e(B") + (?\x094c . "(5f(B") + (?\x094d . "(5h(B") + (?\x094e . "[U+094e]") + (?\x094f . "[U+094f]") + (?\x0950 . "(5!i(B") + (?\x0951 . "(5p5(B") + (?\x0952 . "(5p8(B") + (?\x0953 . "[DEVANAGARI GRAVE ACCENT]") + (?\x0954 . "[DEVANAGARI ACUTE ACCENT]") + (?\x0955 . "[U+0955]") + (?\x0956 . "[U+0956]") + (?\x0957 . "[U+0957]") + (?\x0958 . "(53i(B") + (?\x0959 . "(54i(B") + (?\x095a . "(55i(B") + (?\x095b . "(5:i(B") + (?\x095c . "(5?i(B") + (?\x095d . "(5@i(B") + (?\x095e . "(5Ii(B") + (?\x095f . "(5N(B") + (?\x0960 . "(5*i(B") + (?\x0961 . "(5'i(B") + (?\x0962 . "(5[i(B") + (?\x0963 . "(5ei(B") + (?\x0964 . "(5j(B") + (?\x0965 . "(5jj(B") + (?\x0966 . "(5q(B") + (?\x0967 . "(5r(B") + (?\x0968 . "(5s(B") + (?\x0969 . "(5t(B") + (?\x096a . "(5u(B") + (?\x096b . "(5v(B") + (?\x096c . "(5w(B") + (?\x096d . "(5x(B") + (?\x096e . "(5y(B") + (?\x096f . "(5z(B") + (?\x0970 . "[U+0970]") + (?\x0971 . "[U+0971]") + (?\x0972 . "[U+0972]") + (?\x0973 . "[U+0973]") + (?\x0974 . "[U+0974]") + (?\x0975 . "[U+0975]") + (?\x0976 . "[U+0976]") + (?\x0977 . "[U+0977]") + (?\x0978 . "[U+0978]") + (?\x0979 . "[U+0979]") + (?\x097a . "[U+097a]") + (?\x097b . "[U+097b]") + (?\x097c . "[U+097c]") + (?\x097d . "[U+097d]") + (?\x097e . "[U+097e]") + (?\x097f . "[U+097f]"))) + (ucs-bengali-to-is13194-alist nil) + (ucs-assamese-to-is13194-alist nil) + (ucs-gurmukhi-to-is13194-alist nil) + (ucs-gujarati-to-is13194-alist nil) + (ucs-oriya-to-is13194-alist nil) + (ucs-tamil-to-is13194-alist nil) + (ucs-telugu-to-is13194-alist nil) + (ucs-malayalam-to-is13194-alist nil)) + (dolist (script '(devanagari bengali assamese gurmukhi gujarati + oriya tamil telugu malayalam)) + (let ((hashtable (intern (concat "is13194-to-ucs-" + (symbol-name script) "-hashtbl" ))) + (regexp (intern (concat "is13194-to-ucs-" + (symbol-name script) "-regexp")))) + (set hashtable (make-hash-table :test 'equal :size 128)) + (dolist (x (eval (intern (concat "ucs-" (symbol-name script) + "-to-is13194-alist")))) + (put-char-code-property (car x) 'script script) + (put-char-code-property (car x) 'iscii (cdr x)) + (puthash (cdr x) (char-to-string (car x)) (eval hashtable))) - (set regexp (indian-regexp-of-hashtbl-keys (eval hashtable)))))) ++ (set regexp (indian-regexp-of-hashtbl-keys (eval hashtable)))))) ++ ++(defvar is13194-default-repertory 'devanagari) + +(defvar is13194-repertory-to-ucs-script - `((DEF ?\x40 ,is13194-default-repartory) - (RMN ?\x41 ,is13194-default-repartory) ++ `((DEF ?\x40 ,is13194-default-repertory) ++ (RMN ?\x41 ,is13194-default-repertory) + (DEV ?\x42 devanagari) + (BNG ?\x43 bengali) + (TML ?\x44 tamil) + (TLG ?\x45 telugu) + (ASM ?\x46 bengali) + (ORI ?\x47 oriya) + (KND ?\x48 kannada) + (MLM ?\x49 malayalam) + (GJR ?\x4a gujarati) + (PNJ ?\x4b gurmukhi))) + +;; for guiding find-variable function. +(defvar is13194-to-ucs-devanagari-hashtbl nil) +(defvar is13194-to-ucs-devanagari-regexp nil) +(defvar is13194-to-ucs-bengali-hashtbl nil) +(defvar is13194-to-ucs-bengali-regexp nil) +(defvar is13194-to-ucs-assamese-hashtbl nil) +(defvar is13194-to-ucs-assamese-regexp nil) +(defvar is13194-to-ucs-gurmukhi-hashtbl nil) +(defvar is13194-to-ucs-gurmukhi-regexp nil) +(defvar is13194-to-ucs-gujarati-hashtbl nil) +(defvar is13194-to-ucs-gujarati-regexp nil) +(defvar is13194-to-ucs-oriya-hashtbl nil) +(defvar is13194-to-ucs-oriya-regexp nil) +(defvar is13194-to-ucs-tamil-hashtbl nil) +(defvar is13194-to-ucs-tamil-regexp nil) +(defvar is13194-to-ucs-telugu-hashtbl nil) +(defvar is13194-to-ucs-telugu-regexp nil) +(defvar is13194-to-ucs-malayalam-hashtbl nil) +(defvar is13194-to-ucs-malayalam-regexp nil) - (mapc - (function (lambda (script) - (let ((hashtable (intern (concat "is13194-to-ucs-" - (symbol-name script) "-hashtbl" ))) - (regexp (intern (concat "is13194-to-ucs-" - (symbol-name script) "-regexp")))) - (set hashtable (make-hash-table :test 'equal :size 128)) - (mapc - (function (lambda (x) - (put-char-code-property (decode-char 'ucs (car x)) - 'script script) - (put-char-code-property (decode-char 'ucs (car x)) - 'iscii (cdr x)) - (puthash (cdr x) (char-to-string (decode-char 'ucs (car x))) - (eval hashtable)))) - (eval (intern (concat "ucs-" (symbol-name script) - "-to-is13194-alist")))) - (set regexp (indian-regexp-of-hashtbl-keys (eval hashtable)))))) - '(devanagari bengali assamese gurmukhi gujarati - oriya tamil telugu malayalam)) - (defvar ucs-to-is13194-regexp ;; only Devanagari is supported now. - (concat "[" (char-to-string (decode-char 'ucs #x0900)) - "-" (char-to-string (decode-char 'ucs #x097f)) "]") + (concat "[" (char-to-string #x0900) - "-" (char-to-string #x097f) "]") ++ "-" (char-to-string #x097f) "]") "Regexp that matches to conversion") (defun ucs-to-iscii-region (from to) @@@ -1207,55 -978,19 +1205,19 @@@ Returns new end position. (save-excursion (save-restriction (let ((pos from) - (alist (char-table-extra-slot indian-2-column-to-ucs-chartable 0))) - (narrow-to-region from to) - (decompose-region from to) - (goto-char (point-min)) - (while (re-search-forward indian-2-column-to-ucs-regexp nil t) - (let ((len (- (match-end 0) (match-beginning 0))) - subst) - (if (= len 1) - (setq subst (aref indian-2-column-to-ucs-chartable + (alist (char-table-extra-slot indian-2-column-to-ucs-chartable 0))) + (narrow-to-region from to) + (decompose-region from to) + (goto-char (point-min)) + (while (re-search-forward indian-2-column-to-ucs-regexp nil t) + (let ((len (- (match-end 0) (match-beginning 0))) + subst) + (if (= len 1) + (setq subst (aref indian-2-column-to-ucs-chartable (char-after (match-beginning 0)))) - (setq subst (cdr (assoc (match-string 0) alist)))) - (replace-match (if subst subst "?")))) - (indian-compose-region (point-min) (point-max)))))) - - ;;;###autoload - (defun indian-glyph-char (index &optional script) - "Return character of charset `indian-glyph' made from glyph index INDEX. - The variable `indian-default-script' specifies the script of the glyph. - Optional argument SCRIPT, if non-nil, overrides `indian-default-script'. - See also the function `indian-char-glyph'." - (or script - (setq script indian-default-script)) - (let ((offset (get script 'indian-glyph-code-offset))) - (or (integerp offset) - (error "Invalid script name: %s" script)) - (or (and (>= index 0) (< index 256)) - (error "Invalid glyph index: %d" index)) - (setq index (+ offset index)) - (make-char 'indian-glyph (+ (/ index 96) 32) (+ (% index 96) 32)))) - - (defvar indian-glyph-max-char - (indian-glyph-char - 255 (aref indian-script-table (1- (length indian-script-table)))) - "The maximum valid code of characters in the charset `indian-glyph'.") - - ;;;###autoload - (defun indian-char-glyph (char) - "Return information about the glyph code for CHAR of `indian-glyph' charset. - The value is (INDEX . SCRIPT), where INDEX is the glyph index - in the font that Indian script name SCRIPT specifies. - See also the function `indian-glyph-char'." - (let ((split (split-char char)) - code) - (or (eq (car split) 'indian-glyph) - (error "Charset of `%c' is not indian-glyph" char)) - (or (<= char indian-glyph-max-char) - (error "Invalid indian-glyph char: %d" char)) - (setq code (+ (* (- (nth 1 split) 32) 96) (nth 2 split) -32)) - (cons (% code 256) (aref indian-script-table (/ code 256))))) - (setq subst (assoc (match-string 0) alist))) ++ (setq subst (cdr (assoc (match-string 0) alist)))) + (replace-match (if subst subst "?")))) + (indian-compose-region (point-min) (point-max)))))) (provide 'ind-util) diff --cc lisp/language/indian.el index 47803f22342,7d13eb46fb6..d7b4c365bc8 --- a/lisp/language/indian.el +++ b/lisp/language/indian.el @@@ -29,127 -29,17 +29,122 @@@ ;;; Code: - (make-coding-system - 'in-is13194 2 ?D - "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)." - '(ascii indian-is13194 nil nil - nil ascii-eol) - `((safe-chars . ,(let ((table (make-char-table 'safe-chars nil))) - (set-char-table-range table 'indian-is13194 t) - (dotimes (i 127) - (aset table i t) - (aset table (decode-char 'ucs (+ #x900 i)) t)) - table)) - (post-read-conversion . in-is13194-post-read-conversion) - (pre-write-conversion . in-is13194-pre-write-conversion))) - - (define-coding-system-alias 'devanagari 'in-is13194) + (define-coding-system 'in-is13194-devanagari + "8-bit encoding for ASCII (MSB=0) and IS13194-Devanagari (MSB=1)." + :coding-type 'iso-2022 + :mnemonic ?D + :designation [ascii indian-is13194 nil nil] + :charset-list '(ascii indian-is13194) + :post-read-conversion 'in-is13194-post-read-conversion + :pre-write-conversion 'in-is13194-pre-write-conversion) + + (define-coding-system-alias 'devanagari 'in-is13194-devanagari) +(defvar indian-font-foundry 'cdac + "Font foundry for Indian characters. +Currently supported foundries are `cdac' and `akruti'.") + +(defvar indian-script-language-alist + "Alist of Indian scripts vs the corresponding language list and font foundry. +Each element has this form: + + (SCRIPT LANGUAGE-LIST FONT-FOUNDRY) + +SCRIPT is one of Indian script names. + +LANGUAGE-LIST is a list of Indian langauge names SCRIPT is used for. +The list is in the priority order. + +FONT-FOUNDRY is a font foundry representing a group of Indian +fonts. If the value is nil, the value of `indian-font-foundry' +is used." + '((devanagari (hindi sanskrit) nil) + (bengali (bengali assamese) nil) + (gurmukhi (punjabi) nil) + (gujarati (gujarati) nil) + (oriya (oriya) nil) + (tamil (tamil) nil) + (telugu (telugu) nil) + (kannada (kannada) nil) + (malayalam (malayalam) nil))) + +(defconst indian-font-char-index-table + '( ; for which language(s) + ;; CDAC fonts + (#x0000 . cdac:dv-ttsurekh) ; hindi, etc + (#x0100 . cdac:sd-ttsurekh) ; sanskrit + (#x0200 . cdac:bn-ttdurga) ; bengali + (#x0300 . cdac:tm-ttvalluvar) ; tamil + (#x0400 . cdac:tl-tthemalatha) ; telugu + (#x0500 . cdac:as-ttdurga) ; assamese + (#x0600 . cdac:or-ttsarala) ; oriya + (#x0700 . cdac:kn-ttuma) ; kannada + (#x0800 . cdac:ml-ttkarthika) ; malayalam + (#x0900 . cdac:gj-ttavantika) ; gujarati + (#x0A00 . cdac:pn-ttamar) ; punjabi + + ;; AKRUTI fonts + (#x0B00 . akruti:dev) ; hindi, etc + (#x0C00 . akruti:bng) ; bengali + (#x0D00 . akruti:pnj) ; punjabi + (#x0E00 . akruti:guj) ; gujarati + (#x0F00 . akruti:ori) ; oriya + (#x1000 . akruti:tml) ; tamil + (#x1100 . akruti:tlg) ; telugu + (#x1200 . akruti:knd) ; kannada + (#x1300 . akruti:mal) ; malayalam + ) + "Aliat of indices of `indian-glyph' character vs Indian font identifiers. +Each element has this form: (INDEX . FONT-IDENTIFIER) + +INDEX is an index number of the first character in the charset +`indian-glyph' assigned for glyphs in the font specified by +FONT-IDENTIFIER. Currently FONT-IDENTIFIERs are defined for CDAC +and AKRUTI font groups.") + +(defun indian-font-char (index font-identifier) + "Return character of charset `indian-glyph' made from glyph index INDEX. +FONT-IDENTIFIER is an identifier of an Indian font listed in the +variable `indian-font-char-index-table'. It specifies which +font INDEX is for." + (if (or (< index 0) (> index 255)) + (error "Invalid glyph index: %d" index)) + (let ((start (car (rassq font-identifier indian-font-char-index-table)))) + (if (not start) + (error "Unknown font identifier: %s" font-identifier)) + (setq index (+ start index)) + (make-char 'indian-glyph (+ (/ index 96) 32) (+ (% index 96) 32)))) + +;; Return a range of characters (cons of min and max character) of the +;; charset `indian-glyph' for displaying SCRIPT in LANGUAGE by a font +;; of FOUNDRY. + +(defun indian-font-char-range (font-identifier) + (cons (indian-font-char 0 font-identifier) + (indian-font-char 255 font-identifier))) + +(defvar indian-script-table + '[ + devanagari + sanskrit + bengali + tamil + telugu + assamese + oriya + kannada + malayalam + gujarati + punjabi + ] + "Vector of Indian script names.") + +(let ((len (length indian-script-table)) + (i 0)) + (while (< i len) + (put (aref indian-script-table i) 'indian-glyph-code-offset (* 256 i)) + (setq i (1+ i)))) + (defvar indian-default-script 'devanagari "Default script for Indian languages. Each Indian language environment sets this value diff --cc lisp/language/japanese.el index 64c74e02a0d,a53910f730e..ea90ac19f56 --- a/lisp/language/japanese.el +++ b/lisp/language/japanese.el @@@ -1,7 -1,7 +1,10 @@@ -;;; japanese.el --- support for Japanese -*- coding: iso-2022-7bit; -*- +;;; japanese.el --- support for Japanese -*- coding: iso-2022-7bit; no-byte-compile: t -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 ;; Keywords: multilingual, Japanese @@@ -67,14 -70,21 +73,22 @@@ (define-coding-system-alias 'shift_jis 'japanese-shift-jis) (define-coding-system-alias 'sjis 'japanese-shift-jis) + - (make-coding-system - 'japanese-iso-7bit-1978-irv 2 ?j - "ISO 2022 based 7-bit encoding for Japanese JISX0208-1978 and JISX0201-Roman." - '((ascii japanese-jisx0208-1978 japanese-jisx0208 - latin-jisx0201 japanese-jisx0212 katakana-jisx0201 t) nil nil nil - short ascii-eol ascii-cntl seven nil nil use-roman use-oldjis) - '(ascii japanese-jisx0208-1978 japanese-jisx0208 latin-jisx0201)) + ;; Fixme: AKA Shift-JIS according to + ;; . Is + ;; that correct? + + (define-coding-system 'japanese-iso-7bit-1978-irv + "ISO 2022 based 7-bit encoding for Japanese JISX0208-1978 and JISX0201-Roman." + :coding-type 'iso-2022 + :mnemonic ?j + :designation [(latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208 + japanese-jisx0212 katakana-jisx0201) + nil nil nil] + :flags '(short ascii-at-eol ascii-at-cntl 7-bit designation + use-roman use-oldjis) + :charset-list '(ascii latin-jisx0201 japanese-jisx0208-1978 japanese-jisx0208 + japanese-jisx0212)) (define-coding-system-alias 'iso-2022-jp-1978-irv 'japanese-iso-7bit-1978-irv) (define-coding-system-alias 'old-jis 'japanese-iso-7bit-1978-irv) diff --cc lisp/language/korean.el index 4dbc2cb5b8a,b1f658efe80..f010de69898 --- a/lisp/language/korean.el +++ b/lisp/language/korean.el @@@ -1,7 -1,7 +1,10 @@@ -;;; korean.el --- support for Korean -*- coding: iso-2022-7bit; -*- +;;; korean.el --- support for Korean -*- coding: iso-2022-7bit; no-byte-compile: t -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 ;; Keywords: multilingual, Korean diff --cc lisp/language/lao-util.el index 7105ae1beb6,0dee3e6285d..4db213dab02 --- a/lisp/language/lao-util.el +++ b/lisp/language/lao-util.el @@@ -1,10 -1,10 +1,13 @@@ ;;; lao-util.el --- utilities for Lao -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. ;; Copyright (C) 2001 Free Software Foundation, Inc. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 - ;; Keywords: multilingual, Lao + ;; Keywords: multilingual, Lao, i18n ;; This file is part of GNU Emacs. diff --cc lisp/language/lao.el index 72e90930abb,d1a43b805c4..8edc282a46c --- a/lisp/language/lao.el +++ b/lisp/language/lao.el @@@ -1,8 -1,8 +1,11 @@@ -;;; lao.el --- support for Lao -*- coding: iso-2022-7bit; -*- +;;; lao.el --- support for Lao -*- coding: iso-2022-7bit; no-byte-compile: t -*- ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. ;; Copyright (C) 2001 Free Software Foundation, Inc. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 ;; Keywords: multilingual, Lao diff --cc lisp/language/malayalam.el index 3a7c19892c7,00000000000..27bf122fd7c mode 100644,000000..100644 --- a/lisp/language/malayalam.el +++ b/lisp/language/malayalam.el @@@ -1,46 -1,0 +1,51 @@@ +;;; malayalam.el --- Support for Malayalam -*- coding: iso-2022-7bit; no-byte-compile: t -*- + +;; Copyright (C) 2003 Free Software Foundation, Inc. + +;; Maintainer: KAWABATA, Taichi +;; Keywords: multilingual, Indian, Malayalam + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; This file defines language-info of Malayalam script. + +;;; Code: + +(set-language-info-alist + "Malayalam" '((charset mule-unicode-0100-24ff indian-glyph ) + ;; indian-2-column + ;; comment out later + ;; ) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "malayalam-itrans") + (features mlm-util) + (documentation . "\ +South Indian language Malayalam is supported in this language environment.")) + '("Indian")) + ++;; For automatic composition. ++(set-char-table-range composition-function-table '(#x0d00 . #x0d7f) ++ 'malayalam-composition-function) ++ ++ +(provide 'malayalam) + +;;; malayalam.el ends here diff --cc lisp/language/mlm-util.el index a01f3c4a88f,00000000000..b492d269ff2 mode 100644,000000..100644 --- a/lisp/language/mlm-util.el +++ b/lisp/language/mlm-util.el @@@ -1,410 -1,0 +1,412 @@@ +;;; mlm-util.el --- support for composing malayalam characters -*-coding: iso-2022-7bit;-*- + +;; Copyright (C) 2003 Free Software Foundation, Inc. + +;; Maintainer: KAWABATA, Taichi +;; Keywords: multilingual, Malayalam + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;; Created: Feb. 11. 2003 + +;;; Commentary: + +;; This file provides character(Unicode) to glyph(CDAC) conversion and +;; composition of Malayalam script characters. + +;;; Code: + +;; Malayalam Composable Pattern +;; C .. Consonants +;; V .. Vowel +;; H .. Halant +;; M .. Matra +;; V .. Vowel +;; A .. Anuswar +;; D .. Chandrabindu +;; (N .. Zerowidth Non Joiner) +;; (J .. Zerowidth Joiner. ) +;; 1. vowel +;; V(A|visargam)? +;; 2. syllable : maximum of 5 consecutive consonants. (e.g. kartsnya) +;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)? + +(defconst malayalam-consonant + "[$,1@5(B-$,1@Y(B]") + +(defconst malayalam-composable-pattern + (concat + "\\([$,1@%(B-$,1@4(B][$,1@"(B]?\\)\\|$,1@#(B" + "\\|\\(" + "\\(?:\\(?:[$,1@5(B-$,1@Y(B]$,1@m(B\\)?\\(?:[$,1@5(B-$,1@Y(B]$,1@m(B\\)?\\(?:[$,1@5(B-$,1@Y(B]$,1@m(B\\)?[$,1@5(B-$,1@Y(B]$,1@m(B\\)?" + "[$,1@5(B-$,1@Y(B]\\(?:$,1@m(B\\|[$,1@^(B-$,1@c@f@g@h@j@j@k@l(B]?[$,1@"@m(B]?\\)?" + "\\)") + "Regexp matching a composable sequence of Malayalam characters.") + +;;;###autoload +(defun malayalam-compose-region (from to) + (interactive "r") + (save-excursion + (save-restriction + (narrow-to-region from to) + (goto-char (point-min)) + (while (re-search-forward malayalam-composable-pattern nil t) + (malayalam-compose-syllable-region (match-beginning 0) + (match-end 0)))))) +(defun malayalam-compose-string (string) + (with-temp-buffer + (insert (decompose-string string)) + (malayalam-compose-region (point-min) (point-max)) + (buffer-string))) + +(defun malayalam-post-read-conversion (len) + (save-excursion + (save-restriction + (let ((buffer-modified-p (buffer-modified-p))) + (narrow-to-region (point) (+ (point) len)) + (malayalam-compose-region (point-min) (point-max)) + (set-buffer-modified-p buffer-modified-p) + (- (point-max) (point-min)))))) + +(defun malayalam-range (from to) + "Make the list of the integers of range FROM to TO." + (let (result) + (while (<= from to) (setq result (cons to result) to (1- to))) result)) + +(defun malayalam-regexp-of-hashtbl-keys (hashtbl) + "Return a regular expression that matches all keys in hashtable HASHTBL." + (let ((max-specpdl-size 1000)) + (regexp-opt + (sort + (let (dummy) + (maphash (function (lambda (key val) (setq dummy (cons key dummy)))) hashtbl) + dummy) + (function (lambda (x y) (> (length x) (length y)))))))) + + +;;;###autoload - (defun malayalam-composition-function (from to pattern &optional string) - "Compose Malayalam characters in REGION, or STRING if specified. - Assume that the REGION or STRING must fully match the composable - PATTERN regexp." - (if string (malayalam-compose-syllable-string string) - (malayalam-compose-syllable-region from to)) - (- to from)) - - ;; Register a function to compose Malayalam characters. - (mapc - (function (lambda (ucs) - (aset composition-function-table (decode-char 'ucs ucs) - (list (cons malayalam-composable-pattern - 'malayalam-composition-function))))) - (nconc '(#x0d02 #x0d03) (malayalam-range #x0d05 #x0d39))) ++(defun malayalam-composition-function (pos &optional string) ++ "Compose Malayalam characters after the position POS. ++If STRING is not nil, it is a string, and POS is an index to the string. ++In this case, compose characters after POS of the string." ++ (if string ++ ;; Not yet implemented. ++ nil ++ (goto-char pos) ++ (if (looking-at malayalam-composable-pattern) ++ (prog1 (match-end 0) ++ (malayalam-compose-syllable-region pos (match-end 0)))))) + +;; Notes on conversion steps. + +;; 1. chars to glyphs +;; +;; Simple replacement of characters to glyphs is done. + +;; 2. glyphs reordering. +;; +;; Two special reordering rule takes place. +;; a. following "$,46[(B" goes to the front. +;; b. following "$,46S6S(B", "$,46S(B" or "$,46T(B" goes to the front. +;; This reordering occurs only to the last cluster of consonants. +;; Preceding consonants with halant characters are not affected. + +;; 3. Composition. +;; +;; left modifiers will be attached at the left. +;; others will be attached right. + +(defvar mlm-char-glyph + '(;; various signs + ("$,1@"(B" . "$,46W(B") + ("$,1@#(B" . "$,46X(B") + ;; Independent Vowels + ("$,1@%(B" . "$,46!(B") + ("$,1@&(B" . "$,46"(B") + ("$,1@'(B" . "$,46#(B") + ("$,1@((B" . "$,46#6U(B") + ("$,1@)(B" . "$,46$(B") + ("$,1@*(B" . "$,46$6U(B") + ("$,1@+(B" . "$,46%(B") + ("$,1@,(B" . "nil") ;; not in present use, not supported. + ("$,1@.(B" . "$,46&(B") + ("$,1@/(B" . "$,46'(B") + ("$,1@0(B" . "$,46S6&(B") + ("$,1@2(B" . "$,46((B") + ("$,1@3(B" . "$,46(6M(B") + ("$,1@4(B" . "$,46(6U(B") + ;; Consonants + ("$,1@5(B" . "$,46)(B") + ("$,1@5@m@5(B" . "$,47!(B") + ("$,1@5@m@S(B" . "$,47"(B") + ("$,1@5@m@W(B" . "$,47#(B") + ("$,1@5@m@?(B" . "$,47N(B") + ("$,1@5@m@D(B" . "$,47`(B") + ("$,1@5@a(B" . "$,47f(B") + ("$,1@5@m@5@a(B" . "$,47g(B") + ("$,1@5@a(B" . "$,47f(B") + ("$,1@5@m@5@a(B" . "$,47g(B") + + ("$,1@6(B" . "$,46*(B") + + ("$,1@7(B" . "$,46+(B") + ("$,1@7@m@7(B" . "$,47$(B") + ("$,1@7@m@R(B" . "$,47%(B") + ("$,1@7@m@N(B" . "$,47\(B") + ("$,1@7@m@H(B" . "$,47a(B") + + ("$,1@8(B" . "$,46,(B") + + ("$,1@9(B" . "$,46-(B") + ("$,1@9@m@5(B" . "$,47&(B") + ("$,1@9@m@9(B" . "$,47'(B") + ("$,1@9@m@5@a(B" . "$,47h(B") + + ("$,1@:(B" . "$,46.(B") + ("$,1@:@m@:(B" . "$,47((B") ;; duplicate + ("$,1@:@m@;(B" . "$,47Q(B") + + ("$,1@;(B" . "$,46/(B") + + ("$,1@<(B" . "$,460(B") + ("$,1@<@m@<(B" . "$,47V(B") + ("$,1@<@m@>(B" . "$,47Z(B") + + ("$,1@=(B" . "$,461(B") + + ("$,1@>(B" . "$,462(B") + ("$,1@>@m@:(B" . "$,47)(B") + ("$,1@>@m@>(B" . "$,47*(B") + + ("$,1@?(B" . "$,463(B") + ("$,1@?@m@?(B" . "$,47+(B") + + ("$,1@@(B" . "$,464(B") + ("$,1@A(B" . "$,465(B") + ("$,1@A@m@A(B" . "$,47M(B") + ("$,1@B(B" . "$,466(B") + + ("$,1@C(B" . "$,467(B") + ("$,1@C@a@m(B" . "$,47,(B") ;; half consonant + ("$,1@C@m@?(B" . "$,47-(B") + ("$,1@C@m@C(B" . "$,47.(B") + ("$,1@C@m@N(B" . "$,47W(B") + ("$,1@C@m@A(B" . "$,47^(B") + ("$,1@C@a(B" . "$,47i(B") + + ("$,1@D(B" . "$,468(B") + ("$,1@D@m@D(B" . "$,47/(B") + ("$,1@D@m@E(B" . "$,470(B") + ("$,1@D@m@X(B" . "$,47U(B") + ("$,1@D@m@M(B" . "$,47[(B") + ("$,1@D@m@N(B" . "$,47_(B") + + ("$,1@E(B" . "$,469(B") + + ("$,1@F(B" . "$,46:(B") + ("$,1@F@m@F(B" . "$,471(B") + ("$,1@F@m@G(B" . "$,472(B") + + ("$,1@G(B" . "$,46;(B") + + ("$,1@H(B" . "$,46<(B") + ("$,1@H@a@m(B" . "$,473(B") ;; half consonant + ("$,1@H@m@D(B" . "$,474(B") + ("$,1@H@m@F(B" . "$,475(B") + ("$,1@H@m@H(B" . "$,476(B") + ("$,1@H@m@N(B" . "$,477(B") + ("$,1@H@m@G(B" . "$,47T(B") + ("$,1@H@m@E(B" . "$,47Y(B") + ("$,1@H@m@Q(B" . "$,47b(B") + ("$,1@H@a(B" . "$,47k(B") + ("$,1@H@m@H@a(B" . "$,47l(B") + + ("$,1@J(B" . "$,46=(B") + ("$,1@J@m@J(B" . "$,478(B") ;; duplicate + ("$,1@J@m@R(B" . "$,479(B") ;; lakar + + ("$,1@K(B" . "$,46>(B") + + ("$,1@L(B" . "$,46?(B") + ("$,1@L@m@L(B" . "$,47:(B") ;; duplicate + ("$,1@L@m@R(B" . "$,47;(B") ;; lakar + ("$,1@L@m@G(B" . "$,47O(B") + ("$,1@L@m@F(B" . "$,47P(B") + + ("$,1@M(B" . "$,46@(B") + + ("$,1@N(B" . "$,46A(B") + ("$,1@N@m@J(B" . "$,47<(B") + ("$,1@N@m@N(B" . "$,47=(B") + ("$,1@N@m@R(B" . "$,47>(B") ;; lakar + + ("$,1@O(B" . "$,46B(B") + ("$,1@O@m@O(B" . "$,47?(B") ;; duplicate + ("$,1@O@m@5@m@5(B" . "$,47m(B") + + ("$,1@P(B" . "$,46C(B") + ("$,1@P@a@m(B" . "$,47@(B") + ("$,1@P@a(B" . "$,47j(B") + + ("$,1@Q(B" . "$,46D(B") + ("$,1@Q@m(B" . "$,47@(B") ;; same glyph as "$,1@P@m(B" + ("$,1@Q@a@m(B" . "$,47@(B") ;; same glyph as "$,1@P@m(B" + ;;("$,1@Q@m@Q(B" . "$,47A(B") + ("$,1@Q@m@Q(B" . "$,47d(B") + + ("$,1@R(B" . "$,46E(B") + ("$,1@R@a@m(B" . "$,47B(B") + ("$,1@R@m@R(B" . "$,47C(B") ;; lakar + ("$,1@R@m@J(B" . "$,47e(B") + + ("$,1@S(B" . "$,46F(B") + ("$,1@S@a@m(B" . "$,47D(B") + ("$,1@S@m@S(B" . "$,47E(B") + + ("$,1@T(B" . "$,46G(B") + + ("$,1@U(B" . "$,46H(B") + ("$,1@U@m@U(B" . "$,47F(B") + + ("$,1@V(B" . "$,46I(B") + ("$,1@V@m@R(B" . "$,47G(B") + ("$,1@V@m@V(B" . "$,47H(B") + ("$,1@V@m@:(B" . "$,47](B") + + ("$,1@W(B" . "$,46J(B") + ("$,1@W@m@?(B" . "$,47c(B") + + ("$,1@X(B" . "$,46K(B") + ("$,1@X@m@R(B" . "$,47I(B") + ("$,1@X@m@X(B" . "$,47J(B") + ("$,1@X@m@Q@m@Q(B" . "$,47L(B") + ("$,1@X@m@E(B" . "$,47X(B") + + ("$,1@Y(B" . "$,46L(B") + ("$,1@Y@m@R(B" . "$,47K(B") + ("$,1@Y@m@N(B" . "$,47R(B") + ("$,1@Y@m@H(B" . "$,47S(B") + + ;; Dependent vowel signs + ("$,1@^(B" . "$,46M(B") + ("$,1@_(B" . "$,46N(B") + ("$,1@`(B" . "$,46O(B") + ("$,1@a(B" . "$,46P(B") + ("$,1@b(B" . "$,46Q(B") + ("$,1@c(B" . "$,46R(B") + ("$,1@f(B" . "$,46S(B") + ("$,1@g(B" . "$,46T(B") + ("$,1@h(B" . "$,46S6S(B") + ("$,1@j(B" . "$,46S6M(B") + ("$,1@k(B" . "$,46T6M(B") + ("$,1@l(B" . "$,46U(B") + ;; Various signs + ("$,1@m(B" . "$,46V(B") + ("$,1@m@O(B" . "$,46Y(B") ;; yakar + ("$,1@m@O@a(B" . "$,46\(B") ;; yakar + u + ("$,1@m@O@b(B" . "$,46](B") ;; yakar + uu + ("$,1@m@U(B" . "$,46Z(B") ;; vakar modifier + ("$,1@m@P(B" . "$,46[(B") ;; rakar modifier is the same to rra modifier. + ("$,1@m@P@m(B" . "$,46R(B") ;; halant + rakar + halant + ("$,1@m@Q(B" . "$,46[(B") ;; rrakar modifier + ("$,1@m@Q@m(B" . "$,46R(B") ;; halant + rrakar + halant + ("$,1@m@m(B" . "$,46V(B") ;; double omission sign to stop forming half consonant. + ("$,1@w(B" . "$,46U(B") ;; not in present use, already at 0D4C. + )) + +(defvar mlm-char-glyph-hash + (let* ((hash (make-hash-table :test 'equal))) + (mapc (function (lambda (x) (puthash (car x) (cdr x) hash))) + mlm-char-glyph) + hash)) + +(defvar mlm-char-glyph-regexp + (malayalam-regexp-of-hashtbl-keys mlm-char-glyph-hash)) + +;; Malayalam languages needed to be reordered in a complex mannar. + +(defvar mlm-consonants + (concat + "$,46)6*6+6,6-6.6/606162636465666768696:6;6<6=6>6?6@6A6B6C6D6E6F6G6H6I6J6K6L(B" + "$,47!7"7#7$7%7&7'7(7)7*7+7,7-7.7/707172737475767778797:7;7<7=7>7?7@7A7B7C7D7E7F7G7H7I7J7K7L7M7N7O7P7Q7R7S7T7U7V7W7X7Y7Z7[7\7]7^7_7`7a7b7c7d7e(B" + )) + +(defvar mlm-consonants-regexp + (concat "\\($,46[(B?[" mlm-consonants "][$,46Y6Z(B]?\\)")) + +(defvar mlm-glyph-reorder-key-glyphs "[$,46[6S6T(B]") + +(defvar mlm-glyph-reordering-regexp-list + `((,(concat "\\([" mlm-consonants "][$,46Y6Z(B]?\\)$,46[(B") . "$,46[(B\\1") + (,(concat mlm-consonants-regexp "$,46S6S(B") . "$,46S6S(B\\1") + (,(concat mlm-consonants-regexp "$,46S(B") . "$,46S(B\\1") + (,(concat mlm-consonants-regexp "$,46T(B") . "$,46T(B\\1"))) + +(defun malayalam-compose-syllable-string (string) + (with-temp-buffer + (insert (decompose-string string)) + (malayalam-compose-syllable-region (point-min) (point-max)) + (buffer-string))) + +(defun malayalam-compose-syllable-region (from to) + "Compose malayalam syllable in region FROM to TO." + (let (glyph-str + match-str + glyph-reorder-regexps + glyph-reorder-replace + glyph-reorder-regexp) + (save-excursion + (save-restriction + (narrow-to-region from to) + (goto-char (point-min)) + ;; char-glyph-conversion - (while (re-search-forward mlm-char-glyph-regexp nil t) - (setq match-str (match-string 0)) - (setq glyph-str - (concat glyph-str (gethash match-str mlm-char-glyph-hash)))) ++ (while (not (eobp)) ++ (if (looking-at mlm-char-glyph-regexp) ++ (progn ++ (setq match-str (match-string 0) ++ glyph-str ++ (concat glyph-str ++ (gethash match-str mlm-char-glyph-hash))) ++ (goto-char (match-end 0))) ++ (setq glyph-str (concat glyph-str (string (following-char)))) ++ (forward-char 1))) + (when (string-match mlm-glyph-reorder-key-glyphs glyph-str) + ;; glyph reordering + (setq glyph-reorder-regexps mlm-glyph-reordering-regexp-list) + (while glyph-reorder-regexps + (setq glyph-reorder-regexp (caar glyph-reorder-regexps)) + (setq glyph-reorder-replace (cdar glyph-reorder-regexps)) + (setq glyph-reorder-regexps (cdr glyph-reorder-regexps)) + (if (string-match glyph-reorder-regexp glyph-str) + (setq glyph-str + (replace-match glyph-reorder-replace nil nil + glyph-str))))) + ;; concatenate and attach reference-points. + (setq glyph-str + (cdr + (apply + 'nconc + (mapcar + (function + (lambda (x) (list '(5 . 3) x))) ;; default ref. point. + glyph-str)))) + (compose-region from to glyph-str))))) + +(provide 'mlm-util) + +;;; devan-util.el ends here diff --cc lisp/language/romanian.el index 2c4d2de94ab,aef4c109065..a89a9ab777c --- a/lisp/language/romanian.el +++ b/lisp/language/romanian.el @@@ -1,9 -1,9 +1,9 @@@ -;;; romanian.el --- support for Romanian -*- coding: iso-latin-2; -*- +;;; romanian.el --- support for Romanian -*- coding: iso-latin-2; no-byte-compile: t -*- - ;; Copyright (C) 1998 Free Software Foundation. + ;; Copyright (C) 1998, 2002 Free Software Foundation. ;; Author: Dan Nicolaescu - ;; Keywords: multilingual, Romanian + ;; Keywords: multilingual, Romanian, i18n ;; This file is part of GNU Emacs. @@@ -29,12 -29,11 +29,11 @@@ ;;; Code: (set-language-info-alist -- "Romanian" '((charset . (ascii latin-iso8859-2)) - (coding-system . (iso-8859-2)) - (coding-system . (iso-8859-2 iso-latin-10)) -- (coding-priority . (iso-8859-2)) - (nonascii-translation . latin-iso8859-2) ++ "Romanian" '((charset iso-8859-2) ++ (coding-system iso-8859-2 iso-latin-10) ++ (coding-priority iso-8859-2) + (nonascii-translation . iso-8859-2) (input-method . "latin-2-postfix") - (unibyte-syntax . "latin-2") (unibyte-display . iso-8859-2) (tutorial . "TUTORIAL.ro") (sample-text . "Bunã ziua, bine aþi venit!") diff --cc lisp/language/tamil.el index be02f07376c,00000000000..04f3eacc5e5 mode 100644,000000..100644 --- a/lisp/language/tamil.el +++ b/lisp/language/tamil.el @@@ -1,43 -1,0 +1,46 @@@ +;;; tamil.el --- Support for Tamil -*- coding: iso-2022-7bit; no-byte-compile: t -*- + +;; Copyright (C) 2003 Free Software Foundation, Inc. + +;; Maintainer: KAWABATA, Taichi +;; Keywords: multilingual, Indian, Tamil + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; This file defines language-info of Tamil script. + +;;; Code: + +(set-language-info-alist + "Tamil" '((charset mule-unicode-0100-24ff indian-glyph ) + (coding-system utf-8) + (coding-priority utf-8) + (input-method . "tamil-itrans") + (features tml-util) + (documentation . "\ +South Indian Language Tamil supported in this language environment.")) + '("Indian")) + ++;; For automatic composition. ++(set-char-table-range composition-function-table '(#x0b80 . #x0bff) ++ 'tamil-composition-function) +(provide 'tamil) + +;;; tamil.el ends here diff --cc lisp/language/thai-util.el index 58588f974d7,734ea4de5f3..251c1fee5bc --- a/lisp/language/thai-util.el +++ b/lisp/language/thai-util.el @@@ -1,9 -1,10 +1,12 @@@ ;;; thai-util.el --- utilities for Thai -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. -;; Copyright (C) 2001 Free Software Foundation, Inc. ++;; Licensed to the Free Software Foundation. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 - ;; Keywords: mule, multilingual, thai + ;; Keywords: mule, multilingual, Thai, i18n ;; This file is part of GNU Emacs. diff --cc lisp/language/thai.el index f822e93bd1b,858814ea3fb..0723c3d182b --- a/lisp/language/thai.el +++ b/lisp/language/thai.el @@@ -1,9 -1,10 +1,13 @@@ -;;; thai.el --- support for Thai -*- coding: iso-2022-7bit; -*- +;;; thai.el --- support for Thai -*- coding: iso-2022-7bit; no-byte-compile: t -*- ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. + ;; Copyright (C) 2002 Free Software Foundation, Inc. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 - ;; Keywords: multilingual, Thai + ;; Keywords: multilingual, Thai, i18n ;; This file is part of GNU Emacs. @@@ -50,18 -48,33 +51,32 @@@ (input-method . "thai-kesmanee") (unibyte-display . thai-tis620) (features thai-util) - (sample-text + (sample-text . (thai-compose-string - (copy-sequence "Thai (,T@RIRd7B(B) ,TJGQJ4U$CQ:(B, ,TJGQJ4U$hP(B"))) + (copy-sequence "Thai (,T@RIRd7B(B) ,TJ0GQ1J04U1$0CQ1:(B, ,TJ0GQ1J04U10$h1P(B"))) (documentation . t))) -- - ;; Register a function to compose Thai characters. - (let ((patterns '(("\\c0\\c4\\|\\c0\\(\\c2\\|\\c3\\)\\c4?" - . thai-composition-function)))) - (aset composition-function-table (make-char 'thai-tis620) patterns) - (dotimes (i (1+ (- #xe7f #xe00))) - (aset composition-function-table (decode-char 'ucs (+ i #xe00)) patterns))) + (define-coding-system 'cp874 + "DOS codepage 874 (Thai)" + :coding-type 'charset + :mnemonic ?D + :charset-list '(cp874) + :mime-charset 'cp874) + (define-coding-system-alias 'ibm874 'cp874) + + (define-coding-system 'iso-8859-11 + "ISO/IEC 8859/11 (Latin/Thai) + This is the same as `thai-tis620' with the addition of no-break-space." + :coding-type 'charset + :mnemonic ?* + :mime-charset 'iso-8859-11 ; not actually registered as of 2002-05-24 + :charset-list '(iso-8859-11)) + + ;; For automatic composition. + (let ((chars ",TQTUVWXYZghijklmn(B")) + (dotimes (i (length chars)) + (aset composition-function-table (aref chars i) + 'thai-composition-function))) (provide 'thai) diff --cc lisp/language/tibet-util.el index 2336b988fd0,6412b4f3654..260cf7efe54 --- a/lisp/language/tibet-util.el +++ b/lisp/language/tibet-util.el @@@ -35,23 -35,6 +35,23 @@@ ;;; Code: +(defconst tibetan-obsolete-glyphs - `(("$(7!=(B" . "$(8!=(B") ; 2 col <-> 1 col - ("$(7!?(B" . "$(8!?(B") - ("$(7!@(B" . "$(8!@(B") - ("$(7!A(B" . "$(8!A(B") - ("$(7"`(B" . "$(8"`(B") - ("$(7!;(B" . "$(8!;(B") - ("$(7!D(B" . "$(8!D(B") ++ `(("$(7!=(B" . "$(7!=(B") ; 2 col <-> 1 col ++ ("$(7!?(B" . "$(7!?(B") ++ ("$(7!@(B" . "$(7!@(B") ++ ("$(7!A(B" . "$(7!A(B") ++ ("$(7"`(B" . "$(7"`(B") ++ ("$(7!;(B" . "$(7!;(B") ++ ("$(7!D(B" . "$(7!D(B") + ;; Yes these are dirty. But ... + ("$(7!>(B $(7!>(B" . ,(compose-string "$(7!>(B $(7!>(B" 0 3 [?$(7!>(B (Br . Bl) ? (Br . Bl) ?$(7!>(B])) + ("$(7!4!5!5(B" . ,(compose-string + "$(7#R#S#S#S(B" 0 4 + [?$(7#R(B (Br . Bl) ?$(7#S(B (Br . Bl) ?$(7#S(B (Br . Bl) ?$(7#S(B])) + ("$(7!4!5(B" . ,(compose-string "$(7#R#S#S(B" 0 3 [?$(7#R(B (Br . Bl) ?$(7#S(B (Br . Bl) ?$(7#S(B])) + ("$(7!6(B" . ,(compose-string "$(7#R#S!I(B" 0 3 [?$(7#R(B (Br . Bl) ?$(7#S(B (br . tr) ?$(7!I(B])) + ("$(7!4(B" . ,(compose-string "$(7#R#S(B" 0 2 [?$(7#R(B (Br . Bl) ?$(7#S(B])))) + ;;;###autoload (defun tibetan-char-p (ch) "Check if char CH is Tibetan character. @@@ -163,7 -146,7 +163,7 @@@ The returned string has no composition ;; If 'a follows a consonant, turn it into the subjoined form. ;; * Disabled by Tomabechi 2000/06/09 * ;; Because in Unicode, $(7"A(B may follow directly a consonant without - ;; any intervening vowel, as in 4$(7"90"914""0"""Q14"A0"A1!;(B=4$(7"90"91(B 4$(7""0""1(B 4$(7"A0"A1(B not 4$(7"90"91(B 4$(7""0""1(B $(7"Q(B 4$(7"A0"A1(B - ;; any intervening vowel, as in $(7"9"""Q"A!;(B=$(7"9(B $(7""(B $(7"A(B not $(7"9(B $(7""(B $(7"Q(B $(7"A(B ++ ;; any intervening vowel, as in $(7"9"""Q"A!;(B=$(7"9(B $(7""(B $(7"A(B not $(7"9(B $(7""(B $(7"Q(B $(7"A(B ;;(if (and (= char ?$(7"A(B) ;; (aref (char-category-set (car last)) ?0)) ;; (setq char ?$(7"R(B)) ;; modified for new font by Tomabechi 1999/12/10 @@@ -185,7 -168,7 +185,8 @@@ ;; Compose lower vowel sign vertically under. ((aref (char-category-set char) ?3) -- (if (eq char ?$(7"Q(B) ;; `$(7"Q(B' should not visible when composed. ++ (if (or (eq char ?$(7"Q(B) ;; `$(7"Q(B' and `$,1FP(B' should not visible when composed. ++ (eq char #xF70)) (setq rule nil) (setq rule stack-under))) ;; Transform ra-mgo (superscribed r) if followed by a subjoined diff --cc lisp/language/tibetan.el index 3ca1da9d839,9ee39ddae87..ab9516f73d1 --- a/lisp/language/tibetan.el +++ b/lisp/language/tibetan.el @@@ -1,9 -1,9 +1,12 @@@ ;;; tibetan.el --- support for Tibetan language -*- coding: iso-2022-7bit; -*- ;; Copyright (C) 1997 Electrotechnical Laboratory, JAPAN. --;; Licensed to the Free Software Foundation. ++;; Licensed to the Free Software Foundation. ++;; Copyright (C) 2003 ++;; National Institute of Advanced Industrial Science and Technology (AIST) ++;; Registration Number H13PRO009 --;; Keywords: multilingual, Tibetan ++;; Keywords: multilingual, Tibetan, i18n ;; This file is part of GNU Emacs. @@@ -100,11 -98,11 +101,7 @@@ (input-method . "tibetan-wylie") (features tibet-util) (documentation . t) - (sample-text - (sample-text -- . (tibetan-compose-string -- (copy-sequence - "Tibetan (4$(7"7r'"]0"7"]14"20"21!;4%P0"G#!"Q14"20"21!;(B) $(7!4!5!5!>4"70"714$P0"!#C"Q1!;4"Er'"S0"E"S14"G0"G1!;4"70"714"2r'"[0"2"[1!;4"Dr'"[0"D"[14"#0"#14"G0"G1!>4"Ir'"]r'"_0"I"]"_1!;4"90"9"Q1!;4"/r'"S0"/"S1!;4"50"5"Q14#2x!#9r'"[0"2#9"[1!;4"Hx!"Rx!"Ur'"c0"H"A"U"c1!>(B"))))) -"Tibetan ($(7"7"]"2!;"G#!"Q"2!;(B) $(7!4!5!5!>"7"!#C"Q!;"E"S"G!;"7"2"[!;"D"["#"G!>"I"]"_!;"9"Q!;"/"S!;"5"Q"2#9"[!;"H"A"U"c!>(B"))))) -- ++ (sample-text "Tibetan ($(7"7"]"2!;"G#!"Q"2!;(B) $(7!4!5!5!>"7"!#C"Q!;"E"S"G!;"7"2"[!;"D"["#"G!>"I"]"_!;"9"Q!;"/"S!;"5"Q"2#9"[!;"H"A"U"c!>(B"))) ;; `$(7"A(B' is included in the pattern for subjoined consonants because we ;; treat it specially in tibetan-add-components. @@@ -115,13 -113,9 +112,9 @@@ ;; $(7"A(B is removed from the class of subjoined. Tomabechi 2000/06/08 ;; (for Unicode support) (defconst tibetan-composable-pattern -- "[$(7"!(B-$(7"J"K(B][$(7#!(B-$(7#J#K#L#M(B]*[$(7"Q"R"S(B-$(7"^"a"b"e(B]*[$(7"_"c"d"g(B-$(7"l!I!e!g(B]*" ++ "[$(7"!(B-$(7"J"K(B][$(7#!(B-$(7#J#K#L#M(B]*[$,1FP$(7"Q"R"S(B-$(7"^"a"b"e(B]*[$(7"_"c"d"g(B-$(7"l!I!e!g(B]*" "Regexp matching a composable sequence of Tibetan characters.") - ;; Register a function to compose Tibetan characters. - (aset composition-function-table (make-char 'tibetan) - (list (cons tibetan-composable-pattern 'tibetan-composition-function))) - ;;; ;;; Definitions of conversion data. ;;; @@@ -608,6 -619,15 +601,16 @@@ This also matches some punctuation char (defvar tibetan-decomposed nil) (defvar tibetan-decomposed-temp nil) ++ + ;; For automatic composition. + (dolist (range '((?$(7#!(B . ?$(7#J(B) "$(7#K#L#M"Q"R(B" (?$(7"S(B . ?$(7"^(B) "$(7"a"b"e"_"c"d(B" (?$(7"g(B . ?$(7"l(B) "$(7!I!e!g(B")) + (if (stringp range) + (dotimes (i (length range)) + (aset composition-function-table (aref range i) + 'tibetan-composition-function)) + (set-char-table-range composition-function-table range + 'tibetan-composition-function))) + (provide 'tibetan) ;;; tibetan.el ends here diff --cc lisp/language/tml-util.el index bb8c8f19e04,00000000000..34c18741e97 mode 100644,000000..100644 --- a/lisp/language/tml-util.el +++ b/lisp/language/tml-util.el @@@ -1,367 -1,0 +1,371 @@@ +;;; tml-util.el --- support for composing tamil characters -*-coding: iso-2022-7bit;-*- + +;; Copyright (C) 2001 Free Software Foundation, Inc. + +;; Maintainer: KAWABATA, Taichi +;; Keywords: multilingual, Indian, Tamil + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;; Created: Nov. 08. 2002 + +;;; Commentary: + +;; This file provides character(Unicode) to glyph(CDAC) conversion and +;; composition of Tamil script characters. + +;;; Code: + +;; Tamil Composable Pattern +;; C .. Consonants +;; V .. Vowel +;; H .. Pulli +;; M .. Matra +;; V .. Vowel +;; A .. Anuswar +;; D .. Chandrabindu +;; 1. vowel +;; V +;; 2. syllable : only ligature-formed pattern forms composition. +;; (CkHCs|C)(H|M)? +;; 3. sri special +;; (CsHCrVi) + +;; oririnal +;; ((CH)?(CH)?(CH)?CH)?C(H|M?(A|D)?)? + +(defconst tamil-consonant + "[$,1<5(B-$,1 (length x) (length y)))))))) + + - ;;;###autoload - (defun tamil-composition-function (from to pattern &optional string) - "Compose Tamil characters in REGION, or STRING if specified. - Assume that the REGION or STRING must fully match the composable - PATTERN regexp." - (if string (tamil-compose-syllable-string string) - (tamil-compose-syllable-region from to)) - (- to from)) - - ;; Register a function to compose Tamil characters. - (mapc - (function (lambda (ucs) - (aset composition-function-table (decode-char 'ucs ucs) - (list (cons tamil-composable-pattern - 'tamil-composition-function))))) - (nconc '(#x0b82 #x0b83) (tamil-range #x0b85 #x0bb9))) - +;; Notes on conversion steps. + +;; 1. chars to glyphs +;; Simple replacement of characters to glyphs is done. + +;; 2. glyphs reordering. +;; following "$,4)j(B", "$,4)k(B", "$,4)l(B" goes to the front. + +;; 3. glyphs to glyphs +;; reordered vowels are ligatured to consonants. + +;; 4. Composition. +;; left modifiers will be attached at the left. +;; others will be attached right. + +(defvar tml-char-glyph + '(;; various signs - ;;("$,1<"(B" . "") ++ ("$,1<"(B" . "$,4)b(B") ;; not good + ("$,1<#(B" . "$,4*G(B") + ;; Independent Vowels + ("$,1<%(B" . "$,4*<(B") + ("$,1<&(B" . "$,4*=(B") + ("$,1<'(B" . "$,4*>(B") + ("$,1<((B" . "$,4*?(B") + ("$,1<)(B" . "$,4*@(B") + ("$,1<*(B" . "$,4*A(B") + ("$,1<.(B" . "$,4*B(B") + ("$,1(B" . "$,4*K(B") + ("$,1= char 128) (encode-coding-char char coding))) (setq encoding-msg (if encoded diff --cc lisp/startup.el index 4b0ab342ef2,84430ae4f6e..6bcb04855e1 --- a/lisp/startup.el +++ b/lisp/startup.el @@@ -1216,10 -1168,15 +1216,13 @@@ where FACE is a valid face specificatio (insert-image img (propertize "xxx" 'help-echo help-echo 'keymap map))) (insert "\n")))) + (fancy-splash-insert + :face '(variable-pitch :background "red") + "\n!! This version is ALPHA status. It may lose your data!!\n\n") - (if (eq system-type 'gnu/linux) - (fancy-splash-insert - :face '(variable-pitch :foreground "red") - "GNU Emacs is one component of a Linux-based GNU system.") - (fancy-splash-insert - :face '(variable-pitch :foreground "red") + (fancy-splash-insert + :face '(variable-pitch :foreground "red") + (if (eq system-type 'gnu/linux) + "GNU Emacs is one component of the GNU/Linux operating system." "GNU Emacs is one component of the GNU operating system.")) (insert "\n") (unless (equal (buffer-name fancy-splash-outer-buffer) "*scratch*") diff --cc lisp/tar-mode.el index 03cb199de73,4adad6fe374..cf795e5d92a --- a/lisp/tar-mode.el +++ b/lisp/tar-mode.el @@@ -201,8 -202,8 +202,9 @@@ This information is useful, but it take (defun tar-header-block-tokenize (string) "Return a `tar-header' structure. -This is a list of name, mode, uid, gid, size, +This is a list of name, mode, uid, gid, size, write-date, checksum, link-type, and link-name." ++ (setq string (string-as-unibyte string)) (cond ((< (length string) 512) nil) (;(some 'plusp string) ; <-- oops, massive cycle hog! (or (not (= 0 (aref string 0))) ; This will do. @@@ -295,6 -295,6 +296,7 @@@ (defun tar-header-block-checksum (string) "Compute and return a tar-acceptable checksum for this block." ++ (setq string (string-as-unibyte string)) (let* ((chk-field-start tar-chk-offset) (chk-field-end (+ chk-field-start 8)) (sum 0) @@@ -403,10 -378,10 +405,9 @@@ MODE should be an integer which is a fi Place a dired-like listing on the front; then narrow to it, so that only that listing is visible (and the real data of the buffer is hidden)." -- (set-buffer-multibyte nil) (message "Parsing tar file...") (let* ((result '()) - (pos 1) + (pos (point-min)) (bs (max 1 (- (buffer-size) 1024))) ; always 2+ empty blocks at end. (bs100 (max 1 (/ bs 100))) tokens) @@@ -459,15 -434,13 +460,11 @@@ (cons (tar-header-block-summarize (tar-desc-tokens tar-desc)) (cons "\n" summaries)))) - (if default-enable-multibyte-characters - (set-buffer-multibyte t 'to)) (let ((total-summaries (apply 'concat summaries))) - (if (multibyte-string-p total-summaries) - (set-buffer-multibyte t)) (insert total-summaries)) (make-local-variable 'tar-header-offset) (setq tar-header-offset (point)) - (narrow-to-region 1 tar-header-offset) + (narrow-to-region (point-min) tar-header-offset) - (if enable-multibyte-characters - (setq tar-header-offset (position-bytes tar-header-offset))) (set-buffer-modified-p nil)))) (defvar tar-mode-map nil "*Local keymap for Tar mode listings.") @@@ -584,7 -561,7 +585,7 @@@ See also: variables `tar-update-datesta (set (make-local-variable 'write-contents-hooks) '(tar-mode-write-file)) (widen) (if (and (boundp 'tar-header-offset) tar-header-offset) - (narrow-to-region (point-min) (byte-to-position tar-header-offset)) - (narrow-to-region 1 tar-header-offset) ++ (narrow-to-region (point-min) tar-header-offset) (tar-summarize-buffer) (tar-next-line 0))) @@@ -692,11 -669,10 +693,10 @@@ appear on disk when you save the tar-fi (tokens (tar-desc-tokens descriptor)) (name (tar-header-name tokens)) (size (tar-header-size tokens)) - (start (+ (tar-desc-data-start descriptor) tar-header-offset -1)) + (start (+ (tar-desc-data-start descriptor) + (- tar-header-offset (point-min)))) (end (+ start size))) (let* ((tar-buffer (current-buffer)) -- (tar-buffer-multibyte enable-multibyte-characters) (tarname (buffer-name)) (bufname (concat (file-name-nondirectory name) " (" @@@ -713,58 -690,35 +714,32 @@@ (setq bufname (buffer-name buffer)) (setq just-created t) (unwind-protect - (progn + (let (coding) + (narrow-to-region start end) + (goto-char start) + (setq coding (or coding-system-for-read + (and set-auto-coding-function + (funcall set-auto-coding-function + name (- end start))))) + (if (or (not coding) + (eq (coding-system-type coding) 'undecided)) + (setq coding (detect-coding-region start end t))) - (if (eq (coding-system-type coding) 'undecided) - (setq coding - (coding-system-change-text-conversion coding - 'us-ascii))) ++ (if (and default-enable-multibyte-characters ++ (coding-system-get coding :for-unibyte)) ++ (save-excursion ++ (set-buffer buffer) ++ (set-buffer-multibyte nil))) + (widen) - (set-buffer-multibyte nil) ++ (decode-coding-region start end coding buffer) (save-excursion (set-buffer buffer) - (if enable-multibyte-characters - (progn - ;; We must avoid unibyte->multibyte conversion. - (set-buffer-multibyte nil) - (insert-buffer-substring tar-buffer start end) - (set-buffer-multibyte t)) - (insert-buffer-substring tar-buffer start end)) - (if (and enable-multibyte-characters - (eq (coding-system-type 'raw-text) coding)) - (set-buffer-multibyte nil)) (goto-char (point-min)) (setq buffer-file-name new-buffer-file-name) (setq buffer-file-truename - (abbreviate-file-name buffer-file-name))) - (decode-coding-region start end coding buffer) - (save-excursion - (set-buffer buffer) - (goto-char (point-min)) + (abbreviate-file-name buffer-file-name)) - ;; We need to mimic the parts of insert-file-contents - ;; which determine the coding-system and decode the text. - (let ((coding - (or coding-system-for-read - (and set-auto-coding-function - (save-excursion - (funcall set-auto-coding-function - name (- (point-max) (point))))))) - (multibyte enable-multibyte-characters) - (detected (detect-coding-region - (point-min) - (min (+ (point-min) 16384) (point-max)) t))) - (if coding - (or (numberp (coding-system-eol-type coding)) - (setq coding (coding-system-change-eol-conversion - coding - (coding-system-eol-type detected)))) - (setq coding - (or (find-new-buffer-file-coding-system detected) - (let ((file-coding - (find-operation-coding-system - 'insert-file-contents buffer-file-name))) - (if (consp file-coding) - (setq file-coding (car file-coding)) - file-coding))))) - (if (or (eq coding 'no-conversion) - (eq (coding-system-type coding) 5)) - (setq multibyte (set-buffer-multibyte nil))) - (or multibyte - (setq coding - (coding-system-change-text-conversion - coding 'raw-text))) - (decode-coding-region (point-min) (point-max) coding) - (set-buffer-file-coding-system coding)) ++ (set-buffer-file-coding-system coding) ;; Set the default-directory to the dir of the - ;; superior buffer. + ;; superior buffer. (setq default-directory (save-excursion (set-buffer tar-buffer) @@@ -779,8 -733,8 +754,8 @@@ (set-buffer-modified-p nil) (tar-subfile-mode 1)) (set-buffer tar-buffer)) - (narrow-to-region 1 tar-header-offset) + (narrow-to-region (point-min) tar-header-offset) - (set-buffer-multibyte tar-buffer-multibyte))) + (goto-char pos))) (if view-p (view-buffer buffer (and just-created 'kill-buffer)) (if (eq other-window-p 'display) @@@ -835,10 -789,8 +810,9 @@@ the current tar-entry. (tokens (tar-desc-tokens descriptor)) (name (tar-header-name tokens)) (size (tar-header-size tokens)) - (start (+ (tar-desc-data-start descriptor) tar-header-offset -1)) + (start (+ (tar-desc-data-start descriptor) + (- tar-header-offset (point-min)))) (end (+ start size)) - (multibyte enable-multibyte-characters) (inhibit-file-name-handlers inhibit-file-name-handlers) (inhibit-file-name-operation inhibit-file-name-operation)) (save-restriction @@@ -949,8 -895,7 +917,7 @@@ for this to be permanent. (forward-line 1))) ;; after doing the deletions, add any padding that may be necessary. (tar-pad-to-blocksize) - (narrow-to-region 1 tar-header-offset)) + (narrow-to-region (point-min) tar-header-offset)) - (set-buffer-multibyte multibyte) (if (zerop n) (message "Nothing to expunge.") (message "%s files expunged. Be sure to save this buffer." n))))) @@@ -1063,10 -1009,9 +1031,9 @@@ for this to be permanent. (forward-line 1) (delete-region p (point)) (insert (tar-header-block-summarize tokens) "\n") - (setq tar-header-offset (position-bytes (point-max)))) + (setq tar-header-offset (point-max))) - + (widen) - (set-buffer-multibyte nil) (let* ((start (+ (tar-desc-data-start descriptor) tar-header-offset -513))) ;; ;; delete the old field and insert a new one. @@@ -1089,8 -1040,7 +1062,7 @@@ (buffer-substring start (+ start 512)) chk (tar-header-name tokens)) ))) - (narrow-to-region 1 tar-header-offset) + (narrow-to-region (point-min) tar-header-offset) - (set-buffer-multibyte multibyte) (tar-next-line 0)))) @@@ -1136,18 -1079,19 +1102,19 @@@ to make your changes permanent. (error "Can't find this tar file entry in its parent tar file!")) (unwind-protect (save-excursion - (widen) - (set-buffer-multibyte nil) ;; delete the old data... - (let* ((data-start (+ start tar-header-offset -1)) + (let* ((data-start (+ start (- tar-header-offset (point-min)))) (data-end (+ data-start (ash (ash (+ size 511) -9) 9)))) - (delete-region data-start data-end) + (narrow-to-region data-start data-end) + (delete-region (point-min) (point-max)) ;; insert the new data... (goto-char data-start) - (insert-buffer subfile) - (setq subfile-size - (encode-coding-region - data-start (+ data-start subfile-size) coding)) + (save-excursion + (set-buffer subfile) + (save-restriction + (widen) + (encode-coding-region 1 (point-max) coding tar-superior-buffer))) + (setq subfile-size (- (point-max) (point-min))) ;; ;; pad the new data out to a multiple of 512... (let ((subfile-size-pad (ash (ash (+ subfile-size 511) -9) 9))) @@@ -1208,8 -1151,7 +1174,7 @@@ ))) ;; after doing the insertion, add any final padding that may be necessary. (tar-pad-to-blocksize)) - (narrow-to-region (point-min) tar-header-offset) - (set-buffer-multibyte tar-buffer-multibyte))) - (narrow-to-region 1 tar-header-offset))) ++ (narrow-to-region (point-min) tar-header-offset))) (set-buffer-modified-p t) ; mark the tar file as modified (tar-next-line 0) (set-buffer subfile) @@@ -1271,7 -1210,7 +1233,7 @@@ Leaves the region wide. buffer-file-name nil t)) (tar-clear-modification-flags) (set-buffer-modified-p nil)) - (narrow-to-region (point-min) (byte-to-position tar-header-offset))) - (narrow-to-region 1 tar-header-offset)) ++ (narrow-to-region (point-min) tar-header-offset)) ;; Return t because we've written the file. t) diff --cc lisp/term.el index 9f49ee96045,37fa1780c2c..d57355416aa --- a/lisp/term.el +++ b/lisp/term.el @@@ -866,218 -1114,7 +866,216 @@@ is buffer-local." (define-key term-mode-map [menu-bar signals] (setq term-signals-menu (cons "Signals" newmap))) ))) + +;; Set up term-raw-map, etc. + +(defun term-set-escape-char (c) + "Change term-escape-char and keymaps that depend on it." + (if term-escape-char + (define-key term-raw-map term-escape-char 'term-send-raw)) + (setq c (make-string 1 c)) + (define-key term-raw-map c term-raw-escape-map) + ;; Define standard bindings in term-raw-escape-map + (define-key term-raw-escape-map "\C-v" + (lookup-key (current-global-map) "\C-v")) + (define-key term-raw-escape-map "\C-u" + (lookup-key (current-global-map) "\C-u")) + (define-key term-raw-escape-map c 'term-send-raw) + (define-key term-raw-escape-map "\C-q" 'term-pager-toggle) + ;; The keybinding for term-char-mode is needed by the menubar code. + (define-key term-raw-escape-map "\C-k" 'term-char-mode) + (define-key term-raw-escape-map "\C-j" 'term-line-mode) + ;; It's convenient to have execute-extended-command here. + (define-key term-raw-escape-map [?\M-x] 'execute-extended-command)) + +(let* ((map (make-keymap)) + (esc-map (make-keymap)) + (i 0)) + (while (< i 128) + (define-key map (make-string 1 i) 'term-send-raw) + (define-key esc-map (make-string 1 i) 'term-send-raw-meta) + (setq i (1+ i))) - (dolist (elm (generic-character-list)) - (define-key map (vector elm) 'term-send-raw)) + (define-key map "\e" esc-map) + (setq term-raw-map map) + (setq term-raw-escape-map + (copy-keymap (lookup-key (current-global-map) "\C-x"))) + +;;; Added nearly all the 'grey keys' -mm + + (progn + (term-if-xemacs + (define-key term-raw-map [button2] 'term-mouse-paste)) + (term-ifnot-xemacs + (define-key term-raw-map [mouse-2] 'term-mouse-paste) + (define-key term-raw-map [menu-bar terminal] term-terminal-menu) + (define-key term-raw-map [menu-bar signals] term-signals-menu)) + (define-key term-raw-map [up] 'term-send-up) + (define-key term-raw-map [down] 'term-send-down) + (define-key term-raw-map [right] 'term-send-right) + (define-key term-raw-map [left] 'term-send-left) + (define-key term-raw-map [delete] 'term-send-del) + (define-key term-raw-map [backspace] 'term-send-backspace) + (define-key term-raw-map [home] 'term-send-home) + (define-key term-raw-map [end] 'term-send-end) + (define-key term-raw-map [prior] 'term-send-prior) + (define-key term-raw-map [next] 'term-send-next))) + +(term-set-escape-char ?\C-c) + +(put 'term-mode 'mode-class 'special) + +(defun term-mode () + "Major mode for interacting with an inferior interpreter. +The interpreter name is same as buffer name, sans the asterisks. + +There are two submodes: line mode and char mode. By default, you are +in char mode. In char sub-mode, each character (except +`term-escape-char') is set immediately. +In line mode, you send a line of input at a time; use +\\[term-send-input] to send. + +In line mode, this maintains an input history of size +`term-input-ring-size', and you can access it with the commands +\\[term-next-input], \\[term-previous-input], and +\\[term-dynamic-list-input-ring]. Input ring history expansion can be +achieved with the commands \\[term-replace-by-expanded-history] or +\\[term-magic-space]. Input ring expansion is controlled by the +variable `term-input-autoexpand', and addition is controlled by the +variable `term-input-ignoredups'. + +Input to, and output from, the subprocess can cause the window to scroll to +the end of the buffer. See variables `term-scroll-to-bottom-on-input', +and `term-scroll-to-bottom-on-output'. + +If you accidentally suspend your process, use \\[term-continue-subjob] +to continue it. + +This mode can be customised to create specific modes for running +particular subprocesses. This can be done by setting the hooks +`term-input-filter-functions', `term-input-filter', +`term-input-sender' and `term-get-old-input' to appropriate functions, +and the variable `term-prompt-regexp' to the appropriate regular +expression. + +Commands in raw mode: + +\\{term-raw-map} + +Commands in line mode: + +\\{term-mode-map} + +Entry to this mode runs the hooks on `term-mode-hook'." + (interactive) + ;; Do not remove this. All major modes must do this. + (kill-all-local-variables) + (setq major-mode 'term-mode) + (setq mode-name "Term") + (use-local-map term-mode-map) + (make-local-variable 'term-home-marker) + (setq term-home-marker (copy-marker 0)) + (make-local-variable 'term-saved-home-marker) + (make-local-variable 'term-height) + (make-local-variable 'term-width) + (setq term-width (1- (window-width))) + (setq term-height (1- (window-height))) + (make-local-variable 'term-terminal-parameter) + (make-local-variable 'term-saved-cursor) + (make-local-variable 'term-last-input-start) + (setq term-last-input-start (make-marker)) + (make-local-variable 'term-last-input-end) + (setq term-last-input-end (make-marker)) + (make-local-variable 'term-last-input-match) + (setq term-last-input-match "") + (make-local-variable 'term-prompt-regexp) ; Don't set; default + (make-local-variable 'term-input-ring-size) ; ...to global val. + (make-local-variable 'term-input-ring) + (make-local-variable 'term-input-ring-file-name) + (or (and (boundp 'term-input-ring) term-input-ring) + (setq term-input-ring (make-ring term-input-ring-size))) + (make-local-variable 'term-input-ring-index) + (or (and (boundp 'term-input-ring-index) term-input-ring-index) + (setq term-input-ring-index nil)) + + (make-local-variable 'term-command-hook) + (setq term-command-hook (symbol-function 'term-command-hook)) + +;;; I'm not sure these saves are necessary but, since I +;;; haven't tested the whole thing on a net connected machine with +;;; a properly configured ange-ftp, I've decided to be conservative +;;; and put them in. -mm + + (make-local-variable 'term-ansi-at-host) + (setq term-ansi-at-host (system-name)) + + (make-local-variable 'term-ansi-at-dir) + (setq term-ansi-at-dir default-directory) + + (make-local-variable 'term-ansi-at-message) + (setq term-ansi-at-message nil) + +;;; For user tracking purposes -mm + (make-local-variable 'ange-ftp-default-user) + (make-local-variable 'ange-ftp-default-password) + (make-local-variable 'ange-ftp-generate-anonymous-password) + +;;; You may want to have different scroll-back sizes -mm + (make-local-variable 'term-buffer-maximum-size) + +;;; Of course these have to be buffer-local -mm + (make-local-variable 'term-ansi-current-bold) + (make-local-variable 'term-ansi-current-color) + (make-local-variable 'term-ansi-face-already-done) + (make-local-variable 'term-ansi-current-bg-color) + (make-local-variable 'term-ansi-current-underline) + (make-local-variable 'term-ansi-current-highlight) + (make-local-variable 'term-ansi-current-reverse) + (make-local-variable 'term-ansi-current-invisible) + + (make-local-variable 'term-terminal-state) + (make-local-variable 'term-kill-echo-list) + (make-local-variable 'term-start-line-column) + (make-local-variable 'term-current-column) + (make-local-variable 'term-current-row) + (make-local-variable 'term-log-buffer) + (make-local-variable 'term-scroll-start) + (make-local-variable 'term-scroll-end) + (setq term-scroll-end term-height) + (make-local-variable 'term-scroll-with-delete) + (make-local-variable 'term-pager-count) + (make-local-variable 'term-pager-old-local-map) + (make-local-variable 'term-old-mode-map) + (make-local-variable 'term-insert-mode) + (make-local-variable 'term-dynamic-complete-functions) + (make-local-variable 'term-completion-fignore) + (make-local-variable 'term-get-old-input) + (make-local-variable 'term-matching-input-from-input-string) + (make-local-variable 'term-input-autoexpand) + (make-local-variable 'term-input-ignoredups) + (make-local-variable 'term-delimiter-argument-list) + (make-local-variable 'term-input-filter-functions) + (make-local-variable 'term-input-filter) + (make-local-variable 'term-input-sender) + (make-local-variable 'term-eol-on-send) + (make-local-variable 'term-scroll-to-bottom-on-output) + (make-local-variable 'term-scroll-show-maximum-output) + (make-local-variable 'term-ptyp) + (make-local-variable 'term-exec-hook) + (make-local-variable 'term-vertical-motion) + (make-local-variable 'term-pending-delete-marker) + (setq term-pending-delete-marker (make-marker)) + (make-local-variable 'term-current-face) + (make-local-variable 'term-pending-frame) + (setq term-pending-frame nil) + (run-hooks 'term-mode-hook) + (term-if-xemacs + (set-buffer-menubar + (append current-menubar (list term-terminal-menu)))) + (or term-input-ring + (setq term-input-ring (make-ring term-input-ring-size))) + (term-update-mode-line)) + (defun term-reset-size (height width) (setq term-height height) (setq term-width width) diff --cc lisp/term/mac-win.el index f9712a5a52f,862e6d89820..f00507898fe --- a/lisp/term/mac-win.el +++ b/lisp/term/mac-win.el @@@ -197,51 -183,14 +183,41 @@@ Switch to a buffer editing the last fil ;; and mule-unicode-xxxx-yyyy) are displayed by a mac-roman font. (if (fboundp 'new-fontset) - (progn - (require 'fontset) - (setup-default-fontset) - (create-fontset-from-fontset-spec - "-etl-fixed-medium-r-normal-*-16-*-*-*-*-*-fontset-mac, - ascii:-*-Monaco-*-*-*-*-12-*-*-*-*-*-mac-roman") - (let ((monaco-font '("monaco" . "mac-roman"))) - (map-char-table - (function - (lambda (key val) - (or (generic-char-p key) - (memq (char-charset val) - '(ascii eight-bit-control eight-bit-graphic)) - (set-fontset-font "fontset-mac" val monaco-font)))) - (get 'mac-roman-decoder 'translation-table))))) + (create-fontset-from-fontset-spec + "-etl-fixed-medium-r-normal-*-16-*-*-*-*-*-fontset-mac, + ascii:-*-Monaco-*-*-*-*-12-*-*-*-*-*-mac-roman + mac-roman:-*-Monaco-*-*-*-*-12-*-*-*-*-*-mac-roman")) + -;; To display filenames in Chinese or Japanese, replace mac-roman with -;; big5 or sjis -(setq file-name-coding-system 'mac-roman) + +(if (eq system-type 'darwin) + ;; On Darwin filenames are encoded in UTF-8 + (setq file-name-coding-system 'utf-8) + ;; To display filenames in Chinese or Japanese, replace mac-roman with + ;; big5 or sjis + (setq file-name-coding-system 'mac-roman)) + +;; If Emacs is started from the Finder, change the default directory +;; to the user's home directory. +(if (string= default-directory "/") + (cd "~")) + +;; Tell Emacs to use pipes instead of pty's for processes because the +;; latter sometimes lose characters. Pty support is compiled in since +;; ange-ftp will not work without it. +(setq process-connection-type nil) + +;; Assume that fonts are always scalable on the Mac. This sometimes +;; results in characters with jagged edges. However, without it, +;; fonts with both truetype and bitmap representations but no italic +;; or bold bitmap versions will not display these variants correctly. +(setq scalable-fonts-allowed t) + +;; Make suspend-emacs [C-z] collapse the current frame +(substitute-key-definition 'suspend-emacs 'iconify-frame + global-map) + +;; Support mouse-wheel scrolling +(mouse-wheel-mode 1) ;; (prefer-coding-system 'mac-roman) diff --cc lisp/textmodes/sgml-mode.el index 1028bb3122b,ad99ed7fecb..c62721270fb --- a/lisp/textmodes/sgml-mode.el +++ b/lisp/textmodes/sgml-mode.el @@@ -111,8 -104,9 +111,6 @@@ This takes effect when first loading th (define-key map "\"" 'sgml-name-self)) (when (memq ?' sgml-specials) (define-key map "'" 'sgml-name-self))) - (define-key map (vector (make-char 'latin-iso8859-1)) - 'sgml-maybe-name-self) - (dotimes (i 96) - (define-key map (vector (encode-char (+ i 32) 'latin-iso8859-1)) - 'sgml-maybe-name-self)) (let ((c 127) (map (nth 1 map))) (while (< (setq c (1+ c)) 256) diff --cc lisp/wid-edit.el index ff65fb56e59,1ac4e3c2542..c33790a511d --- a/lisp/wid-edit.el +++ b/lisp/wid-edit.el @@@ -3045,10 -3062,7 +3082,7 @@@ It will read a directory name from the (interactive) (lisp-complete-symbol 'boundp)) :tag "Variable") - + - (defvar widget-coding-system-prompt-value-history nil - "History of input to `widget-coding-system-prompt-value'.") - (define-widget 'coding-system 'symbol "A MULE coding-system." :format "%{%t%}: %v" diff --cc src/.gdbinit index ca6f5f255a2,86998b41209..ca6648e162f --- a/src/.gdbinit +++ b/src/.gdbinit @@@ -1,4 -1,4 +1,4 @@@ --# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001 ++# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2003 # Free Software Foundation, Inc. # # This file is part of GNU Emacs. @@@ -237,10 -238,10 +237,10 @@@ en define xchartable print (struct Lisp_Char_Table *) (($ & $valmask) | gdb_data_seg_bits) --printf "Purpose: " - output (char*)&((struct Lisp_Symbol *) ((((int) $->purpose) & $valmask) | gdb_data_seg_bits))->name->data - printf " %d extra slots", ($->size & 0x1ff) - 388 -output (char*)((struct Lisp_Symbol *) ((((int) $->purpose) & $valmask) | gdb_data_seg_bits))->name->data + printf " %d extra slots", ($->size & 0x1ff) - 68 echo \n ++printf "Purpose: " ++xprintsym $->purpose end document xchartable Print the address of the char-table $, and its purpose. @@@ -332,9 -340,51 +332,32 @@@ document xprintsy Print argument as a symbol. end + define xcoding + set $tmp = (struct Lisp_Hash_Table *) ((Vcoding_system_hash_table & $valmask) | gdb_data_seg_bits) + set $tmp = (struct Lisp_Vector *) (($tmp->key_and_value & $valmask) | gdb_data_seg_bits) + set $name = $tmp->contents[$arg0 * 2] + print $name + pr + print $tmp->contents[$arg0 * 2 + 1] + pr + end + document xcoding - Print a coding system whose id is the argument. ++ Print the name and attributes of coding system that has ID (argument). + end + + define xcharset + set $tmp = (struct Lisp_Hash_Table *) ((Vcharset_hash_table & $valmask) | gdb_data_seg_bits) + set $tmp = (struct Lisp_Vector *) (($tmp->key_and_value & $valmask) | gdb_data_seg_bits) + p $tmp->contents[$arg0->hash_index * 2] + pr + end + document xcharset - Print a charset name whose id is the argument. -end - -define xcurbuf - echo GAPSIZE: - output current_buffer->text->gap_size - echo \nGPT: - output current_buffer->text->gpt - echo / - output current_buffer->text->gpt_byte - echo \nZ: - output current_buffer->text->z - echo / - output current_buffer->text->z_byte - echo \nTEXT: - if current_buffer->text->gpt > 1 - print current_buffer->text->beg[0]@80 - else - print current_buffer->text->beg[current_buffer->text->gpt_byte-1]@80 - end ++ Print the name of charset that has ID (argument). + end + define xbacktrace set $bt = backtrace_list - while $bt + while $bt set $type = (enum Lisp_Type) ((*$bt->function >> gdb_valbits) & 0x7) if $type == Lisp_Symbol xprintsym *$bt->function diff --cc src/ChangeLog.22 index 00000000000,00000000000..355ffa20515 new file mode 100644 --- /dev/null +++ b/src/ChangeLog.22 @@@ -1,0 -1,0 +1,2061 @@@ ++2003-07-09 Kenichi Handa ++ ++ * coding.c (decode_coding_sjis): Check bytes more rigidly. ++ ++2003-06-26 Kenichi Handa ++ ++ * fileio.c (choose_write_coding_system): Return a decided coding ++ system. ++ (Fwrite_region): Set Vlast_coding_system_used to the return value ++ of choose_write_coding_system. ++ ++2003-06-06 Kenichi Handa ++ ++ * charset.c (Fset_charset_priority): Pay attention to duplicated ++ arguments. ++ ++ * coding.c (QCcategory): New variable. ++ (syms_of_coding): Defsym it. Set all elements of ++ Vcoding_category_table and their symbol values. ++ (Fset_coding_system_priority): Doc fix. Update symbol qvalues of ++ coding-category-XXX, and coding-category-list. ++ (Fdefine_coding_system_internal): Add category in the plist. ++ ++2003-06-05 Kenichi Handa ++ ++ * callproc.c (Fcall_process): Handle carryover correctly. ++ ++ * coding.c (decode_coding_iso_2022): Fix handling of invalid ++ bytes. ++ (raw_text_coding_system): Check NILP (coding_system). ++ (coding_inherit_eol_type): Check NILP (coding_system) and ++ NILP (parent). ++ (consume_chars): Fix for the case of raw-text. ++ ++ * process.c (read_process_output): Handle carryover correctly. ++ ++2003-06-02 Dave Love ++ ++ * regex.c (re_search_2): Fix last change. ++ ++2003-05-30 Kenichi Handa ++ ++ * regex.c (GET_CHAR_BEFORE_2): Check multibyte, not ++ target_multibyte. Even in a unibyte case, return a converted ++ multibyte char. ++ (GET_CHAR_AFTER): New macro. ++ (PATFETCH): Translate via multibyte char. ++ (HANDLE_UNIBYTE_RANGE): Delete this macro. ++ (SETUP_MULTIBYTE_RANGE): New macro. ++ (regex_compile): Setup compiled code so that its multibyteness ++ matches that of a target. Fix the handling of "[X-YZ]" using ++ SETUP_MULTIBYTE_RANGE. ++ (analyse_first) : For filling fastmap for all multibyte ++ characters, don't check by BASE_LEADING_CODE_P. ++ (re_search_2): Don't check RE_TARGET_MULTIBYTE_P (bufp). It is ++ the same as RE_MULTIBYTE_P (bufp) now. ++ (mutually_exclusive_p): Check by (! multibyte || ++ IS_REAL_ASCII (c)). ++ (TARGET_CHAR_AND_LENGTH): Delete this macro. ++ (TRANSLATE_VIA_MULTIBYTE): New macro. ++ (re_match_2_internal): Don't check RE_TARGET_MULTIBYTE_P (bufp). ++ It is the same as RE_MULTIBYTE_P (bufp) now. ++ : Translate via multibyte. ++ : Fetch a character by RE_STRING_CHAR_AND_LENGTH. Don't ++ translate it. ++ : Fetch a character by ++ RE_STRING_CHAR_AND_LENGTH. Translate via multibyte. ++ : Call bcmp_translate with the last arg `multibyte'. ++ Fetch a character ++ by GET_CHAR_AFTER. ++ (bcmp_translate): Likewise. ++ ++ * search.c (compile_pattern): Check the member target_multibyte, ++ not the member multibyte of buf. ++ ++ * lread.c (read1): While reading a string, set force_singlebyte ++ and force_multibyte correctly. ++ ++ * charset.c (Fset_unibyte_charset): Fix setting up of ++ unibyte_to_multibyte_table. ++ (init_charset_once): Likewise. ++ ++2003-05-29 Kenichi Handa ++ ++ * coding.c (setup_coding_system): If coding has ++ post-read-conversion or pre-write-conversion, set ++ CODING_REQUIRE_DECODING_MASK and CODING_REQUIRE_ENCODING_MASK ++ respectively. ++ (decode_coding_gap): Run post-read-conversion if any. ++ ++ * fileio.c (Finsert_file_contents): Even if we read into a ++ unibyte buffer, check if we must decode the result or not. ++ ++2003-05-29 Kenichi Handa ++ ++ * coding.c (make_conversion_work_buffer): Change the work buffer ++ name to the same one as that of Emacs 21. ++ ++2003-05-28 Kenichi Handa ++ ++ * coding.h (make_conversion_work_buffer): Prototype adjusted. ++ (code_conversion_restore): Don't extern it. ++ ++ * coding.c (detected_mask): Delete unused variable. ++ (decode_coding_iso_2022): Pay attention to the byte sequence of ++ CTEXT extended segment, and retain those bytes as is. ++ (decode_coding_ccl): Delete unused variable `valids'. ++ (setup_coding_system): Delete unused variable `category'. ++ (consume_chars): Delete unused variable `category'. Make it work ++ for non-multibyte case. ++ (make_conversion_work_buffer): Argument changed. ++ (saved_coding): Delete unused variable. ++ (code_conversion_restore): Don't check saved_coding->destination. ++ (code_conversion_save): New function. ++ (decode_coding_gap, encode_coding_gap): Call code_conversion_save ++ instead of record_unwind_protect. ++ (decode_coding_object, encode_coding_object): Likewise. Recover ++ PT. ++ (detect_coding_system): Delete unused variable `mask'. ++ (Fdefine_coding_system_internal): Delete unsed vaiable id; ++ ++ * fileio.c (kill_workbuf_unwind): New function. ++ (Finsert_file_contents): On replacing, call ++ make_conversion_work_buffer with correct args, and call ++ record_unwind_protect with the first arg kill_workbuf_unwind. ++ ++ * lisp.h (Fgenerate_new_buffer_name): EXFUN it. ++ ++2003-05-20 Kenichi Handa ++ ++ * fontset.c (BASE_FONTSET_P): Check FONTSET_BASE, not ++ FONTSET_NAME. ++ (fontset_add): Fix for the case that TO is less than TO1. ++ (Ffontset_info): Don't use fallback fontset on checking the ++ default fontset. ++ (dump_fontset): New function for debugging. ++ ++ * coding.c (Fdefine_coding_system_internal): Fix for the case that ++ coding_type is Qcharset. ++ ++2003-05-07 Kenichi Handa ++ ++ * chartab.c (map_sub_char_table): New argument DEFAULT_VAL. ++ (map_char_table): Don't inherit the value from the parent on ++ initializing VAL. Adjusted for the above change. ++ ++2003-05-06 Kenichi Handa ++ ++ * coding.c (Qsignature, Qendian): Delete these variables. ++ (syms_of_coding): Don't initialize them. ++ (CATEGORY_MASK_UTF_16_AUTO): New macro. ++ (detect_coding_utf_16): Add CATEGORY_MASK_UTF_16_AUTO in ++ detect_info->found. ++ (decode_coding_utf_16): Don't detect BOM here. ++ (encode_coding_utf_16): Produce BOM if CODING_UTF_16_BOM (coding) ++ is NOT utf_16_without_bom. ++ (setup_coding_system): For a coding system of type utf-16, check ++ if the attribute :endian is Qbig or not (not nil or not), and set ++ CODING_REQUIRE_DETECTION_MASK if BOM detection is required. ++ (detect_coding): If coding type is utf-16 and BOM detection is ++ required, detect it. ++ (Fdefine_coding_system_internal): For a coding system of type ++ utf-16, check if the attribute :endian is Qbig or not (not nil or ++ not). ++ ++2003-05-06 Kenichi Handa ++ ++ * coding.c (coding_set_source): Fix for the case that the current ++ buffer is different from coding->src_object. ++ (decode_coding_object): Don't use the conversion work buffer if ++ DST_OBJECT is a buffer. ++ ++2003-05-04 Dave Love ++ ++ * lread.c (read_emacs_mule_char) [len==2]: Index ++ emacs_mule_charset correctly. ++ ++2003-02-16 Dave Love ++ ++ * coding.c (Qbig5, Vbig5_coding_system, CATEGORY_MASK_BIG5) ++ (detect_coding_big5, decode_coding_big5, encode_coding_big5) ++ (Fdecode_big5_char, Fencode_big5_char): Deleted. (Big5 no longer ++ treated specially.) ++ (setup_coding_system, coding_category, CATEGORY_MASK_ANY) ++ (detected_mask): Remove Big5 bits. ++ ++2003-04-09 Kenichi Handa ++ ++ The following changes are to make the font rescaling facility ++ compatible with Emacs 21. ++ ++ * xfaces.c (Vface_font_rescale_alist): Renamed from ++ Vface_resizing_fonts. ++ (struct font_name): Rename member resizing_ratio to rescale_ratio. ++ (font_rescale_ratio): Renamed from font_resizing_ratio. ++ (split_font_name): Set font->rescale_ratio. ++ (better_font_p): Pay attention to font->rescale_ratio. ++ (build_scalable_font_name): Likewise. Change RESX, and RESY ++ fields. ++ (syms_of_xfaces): Declare Vface_font_rescale_alist as a Lisp ++ variable. ++ ++2003-03-28 Kenichi Handa ++ ++ * coding.c (Qutf_16_be_nosig, Qutf_16_be, Qutf_16_le_nosig) ++ (Qutf_16_le): Remove these variables. ++ (syms_of_coding): Don't DEFSYM them. ++ (decode_coding_utf_16): Fix handling of BOM. ++ (encode_coding_utf_16): Fix handling of BOM. ++ ++2003-03-14 Kenichi Handa ++ ++ * fileio.c (Finsert_file_contents): On replacing, before decoding ++ the file into the work buffer, set point of the work buffer to the ++ end. ++ ++2003-02-13 Dave Love ++ ++ * coding.c (Fcheck_coding_systems_region): Fix type errors. ++ ++2003-02-04 Dave Love ++ ++ * xterm.c (XTread_socket): Check Lisp types for Vx_keysym_table ++ and fix C types. ++ ++2003-01-31 Kenichi Handa ++ ++ * xdisp.c (SKIP_GLYPHS): New macro. ++ (set_cursor_from_row): Pay attention to string display properties. ++ ++ * category.c (copy_category_entry): Fix for the case that RANGE ++ is an integer. ++ ++ * xterm.c (x_encode_char): Call ccl_driver with the last arg Qnil. ++ ++ * w32term.c (w32_encode_char): Call ccl_driver with the last arg ++ Qnil. ++ ++2003-01-30 Kenichi Handa ++ ++ * charset.c (Fcharset_id_internal): New function. ++ (syms_of_charset): Defsubr it. ++ ++ * coding.c (decode_coding_ccl, encode_coding_ccl): Call ccl_driver ++ with the last arg charset_list acquired from coding. ++ (Fdefine_coding_system_internal): For ccl-based coding system, fix ++ the attribute coding_attr_ccl_valids. ++ ++ * coding.h (enum define_coding_ccl_arg_index): Set the first ++ member coding_arg_ccl_decoder to coding_arg_max. ++ ++ * ccl.h (ccl_driver): Prototype adjusted. ++ ++ * ccl.c (CCL_DECODE_CHAR, CCL_ENCODE_CHAR): New macros. ++ (ccl_driver): New arg CHARSET_LIST. Use the above macros instead ++ of DECODE_CAHR, ENCODE_CHAR, CHAR_CHARSET. ++ (Fccl_execute): Call ccl_driver with the last arg Qnil. ++ (Fccl_execute_on_string): Likewise. ++ ++2003-01-11 Kenichi Handa ++ ++ * charset.h (ENCODE_CHAR): If the method is SUBSET or SUPERSET, ++ call encode_char. ++ ++ * charset.c (encode_char): Fix handling of methods SUBSET and ++ SUPERSET. ++ ++ * xterm.c (x_new_fontset): Fix previous change. ++ ++2003-01-10 Dave Love ++ ++ * composite.c (syms_of_composite): Make composition_hash_table ++ weak. ++ ++2003-01-10 Kenichi Handa ++ ++ * dispextern.h (check_face_attributes, generate_ascii_font_name) ++ (font_name_registry): Don't extern them. ++ (split_font_name_into_vector, build_font_name_from_vector): Extern ++ them. ++ ++ * fontset.h (Qfontset): Don't extern it. ++ (new_fontset_from_font_name): Extern it. ++ ++ * fontset.c: Give 8 extra slots to fontset objects. ++ (Qfontset_info): New variable. ++ (syms_of_fontset): Defsym it. ++ (FONTSET_FALLBACK): New macro. ++ (fontset_face): Try also the default fontset. ++ (make_fontset): Realize a fallback fontset from the default ++ fontset. ++ (generate_ascii_font_name): Moved from xfaces.c. Rewritten by ++ using split_font_name_into_vector and build_font_name_from_vector. ++ (Fset_fontset_font): Access the elements of font_spec by enum ++ FONT_SPEC_INDEX. If font_spec is a string, extract the registry ++ name by using split_font_name_into_vector. ++ (Fnew_fontset): If no ASCII font is specified in FONTLIST, ++ generate a proper font name from the fontset name. Update ++ Vfontset_alias_alist. ++ (n_auto_fontsets): New variable. ++ (new_fontset_from_font_name): New function. ++ (Ffont_info): Store the information about fonts generated from the ++ default fontset in the first extra slot of the returned ++ char-table. ++ ++ * xfaces.c (generate_ascii_font_name): Moved to fontset.c. ++ (font_name_registry): Function deleted. ++ (split_font_name_into_vector): New function. ++ (build_font_name_from_vector): New function. ++ (font_list): The argument REGISTRY is now a list of registry ++ names. ++ (choose_face_font): If we are choosing an ASCII font, and ATTRS ++ specifies an explicit font name, return the name as is. Make a ++ list of registy names. ++ ++ * xfns.c (x_set_font, x_create_tip_frame): Adjusted to the change ++ of x_new_fontset. ++ (Fx_create_frame): Don't call x_new_fontset here. Just use ++ x_list_fonts to check the existence of fonts. ++ ++ * xterm.h (x_new_fontset): Prototype adjusted. ++ ++ * xterm.c (x_new_fontset): Change the arg FONTSETNAME to Lisp ++ string. Use new_fontset_from_font_name to create a fontset from a ++ font name. ++ ++2003-01-07 Dave Love ++ ++ * Makefile.in: Fix some dependencies. ++ ++ * keymap.c (Fapropos_internal): Don't gcpro apropos_predicate but ++ set it to nil before returning. ++ ++ * composite.c (update_compositions): Fix type error. ++ ++ * syntax.c (skip_chars, skip_syntaxes): Fix type errors. ++ ++2003-01-07 Kenichi Handa ++ ++ * xterm.c (x_new_font): Optimize for the case that the font is ++ already set for the frame. ++ ++2003-01-06 Kenichi Handa ++ ++ * chartab.c (char_table_ascii): Check if the char table contents ++ is sub-char-table or not. ++ (char_table_set): Fix argument to char_table_ascii. ++ (char_table_set_range): Likewise. ++ ++ * coding.c (CATEGORY_MASK_RAW_TEXT): New macro. ++ (detect_coding_utf_8, detect_coding_utf_16) ++ (detect_coding_emacs_mule, detect_coding_iso_2022) ++ (detect_coding_sjis, detect_coding_big5) ++ (detect_coding_ccl, detect_coding_charset): Change argument MASK ++ to DETECT_INFO. Update DETECT_INFO and return 1 if the byte ++ sequence is valid in this coding system. Callers changed. ++ (MAX_ANNOTATION_LENGTH): New macro. ++ (ADD_ANNOTATION_DATA): New macro. ++ (ADD_COMPOSITION_DATA): Argument changed. Callers changed. Call ++ ADD_ANNOTATION_DATA. The format of annotation data changed. ++ (ADD_CHARSET_DATA): New macro. ++ (emacs_mule_char): New argument ID. Callers changed. ++ (decode_coding_emacs_mule, decode_coding_iso_2022) ++ (decode_coding_sjis, decode_coding_big5, decode_coding_charset): ++ Produce charset annotation data in coding->charbuf. ++ (encode_coding_emacs_mule, encode_coding_iso_2022): Pay attention ++ to charset annotation data in coding->charbuf. ++ (setup_coding_system): Add CODING_ANNOTATE_CHARSET_MASK ++ coding->common_flags if the coding system is iso-2022 based and ++ uses designation. ++ (produce_composition): Adjusted for the new annotation data ++ format. ++ (produce_charset): New function. ++ (produce_annotation): Handle charset annotation. ++ (handle_composition_annotation, handle_charset_annotation): New ++ functions. ++ (consume_chars): Handle charset annotation. Utilize the above two ++ functions. ++ (encode_coding_object): If SRC_OBJECT and DST_OBJECT are the same ++ buffer, get the deleted text as a string and set ++ coding->src_object to that string. ++ (detect_coding, detect_coding_system): Use the new struct ++ coding_detection_info. ++ ++ * coding.h (struct coding_detection_info): New structure. ++ (struct coding_system): Prototype of the member `detector' ++ adjusted. ++ (CODING_ANNOTATE_CHARSET_MASK): New macro. ++ ++2003-01-06 Kenichi Handa ++ ++ * insdel.c (insert_from_gap): Fix argument to offset_intervals. ++ ++2003-01-03 Dave Love ++ ++ * keymap.c (apropos_predicate, apropos_accumulate): Declare ++ static. ++ (Fapropos_internal): Don't gcpro apropos_accumulate. Set result ++ to new local and nullify apropos_accumulate before returning. ++ (syms_of_keymap): Staticpro and initialize apropos_accumulate. ++ ++2002-12-05 Kenichi Handa ++ ++ * charset.c (Fdefine_charset_internal): Setup charset.fast_map ++ correctly. ++ ++2002-11-26 Dave Love ++ ++ * fns.c (Flanginfo): Call synchronize_system_time_locale. ++ ++2002-11-07 Kenichi Handa ++ ++ The following changes are to make character composition happen ++ automatically on displaying. ++ ++ * Makefile.in (lisp, shortlisp): Add composite.elc ++ ++ * composite.h (Qauto_composed, Vauto_composition_function, ++ Qauto_composition_function): Extern them. ++ ++ * composite.c (Vcomposition_function_table, ++ Qcomposition_function_table): Delete variables. ++ (Qauto_composed, Vauto_composition_function, ++ Qauto_composition_function): New variables. ++ (run_composition_function): Don't call ++ compose-chars-after-function. ++ (update_compositions): Clear `auto-composed' text property. ++ (compose_chars_in_text): Delete this function. ++ (syms_of_composite): Staticpro Qauto_composed and ++ Qauto_composition_function. Declare Vauto_composition_function as ++ a Lisp variable. ++ ++ * dispextern.h (enum prop_idx): Add member AUTO_COMPOSED_PROP_IDX. ++ ++ * xdisp.c (it_props): Add an entry for Qauto_composed. ++ (handle_auto_composed_prop): New function. ++ ++ * xselect.c (selection_data_to_lisp_data): Don't call ++ compose_chars_in_text. ++ ++2002-11-06 Dave Love ++ ++ * keyboard.c (read_char): Modify checking around use of ++ Vkeyboard_translate_table. ++ ++ * xterm.c (XTread_socket): Check Lisp types for Vx_keysym_table ++ and fix C types. ++ ++2002-11-06 Kenichi Handa ++ ++ * coding.c (decode_coding_utf_8): When eol_type is Qdos, handle ++ the case that the last byte is '\r' correctly. ++ (decode_coding_emacs_mule): Likewise. ++ (decode_coding_iso_2022): Likewise. ++ (decode_coding_sjis): Likewise. ++ (decode_coding_big5): Likewise. ++ (decode_coding_charset): Likewise. ++ (produce_chars): Likewise. ++ (decode_coding): Flushing out the unprocessed data correctly. ++ (decode_coding_gap): Set CODING_MODE_LAST_BLOCK bit of ++ coding->mode. ++ ++2002-10-31 Dave Love ++ ++ * xterm.c (XTread_socket): Fix changes for defined keysyms. Add ++ XK_ISO... case. ++ (xaw_scroll_callback): Revert last change. ++ ++2002-10-30 Kenichi Handa ++ ++ * charset.c (Fset_charset_priority): Update ++ Viso_2022_charset_list. ++ ++2002-10-29 Kenichi Handa ++ ++ * xfaces.c (Vface_resizing_fonts): New variable. ++ (struct font_name): New member `resizing_ratio'. ++ (font_resizing_ratio): New function. ++ (split_font_name): Set font->resizing_ratio. ++ (better_font_p): Pay attention to font->resizing_ratio. ++ (build_scalable_font_name): Likewise. Don't change POINT_SIZE, ++ RESX, and RESY fields. ++ (try_alternative_families): Try scalable fonts if ++ Vscalable_fonts_allowed is not Qt. ++ (syms_of_xfaces): Declare Vface_resizing_fonts as a Lisp variable. ++ ++2002-10-29 Dave Love ++ ++ * xterm.c (xaw_scroll_callback): Cast correctly. ++ ++2002-10-28 Dave Love ++ ++ * keyboard.c (lispy_accent_codes, lispy_accent_keys): Extend. ++ (lispy_kana_keys): Comment out. ++ (make_lispy_event) [XK_kana_A]: Comment out. ++ ++ * xterm.c (xaw_scroll_callback): Cast call_data. ++ (XTread_socket): Deal with ASCII keysyms. ++ (syms_of_xterm) : Fix args of make_hash_table. ++ ++2002-10-27 Dave Love ++ ++ * xterm.c (Vx_keysym_table): New. ++ (syms_of_xterm): Initialize it. ++ (XTread_socket): Use it. ++ From head: Eliminate incorrect optimization that tried to avoid ++ decoding the output of X*LookupString. ++ (x_get_font_repertory): Delete charset declaration. ++ ++2002-10-16 Kenichi Handa ++ ++ * coding.c (detect_coding): Fix previous change. ++ (detect_coding_charset): If only ASCII bytes are found, return 0. ++ (detect_coding_system): Fix previous change. ++ (Fdefine_coding_system_internal): Setup CODING_ATTR_ASCII_COMPAT ++ (attrs) correctly. ++ ++2002-10-15 Dave Love ++ ++ * coding.c (Fcheck_coding_system): Doc fix. ++ ++ * editfns.c (Finsert_byte): Return a proper value. ++ ++2002-10-14 Kenichi Handa ++ ++ * coding.c (decode_coding): Fix args to translate_chars. Pay ++ attention to Vstandard_translation_table_for_decode. ++ (encode_coding): Fix args to translate_chars. Pay attention to ++ Vstandard_translation_table_for_encode. ++ ++ * data.c (Faset): Check NEWELT by ASCII_CHAR_P, not by ++ SINGLE_BYTE_CHAR_P. ++ ++ * editfns.c (general_insert_function): Check VAL by ASCII_CHAR_P, ++ not by SINGLE_BYTE_CHAR_P. ++ ++ * fns.c (concat): Check CH by ASCII_CHAR_P, not by ++ SINGLE_BYTE_CHAR_P. ++ ++ * insdel.c (copy_text): Check C by ASCII_CHAR_P, not by ++ SINGLE_BYTE_CHAR_P. ++ ++ * keymap.c (Ftext_char_description): Check C by ASCII_CHAR_P, not ++ by SINGLE_BYTE_CHAR_P. ++ ++ * search.c (Freplace_match): Check C by ASCII_CHAR_P, not by ++ SINGLE_BYTE_CHAR_P. ++ ++2002-10-14 Dave Love ++ ++ * fns.c (Fstring_as_multibyte, Fstring_to_multibyte): Doc fix. ++ ++2002-10-10 Dave Love ++ ++ * fns.c (Flanginfo): Fix typo. ++ ++ * unexelf.c (unexec): Make last change conditional on Irix 6.5. ++ ++2002-10-10 Kenichi Handa ++ ++ * coding.c (detect_coding_utf_8): Check incomplete byte sequence. ++ Don't update *mask when correctly detected. ++ (detect_coding_utf_16): Likewise. ++ (detect_coding_emacs_mule): Likewise. ++ (detect_coding_iso_2022): Likewise. ++ (detect_coding_sjis): Likewise. ++ (detect_coding_big5): Likewise. ++ (detect_coding_ccl): Likewise. ++ (decode_coding_sjis): Fix decoding of katakana-jisx0201. ++ (detect_eol): Delete the argument CODING, and add the argument ++ CATEGORY. ++ (detect_coding): Adjusted for the changes above. ++ (detect_coding_system): Likewise. ++ ++2002-10-09 Kenichi Handa ++ ++ * character.c (char_string): Renamed from ++ char_string_with_unification. Pay attention to ++ CHAR_MODIFIER_MASK. ++ (string_char): Renamed from string_char. ++ ++ * character.h (CHAR_STRING): Call char_string if C is greater than ++ MAX_3_BYTE_CHAR. ++ (CHAR_STRING_ADVANCE): Likewise. ++ (STRING_CHAR): Call string_char instead of ++ string_char_with_unification. ++ (STRING_CHAR_AND_LENGTH): Likewise. ++ (STRING_CHAR_ADVANCE): Likewise. ++ ++2002-10-09 Dave Love ++ ++ * coding.c (decode_coding_utf_8): Treat surrogates as invalid. ++ ++2002-10-07 Kenichi Handa ++ ++ * keymap.c (push_key_description): Pay attention to ++ force_multibyte. ++ ++ * regex.c (re_search_2): Fix for the case of unibyte buffer. ++ ++2002-10-06 Dave Love ++ ++ * charset.c (define_charset_internal): Rename `supprementary'. ++ ++ * Makefile.in (lisp, shortlisp): Remove latin-N. ++ ++2002-10-05 Dave Love ++ ++ * xfns.c (x_window, x_window): Use use_xim. ++ ++ * xterm.c (use_xim): Initialize. ++ (xim_open_dpy, xim_initialize, xim_close_dpy): Use use_xim. ++ (x_term_init): Maybe set use_xim. ++ ++ * xterm.h (use_xim) [HAVE_X_I18N]: Declare. ++ ++2002-10-01 Kenichi Handa ++ ++ * search.c (search_buffer): Fix case-fold-search of multibyte ++ characters. ++ (boyer_moore): Rename the last argument to char_high_bits. ++ ++2002-09-27 Kenichi Handa ++ ++ * xdisp.c (display_string): Fix for the case of zero width glyph. ++ ++ * xfns.c (x_set_font): Change the error message of the case that ++ x_new_fontset returns Qt. ++ ++ * xfaces.c (set_lface_from_font_name): Reject the default fontset. ++ (Finternal_set_lisp_face_attribute): Use signal_error for the ++ error of invalid fontset. ++ ++ * xterm.c (x_new_fontset): If FONTSETNAME specifies the default ++ fontset, return Qt. ++ ++2002-09-19 Kenichi Handa ++ ++ * regex.c (re_search_2): Fix previous change. ++ ++2002-09-18 Kenichi Handa ++ ++ * syntax.c (skip_syntaxes): Fix previous change. ++ ++2002-09-13 Kenichi Handa ++ ++ * syntax.c (skip_chars): Fix previous change. ++ (skip_syntaxes): Fix previous change. ++ ++2002-09-06 Dave Love ++ ++ * config.in: Restore it. ++ ++2002-09-05 Dave Love ++ ++ * config.in: Removed (now auto-generated). ++ ++ * s/usg5-4.h: Fix last change. ++ ++ * unexelf.c (unexec): Make .got handling not SGI-specific. ++ ++ * syntax.c (syms_of_syntax) : Doc fix. ++ ++ * regex.c: Use `ifdef HAVE_ALLOCA_H', not `if HAVE_ALLOCA_H'. ++ ++ * keyboard.c (read_key_sequence): Fix type error. ++ ++ * buffer.c (Fset_buffer_multibyte, Fset_buffer_multibyte): Fix ++ type error. ++ ++ * fontset.c (fontset_add): Return Lisp_Object. ++ ++2002-09-03 Dave Love ++ ++ * charset.h (charset_ordered_list_tick): Declare extern. ++ ++2002-09-03 Kenichi Handa ++ ++ The following changes (and some of 2002-08-20 changes of mine) are ++ for handling syntax, category, and case conversion for unibyte ++ characters by converting them to multibyte on the fly. With these ++ changes, we don't have to setup syntax and case tables for unibyte ++ characters in each language environment. ++ ++ * abbrev.c (Fexpand_abbrev): Convert a unibyte character to ++ multibyte if necessary. ++ ++ * bytecode.c (Fbyte_code): Likewise. ++ ++ * character.h (LEADING_CODE_LATIN_1_MIN) ++ (LEADING_CODE_LATIN_1_MAX): New macros. ++ (unibyte_to_multibyte_table): Extern it. ++ (unibyte_char_to_multibyte): New macro. ++ (MAKE_CHAR_MULTIBYTE): Use unibyte_to_multibyte_table. ++ (CHAR_LEADING_CODE): New macro. ++ (FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE): New macro. ++ ++ * character.c (unibyte_to_multibyte_table): New variable. ++ (unibyte_char_to_multibyte): Move to character.h and defined as ++ macro. ++ (multibyte_char_to_unibyte): If C is an eight-bit character, ++ convert it to the corresponding byte value. ++ ++ * charset.c (Fset_unibyte_charset): If the dimension of CHARSET is ++ not 1, singals an error. Update the elements of ++ unibyte_to_multibyte_table. ++ (init_charset_once): Initialize unibyte_to_multibyte_table. ++ (syms_of_charset): Define the charset `iso-8859-1'. ++ ++ * casefiddle.c (casify_object): Fix previous change. ++ ++ * cmds.c (internal_self_insert): In a multibyte buffer, insert C ++ as is without converting it to unibyte. In a unibyte buffer, ++ convert C to multibyte before checking the syntax. ++ ++ * lisp.h (unibyte_char_to_multibyte): Extern deleted. ++ ++ * minibuf.c (Fminibuffer_complete_word): Use the macro ++ FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE. ++ ++ * regex.h (struct re_pattern_buffer): New member target_multibyte. ++ ++ * regex.c (RE_TARGET_MULTIBYTE_P): New macro. ++ (GET_CHAR_BEFORE_2): Check target_multibyte, not multibyte. If ++ that is zero, convert an eight-bit char to multibyte. ++ (MAKE_CHAR_MULTIBYTE, CHAR_LEADING_CODE): New dummy new macros for ++ non-emacs case. ++ (PATFETCH): Convert an eight-bit char to multibyte. ++ (HANDLE_UNIBYTE_RANGE): New macro. ++ (regex_compile): Setup the compiled pattern for multibyte chars ++ even if the given regex string is unibyte. Use PATFETCH_RAW ++ instead of PATFETCH in many places. To handle `charset' ++ specification of unibyte, call HANDLE_UNIBYTE_RANGE. Use bitmap ++ only for ASCII chars. ++ (analyse_first) : Simplified because the compiled pattern ++ is multibyte. ++ : Setup fastmap from bitmap only for ASCII chars. ++ : Use CHAR_LEADING_CODE to get leading codes. ++ : If multibyte, setup fastmap only for ASCII chars ++ here. ++ (re_compile_fastmap) [emacs]: Call analyse_first with the arg ++ multibyte always 1. ++ (re_search_2) In emacs, set the locale variable multibyte to 1, ++ otherwise to 0. New local variable target_multibyte. Check it ++ to decide the multibyteness of STR1 and STR2. If ++ target_multibyte is zero, convert unibyte chars to multibyte ++ before translating and checking fastmap. ++ (TARGET_CHAR_AND_LENGTH): New macro. ++ (re_match_2_internal): In emacs, set the locale variable multibyte ++ to 1, otherwise to 0. New local variable target_multibyte. Check ++ it to decide the multibyteness of STR1 and STR2. Use ++ TARGET_CHAR_AND_LENGTH to fetch a character from D. ++ : If multibyte is nonzero, check fastmap ++ only for ASCII chars. Call bcmp_translate with ++ target_multibyte, not with multibyte. ++ : Declare the local variable C as `unsigned'. ++ (bcmp_translate): Change the last arg name to target_multibyte. ++ ++ * search.c (compile_pattern_1): Don't adjust the multibyteness of ++ the regexp pattern and the matching target. Set cp->buf.multibyte ++ to the multibyteness of the regexp pattern. Set ++ cp->but.target_multibyte to the multibyteness of the matching ++ target. ++ (wordify): Use FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE instead of ++ FETCH_STRING_CHAR_ADVANCE. ++ (Freplace_match): Convert unibyte chars to multibyte. ++ ++ * syntax.c (char_quoted): Use FETCH_CHAR_AS_MULTIBYTE to convert ++ unibyte chars to multibyte. ++ (back_comment): Likewise. ++ (scan_words): Likewise. ++ (skip_chars): The arg syntaxp is deleted, and the code for ++ handling syntaxes is moved to skip_syntaxes. Callers changed. ++ Fix the case that the multibyteness of STRING and the current ++ buffer doesn't match. ++ (skip_syntaxes): New function. ++ (SYNTAX_WITH_MULTIBYTE_CHECK): Check C by ASCII_CHAR_P, not by ++ SINGLE_BYTE_CHAR_P. ++ (Fforward_comment): Use FETCH_CHAR_AS_MULTIBYTE to convert unibyte ++ chars to multibyte. ++ (scan_lists): Likewise. ++ (Fbackward_prefix_chars): Likewise. ++ (scan_sexps_forward): Likewise. ++ ++2002-08-23 Kenichi Handa ++ ++ * xfaces.c (QCfontset): New variable. ++ (LFACE_FONTSET): New macro. ++ (check_lface_attrs): Check also LFACE_FONTSET_INDEX. ++ (set_lface_from_font_name): Setup LFACE_FONTSET (lface). ++ (Finternal_set_lisp_face_attribute): Handle QCfontset. ++ (Finternal_get_lisp_face_attribute): Likewise. ++ (lface_same_font_attributes_p): Fix checking of LFACE_FONT_INDEX, ++ check also LFACE_FONTSET_INDEX. ++ (face_fontset): Check attrs[LFACE_FONTSET_INDEX], not ++ attrs[LFACE_FONT_INDEX]. ++ (syms_of_xfaces): Intern and staticpro QCfontset. ++ ++ * dispextern.h (enum lface_attribute_index): New member ++ LFACE_FONTSET_INDEX. ++ ++ * fns.c (base64_encode_1): Handle eight-bit chars correctly. ++ ++2002-08-21 Kenichi Handa ++ ++ * coding.c (coding_set_destination): Fix coding->destination for ++ the case converting a region. ++ (encode_coding_utf_8): Encode eight-bit chars as single byte. ++ (encode_coding_object): Fix coding->dst_pos and ++ coding->dst_pos_byte for the case converting a region. ++ ++ * insdel.c (insert_from_gap): Make it work even if PT != GTP. ++ ++ * character.h (BYTE8_STRING): New macro. ++ ++ * fns.c (base64_decode_1): Insert eight-bit chars correctly. ++ ++2002-08-20 Kenichi Handa ++ ++ * xdisp.c (get_next_display_element): Don't display unibyte 8-bit ++ characters by octal form. ++ ++ * abbrev.c (Fexpand_abbrev): Fix for the multibyte case. ++ ++ * buffer.h (_fetch_multibyte_char_len): Extern deleted. ++ (FETCH_MULTIBYTE_CHAR): Don't use _fetch_multibyte_char_len. ++ (BUF_FETCH_MULTIBYTE_CHAR): Likewise. ++ (FETCH_CHAR_AS_MULTIBYTE): New macro. ++ ++ * casetab.c (set_canon, set_identity, shuffle): Simplified. ++ ++ * casefiddle.c (casify_object): Simplified. Handle the case that ++ the case conversion change the byte length. ++ (casify_region): Likewise ++ ++ * character.h (MAKE_CHAR_UNIBYTE, MAKE_CHAR_MULTIBYTE): New ++ macros. ++ ++ * character.c (_fetch_multibyte_char_len): This variable deleted. ++ (syms_of_character): Setup Vprintable_chars. ++ ++ * editfns.c (Fchar_equal): Fix for the unibyte case. ++ (Finsert_byte): New function. ++ (syms_of_editfns): Defsubr it. ++ ++ * keyboard.c (read_key_sequence): Use ~CHAR_MODIFIER_MASK instead ++ of direct code 0x3ffff. ++ ++ * search.c (Freplace_match): Fix for the unibyte case. ++ ++2002-08-19 Kenichi Handa ++ ++ * lread.c (safe_to_load_p): Fix the logic. ++ ++ * syntax.c (scan_words): Don't treat characters belonging to ++ different scripts as constituting a word. ++ ++ * editfns.c (Fformat): Use ASCII_CHAR_P, not SINGLE_BYTE_CHAR_P. ++ ++ * fontset.c (Fset_fontset_font): Treat `ascii' as charset, not ++ script. ++ ++ * emacs.c (main): In the case of --unibyte, instead of aborting on ++ finding non-empty buffer, make it unibyte. ++ ++2002-08-18 Kenichi Handa ++ ++ * xterm.c (x_new_fontset): Call `create-fontset-from-ascii-font' ++ to create a fontset. ++ ++2002-08-18 Dave Love ++ ++ * character.c (Funibyte_char_to_multibyte): Doc fix. ++ ++ * xfns.c [HAVE_STDLIB_H]: Fix last change. ++ ++2002-08-15 Kenichi Handa ++ ++ * fontset.c (fontset_add): Make the type `int'. ++ (fontset_id_valid_p): Define it if FONTSET_DEBUG is defined. ++ ++ * character.c (unibyte_char_to_multibyte): Refer to ++ charset_unibyte, not charset_primary. ++ (multibyte_char_to_unibyte): Likewise. ++ (Funibyte_char_to_multibyte): Likewise. ++ ++ * charset.h: (charset_unibyte): Extern it instead of ++ charset_primary. ++ ++ * charset.c (charset_unibyte): Renamed from charset_primary. ++ (Funibyte_charset): Renamed from Fprimary_charset. ++ (Fset_unibyte_charset): Renamed from Fset_primary_charset. ++ (syms_of_charset): Adjusted for the above changes. ++ ++ * w32term.c (x_produce_glyphs): Use ASCII_CHAR_P, not ++ SINGLE_BYTE_CHAR_P. Fix the logic of handling non-ASCII char when ++ it->multibyte_p is zero. ++ ++ * lisp.h (nonascii_insert_offset, Vnonascii_translation_table): ++ Extern deleted. ++ ++2002-08-08 Kenichi Handa ++ ++ * coding.c (Fdefine_coding_system_internal): Fix category setting ++ for a coding system of type iso-2022. ++ ++2002-08-02 Kenichi Handa ++ ++ * fontset.h (FS_LOAD_FONT): Call fs_load_font with the arg CHARSET ++ -1. ++ ++2002-08-01 Kenichi Handa ++ ++ * syntax.c (Vnext_word_boundary_function_table): New variable. ++ (syms_of_syntax): Declare it as a Lisp variable. ++ (scan_words): Call functions in Vnext_word_boundary_function_table ++ if any. ++ ++ * xterm.c (x_load_font): Initialize fontp->fontset to -1. ++ ++ * fontset.c (fs_load_font): If fontp->charset is not negative, ++ return fontp without setting its members. ++ ++2002-07-31 Dave Love ++ ++ * config.in: Generated with autoheader. ++ ++ * xfns.c [HAVE_STDLIB_H]: Change logic (instead of fixing typo). ++ ++ * m/sparc.h (HAVE_ALLOCA): Delete. ++ ++ * s/irix6-5.h: Don't include strings.h. ++ (bcopy, bzero, bcmp): Don't undef. ++ ++ * s/irix6-0.h (bcopy, bzero, bcmp): Don't undef. ++ ++ * s/usg5-4.h (NO_SIOCTL_H): Don't define. ++ (TIOCSIGSEND): Don't test IRIX6. ++ (bcopy, bzero, bcmp): Define conditionally. ++ ++2002-07-31 Kenichi Handa ++ ++ * buffer.c (Qas, Qmake, Qto): New variables. ++ (Fset_buffer_multibyte): New optional arg METHOD. Caller changed. ++ (syms_of_buffer): Intern and staticpro Qas, Qmake, and Qto. ++ ++ * callproc.c (Fcall_process): Don't call insert_1_both directly if ++ we are inserting a process output into a multibyte buffer. ++ ++ * character.h (CHAR_TO_BYTE8): If C is not eight-bit char, call ++ multibyte_char_to_unibyte. ++ ++ * character.c (Funibyte_char_to_multibyte): If C can't be decoded ++ by the primary charset, make it eight-bit char. ++ (Fmultibyte_char_to_unibyte): Call CHAR_TO_BYTE8. ++ ++ * charset.c: (charset_eight_bit, Qeight_bit_control): New ++ variables. ++ (charset_8_bit__control, charset_8_bit_graphic, ++ Qeight_bit_control, Qeight_bit_graphic): These variables deleted. ++ (define_charset_internal): New function. ++ (syms_of_charset): Call define_charset_internal for pre-defined ++ charsets. ++ ++ * charset.h (charset_8_bit): Extern it. ++ ++ * coding.c (make_conversion_work_buffer): Adjusted for the change ++ of Fset_buffer_multibyte. ++ (encode_coding_raw_text): Increment p0 in the loop. ++ ++ * lisp.h (Fset_buffer_multibyte): Prototype adjusted. ++ ++ * xdisp.c (setup_echo_area_for_printing, set_message_1): Adjusted ++ for the change of Fset_buffer_multibyte. ++ ++ * fns.c (Fstring_to_multibyte): New function. ++ (syms_of_fns): Declare Fstring_to_multibyte as Lisp subroutine. ++ ++2002-07-30 Dave Love ++ ++ * xfns.c (x_put_x_image): Declare args. ++ ++ * xfaces.c (font_name_registry, choose_face_font): Delete unused ++ vars. ++ (try_font_list): Declare an arg. ++ ++ * xdisp.c (message2_nolog, set_message): Declare an arg. ++ ++ * terminfo.c (tparam): Declare an arg. Use P_ to declare tparm. ++ ++ * syntax.c (scan_sexps_forward): Declare an arg. ++ ++ * scroll.c (calculate_scrolling, calculate_direct_scrolling): ++ Declare an arg. ++ ++ * lisp.h (Fnew_fontset): Declare. ++ ++ * keymap.c (push_key_description): Call CHARACTERP correctly. ++ ++ * fontset.c (fontset_add): Declare args. Call make_number ++ correctly. ++ (face_for_char): Delete unused vars. ++ (Fset_fontset_font): Doc fix. Delete unused vars. ++ ++ * doc.c (Fsubstitute_command_keys): Delete unused vars. ++ ++ * composite.c (update_compositions): Declare arg. ++ ++ * cm.c (calccost, cmgoto): Declare args. ++ ++ * charset.c: Remove `emacs' conditional. Doc fixes. ++ (map_char_table_for_charset): Declare. ++ ++ * character.c (syms_of_character) : Doc ++ fix. ++ ++ * ccl.c: Remove `emacs' conditional. Include hash table stuff ++ from trunk. ++ ++2002-07-26 Kenichi Handa ++ ++ The following changes are to allow specifying multiple font ++ patterns for a character range (specified by script or charset). ++ ++ * Makefile.in (abbrev.o): Depend on syntax.h. ++ (xfaces.o): Depend on charset.h. ++ ++ * alloc.c (Fmake_string): Use ASCII_CHAR_P, not ++ SINGLE_BYTE_CHAR_P. ++ ++ * ccl.c (Fccl_execute_on_string): Add `const' to local variables. ++ ++ * character.h (Vchar_script_table): Extern it. ++ ++ * character.c (Vscript_alist): This variable deleted. ++ (Vchar_script_table, Qchar_script_table): New variable. ++ (syms_of_character): Declare Vchar_script_table as a lisp variable ++ and initialize it. ++ ++ * chartab.c (Fmake_char_table): Doc fixed. If PURPOSE doesn't ++ have property char-table-extra-slots, make no extra slot. ++ ++ * dispextern.h (struct face): Member `charset' deleted. ++ (FACE_SUITABLE_FOR_CHAR_P): Use ASCII_CHAR_P, not ++ SINGLE_BYTE_CHAR_P. ++ (FACE_FOR_CHAR): Likewise. ++ (choose_face_font, lookup_non_ascii_face, font_name_registry): Add ++ prototypes ++ (lookup_face, lookup_named_face, lookup_derived_face): Prototype ++ fixed. ++ (generate_ascii_font_name): Renamed from generate_ascii_font. ++ ++ * fontset.h (get_font_repertory_func): New prototype. ++ (make_fontset_for_ascii_face, fs_load_font): Prototypes fixed. ++ (FS_LOAD_FONT): Call fs_load_font with the 3rd arg charset_ascii. ++ ++ * fontset.c (Qprepend, Qappend): New variables. ++ (FONTSET_CHARSET_ALIST, FONTSET_FACE_ALIST): These macros deleted. ++ (FONTSET_NOFONT_FACE, FONTSET_REPERTORY): New macros. ++ (FONTSET_REF): Optimize if FONTSET is Vdefault_fontset. ++ (FONTSET_REF_AND_RANGE, FONTSET_ADD): New macros. ++ (fontset_ref_and_range, fontset_add, reorder_font_vector) ++ (load_font_get_repertory): New functions. ++ (fontset_set): This function deleted. ++ (fontset_face): New arg FACE. Return face ID, not face. ++ Completely re-written to handle new fontset structure. Caller ++ changed. ++ (free_face_fontset): Use ASET istead of AREF (X) = Y. ++ (face_for_char): Don't call lookup_face. ++ (make_fontset_for_ascii_face): New arg FACE. ++ (fs_load_font): New arg CHARSET_ID. Don't check ++ Vfont_encoding_alist here. ++ (find_font_encoding): New function. ++ (list_fontsets): Use STRINGP, not ! NILP. ++ (accumulate_script_ranges): New function. ++ (Fset_fontset_font, Fnew_fontset, Ffontset_info): Completely ++ re-written to handle new fontset structure. ++ (Ffontset_font): Return a copy of element. ++ (syms_of_fontset): Define symbols Qprepend and Qappend. Fix ++ docstring of font-encoding-alist. ++ ++ * lisp.h (CHAR_TABLE_REF): Remove unnecessary check (IDX >= 0). ++ (Fset_fotset_font): Fix arguments to 5. ++ ++ * msdos.c (XMenuActivate): Adjuted for the change of ++ lookup_derived_face. ++ ++ * xdisp.c (message_dolog, set_message_1, extend_face_to_end_of_line): ++ Use ASCII_CHAR_P, not SINGLE_BYTE_CHAR_P. ++ (highlight_trailing_whitespace): Adjusted for the change of ++ lookup_named_face. ++ ++ * xfaces.c: Include charset.h. ++ (load_face_font): Argument C deleted. Caller changed. ++ (generate_ascii_font_name): Renamed from generate_ascii_font. ++ (font_name_registry): New function. ++ (cache_face): Store ascii faces before non-ascii faces in buckets. ++ (lookup_face): Arguments C and BASE_FACE deleted. Caller changed. ++ Lookup only ascii faces. ++ (lookup_non_ascii_face): New function. ++ (lookup_named_face): Argument C deleted. Caller changed. ++ (lookup_derived_face): Argument C deleted. Caller changed. ++ (try_font_list): New arg PATTERN. Caller changed. If PATTERN is ++ a string, just call font_list with it. ++ (choose_face_font): Arguments FACE and C deleted. New arg ++ FONT_SPEC. Caller changed. ++ (realize_face): Arguments C and BASE_FACE deleted. Caller ++ (realize_x_face): Likewise. ++ (realize_non_ascii_face): New function. ++ (realize_x_face): Call load_face_font here. ++ (realize_tty_face): Argument C deleted. Caller changed. ++ (compute_char_face): If CH is not ascii, call FACE_FOR_CHAR to ++ get a face ID. ++ (dump_realized_face): Don't print charset of FACE. ++ ++ * xfns.c (x_set_font): Always call x_new_fontset and ++ store_frame_parameter. ++ (Fx_create_frame): Call x_new_fontset, not x_new_font. ++ (syms_of_xfns): Set get_font_repertory_func to ++ x_get_font_repertory. ++ ++ * xterm.h (x_get_font_repertory): Extern it. ++ ++ * xterm.c (x_produce_glyphs): Use ASCII_CHAR_P, not ++ SINGLE_BYTE_CHAR_P. Fix the logic of handling non-ASCII char when ++ it->multibyte_p is zero. ++ (XTread_socket): Use ASCII_CHAR_P, not SINGLE_BYTE_CHAR_P. ++ (x_new_fontset): If FONTSETNAME doesn't match any existing ++ fontsets, create a new one. ++ (x_get_font_repertory): New function. ++ ++2002-07-25 Kenichi Handa ++ ++ * coding.c (Ffind_coding_systems_region_internal): Detect an ++ ASCII only string correctly. ++ ++ * lread.c (Fload): Don't load with Qload_force_doc_strings t if ++ version is 0. ++ ++2002-07-24 Kenichi Handa ++ ++ * lread.c: Include "coding.h". ++ (Qget_emacs_mule_file_char, Qload_force_doc_strings, ++ load_each_byte, unread_char): New variables. ++ (readchar_backlog): This variable deleted. ++ (readchar): Return a character unless load_each_byte is nonzero. ++ Handle the case that readcharfun is Qget_emacs_mule_file_char or a ++ cons. If unread_char is not -1, simply return it. ++ (unreadchar): Handle the case that readcharfun is ++ Qget_emacs_mule_file_char or a cons. Set unread_char if ++ necessary. ++ (read_multibyte): This function deleted. ++ (readbyte_for_lambda, readbyte_from_file, readbyte_from_string) ++ (read_emacs_mule_char): New functions. ++ (Fload): Even if the file doesn't have the extention ".elc", if ++ safe_to_load_p returns a positive version number, assume that the ++ file contains bytecompiled code. If the version is less than 22, ++ load the file while decoding multibyte sequences by emacs-mule. ++ (readevalloop): Don't use readchar_backlog. ++ (Fread): Likewise. Pay attention to the case that STREAM is a ++ cons. ++ (Fread_from_string): Pay attention to the case that STREAM is a ++ cons. ++ (read_escape): The arg BYTEREP deleted. ++ (read1): Set load_each_byte to 1 temporarily while handling ++ #@NUMBER. Don't call read_multibyte. ++ (read_vector): Call Fread with a cons. If readcharfun is ++ Qget_emacs_mule_file_char, decode the read string by emacs-mule. ++ (read_list): If doc_reference is 2, make the cdr part string as ++ unibyte. ++ (syms_of_lread): Intern and staticpro Qget_emacs_mule_file_char ++ and Qload_force_doc_strings. ++ ++2002-07-23 Kenichi Handa ++ ++ * xdisp.c (face_before_or_after_it_pos): Call ++ FETCH_MULTIBYTE_CHAR with byte postion, not char position. ++ ++2002-07-22 Kenichi Handa ++ ++ * character.h (TRAILING_CODE_P): New macro. ++ (MAYBE_UNIFY_CHAR): Adjusted for the change of Funify_charset. ++ (string_char_with_unification): Fix prototype. ++ (Vscript_alist): Extern it. ++ ++ * character.c (Vscript_alist): New variable. ++ (string_char_with_unification): Add `const' to local variables. ++ (str_as_unibyte): Likewise. ++ (string_escape_byte8): Likewise. ++ (syms_of_character): Declare script-alist as a Lisp variable. ++ ++ * charset.h (Vcharset_ordered_list): Extern it. ++ (charset_ordered_list_tick): Extern it. ++ (EMACS_MULE_LEADING_CODE_PRIVATE_11) ++ (EMACS_MULE_LEADING_CODE_PRIVATE_12) ++ (EMACS_MULE_LEADING_CODE_PRIVATE_21) ++ (EMACS_MULE_LEADING_CODE_PRIVATE_22): New macros ++ (Funify_charset): Adjusted for the change of Funify_charset. ++ ++ * charset.c (charset_ordered_list_tick): New variable. ++ (Fdefine_charset_internal): Increment charset_ordered_list_tick. ++ (Funify_charset): New optional arg DEUNIFY. If it is non-nil, ++ deunify intead of unify a charset. ++ (string_xstring_p): Add `const' to local variables. ++ (find_charsets_in_text): Add `const' to arguemnts and local ++ variables. ++ (encode_char): Adjusted for the change of Funify_charset. Fix ++ detecting of invalid code. ++ (Fset_charset_priority): Increment charset_ordered_list_tick. ++ (Fmap_charset_chars): Fix handling of default value for FROM_CODE ++ and TO_CODE. ++ ++ * coding.c (LEADING_CODE_PRIVATE_11, LEADING_CODE_PRIVATE_12) ++ (LEADING_CODE_PRIVATE_21, LEADING_CODE_PRIVATE_22): Macros ++ deleted. Callers changed to use ++ EMACS_MULE_LEADING_CODE_PRIVATE_11, etc. ++ (decode_coding_ccl): Add `const' to local variables. ++ (consume_chars): Likewise. ++ (Ffind_coding_systems_region_internal): Likewise. ++ (Fcheck_coding_systems_region): Likewise. ++ ++ * print.c (print_object): Use octal form for printing the ++ contents of a bool vector. ++ ++2002-07-18 Dave Love ++ ++ * lread.c (Fload) : Don't leak fd. ++ : Refuse to load. ++ ++2002-07-17 Dave Love ++ ++ * fns.c: Move coding.h. ++ (Qcodeset, Qdays, Qmonths): New. ++ (concat): Use CHARACTERP instead of INTERGERP. ++ (Flocale_codeset): Deleted. ++ (Flanginfo): New function. ++ (syms_of_fns): Changed accordingly. ++ ++ * coding.c (adjust_coding_eol_type): Fix eol_type/eol_seen mixup. ++ ++2002-07-16 Dave Love ++ ++ * casetab.c (init_casetab_once, init_casetab_once): Fix ++ CHAR_TABLE_SET call. ++ ++ * category.c (Fmodify_category_entry): Fix CATEGORY_MEMBER call. ++ ++ * character.c (syms_of_character): Fix CHAR_TABLE_SET call. ++ ++ * charset.c (Fmap_charset_chars): Check args. Convert Lisp types. ++ (load_charset_map, Fdeclare_equiv_charset, Fencode_char) ++ (Fset_charset_priority, syms_of_charset): Convert Lisp types. ++ ++ * charset.h (CHECK_CHARSET_GET_ID): Use XINT on AREF result. ++ ++ * coding.c (ENCODE_DESIGNATION, decode_eol) ++ (make_conversion_work_buffer, code_conversion_restore) ++ (Fdefine_coding_system_internal): Convert Lisp types. ++ (code_conversion_restore): Use EQ, not ==. ++ (Fencode_coding_string): Fix code_convert_string call. ++ ++ * coding.h (code_convert_region): Fix prototype. ++ ++ * dispextern.h (redraw_frame, redraw_garbaged_frames): Removed. ++ ++ * fontset.c (fontset_ref, fontset_set, fs_load_font) ++ (Ffontset_info): Convert Lisp types. ++ ++ * syntax.h (SYNTAX_ENTRY_INT): Don't use make_number. ++ ++ * xterm.c (note_mouse_movement): Fix call of window_from_coordinates. ++ ++ * xdisp.c (display_mode_element): Fix call of Fset_text_properties. ++ ++ * chartab.c: Include "...h", not <...h> in some cases. ++ ++ * callproc.c (Fcall_process): Remove unused variables. ++ ++2002-07-12 Dave Love ++ ++ * coding.c (Fset_coding_system_priority): Allow null arg list. ++ ++2002-07-03 Dave Love ++ ++ * minibuf.c (Fminibuffer_complete_word): Remove unused var. ++ (Fself_insert_and_exit): Use CHARACTERP. ++ ++ * callproc.c (Fcall_process): Remove unused vars. ++ ++ * xterm.c (XTread_socket): Add extra dead keysyms. ++ ++ * xdisp.c (decode_mode_spec_coding): Use CHARACTERP. ++ ++ * dispextern.h: Remove prototypes for redraw_frame, ++ redraw_garbaged_frames. ++ ++ * cmds.c (Fself_insert_command): Use CHARACTERP. ++ ++ * chartab.c (make_sub_char_table): Remove unused var. ++ (Fset_char_table_default, Fmap_char_table): Doc fix. ++ ++ * keymap.c (access_keymap): Remove generic char code. ++ (push_key_description): Use CHARACTERP. ++ ++2002-07-01 Dave Love ++ ++ * charset.c: Doc fixes. ++ (Funify_charset): Extra checking. ++ ++2002-06-24 Dave Love ++ ++ * lread.c: Remove some unused variables. ++ (safe_to_load_p): If safe, return the magic number version byte. ++ (Fload): Maybe use load-with-code-conversion. ++ ++2002-06-12 Kenichi Handa ++ ++ * category.c (Fmodify_category_entry): Don't modify the contents ++ of category_set for characters out of the range. Avoid ++ unnecessary modification. ++ ++ * character.h (MAYBE_UNIFY_CHAR): Adjusted for the change of ++ Vchar_unify_table. The default value of the table is now nil. ++ ++ * character.c (syms_of_character): Setup Vchar_width_table for ++ eight-bit-control and raw-byte chars. ++ ++ * charset.h (enum define_charset_arg_index): Delete ++ charset_arg_parents and add charset_arg_subset and ++ charset_arg_superset. ++ (enum charset_attr_index): Delete charset_parents and add ++ charset_subset and charset_superset. ++ (enum charset_method): Delete CHARSET_METHOD_INHERIT and add ++ CHARSET_METHOD_SUBSET and CHARSET_METHOD_SUPERSET. ++ (CHARSET_ATTR_PARENTS, CHARSET_PARENTS): Macros deleted. ++ (CHARSET_ATTR_SUBSET, CHARSET_ATTR_SUPERSET, CHARSET_SUBSET) ++ (CHARSET_SUPERSET): New macros. ++ (charset_work): Extern it. ++ (ENCODE_CHAR): Use charset_work. ++ (CHAR_CHARSET_P): Adjusted for the change of encoder format. ++ (map_charset_chars): Extern it. ++ ++ * charset.c (load_charset_map): Set the default value of encoder ++ and deunifier char-tables to nil. ++ (map_charset_chars): Argument changed. Callers changed. Use ++ map_char_table_for_charset instead of map_char_table. ++ (Fmap_charset_chars): New optional args from_code and to_code. ++ (Fdefine_charset_internal): Adjusted for the change of ++ `define-charset' (:parents -> :subset or :superset). ++ (charset_work): New variable. ++ (encode_char): Adjusted for the change of ++ Fdefine_charset_internal. ++ (syms_of_charset): Likewise. ++ (Ffind_charset_string): Setup the vector `charsets' correctly. ++ ++ * chartab.c (sub_char_table_ref_and_range): New arg defalt. Fix ++ the previous change. ++ (char_table_ref_and_range): Adjusted for the above change. ++ (map_sub_char_table_for_charset): New function. ++ (map_char_table_for_charset): New function. ++ ++ * keymap.c (describe_vector): Handle a char-table directly here. ++ (describe_char_table): Deleted. ++ ++ * lisp.h (map_charset_chars): Deleted. ++ ++2002-06-11 Dave Love ++ ++ * fns.c (count_combining): Comment out (unused). ++ (Flocale_codeset): New. ++ (syms_of_fns): Defsubr it. ++ ++ * config.in (HAVE_PTY_H, HAVE_SIZE_T, HAVE_LANGINFO_CODESET): New. ++ (size_t): Removed. ++ ++2002-06-06 Dave Love ++ ++ * Makefile.in (chartab.o): Depend on charset.h ++ ++2002-06-03 Kenichi Handa ++ ++ * character.c (syms_of_character): Set the default value of ++ Vprintable_chars to Qnil. ++ ++2002-05-31 Dave Love ++ ++ * Makefile.in (lisp, shortlisp): Change indian.elc to indian.el. ++ ++2002-05-31 Kenichi Handa ++ ++ * charset.c (load_charset_map): Handle the case that from < to ++ correctly. ++ ++ * coding.c (encode_coding_emacs_mule): Pay attention to raw-8-bit ++ chars. ++ (encode_coding_iso_2022): Likewise. ++ (encode_coding_sjis): Likewise. ++ (encode_coding_big5): Likewise. ++ (encode_coding_charset): Likewise. ++ ++2002-05-30 Kenichi Handa ++ ++ * Makefile.in (lisp): Change chinese.elc to chinese.el. They are ++ not bytecompiled now. ++ (shortlisp): Likewise. ++ ++ * charset.c (charset_jisx0201_roman, charset_jisx0208_1978) ++ (charset_jisx0208): New variables. ++ (Fdefine_charset_internal): Setup them if appropriate. ++ (init_charset_once): Initialize them to -1. ++ ++ * charset.h (charset_jisx0201_roman, charset_jisx0208_1978, ++ charset_jisx0208): Extern them. ++ ++ * coding.c (CODING_ISO_FLAG_USE_ROMAN): New macro ++ (CODING_ISO_FLAG_USE_OLDJIS): New macro. ++ (CODING_ISO_FLAG_FULL_SUPPORT): Macro definition changed. ++ (setup_iso_safe_charsets): Fix arguemtns to Fassq. ++ (DECODE_DESIGNATION): Pay attention to CODING_ISO_FLAG_USE_ROMAN ++ and CODING_ISO_FLAG_USE_OLDJIS. ++ (ENCODE_ISO_CHARACTER_DIMENSION1): Likewise. ++ (ENCODE_ISO_CHARACTER_DIMENSION2): Likewise. ++ (encode_coding_iso_2022): Change the 1st arg to ++ ENCODE_ISO_CHARACTER to a variable. ++ ++2002-05-29 Kenichi Handa ++ ++ * charset.h (enum define_charset_arg_index): New enums ++ charset_arg_min_code and charset_arg_max_code. ++ (struct charset): New member char_index_offset. ++ ++ * charset.c (CODE_POINT_TO_INDEX): Take charset->char_index_offset ++ into account. ++ (INDEX_TO_CODE_POINT): Likewise. ++ (Fdefine_charset_internal): Handle args[charset_arg_min_code] and ++ args[charset_arg_max_code]. Setup charset.char_index_offset. ++ (syms_of_charset): Fix args to Fdefine_charset_internal. ++ ++2002-05-27 Dave Love ++ ++ * coding.c (decode_coding_utf_8): Reject overlong sequences. ++ ++2002-05-26 Dave Love ++ ++ * coding.c: Doc fixes. ++ (Fcoding_system_aliases): Fix return value. ++ (Qmac): Remove (duplicated) definition. ++ ++2002-05-25 Dave Love ++ ++ * charset.c (Fcharset_priority_list, Fset_charset_priority): New ++ functions. ++ ++ * character.c (Fstring): Doc fix. ++ ++ * charset.c (Fdefine_charset_alias): Update Vcharset_list. ++ ++ * fontset.c (Ffontset_info): Doc fix. Return charset names, not ++ ids. ++ (font-encoding-alist): Doc fix. ++ ++2002-05-24 Dave Love ++ ++ * term.c (costs_set): Declare static, non-initialized for pcc. ++ (encode_terminal_code): Remove ensued var. ++ ++ * keyboard.c (kbd_buffer_store_event): Fix interrupt_signal decl ++ for K&R. ++ ++ * xterm.c (xlwmenu_window_p): Fix prototype for K&R. ++ ++ * coding.c (setup_iso_safe_charsets): Fix arg decl for K&R. ++ (suffixes): Moved out of make_subsidiaries for K&R. ++ ++ * charset.c (map_charset_chars): Fix c_function declaration for ++ K&R. ++ ++ * lisp.h (DEFUN) [!PROTOTYPES]: Remove spurious `args'. ++ ++2002-05-23 Dave Love ++ ++ * data.c (Fchar_or_string_p): Doc fix. Use CHARACTERP. ++ ++ * category.c (Fmodify_category_entry): Doc fix. Remove unused ++ vars. ++ ++2002-05-23 Yong Lu ++ ++ * charset.c (Fdefine_charset_internal): Fix argument to bzero. ++ ++ * coding.c (Fdefine_coding_system_internal): Fix previous change. ++ (decode_coding_charset): Workaround for the bug of GCC 2.96. ++ ++2002-05-23 Kenichi Handa ++ ++ * Makefile.in (lisp): Change cyrillic.elc to cyrillic.el, ++ vietnamese.elc to vietnamese.el. They are not bytecompiled now. ++ (shortlisp): Likewise. ++ ++2002-05-22 Kenichi Handa ++ ++ * coding.c (decode_coding_charset): Adjusted for the change of ++ Fdefine_coding_system_internal. ++ (Fdefine_coding_system_internal): For a coding system of ++ `charset' type, store a list of charset IDs in ++ `charset_attr_charset_valids' element of coding attributes. ++ ++ * charset.c (Fmake_char): Fix previous change. ++ ++2002-05-21 Kenichi Handa ++ ++ * coding.c (ONE_MORE_BYTE_NO_CHECK): Increment consumed_chars. ++ (emacs_mule_char): New arg src. Delete arg `composition'. Caller ++ changed. Handle 2-byte and 3-byte charsets correctly. ++ (DECODE_EMACS_MULE_COMPOSITION_RULE_20): Renamed from ++ DECODE_EMACS_MULE_COMPOSITION_RULE. Caller changed. ++ (DECODE_EMACS_MULE_COMPOSITION_RULE_21): New macro. ++ (DECODE_EMACS_MULE_21_COMPOSITION): Call ++ DECODE_EMACS_MULE_COMPOSITION_RULE_21. Produce correct annotation ++ sequence. ++ (decode_coding_emacs_mule): Handle composition correctly. Rewind ++ `src' and `consumed_chars' correctly before calling ++ emacs_mule_char. ++ (DECODE_COMPOSITION_START): Correctly handle the case of altchar ++ and alt&rule composition. ++ (decode_coding_iso_2022): Handle composition correctly. ++ (init_coding_once): Setup emacs_mule_bytes for private charsets. ++ ++ * charset.c (Fdefine_charset_internal): Fix bug for the case of ++ re-defining a charset. If the charset has :emacs-mule-id, setup ++ emacs_mule_bytes. ++ (Fmake_char): If CODE1 is nil, use the minimum code of the ++ charset. ++ ++2002-05-20 Kenichi Handa ++ ++ * coding.c (encode_coding_iso_2022): If coding requires safe ++ encoding, produce a character specified by ++ CODING_INHIBIT_CHARACTER_SUBSTITUTION. ++ (encode_coding_sjis): Likewise. ++ (encode_coding_big5): Likewise. ++ (encode_coding_charset): Likewise. ++ ++2002-05-17 Dave Love ++ ++ * xterm.c (XSetIMValues): Declare. ++ ++ * process.c: Conditionally include sys/wait.h, pty.h. ++ ++ * print.c (print_object): Fix print format for 64-bit ++ systems. ++ ++ * keyboard.c (modify_event_symbol): Fix print format for 64-bit ++ systems. ++ ++ * buffer.c (emacs_strerror): Declare. ++ (MMAP_ALLOCATED_P, mmap_enlarge, syms_of_buffer): Import changes ++ from trunk. ++ ++ * fontset.c (Fclear_face_cache): Declare. ++ (accumulate_font_info): Commented-out (unused). ++ (face_for_char, Fset_fontset_font, Ffontset_info): Remove unused ++ variables. ++ ++ * character.h (string_escape_byte8): Declare. ++ ++ * charset.c (load_charset_map, load_charset_map_from_file): Remove ++ unused vars. ++ (Fdefine_charset_internal, Fsplit_char, syms_of_charset) ++ (Fmap_charset_chars): Doc fix. ++ ++ * coding.c (Vchar_coding_system_table, Qchar_coding_system): ++ Removed. ++ (Fset_coding_system_priority, Fset_coding_system_priority) ++ (Fdefine_coding_system_internal): Doc fix. ++ ++2002-05-16 Dave Love ++ ++ * s/osf5-0.h (C_SWITCH_SYSTEM) [!__GNUC__]: Remove -nointrinsics. ++ ++2002-05-16 Kenichi Handa ++ ++ * character.c (string_escape_byte8): Make multibyte string with ++ correct size. ++ ++ * charset.c (Fmake_char): Delete unnecessary code. ++ ++2002-05-14 Kenichi Handa ++ ++ * xfns.c (x_encode_text): Allocate coding.destination here, and ++ call encode_coding_object with dst_object Qnil. ++ ++ * buffer.c (Fset_buffer_multibyte): Convert 8-bit bytes to ++ multibyte form correctly. ++ ++ * fontset.c (fs_load_font): Check fontp->full_name (not fontname) ++ against Vfont_encoding_alist. ++ ++ * coding.c (Fdecode_sjis_char): Fix typo (0x7F->0xFF). Fix the ++ handling of charset list. ++ (encode_coding_iso_2022): Setup coding->safe_charsets in advance. ++ (decode_coding_object): Move point to coding->dst_pos before ++ calling post-read-conversion function. ++ (encode_coding_object): Give correct arguments to ++ pre-write-conversion. Ignore the return value of ++ pre-write-conversion function. Pay attention to the case that ++ pre-write-conversion changes the current buffer. If dst_object is ++ Qt, even if coding->src_bytes is zero, allocate at least one byte ++ to coding->destination. ++ ++ * coding.h (JIS_TO_SJIS): Fix typo (j1->s1, j2->s2). ++ ++ * charset.c (Fmake_char): Make it more backward compatible. ++ (Fmap_charset_chars): Fix docstring. ++ ++2002-05-13 Dave Love ++ ++ * coding.c: Doc fixes. ++ (Fdefine_coding_system_alias): Use names, not symbols, in ++ coding-system-alist. ++ ++2002-05-13 Kenichi Handa ++ ++ * fontset.c (free_realized_fontsets): Call Fclear_face_cache instead ++ of calling free_realized_face. ++ ++2002-05-10 Yong Lu ++ ++ * charset.c (load_charset_map): Fix previous change. ++ (read_hex): Don't treat SPC as a comment starter. ++ (decode_char): If CODE_POINT_TO_INDEX retruns -1, always return ++ -1. ++ (Fdecode_char): Fix typo. ++ ++2002-05-10 Kenichi Handa ++ ++ * charset.h (struct charset): New member `code_space_mask'. ++ ++ * coding.c (coding_set_source): Delete the local variable ++ beg_byte. ++ (encode_coding_charset): Delete the local variable charset. ++ (Fdefine_coding_system_internal): Likewise. ++ (Fdefine_coding_system_internal): Setup ++ attrs[coding_attr_charset_valids] correctly. ++ ++ * charset.c (CODE_POINT_TO_INDEX): Utilize `code_space_mask' ++ member to check if CODE is valid or not. ++ (Fdefine_charset_internal): Initialize `code_space_mask' member. ++ (encode_char): Before calling CODE_POINT_TO_INDEX, check if CODE ++ is within the range of charset->min_code and carset->max_code. ++ ++2002-05-09 Dave Love ++ ++ * syntax.h (syntax_temp) [!__GNUC__]: Declare. ++ ++ * dispextern.h (generate_ascii_font): Fix return type. ++ ++ * xfaces.c (generate_ascii_font): Fix arg declaration. ++ ++ * coding.c (coding_inherit_eol_type) ++ (Fset_terminal_coding_system_internal) ++ (Fset_safe_terminal_coding_system_internal): Fix arg declarations. ++ ++2002-05-08 Kenichi Handa ++ ++ * coding.c (decode_coding_charset, encode_coding_charset): Handle ++ multiple charsets correctly. ++ ++2002-05-07 Kenichi Handa ++ ++ * search.c (boyer_moore): Fix handling of mulitbyte character ++ translation. ++ ++ * xdisp.c (display_mode_element): When the variable `elt' is ++ changed, update `this' and `lisp_string'. ++ ++2002-05-07 Kenichi Handa ++ ++ * buffer.c (Fset_buffer_multibyte): Fix 8-bit char handling. ++ ++ * callproc.c (Fcall_process): Be sure to give the current buffer ++ to decode_coding_c_string. Update PT and PT_BYTE after the ++ insertion. ++ ++ * charset.c (struct charset_map_entries): New struct. ++ (load_charset_map): Renamed from parse_charset_map. New args ++ entries and n_entries. Caller changed. ++ (load_charset_map_from_file): Renamed from load_charset_map. ++ Caller changed. New arg control_flag. Call load_charset_map at ++ the tail. ++ (load_charset_map_from_vector): New function. ++ (Fdefine_charset_internal): Setup charset.compact_codes_p. ++ (encode_char): If the charset is compact, change a character index ++ to a code point. ++ ++ * coding.c (coding_alloc_by_making_gap): Check the case that the ++ source and destination are the same correctly. ++ (decode_coding_raw_text): Set coding->consumed_char and ++ coding->consumed to 0. ++ (produce_chars): If coding->chars_at_source is nonzero, update ++ coding->consumed_char and coding->consumed before calling ++ alloc_destination. ++ (Fdefine_coding_system_alias): Register ALIAS in ++ Vcoding_system_alist. ++ (syms_of_coding): Define `no-convesion' coding system at the tail. ++ ++ * fileio.c (Finsert_file_contents): Set coding_system instead of ++ val. If the current buffer is multibyte, always call ++ decode_coding_gap. ++ ++ * xfaces.c (try_font_list): Give higher priority to fontset's ++ family than face's family. ++ ++2002-04-18 Kenichi Handa ++ ++ * callproc.c (Fcall_process): Be sure to give the current buffer ++ to decode_coding_c_string. ++ ++ * xfaces.c (try_font_list): Give a family specified in a fontset ++ higher priority than a family specified in a face. ++ ++2002-04-09 Kenichi Handa ++ ++ * fileio.c (Finsert_file_contents): Fix calculation of `inserted'. ++ Fix arguments to insert_from_buffer. ++ ++ * xdisp.c (display_mode_element): Fix calculation of `bytepos'. ++ ++2002-03-11 Kenichi Handa ++ ++ * coding.c (produce_chars): Set the variable `multibytep' correctly. ++ (decode_coding_gap): Set coding->dst_multibyte correctly. ++ ++2002-03-07 Kenichi Handa ++ ++ * coding.c (encode_coding_utf_8): Initialize produced_chars to 0. ++ (decode_coding_utf_16): Fix converting high and low bytes to ++ code-point. ++ (encode_coding_utf_16): Substitute coding->default_char for ++ non-Unicode characters. ++ (decode_coding): Don't call record_insert here. ++ (setup_coding_system): Initialize `surrogate' of ++ coding->spec.utf_16 to 0. ++ (EMIT_ONE_BYTE): Fix for multibyte case. ++ ++ * insdel.c (insert_from_gap): Call record_insert. ++ ++2002-03-04 Kenichi Handa ++ ++ * casefiddle.c (casify_region): Fix multibyte case. ++ ++ * character.c (c_string_width): Add return type `int'. ++ (char_string_with_unification): Arg ADVANCED deleted. ++ ++ * character.h (CHAR_VALID_P): Don't call CHARACTERP. ++ (CHAR_STRING): Adjusted for the change of ++ char_string_with_unification. ++ (CHAR_STRING_ADVANCE): Make it do-while statement. ++ ++ * chartab.c (sub_char_table_set_range): Optimized for the case ++ DEPTH == 3. Add workaround code for a GCC optimization bug. ++ ++ * charset.c (parse_charset_map): Remove an unused variable. ++ ++ * coding.c: Delete unused variables. ++ ++ * fileio.c (Finsert_file_contents): Set coding_system to Qnil ++ earlier. If inserted is zero and the coding system doesn't ++ require flushing, don't call decode_coding_gap. ++ ++ * syntax.h (SET_RAW_SYNTAX_ENTRY): Don't call make_number. ++ ++2002-03-01 Kenichi Handa ++ ++ The following changes are for using Unicode as an internal ++ character model, and use UTF-8 format for buffer/string ++ representation. ++ ++ * .gdbinit (xchartable): Adjusted for the change of char table ++ structure. ++ (xsubchartable, xcoding, xcharset, xcurbuf): New commands. ++ ++ * Makefile.in (obj): Add character.o and chartab.o. ++ (lisp, shortlisp): Remove utf-8.elc: ++ (*.o): For many files, change dependency on charset.h to ++ character.h, and add dependency on character.h. ++ (character.o, chartab.o): New targets. ++ ++ * abbrev.c, bytecode.c, casefiddle.c, cmds.c, dispnew.c, doc.c, ++ doprnt.c, dosfns.c, frame.c, marker.c, minibuf.c, msdos.c, ++ w16select.c, w32bdf.c, w32console.c: Include "character.h" instead ++ of "charset.h". ++ ++ * dired.c, filelock.c: Include "character.h". ++ ++ * alloc.c: Include "character.h" instead of "charset.h". ++ (Fmake_char_table): Moved to chartab.c. ++ (make_sub_char_table): Likewise. ++ (syms_of_alloc): Remove defsubr for Smake_char_table. ++ ++ * buffer.c: Include "character.h" instead of "charset.h", don't ++ include "coding.h". ++ (Fset_buffer_multibyte): Adjuted for UTF-8. ++ ++ * buffer.h: EXFUN Fbuffer_live_p. ++ ++ * callproc.c: Include "character.h" instead of "charset.h". ++ (Fcall_process): Big change for the new code-conversion APIs. ++ ++ * casetab.c: Include "character.h" instead of "charset.h". ++ (set_canon, set_identity, shuffle): Adjusted for the new ++ map_char_table spec. ++ (init_casetab_once): Call CHAR_TABLE_SET instead of directly ++ accessing the char table structure. ++ ++ * chartab.c: New file that implements char table. ++ ++ * category.c: Include "character.h". ++ (copy_category_entry): New function. ++ (copy_category_table): Call map_char_table and copy_category_entry. ++ (Fmake_category_table): Initialize all top-vel slots. ++ (char_category_set): New function. ++ (modify_lower_category_set): Deleted. ++ (Fmodify_category_entry): Call char_table_ref_and_range. ++ ++ * category.h (CATEGORY_SET): Just call char_category_set. ++ ++ * ccl.c: Include "character.h". ++ (Qccl, Qcclp): New variables. ++ (CCL_WRITE_CHAR): Alway treat the arg CH as a character even if ++ it's less than 256. ++ (CCL_WRITE_MULTIBYTE_CHAR): Deleted. ++ (CCL_WRITE_STRING, CCL_READ_CHAR): Adjusted for the change of SRC ++ and DST type. ++ (ccl_driver): Types of arguments changed. Code adjusted for that. ++ (Fccl_execute, Fccl_execute_on_string): Adjusted for the change of ++ ccl_driver. ++ (syms_of_ccl): Intern and staticpro Qccl and Qcclp. ++ ++ * ccl.h (struct ccl_program): Members eol_type and multibyte ++ deleted. New members src_multibyte, dst_multibyte, consumed, and ++ produced. ++ (struct ccl_spec): Members decoder and encoder deleted. New ++ memeber ccl. ++ (CODING_SPEC_CCL_PROGRAM): New macro. ++ (ccl_driver): Prototype updated. ++ (Qccl, Qcclp, Fccl_program_p): Extern them. ++ (CHECK_CCL_PROGRAM): New macro. ++ ++ * character.c, character.h, chartab.c: New files. ++ ++ * charset.c: Mostly re-written. Character and multibyte sequence ++ handling codes are moved to character.c. ++ ++ * charset.h: Mostly re-written. Character and multibyte sequence ++ handling codes are moved to character.h. ++ ++ * coding.c, coding.h: Mostly re-written. ++ ++ * composite.c: Include "character.h" instead of "charset.h". ++ (CHAR_WIDTH): Moved to character.h. ++ (HASH_KEY, HASH_VALUE): Deleted. ++ ++ * composite.h (enum composition_method): Order of enumeration ++ symbols changed. ++ ++ * data.c: Include "character.h" instead of "charset.h". ++ (Faref): Call CHAR_TABLE_REF for a char table. ++ (Faset): Call CHAR_TABLE_SET for a char table. ++ ++ * dispextern.h (free_realized_face, check_face_attribytes, ++ generate_ascii_font): Extern them. ++ (free_realized_multibyte_face): Extern deleted. ++ ++ * disptab.h (DISP_CHAR_VECTOR): Adjusted for the change of char ++ table structure. ++ ++ * editfns.c: Include "character.h" instead of "charset.h". ++ (Fchar_to_string): Always call CHAR_STRING. ++ ++ * emacs.c (main): Call init_charset_once, init_charset, ++ syms_of_chartab, and syms_of_character. ++ ++ * fileio.c: Include "character.h" instead of "charset.h". ++ (Finsert_file_contents): Big change for the new code-conversion ++ API. ++ (choose_write_coding_system): Likewise. ++ (Fwrite_region): Likewise. ++ (build_annotations_2): Deleted. ++ (e_write): Big change for the new code-conversion API. ++ ++ * fns.c: Include "character.h" instead of "charset.h". ++ (copy_sub_char_table): Moved to chartab.c. ++ (Fcopy_sequence): Call copy_char_table for a char table. ++ (concat): Delete codes calling count_multibyte. ++ (string_char_to_byte): Adjusted for the new multibyte form. ++ (string_byte_to_char): Likewise. ++ (internal_equal): Adjusted for the change of char table structure. ++ (Fchar_table_subtype, Fchar_table_parent, Fset_char_table_parent, ++ Fchar_table_extra_slot, Fset_char_table_extra_slot, ++ Fchar_table_range, Fset_char_table_range, Fset_char_table_default, ++ char_table_translate, optimize_sub_char_table, ++ Foptimize_char_table, map_char_table, Fmap_char_table): Moved to ++ chartab.c. ++ (char_table_ref_and_index): Deleted. ++ (HASH_KEY, HASH_VALUE): Moved to lisp.h. ++ (Fmd5): Call preferred_coding_system instead of accessing ++ Vcoding_category_list. Adjusted for the new code-conversion API. ++ (syms_of_fns): Defsubr for char table related functions moved to ++ chartab.c. ++ ++ * fontset.c: Mostly re-written. ++ ++ * fontset.h (struct font_info): Type of the member encoding_type ++ changed. ++ (enum FONT_SPEC_INDEX): New enum. ++ (fontset_font_pattern, fs_load_font): Prototype updated. ++ (FS_LOAD_FONT): Adjusted for the change of fs_load_font. ++ ++ * indent.c: Include "character.h" instead of "charset.h". ++ (MULTIBYTE_BYTES_WIDTH): Call CHAR_WIDTH instead of ++ WIDTH_BY_CHAR_HEAD. ++ ++ * insdel.c: Include "character.h" instead of "charset.h". ++ (copy_text): Don't refer to Vnonascii_translation_table. ++ (insert_from_gap): New function. ++ ++ * keyboard.c: Include "character.h" instead of "charset.h". ++ (command_loop_1): Never call direct_output_forward_char before ++ a non-ASCII character. ++ (read_char): If Vkeyboard_translate_table is a char table, always ++ translated a character. ++ ++ * keymap.c: Include "character.h". ++ (store_in_keymap): Handle the case that IDX is a cons. ++ (Fdefine_key): Handle the case that KEY is a cons and the car part ++ is also a cons (range). ++ (push_key_description): Adjusted for the new character code. ++ (describe_vector): Call describe_char_table for a char table. ++ (describe_char_table): New function. ++ ++ * keymap.h (describe_char_table): Extern it. ++ ++ * lisp.h (enum pvec_type): New member PVEC_SUB_CHAR_TABLE. ++ (XSUB_CHAR_TABLE, XSETSUB_CHAR_TABLE): New macros. ++ (CHAR_TABLE_ORDINARY_SLOTS, CHAR_TABLE_SINGLE_BYTE_SLOTS, ++ SUB_CHAR_TABLE_ORDINARY_SLOTS, SUB_CHAR_TABLE_STANDARD_SLOTS): ++ Deleted. ++ (CHAR_TABLE_REF, CHAR_TABLE_SET): Adjusted for the new char table ++ structure. ++ (CHAR_TABLE_TRANSLATE): Just call char_table_translate. ++ (CHARTAB_SIZE_BITS_0, CHARTAB_SIZE_BITS_1, CHARTAB_SIZE_BITS_2, ++ CHARTAB_SIZE_BITS_3): New macros. ++ (chartab_size): Extern it. ++ (struct Lisp_Char_Table): Re-designed. ++ (struct Lisp_Sub_Char_Table): New structure. ++ (HASH_KEY, HASH_VALUE): Moved from fns.c. ++ (CHARACTERBITS): Defined as 22. ++ (GLYPH_MASK_FACE, GLYPH_MASK_CHAR): Adjusted for the above change. ++ (SUB_CHAR_TABLE_P): Check PVEC_CHAR_TABLE. ++ (GC_SUB_CHAR_TABLE_P): New macro. ++ (Fencode_coding_string, Fdecode_coding_string): EXFUN Updated. ++ (code_convert_string_norecord): Extern deleted. ++ (init_character_once, syms_of_character, init_charset, ++ syms_of_composite, Qeq, Fmakehash, insert_from_gap): Extern them. ++ ++ * lread.c: Include "character.h". ++ (read_multibyte): New arg NBYTES. ++ (read_escape): The meaning of returned *BYTEREP changed. ++ (to_multibyte): Deleted. ++ (read1): Adjuted the handling of char table and string. ++ ++ * print.c: Include "character.h" instead of "charset.h". ++ (print_string): Convert 8-bit raw bytes to octal form by ++ string_escape_byte8. ++ (print_object): Adjusted for the new multibyte form. Print 8-bit ++ raw bytes always in octal form. Handle sub char table correctly. ++ ++ * process.c: Include "character.h" instead of "charset.h". ++ (read_process_output): Adjusted for the new code-conversion API. ++ (send_process): Likewise. ++ ++ * puresize.h (BASE_PURESIZE): Increased. ++ ++ * regex.c: Include "character.h" instead of "charset.h". ++ (BYTE8_TO_CHAR, CHAR_BYTE8_P) [not emacs]: New dummy macros. ++ (regex_compile): Accept a range whose starting and ending ++ character have different leading bytes. ++ (analyse_first): Adjusted for the above change. ++ ++ * search.c: Include "character.h" instead of "charset.h". ++ (search_buffer, boyer_moore): Adjusted for the new multibyte form. ++ (Freplace_match): Adjusted for the change of ++ multibyte_char_to_unibyte. ++ ++ * syntax.c: Include "character.h" instead of "charset.h". ++ (syntax_parent_lookup): Deleted. ++ (Fmodify_syntax_entry): Accept a cons as CHAR. ++ (skip_chars): Adjusted for the new multibyte form. ++ (init_syntax_once): Call char_table_set_range instead of directly ++ accessing the structure of a char table. ++ ++ * syntax.h (SET_RAW_SYNTAX_ENTRY): Call CHAR_TABLE_SET. ++ (SYNTAX_ENTRY_FOLLOW_PARENT): Macro deleted. ++ (SET_RAW_SYNTAX_ENTRY_RANGE): New macro. ++ (SYNTAX_ENTRY_INT): Call CHAR_TABLE_REF. ++ ++ * term.c: Include "buffer.h" and "character.h". ++ (encode_terminal_code): Adjusted for the new code-conversion API. ++ (write_glyphs): Likewise. ++ (produce_glyphs): Call CHAR_WIDTH instead of CHARSET_WIDTH. ++ ++ * w32term.c (x_new_font): Adjusted for the change of FS_LOAD_FONT. ++ ++ * xdisp.c: Include "character.h". ++ (get_next_display_element): Adjusted for the new multibyte form. ++ (disp_char_vector): Adjusted for the new char table structure. ++ (decode_mode_spec_coding): Adjusted for the new structure of ++ coding system. ++ (decode_mode_spec): Adjusted for the new code-conversion API. ++ ++ * xfaces.c: Include "character.h" instead of "charset.h". ++ (load_face_font): Adjusted for the change of choose_face_font and ++ FS_LOAD_FONT. ++ (generate_ascii_font): New function. ++ (set_lface_from_font_name): Adjusted for the change of ++ FS_LOAD_FONT. ++ (set_font_frame_param): Adjusted for the change of ++ choose_face_font. ++ (free_realized_face): Make it public. ++ (free_realized_faces_for_fontset): Renamed from ++ free_realized_multibyte_face. Free also faces realized for ASCII. ++ (choose_face_font): Argments changed. Adjusted for the change of ++ fontset_font_pattern and FS_LOAD_FONT. ++ ++ * xfns.c: Include "character.h". ++ (x_encode_text): Adjusted for the new code-conversion API. ++ ++ * xselect.c: Don't include "charset.h". ++ (selection_data_to_lisp_data): Adjusted for the new code ++ covnersion API. ++ ++ * xterm.c: Include "character.h". ++ (x_encode_char): New argument CHARSET. Caller changed. ++ (x_get_char_face_and_encoding): Call ENCODE_CHAR instead of ++ SPLIT_CHAR. ++ (x_get_glyph_face_and_encoding): Likewise. ++ (x_produce_glyphs): Don't check Vnonascii_translation_table Call ++ CHAR_WIDTH instead of CHARSET_WIDTH. ++ (XTread_socket): Adjusted for the new code-conversion API. ++ (x_new_font): Adjusted for the change of FS_LOAD_FONT. ++ (x_load_font): Adjusted for the change of struct font. ++ ++;; Local Variables: ++;; coding: iso-2022-7bit ++;; End: ++ ++ Copyright (C) 2002 Free Software Foundation, Inc. ++ Copying and distribution of this file, with or without modification, ++ are permitted provided the copyright notice and this notice are preserved. diff --cc src/Makefile.in index 1961dfcdb13,8aef6ce49e8..0f3ac171442 --- a/src/Makefile.in +++ b/src/Makefile.in @@@ -571,8 -530,9 +571,8 @@@ XMENU_OBJ = xmenu. /* lastfile must follow all files whose initialized data areas should be dumped as pure by dump-emacs. */ -obj= dispnew.o frame.o scroll.o xdisp.o xmenu.o window.o \ - charset.o coding.o category.o ccl.o\ - character.o chartab.o\ +obj= dispnew.o frame.o scroll.o xdisp.o $(XMENU_OBJ) window.o \ - charset.o coding.o category.o ccl.o \ ++ charset.o coding.o category.o ccl.o character.o chartab.o \ cm.o term.o xfaces.o $(XOBJ) \ emacs.o keyboard.o macros.o keymap.o sysdep.o \ buffer.o filelock.o insdel.o marker.o \ @@@ -689,8 -648,8 +689,9 @@@ lisp= ${lispsource}abbrev.elc \ ${lispsource}buff-menu.elc \ ${lispsource}button.elc \ - ${lispsource}byte-run.elc \ + ${lispsource}emacs-lisp/byte-run.elc \ + ${lispsource}composite.elc \ + ${lispsource}cus-face.elc \ ${lispsource}cus-start.elc \ ${lispsource}custom.elc \ ${lispsource}emacs-lisp/backquote.elc \ @@@ -716,23 -675,11 +717,13 @@@ ${lispsource}international/mule-conf.el \ ${lispsource}international/mule-cmds.elc \ ${lispsource}international/characters.elc \ - ${lispsource}international/ucs-tables.elc \ - ${lispsource}international/utf-8.elc \ - ${lispsource}international/utf-16.elc \ - ${lispsource}international/latin-1.el \ - ${lispsource}international/latin-2.el \ - ${lispsource}international/latin-3.el \ - ${lispsource}international/latin-4.el \ - ${lispsource}international/latin-5.el \ - ${lispsource}international/latin-8.el \ - ${lispsource}international/latin-9.el \ ${lispsource}case-table.elc \ - ${lispsource}language/chinese.elc \ - ${lispsource}language/cyrillic.elc \ - ${lispsource}language/indian.elc \ + ${lispsource}language/chinese.el \ + ${lispsource}language/cyrillic.el \ + ${lispsource}language/indian.el \ ${lispsource}language/devanagari.el \ + ${lispsource}language/malayalam.el \ + ${lispsource}language/tamil.el \ ${lispsource}language/english.el \ ${lispsource}language/ethiopic.elc \ ${lispsource}language/european.elc \ @@@ -780,8 -725,8 +771,9 @@@ shortlisp= ../lisp/abbrev.elc \ ../lisp/buff-menu.elc \ ../lisp/button.elc \ - ../lisp/byte-run.elc \ + ../lisp/emacs-lisp/byte-run.elc \ + ../lisp/composite.elc \ + ../lisp/cus-face.elc \ ../lisp/cus-start.elc \ ../lisp/custom.elc \ ../lisp/emacs-lisp/backquote.elc \ @@@ -805,23 -750,11 +797,13 @@@ ../lisp/international/mule-conf.el \ ../lisp/international/mule-cmds.elc \ ../lisp/international/characters.elc \ - ../lisp/international/ucs-tables.elc \ - ../lisp/international/utf-8.elc \ - ../lisp/international/utf-16.elc \ - ../lisp/international/latin-1.el \ - ../lisp/international/latin-2.el \ - ../lisp/international/latin-3.el \ - ../lisp/international/latin-4.el \ - ../lisp/international/latin-5.el \ - ../lisp/international/latin-8.el \ - ../lisp/international/latin-9.el \ ../lisp/case-table.elc \ - ../lisp/language/chinese.elc \ - ../lisp/language/cyrillic.elc \ - ../lisp/language/indian.elc \ + ../lisp/language/chinese.el \ + ../lisp/language/cyrillic.el \ + ../lisp/language/indian.el \ ../lisp/language/devanagari.el \ + ../lisp/language/malayalam.el \ + ../lisp/language/tamil.el \ ../lisp/language/english.el \ ../lisp/language/ethiopic.elc \ ../lisp/language/european.elc \ @@@ -1063,64 -994,68 +1045,70 @@@ alloca.o : alloca.s $(config_h it is so often changed in ways that do not require any recompilation and so rarely changed in ways that do require any. */ - abbrev.o: abbrev.c buffer.h window.h dispextern.h commands.h charset.h \ + abbrev.o: abbrev.c buffer.h window.h dispextern.h commands.h character.h \ - syntax.h $(config_h) + $(config_h) buffer.o: buffer.c buffer.h region-cache.h commands.h window.h \ - dispextern.h $(INTERVAL_SRC) blockinput.h atimer.h systime.h charset.h \ + dispextern.h $(INTERVAL_SRC) blockinput.h atimer.h systime.h character.h \ $(config_h) callint.o: callint.c window.h commands.h buffer.h \ keyboard.h dispextern.h $(config_h) callproc.o: callproc.c epaths.h buffer.h commands.h $(config_h) \ - process.h systty.h syssignal.h charset.h coding.h ccl.h msdos.h \ - process.h systty.h syssignal.h character.h coding.h ccl.h msdos.h \ - composite.h ++ process.h systty.h syssignal.h character.h coding.h ccl.h msdos.h \ + composite.h - casefiddle.o: casefiddle.c syntax.h commands.h buffer.h composite.h $(config_h) + casefiddle.o: casefiddle.c syntax.h commands.h buffer.h character.h \ + composite.h $(config_h) casetab.o: casetab.c buffer.h $(config_h) - category.o: category.c category.h buffer.h charset.h $(config_h) - ccl.o: ccl.c ccl.h charset.h coding.h $(config_h) - charset.o: charset.c charset.h buffer.h coding.h composite.h disptab.h \ - $(config_h) - coding.o: coding.c coding.h ccl.h buffer.h charset.h $(config_h) + category.o: category.c category.h buffer.h charset.h character.h $(config_h) + ccl.o: ccl.c ccl.h charset.h character.h coding.h $(config_h) + character.o: character.c character.h buffer.h charset.h composite.h disptab.h \ + $(config.h) + charset.o: charset.c charset.h character.h buffer.h coding.h composite.h \ + disptab.h $(config_h) + chartab.o: charset.h character.h $(config.h) + coding.o: coding.c coding.h ccl.h buffer.h character.h charset.h composite.h \ + $(config_h) cm.o: cm.c cm.h termhooks.h $(config_h) - cmds.o: cmds.c syntax.h buffer.h charset.h commands.h window.h $(config_h) \ + cmds.o: cmds.c syntax.h buffer.h character.h commands.h window.h $(config_h) \ msdos.h dispextern.h pre-crt0.o: pre-crt0.c ecrt0.o: ecrt0.c $(config_h) CRT0_COMPILE ${srcdir}/ecrt0.c - dired.o: dired.c commands.h buffer.h $(config_h) charset.h coding.h regex.h \ - systime.h + dired.o: dired.c commands.h buffer.h $(config_h) character.h charset.h \ + coding.h regex.h systime.h -dispnew.o: dispnew.c commands.h frame.h window.h buffer.h dispextern.h \ - termchar.h termopts.h termhooks.h cm.h disptab.h systty.h systime.h \ +dispnew.o: dispnew.c systty.h systime.h commands.h process.h frame.h \ + window.h buffer.h dispextern.h termchar.h termopts.h termhooks.h cm.h \ + disptab.h \ - xterm.h blockinput.h atimer.h charset.h msdos.h composite.h keyboard.h \ + xterm.h blockinput.h atimer.h character.h msdos.h composite.h keyboard.h \ $(config_h) - doc.o: doc.c $(config_h) epaths.h buffer.h keyboard.h charset.h - doprnt.o: doprnt.c charset.h $(config_h) + doc.o: doc.c $(config_h) epaths.h buffer.h keyboard.h character.h + doprnt.o: doprnt.c character.h $(config_h) dosfns.o: buffer.h termchar.h termhooks.h frame.h msdos.h dosfns.h $(config_h) - editfns.o: editfns.c window.h buffer.h systime.h $(INTERVAL_SRC) charset.h \ + editfns.o: editfns.c window.h buffer.h systime.h $(INTERVAL_SRC) character.h \ coding.h dispextern.h $(config_h) emacs.o: emacs.c commands.h systty.h syssignal.h blockinput.h process.h \ termhooks.h buffer.h atimer.h systime.h $(INTERVAL_SRC) $(config_h) - fileio.o: fileio.c window.h buffer.h systime.h $(INTERVAL_SRC) charset.h \ + fileio.o: fileio.c window.h buffer.h systime.h $(INTERVAL_SRC) character.h \ coding.h ccl.h msdos.h dispextern.h $(config_h) - filelock.o: filelock.c buffer.h systime.h epaths.h $(config_h) -filelock.o: filelock.c buffer.h charset.h coding.h systime.h epaths.h \ - $(config_h) character.h ++filelock.o: filelock.c buffer.h character.h charset.h coding.h systime.h \ ++ epaths.h $(config_h) filemode.o: filemode.c $(config_h) frame.o: frame.c xterm.h window.h frame.h termhooks.h commands.h keyboard.h \ - blockinput.h atimer.h systime.h buffer.h charset.h fontset.h \ - buffer.h character.h fontset.h msdos.h dosfns.h dispextern.h $(config_h) ++ blockinput.h atimer.h systime.h buffer.h character.h fontset.h \ + msdos.h dosfns.h dispextern.h $(config_h) - fontset.o: dispextern.h fontset.h fontset.c ccl.h buffer.h charset.h frame.h \ - keyboard.h $(config_h) + fontset.o: dispextern.h fontset.h fontset.c ccl.h buffer.h character.h \ + charset.h frame.h keyboard.h $(config_h) getloadavg.o: getloadavg.c $(config_h) indent.o: indent.c frame.h window.h indent.h buffer.h $(config_h) termchar.h \ - termopts.h disptab.h region-cache.h charset.h composite.h dispextern.h \ - keyboard.h - insdel.o: insdel.c window.h buffer.h $(INTERVAL_SRC) blockinput.h charset.h \ + termopts.h disptab.h region-cache.h character.h category.h composite.h \ + dispextern.h keyboard.h + insdel.o: insdel.c window.h buffer.h $(INTERVAL_SRC) blockinput.h character.h \ - dispextern.h atimer.h systime.h $(config_h) region-cache.h + dispextern.h atimer.h systime.h $(config_h) - keyboard.o: keyboard.c termchar.h termhooks.h termopts.h buffer.h charset.h \ + keyboard.o: keyboard.c termchar.h termhooks.h termopts.h buffer.h character.h \ commands.h frame.h window.h macros.h disptab.h keyboard.h syssignal.h \ systty.h systime.h dispextern.h syntax.h $(INTERVAL_SRC) blockinput.h \ atimer.h xterm.h puresize.h msdos.h $(config_h) keymap.o: keymap.c buffer.h commands.h keyboard.h termhooks.h blockinput.h \ - atimer.h systime.h puresize.h charset.h intervals.h $(config_h) - atimer.h systime.h puresize.h character.h charset.h $(INTERVAL_SRC) $(config_h) ++ atimer.h systime.h puresize.h character.h intervals.h $(config_h) lastfile.o: lastfile.c $(config_h) macros.o: macros.c window.h buffer.h commands.h macros.h keyboard.h \ dispextern.h $(config_h) @@@ -1128,32 -1063,34 +1116,35 @@@ malloc.o: malloc.c $(config_h gmalloc.o: gmalloc.c $(config_h) ralloc.o: ralloc.c $(config_h) vm-limit.o: vm-limit.c mem-limits.h $(config_h) - marker.o: marker.c buffer.h charset.h $(config_h) + marker.o: marker.c buffer.h character.h $(config_h) +md5.o: md5.c md5.h $(config_h) minibuf.o: minibuf.c syntax.h dispextern.h frame.h window.h keyboard.h \ - buffer.h commands.h charset.h msdos.h $(config_h) + buffer.h commands.h character.h msdos.h $(config_h) mktime.o: mktime.c $(config_h) msdos.o: msdos.c msdos.h dosfns.h systime.h termhooks.h dispextern.h frame.h \ - termopts.h termchar.h charset.h coding.h ccl.h disptab.h window.h \ + termopts.h termchar.h character.h coding.h ccl.h disptab.h window.h \ keyboard.h $(config_h) process.o: process.c process.h buffer.h window.h termhooks.h termopts.h \ commands.h syssignal.h systime.h systty.h syswait.h frame.h dispextern.h \ - blockinput.h atimer.h character.h coding.h ccl.h msdos.h composite.h \ + blockinput.h atimer.h charset.h coding.h ccl.h msdos.h composite.h \ keyboard.h $(config_h) - regex.o: regex.c syntax.h buffer.h $(config_h) regex.h category.h charset.h + regex.o: regex.c syntax.h buffer.h $(config_h) regex.h category.h character.h \ + charset.h region-cache.o: region-cache.c buffer.h region-cache.h scroll.o: scroll.c termchar.h dispextern.h frame.h msdos.h keyboard.h \ $(config_h) search.o: search.c regex.h commands.h buffer.h region-cache.h syntax.h \ - blockinput.h atimer.h systime.h category.h charset.h composite.h $(config_h) + blockinput.h atimer.h systime.h category.h character.h composite.h \ + $(config_h) strftime.o: strftime.c $(config_h) - syntax.o: syntax.c syntax.h buffer.h commands.h category.h charset.h \ + syntax.o: syntax.c syntax.h buffer.h commands.h category.h character.h \ composite.h $(config_h) -sysdep.o: sysdep.c $(config_h) dispextern.h termhooks.h termchar.h termopts.h \ - frame.h syssignal.h systty.h systime.h syswait.h blockinput.h atimer.h \ - window.h msdos.h dosfns.h keyboard.h +sysdep.o: sysdep.c syssignal.h systty.h systime.h syswait.h blockinput.h \ + process.h dispextern.h termhooks.h termchar.h termopts.h \ + frame.h atimer.h window.h msdos.h dosfns.h keyboard.h $(config_h) term.o: term.c termchar.h termhooks.h termopts.h $(config_h) cm.h frame.h \ - disptab.h dispextern.h keyboard.h charset.h coding.h ccl.h msdos.h + disptab.h dispextern.h keyboard.h character.h charset.h coding.h ccl.h \ + msdos.h keymap.h termcap.o: termcap.c $(config_h) terminfo.o: terminfo.c $(config_h) tparam.o: tparam.c $(config_h) @@@ -1169,47 -1106,46 +1160,51 @@@ widget.o: widget.c xterm.h frame.h disp window.o: window.c indent.h commands.h frame.h window.h buffer.h termchar.h \ termhooks.h disptab.h keyboard.h dispextern.h msdos.h composite.h \ $(config_h) -xdisp.o: xdisp.c macros.h commands.h indent.h buffer.h dispextern.h coding.h \ +xdisp.o: xdisp.c macros.h commands.h process.h indent.h buffer.h dispextern.h coding.h \ - termchar.h frame.h window.h disptab.h termhooks.h charset.h $(config_h) \ - msdos.h composite.h fontset.h blockinput.h atimer.h systime.h keymap.h + termchar.h frame.h window.h disptab.h termhooks.h character.h charset.h \ - $(config_h) msdos.h composite.h fontset.h region-cache.h ++ $(config_h) msdos.h composite.h fontset.h blockinput.h atimer.h systime.h \ ++ keymap.h region-cache.h xfaces.o: xfaces.c dispextern.h frame.h xterm.h buffer.h blockinput.h \ - window.h charset.h msdos.h dosfns.h composite.h atimer.h systime.h $(config_h) + window.h character.h charset.h msdos.h dosfns.h composite.h atimer.h \ + systime.h fontset.h $(config_h) $(INTERVAL_SRC) xfns.o: xfns.c buffer.h frame.h window.h keyboard.h xterm.h dispextern.h \ $(srcdir)/../lwlib/lwlib.h blockinput.h atimer.h systime.h epaths.h \ - charset.h gtkutil.h $(config_h) - character.h charset.h coding.h $(config_h) termhooks.h coding.h ++ character.h charset.h coding.h gtkutil.h $(config_h) termhooks.h xmenu.o: xmenu.c xterm.h termhooks.h window.h dispextern.h frame.h buffer.h \ - keyboard.h $(srcdir)/../lwlib/lwlib.h blockinput.h atimer.h systime.h \ - gtkutil.h msdos.h $(config_h) + charset.h keyboard.h $(srcdir)/../lwlib/lwlib.h blockinput.h atimer.h \ - systime.h msdos.h $(config_h) coding.h ++ systime.h gtkutil.h msdos.h $(config_h) coding.h xterm.o: xterm.c xterm.h termhooks.h termopts.h termchar.h window.h buffer.h \ - dispextern.h frame.h disptab.h blockinput.h atimer.h systime.h syssignal.h \ - keyboard.h gnu.h charset.h ccl.h fontset.h composite.h \ - coding.h process.h gtkutil.h $(config_h) + dispextern.h frame.h disptab.h blockinput.h atimer.h systime.h syssignal.h \ + keyboard.h gnu.h character.h charset.h ccl.h fontset.h composite.h \ - coding.h $(config_h) $(INTERVAL_SRC) -xselect.o: xselect.c dispextern.h frame.h xterm.h blockinput.h character.h \ - coding.h composite.h ccl.h buffer.h atimer.h systime.h $(config_h) ++ coding.h process.h gtkutil.h $(config_h) +xselect.o: xselect.c process.h dispextern.h frame.h xterm.h blockinput.h \ + buffer.h atimer.h systime.h $(config_h) xrdb.o: xrdb.c $(config_h) epaths.h +xsmfns.o: xsmfns.c $(config_h) systime.h sysselect.h termhooks.h +gtkutil.o: gtkutil.c gtkutil.h xterm.h lisp.h frame.h $(config_h) \ + blockinput.h window.h atimer.h termhooks.h + hftctl.o: hftctl.c $(config_h) sound.o: sound.c dispextern.h $(config_h) atimer.o: atimer.c atimer.h systime.h $(config_h) /* The files of Lisp proper */ -alloc.o: alloc.c frame.h window.h buffer.h puresize.h syssignal.h keyboard.h \ +alloc.o: alloc.c process.h frame.h window.h buffer.h puresize.h syssignal.h keyboard.h \ - blockinput.h atimer.h systime.h charset.h dispextern.h $(config_h) $(INTERVAL_SRC) - bytecode.o: bytecode.c buffer.h syntax.h charset.h $(config_h) - data.o: data.c buffer.h puresize.h charset.h syssignal.h keyboard.h $(config_h) + blockinput.h atimer.h systime.h character.h dispextern.h $(config_h) \ + $(INTERVAL_SRC) + bytecode.o: bytecode.c buffer.h syntax.h character.h $(config_h) + data.o: data.c buffer.h puresize.h character.h syssignal.h keyboard.h \ + $(config_h) eval.o: eval.c commands.h keyboard.h blockinput.h atimer.h systime.h \ $(config_h) floatfns.o: floatfns.c $(config_h) - fns.o: fns.c commands.h $(config_h) frame.h buffer.h charset.h keyboard.h \ + fns.o: fns.c commands.h $(config_h) frame.h buffer.h character.h keyboard.h \ - frame.h window.h dispextern.h coding.h $(INTERVAL_SRC) + frame.h window.h dispextern.h $(INTERVAL_SRC) coding.h - print.o: print.c process.h frame.h window.h buffer.h keyboard.h charset.h \ + print.o: print.c process.h frame.h window.h buffer.h keyboard.h character.h \ $(config_h) dispextern.h msdos.h composite.h - lread.o: lread.c commands.h keyboard.h buffer.h epaths.h charset.h $(config_h) \ - termhooks.h coding.h msdos.h + lread.o: lread.c commands.h keyboard.h buffer.h epaths.h character.h \ - charset.h $(config_h) termhooks.h msdos.h coding.h ++ charset.h $(config_h) termhooks.h coding.h msdos.h /* Text properties support */ textprop.o: textprop.c buffer.h window.h dispextern.h $(INTERVAL_SRC) \ diff --cc src/abbrev.c index dabc03b2a55,7ba0f575a2d..e3e0e28210b --- a/src/abbrev.c +++ b/src/abbrev.c @@@ -385,9 -381,9 +385,15 @@@ Returns the abbrev symbol, if expansio int pos = wordstart_byte; /* Find the initial. */ -- while (pos < PT_BYTE - && SYNTAX (*BUF_BYTE_ADDRESS (current_buffer, pos)) != Sword) - && SYNTAX (FETCH_CHAR_AS_MULTIBYTE (pos)) != Sword) -- pos++; ++ if (multibyte) ++ while (pos < PT_BYTE ++ && SYNTAX (FETCH_MULTIBYTE_CHAR (pos)) != Sword) ++ INC_POS (pos); ++ else ++ while (pos < PT_BYTE ++ && (SYNTAX (*BUF_BYTE_ADDRESS (current_buffer, pos)) ++ != Sword)) ++ pos++; /* Change just that. */ pos = BYTE_TO_CHAR (pos); diff --cc src/alloc.c index 102bc637b58,b1208c359e5..4ebb97aec18 --- a/src/alloc.c +++ b/src/alloc.c @@@ -624,206 -606,30 +624,208 @@@ lisp_malloc (nbytes, type return val; } +/* Free BLOCK. This must be called to free memory allocated with a + call to lisp_malloc. */ -/* Return a new buffer structure allocated from the heap with - a call to lisp_malloc. */ - -struct buffer * -allocate_buffer () +static void +lisp_free (block) + POINTER_TYPE *block; { - struct buffer *b - = (struct buffer *) lisp_malloc (sizeof (struct buffer), - MEM_TYPE_BUFFER); - VALIDATE_LISP_STORAGE (b, sizeof *b); - return b; + BLOCK_INPUT; + free (block); +#if GC_MARK_STACK && !defined GC_MALLOC_CHECK + mem_delete (mem_find (block)); +#endif + UNBLOCK_INPUT; } +/* Allocation of aligned blocks of memory to store Lisp data. */ +/* The entry point is lisp_align_malloc which returns blocks of at most */ +/* BLOCK_BYTES and guarantees they are aligned on a BLOCK_ALIGN boundary. */ -/* Free BLOCK. This must be called to free memory allocated with a - call to lisp_malloc. */ + +/* BLOCK_ALIGN has to be a power of 2. */ +#define BLOCK_ALIGN (1 << 10) + +/* Padding to leave at the end of a malloc'd block. This is to give + malloc a chance to minimize the amount of memory wasted to alignment. + It should be tuned to the particular malloc library used. + On glibc-2.3.2, malloc never tries to align, so a padding of 0 is best. + posix_memalign on the other hand would ideally prefer a value of 4 + because otherwise, there's 1020 bytes wasted between each ablocks. + But testing shows that those 1020 will most of the time be efficiently + used by malloc to place other objects, so a value of 0 is still preferable + unless you have a lot of cons&floats and virtually nothing else. */ +#define BLOCK_PADDING 0 +#define BLOCK_BYTES \ + (BLOCK_ALIGN - sizeof (struct aligned_block *) - BLOCK_PADDING) + +/* Internal data structures and constants. */ + +#define ABLOCKS_SIZE 16 + +/* An aligned block of memory. */ +struct ablock +{ + union + { + char payload[BLOCK_BYTES]; + struct ablock *next_free; + } x; + /* `abase' is the aligned base of the ablocks. */ + /* It is overloaded to hold the virtual `busy' field that counts + the number of used ablock in the parent ablocks. + The first ablock has the `busy' field, the others have the `abase' + field. To tell the difference, we assume that pointers will have + integer values larger than 2 * ABLOCKS_SIZE. The lowest bit of `busy' + is used to tell whether the real base of the parent ablocks is `abase' + (if not, the word before the first ablock holds a pointer to the + real base). */ + struct ablocks *abase; + /* The padding of all but the last ablock is unused. The padding of + the last ablock in an ablocks is not allocated. */ +#if BLOCK_PADDING + char padding[BLOCK_PADDING]; +#endif +}; + +/* A bunch of consecutive aligned blocks. */ +struct ablocks +{ + struct ablock blocks[ABLOCKS_SIZE]; +}; + +/* Size of the block requested from malloc or memalign. */ +#define ABLOCKS_BYTES (sizeof (struct ablocks) - BLOCK_PADDING) + +#define ABLOCK_ABASE(block) \ + (((unsigned long) (block)->abase) <= (1 + 2 * ABLOCKS_SIZE) \ + ? (struct ablocks *)(block) \ + : (block)->abase) + +/* Virtual `busy' field. */ +#define ABLOCKS_BUSY(abase) ((abase)->blocks[0].abase) + +/* Pointer to the (not necessarily aligned) malloc block. */ +#ifdef HAVE_POSIX_MEMALIGN +#define ABLOCKS_BASE(abase) (abase) +#else +#define ABLOCKS_BASE(abase) \ + (1 & (int) ABLOCKS_BUSY (abase) ? abase : ((void**)abase)[-1]) +#endif + +/* The list of free ablock. */ +static struct ablock *free_ablock; + +/* Allocate an aligned block of nbytes. + Alignment is on a multiple of BLOCK_ALIGN and `nbytes' has to be + smaller or equal to BLOCK_BYTES. */ +static POINTER_TYPE * +lisp_align_malloc (nbytes, type) + size_t nbytes; + enum mem_type type; +{ + void *base, *val; + struct ablocks *abase; + + eassert (nbytes <= BLOCK_BYTES); + + BLOCK_INPUT; + +#ifdef GC_MALLOC_CHECK + allocated_mem_type = type; +#endif + + if (!free_ablock) + { + int i, aligned; + +#ifdef DOUG_LEA_MALLOC + /* Prevent mmap'ing the chunk. Lisp data may not be mmap'ed + because mapped region contents are not preserved in + a dumped Emacs. */ + mallopt (M_MMAP_MAX, 0); +#endif + +#ifdef HAVE_POSIX_MEMALIGN + { + int err = posix_memalign (&base, BLOCK_ALIGN, ABLOCKS_BYTES); + abase = err ? (base = NULL) : base; + } +#else + base = malloc (ABLOCKS_BYTES); + abase = ALIGN (base, BLOCK_ALIGN); +#endif + + aligned = (base == abase); + if (!aligned) + ((void**)abase)[-1] = base; + +#ifdef DOUG_LEA_MALLOC + /* Back to a reasonable maximum of mmap'ed areas. */ + mallopt (M_MMAP_MAX, MMAP_MAX_AREAS); +#endif + ++ /* If the memory just allocated cannot be addressed thru a Lisp ++ object's pointer, and it needs to be, that's equivalent to ++ running out of memory. */ ++ if (type != MEM_TYPE_NON_LISP) ++ { ++ Lisp_Object tem; ++ char *end = (char *) base + ABLOCKS_BYTES - 1; ++ XSETCONS (tem, end); ++ if ((char *) XCONS (tem) != end) ++ { ++ lisp_malloc_loser = base; ++ free (base); ++ UNBLOCK_INPUT; ++ memory_full (); ++ } ++ } ++ + /* Initialize the blocks and put them on the free list. + Is `base' was not properly aligned, we can't use the last block. */ + for (i = 0; i < (aligned ? ABLOCKS_SIZE : ABLOCKS_SIZE - 1); i++) + { + abase->blocks[i].abase = abase; + abase->blocks[i].x.next_free = free_ablock; + free_ablock = &abase->blocks[i]; + } + ABLOCKS_BUSY (abase) = (struct ablocks *) aligned; + + eassert (0 == ((EMACS_UINT)abase) % BLOCK_ALIGN); + eassert (ABLOCK_ABASE (&abase->blocks[3]) == abase); /* 3 is arbitrary */ + eassert (ABLOCK_ABASE (&abase->blocks[0]) == abase); + eassert (ABLOCKS_BASE (abase) == base); + eassert (aligned == (int)ABLOCKS_BUSY (abase)); + } + + abase = ABLOCK_ABASE (free_ablock); + ABLOCKS_BUSY (abase) = (struct ablocks *) (2 + (int) ABLOCKS_BUSY (abase)); + val = free_ablock; + free_ablock = free_ablock->x.next_free; + - /* If the memory just allocated cannot be addressed thru a Lisp - object's pointer, and it needs to be, - that's equivalent to running out of memory. */ - if (val && type != MEM_TYPE_NON_LISP) - { - Lisp_Object tem; - XSETCONS (tem, (char *) val + nbytes - 1); - if ((char *) XCONS (tem) != (char *) val + nbytes - 1) - { - lisp_malloc_loser = val; - free (val); - val = 0; - } - } - +#if GC_MARK_STACK && !defined GC_MALLOC_CHECK + if (val && type != MEM_TYPE_NON_LISP) + mem_insert (val, (char *) val + nbytes, type); +#endif + + UNBLOCK_INPUT; + if (!val && nbytes) + memory_full (); + + eassert (0 == ((EMACS_UINT)val) % BLOCK_ALIGN); + return val; +} static void -lisp_free (block) +lisp_align_free (block) POINTER_TYPE *block; { + struct ablock *ablock = block; + struct ablocks *abase = ABLOCK_ABASE (ablock); + BLOCK_INPUT; - free (block); #if GC_MARK_STACK && !defined GC_MALLOC_CHECK mem_delete (mem_find (block)); #endif @@@ -5024,6 -4708,6 +4983,7 @@@ mark_object (arg since all markable slots in current buffer marked anyway. */ /* Don't need to do Lisp_Objfwd, since the places they point are protected with staticpro. */ ++ case Lisp_Misc_Save_Value: break; case Lisp_Misc_Overlay: diff --cc src/buffer.c index 40ee3f071de,6b9c2ca046c..b7efdbe5504 --- a/src/buffer.c +++ b/src/buffer.c @@@ -183,9 -181,10 +183,10 @@@ Lisp_Object Qinsert_behind_hooks static void alloc_buffer_text P_ ((struct buffer *, size_t)); static void free_buffer_text P_ ((struct buffer *b)); -static Lisp_Object copy_overlays P_ ((struct buffer *, Lisp_Object)); -static void modify_overlay P_ ((struct buffer *, int, int)); +static struct Lisp_Overlay * copy_overlays P_ ((struct buffer *, struct Lisp_Overlay *)); +static void modify_overlay P_ ((struct buffer *, EMACS_INT, EMACS_INT)); + extern char * emacs_strerror P_ ((int)); /* For debugging; temporary. See set_buffer_internal. */ /* Lisp_Object Qlisp_mode, Vcheck_symbol; */ @@@ -2073,23 -2012,45 +2074,25 @@@ advance_to_char_boundary (byte_pos return byte_pos; } - -/* Symbols used as the 2nd arg of Fset_buffer_multibyte. */ -static Lisp_Object Qas, Qmake, Qto; - - DEFUN ("set-buffer-multibyte", Fset_buffer_multibyte, Sset_buffer_multibyte, - 1, 2, 0, + 1, 1, 0, doc: /* Set the multibyte flag of the current buffer to FLAG. If FLAG is t, this makes the buffer a multibyte buffer. -If FLAG is nil, this makes the buffer a unibyte buffer. - -Optional second arg METHOD specifies how to convert the byte sequence -of the buffer. - -If it is nil or `as', the buffer contents remain unchanged as a -sequence of bytes but the contents viewed as characters do change. - -If it is `make', convert each character by unibyte-char-to-multibyte -or multibyte-char-to-unibyte. - -If it is `to', convert each character by byte-to-char or -char-to-byte. */) - (flag, method) - Lisp_Object flag, method; +If FLAG is nil, this makes the buffer a single-byte buffer. - The buffer contents remain unchanged as a sequence of bytes - but the contents viewed as characters do change. */) ++In these cases, the buffer contents remain unchanged as a sequence of ++bytes but the contents viewed as characters do change. ++If FLAG is `to', this makes the buffer a multibyte buffer by changing ++all eight-bit bytes to eight-bit characters. */) + (flag) + Lisp_Object flag; { - Lisp_Object tail, markers; + struct Lisp_Marker *tail, *markers; struct buffer *other; int undo_enabled_p = !EQ (current_buffer->undo_list, Qt); - int begv = BEGV, zv = ZV; - int narrowed = (BEG != begv || Z != zv); + int begv, zv; + int narrowed = (BEG != BEGV || Z != ZV); int modified_p = !NILP (Fbuffer_modified_p (Qnil)); - CHECK_SYMBOL (method); - if (NILP (method)) - method = Qas; - else if (! EQ (method, Qas) && ! EQ (method, Qmake) && ! EQ (method, Qto)) - error ("Invalid unibyte<->multibyte conversion method: %s", - XSYMBOL (method)->name->data); - if (current_buffer->base_buffer) error ("Cannot do `set-buffer-multibyte' on an indirect buffer"); @@@ -2168,6 -2127,27 +2171,11 @@@ zv -= bytes; stop = Z; } - else if (EQ (method, Qas)) ++ else + { + bytes = BYTES_BY_CHAR_HEAD (*p); + p += bytes, pos += bytes; + } - else - { - /* Delete all bytes for this character but the last one, - and change the last one to the unibyte code. */ - c = STRING_CHAR_AND_LENGTH (p, stop - pos, bytes); - bytes--; - del_range_2 (pos, pos, pos + bytes, pos + bytes, 0); - p = GAP_END_ADDR; - *p++ = CHAR_TO_BYTE (c); - pos++; - if (begv > pos) - begv -= bytes; - if (zv > pos) - zv -= bytes; - stop = Z; - } } if (narrowed) Fnarrow_to_region (make_number (begv), make_number (zv)); @@@ -2176,13 -2156,14 +2184,14 @@@ { int pt = PT; int pos, stop; - unsigned char *p; + unsigned char *p, *pend; /* Be sure not to have a multibyte sequence striding over the GAP. - Ex: We change this: "...abc\201 _GAP_ \241def..." - to: "...abc _GAP_ \201\241def..." */ + Ex: We change this: "...abc\302 _GAP_ \241def..." + to: "...abc _GAP_ \302\241def..." */ - if (GPT_BYTE > 1 && GPT_BYTE < Z_BYTE - if (EQ (method, Qas) ++ if (EQ (flag, Qt) + && GPT_BYTE > 1 && GPT_BYTE < Z_BYTE && ! CHAR_HEAD_P (*(GAP_END_ADDR))) { unsigned char *p = GPT_ADDR - 1; @@@ -2210,16 -2192,26 +2220,21 @@@ if (pos == Z) break; p = GAP_END_ADDR; + pend = Z_ADDR; stop = Z; } - + - if (UNIBYTE_STR_AS_MULTIBYTE_P (p, stop - pos, bytes)) + if (ASCII_BYTE_P (*p)) + p++, pos++; - else if (EQ (method, Qas) - && (bytes = MULTIBYTE_LENGTH (p, pend)) > 0) ++ else if (EQ (flag, Qt) && (bytes = MULTIBYTE_LENGTH (p, pend)) > 0) p += bytes, pos += bytes; else { unsigned char tmp[MAX_MULTIBYTE_LENGTH]; + int c; - bytes = CHAR_STRING (*p, tmp); - if (EQ (method, Qmake)) - c = unibyte_char_to_multibyte (*p); - else - c = BYTE8_TO_CHAR (*p); - ++ c = BYTE8_TO_CHAR (*p); + bytes = CHAR_STRING (c, tmp); *p = tmp[0]; TEMP_SET_PT_BOTH (pos + 1, pos + 1); bytes--; @@@ -3737,23 -3651,15 +3753,21 @@@ buffer. */ Fset_marker (OVERLAY_START (overlay), beg, buffer); Fset_marker (OVERLAY_END (overlay), end, buffer); - /* Put the overlay on the wrong list. */ + /* Put the overlay on the wrong list. */ end = OVERLAY_END (overlay); - if (OVERLAY_POSITION (end) < XINT (b->overlay_center)) - b->overlays_after = Fcons (overlay, b->overlays_after); + if (OVERLAY_POSITION (end) < b->overlay_center) + { - if (b->overlays_after) - XOVERLAY (overlay)->next = b->overlays_after; - b->overlays_after = XOVERLAY (overlay); ++ XOVERLAY (overlay)->next = b->overlays_after; ++ b->overlays_after = XOVERLAY (overlay); + } else - b->overlays_before = Fcons (overlay, b->overlays_before); + { - if (b->overlays_before) - XOVERLAY (overlay)->next = b->overlays_before; - b->overlays_before = XOVERLAY (overlay); ++ XOVERLAY (overlay)->next = b->overlays_before; ++ b->overlays_before = XOVERLAY (overlay); + } /* This puts it in the right list, and in the right order. */ - recenter_overlay_lists (b, XINT (b->overlay_center)); + recenter_overlay_lists (b, b->overlay_center); return unbind_to (count, overlay); } diff --cc src/buffer.h index 8c340a23d23,447c618f77a..b52f83a6e06 --- a/src/buffer.h +++ b/src/buffer.h @@@ -327,10 -326,18 +326,18 @@@ extern unsigned char *_fetch_multibyte_ #define FETCH_MULTIBYTE_CHAR(pos) \ (_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) \ - + (pos) + BEG_ADDR - BEG_BYTE), \ - _fetch_multibyte_char_len \ - = ((pos) >= GPT_BYTE ? ZV_BYTE : GPT_BYTE) - (pos), \ - STRING_CHAR (_fetch_multibyte_char_p, _fetch_multibyte_char_len)) - + (pos) + BEG_ADDR - 1), \ ++ + (pos) + BEG_ADDR - BEG_BYTE), \ + STRING_CHAR (_fetch_multibyte_char_p, 0)) + + /* Return character at position POS. If the current buffer is unibyte + and the character is not ASCII, make the returning character + multibyte. */ + + #define FETCH_CHAR_AS_MULTIBYTE(pos) \ + (!NILP (current_buffer->enable_multibyte_characters) \ + ? FETCH_MULTIBYTE_CHAR ((pos)) \ + : unibyte_char_to_multibyte (FETCH_BYTE ((pos)))) + /* Macros for accessing a character or byte, or converting between byte positions and addresses, @@@ -378,11 -385,8 +385,8 @@@ #define BUF_FETCH_MULTIBYTE_CHAR(buf, pos) \ (_fetch_multibyte_char_p \ = (((pos) >= BUF_GPT_BYTE (buf) ? BUF_GAP_SIZE (buf) : 0) \ - + (pos) + BUF_BEG_ADDR (buf) - 1), \ + + (pos) + BUF_BEG_ADDR (buf) - BEG_BYTE), \ - _fetch_multibyte_char_len \ - = (((pos) >= BUF_GPT_BYTE (buf) ? BUF_ZV_BYTE (buf) : BUF_GPT_BYTE (buf)) \ - - (pos)), \ - STRING_CHAR (_fetch_multibyte_char_p, _fetch_multibyte_char_len)) + STRING_CHAR (_fetch_multibyte_char_p, 0)) /* Define the actual buffer data structures. */ @@@ -817,9 -808,10 +821,10 @@@ extern void set_buffer_internal_1 P_ (( extern void set_buffer_temp P_ ((struct buffer *)); extern void record_buffer P_ ((Lisp_Object)); extern void buffer_slot_type_mismatch P_ ((int)); -extern void fix_overlays_before P_ ((struct buffer *, int, int)); +extern void fix_overlays_before P_ ((struct buffer *, EMACS_INT, EMACS_INT)); extern void mmap_set_vars P_ ((int)); + EXFUN (Fbuffer_live_p, 1); EXFUN (Fbuffer_name, 1); EXFUN (Fget_file_buffer, 1); EXFUN (Fnext_overlay_change, 1); diff --cc src/bytecode.c index 141f5adda84,df6eb266eb9..f3a07dced35 --- a/src/bytecode.c +++ b/src/bytecode.c @@@ -37,9 -37,8 +37,9 @@@ by Hallvard #include #include "lisp.h" #include "buffer.h" - #include "charset.h" + #include "character.h" #include "syntax.h" +#include "window.h" #ifdef CHECK_FRAME_FONT #include "frame.h" @@@ -1437,10 -1433,17 +1437,17 @@@ If the third argument is incorrect, Ema break; case Bchar_syntax: - BEFORE_POTENTIAL_GC (); - CHECK_NUMBER (TOP); - AFTER_POTENTIAL_GC (); - XSETFASTINT (TOP, syntax_code_spec[(int) SYNTAX (XINT (TOP))]); + { + int c; + + BEFORE_POTENTIAL_GC (); + CHECK_CHARACTER (TOP); + AFTER_POTENTIAL_GC (); + c = XFASTINT (TOP); + if (NILP (current_buffer->enable_multibyte_characters)) + MAKE_CHAR_MULTIBYTE (c); + XSETFASTINT (TOP, syntax_code_spec[(int) SYNTAX (c)]); - } ++ } break; case Bbuffer_substring: diff --cc src/callproc.c index d92176ccd91,8c6df89ea7e..20b3ee22add --- a/src/callproc.c +++ b/src/callproc.c @@@ -408,13 -401,9 +408,9 @@@ usage: (call-process PROGRAM &optional { argument_coding.src_multibyte = STRING_MULTIBYTE (args[i]); if (CODING_REQUIRE_ENCODING (&argument_coding)) - { - /* We must encode this argument. */ - args[i] = encode_coding_string (args[i], &argument_coding, 1); - if (argument_coding.type == coding_type_ccl) - setup_ccl_program (&(argument_coding.spec.ccl.encoder), Qnil); - } + /* We must encode this argument. */ + args[i] = encode_coding_string (&argument_coding, args[i], 1); - new_argv[i - 3] = XSTRING (args[i])->data; + new_argv[i - 3] = SDATA (args[i]); } UNGCPRO; new_argv[nargs - 3] = 0; @@@ -780,10 -761,11 +768,11 @@@ /* Now NREAD is the total amount of data in the buffer. */ immediate_quit = 0; - + if (!NILP (buffer)) { - if (! CODING_MAY_REQUIRE_DECODING (&process_coding)) + if (NILP (current_buffer->enable_multibyte_characters) + && ! CODING_MAY_REQUIRE_DECODING (&process_coding)) insert_1_both (bufptr, nread, nread, 0, 1, 0); else { /* We have to decode the input. */ @@@ -825,72 -794,10 +801,10 @@@ carryover = nread; continue; } - + - if (process_coding.produced > 0) - insert_1_both (decoding_buf, process_coding.produced_char, - process_coding.produced, 0, 1, 0); - xfree (decoding_buf); - - if (process_coding.result == CODING_FINISH_INCONSISTENT_EOL) - { - Lisp_Object eol_type, coding; - - if (process_coding.eol_type == CODING_EOL_CR) - { - /* CRs have been replaced with LFs. Undo - that in the text inserted above. */ - unsigned char *p; - - move_gap_both (PT, PT_BYTE); - - p = BYTE_POS_ADDR (pt_byte_orig); - for (; p < GPT_ADDR; ++p) - if (*p == '\n') - *p = '\r'; - } - else if (process_coding.eol_type == CODING_EOL_CRLF) - { - /* CR LFs have been replaced with LFs. Undo - that by inserting CRs in front of LFs in - the text inserted above. */ - EMACS_INT bytepos, old_pt, old_pt_byte, nCR; - - old_pt = PT; - old_pt_byte = PT_BYTE; - nCR = 0; - - for (bytepos = PT_BYTE - 1; - bytepos >= pt_byte_orig; - --bytepos) - if (FETCH_BYTE (bytepos) == '\n') - { - EMACS_INT charpos = BYTE_TO_CHAR (bytepos); - TEMP_SET_PT_BOTH (charpos, bytepos); - insert_1_both ("\r", 1, 1, 0, 1, 0); - ++nCR; - } - - TEMP_SET_PT_BOTH (old_pt + nCR, old_pt_byte + nCR); - } - - /* Set the coding system symbol to that for - Unix-like EOL. */ - eol_type = Fget (saved_coding.symbol, Qeol_type); - if (VECTORP (eol_type) - && ASIZE (eol_type) == 3 - && SYMBOLP (AREF (eol_type, CODING_EOL_LF))) - coding = AREF (eol_type, CODING_EOL_LF); - else - coding = saved_coding.symbol; - - process_coding.symbol = coding; - process_coding.eol_type = CODING_EOL_LF; - process_coding.mode - &= ~CODING_MODE_INHIBIT_INCONSISTENT_EOL; - } - - nread -= process_coding.consumed; - carryover = nread; + TEMP_SET_PT_BOTH (PT + process_coding.produced_char, + PT_BYTE + process_coding.produced); + carryover = process_coding.carryover_bytes; if (carryover > 0) /* As CARRYOVER should not be that large, we had better avoid overhead of bcopy. */ @@@ -935,33 -833,12 +840,12 @@@ } give_up: ; - if (!NILP (buffer) - && process_coding.cmp_data) - { - coding_restore_composition (&process_coding, Fcurrent_buffer ()); - coding_free_composition_data (&process_coding); - } - - { - int post_read_count = SPECPDL_INDEX (); - - record_unwind_protect (save_excursion_restore, save_excursion_save ()); - inserted = PT - pt_orig; - TEMP_SET_PT_BOTH (pt_orig, pt_byte_orig); - if (SYMBOLP (process_coding.post_read_conversion) - && !NILP (Ffboundp (process_coding.post_read_conversion))) - call1 (process_coding.post_read_conversion, make_number (inserted)); - - Vlast_coding_system_used = process_coding.symbol; - - /* If the caller required, let the buffer inherit the - coding-system used to decode the process output. */ - if (inherit_process_coding_system) - call1 (intern ("after-insert-file-set-buffer-file-coding-system"), - make_number (total_read)); - - unbind_to (post_read_count, Qnil); - } + Vlast_coding_system_used = CODING_ID_NAME (process_coding.id); + /* If the caller required, let the buffer inherit the + coding-system used to decode the process output. */ + if (inherit_process_coding_system) + call1 (intern ("after-insert-file-set-buffer-file-coding-system"), - make_number (total_read)); ++ make_number (total_read)); } /* Wait for it to terminate, unless it already has. */ diff --cc src/casefiddle.c index 8b92d39cbb3,fcfebe9b300..1e502af9c02 --- a/src/casefiddle.c +++ b/src/casefiddle.c @@@ -66,66 -71,44 +71,43 @@@ casify_object (flag, obj if (STRINGP (obj)) { int multibyte = STRING_MULTIBYTE (obj); + int i, i_byte, len; - int size = XSTRING (obj)->size; ++ int size = SCHARS (obj); obj = Fcopy_sequence (obj); - len = SBYTES (obj); - - /* Scan all single-byte characters from start of string. */ - for (i = 0; i < len;) + for (i = i_byte = 0; i < size; i++, i_byte += len) { - c = SREF (obj, i); - - if (multibyte && c >= 0x80) - /* A multibyte character can't be handled in this - simple loop. */ - break; + if (multibyte) - c = STRING_CHAR_AND_LENGTH (XSTRING (obj)->data + i_byte, - 0, len); ++ c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, 0, len); + else + { - c = XSTRING (obj)->data[i_byte]; ++ c = SREF (obj, i_byte); + len = 1; + MAKE_CHAR_MULTIBYTE (c); + } - c1 = c; ++ c1 = c; if (inword && flag != CASE_CAPITALIZE_UP) c = DOWNCASE (c); else if (!UPPERCASEP (c) && (!inword || flag != CASE_CAPITALIZE_UP)) - c = UPCASE1 (c); - /* If this char won't fit in a single-byte string. - fall out to the multibyte case. */ - if (multibyte ? ! ASCII_BYTE_P (c) - : ! SINGLE_BYTE_CHAR_P (c)) - break; - - SSET (obj, i, c); + c = UPCASE1 (c1); if ((int) flag >= (int) CASE_CAPITALIZE) -- inword = SYNTAX (c) == Sword; - i++; - } - - /* If we didn't do the whole string as single-byte, - scan the rest in a more complex way. */ - if (i < len) - { - /* The work is not yet finished because of a multibyte - character just encountered. */ - int fromlen, j_byte = i; - char *buf - = (char *) alloca ((len - i) * MAX_MULTIBYTE_LENGTH + i); - - /* Copy data already handled. */ - bcopy (SDATA (obj), buf, i); - - /* From now on, I counts bytes. */ - while (i < len) ++ inword = (SYNTAX (c) == Sword); + if (c != c1) { - c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i, - len - i, fromlen); - if (inword && flag != CASE_CAPITALIZE_UP) - c = DOWNCASE (c); - else if (!UPPERCASEP (c) - && (!inword || flag != CASE_CAPITALIZE_UP)) - c = UPCASE1 (c); - i += fromlen; - j_byte += CHAR_STRING (c, buf + j_byte); - if ((int) flag >= (int) CASE_CAPITALIZE) - inword = SYNTAX (c) == Sword; + if (! multibyte) + { + MAKE_CHAR_UNIBYTE (c); - XSTRING (obj)->data[i_byte] = c; ++ SSET (obj, i_byte, c); + } + else if (ASCII_CHAR_P (c1) && ASCII_CHAR_P (c)) - XSTRING (obj)->data[i_byte] = c; ++ SSET (obj, i_byte, c); + else + { + Faset (obj, make_number (i), make_number (c)); + i_byte += CHAR_BYTES (c) - len; + } } - obj = make_multibyte_string (buf, SCHARS (obj), - j_byte); } return obj; } @@@ -187,7 -170,7 +169,6 @@@ casify_region (flag, b, e enum case_action flag; Lisp_Object b, e; { -- register int i; register int c; register int inword = flag == CASE_DOWN; register int multibyte = !NILP (current_buffer->enable_multibyte_characters); @@@ -223,70 -217,42 +215,45 @@@ else if (!UPPERCASEP (c) && (!inword || flag != CASE_CAPITALIZE_UP)) c = UPCASE1 (c); - FETCH_BYTE (i) = c; - if (c != c2) - changed = 1; if ((int) flag >= (int) CASE_CAPITALIZE) - inword = SYNTAX (c) == Sword && (inword || !SYNTAX_PREFIX (c)); - } - if (i < end_byte) - { - /* The work is not yet finished because of a multibyte character - just encountered. */ - int opoint = PT; - int opoint_byte = PT_BYTE; - int c2; - - while (i < end_byte) - inword = SYNTAX (c) == Sword; ++ inword = ((SYNTAX (c) == Sword) && (inword || !SYNTAX_PREFIX (c))); + if (c != c2) { - if ((c = FETCH_BYTE (i)) >= 0x80) - c = FETCH_MULTIBYTE_CHAR (i); - c2 = c; - if (inword && flag != CASE_CAPITALIZE_UP) - c2 = DOWNCASE (c); - else if (!UPPERCASEP (c) - && (!inword || flag != CASE_CAPITALIZE_UP)) - c2 = UPCASE1 (c); - if (c != c2) + changed = 1; + if (! multibyte) + { + MAKE_CHAR_UNIBYTE (c); + FETCH_BYTE (start_byte) = c; + } + else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c)) + FETCH_BYTE (start_byte) = c; + else if (len == CHAR_BYTES (c)) { - int fromlen, tolen, j; + int j; unsigned char str[MAX_MULTIBYTE_LENGTH]; - changed = 1; - /* Handle the most likely case */ - if (c < 0400 && c2 < 0400) - FETCH_BYTE (i) = c2; - else if (fromlen = CHAR_STRING (c, str), - tolen = CHAR_STRING (c2, str), - fromlen == tolen) - { - for (j = 0; j < tolen; ++j) - FETCH_BYTE (i + j) = str[j]; - } - else - { - error ("Can't casify letters that change length"); - #if 0 /* This is approximately what we'd like to be able to do here */ - if (tolen < fromlen) - del_range_1 (i + tolen, i + fromlen, 0, 0); - else if (tolen > fromlen) - { - TEMP_SET_PT (i + fromlen); - insert_1 (str + fromlen, tolen - fromlen, 1, 0, 0); - } - #endif - } + CHAR_STRING (c, str); + for (j = 0; j < len; ++j) + FETCH_BYTE (start_byte + j) = str[j]; + } + else + { + TEMP_SET_PT_BOTH (start, start_byte); + del_range_2 (start, start_byte, start + 1, start_byte + len, 0); + insert_char (c); + len = CHAR_BYTES (c); } - if ((int) flag >= (int) CASE_CAPITALIZE) - inword = SYNTAX (c2) == Sword; - INC_BOTH (start, i); } - TEMP_SET_PT_BOTH (opoint, opoint_byte); + start++; + start_byte += len; } - start = XFASTINT (b); ++ if (PT != opoint) ++ TEMP_SET_PT_BOTH (opoint, opoint_byte); ++ if (changed) { + start = XFASTINT (b); signal_after_change (start, end - start, end - start); update_compositions (start, end, CHECK_ALL); } diff --cc src/casetab.c index 9f9c4f8c5b2,6abb1e2b096..64b0b4cbb29 --- a/src/casetab.c +++ b/src/casetab.c @@@ -138,8 -138,8 +138,8 @@@ set_case_table (table, standard if (NILP (up)) { up = Fmake_char_table (Qcase_table, Qnil); - map_char_table (set_identity, Qnil, table, table, up, 0, indices); - map_char_table (shuffle, Qnil, table, table, up, 0, indices); - map_char_table (set_identity, Qnil, table, up, 0, indices); - map_char_table (shuffle, Qnil, table, up, 0, indices); ++ map_char_table (set_identity, Qnil, table, up); ++ map_char_table (shuffle, Qnil, table, up); XCHAR_TABLE (table)->extras[0] = up; } @@@ -147,14 -147,14 +147,14 @@@ { canon = Fmake_char_table (Qcase_table, Qnil); XCHAR_TABLE (table)->extras[1] = canon; - map_char_table (set_canon, Qnil, table, table, table, 0, indices); - map_char_table (set_canon, Qnil, table, table, 0, indices); ++ map_char_table (set_canon, Qnil, table, table); } if (NILP (eqv)) { eqv = Fmake_char_table (Qcase_table, Qnil); - map_char_table (set_identity, Qnil, canon, canon, eqv, 0, indices); - map_char_table (shuffle, Qnil, canon, canon, eqv, 0, indices); - map_char_table (set_identity, Qnil, canon, eqv, 0, indices); - map_char_table (shuffle, Qnil, canon, eqv, 0, indices); ++ map_char_table (set_identity, Qnil, canon, eqv); ++ map_char_table (shuffle, Qnil, canon, eqv); XCHAR_TABLE (table)->extras[2] = eqv; } @@@ -176,30 -173,45 +176,45 @@@ /* The following functions are called in map_char_table. */ --/* Set CANON char-table element for C to a translated ELT by UP and -- DOWN char-tables. This is done only when ELT is a character. The -- char-tables CANON, UP, and DOWN are in CASE_TABLE. */ ++/* Set CANON char-table element for characters in RANGE to a ++ translated ELT by UP and DOWN char-tables. This is done only when ++ ELT is a character. The char-tables CANON, UP, and DOWN are in ++ CASE_TABLE. */ static void - set_canon (case_table, c, elt) - Lisp_Object case_table, c, elt; + set_canon (case_table, range, elt) + Lisp_Object case_table, range, elt; { Lisp_Object up = XCHAR_TABLE (case_table)->extras[0]; Lisp_Object canon = XCHAR_TABLE (case_table)->extras[1]; - int from, to; if (NATNUMP (elt)) - Faset (canon, c, Faref (case_table, Faref (up, elt))); + Fset_char_table_range (canon, range, Faref (case_table, Faref (up, elt))); } - /* Set elements of char-table TABLE for C to C itself. This is done - only when ELT is a character. This is called in map_char_table. */ -/* Set elements of char-table TABLE for characters in RANGE to - themselves. This is done only when ELT is a character. This is - called in map_char_table. */ ++/* Set elements of char-table TABLE for C to C itself. C may be a ++ cons specifying a character range. In that case, set characters in ++ that range to themselves. This is done only when ELT is a ++ character. This is called in map_char_table. */ static void -set_identity (table, range, elt) - Lisp_Object table, range, elt; +set_identity (table, c, elt) + Lisp_Object table, c, elt; { - int from, to; - if (NATNUMP (elt)) - Faset (table, c, c); + { - if (CONSP (range)) ++ int from, to; ++ ++ if (CONSP (c)) + { - from = XINT (XCAR (range)); - to = XINT (XCDR (range)); ++ from = XINT (XCAR (c)); ++ to = XINT (XCDR (c)); + } + else - from = to = XINT (range); - ++ from = to = XINT (c); + for (; from <= to; from++) + CHAR_TABLE_SET (table, from, make_number (from)); + } } /* Permute the elements of TABLE (which is initially an identity @@@ -208,14 -220,29 +223,28 @@@ operated. */ static void -shuffle (table, range, elt) - Lisp_Object table, range, elt; +shuffle (table, c, elt) + Lisp_Object table, c, elt; { - if (NATNUMP (elt) && !EQ (c, elt)) - int from, to; - + if (NATNUMP (elt)) { Lisp_Object tem = Faref (table, elt); - Faset (table, elt, c); - Faset (table, c, tem); ++ int from, to; + - if (CONSP (range)) ++ if (CONSP (c)) + { - from = XINT (XCAR (range)); - to = XINT (XCDR (range)); ++ from = XINT (XCAR (c)); ++ to = XINT (XCDR (c)); + } + else - from = to = XINT (range); ++ from = to = XINT (c); + + for (; from <= to; from++) + if (from != XINT (elt)) + { + Faset (table, elt, make_number (from)); + Faset (table, make_number (from), tem); + } } } @@@ -240,22 -267,21 +269,24 @@@ init_casetab_once ( Vascii_downcase_table = down; XCHAR_TABLE (down)->purpose = Qcase_table; - for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++) - XSETFASTINT (XCHAR_TABLE (down)->contents[i], - (i >= 'A' && i <= 'Z') ? i + ('a' - 'A') : i); + for (i = 0; i < 128; i++) - CHAR_TABLE_SET (down, i, - make_number ((i >= 'A' && i <= 'Z') ? i + ('a' - 'A') : i)); ++ { ++ int c = (i >= 'A' && i <= 'Z') ? i + ('a' - 'A') : i; ++ CHAR_TABLE_SET (down, i, make_number (c)); ++ } XCHAR_TABLE (down)->extras[1] = Fcopy_sequence (down); up = Fmake_char_table (Qcase_table, Qnil); XCHAR_TABLE (down)->extras[0] = up; - for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++) - XSETFASTINT (XCHAR_TABLE (up)->contents[i], - ((i >= 'A' && i <= 'Z') - ? i + ('a' - 'A') - : ((i >= 'a' && i <= 'z') - ? i + ('A' - 'a') - : i))); + for (i = 0; i < 128; i++) - CHAR_TABLE_SET (up, i, make_number ((i >= 'A' && i <= 'Z') - ? i + ('a' - 'A') - : ((i >= 'a' && i <= 'z') - ? i + ('A' - 'a') - : i))); ++ { ++ int c = ((i >= 'A' && i <= 'Z') ? i + ('a' - 'A') ++ : ((i >= 'a' && i <= 'z') ? i + ('A' - 'a') ++ : i));; ++ CHAR_TABLE_SET (up, i, make_number (c)); ++ } XCHAR_TABLE (down)->extras[2] = Fcopy_sequence (up); } diff --cc src/category.c index 4846ae8f7f6,91b015d5868..89e4d907378 --- a/src/category.c +++ b/src/category.c @@@ -1,6 -1,9 +1,9 @@@ /* GNU Emacs routines to deal with category tables. Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. -- Licensed to the Free Software Foundation. - Copyright (C) 2001, 2002 ++ Licensed to the Free Software Foundation. ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. @@@ -186,6 -190,19 +190,18 @@@ This is the one used for new buffers. return Vstandard_category_table; } + + static void -copy_category_entry (table, range, val) - Lisp_Object table, range, val; ++copy_category_entry (table, c, val) ++ Lisp_Object table, c, val; + { + val = Fcopy_sequence (val); - if (CONSP (range)) - char_table_set_range (table, XINT (XCAR (range)), XINT (XCDR (range)), - val); ++ if (CONSP (c)) ++ char_table_set_range (table, XINT (XCAR (c)), XINT (XCDR (c)), val); + else - char_table_set (table, XINT (range), val); ++ char_table_set (table, XINT (c), val); + } + /* Return a copy of category table TABLE. We can't simply use the function copy-sequence because no contents should be shared between the original and the copy. This function is called recursively by @@@ -195,44 -212,15 +211,14 @@@ Lisp_Objec copy_category_table (table) Lisp_Object table; { - Lisp_Object tmp; - int i, to; + table = copy_char_table (table); - if (!NILP (XCHAR_TABLE (table)->top)) - { - /* TABLE is a top level char table. - At first, make a copy of tree structure of the table. */ - table = Fcopy_sequence (table); - - /* Then, copy elements for single byte characters one by one. */ - for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++) - if (!NILP (tmp = XCHAR_TABLE (table)->contents[i])) - XCHAR_TABLE (table)->contents[i] = Fcopy_sequence (tmp); - to = CHAR_TABLE_ORDINARY_SLOTS; - - /* Also copy the first (and sole) extra slot. It is a vector - containing docstring of each category. */ - Fset_char_table_extra_slot - (table, make_number (0), - Fcopy_sequence (Fchar_table_extra_slot (table, make_number (0)))); - } - else - { - i = 32; - to = SUB_CHAR_TABLE_ORDINARY_SLOTS; - } - - /* If the table has non-nil default value, copy it. */ - if (!NILP (tmp = XCHAR_TABLE (table)->defalt)) - XCHAR_TABLE (table)->defalt = Fcopy_sequence (tmp); - - /* At last, copy the remaining elements while paying attention to a - sub char table. */ - for (; i < to; i++) - if (!NILP (tmp = XCHAR_TABLE (table)->contents[i])) - XCHAR_TABLE (table)->contents[i] - = (SUB_CHAR_TABLE_P (tmp) - ? copy_category_table (tmp) : Fcopy_sequence (tmp)); + if (! NILP (XCHAR_TABLE (table)->defalt)) + XCHAR_TABLE (table)->defalt + = Fcopy_sequence (XCHAR_TABLE (table)->defalt); + XCHAR_TABLE (table)->extras[0] + = Fcopy_sequence (XCHAR_TABLE (table)->extras[0]); - - map_char_table (copy_category_entry, Qnil, table, table, 0, NULL); ++ map_char_table (copy_category_entry, Qnil, table, table); return table; } @@@ -261,6 -250,8 +248,8 @@@ DEFUN ("make-category-table", Fmake_cat val = Fmake_char_table (Qcategory_table, Qnil); XCHAR_TABLE (val)->defalt = MAKE_CATEGORY_SET; - for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++) ++ for (i = 0; i < (1 << CHARTAB_SIZE_BITS_0); i++) + XCHAR_TABLE (val)->contents[i] = MAKE_CATEGORY_SET; Fset_char_table_extra_slot (val, make_number (0), Fmake_vector (make_number (95), Qnil)); return val; @@@ -366,108 -338,47 +336,48 @@@ then delete CATEGORY from the category (character, category, table, reset) Lisp_Object character, category, table, reset; { - int c, charset, c1, c2; Lisp_Object set_value; /* Actual value to be set in category sets. */ - Lisp_Object val, category_set; + Lisp_Object category_set; + int start, end; + int from, to; - CHECK_NUMBER (character); - c = XINT (character); - CHECK_CATEGORY (category); - table = check_category_table (table); - - if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category)))) - error ("Undefined category: %c", XFASTINT (category)); - - set_value = NILP (reset) ? Qt : Qnil; - - if (c < CHAR_TABLE_SINGLE_BYTE_SLOTS) + if (INTEGERP (character)) { - val = XCHAR_TABLE (table)->contents[c]; - if (!CATEGORY_SET_P (val)) - XCHAR_TABLE (table)->contents[c] = (val = MAKE_CATEGORY_SET); - SET_CATEGORY_SET (val, category, set_value); - return Qnil; + CHECK_CHARACTER (character); + start = end = XFASTINT (character); } - - SPLIT_CHAR (c, charset, c1, c2); - - /* The top level table. */ - val = XCHAR_TABLE (table)->contents[charset + 128]; - if (CATEGORY_SET_P (val)) - category_set = val; - else if (!SUB_CHAR_TABLE_P (val)) - { - category_set = val = MAKE_CATEGORY_SET; - XCHAR_TABLE (table)->contents[charset + 128] = category_set; - } - - if (c1 <= 0) - { - /* Only a charset is specified. */ - if (SUB_CHAR_TABLE_P (val)) - /* All characters in CHARSET should be the same as for having - CATEGORY or not. */ - modify_lower_category_set (val, category, set_value); - else - SET_CATEGORY_SET (category_set, category, set_value); - return Qnil; - } - - /* The second level table. */ - if (!SUB_CHAR_TABLE_P (val)) + else { - val = make_sub_char_table (Qnil); - XCHAR_TABLE (table)->contents[charset + 128] = val; - /* We must set default category set of CHARSET in `defalt' slot. */ - XCHAR_TABLE (val)->defalt = category_set; + CHECK_CONS (character); - CHECK_CHARACTER (XCAR (character)); - CHECK_CHARACTER (XCDR (character)); ++ CHECK_CHARACTER_CAR (character); ++ CHECK_CHARACTER_CDR (character); + start = XFASTINT (XCAR (character)); + end = XFASTINT (XCDR (character)); } - table = val; - val = XCHAR_TABLE (table)->contents[c1]; - if (CATEGORY_SET_P (val)) - category_set = val; - else if (!SUB_CHAR_TABLE_P (val)) - { - category_set = val = Fcopy_sequence (XCHAR_TABLE (table)->defalt); - XCHAR_TABLE (table)->contents[c1] = category_set; - } + CHECK_CATEGORY (category); + table = check_category_table (table); - if (c2 <= 0) - { - if (SUB_CHAR_TABLE_P (val)) - /* All characters in C1 group of CHARSET should be the same as - for CATEGORY. */ - modify_lower_category_set (val, category, set_value); - else - SET_CATEGORY_SET (category_set, category, set_value); - return Qnil; - } + if (NILP (CATEGORY_DOCSTRING (table, XFASTINT (category)))) + error ("Undefined category: %c", XFASTINT (category)); - + - /* The third (bottom) level table. */ - if (!SUB_CHAR_TABLE_P (val)) - { - val = make_sub_char_table (Qnil); - XCHAR_TABLE (table)->contents[c1] = val; - /* We must set default category set of CHARSET and C1 in - `defalt' slot. */ - XCHAR_TABLE (val)->defalt = category_set; - } - table = val; + set_value = NILP (reset) ? Qt : Qnil; - val = XCHAR_TABLE (table)->contents[c2]; - if (CATEGORY_SET_P (val)) - category_set = val; - else if (!SUB_CHAR_TABLE_P (val)) + while (start <= end) { - category_set = Fcopy_sequence (XCHAR_TABLE (table)->defalt); - XCHAR_TABLE (table)->contents[c2] = category_set; + category_set = char_table_ref_and_range (table, start, &from, &to); + if (CATEGORY_MEMBER (XFASTINT (category), category_set) != NILP (reset)) + { + category_set = Fcopy_sequence (category_set); + SET_CATEGORY_SET (category_set, category, set_value); + if (to > end) + char_table_set_range (table, start, end, category_set); + else + char_table_set_range (table, start, to, category_set); + } + start = to + 1; } - else - /* This should never happen. */ - error ("Invalid category table"); - - SET_CATEGORY_SET (category_set, category, set_value); + return Qnil; } diff --cc src/category.h index d48d99df805,e09a71d420d..0b909de7ddb --- a/src/category.h +++ b/src/category.h @@@ -1,6 -1,9 +1,9 @@@ /* Declarations having to do with Emacs category tables. Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. Licensed to the Free Software Foundation. - Copyright (C) 2001, 2002 ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. diff --cc src/ccl.c index 4a47ca063f3,1750ce2f075..b9dd47ff17c --- a/src/ccl.c +++ b/src/ccl.c @@@ -1,7 -1,10 +1,10 @@@ /* CCL (Code Conversion Language) interpreter. Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. - Copyright (C) 2001 Free Software Foundation, Inc. - Licensed to the Free Software Foundation. - Copyright (C) 2001, 2002 ++ Licensed to the Free Software Foundation. + Copyright (C) 2001, 2002 Free Software Foundation, Inc. - Licensed to the Free Software Foundation. ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. @@@ -61,7 -67,11 +67,9 @@@ Lisp_Object Vtranslation_hash_table_vec /* Return a hash table of id number ID. */ #define GET_HASH_TABLE(id) \ (XHASH_TABLE (XCDR(XVECTOR(Vtranslation_hash_table_vector)->contents[(id)]))) -/* Copied from fns.c. */ -#define HASH_VALUE(H, IDX) AREF ((H)->key_and_value, 2 * (IDX) + 1) + extern int charset_unicode; + /* CCL (Code Conversion Language) is a simple language which has operations on one input buffer, one output buffer, and 7 registers. The syntax of CCL is described in `ccl.el'. Emacs Lisp function @@@ -777,78 -755,58 +753,55 @@@ while(0 CCL_SUSPEND (CCL_STAT_SUSPEND_BY_DST); \ } while (0) - /* Read one byte from the current input buffer into REGth register. */ - #define CCL_READ_CHAR(REG) \ - do { \ - if (!src) \ - CCL_INVALID_CMD; \ - else if (src < src_end) \ - { \ - REG = *src++; \ - if (REG == '\n' \ - && ccl->eol_type != CODING_EOL_LF) \ - { \ - /* We are encoding. */ \ - if (ccl->eol_type == CODING_EOL_CRLF) \ - { \ - if (ccl->cr_consumed) \ - ccl->cr_consumed = 0; \ - else \ - { \ - ccl->cr_consumed = 1; \ - REG = '\r'; \ - src--; \ - } \ - } \ - else \ - REG = '\r'; \ - } \ - if (REG == LEADING_CODE_8_BIT_CONTROL \ - && ccl->multibyte) \ - REG = *src++ - 0x20; \ - } \ - else if (ccl->last_block) \ - { \ - ic = ccl->eof_ic; \ - goto ccl_repeat; \ - } \ - else \ - CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \ - } while (0) - - - /* Set C to the character code made from CHARSET and CODE. This is - like MAKE_CHAR but check the validity of CHARSET and CODE. If they - are not valid, set C to (CODE & 0xFF) because that is usually the - case that CCL_ReadMultibyteChar2 read an invalid code and it set - CODE to that invalid byte. */ - - #define CCL_MAKE_CHAR(charset, code, c) \ + /* Read one byte from the current input buffer into Rth register. */ + #define CCL_READ_CHAR(r) \ + do { \ + if (! src) \ + CCL_INVALID_CMD; \ + else if (src < src_end) \ + r = *src++; \ + else if (ccl->last_block) \ + { \ + ic = ccl->eof_ic; \ + goto ccl_repeat; \ + } \ + else \ + CCL_SUSPEND (CCL_STAT_SUSPEND_BY_SRC); \ + } while (0) + + /* Decode CODE by a charset whose id is ID. If ID is 0, return CODE + as is for backward compatibility. Assume that we can use the + variable `charset'. */ + + #define CCL_DECODE_CHAR(id, code) \ + ((id) == 0 ? (code) \ + : (charset = CHARSET_FROM_ID ((id)), DECODE_CHAR (charset, (code)))) + - + /* Encode character C by some of charsets in CHARSET_LIST. Set ID to + the id of the used charset, ENCODED to the resulf of encoding. + Assume that we can use the variable `charset'. */ + -#define CCL_ENCODE_CHAR(c, charset_list, id, encoded) \ - do { \ - unsigned code; \ - \ - charset = char_charset ((c), (charset_list), &code); \ - if (! charset && ! NILP (charset_list)) \ - charset = char_charset ((c), Qnil, &code); \ - if (charset) \ - { \ - (id) = CHARSET_ID (charset); \ - (encoded) = code; \ - } \ - } while (0) - - ++#define CCL_ENCODE_CHAR(c, charset_list, id, encoded) \ + do { \ - if (charset == CHARSET_ASCII) \ - c = code & 0xFF; \ - else if (CHARSET_DEFINED_P (charset) \ - && (code & 0x7F) >= 32 \ - && (code < 256 || ((code >> 7) & 0x7F) >= 32)) \ - { \ - int c1 = code & 0x7F, c2 = 0; \ ++ unsigned code; \ + \ - if (code >= 256) \ - c2 = c1, c1 = (code >> 7) & 0x7F; \ - c = MAKE_CHAR (charset, c1, c2); \ ++ charset = char_charset ((c), (charset_list), &code); \ ++ if (! charset && ! NILP (charset_list)) \ ++ charset = char_charset ((c), Qnil, &code); \ ++ if (charset) \ ++ { \ ++ (id) = CHARSET_ID (charset); \ ++ (encoded) = code; \ + } \ - else \ - c = code & 0xFF; \ - } while (0) - ++ } while (0) - /* Execute CCL code on SRC_BYTES length text at SOURCE. The resulting - text goes to a place pointed by DESTINATION, the length of which - should not exceed DST_BYTES. The bytes actually processed is - returned as *CONSUMED. The return value is the length of the - resulting text. As a side effect, the contents of CCL registers - are updated. If SOURCE or DESTINATION is NULL, only operations on - registers are permitted. */ + /* Execute CCL code on characters at SOURCE (length SRC_SIZE). The + resulting text goes to a place pointed by DESTINATION, the length + of which should not exceed DST_SIZE. As a side effect, how many + characters are consumed and produced are recorded in CCL->consumed + and CCL->produced, and the contents of CCL registers are updated. + If SOURCE or DESTINATION is NULL, only operations on registers are + permitted. */ #ifdef CCL_DEBUG #define CCL_DEBUG_BACKTRACE_LEN 256 @@@ -862,15 -820,15 +815,15 @@@ struct ccl_prog_stac int ic; /* Instruction Counter. */ }; -/* For the moment, we only support depth 256 of stack. */ +/* For the moment, we only support depth 256 of stack. */ static struct ccl_prog_stack ccl_prog_stack_struct[256]; - int - ccl_driver (ccl, source, destination, src_bytes, dst_bytes, consumed) + void + ccl_driver (ccl, source, destination, src_size, dst_size, charset_list) struct ccl_program *ccl; - unsigned char *source, *destination; - int src_bytes, dst_bytes; - int *consumed; + int *source, *destination; + int src_size, dst_size; + Lisp_Object charset_list; { register int *reg = ccl->reg; register int ic = ccl->ic; @@@ -1424,14 -1256,12 +1251,12 @@@ op = hash_lookup (h, make_number (reg[RRR]), NULL); if (op >= 0) { - Lisp_Object opl; + Lisp_Object opl; opl = HASH_VALUE (h, op); - if (!CHAR_VALID_P (XINT (opl), 0)) - if (!CHARACTERP (opl)) ++ if (! CHARACTERP (XINT (opl))) CCL_INVALID_CMD; - SPLIT_CHAR (XINT (opl), reg[RRR], i, j); - if (j != -1) - i = (i << 7) | j; - reg[rrr] = i; + reg[RRR] = charset_unicode; + reg[rrr] = op; reg[7] = 1; /* r7 true for success */ } else @@@ -1442,8 -1272,8 +1267,8 @@@ case CCL_LookupCharConstTbl: op = XINT (ccl_prog[ic]); /* table */ ic++; - CCL_MAKE_CHAR (reg[RRR], reg[rrr], i); + i = CCL_DECODE_CHAR (reg[RRR], reg[rrr]); - { + { struct Lisp_Hash_Table *h = GET_HASH_TABLE (op); op = hash_lookup (h, make_number (i), NULL); @@@ -1876,29 -1706,11 +1701,29 @@@ } msglen = strlen (msg); - if (dst + msglen <= (dst_bytes ? dst_end : src)) + if (dst + msglen <= dst_end) { - bcopy (msg, dst, msglen); - dst += msglen; + for (i = 0; i < msglen; i++) + *dst++ = msg[i]; } + + if (ccl->status == CCL_STAT_INVALID_CMD) + { +#if 0 /* If the remaining bytes contain 0x80..0x9F, copying them + results in an invalid multibyte sequence. */ + + /* Copy the remaining source data. */ + int i = src_end - src; + if (dst_bytes && (dst_end - dst) < i) + i = dst_end - dst; + bcopy (src, dst, i); + src += i; + dst += i; +#else + /* Signal that we've consumed everything. */ + src = src_end; +#endif + } } ccl_finish: @@@ -2058,9 -1868,7 +1881,8 @@@ setup_ccl_program (ccl, ccl_prog ccl->private_state = 0; ccl->status = 0; ccl->stack_idx = 0; - ccl->eol_type = CODING_EOL_LF; ccl->suppress_error = 0; + ccl->eight_bit_control = 0; return 0; } @@@ -2173,8 -1984,8 +1998,9 @@@ See the documentation of `define-ccl-pr if (ASIZE (status) != 9) error ("Length of vector STATUS is not 9"); CHECK_STRING (str); - str_chars = XSTRING (str)->size; - str_bytes = STRING_BYTES (XSTRING (str)); + - GCPRO2 (status, str); ++ str_chars = SCHARS (str); ++ str_bytes = SBYTES (str); for (i = 0; i < 8; i++) { @@@ -2189,34 -2000,89 +2015,89 @@@ if (ccl.ic < i && i < ccl.size) ccl.ic = i; } - outbufsize = SBYTES (str) * ccl.buf_magnification + 256; - outbuf = (char *) xmalloc (outbufsize); - ccl.last_block = NILP (contin); - ccl.multibyte = STRING_MULTIBYTE (str); - produced = ccl_driver (&ccl, SDATA (str), outbuf, - SBYTES (str), outbufsize, (int *) 0); - for (i = 0; i < 8; i++) - XSET (AREF (status, i), Lisp_Int, ccl.reg[i]); - XSETINT (AREF (status, 8), ccl.ic); - UNGCPRO; - if (NILP (unibyte_p)) + outbufsize = (ccl.buf_magnification + ? str_bytes * ccl.buf_magnification + 256 + : str_bytes + 256); + outp = outbuf = (unsigned char *) xmalloc (outbufsize); + + consumed_chars = consumed_bytes = 0; + produced_chars = 0; + while (consumed_bytes < str_bytes) { - int nchars; - const unsigned char *p = XSTRING (str)->data + consumed_bytes; - const unsigned char *endp = XSTRING (str)->data + str_bytes; ++ const unsigned char *p = SDATA (str) + consumed_bytes; ++ const unsigned char *endp = SDATA (str) + str_bytes; + int i = 0; + int *src, src_size; + + if (endp - p == str_chars - consumed_chars) + while (i < CCL_EXECUTE_BUF_SIZE && p < endp) + source[i++] = *p++; + else + while (i < CCL_EXECUTE_BUF_SIZE && p < endp) + source[i++] = STRING_CHAR_ADVANCE (p); + consumed_chars += i; - consumed_bytes = p - XSTRING (str)->data; ++ consumed_bytes = p - SDATA (str); + + if (consumed_bytes == str_bytes) + ccl.last_block = NILP (contin); + src = source; + src_size = i; + while (1) + { + ccl_driver (&ccl, src, destination, src_size, CCL_EXECUTE_BUF_SIZE, + Qnil); + if (ccl.status != CCL_STAT_SUSPEND_BY_DST) + break; + produced_chars += ccl.produced; + if (NILP (unibyte_p)) + { + if (outp - outbuf + MAX_MULTIBYTE_LENGTH * ccl.produced + > outbufsize) + { + int offset = outp - outbuf; + outbufsize += MAX_MULTIBYTE_LENGTH * ccl.produced; + outbuf = (unsigned char *) xrealloc (outbuf, outbufsize); + outp = outbuf + offset; + } + for (i = 0; i < ccl.produced; i++) + CHAR_STRING_ADVANCE (destination[i], outp); + } + else + { + if (outp - outbuf + ccl.produced > outbufsize) + { + int offset = outp - outbuf; + outbufsize += ccl.produced; + outbuf = (unsigned char *) xrealloc (outbuf, outbufsize); + outp = outbuf + offset; + } + for (i = 0; i < ccl.produced; i++) + *outp++ = destination[i]; + } + src += ccl.consumed; + src_size -= ccl.consumed; + } - produced = str_as_multibyte (outbuf, outbufsize, produced, &nchars); - val = make_multibyte_string (outbuf, nchars, produced); + if (ccl.status != CCL_STAT_SUSPEND_BY_SRC) + break; - } + } - else - val = make_unibyte_string (outbuf, produced); - xfree (outbuf); - QUIT; - if (ccl.status == CCL_STAT_SUSPEND_BY_DST) - error ("Output buffer for the CCL programs overflow"); + if (ccl.status != CCL_STAT_SUCCESS && ccl.status != CCL_STAT_SUSPEND_BY_SRC) error ("Error in CCL program at %dth code", ccl.ic); - + + for (i = 0; i < 8; i++) + XSET (XVECTOR (status)->contents[i], Lisp_Int, ccl.reg[i]); + XSETINT (XVECTOR (status)->contents[8], ccl.ic); + + if (NILP (unibyte_p)) + val = make_multibyte_string ((char *) outbuf, produced_chars, + outp - outbuf); + else + val = make_unibyte_string ((char *) outbuf, produced_chars); + xfree (outbuf); + return val; } diff --cc src/ccl.h index bfd4757b41a,6e8e45b9a51..e1b8285df19 --- a/src/ccl.h +++ b/src/ccl.h @@@ -1,6 -1,9 +1,9 @@@ /* Header for CCL (Code Conversion Language) interpreter. Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. -- Licensed to the Free Software Foundation. - Copyright (C) 2001, 2002 ++ Licensed to the Free Software Foundation. ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. @@@ -63,13 -62,12 +62,15 @@@ struct ccl_program format when the CCL program is used for encoding by a coding system. */ + int consumed; + int produced; int suppress_error; /* If nonzero, don't insert error message in the output. */ - int eight_bit_control; /* Set to nonzero if CCL_WRITE_CHAR - writes eight-bit-control char. */ + int eight_bit_control; /* If nonzero, ccl_driver counts all + eight-bit-control bytes written by + CCL_WRITE_CHAR. After execution, + if no such byte is written, set + this value to zero. */ }; /* This data type is used for the spec field of the structure diff --cc src/character.c index 00000000000,5501d8eb13a..b25aff8083c mode 000000,100644..100644 --- a/src/character.c +++ b/src/character.c @@@ -1,0 -1,975 +1,975 @@@ + /* Basic character support. + Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN. - Licensed to the Free Software Foundation. ++ Licensed to the Free Software Foundation. + Copyright (C) 2001 Free Software Foundation, Inc. - Copyright (C) 2001, 2002 ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 + + This file is part of GNU Emacs. + + GNU Emacs is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU Emacs is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU Emacs; see the file COPYING. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + + /* At first, see the document in `character.h' to understand the code + in this file. */ + + #ifdef emacs + #include + #endif + + #include + + #ifdef emacs + + #include + #include "lisp.h" + #include "character.h" + #include "buffer.h" + #include "charset.h" + #include "composite.h" + #include "disptab.h" + + #else /* not emacs */ + + #include "mulelib.h" + + #endif /* emacs */ + + Lisp_Object Qcharacterp; + + /* Vector of translation table ever defined. + ID of a translation table is used to index this vector. */ + Lisp_Object Vtranslation_table_vector; + + /* A char-table for characters which may invoke auto-filling. */ + Lisp_Object Vauto_fill_chars; + + Lisp_Object Qauto_fill_chars; + + Lisp_Object Vchar_unify_table; + + /* A char-table. An element is non-nil iff the corresponding + character has a printable glyph. */ + Lisp_Object Vprintable_chars; + + /* A char-table. An elemnent is a column-width of the corresponding + character. */ + Lisp_Object Vchar_width_table; + + /* A char-table. An element is a symbol indicating the direction + property of corresponding character. */ + Lisp_Object Vchar_direction_table; + + /* Variable used locally in the macro FETCH_MULTIBYTE_CHAR. */ + unsigned char *_fetch_multibyte_char_p; + + /* Char table of scripts. */ + Lisp_Object Vchar_script_table; + + static Lisp_Object Qchar_script_table; + + /* Mapping table from unibyte chars to multibyte chars. */ + int unibyte_to_multibyte_table[256]; + + + + int + char_string (c, p) + int c; + unsigned char *p; + { + int bytes; + + if (c & CHAR_MODIFIER_MASK) + { + /* As a character not less than 256 can't have modifier bits, we + just ignore the bits. */ + if (SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) + { + /* For Meta, Shift, and Control modifiers, we need special care. */ + if (c & CHAR_META) + { + /* Move the meta bit to the right place for a string. */ + c = (c & ~CHAR_META) | 0x80; + } + if (c & CHAR_SHIFT) + { + /* Shift modifier is valid only with [A-Za-z]. */ + if ((c & 0377) >= 'A' && (c & 0377) <= 'Z') + c &= ~CHAR_SHIFT; + else if ((c & 0377) >= 'a' && (c & 0377) <= 'z') + c = (c & ~CHAR_SHIFT) - ('a' - 'A'); + } + if (c & CHAR_CTL) + { + /* Simulate the code in lread.c. */ + /* Allow `\C- ' and `\C-?'. */ + if (c == (CHAR_CTL | ' ')) + c = 0; + else if (c == (CHAR_CTL | '?')) + c = 127; + /* ASCII control chars are made from letters (both cases), + as well as the non-letters within 0100...0137. */ + else if ((c & 0137) >= 0101 && (c & 0137) <= 0132) + c &= (037 | (~0177 & ~CHAR_CTL)); + else if ((c & 0177) >= 0100 && (c & 0177) <= 0137) + c &= (037 | (~0177 & ~CHAR_CTL)); + } + } + + /* If C still has any modifier bits, just ignore it. */ + c &= ~CHAR_MODIFIER_MASK; + } + + MAYBE_UNIFY_CHAR (c); + + if (c <= MAX_3_BYTE_CHAR) + { + bytes = CHAR_STRING (c, p); + } + else if (c <= MAX_4_BYTE_CHAR) + { + p[0] = (0xF0 | (c >> 18)); + p[1] = (0x80 | ((c >> 12) & 0x3F)); + p[2] = (0x80 | ((c >> 6) & 0x3F)); + p[3] = (0x80 | (c & 0x3F)); + bytes = 4; + } + else if (c <= MAX_5_BYTE_CHAR) + { + p[0] = 0xF8; + p[1] = (0x80 | ((c >> 18) & 0x0F)); + p[2] = (0x80 | ((c >> 12) & 0x3F)); + p[3] = (0x80 | ((c >> 6) & 0x3F)); + p[4] = (0x80 | (c & 0x3F)); + bytes = 5; + } + else + { + c = CHAR_TO_BYTE8 (c); + bytes = BYTE8_STRING (c, p); + } + + return bytes; + } + + + int + string_char (p, advanced, len) + const unsigned char *p; + const unsigned char **advanced; + int *len; + { + int c; + const unsigned char *saved_p = p; + + if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10)) + { + c = STRING_CHAR_ADVANCE (p); + } + else if (! (*p & 0x08)) + { + c = ((((p)[0] & 0xF) << 18) + | (((p)[1] & 0x3F) << 12) + | (((p)[2] & 0x3F) << 6) + | ((p)[3] & 0x3F)); + p += 4; + } + else + { + c = ((((p)[1] & 0x3F) << 18) + | (((p)[2] & 0x3F) << 12) + | (((p)[3] & 0x3F) << 6) + | ((p)[4] & 0x3F)); + p += 5; + } + + MAYBE_UNIFY_CHAR (c); + + if (len) + *len = p - saved_p; + if (advanced) + *advanced = p; + return c; + } + + + /* Translate character C by translation table TABLE. If C is + negative, translate a character specified by CHARSET and CODE. If + no translation is found in TABLE, return the untranslated + character. */ + + int + translate_char (table, c) + Lisp_Object table; + int c; + { + Lisp_Object ch; + + if (! CHAR_TABLE_P (table)) + return c; + ch = CHAR_TABLE_REF (table, c); + if (! CHARACTERP (ch)) + return c; + return XINT (ch); + } + + /* Convert the multibyte character C to unibyte 8-bit character based + on the current value of charset_unibyte. If dimension of + charset_unibyte is more than one, return (C & 0xFF). + + The argument REV_TBL is now ignored. It will be removed in the + future. */ + + int + multibyte_char_to_unibyte (c, rev_tbl) + int c; + Lisp_Object rev_tbl; + { + struct charset *charset; + unsigned c1; + + if (CHAR_BYTE8_P (c)) + return CHAR_TO_BYTE8 (c); + charset = CHARSET_FROM_ID (charset_unibyte); + c1 = ENCODE_CHAR (charset, c); + return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF); + } + + + DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0, + doc: /* Return non-nil if OBJECT is a character. */) + (object, ignore) + Lisp_Object object, ignore; + { + return (CHARACTERP (object) ? Qt : Qnil); + } + + DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0, + doc: /* Return the character of the maximum code. */) + () + { + return make_number (MAX_CHAR); + } + + DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte, + Sunibyte_char_to_multibyte, 1, 1, 0, + doc: /* Convert the unibyte character CH to multibyte character. + The multibyte character is a result of decoding CH by + the current unibyte charset (see `unibyte-charset'). */) + (ch) + Lisp_Object ch; + { + int c; + struct charset *charset; + + CHECK_CHARACTER (ch); + c = XFASTINT (ch); + if (c >= 0400) + error ("Invalid unibyte character: %d", c); + charset = CHARSET_FROM_ID (charset_unibyte); + c = DECODE_CHAR (charset, c); + if (c < 0) + c = BYTE8_TO_CHAR (XFASTINT (ch)); + return make_number (c); + } + + DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte, + Smultibyte_char_to_unibyte, 1, 1, 0, + doc: /* Convert the multibyte character CH to unibyte character.\n\ + The unibyte character is a result of encoding CH by + the current primary charset (value of `charset-primary'). */) + (ch) + Lisp_Object ch; + { + int c; + + CHECK_CHARACTER (ch); + c = XFASTINT (ch); + c = CHAR_TO_BYTE8 (c); + return make_number (c); + } + + DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0, + doc: /* Return 1 regardless of the argument CHAR. + This is now an obsolete function. We keep it just for backward compatibility. */) + (ch) + Lisp_Object ch; + { + CHECK_CHARACTER (ch); + return make_number (1); + } + + DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0, + doc: /* Return width of CHAR when displayed in the current buffer. + The width is measured by how many columns it occupies on the screen. + Tab is taken to occupy `tab-width' columns. */) + (ch) + Lisp_Object ch; + { + Lisp_Object disp; + int c, width; + struct Lisp_Char_Table *dp = buffer_display_table (); + + CHECK_CHARACTER (ch); + c = XINT (ch); + + /* Get the way the display table would display it. */ + disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil; + + if (VECTORP (disp)) + width = ASIZE (disp); + else + width = CHAR_WIDTH (c); + + return make_number (width); + } + + /* Return width of string STR of length LEN when displayed in the + current buffer. The width is measured by how many columns it + occupies on the screen. If PRECISION > 0, return the width of + longest substring that doesn't exceed PRECISION, and set number of + characters and bytes of the substring in *NCHARS and *NBYTES + respectively. */ + + int + c_string_width (str, len, precision, nchars, nbytes) - unsigned char *str; ++ const unsigned char *str; + int precision, *nchars, *nbytes; + { + int i = 0, i_byte = 0; + int width = 0; + struct Lisp_Char_Table *dp = buffer_display_table (); + + while (i_byte < len) + { + int bytes, thiswidth; + Lisp_Object val; + int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes); + + if (dp) + { + val = DISP_CHAR_VECTOR (dp, c); + if (VECTORP (val)) + thiswidth = XVECTOR (val)->size; + else + thiswidth = CHAR_WIDTH (c); + } + else + { + thiswidth = CHAR_WIDTH (c); + } + + if (precision > 0 + && (width + thiswidth > precision)) + { + *nchars = i; + *nbytes = i_byte; + return width; + } + i++; + i_byte += bytes; + width += thiswidth; + } + + if (precision > 0) + { + *nchars = i; + *nbytes = i_byte; + } + + return width; + } + + /* Return width of string STR of length LEN when displayed in the + current buffer. The width is measured by how many columns it + occupies on the screen. */ + + int + strwidth (str, len) + unsigned char *str; + int len; + { + return c_string_width (str, len, -1, NULL, NULL); + } + + /* Return width of Lisp string STRING when displayed in the current + buffer. The width is measured by how many columns it occupies on + the screen while paying attention to compositions. If PRECISION > + 0, return the width of longest substring that doesn't exceed + PRECISION, and set number of characters and bytes of the substring + in *NCHARS and *NBYTES respectively. */ + + int + lisp_string_width (string, precision, nchars, nbytes) + Lisp_Object string; + int precision, *nchars, *nbytes; + { - int len = XSTRING (string)->size; - unsigned char *str = XSTRING (string)->data; ++ int len = SCHARS (string); ++ unsigned char *str = SDATA (string); + int i = 0, i_byte = 0; + int width = 0; + struct Lisp_Char_Table *dp = buffer_display_table (); + + while (i < len) + { + int chars, bytes, thiswidth; + Lisp_Object val; + int cmp_id; + int ignore, end; + + if (find_composition (i, -1, &ignore, &end, &val, string) + && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string)) + >= 0)) + { + thiswidth = composition_table[cmp_id]->width; + chars = end - i; + bytes = string_char_to_byte (string, end) - i_byte; + } + else if (dp) + { + int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes); + + chars = 1; + val = DISP_CHAR_VECTOR (dp, c); + if (VECTORP (val)) + thiswidth = XVECTOR (val)->size; + else + thiswidth = CHAR_WIDTH (c); + } + else + { + int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes); + + chars = 1; + thiswidth = CHAR_WIDTH (c); + } + + if (precision > 0 + && (width + thiswidth > precision)) + { + *nchars = i; + *nbytes = i_byte; + return width; + } + i += chars; + i_byte += bytes; + width += thiswidth; + } + + if (precision > 0) + { + *nchars = i; + *nbytes = i_byte; + } + + return width; + } + + DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0, + doc: /* Return width of STRING when displayed in the current buffer. + Width is measured by how many columns it occupies on the screen. + When calculating width of a multibyte character in STRING, + only the base leading-code is considered; the validity of + the following bytes is not checked. Tabs in STRING are always + taken to occupy `tab-width' columns. */) + (str) + Lisp_Object str; + { + Lisp_Object val; + + CHECK_STRING (str); + XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL)); + return val; + } + + DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0, + doc: /* Return the direction of CHAR. + The returned value is 0 for left-to-right and 1 for right-to-left. */) + (ch) + Lisp_Object ch; + { + int c; + + CHECK_CHARACTER (ch); + c = XINT (ch); + return CHAR_TABLE_REF (Vchar_direction_table, c); + } + + DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0, + doc: /* Return number of characters between BEG and END. + This is now an obsolete function. We keep it just for backward compatibility. */) + (beg, end) + Lisp_Object beg, end; + { + int from, to; + + CHECK_NUMBER_COERCE_MARKER (beg); + CHECK_NUMBER_COERCE_MARKER (end); + + from = min (XFASTINT (beg), XFASTINT (end)); + to = max (XFASTINT (beg), XFASTINT (end)); + + return make_number (to - from); + } + + /* Return the number of characters in the NBYTES bytes at PTR. + This works by looking at the contents and checking for multibyte + sequences while assuming that there's no invalid sequence. + However, if the current buffer has enable-multibyte-characters = + nil, we treat each byte as a character. */ + + int + chars_in_text (ptr, nbytes) - unsigned char *ptr; ++ const unsigned char *ptr; + int nbytes; + { + /* current_buffer is null at early stages of Emacs initialization. */ + if (current_buffer == 0 + || NILP (current_buffer->enable_multibyte_characters)) + return nbytes; + + return multibyte_chars_in_text (ptr, nbytes); + } + + /* Return the number of characters in the NBYTES bytes at PTR. + This works by looking at the contents and checking for multibyte + sequences while assuming that there's no invalid sequence. It + ignores enable-multibyte-characters. */ + + int + multibyte_chars_in_text (ptr, nbytes) - unsigned char *ptr; ++ const unsigned char *ptr; + int nbytes; + { - unsigned char *endp = ptr + nbytes; ++ const unsigned char *endp = ptr + nbytes; + int chars = 0; + + while (ptr < endp) + { + int len = MULTIBYTE_LENGTH (ptr, endp); + + if (len == 0) + abort (); + ptr += len; + chars++; + } + + return chars; + } + + /* Parse unibyte text at STR of LEN bytes as a multibyte text, count + characters and bytes in it, and store them in *NCHARS and *NBYTES + respectively. On counting bytes, pay attention to that 8-bit + characters not constructing a valid multibyte sequence are + represented by 2-byte in a multibyte text. */ + + void + parse_str_as_multibyte (str, len, nchars, nbytes) - unsigned char *str; ++ const unsigned char *str; + int len, *nchars, *nbytes; + { - unsigned char *endp = str + len; ++ const unsigned char *endp = str + len; + int n, chars = 0, bytes = 0; + + if (len >= MAX_MULTIBYTE_LENGTH) + { - unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; ++ const unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; + while (str < adjusted_endp) + { + if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0) + str += n, bytes += n; + else + str++, bytes += 2; + chars++; + } + } + while (str < endp) + { + if ((n = MULTIBYTE_LENGTH (str, endp)) > 0) + str += n, bytes += n; + else + str++, bytes += 2; + chars++; + } + + *nchars = chars; + *nbytes = bytes; + return; + } + + /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text. + It actually converts only such 8-bit characters that don't contruct + a multibyte sequence to multibyte forms of Latin-1 characters. If + NCHARS is nonzero, set *NCHARS to the number of characters in the + text. It is assured that we can use LEN bytes at STR as a work + area and that is enough. Return the number of bytes of the + resulting text. */ + + int + str_as_multibyte (str, len, nbytes, nchars) + unsigned char *str; + int len, nbytes, *nchars; + { + unsigned char *p = str, *endp = str + nbytes; + unsigned char *to; + int chars = 0; + int n; + + if (nbytes >= MAX_MULTIBYTE_LENGTH) + { + unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; + while (p < adjusted_endp + && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0) + p += n, chars++; + } + while ((n = MULTIBYTE_LENGTH (p, endp)) > 0) + p += n, chars++; + if (nchars) + *nchars = chars; + if (p == endp) + return nbytes; + + to = p; + nbytes = endp - p; + endp = str + len; + safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes); + p = endp - nbytes; + + if (nbytes >= MAX_MULTIBYTE_LENGTH) + { + unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; + while (p < adjusted_endp) + { + if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0) + { + while (n--) + *to++ = *p++; + } + else + { + int c = *p++; + c = BYTE8_TO_CHAR (c); + to += CHAR_STRING (c, to); + } + } + chars++; + } + while (p < endp) + { + if ((n = MULTIBYTE_LENGTH (p, endp)) > 0) + { + while (n--) + *to++ = *p++; - } ++ } + else + { + int c = *p++; + c = BYTE8_TO_CHAR (c); + to += CHAR_STRING (c, to); + } + chars++; + } + if (nchars) + *nchars = chars; + return (to - str); + } + + /* Parse unibyte string at STR of LEN bytes, and return the number of + bytes it may ocupy when converted to multibyte string by + `str_to_multibyte'. */ + + int + parse_str_to_multibyte (str, len) + unsigned char *str; + int len; + { + unsigned char *endp = str + len; + int bytes; + + for (bytes = 0; str < endp; str++) + bytes += (*str < 0x80) ? 1 : 2; + return bytes; + } + + + /* Convert unibyte text at STR of NBYTES bytes to a multibyte text + that contains the same single-byte characters. It actually + converts all 8-bit characters to multibyte forms. It is assured + that we can use LEN bytes at STR as a work area and that is + enough. */ + + int + str_to_multibyte (str, len, bytes) + unsigned char *str; + int len, bytes; + { + unsigned char *p = str, *endp = str + bytes; + unsigned char *to; + + while (p < endp && *p < 0x80) p++; + if (p == endp) + return bytes; + to = p; + bytes = endp - p; + endp = str + len; + safe_bcopy ((char *) p, (char *) (endp - bytes), bytes); + p = endp - bytes; - while (p < endp) ++ while (p < endp) + { + int c = *p++; + + if (c >= 0x80) + c = BYTE8_TO_CHAR (c); + to += CHAR_STRING (c, to); + } + return (to - str); + } + + /* Arrange multibyte text at STR of LEN bytes as a unibyte text. It + actually converts characters in the range 0x80..0xFF to + unibyte. */ + + int + str_as_unibyte (str, bytes) + unsigned char *str; + int bytes; + { + const unsigned char *p = str, *endp = str + bytes; + unsigned char *to; + int c, len; + + while (p < endp) + { + c = *p; + len = BYTES_BY_CHAR_HEAD (c); + if (CHAR_BYTE8_HEAD_P (c)) + break; + p += len; + } + to = str + (p - str); - while (p < endp) ++ while (p < endp) + { + c = *p; + len = BYTES_BY_CHAR_HEAD (c); + if (CHAR_BYTE8_HEAD_P (c)) + { + c = STRING_CHAR_ADVANCE (p); + *to++ = CHAR_TO_BYTE8 (c); + } + else + { + while (len--) *to++ = *p++; + } + } + return (to - str); + } + + int + string_count_byte8 (string) + Lisp_Object string; + { + int multibyte = STRING_MULTIBYTE (string); - int nbytes = STRING_BYTES (XSTRING (string)); - unsigned char *p = XSTRING (string)->data; ++ int nbytes = SBYTES (string); ++ unsigned char *p = SDATA (string); + unsigned char *pend = p + nbytes; + int count = 0; + int c, len; + + if (multibyte) + while (p < pend) + { + c = *p; + len = BYTES_BY_CHAR_HEAD (c); + + if (CHAR_BYTE8_HEAD_P (c)) + count++; + p += len; + } + else + while (p < pend) + { + if (*p++ >= 0x80) + count++; + } + return count; + } + + + Lisp_Object + string_escape_byte8 (string) + Lisp_Object string; + { - int nchars = XSTRING (string)->size; - int nbytes = STRING_BYTES (XSTRING (string)); ++ int nchars = SCHARS (string); ++ int nbytes = SBYTES (string); + int multibyte = STRING_MULTIBYTE (string); + int byte8_count; + const unsigned char *src, *src_end; + unsigned char *dst; + Lisp_Object val; + int c, len; + + if (multibyte && nchars == nbytes) + return string; + + byte8_count = string_count_byte8 (string); + + if (byte8_count == 0) + return string; + + if (multibyte) + /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */ + val = make_uninit_multibyte_string (nchars + byte8_count * 3, + nbytes + byte8_count * 2); + else + /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */ + val = make_uninit_string (nbytes + byte8_count * 3); + - src = XSTRING (string)->data; ++ src = SDATA (string); + src_end = src + nbytes; - dst = XSTRING (val)->data; ++ dst = SDATA (val); + if (multibyte) + while (src < src_end) + { + c = *src; + len = BYTES_BY_CHAR_HEAD (c); + + if (CHAR_BYTE8_HEAD_P (c)) + { + c = STRING_CHAR_ADVANCE (src); + c = CHAR_TO_BYTE8 (c); + sprintf ((char *) dst, "\\%03o", c); + dst += 4; + } + else + while (len--) *dst++ = *src++; + } + else + while (src < src_end) + { + c = *src++; + if (c >= 0x80) + { + sprintf ((char *) dst, "\\%03o", c); + dst += 4; + } + else + *dst++ = c; + } + return val; + } + + -DEFUN ("string", Fstring, Sstring, 1, MANY, 0, ++DEFUN ("string", Fstring, Sstring, 0, MANY, 0, + doc: /* + Concatenate all the argument characters and make the result a string. + usage: (string &rest CHARACTERS) */) + (n, args) + int n; + Lisp_Object *args; + { + int i; + unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n); + unsigned char *p = buf; + int c; + + for (i = 0; i < n; i++) + { + CHECK_CHARACTER (args[i]); + c = XINT (args[i]); + p += CHAR_STRING (c, p); + } + + return make_string_from_bytes ((char *) buf, n, p - buf); + } + + void + init_character_once () + { + } + + #ifdef emacs + + void + syms_of_character () + { + DEFSYM (Qcharacterp, "characterp"); + DEFSYM (Qauto_fill_chars, "auto-fill-chars"); + + staticpro (&Vchar_unify_table); + Vchar_unify_table = Qnil; + + defsubr (&Smax_char); + defsubr (&Scharacterp); + defsubr (&Sunibyte_char_to_multibyte); + defsubr (&Smultibyte_char_to_unibyte); + defsubr (&Schar_bytes); + defsubr (&Schar_width); + defsubr (&Sstring_width); + defsubr (&Schar_direction); + defsubr (&Schars_in_region); + defsubr (&Sstring); + + DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector, + doc: /* + Vector recording all translation tables ever defined. + Each element is a pair (SYMBOL . TABLE) relating the table to the + symbol naming it. The ID of a translation table is an index into this vector. */); + Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil); + + DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars, + doc: /* + A char-table for characters which invoke auto-filling. + Such characters have value t in this table. */); + Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil); + CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt); + CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt); + + DEFVAR_LISP ("char-width-table", &Vchar_width_table, + doc: /* + A char-table for width (columns) of each character. */); + Vchar_width_table = Fmake_char_table (Qnil, make_number (1)); + char_table_set_range (Vchar_width_table, 0x80, 0x9F, make_number (4)); + char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR, + make_number (4)); + + DEFVAR_LISP ("char-direction-table", &Vchar_direction_table, + doc: /* A char-table for direction of each character. */); + Vchar_direction_table = Fmake_char_table (Qnil, make_number (1)); + + DEFVAR_LISP ("printable-chars", &Vprintable_chars, + doc: /* A char-table for each printable character. */); + Vprintable_chars = Fmake_char_table (Qnil, Qnil); + Fset_char_table_range (Vprintable_chars, + Fcons (make_number (32), make_number (126)), Qt); + Fset_char_table_range (Vprintable_chars, + Fcons (make_number (160), + make_number (MAX_5_BYTE_CHAR)), Qt); + + DEFVAR_LISP ("char-script-table", &Vchar_script_table, + doc: /* Char table of script symbols. + It has one extra slot whose value is a list of script symbols. */); + + /* Intern this now in case it isn't already done. + Setting this variable twice is harmless. + But don't staticpro it here--that is done in alloc.c. */ + Qchar_table_extra_slots = intern ("char-table-extra-slots"); + DEFSYM (Qchar_script_table, "char-script-table"); + Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1)); + Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil); + } + + #endif /* emacs */ diff --cc src/character.h index 00000000000,0d41f5a0982..db9de8c3d5b mode 000000,100644..100644 --- a/src/character.h +++ b/src/character.h @@@ -1,0 -1,611 +1,652 @@@ + /* Header for multibyte character handler. + Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN. - Licensed to the Free Software Foundation. - Copyright (C) 2001, 2002 ++ Licensed to the Free Software Foundation. ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 + + This file is part of GNU Emacs. + + GNU Emacs is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU Emacs is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU Emacs; see the file COPYING. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + + #ifndef EMACS_CHARACTER_H + #define EMACS_CHARACTER_H + + /* character code 1st byte byte sequence + -------------- -------- ------------- + 0-7F 00..7F 0xxxxxxx + 80-7FF C2..DF 110xxxxx 10xxxxxx + 800-FFFF E0..EF 1110xxxx 10xxxxxx 10xxxxxx + 10000-1FFFFF F0..F7 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + 200000-3FFF7F F8 11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx + invalid F9..FF + + raw-8-bit + 3FFF80-3FFFFF C0..C1 1100000x 10xxxxxx + */ + + /* Maximum character code ((1 << CHARACTERBITS) - 1). */ + #define MAX_CHAR 0x3FFFFF + + /* Maximum Unicode character code. */ + #define MAX_UNICODE_CHAR 0x10FFFF + + /* Maximum N-byte character codes. */ + #define MAX_1_BYTE_CHAR 0x7F + #define MAX_2_BYTE_CHAR 0x7FF + #define MAX_3_BYTE_CHAR 0xFFFF + #define MAX_4_BYTE_CHAR 0x1FFFFF + #define MAX_5_BYTE_CHAR 0x3FFF7F + + /* Leading code range of Latin-1 chars. */ + #define LEADING_CODE_LATIN_1_MIN 0xC2 + #define LEADING_CODE_LATIN_1_MAX 0xC3 + + /* Nonzero iff C is a character that corresponds to a raw 8-bit + byte. */ + #define CHAR_BYTE8_P(c) ((c) > MAX_5_BYTE_CHAR) + + /* Return the character code for raw 8-bit byte BYTE. */ + #define BYTE8_TO_CHAR(byte) ((byte) + 0x3FFF00) + + /* Return the raw 8-bit byte for character C. */ + #define CHAR_TO_BYTE8(c) \ + (CHAR_BYTE8_P (c) \ + ? (c) - 0x3FFF00 \ + : multibyte_char_to_unibyte (c, Qnil)) + + /* Nonzero iff BYTE is the 1st byte of a multibyte form of a character + that corresponds to a raw 8-bit byte. */ + #define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1) + + /* Mapping table from unibyte chars to multibyte chars. */ + extern int unibyte_to_multibyte_table[256]; + + /* Convert the unibyte character C to the corresponding multibyte + character. If C can't be converted, return C. */ + #define unibyte_char_to_multibyte(c) \ + ((c) < 256 ? unibyte_to_multibyte_table[(c)] : (c)) + + /* If C is not ASCII, make it unibyte. */ + #define MAKE_CHAR_UNIBYTE(c) \ + do { \ + if (! ASCII_CHAR_P (c)) \ + c = CHAR_TO_BYTE8 (c); \ + } while (0) + + + /* If C is not ASCII, make it multibyte. It assumes C < 256. */ + #define MAKE_CHAR_MULTIBYTE(c) ((c) = unibyte_to_multibyte_table[(c)]) + + /* This is the maximum byte length of multibyte form. */ + #define MAX_MULTIBYTE_LENGTH 5 + + /* Return a Lisp character whose character code is C. */ + #define make_char(c) make_number (c) + + /* Nonzero iff C is an ASCII byte. */ + #define ASCII_BYTE_P(c) ((unsigned) (c) < 0x80) + + /* Nonzero iff X is a character. */ + #define CHARACTERP(x) (NATNUMP (x) && XFASTINT (x) <= MAX_CHAR) + + /* Nonzero iff C is valid as a character code. GENERICP is not used + now. */ + #define CHAR_VALID_P(c, genericp) ((unsigned) (c) <= MAX_CHAR) + + /* Check if Lisp object X is a character or not. */ + #define CHECK_CHARACTER(x) \ + do { \ + if (! CHARACTERP(x)) x = wrong_type_argument (Qcharacterp, (x)); \ + } while (0) + ++#define CHECK_CHARACTER_CAR(x) \ ++ do { \ ++ Lisp_Object tmp = XCAR (x); \ ++ CHECK_CHARACTER (tmp); \ ++ XSETCAR ((x), tmp); \ ++ } while (0) ++ ++#define CHECK_CHARACTER_CDR(x) \ ++ do { \ ++ Lisp_Object tmp = XCDR (x); \ ++ CHECK_CHARACTER (tmp); \ ++ XSETCDR ((x), tmp); \ ++ } while (0) ++ + /* Nonzero iff C is an ASCII character. */ + #define ASCII_CHAR_P(c) ((unsigned) (c) < 0x80) + + /* Nonzero iff C is a character of code less than 0x100. */ + #define SINGLE_BYTE_CHAR_P(c) ((unsigned) (c) < 0x100) + + /* Nonzero if character C has a printable glyph. */ + #define CHAR_PRINTABLE_P(c) \ + (((c) >= 32 && ((c) < 127) \ + || ! NILP (CHAR_TABLE_REF (Vprintable_chars, (c))))) + + /* Return byte length of multibyte form for character C. */ + #define CHAR_BYTES(c) \ + ( (c) <= MAX_1_BYTE_CHAR ? 1 \ + : (c) <= MAX_2_BYTE_CHAR ? 2 \ + : (c) <= MAX_3_BYTE_CHAR ? 3 \ + : (c) <= MAX_4_BYTE_CHAR ? 4 \ + : (c) <= MAX_5_BYTE_CHAR ? 5 \ + : 2) + + + /* Return the leading code of multibyte form of C. */ + #define CHAR_LEADING_CODE(c) \ + ((c) <= MAX_1_BYTE_CHAR ? c \ + : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6)) \ + : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12)) \ + : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18)) \ + : (c) <= MAX_5_BYTE_CHAR ? 0xF8 \ + : (0xC0 | (((c) >> 6) & 0x01))) + + + /* Store multibyte form of the character C in P. The caller should + allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance. + Returns the length of the multibyte form. */ + + #define CHAR_STRING(c, p) \ + ((unsigned) (c) <= MAX_1_BYTE_CHAR \ + ? ((p)[0] = (c), \ + 1) \ + : (unsigned) (c) <= MAX_2_BYTE_CHAR \ + ? ((p)[0] = (0xC0 | ((c) >> 6)), \ + (p)[1] = (0x80 | ((c) & 0x3F)), \ + 2) \ + : (unsigned) (c) <= MAX_3_BYTE_CHAR \ + ? ((p)[0] = (0xE0 | ((c) >> 12)), \ + (p)[1] = (0x80 | (((c) >> 6) & 0x3F)), \ + (p)[2] = (0x80 | ((c) & 0x3F)), \ + 3) \ + : char_string (c, p)) + + /* Store multibyte form of byte B in P. The caller should allocate at + least MAX_MULTIBYTE_LENGTH bytes area at P in advance. Returns the + length of the multibyte form. */ + + #define BYTE8_STRING(b, p) \ + ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)), \ + (p)[1] = (0x80 | ((c) & 0x3F)), \ + 2) + + + /* Store multibyte form of the character C in P. The caller should + allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance. + And, advance P to the end of the multibyte form. */ + + #define CHAR_STRING_ADVANCE(c, p) \ + do { \ + if ((c) <= MAX_1_BYTE_CHAR) \ + *(p)++ = (c); \ + else if ((c) <= MAX_2_BYTE_CHAR) \ + *(p)++ = (0xC0 | ((c) >> 6)), \ + *(p)++ = (0x80 | ((c) & 0x3F)); \ + else if ((c) <= MAX_3_BYTE_CHAR) \ + *(p)++ = (0xE0 | ((c) >> 12)), \ + *(p)++ = (0x80 | (((c) >> 6) & 0x3F)), \ + *(p)++ = (0x80 | ((c) & 0x3F)); \ + else \ + (p) += char_string ((c), (p)); \ + } while (0) + + + /* Nonzero iff BYTE starts a non-ASCII character in a multibyte + form. */ + #define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0) + + /* Nonzero iff BYTE is a trailing code of a non-ASCII character in a + multibyte form. */ + #define TRAILING_CODE_P(byte) (((byte) & 0xC0) == 0x80) + + /* Nonzero iff BYTE starts a character in a multibyte form. + This is equivalent to: + (ASCII_BYTE_P (byte) || LEADING_CODE_P (byte)) */ + #define CHAR_HEAD_P(byte) (((byte) & 0xC0) != 0x80) + + /* Just kept for backward compatibility. This macro will be removed + in the future. */ + #define BASE_LEADING_CODE_P LEADING_CODE_P + + /* How many bytes a character that starts with BYTE occupies in a + multibyte form. */ + #define BYTES_BY_CHAR_HEAD(byte) \ + (!((byte) & 0x80) ? 1 \ + : !((byte) & 0x20) ? 2 \ + : !((byte) & 0x10) ? 3 \ + : !((byte) & 0x08) ? 4 \ + : 5) + + + /* Return the length of the multi-byte form at string STR of length + LEN while assuming that STR points a valid multi-byte form. As + this macro isn't necessary anymore, all callers will be changed to + use BYTES_BY_CHAR_HEAD directly in the future. */ + + #define MULTIBYTE_FORM_LENGTH(str, len) \ + BYTES_BY_CHAR_HEAD (*(str)) + + /* Parse multibyte string STR of length LENGTH and set BYTES to the + byte length of a character at STR while assuming that STR points a + valid multibyte form. As this macro isn't necessary anymore, all + callers will be changed to use BYTES_BY_CHAR_HEAD directly in the + future. */ + + #define PARSE_MULTIBYTE_SEQ(str, length, bytes) \ + (bytes) = BYTES_BY_CHAR_HEAD (*(str)) + + /* The byte length of multibyte form at unibyte string P ending at + PEND. If STR doesn't point a valid multibyte form, return 0. */ + + #define MULTIBYTE_LENGTH(p, pend) \ + (p >= pend ? 0 \ + : !((p)[0] & 0x80) ? 1 \ + : ((p + 1 >= pend) || (((p)[1] & 0xC0) != 0x80)) ? 0 \ + : ((p)[0] & 0xE0) == 0xC0 ? 2 \ + : ((p + 2 >= pend) || (((p)[2] & 0xC0) != 0x80)) ? 0 \ + : ((p)[0] & 0xF0) == 0xE0 ? 3 \ + : ((p + 3 >= pend) || (((p)[3] & 0xC0) != 0x80)) ? 0 \ + : ((p)[0] & 0xF8) == 0xF0 ? 4 \ + : ((p + 4 >= pend) || (((p)[4] & 0xC0) != 0x80)) ? 0 \ + : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \ + : 0) + + + /* Like MULTIBYTE_LENGTH but don't check the ending address. */ + + #define MULTIBYTE_LENGTH_NO_CHECK(p) \ + (!((p)[0] & 0x80) ? 1 \ + : ((p)[1] & 0xC0) != 0x80 ? 0 \ + : ((p)[0] & 0xE0) == 0xC0 ? 2 \ + : ((p)[2] & 0xC0) != 0x80 ? 0 \ + : ((p)[0] & 0xF0) == 0xE0 ? 3 \ + : ((p)[3] & 0xC0) != 0x80 ? 0 \ + : ((p)[0] & 0xF8) == 0xF0 ? 4 \ + : ((p)[4] & 0xC0) != 0x80 ? 0 \ + : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5 \ + : 0) + ++/* If P is before LIMIT, advance P to the next character boundary. It ++ assumes that P is already at a character boundary of the sane ++ mulitbyte form whose end address is LIMIT. */ ++ ++#define NEXT_CHAR_BOUNDARY(p, limit) \ ++ do { \ ++ if ((p) < (limit)) \ ++ (p) += BYTES_BY_CHAR_HEAD (*(p)); \ ++ } while (0) ++ ++ ++/* If P is after LIMIT, advance P to the previous character boundary. ++ It assumes that P is already at a character boundary of the sane ++ mulitbyte form whose beginning address is LIMIT. */ ++ ++#define PREV_CHAR_BOUNDARY(p, limit) \ ++ do { \ ++ if ((p) > (limit)) \ ++ { \ ++ const unsigned char *p0 = (p); \ ++ do { \ ++ p0--; \ ++ } while (p0 >= limit && ! CHAR_HEAD_P (*p0)); \ ++ (p) = (BYTES_BY_CHAR_HEAD (*p0) == (p) - p0) ? p0 : (p) - 1; \ ++ } \ ++ } while (0) + + /* Return the character code of character whose multibyte form is at + P. The argument LEN is ignored. It will be removed in the + future. */ + + #define STRING_CHAR(p, len) \ + (!((p)[0] & 0x80) \ + ? (p)[0] \ + : ! ((p)[0] & 0x20) \ + ? (((((p)[0] & 0x1F) << 6) \ + | ((p)[1] & 0x3F)) \ + + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)) \ + : ! ((p)[0] & 0x10) \ + ? ((((p)[0] & 0x0F) << 12) \ + | (((p)[1] & 0x3F) << 6) \ + | ((p)[2] & 0x3F)) \ + : string_char ((p), NULL, NULL)) + + + /* Like STRING_CHAR but set ACTUAL_LEN to the length of multibyte + form. The argument LEN is ignored. It will be removed in the + future. */ + + #define STRING_CHAR_AND_LENGTH(p, len, actual_len) \ + (!((p)[0] & 0x80) \ + ? ((actual_len) = 1, (p)[0]) \ + : ! ((p)[0] & 0x20) \ + ? ((actual_len) = 2, \ + (((((p)[0] & 0x1F) << 6) \ + | ((p)[1] & 0x3F)) \ + + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0))) \ + : ! ((p)[0] & 0x10) \ + ? ((actual_len) = 3, \ + ((((p)[0] & 0x0F) << 12) \ + | (((p)[1] & 0x3F) << 6) \ + | ((p)[2] & 0x3F))) \ + : string_char ((p), NULL, &actual_len)) + + + /* Like STRING_CHAR but advacen P to the end of multibyte form. */ + + #define STRING_CHAR_ADVANCE(p) \ + (!((p)[0] & 0x80) \ + ? *(p)++ \ + : ! ((p)[0] & 0x20) \ + ? ((p) += 2, \ + ((((p)[-2] & 0x1F) << 6) \ + | ((p)[-1] & 0x3F) \ - | (((unsigned char) (p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \ ++ | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0))) \ + : ! ((p)[0] & 0x10) \ + ? ((p) += 3, \ + ((((p)[-3] & 0x0F) << 12) \ + | (((p)[-2] & 0x3F) << 6) \ + | ((p)[-1] & 0x3F))) \ + : string_char ((p), &(p), NULL)) + + + /* Fetch the "next" character from Lisp string STRING at byte position + BYTEIDX, character position CHARIDX. Store it into OUTPUT. + + All the args must be side-effect-free. + BYTEIDX and CHARIDX must be lvalues; + we increment them past the character fetched. */ + + #define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \ + if (1) \ + { \ + CHARIDX++; \ + if (STRING_MULTIBYTE (STRING)) \ + { \ + unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \ + int len; \ + \ + OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \ + BYTEIDX += len; \ + } \ + else \ + OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \ + } \ + else + + /* Like FETCH_STRING_CHAR_ADVANCE */ + + #define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \ + if (1) \ + { \ + CHARIDX++; \ + if (STRING_MULTIBYTE (STRING)) \ + { \ + unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \ + int len; \ + \ + OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \ + BYTEIDX += len; \ + } \ + else \ + { \ + OUTPUT = XSTRING (STRING)->data[BYTEIDX++]; \ + MAKE_CHAR_MULTIBYTE (OUTPUT); \ + } \ + } \ + else + + + /* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte. */ + + #define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \ + if (1) \ + { \ + unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX]; \ + int len; \ + \ + OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len); \ + BYTEIDX += len; \ + CHARIDX++; \ + } \ + else + + + /* Like FETCH_STRING_CHAR_ADVANCE but fetch character from the current + buffer. */ + + #define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX) \ + if (1) \ + { \ + CHARIDX++; \ + if (!NILP (current_buffer->enable_multibyte_characters)) \ + { \ + unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \ + int len; \ + \ + OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \ + BYTEIDX += len; \ + } \ + else \ + { \ + OUTPUT = *(BYTE_POS_ADDR (BYTEIDX)); \ + BYTEIDX++; \ + } \ + } \ + else + + + /* Like FETCH_CHAR_ADVANCE but assumes STRING is multibyte. */ + + #define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX) \ + if (1) \ + { \ + unsigned char *ptr = BYTE_POS_ADDR (BYTEIDX); \ + int len; \ + \ + OUTPUT= STRING_CHAR_AND_LENGTH (ptr, 0, len); \ + BYTEIDX += len; \ + CHARIDX++; \ + } \ + else + + + /* Increase the buffer byte position POS_BYTE of the current buffer to + the next character boundary. No range checking of POS. */ + + #define INC_POS(pos_byte) \ + do { \ + unsigned char *p = BYTE_POS_ADDR (pos_byte); \ + pos_byte += BYTES_BY_CHAR_HEAD (*p); \ + } while (0) + + + /* Decrease the buffer byte position POS_BYTE of the current buffer to + the previous character boundary. No range checking of POS. */ + + #define DEC_POS(pos_byte) \ + do { \ + unsigned char *p; \ + \ + pos_byte--; \ + if (pos_byte < GPT_BYTE) \ + p = BEG_ADDR + pos_byte - 1; \ + else \ + p = BEG_ADDR + GAP_SIZE + pos_byte - 1; \ + while (!CHAR_HEAD_P (*p)) \ + { \ + p--; \ + pos_byte--; \ + } \ + } while (0) + + /* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */ + + #define INC_BOTH(charpos, bytepos) \ + do \ + { \ + (charpos)++; \ + if (NILP (current_buffer->enable_multibyte_characters)) \ + (bytepos)++; \ + else \ + INC_POS ((bytepos)); \ + } \ + while (0) + + + /* Decrement both CHARPOS and BYTEPOS, each in the appropriate way. */ + + #define DEC_BOTH(charpos, bytepos) \ + do \ + { \ + (charpos)--; \ + if (NILP (current_buffer->enable_multibyte_characters)) \ + (bytepos)--; \ + else \ + DEC_POS ((bytepos)); \ + } \ + while (0) + + + /* Increase the buffer byte position POS_BYTE of the current buffer to + the next character boundary. This macro relies on the fact that + *GPT_ADDR and *Z_ADDR are always accessible and the values are + '\0'. No range checking of POS_BYTE. */ + + #define BUF_INC_POS(buf, pos_byte) \ + do { \ + unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \ + pos_byte += BYTES_BY_CHAR_HEAD (*p); \ + } while (0) + + + /* Decrease the buffer byte position POS_BYTE of the current buffer to + the previous character boundary. No range checking of POS_BYTE. */ + + #define BUF_DEC_POS(buf, pos_byte) \ + do { \ + unsigned char *p; \ + pos_byte--; \ + if (pos_byte < BUF_GPT_BYTE (buf)) \ + p = BUF_BEG_ADDR (buf) + pos_byte - 1; \ + else \ + p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1; \ + while (!CHAR_HEAD_P (*p)) \ + { \ + p--; \ + pos_byte--; \ + } \ + } while (0) + + + #define MAYBE_UNIFY_CHAR(c) \ + if (c > MAX_UNICODE_CHAR \ + && CHAR_TABLE_P (Vchar_unify_table)) \ + { \ + Lisp_Object val; \ + int unified; \ + \ + val = CHAR_TABLE_REF (Vchar_unify_table, c); \ + if (! NILP (val)) \ + { \ + if (SYMBOLP (val)) \ + { \ + Funify_charset (val, Qnil, Qnil); \ + val = CHAR_TABLE_REF (Vchar_unify_table, c); \ + } \ + if ((unified = XINT (val)) >= 0) \ + c = unified; \ + } \ + } \ + else + + + /* Return the width of ASCII character C. The width is measured by + how many columns occupied on the screen when displayed in the + current buffer. */ + + #define ASCII_CHAR_WIDTH(c) \ + (c < 0x20 \ + ? (c == '\t' \ + ? XFASTINT (current_buffer->tab_width) \ + : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \ + : (c < 0x7f \ + ? 1 \ + : ((NILP (current_buffer->ctl_arrow) ? 4 : 2)))) + + /* Return the width of character C. The width is measured by how many + columns occupied on the screen when displayed in the current + buffer. */ + + #define CHAR_WIDTH(c) \ + (ASCII_CHAR_P (c) \ + ? ASCII_CHAR_WIDTH (c) \ + : XINT (CHAR_TABLE_REF (Vchar_width_table, c))) + + extern int char_resolve_modifier_mask P_ ((int)); + extern int char_string P_ ((int, unsigned char *)); + extern int string_char P_ ((const unsigned char *, + const unsigned char **, int *)); + + extern int translate_char P_ ((Lisp_Object, int c)); + extern int char_printable_p P_ ((int c)); -extern void parse_str_as_multibyte P_ ((unsigned char *, int, int *, int *)); ++extern void parse_str_as_multibyte P_ ((const unsigned char *, int, int *, ++ int *)); + extern int parse_str_to_multibyte P_ ((unsigned char *, int)); + extern int str_as_multibyte P_ ((unsigned char *, int, int, int *)); + extern int str_to_multibyte P_ ((unsigned char *, int, int)); + extern int str_as_unibyte P_ ((unsigned char *, int)); + extern int strwidth P_ ((unsigned char *, int)); -extern int c_string_width P_ ((unsigned char *, int, int, int *, int *)); ++extern int c_string_width P_ ((const unsigned char *, int, int, int *, int *)); + extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *)); + + extern Lisp_Object Vprintable_chars; + + extern Lisp_Object Qcharacterp, Qauto_fill_chars; + extern Lisp_Object Vtranslation_table_vector; + extern Lisp_Object Vchar_width_table; + extern Lisp_Object Vchar_direction_table; + extern Lisp_Object Vchar_unify_table; + + extern Lisp_Object string_escape_byte8 P_ ((Lisp_Object)); + + /* Return a translation table of id number ID. */ + #define GET_TRANSLATION_TABLE(id) \ + (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)])) + + /* A char-table for characters which may invoke auto-filling. */ + extern Lisp_Object Vauto_fill_chars; + + extern Lisp_Object Vchar_script_table; + + /* Copy LEN bytes from FROM to TO. This macro should be used only + when a caller knows that LEN is short and the obvious copy loop is + faster than calling bcopy which has some overhead. Copying a + multibyte sequence of a character is the typical case. */ + + #define BCOPY_SHORT(from, to, len) \ + do { \ + int i = len; \ + unsigned char *from_p = from, *to_p = to; \ + while (i--) *to_p++ = *from_p++; \ + } while (0) + + #define DEFSYM(sym, name) \ + do { (sym) = intern ((name)); staticpro (&(sym)); } while (0) + + #endif /* EMACS_CHARACTER_H */ diff --cc src/charset.c index af5c6ff7068,a651d2ffb74..19c75538340 --- a/src/charset.c +++ b/src/charset.c @@@ -1,7 -1,10 +1,10 @@@ - /* Basic multilingual character support. - Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN. - Licensed to the Free Software Foundation. + /* Basic character set support. + Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN. - Licensed to the Free Software Foundation. ++ Licensed to the Free Software Foundation. Copyright (C) 2001 Free Software Foundation, Inc. - Copyright (C) 2001, 2002 ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. @@@ -20,849 -23,1391 +23,1391 @@@ along with GNU Emacs; see the file COPY the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - /* At first, see the document in `charset.h' to understand the code in - this file. */ - - #ifdef emacs #include - #endif #include - - #ifdef emacs - + #include + #include #include #include "lisp.h" - #include "buffer.h" + #include "character.h" #include "charset.h" - #include "composite.h" #include "coding.h" #include "disptab.h" + #include "buffer.h" - #else /* not emacs */ + /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) *** - #include "mulelib.h" + A coded character set ("charset" hereafter) is a meaningful + collection (i.e. language, culture, functionality, etc.) of + characters. Emacs handles multiple charsets at once. In Emacs Lisp + code, a charset is represented by a symbol. In C code, a charset is + represented by its ID number or by a pointer to a struct charset. - #endif /* emacs */ + The actual information about each charset is stored in two places. + Lispy information is stored in the hash table Vcharset_hash_table as + a vector (charset attributes). The other information is stored in + charset_table as a struct charset. - Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic; - Lisp_Object Qunknown; + */ - /* Declaration of special leading-codes. */ - EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */ - EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */ - EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */ - EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */ + /* List of all charsets. This variable is used only from Emacs + Lisp. */ + Lisp_Object Vcharset_list; - /* Declaration of special charsets. The values are set by - Fsetup_special_charsets. */ - int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */ - int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */ - int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */ - int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */ - int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */ - int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */ - int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */ + /* Hash table that contains attributes of each charset. Keys are + charset symbols, and values are vectors of charset attributes. */ + Lisp_Object Vcharset_hash_table; - Lisp_Object Qcharset_table; + /* Table of struct charset. */ + struct charset *charset_table; - /* A char-table containing information of each character set. */ - Lisp_Object Vcharset_table; + static int charset_table_size; + int charset_table_used; - /* A vector of charset symbol indexed by charset-id. This is used - only for returning charset symbol from C functions. */ - Lisp_Object Vcharset_symbol_table; + Lisp_Object Qcharsetp; - /* A list of charset symbols ever defined. */ - Lisp_Object Vcharset_list; + /* Special charset symbols. */ + Lisp_Object Qascii; + Lisp_Object Qeight_bit; + Lisp_Object Qiso_8859_1; + Lisp_Object Qunicode; - /* Vector of translation table ever defined. - ID of a translation table is used to index this vector. */ - Lisp_Object Vtranslation_table_vector; + /* The corresponding charsets. */ + int charset_ascii; + int charset_eight_bit; + int charset_iso_8859_1; + int charset_unicode; - /* A char-table for characters which may invoke auto-filling. */ - Lisp_Object Vauto_fill_chars; + /* The other special charsets. */ + int charset_jisx0201_roman; + int charset_jisx0208_1978; + int charset_jisx0208; - Lisp_Object Qauto_fill_chars; + /* Value of charset attribute `charset-iso-plane'. */ + Lisp_Object Qgl, Qgr; - /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */ - int bytes_by_char_head[256]; - int width_by_char_head[256]; + /* Charset of unibyte characters. */ + int charset_unibyte; - /* Mapping table from ISO2022's charset (specified by DIMENSION, - CHARS, and FINAL-CHAR) to Emacs' charset. */ - int iso_charset_table[2][2][128]; + /* List of charsets ordered by the priority. */ + Lisp_Object Vcharset_ordered_list; - /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */ - unsigned char *_fetch_multibyte_char_p; - int _fetch_multibyte_char_len; + /* Incremented everytime we change Vcharset_ordered_list. This is + unsigned short so that it fits in Lisp_Int and never matches + -1. */ + unsigned short charset_ordered_list_tick; - /* Offset to add to a non-ASCII value when inserting it. */ - EMACS_INT nonascii_insert_offset; + /* List of iso-2022 charsets. */ + Lisp_Object Viso_2022_charset_list; - /* Translation table for converting non-ASCII unibyte characters - to multibyte codes, or nil. */ - Lisp_Object Vnonascii_translation_table; + /* List of emacs-mule charsets. */ + Lisp_Object Vemacs_mule_charset_list; + + struct charset *emacs_mule_charset[256]; + + /* Mapping table from ISO2022's charset (specified by DIMENSION, + CHARS, and FINAL-CHAR) to Emacs' charset. */ + int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL]; + + Lisp_Object Vcharset_map_directory; + + Lisp_Object Vchar_unified_charset_table; + + /* Defined in chartab.c */ + extern void + map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object), + Lisp_Object function, Lisp_Object table, + Lisp_Object arg, struct charset *charset, + unsigned from, unsigned to)); + + #define CODE_POINT_TO_INDEX(charset, code) \ + ((charset)->code_linear_p \ + ? (code) - (charset)->min_code \ + : (((charset)->code_space_mask[(code) >> 24] & 0x8) \ + && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \ + && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \ + && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \ + ? (((((code) >> 24) - (charset)->code_space[12]) \ + * (charset)->code_space[11]) \ + + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \ + * (charset)->code_space[7]) \ + + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \ + * (charset)->code_space[3]) \ + + (((code) & 0xFF) - (charset)->code_space[0]) \ + - ((charset)->char_index_offset)) \ + : -1) + + + /* Convert the character index IDX to code-point CODE for CHARSET. + It is assumed that IDX is in a valid range. */ + + #define INDEX_TO_CODE_POINT(charset, idx) \ + ((charset)->code_linear_p \ + ? (idx) + (charset)->min_code \ + : (idx += (charset)->char_index_offset, \ + (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \ + | (((charset)->code_space[4] \ + + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \ + << 8) \ + | (((charset)->code_space[8] \ + + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \ + << 16) \ + | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \ + << 24)))) - /* List of all possible generic characters. */ - Lisp_Object Vgeneric_character_list; - void - invalid_character (c) - int c; - { - error ("Invalid character: 0%o, %d, 0x%x", c, c, c); - } - /* Parse string STR of length LENGTH and fetch information of a - character at STR. Set BYTES to the byte length the character - occupies, CHARSET, C1, C2 to proper values of the character. */ - - #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \ - do { \ - (c1) = *(str); \ - (bytes) = BYTES_BY_CHAR_HEAD (c1); \ - if ((bytes) == 1) \ - (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \ - else if ((bytes) == 2) \ - { \ - if ((c1) == LEADING_CODE_8_BIT_CONTROL) \ - (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20; \ - else \ - (charset) = (c1), (c1) = (str)[1] & 0x7F; \ - } \ - else if ((bytes) == 3) \ - { \ - if ((c1) < LEADING_CODE_PRIVATE_11) \ - (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F; \ - else \ - (charset) = (str)[1], (c1) = (str)[2] & 0x7F; \ - } \ - else \ - (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F; \ - } while (0) - - /* 1 if CHARSET, C1, and C2 compose a valid character, else 0. - Note that this intentionally allows invalid components, such - as 0xA0 0xA0, because there exist many files that contain - such invalid byte sequences, especially in EUC-GB. */ - #define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \ - ((charset) == CHARSET_ASCII \ - ? ((c1) >= 0 && (c1) <= 0x7F) \ - : ((charset) == CHARSET_8_BIT_CONTROL \ - ? ((c1) >= 0x80 && (c1) <= 0x9F) \ - : ((charset) == CHARSET_8_BIT_GRAPHIC \ - ? ((c1) >= 0x80 && (c1) <= 0xFF) \ - : (CHARSET_DIMENSION (charset) == 1 \ - ? ((c1) >= 0x20 && (c1) <= 0x7F) \ - : ((c1) >= 0x20 && (c1) <= 0x7F \ - && (c2) >= 0x20 && (c2) <= 0x7F))))) - - /* Store multi-byte form of the character C in STR. The caller should - allocate at least 4-byte area at STR in advance. Returns the - length of the multi-byte form. If C is an invalid character code, - return -1. */ + /* Set to 1 to warn that a charset map is loaded and thus a buffer + text and a string data may be relocated. */ + int charset_map_loaded; - int - char_to_string_1 (c, str) - int c; - unsigned char *str; + struct charset_map_entries { - unsigned char *p = str; + struct { + unsigned from, to; + int c; + } entry[0x10000]; + struct charset_map_entries *next; + }; + + /* Load the mapping information for CHARSET from ENTRIES. + + If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char. + + If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char, + CHARSET->decoder, and CHARSET->encoder. + + If CONTROL_FLAG is 2, setup CHARSET->deunifier and + Vchar_unify_table. If Vchar_unified_charset_table is non-nil, + setup it too. */ + + static void + load_charset_map (charset, entries, n_entries, control_flag) + struct charset *charset; + struct charset_map_entries *entries; + int n_entries; + int control_flag; + { + Lisp_Object vec, table; + unsigned max_code = CHARSET_MAX_CODE (charset); + int ascii_compatible_p = charset->ascii_compatible_p; + int min_char, max_char, nonascii_min_char; + int i; + unsigned char *fast_map = charset->fast_map; - if (c & CHAR_MODIFIER_MASK) /* This includes the case C is negative. */ + if (n_entries <= 0) + return; + + if (control_flag > 0) { - /* Multibyte character can't have a modifier bit. */ - if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) - return -1; + int n = CODE_POINT_TO_INDEX (charset, max_code) + 1; + + table = Fmake_char_table (Qnil, Qnil); + if (control_flag == 1) + vec = Fmake_vector (make_number (n), make_number (-1)); + else if (! CHAR_TABLE_P (Vchar_unify_table)) + Vchar_unify_table = Fmake_char_table (Qnil, Qnil); + + charset_map_loaded = 1; + } - /* For Meta, Shift, and Control modifiers, we need special care. */ - if (c & CHAR_META) + min_char = max_char = entries->entry[0].c; + nonascii_min_char = MAX_CHAR; + for (i = 0; i < n_entries; i++) + { + unsigned from, to; + int from_index, to_index; + int from_c, to_c; + int idx = i % 0x10000; + + if (i > 0 && idx == 0) + entries = entries->next; + from = entries->entry[idx].from; + to = entries->entry[idx].to; + from_c = entries->entry[idx].c; + from_index = CODE_POINT_TO_INDEX (charset, from); + if (from == to) { - /* Move the meta bit to the right place for a string. */ - c = (c & ~CHAR_META) | 0x80; + to_index = from_index; + to_c = from_c; } - if (c & CHAR_SHIFT) + else { - /* Shift modifier is valid only with [A-Za-z]. */ - if ((c & 0377) >= 'A' && (c & 0377) <= 'Z') - c &= ~CHAR_SHIFT; - else if ((c & 0377) >= 'a' && (c & 0377) <= 'z') - c = (c & ~CHAR_SHIFT) - ('a' - 'A'); + to_index = CODE_POINT_TO_INDEX (charset, to); + to_c = from_c + (to_index - from_index); } - if (c & CHAR_CTL) + if (from_index < 0 || to_index < 0) + continue; + + if (control_flag < 2) { - /* Simulate the code in lread.c. */ - /* Allow `\C- ' and `\C-?'. */ - if (c == (CHAR_CTL | ' ')) - c = 0; - else if (c == (CHAR_CTL | '?')) - c = 127; - /* ASCII control chars are made from letters (both cases), - as well as the non-letters within 0100...0137. */ - else if ((c & 0137) >= 0101 && (c & 0137) <= 0132) - c &= (037 | (~0177 & ~CHAR_CTL)); - else if ((c & 0177) >= 0100 && (c & 0177) <= 0137) - c &= (037 | (~0177 & ~CHAR_CTL)); - } + int c; - /* If C still has any modifier bits, just ignore it. */ - c &= ~CHAR_MODIFIER_MASK; - } + if (to_c > max_char) + max_char = to_c; + else if (from_c < min_char) + min_char = from_c; + if (ascii_compatible_p) + { + if (! ASCII_BYTE_P (from_c)) + { + if (from_c < nonascii_min_char) + nonascii_min_char = from_c; + } + else if (! ASCII_BYTE_P (to_c)) + { + nonascii_min_char = 0x80; + } + } - if (SINGLE_BYTE_CHAR_P (c)) - { - if (ASCII_BYTE_P (c) || c >= 0xA0) - *p++ = c; + for (c = from_c; c <= to_c; c++) + CHARSET_FAST_MAP_SET (c, fast_map); + + if (control_flag == 1) + { + unsigned code = from; + + if (CHARSET_COMPACT_CODES_P (charset)) + while (1) + { + ASET (vec, from_index, make_number (from_c)); - CHAR_TABLE_SET (table, from_c, make_number (code)); ++ if (NILP (CHAR_TABLE_REF (table, from_c))) ++ CHAR_TABLE_SET (table, from_c, make_number (code)); + if (from_index == to_index) + break; + from_index++, from_c++; + code = INDEX_TO_CODE_POINT (charset, from_index); + } + else + for (; from_index <= to_index; from_index++, from_c++) + { + ASET (vec, from_index, make_number (from_c)); - CHAR_TABLE_SET (table, from_c, make_number (from_index)); ++ if (NILP (CHAR_TABLE_REF (table, from_c))) ++ CHAR_TABLE_SET (table, from_c, make_number (from_index)); + } + } + } else { - *p++ = LEADING_CODE_8_BIT_CONTROL; - *p++ = c + 0x20; + unsigned code = from; + + while (1) + { + int c1 = DECODE_CHAR (charset, code); - ++ + if (c1 >= 0) + { + CHAR_TABLE_SET (table, from_c, make_number (c1)); + CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c)); + if (CHAR_TABLE_P (Vchar_unified_charset_table)) + CHAR_TABLE_SET (Vchar_unified_charset_table, c1, + CHARSET_NAME (charset)); + } + if (from_index == to_index) + break; + from_index++, from_c++; + code = INDEX_TO_CODE_POINT (charset, from_index); + } } } - else if (CHAR_VALID_P (c, 0)) + + if (control_flag < 2) { - int charset, c1, c2; - - SPLIT_CHAR (c, charset, c1, c2); - - if (charset >= LEADING_CODE_EXT_11) - *p++ = (charset < LEADING_CODE_EXT_12 - ? LEADING_CODE_PRIVATE_11 - : (charset < LEADING_CODE_EXT_21 - ? LEADING_CODE_PRIVATE_12 - : (charset < LEADING_CODE_EXT_22 - ? LEADING_CODE_PRIVATE_21 - : LEADING_CODE_PRIVATE_22))); - *p++ = charset; - if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32)) - return -1; - if (c1) + CHARSET_MIN_CHAR (charset) = (ascii_compatible_p + ? nonascii_min_char : min_char); + CHARSET_MAX_CHAR (charset) = max_char; + if (control_flag == 1) { - *p++ = c1 | 0x80; - if (c2 > 0) - *p++ = c2 | 0x80; + CHARSET_DECODER (charset) = vec; + CHARSET_ENCODER (charset) = table; } } else - return -1; - - return (p - str); - CHARSET_DEUNIFIER (charset) = table; ++ CHARSET_DEUNIFIER (charset) = table; } - /* Store multi-byte form of the character C in STR. The caller should - allocate at least 4-byte area at STR in advance. Returns the - length of the multi-byte form. If C is an invalid character code, - signal an error. + /* Read a hexadecimal number (preceded by "0x") from the file FP while + paying attention to comment charcter '#'. */ - Use macro `CHAR_STRING (C, STR)' instead of calling this function - directly if C can be an ASCII character. */ - - int - char_to_string (c, str) - int c; - unsigned char *str; + static INLINE unsigned + read_hex (fp, eof) + FILE *fp; + int *eof; { - int len; - len = char_to_string_1 (c, str); - if (len == -1) - invalid_character (c); - return len; - } + int c; + unsigned n; + while ((c = getc (fp)) != EOF) + { + if (c == '#') + { + while ((c = getc (fp)) != EOF && c != '\n'); + } + else if (c == '0') + { + if ((c = getc (fp)) == EOF || c == 'x') + break; + } - } ++ } + if (c == EOF) + { + *eof = 1; + return 0; + } + *eof = 0; + n = 0; + if (c == 'x') + while ((c = getc (fp)) != EOF && isxdigit (c)) + n = ((n << 4) + | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)); + else + while ((c = getc (fp)) != EOF && isdigit (c)) + n = (n * 10) + c - '0'; + if (c != EOF) + ungetc (c, fp); + return n; + } - /* Return the non-ASCII character corresponding to multi-byte form at - STR of length LEN. If ACTUAL_LEN is not NULL, store the byte - length of the multibyte form in *ACTUAL_LEN. - Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling - this function directly if you want ot handle ASCII characters as - well. */ + /* Return a mapping vector for CHARSET loaded from MAPFILE. + Each line of MAPFILE has this form + 0xAAAA 0xCCCC + where 0xAAAA is a code-point and 0xCCCC is the corresponding + character code, or this form + 0xAAAA-0xBBBB 0xCCCC + where 0xAAAA and 0xBBBB are code-points specifying a range, and + 0xCCCC is the first character code of the range. - int - string_to_char (str, len, actual_len) - const unsigned char *str; - int len, *actual_len; - { - int c, bytes, charset, c1, c2; + The returned vector has this form: + [ CODE1 CHAR1 CODE2 CHAR2 .... ] + where CODE1 is a code-point or a cons of code-points specifying a + range. */ - SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2); - c = MAKE_CHAR (charset, c1, c2); - if (actual_len) - *actual_len = bytes; - return c; - } + extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object)); - /* Return the length of the multi-byte form at string STR of length LEN. - Use the macro MULTIBYTE_FORM_LENGTH instead. */ - int - multibyte_form_length (str, len) - const unsigned char *str; - int len; + static void + load_charset_map_from_file (charset, mapfile, control_flag) + struct charset *charset; + Lisp_Object mapfile; + int control_flag; { - int bytes; - - PARSE_MULTIBYTE_SEQ (str, len, bytes); - return bytes; - } - - /* Check multibyte form at string STR of length LEN and set variables - pointed by CHARSET, C1, and C2 to charset and position codes of the - character at STR, and return 0. If there's no multibyte character, - return -1. This should be used only in the macro SPLIT_STRING - which checks range of STR in advance. */ + unsigned min_code = CHARSET_MIN_CODE (charset); + unsigned max_code = CHARSET_MAX_CODE (charset); + int fd; + FILE *fp; + int eof; + Lisp_Object suffixes; + struct charset_map_entries *head, *entries; + int n_entries; + + suffixes = Fcons (build_string (".map"), + Fcons (build_string (".TXT"), Qnil)); + + fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes, - NULL, 0); ++ NULL, Qnil); + if (fd < 0 + || ! (fp = fdopen (fd, "r"))) + { + add_to_log ("Failure in loading charset map: %S", mapfile, Qnil); + return; + } - int - split_string (str, len, charset, c1, c2) - const unsigned char *str; - unsigned char *c1, *c2; - int len, *charset; - { - register int bytes, cs, code1, code2 = -1; + head = entries = ((struct charset_map_entries *) + alloca (sizeof (struct charset_map_entries))); + n_entries = 0; + eof = 0; + while (1) + { + unsigned from, to; + int c; + int idx; - SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2); - if (cs == CHARSET_ASCII) - return -1; - *charset = cs; - *c1 = code1; - *c2 = code2; - return 0; - } + from = read_hex (fp, &eof); + if (eof) + break; + if (getc (fp) == '-') + to = read_hex (fp, &eof); + else + to = from; + c = (int) read_hex (fp, &eof); - /* Return 1 iff character C has valid printable glyph. - Use the macro CHAR_PRINTABLE_P instead. */ - int - char_printable_p (c) - int c; - { - int charset, c1, c2; + if (from < min_code || to > max_code || from > to || c > MAX_CHAR) + continue; - if (ASCII_BYTE_P (c)) - return 1; - else if (SINGLE_BYTE_CHAR_P (c)) - return 0; - else if (c >= MAX_CHAR) - return 0; + if (n_entries > 0 && (n_entries % 0x10000) == 0) + { + entries->next = ((struct charset_map_entries *) + alloca (sizeof (struct charset_map_entries))); + entries = entries->next; + } + idx = n_entries % 0x10000; + entries->entry[idx].from = from; + entries->entry[idx].to = to; + entries->entry[idx].c = c; + n_entries++; + } + fclose (fp); + close (fd); - SPLIT_CHAR (c, charset, c1, c2); - if (! CHARSET_DEFINED_P (charset)) - return 0; - if (CHARSET_CHARS (charset) == 94 - ? c1 <= 32 || c1 >= 127 - : c1 < 32) - return 0; - if (CHARSET_DIMENSION (charset) == 2 - && (CHARSET_CHARS (charset) == 94 - ? c2 <= 32 || c2 >= 127 - : c2 < 32)) - return 0; - return 1; + load_charset_map (charset, head, n_entries, control_flag); } - /* Translate character C by translation table TABLE. If C - is negative, translate a character specified by CHARSET, C1, and C2 - (C1 and C2 are code points of the character). If no translation is - found in TABLE, return C. */ - int - translate_char (table, c, charset, c1, c2) - Lisp_Object table; - int c, charset, c1, c2; + static void + load_charset_map_from_vector (charset, vec, control_flag) + struct charset *charset; + Lisp_Object vec; + int control_flag; { - Lisp_Object ch; - int alt_charset, alt_c1, alt_c2, dimension; - - if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F)); - if (!CHAR_TABLE_P (table) - || (ch = Faref (table, make_number (c)), !NATNUMP (ch))) - return c; - - SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2); - dimension = CHARSET_DIMENSION (alt_charset); - if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0)) - /* CH is not a generic character, just return it. */ - return XFASTINT (ch); - - /* Since CH is a generic character, we must return a specific - charater which has the same position codes as C from CH. */ - if (charset < 0) - SPLIT_CHAR (c, charset, c1, c2); - if (dimension != CHARSET_DIMENSION (charset)) - /* We can't make such a character because of dimension mismatch. */ - return c; - return MAKE_CHAR (alt_charset, c1, c2); - } + unsigned min_code = CHARSET_MIN_CODE (charset); + unsigned max_code = CHARSET_MAX_CODE (charset); + struct charset_map_entries *head, *entries; + int n_entries; + int len = ASIZE (vec); + int i; - /* Convert the unibyte character C to multibyte based on - Vnonascii_translation_table or nonascii_insert_offset. If they can't - convert C to a valid multibyte character, convert it based on - DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */ + if (len % 2 == 1) + { + add_to_log ("Failure in loading charset map: %V", vec, Qnil); + return; + } - int - unibyte_char_to_multibyte (c) - int c; - { - if (c < 0400 && c >= 0200) + head = entries = ((struct charset_map_entries *) + alloca (sizeof (struct charset_map_entries))); + n_entries = 0; + for (i = 0; i < len; i += 2) { - int c_save = c; + Lisp_Object val, val2; + unsigned from, to; + int c; + int idx; - if (! NILP (Vnonascii_translation_table)) + val = AREF (vec, i); + if (CONSP (val)) { - c = XINT (Faref (Vnonascii_translation_table, make_number (c))); - if (c >= 0400 && ! char_valid_p (c, 0)) - c = c_save + DEFAULT_NONASCII_INSERT_OFFSET; + val2 = XCDR (val); + val = XCAR (val); + CHECK_NATNUM (val); + CHECK_NATNUM (val2); + from = XFASTINT (val); + to = XFASTINT (val2); } - else if (c >= 0240 && nonascii_insert_offset > 0) + else { - c += nonascii_insert_offset; - if (c < 0400 || ! char_valid_p (c, 0)) - c = c_save + DEFAULT_NONASCII_INSERT_OFFSET; + CHECK_NATNUM (val); + from = to = XFASTINT (val); } - else if (c >= 0240) - c = c_save + DEFAULT_NONASCII_INSERT_OFFSET; - } - return c; - } + val = AREF (vec, i + 1); + CHECK_NATNUM (val); + c = XFASTINT (val); + if (from < min_code || to > max_code || from > to || c > MAX_CHAR) + continue; - /* Convert the multibyte character C to unibyte 8-bit character based - on Vnonascii_translation_table or nonascii_insert_offset. If - REV_TBL is non-nil, it should be a reverse table of - Vnonascii_translation_table, i.e. what given by: - Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */ + if ((n_entries % 0x10000) == 0) + { + entries->next = ((struct charset_map_entries *) + alloca (sizeof (struct charset_map_entries))); + entries = entries->next; + } + idx = n_entries % 0x10000; + entries->entry[idx].from = from; + entries->entry[idx].to = to; + entries->entry[idx].c = c; + n_entries++; + } - int - multibyte_char_to_unibyte (c, rev_tbl) - int c; - Lisp_Object rev_tbl; + load_charset_map (charset, head, n_entries, control_flag); + } + + static void + load_charset (charset) + struct charset *charset; { - if (!SINGLE_BYTE_CHAR_P (c)) + if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED) { - int c_save = c; + Lisp_Object map; - if (! CHAR_TABLE_P (rev_tbl) - && CHAR_TABLE_P (Vnonascii_translation_table)) - rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table, - make_number (0)); - if (CHAR_TABLE_P (rev_tbl)) - { - Lisp_Object temp; - temp = Faref (rev_tbl, make_number (c)); - if (INTEGERP (temp)) - c = XINT (temp); - if (c >= 256) - c = (c_save & 0177) + 0200; - } + map = CHARSET_MAP (charset); + if (STRINGP (map)) + load_charset_map_from_file (charset, map, 1); else - { - if (nonascii_insert_offset > 0) - c -= nonascii_insert_offset; - if (c < 128 || c >= 256) - c = (c_save & 0177) + 0200; - } + load_charset_map_from_vector (charset, map, 1); + CHARSET_METHOD (charset) = CHARSET_METHOD_MAP; } + } - return c; + + DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0, + doc: /* Return non-nil if and only if OBJECT is a charset.*/) + (object) + Lisp_Object object; + { + return (CHARSETP (object) ? Qt : Qnil); } - - /* Update the table Vcharset_table with the given arguments (see the - document of `define-charset' for the meaning of each argument). - Several other table contents are also updated. The caller should - check the validity of CHARSET-ID and the remaining arguments in - advance. */ void - update_charset_table (charset_id, dimension, chars, width, direction, - iso_final_char, iso_graphic_plane, - short_name, long_name, description) - Lisp_Object charset_id, dimension, chars, width, direction; - Lisp_Object iso_final_char, iso_graphic_plane; - Lisp_Object short_name, long_name, description; + map_charset_chars (c_function, function, arg, + charset, from, to) + void (*c_function) P_ ((Lisp_Object, Lisp_Object)); + Lisp_Object function, arg; + struct charset *charset; + unsigned from, to; - { - int charset = XINT (charset_id); - int bytes; - unsigned char leading_code_base, leading_code_ext; - - if (NILP (CHARSET_TABLE_ENTRY (charset))) - CHARSET_TABLE_ENTRY (charset) - = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil); - - if (NILP (long_name)) - long_name = short_name; - if (NILP (description)) - description = long_name; - - /* Get byte length of multibyte form, base leading-code, and - extended leading-code of the charset. See the comment under the - title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */ - bytes = XINT (dimension); - if (charset < MIN_CHARSET_PRIVATE_DIMENSION1) + Lisp_Object range; + int partial; + - if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED) ++ if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED) + load_charset (charset); + + partial = (from > CHARSET_MIN_CODE (charset) + || to < CHARSET_MAX_CODE (charset)); + + if (CHARSET_UNIFIED_P (charset) + && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset))) { - /* Official charset, it doesn't have an extended leading-code. */ - if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC) - bytes += 1; /* For a base leading-code. */ - leading_code_base = charset; - leading_code_ext = 0; + map_char_table_for_charset (c_function, function, + CHARSET_DEUNIFIER (charset), arg, + partial ? charset : NULL, from, to); } - else + + if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET) { - /* Private charset. */ - bytes += 2; /* For base and extended leading-codes. */ - leading_code_base - = (charset < LEADING_CODE_EXT_12 - ? LEADING_CODE_PRIVATE_11 - : (charset < LEADING_CODE_EXT_21 - ? LEADING_CODE_PRIVATE_12 - : (charset < LEADING_CODE_EXT_22 - ? LEADING_CODE_PRIVATE_21 - : LEADING_CODE_PRIVATE_22))); - leading_code_ext = charset; - if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes) - error ("Invalid dimension for the charset-ID %d", charset); + int from_idx = CODE_POINT_TO_INDEX (charset, from); + int to_idx = CODE_POINT_TO_INDEX (charset, to); + int from_c = from_idx + CHARSET_CODE_OFFSET (charset); + int to_c = to_idx + CHARSET_CODE_OFFSET (charset); + + range = Fcons (make_number (from_c), make_number (to_c)); + if (NILP (function)) + (*c_function) (range, arg); + else + call2 (function, range, arg); } - - CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id; - CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes); - CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension; - CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars; - CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width; - CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction; - CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX) - = make_number (leading_code_base); - CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX) - = make_number (leading_code_ext); - CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char; - CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX) - = iso_graphic_plane; - CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name; - CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name; - CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description; - CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil; - - { - /* If we have already defined a charset which has the same - DIMENSION, CHARS and ISO-FINAL-CHAR but the different - DIRECTION, we must update the entry REVERSE-CHARSET of both - charsets. If there's no such charset, the value of the entry - is set to nil. */ - int i; - - for (i = 0; i <= MAX_CHARSET; i++) - if (!NILP (CHARSET_TABLE_ENTRY (i))) + else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP) + { + if (! CHAR_TABLE_P (CHARSET_ENCODER (charset))) + return; + if (CHARSET_ASCII_COMPATIBLE_P (charset) && from <= 127) { - if (CHARSET_DIMENSION (i) == XINT (dimension) - && CHARSET_CHARS (i) == XINT (chars) - && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char) - && CHARSET_DIRECTION (i) != XINT (direction)) - { - CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX) - = make_number (i); - CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id; - break; - } - } - if (i > MAX_CHARSET) - /* No such a charset. */ - CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX) - = make_number (-1); - } + range = Fcons (make_number (from), make_number (to)); + if (to >= 128) + XSETCAR (range, make_number (127)); - if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC - && charset < MIN_CHARSET_PRIVATE_DIMENSION1) + if (NILP (function)) + (*c_function) (range, arg); + else + call2 (function, range, arg); + } + map_char_table_for_charset (c_function, function, + CHARSET_ENCODER (charset), arg, + partial ? charset : NULL, from, to); + } + else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET) { - bytes_by_char_head[leading_code_base] = bytes; - width_by_char_head[leading_code_base] = XINT (width); - - /* Update table emacs_code_class. */ - emacs_code_class[charset] = (bytes == 2 - ? EMACS_leading_code_2 - : (bytes == 3 - ? EMACS_leading_code_3 - : EMACS_leading_code_4)); + Lisp_Object subset_info; + int offset; + + subset_info = CHARSET_SUBSET (charset); + charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0))); + offset = XINT (AREF (subset_info, 3)); + from -= offset; + if (from < XFASTINT (AREF (subset_info, 1))) + from = XFASTINT (AREF (subset_info, 1)); + to -= offset; + if (to > XFASTINT (AREF (subset_info, 2))) + to = XFASTINT (AREF (subset_info, 2)); + map_charset_chars (c_function, function, arg, charset, from, to); } + else /* i.e. CHARSET_METHOD_SUPERSET */ + { + Lisp_Object parents; - /* Update table iso_charset_table. */ - if (XINT (iso_final_char) >= 0 - && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0) - ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset; + for (parents = CHARSET_SUPERSET (charset); CONSP (parents); + parents = XCDR (parents)) + { + int offset; + unsigned this_from, this_to; + + charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents)))); + offset = XINT (XCDR (XCAR (parents))); + this_from = from - offset; + this_to = to - offset; + if (this_from < CHARSET_MIN_CODE (charset)) + this_from = CHARSET_MIN_CODE (charset); + if (this_to > CHARSET_MAX_CODE (charset)) + this_to = CHARSET_MAX_CODE (charset); + map_charset_chars (c_function, function, arg, charset, from, to); + } + } } - - #ifdef emacs + DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0, + doc: /* Call FUNCTION for all characters in CHARSET. + FUNCTION is called with an argument RANGE and the optional 3rd + argument ARG. - /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL - is invalid. */ - int - get_charset_id (charset_symbol) - Lisp_Object charset_symbol; - { - Lisp_Object val; - int charset; - - /* This originally used a ?: operator, but reportedly the HP-UX - compiler version HP92453-01 A.10.32.22 miscompiles that. */ - if (SYMBOLP (charset_symbol) - && VECTORP (val = Fget (charset_symbol, Qcharset)) - && CHARSET_VALID_P (charset = - XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]))) - return charset; - else - return -1; - } + RANGE is a cons (FROM . TO), where FROM and TO indicate a range of + characters contained in CHARSET. - /* Return an identification number for a new private charset of - DIMENSION and WIDTH. If there's no more room for the new charset, - return 0. */ - Lisp_Object - get_new_private_charset_id (dimension, width) - int dimension, width; + The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the -range of code points of targer characters. */) ++range of code points of target characters. */) + (function, charset, arg, from_code, to_code) + Lisp_Object function, charset, arg, from_code, to_code; { - int charset, from, to; + struct charset *cs; + unsigned from, to; - if (dimension == 1) + CHECK_CHARSET_GET_CHARSET (charset, cs); + if (NILP (from_code)) + from = CHARSET_MIN_CODE (cs); + else { - from = LEADING_CODE_EXT_11; - to = LEADING_CODE_EXT_21; + CHECK_NATNUM (from_code); + from = XINT (from_code); + if (from < CHARSET_MIN_CODE (cs)) + from = CHARSET_MIN_CODE (cs); } + if (NILP (to_code)) + to = CHARSET_MAX_CODE (cs); else { - from = LEADING_CODE_EXT_21; - to = LEADING_CODE_EXT_MAX + 1; + CHECK_NATNUM (to_code); + to = XINT (to_code); + if (to > CHARSET_MAX_CODE (cs)) + to = CHARSET_MAX_CODE (cs); } + map_charset_chars (NULL, function, arg, cs, from, to); + return Qnil; + } - for (charset = from; charset < to; charset++) - if (!CHARSET_DEFINED_P (charset)) break; - return make_number (charset < to ? charset : 0); - } + /* Define a charset according to the arguments. The Nth argument is + the Nth attribute of the charset (the last attribute `charset-id' + is not included). See the docstring of `define-charset' for the + detail. */ - DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0, - doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR. - If CHARSET-ID is nil, it is decided automatically, which means CHARSET is - treated as a private charset. - INFO-VECTOR is a vector of the format: - [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE - SHORT-NAME LONG-NAME DESCRIPTION] - The meanings of each elements is as follows: - DIMENSION (integer) is the number of bytes to represent a character: 1 or 2. - CHARS (integer) is the number of characters in a dimension: 94 or 96. - WIDTH (integer) is the number of columns a character in the charset - occupies on the screen: one of 0, 1, and 2. - - DIRECTION (integer) is the rendering direction of characters in the - charset when rendering. If 0, render from left to right, else - render from right to left. - - ISO-FINAL-CHAR (character) is the final character of the - corresponding ISO 2022 charset. - It may be -1 if the charset is internal use only. - - ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked - while encoding to variants of ISO 2022 coding system, one of the - following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). - It may be -1 if the charset is internal use only. - - SHORT-NAME (string) is the short name to refer to the charset. - - LONG-NAME (string) is the long name to refer to the charset. - - DESCRIPTION (string) is the description string of the charset. */) - (charset_id, charset_symbol, info_vector) - Lisp_Object charset_id, charset_symbol, info_vector; + DEFUN ("define-charset-internal", Fdefine_charset_internal, + Sdefine_charset_internal, charset_arg_max, MANY, 0, + doc: /* For internal use only. + usage: (define-charset-internal ...) */) + (nargs, args) + int nargs; + Lisp_Object *args; { - Lisp_Object *vec; - - if (!NILP (charset_id)) - CHECK_NUMBER (charset_id); - CHECK_SYMBOL (charset_symbol); - CHECK_VECTOR (info_vector); + /* Charset attr vector. */ + Lisp_Object attrs; + Lisp_Object val; + unsigned hash_code; + struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table); + int i, j; + struct charset charset; + int id; + int dimension; + int new_definition_p; + int nchars; + + if (nargs != charset_arg_max) + return Fsignal (Qwrong_number_of_arguments, + Fcons (intern ("define-charset-internal"), + make_number (nargs))); + + attrs = Fmake_vector (make_number (charset_attr_max), Qnil); + + CHECK_SYMBOL (args[charset_arg_name]); + ASET (attrs, charset_name, args[charset_arg_name]); + + val = args[charset_arg_code_space]; + for (i = 0, dimension = 0, nchars = 1; i < 4; i++) + { + int min_byte, max_byte; + + min_byte = XINT (Faref (val, make_number (i * 2))); + max_byte = XINT (Faref (val, make_number (i * 2 + 1))); + if (min_byte < 0 || min_byte > max_byte || max_byte >= 256) + error ("Invalid :code-space value"); + charset.code_space[i * 4] = min_byte; + charset.code_space[i * 4 + 1] = max_byte; + charset.code_space[i * 4 + 2] = max_byte - min_byte + 1; + nchars *= charset.code_space[i * 4 + 2]; + charset.code_space[i * 4 + 3] = nchars; + if (max_byte > 0) + dimension = i + 1; + } - if (! NILP (charset_id)) + val = args[charset_arg_dimension]; + if (NILP (val)) + charset.dimension = dimension; + else { - if (! CHARSET_VALID_P (XINT (charset_id))) - error ("Invalid CHARSET: %d", XINT (charset_id)); - else if (CHARSET_DEFINED_P (XINT (charset_id))) - error ("Already defined charset: %d", XINT (charset_id)); + CHECK_NATNUM (val); + charset.dimension = XINT (val); + if (charset.dimension < 1 || charset.dimension > 4) + args_out_of_range_3 (val, make_number (1), make_number (4)); } - vec = XVECTOR (info_vector)->contents; - if (XVECTOR (info_vector)->size != 9 - || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2) - || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96) - || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2) - || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1) - || !INTEGERP (vec[4]) - || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')) - || !INTEGERP (vec[5]) - || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1) - || !STRINGP (vec[6]) - || !STRINGP (vec[7]) - || !STRINGP (vec[8])) - error ("Invalid info-vector argument for defining charset %s", - SDATA (SYMBOL_NAME (charset_symbol))); - - if (NILP (charset_id)) + charset.code_linear_p + = (charset.dimension == 1 + || (charset.code_space[2] == 256 + && (charset.dimension == 2 + || (charset.code_space[6] == 256 + && (charset.dimension == 3 + || charset.code_space[10] == 256))))); + + if (! charset.code_linear_p) { - charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2])); - if (XINT (charset_id) == 0) - error ("There's no room for a new private charset %s", - SDATA (SYMBOL_NAME (charset_symbol))); + charset.code_space_mask = (unsigned char *) xmalloc (256); + bzero (charset.code_space_mask, 256); + for (i = 0; i < 4; i++) + for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1]; + j++) + charset.code_space_mask[j] |= (1 << i); } - update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3], - vec[4], vec[5], vec[6], vec[7], vec[8]); - Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id))); - CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol; - Vcharset_list = Fcons (charset_symbol, Vcharset_list); - Fupdate_coding_systems_internal (); - return Qnil; - } + charset.iso_chars_96 = charset.code_space[2] == 96; - DEFUN ("generic-character-list", Fgeneric_character_list, - Sgeneric_character_list, 0, 0, 0, - doc: /* Return a list of all possible generic characters. - It includes a generic character for a charset not yet defined. */) - () - { - return Vgeneric_character_list; - } + charset.min_code = (charset.code_space[0] + | (charset.code_space[4] << 8) + | (charset.code_space[8] << 16) + | (charset.code_space[12] << 24)); + charset.max_code = (charset.code_space[1] + | (charset.code_space[5] << 8) + | (charset.code_space[9] << 16) + | (charset.code_space[13] << 24)); + charset.char_index_offset = 0; - DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char, - Sget_unused_iso_final_char, 2, 2, 0, - doc: /* Return an unsed ISO's final char for a charset of DIMENISION and CHARS. - DIMENSION is the number of bytes to represent a character: 1 or 2. - CHARS is the number of characters in a dimension: 94 or 96. + val = args[charset_arg_min_code]; + if (! NILP (val)) + { + unsigned code; - This final char is for private use, thus the range is `0' (48) .. `?' (63). - If there's no unused final char for the specified kind of charset, - return nil. */) - (dimension, chars) - Lisp_Object dimension, chars; - { - int final_char; + if (INTEGERP (val)) + code = XINT (val); + else + { + CHECK_CONS (val); - CHECK_NUMBER (XCAR (val)); - CHECK_NUMBER (XCDR (val)); ++ CHECK_NUMBER_CAR (val); ++ CHECK_NUMBER_CDR (val); + code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val))); + } + if (code < charset.min_code + || code > charset.max_code) + args_out_of_range_3 (make_number (charset.min_code), + make_number (charset.max_code), val); + charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code); + charset.min_code = code; + } - CHECK_NUMBER (dimension); - CHECK_NUMBER (chars); - if (XINT (dimension) != 1 && XINT (dimension) != 2) - error ("Invalid charset dimension %d, it should be 1 or 2", - XINT (dimension)); - if (XINT (chars) != 94 && XINT (chars) != 96) - error ("Invalid charset chars %d, it should be 94 or 96", - XINT (chars)); - for (final_char = '0'; final_char <= '?'; final_char++) + val = args[charset_arg_max_code]; + if (! NILP (val)) { - if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0) - break; + unsigned code; + + if (INTEGERP (val)) + code = XINT (val); + else + { + CHECK_CONS (val); - CHECK_NUMBER (XCAR (val)); - CHECK_NUMBER (XCDR (val)); ++ CHECK_NUMBER_CAR (val); ++ CHECK_NUMBER_CDR (val); + code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val))); + } + if (code < charset.min_code + || code > charset.max_code) + args_out_of_range_3 (make_number (charset.min_code), + make_number (charset.max_code), val); + charset.max_code = code; } - return (final_char <= '?' ? make_number (final_char) : Qnil); - } - DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset, - 4, 4, 0, - doc: /* Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET. - CHARSET should be defined by `defined-charset' in advance. */) - (dimension, chars, final_char, charset_symbol) - Lisp_Object dimension, chars, final_char, charset_symbol; - { - int charset; + charset.compact_codes_p = charset.max_code < 0x1000000; - CHECK_NUMBER (dimension); - CHECK_NUMBER (chars); - CHECK_NUMBER (final_char); - CHECK_SYMBOL (charset_symbol); + val = args[charset_arg_invalid_code]; + if (NILP (val)) + { + if (charset.min_code > 0) + charset.invalid_code = 0; + else + { + XSETINT (val, charset.max_code + 1); + if (XINT (val) == charset.max_code + 1) + charset.invalid_code = charset.max_code + 1; + else + error ("Attribute :invalid-code must be specified"); + } + } + else + { + CHECK_NATNUM (val); + charset.invalid_code = XFASTINT (val); + } - if (XINT (dimension) != 1 && XINT (dimension) != 2) - error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension)); - if (XINT (chars) != 94 && XINT (chars) != 96) - error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars)); - if (XINT (final_char) < '0' || XFASTINT (final_char) > '~') - error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars)); - if ((charset = get_charset_id (charset_symbol)) < 0) - error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset_symbol))); + val = args[charset_arg_iso_final]; + if (NILP (val)) + charset.iso_final = -1; + else + { + CHECK_NUMBER (val); + if (XINT (val) < '0' || XINT (val) > 127) + error ("Invalid iso-final-char: %d", XINT (val)); + charset.iso_final = XINT (val); + } - + - ISO_CHARSET_TABLE (dimension, chars, final_char) = charset; - return Qnil; - } + val = args[charset_arg_iso_revision]; + if (NILP (val)) + charset.iso_revision = -1; + else + { + CHECK_NUMBER (val); + if (XINT (val) > 63) + args_out_of_range (make_number (63), val); + charset.iso_revision = XINT (val); + } - /* Return information about charsets in the text at PTR of NBYTES - bytes, which are NCHARS characters. The value is: + val = args[charset_arg_emacs_mule_id]; + if (NILP (val)) + charset.emacs_mule_id = -1; + else + { + CHECK_NATNUM (val); + if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256) + error ("Invalid emacs-mule-id: %d", XINT (val)); + charset.emacs_mule_id = XINT (val); + } - 0: Each character is represented by one byte. This is always - true for unibyte text. - 1: No charsets other than ascii eight-bit-control, - eight-bit-graphic, and latin-1 are found. - 2: Otherwise. + charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]); - In addition, if CHARSETS is nonzero, for each found charset N, set - CHARSETS[N] to 1. For that, callers should allocate CHARSETS - (MAX_CHARSET + 1 elements) in advance. It may lookup a translation - table TABLE if supplied. For invalid charsets, set CHARSETS[1] to - 1 (note that there's no charset whose ID is 1). */ + charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]); - int - find_charset_in_text (ptr, nchars, nbytes, charsets, table) - const unsigned char *ptr; - int nchars, nbytes, *charsets; - Lisp_Object table; - { - if (nchars == nbytes) + charset.unified_p = 0; + + bzero (charset.fast_map, sizeof (charset.fast_map)); + + if (! NILP (args[charset_arg_code_offset])) + { + val = args[charset_arg_code_offset]; + CHECK_NUMBER (val); + + charset.method = CHARSET_METHOD_OFFSET; + charset.code_offset = XINT (val); + + i = CODE_POINT_TO_INDEX (&charset, charset.min_code); + charset.min_char = i + charset.code_offset; + i = CODE_POINT_TO_INDEX (&charset, charset.max_code); + charset.max_char = i + charset.code_offset; + if (charset.max_char > MAX_CHAR) + error ("Unsupported max char: %d", charset.max_char); + + i = (charset.min_char >> 7) << 7; + for (; i < 0x10000 && i <= charset.max_char; i += 128) + CHARSET_FAST_MAP_SET (i, charset.fast_map); + i = (i >> 12) << 12; + for (; i <= charset.max_char; i += 0x1000) + CHARSET_FAST_MAP_SET (i, charset.fast_map); + } + else if (! NILP (args[charset_arg_map])) + { + val = args[charset_arg_map]; + ASET (attrs, charset_map, val); + if (STRINGP (val)) + load_charset_map_from_file (&charset, val, 0); + else + load_charset_map_from_vector (&charset, val, 0); + charset.method = CHARSET_METHOD_MAP_DEFERRED; + } + else if (! NILP (args[charset_arg_subset])) + { + Lisp_Object parent; + Lisp_Object parent_min_code, parent_max_code, parent_code_offset; + struct charset *parent_charset; + + val = args[charset_arg_subset]; + parent = Fcar (val); + CHECK_CHARSET_GET_CHARSET (parent, parent_charset); + parent_min_code = Fnth (make_number (1), val); + CHECK_NATNUM (parent_min_code); + parent_max_code = Fnth (make_number (2), val); + CHECK_NATNUM (parent_max_code); + parent_code_offset = Fnth (make_number (3), val); + CHECK_NUMBER (parent_code_offset); + val = Fmake_vector (make_number (4), Qnil); + ASET (val, 0, make_number (parent_charset->id)); + ASET (val, 1, parent_min_code); + ASET (val, 2, parent_max_code); + ASET (val, 3, parent_code_offset); + ASET (attrs, charset_subset, val); + + charset.method = CHARSET_METHOD_SUBSET; + /* Here, we just copy the parent's fast_map. It's not accurate, + but at least it works for quickly detecting which character + DOESN'T belong to this charset. */ + for (i = 0; i < 190; i++) + charset.fast_map[i] = parent_charset->fast_map[i]; + + /* We also copy these for parents. */ + charset.min_char = parent_charset->min_char; + charset.max_char = parent_charset->max_char; + } + else if (! NILP (args[charset_arg_superset])) { - if (charsets && nbytes > 0) + val = args[charset_arg_superset]; + charset.method = CHARSET_METHOD_SUPERSET; + val = Fcopy_sequence (val); + ASET (attrs, charset_superset, val); + + charset.min_char = MAX_CHAR; + charset.max_char = 0; + for (; ! NILP (val); val = Fcdr (val)) { - const unsigned char *endp = ptr + nbytes; - int maskbits = 0; + Lisp_Object elt, car_part, cdr_part; + int this_id, offset; + struct charset *this_charset; - while (ptr < endp && maskbits != 7) + elt = Fcar (val); + if (CONSP (elt)) { - maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4); - ptr++; + car_part = XCAR (elt); + cdr_part = XCDR (elt); + CHECK_CHARSET_GET_ID (car_part, this_id); + CHECK_NUMBER (cdr_part); + offset = XINT (cdr_part); } - - if (maskbits & 1) - charsets[CHARSET_ASCII] = 1; - if (maskbits & 2) - charsets[CHARSET_8_BIT_CONTROL] = 1; - if (maskbits & 4) - charsets[CHARSET_8_BIT_GRAPHIC] = 1; + else + { + CHECK_CHARSET_GET_ID (elt, this_id); + offset = 0; + } + XSETCAR (val, Fcons (make_number (this_id), make_number (offset))); + + this_charset = CHARSET_FROM_ID (this_id); + if (charset.min_char > this_charset->min_char) + charset.min_char = this_charset->min_char; + if (charset.max_char < this_charset->max_char) + charset.max_char = this_charset->max_char; + for (i = 0; i < 190; i++) + charset.fast_map[i] |= this_charset->fast_map[i]; } - return 0; } else - { - int return_val = 1; - int bytes, charset, c1, c2; + error ("None of :code-offset, :map, :parents are specified"); - if (! CHAR_TABLE_P (table)) - table = Qnil; + val = args[charset_arg_unify_map]; + if (! NILP (val) && !STRINGP (val)) + CHECK_VECTOR (val); + ASET (attrs, charset_unify_map, val); - while (nchars-- > 0) - { - SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2); - ptr += bytes; + CHECK_LIST (args[charset_arg_plist]); + ASET (attrs, charset_plist, args[charset_arg_plist]); - if (!CHARSET_DEFINED_P (charset)) - charset = 1; - else if (! NILP (table)) - { - int c = translate_char (table, -1, charset, c1, c2); - if (c >= 0) - charset = CHAR_CHARSET (c); - } + charset.hash_index = hash_lookup (hash_table, args[charset_arg_name], + &hash_code); + if (charset.hash_index >= 0) + { + new_definition_p = 0; + id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name])); + HASH_VALUE (hash_table, charset.hash_index) = attrs; + } + else + { + charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs, + hash_code); + if (charset_table_used == charset_table_size) + { + struct charset *new_table + = (struct charset *) xmalloc (sizeof (struct charset) + * (charset_table_size + 16)); + bcopy (charset_table, new_table, + sizeof (struct charset) * charset_table_size); + charset_table_size += 16; + charset_table = new_table; + } + id = charset_table_used++; + new_definition_p = 1; + } - if (return_val == 1 - && charset != CHARSET_ASCII - && charset != CHARSET_8_BIT_CONTROL - && charset != CHARSET_8_BIT_GRAPHIC - && charset != charset_latin_iso8859_1) - return_val = 2; + ASET (attrs, charset_id, make_number (id)); + charset.id = id; + charset_table[id] = charset; - if (charsets) - charsets[charset] = 1; - else if (return_val == 2) - break; + if (charset.iso_final >= 0) + { + ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96, + charset.iso_final) = id; + if (new_definition_p) + Viso_2022_charset_list = nconc2 (Viso_2022_charset_list, + Fcons (make_number (id), Qnil)); + if (ISO_CHARSET_TABLE (1, 0, 'J') == id) + charset_jisx0201_roman = id; + else if (ISO_CHARSET_TABLE (2, 0, '@') == id) + charset_jisx0208_1978 = id; + else if (ISO_CHARSET_TABLE (2, 0, 'B') == id) + charset_jisx0208 = id; + } + + if (charset.emacs_mule_id >= 0) + { + emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id); + if (charset.emacs_mule_id < 0xA0) + emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1; + if (new_definition_p) + Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list, + Fcons (make_number (id), Qnil)); + } + + if (new_definition_p) + { + Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list); - Vcharset_ordered_list = nconc2 (Vcharset_ordered_list, ++ Vcharset_ordered_list = nconc2 (Vcharset_ordered_list, + Fcons (make_number (id), Qnil)); + charset_ordered_list_tick++; + } + + return Qnil; + } + + + /* Same as Fdefine_charset_internal but arguments are more convenient + to call from C (typically in syms_of_charset). This can define a + charset of `offset' method only. Return the ID of the new + charset. */ + + static int + define_charset_internal (name, dimension, code_space, min_code, max_code, + iso_final, iso_revision, emacs_mule_id, + ascii_compatible, supplementary, + code_offset) + Lisp_Object name; + int dimension; + unsigned char *code_space; + unsigned min_code, max_code; + int iso_final, iso_revision, emacs_mule_id; + int ascii_compatible, supplementary; + int code_offset; + { + Lisp_Object args[charset_arg_max]; + Lisp_Object plist[14]; + Lisp_Object val; + int i; + + args[charset_arg_name] = name; + args[charset_arg_dimension] = make_number (dimension); + val = Fmake_vector (make_number (8), make_number (0)); + for (i = 0; i < 8; i++) + ASET (val, i, make_number (code_space[i])); + args[charset_arg_code_space] = val; + args[charset_arg_min_code] = make_number (min_code); + args[charset_arg_max_code] = make_number (max_code); + args[charset_arg_iso_final] + = (iso_final < 0 ? Qnil : make_number (iso_final)); + args[charset_arg_iso_revision] = make_number (iso_revision); + args[charset_arg_emacs_mule_id] + = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id)); + args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil; + args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil; + args[charset_arg_invalid_code] = Qnil; + args[charset_arg_code_offset] = make_number (code_offset); + args[charset_arg_map] = Qnil; + args[charset_arg_subset] = Qnil; + args[charset_arg_superset] = Qnil; + args[charset_arg_unify_map] = Qnil; + + plist[0] = intern (":name"); + plist[1] = args[charset_arg_name]; + plist[2] = intern (":dimension"); + plist[3] = args[charset_arg_dimension]; + plist[4] = intern (":code-space"); + plist[5] = args[charset_arg_code_space]; + plist[6] = intern (":iso-final-char"); + plist[7] = args[charset_arg_iso_final]; + plist[8] = intern (":emacs-mule-id"); + plist[9] = args[charset_arg_emacs_mule_id]; + plist[10] = intern (":ascii-compatible-p"); + plist[11] = args[charset_arg_ascii_compatible_p]; + plist[12] = intern (":code-offset"); + plist[13] = args[charset_arg_code_offset]; + + args[charset_arg_plist] = Flist (14, plist); + Fdefine_charset_internal (charset_arg_max, args); + + return XINT (CHARSET_SYMBOL_ID (name)); + } + + + DEFUN ("define-charset-alias", Fdefine_charset_alias, + Sdefine_charset_alias, 2, 2, 0, + doc: /* Define ALIAS as an alias for charset CHARSET. */) + (alias, charset) + Lisp_Object alias, charset; + { + Lisp_Object attr; + + CHECK_CHARSET_GET_ATTR (charset, attr); + Fputhash (alias, attr, Vcharset_hash_table); + Vcharset_list = Fcons (alias, Vcharset_list); + return Qnil; + } + + + DEFUN ("unibyte-charset", Funibyte_charset, Sunibyte_charset, 0, 0, 0, + doc: /* Return the unibyte charset (set by `set-unibyte-charset'). */) + () + { + return CHARSET_NAME (CHARSET_FROM_ID (charset_unibyte)); + } + + + DEFUN ("set-unibyte-charset", Fset_unibyte_charset, Sset_unibyte_charset, + 1, 1, 0, + doc: /* Set the unibyte charset to CHARSET. + This determines how unibyte/multibyte conversion is done. See also + function `unibyte-charset'. */) + (charset) + Lisp_Object charset; + { + struct charset *cs; + int i, c; + + CHECK_CHARSET_GET_CHARSET (charset, cs); + if (! cs->ascii_compatible_p + || cs->dimension != 1) - error ("Inappropriate unibyte charset: %s", XSYMBOL (charset)->name->data); ++ error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset))); + charset_unibyte = cs->id; + for (i = 128; i < 256; i++) + { + c = DECODE_CHAR (cs, i); + unibyte_to_multibyte_table[i] = (c < 0 ? BYTE8_TO_CHAR (i) : c); + } + + return Qnil; + } + + + DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0, + doc: /* Return the property list of CHARSET. */) + (charset) + Lisp_Object charset; + { + Lisp_Object attrs; + + CHECK_CHARSET_GET_ATTR (charset, attrs); + return CHARSET_ATTR_PLIST (attrs); + } + + + DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0, + doc: /* Set CHARSET's property list to PLIST. */) + (charset, plist) + Lisp_Object charset, plist; + { + Lisp_Object attrs; + + CHECK_CHARSET_GET_ATTR (charset, attrs); + CHARSET_ATTR_PLIST (attrs) = plist; + return plist; + } + + + DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0, + doc: /* Unify characters of CHARSET with Unicode. + This means reading the relevant file and installing the table defined + by CHARSET's `:unify-map' property. + + Optional second arg UNIFY-MAP is a file name string or a vector. It has + the same meaning as the `:unify-map' attribute in the function + `define-charset' (which see). + + Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */) + (charset, unify_map, deunify) + Lisp_Object charset, unify_map, deunify; + { + int id; + struct charset *cs; - ++ + CHECK_CHARSET_GET_ID (charset, id); + cs = CHARSET_FROM_ID (id); + if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED) + load_charset (cs); + if (NILP (deunify) + ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs)) + : ! CHARSET_UNIFIED_P (cs)) + return Qnil; + + CHARSET_UNIFIED_P (cs) = 0; + if (NILP (deunify)) + { + if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET) - error ("Can't unify charset: %s", XSYMBOL (charset)->name->data); ++ error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset))); + if (NILP (unify_map)) + unify_map = CHARSET_UNIFY_MAP (cs); + if (STRINGP (unify_map)) + load_charset_map_from_file (cs, unify_map, 2); + else if (VECTORP (unify_map)) + load_charset_map_from_vector (cs, unify_map, 2); + else if (NILP (unify_map)) + error ("No unify-map for charset"); + else + error ("Bad unify-map arg"); + CHARSET_UNIFIED_P (cs) = 1; + } + else if (CHAR_TABLE_P (Vchar_unify_table)) + { + int min_code = CHARSET_MIN_CODE (cs); + int max_code = CHARSET_MAX_CODE (cs); + int min_char = DECODE_CHAR (cs, min_code); + int max_char = DECODE_CHAR (cs, max_code); - ++ + char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil); + } - ++ + return Qnil; + } + + DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char, + Sget_unused_iso_final_char, 2, 2, 0, + doc: /* + Return an unsed ISO final char for a charset of DIMENISION and CHARS. + DIMENSION is the number of bytes to represent a character: 1 or 2. + CHARS is the number of characters in a dimension: 94 or 96. + + This final char is for private use, thus the range is `0' (48) .. `?' (63). + If there's no unused final char for the specified kind of charset, + return nil. */) + (dimension, chars) + Lisp_Object dimension, chars; + { + int final_char; + + CHECK_NUMBER (dimension); + CHECK_NUMBER (chars); + if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3) + args_out_of_range_3 (dimension, make_number (1), make_number (3)); + if (XINT (chars) != 94 && XINT (chars) != 96) + args_out_of_range_3 (chars, make_number (94), make_number (96)); + for (final_char = '0'; final_char <= '?'; final_char++) + if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0) + break; + return (final_char <= '?' ? make_number (final_char) : Qnil); + } + + static void + check_iso_charset_parameter (dimension, chars, final_char) + Lisp_Object dimension, chars, final_char; + { + CHECK_NATNUM (dimension); + CHECK_NATNUM (chars); + CHECK_NATNUM (final_char); + + if (XINT (dimension) > 3) + error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension)); + if (XINT (chars) != 94 && XINT (chars) != 96) + error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars)); + if (XINT (final_char) < '0' || XINT (final_char) > '~') + error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars)); + } + + + DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset, + 4, 4, 0, + doc: /* + Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET. + CHARSET should be defined by `define-charset' in advance. */) + (dimension, chars, final_char, charset) + Lisp_Object dimension, chars, final_char, charset; + { + int id; + + CHECK_CHARSET_GET_ID (charset, id); + check_iso_charset_parameter (dimension, chars, final_char); + + ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), XINT (final_char)) = id; + return Qnil; + } + + + /* Return information about charsets in the text at PTR of NBYTES + bytes, which are NCHARS characters. The value is: + + 0: Each character is represented by one byte. This is always + true for a unibyte string. For a multibyte string, true if + it contains only ASCII characters. + + 1: No charsets other than ascii, control-1, and latin-1 are + found. + + 2: Otherwise. + */ + + int + string_xstring_p (string) + Lisp_Object string; + { - const unsigned char *p = XSTRING (string)->data; - const unsigned char *endp = p + STRING_BYTES (XSTRING (string)); ++ const unsigned char *p = SDATA (string); ++ const unsigned char *endp = p + SBYTES (string); + struct charset *charset; + - if (XSTRING (string)->size == STRING_BYTES (XSTRING (string))) ++ if (SCHARS (string) == SBYTES (string)) + return 0; + + charset = CHARSET_FROM_ID (charset_iso_8859_1); + while (p < endp) + { + int c = STRING_CHAR_ADVANCE (p); + + /* Fixme: comparison of unsigned expression < 0 is always false */ + if (ENCODE_CHAR (charset, c) < 0) + return 2; + } + return 1; + } + + + /* Find charsets in the string at PTR of NCHARS and NBYTES. + + CHARSETS is a vector. Each element is a cons of CHARSET and + FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t. + FOUND-FLAG t (or nil) means that the corresponding charset is + already found (or not yet found). + + It may lookup a translation table TABLE if supplied. */ + + static void + find_charsets_in_text (ptr, nchars, nbytes, charsets, table) + const unsigned char *ptr; + int nchars, nbytes; + Lisp_Object charsets, table; + { + const unsigned char *pend = ptr + nbytes; + int ncharsets = ASIZE (charsets); + + if (nchars == nbytes) + return; + + while (ptr < pend) + { + int c = STRING_CHAR_ADVANCE (ptr); + int i; + int all_found = 1; + Lisp_Object elt; + + if (!NILP (table)) + c = translate_char (table, c); + for (i = 0; i < ncharsets; i++) + { + elt = AREF (charsets, i); + if (NILP (XCDR (elt))) + { + struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt))); + + if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset)) - XCDR (elt) = Qt; ++ XSETCDR (elt, Qt); + else + all_found = 0; + } } - return return_val; + if (all_found) + break; } } @@@ -942,480 -1482,403 +1482,403 @@@ only `ascii', `eight-bit-control', and CHECK_STRING (str); - bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); - find_charset_in_text (SDATA (str), SCHARS (str), - SBYTES (str), charsets, table); + charsets = Fmake_vector (make_number (charset_table_used), Qnil); + for (i = 0; i < charset_table_used; i++) + ASET (charsets, i, Fcons (make_number (i), Qnil)); - find_charsets_in_text (XSTRING (str)->data, XSTRING (str)->size, - STRING_BYTES (XSTRING (str)), charsets, table); ++ find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str), ++ charsets, table); val = Qnil; - if (charsets[1]) - val = Fcons (Qunknown, val); - for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--) - if (charsets[i]) - val = Fcons (CHARSET_SYMBOL (i), val); - if (charsets[0]) - val = Fcons (Qascii, val); + for (i = charset_table_used - 1; i >= 0; i--) + if (!NILP (XCDR (AREF (charsets, i)))) + val = Fcons (CHARSET_NAME (charset_table + i), val); return val; } - DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0, - doc: /* Return a character made from arguments. - Internal use only. */) - (charset, code1, code2) - Lisp_Object charset, code1, code2; + + /* Return a character correponding to the code-point CODE of + CHARSET. */ + + int + decode_char (charset, code) + struct charset *charset; + unsigned code; { - int charset_id, c1, c2; + int c, char_index; + enum charset_method method = CHARSET_METHOD (charset); - CHECK_NUMBER (charset); - charset_id = XINT (charset); - if (!CHARSET_DEFINED_P (charset_id)) - error ("Invalid charset ID: %d", XINT (charset)); + if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset)) + return -1; - if (NILP (code1)) - c1 = 0; - else - { - CHECK_NUMBER (code1); - c1 = XINT (code1); - } - if (NILP (code2)) - c2 = 0; - else + if (method == CHARSET_METHOD_MAP_DEFERRED) { - CHECK_NUMBER (code2); - c2 = XINT (code2); + load_charset (charset); + method = CHARSET_METHOD (charset); } - if (charset_id == CHARSET_ASCII) + if (method == CHARSET_METHOD_SUBSET) { - if (c1 < 0 || c1 > 0x7F) - goto invalid_code_posints; - return make_number (c1); - } - else if (charset_id == CHARSET_8_BIT_CONTROL) - { - if (NILP (code1)) - c1 = 0x80; - else if (c1 < 0x80 || c1 > 0x9F) - goto invalid_code_posints; - return make_number (c1); + Lisp_Object subset_info; + + subset_info = CHARSET_SUBSET (charset); + charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0))); + code -= XINT (AREF (subset_info, 3)); + if (code < XFASTINT (AREF (subset_info, 1)) + || code > XFASTINT (AREF (subset_info, 2))) + c = -1; + else + c = DECODE_CHAR (charset, code); } - else if (charset_id == CHARSET_8_BIT_GRAPHIC) + else if (method == CHARSET_METHOD_SUPERSET) { - if (NILP (code1)) - c1 = 0xA0; - else if (c1 < 0xA0 || c1 > 0xFF) - goto invalid_code_posints; - return make_number (c1); - } - else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF) - goto invalid_code_posints; - c1 &= 0x7F; - c2 &= 0x7F; - if (c1 == 0 - ? c2 != 0 - : (c2 == 0 - ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20) - : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2))) - goto invalid_code_posints; - return make_number (MAKE_CHAR (charset_id, c1, c2)); - - invalid_code_posints: - error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2); - } + Lisp_Object parents; - DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, - doc: /* Return list of charset and one or two position-codes of CHAR. - If CHAR is invalid as a character code, - return a list of symbol `unknown' and CHAR. */) - (ch) - Lisp_Object ch; - { - int c, charset, c1, c2; + parents = CHARSET_SUPERSET (charset); + c = -1; + for (; CONSP (parents); parents = XCDR (parents)) + { + int id = XINT (XCAR (XCAR (parents))); + int code_offset = XINT (XCDR (XCAR (parents))); + unsigned this_code = code - code_offset; - CHECK_NUMBER (ch); - c = XFASTINT (ch); - if (!CHAR_VALID_P (c, 1)) - return Fcons (Qunknown, Fcons (ch, Qnil)); - SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); - return (c2 >= 0 - ? Fcons (CHARSET_SYMBOL (charset), - Fcons (make_number (c1), Fcons (make_number (c2), Qnil))) - : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil))); - } + charset = CHARSET_FROM_ID (id); + if ((c = DECODE_CHAR (charset, this_code)) >= 0) + break; + } + } + else + { + char_index = CODE_POINT_TO_INDEX (charset, code); + if (char_index < 0) + return -1; - DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0, - doc: /* Return charset of CHAR. */) - (ch) - Lisp_Object ch; - { - CHECK_NUMBER (ch); + if (method == CHARSET_METHOD_MAP) + { + Lisp_Object decoder; - return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch))); - } + decoder = CHARSET_DECODER (charset); + if (! VECTORP (decoder)) + return -1; + c = XINT (AREF (decoder, char_index)); + } + else + { + c = char_index + CHARSET_CODE_OFFSET (charset); + } + } - DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0, - doc: /* Return charset of a character in the current buffer at position POS. - If POS is nil, it defauls to the current point. - If POS is out of range, the value is nil. */) - (pos) - Lisp_Object pos; - { - Lisp_Object ch; - int charset; + if (CHARSET_UNIFIED_P (charset) + && c >= 0) + { + MAYBE_UNIFY_CHAR (c); + } - ch = Fchar_after (pos); - if (! INTEGERP (ch)) - return ch; - charset = CHAR_CHARSET (XINT (ch)); - return CHARSET_SYMBOL (charset); + return c; } - DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0, - doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR. - - ISO 2022's designation sequence (escape sequence) distinguishes charsets - by their DIMENSION, CHARS, and FINAL-CHAR, - where as Emacs distinguishes them by charset symbol. - See the documentation of the function `charset-info' for the meanings of - DIMENSION, CHARS, and FINAL-CHAR. */) - (dimension, chars, final_char) - Lisp_Object dimension, chars, final_char; - { - int charset; - - CHECK_NUMBER (dimension); - CHECK_NUMBER (chars); - CHECK_NUMBER (final_char); + /* Variable used temporarily by the macro ENCODE_CHAR. */ + Lisp_Object charset_work; - if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0) - return Qnil; - return CHARSET_SYMBOL (charset); - } + /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to + CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true, + use CHARSET's strict_max_char instead of max_char. */ - /* If GENERICP is nonzero, return nonzero iff C is a valid normal or - generic character. If GENERICP is zero, return nonzero iff C is a - valid normal character. Do not call this function directly, - instead use macro CHAR_VALID_P. */ - int - char_valid_p (c, genericp) - int c, genericp; + unsigned + encode_char (charset, c) + struct charset *charset; + int c; { - int charset, c1, c2; + unsigned code; + enum charset_method method = CHARSET_METHOD (charset); - if (c < 0 || c >= MAX_CHAR) - return 0; - if (SINGLE_BYTE_CHAR_P (c)) - return 1; - SPLIT_CHAR (c, charset, c1, c2); - if (genericp) + if (CHARSET_UNIFIED_P (charset)) { - if (c1) - { - if (c2 <= 0) c2 = 0x20; - } - else + Lisp_Object deunifier, deunified; + + deunifier = CHARSET_DEUNIFIER (charset); + if (! CHAR_TABLE_P (deunifier)) { - if (c2 <= 0) c1 = c2 = 0x20; + Funify_charset (CHARSET_NAME (charset), Qnil, Qnil); + deunifier = CHARSET_DEUNIFIER (charset); } + deunified = CHAR_TABLE_REF (deunifier, c); + if (! NILP (deunified)) + c = XINT (deunified); } - return (CHARSET_DEFINED_P (charset) - && CHAR_COMPONENTS_VALID_P (charset, c1, c2)); - } - DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0, - doc: /* Return t if OBJECT is a valid normal character. - If optional arg GENERICP is non-nil, also return t if OBJECT is - a valid generic character. */) - (object, genericp) - Lisp_Object object, genericp; - { - if (! NATNUMP (object)) - return Qnil; - return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil); - } + if (method == CHARSET_METHOD_SUBSET) + { + Lisp_Object subset_info; + struct charset *this_charset; + + subset_info = CHARSET_SUBSET (charset); + this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0))); + code = ENCODE_CHAR (this_charset, c); + if (code == CHARSET_INVALID_CODE (this_charset) + || code < XFASTINT (AREF (subset_info, 1)) + || code > XFASTINT (AREF (subset_info, 2))) + return CHARSET_INVALID_CODE (charset); + code += XINT (AREF (subset_info, 3)); + return code; + } - DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte, - Sunibyte_char_to_multibyte, 1, 1, 0, - doc: /* Convert the unibyte character CH to multibyte character. - The conversion is done based on `nonascii-translation-table' (which see) - or `nonascii-insert-offset' (which see). */) - (ch) - Lisp_Object ch; - { - int c; + if (method == CHARSET_METHOD_SUPERSET) + { + Lisp_Object parents; - CHECK_NUMBER (ch); - c = XINT (ch); - if (c < 0 || c >= 0400) - error ("Invalid unibyte character: %d", c); - c = unibyte_char_to_multibyte (c); - if (c < 0) - error ("Can't convert to multibyte character: %d", XINT (ch)); - return make_number (c); - } + parents = CHARSET_SUPERSET (charset); + for (; CONSP (parents); parents = XCDR (parents)) + { + int id = XINT (XCAR (XCAR (parents))); + int code_offset = XINT (XCDR (XCAR (parents))); + struct charset *this_charset = CHARSET_FROM_ID (id); - DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte, - Smultibyte_char_to_unibyte, 1, 1, 0, - doc: /* Convert the multibyte character CH to unibyte character. - The conversion is done based on `nonascii-translation-table' (which see) - or `nonascii-insert-offset' (which see). */) - (ch) - Lisp_Object ch; - { - int c; + code = ENCODE_CHAR (this_charset, c); + if (code != CHARSET_INVALID_CODE (this_charset)) + return code + code_offset; + } + return CHARSET_INVALID_CODE (charset); + } - CHECK_NUMBER (ch); - c = XINT (ch); - if (! CHAR_VALID_P (c, 0)) - error ("Invalid multibyte character: %d", c); - c = multibyte_char_to_unibyte (c, Qnil); - if (c < 0) - error ("Can't convert to unibyte character: %d", XINT (ch)); - return make_number (c); - } + if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map) + || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset)) + return CHARSET_INVALID_CODE (charset); - DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0, - doc: /* Return 1 regardless of the argument CHAR. */) - (ch) - Lisp_Object ch; - { - CHECK_NUMBER (ch); - return make_number (1); - } + if (method == CHARSET_METHOD_MAP_DEFERRED) + { + load_charset (charset); + method = CHARSET_METHOD (charset); + } - /* Return how many bytes C will occupy in a multibyte buffer. - Don't call this function directly, instead use macro CHAR_BYTES. */ - int - char_bytes (c) - int c; - { - int charset; + if (method == CHARSET_METHOD_MAP) + { + Lisp_Object encoder; + Lisp_Object val; - if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1))) - return 1; - if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0) - return 1; + encoder = CHARSET_ENCODER (charset); + if (! CHAR_TABLE_P (CHARSET_ENCODER (charset))) + return CHARSET_INVALID_CODE (charset); + val = CHAR_TABLE_REF (encoder, c); + if (NILP (val)) + return CHARSET_INVALID_CODE (charset); + code = XINT (val); + if (! CHARSET_COMPACT_CODES_P (charset)) + code = INDEX_TO_CODE_POINT (charset, code); + } + else /* method == CHARSET_METHOD_OFFSET */ + { + code = c - CHARSET_CODE_OFFSET (charset); + code = INDEX_TO_CODE_POINT (charset, code); + } - charset = CHAR_CHARSET (c); - return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1); + return code; } - /* Return the width of character of which multi-byte form starts with - C. The width is measured by how many columns occupied on the - screen when displayed in the current buffer. */ - - #define ONE_BYTE_CHAR_WIDTH(c) \ - (c < 0x20 \ - ? (c == '\t' \ - ? XFASTINT (current_buffer->tab_width) \ - : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \ - : (c < 0x7f \ - ? 1 \ - : (c == 0x7F \ - ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \ - : ((! NILP (current_buffer->enable_multibyte_characters) \ - && BASE_LEADING_CODE_P (c)) \ - ? WIDTH_BY_CHAR_HEAD (c) \ - : 4)))) - - DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0, - doc: /* Return width of CHAR when displayed in the current buffer. - The width is measured by how many columns it occupies on the screen. - Tab is taken to occupy `tab-width' columns. */) - (ch) - Lisp_Object ch; - { - Lisp_Object val, disp; - int c; - struct Lisp_Char_Table *dp = buffer_display_table (); - CHECK_NUMBER (ch); + DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0, + doc: /* Decode the pair of CHARSET and CODE-POINT into a character. + Return nil if CODE-POINT is not valid in CHARSET. - c = XINT (ch); + CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). - /* Get the way the display table would display it. */ - disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil; + Optional argument RESTRICTION specifies a way to map the pair of CCS + and CODE-POINT to a chracter. Currently not supported and just ignored. */) + (charset, code_point, restriction) + Lisp_Object charset, code_point, restriction; + { + int c, id; + unsigned code; + struct charset *charsetp; - if (VECTORP (disp)) - XSETINT (val, XVECTOR (disp)->size); - else if (SINGLE_BYTE_CHAR_P (c)) - XSETINT (val, ONE_BYTE_CHAR_WIDTH (c)); + CHECK_CHARSET_GET_ID (charset, id); + if (CONSP (code_point)) + { - CHECK_NATNUM (XCAR (code_point)); - CHECK_NATNUM (XCDR (code_point)); ++ CHECK_NATNUM_CAR (code_point); ++ CHECK_NATNUM_CDR (code_point); + code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point))); + } else { - int charset = CHAR_CHARSET (c); - - XSETFASTINT (val, CHARSET_WIDTH (charset)); + CHECK_NATNUM (code_point); + code = XINT (code_point); } - return val; + charsetp = CHARSET_FROM_ID (id); + c = DECODE_CHAR (charsetp, code); + return (c >= 0 ? make_number (c) : Qnil); } - /* Return width of string STR of length LEN when displayed in the - current buffer. The width is measured by how many columns it - occupies on the screen. */ - int - strwidth (str, len) - unsigned char *str; - int len; + DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0, + doc: /* Encode the character CH into a code-point of CHARSET. + Return nil if CHARSET doesn't include CH. + + Optional argument RESTRICTION specifies a way to map CHAR to a + code-point in CCS. Currently not supported and just ignored. */) + (ch, charset, restriction) + Lisp_Object ch, charset, restriction; { - return c_string_width (str, len, -1, NULL, NULL); + int id; + unsigned code; + struct charset *charsetp; + + CHECK_CHARSET_GET_ID (charset, id); + CHECK_NATNUM (ch); + charsetp = CHARSET_FROM_ID (id); + code = ENCODE_CHAR (charsetp, XINT (ch)); + if (code == CHARSET_INVALID_CODE (charsetp)) + return Qnil; + if (code > 0x7FFFFFF) + return Fcons (make_number (code >> 16), make_number (code & 0xFFFF)); + return make_number (code); } - /* Return width of string STR of length LEN when displayed in the - current buffer. The width is measured by how many columns it - occupies on the screen. If PRECISION > 0, return the width of - longest substring that doesn't exceed PRECISION, and set number of - characters and bytes of the substring in *NCHARS and *NBYTES - respectively. */ - int - c_string_width (str, len, precision, nchars, nbytes) - const unsigned char *str; - int len, precision, *nchars, *nbytes; + DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0, + doc: + /* Return a character of CHARSET whose position codes are CODEn. + + CODE1 through CODE4 are optional, but if you don't supply sufficient + position codes, it is assumed that the minimum code in each dimension + is specified. */) + (charset, code1, code2, code3, code4) + Lisp_Object charset, code1, code2, code3, code4; { - int i = 0, i_byte = 0; - int width = 0; - int chars; - struct Lisp_Char_Table *dp = buffer_display_table (); + int id, dimension; + struct charset *charsetp; + unsigned code; + int c; + + CHECK_CHARSET_GET_ID (charset, id); + charsetp = CHARSET_FROM_ID (id); - while (i_byte < len) + dimension = CHARSET_DIMENSION (charsetp); + if (NILP (code1)) + code = (CHARSET_ASCII_COMPATIBLE_P (charsetp) + ? 0 : CHARSET_MIN_CODE (charsetp)); + else { - int bytes, thiswidth; - Lisp_Object val; + CHECK_NATNUM (code1); + if (XFASTINT (code1) >= 0x100) + args_out_of_range (make_number (0xFF), code1); + code = XFASTINT (code1); - if (dp) + if (dimension > 1) { - int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes); - - chars = 1; - val = DISP_CHAR_VECTOR (dp, c); - if (VECTORP (val)) - thiswidth = XVECTOR (val)->size; + code <<= 8; + if (NILP (code2)) + code |= charsetp->code_space[(dimension - 2) * 4]; else - thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]); - } - else - { - chars = 1; - PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes); - thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]); - } + { + CHECK_NATNUM (code2); + if (XFASTINT (code2) >= 0x100) + args_out_of_range (make_number (0xFF), code2); + code |= XFASTINT (code2); + } - if (precision > 0 - && (width + thiswidth > precision)) - { - *nchars = i; - *nbytes = i_byte; - return width; + if (dimension > 2) + { + code <<= 8; + if (NILP (code3)) + code |= charsetp->code_space[(dimension - 3) * 4]; + else + { + CHECK_NATNUM (code3); + if (XFASTINT (code3) >= 0x100) + args_out_of_range (make_number (0xFF), code3); + code |= XFASTINT (code3); + } + + if (dimension > 3) + { + code <<= 8; + if (NILP (code4)) + code |= charsetp->code_space[0]; + else + { + CHECK_NATNUM (code4); + if (XFASTINT (code4) >= 0x100) + args_out_of_range (make_number (0xFF), code4); + code |= XFASTINT (code4); + } + } + } } - i++; - i_byte += bytes; - width += thiswidth; - } - - if (precision > 0) - { - *nchars = i; - *nbytes = i_byte; } - return width; + if (CHARSET_ISO_FINAL (charsetp) >= 0) + code &= 0x7F7F7F7F; + c = DECODE_CHAR (charsetp, code); + if (c < 0) + error ("Invalid code(s)"); + return make_number (c); } - /* Return width of Lisp string STRING when displayed in the current - buffer. The width is measured by how many columns it occupies on - the screen while paying attention to compositions. If PRECISION > - 0, return the width of longest substring that doesn't exceed - PRECISION, and set number of characters and bytes of the substring - in *NCHARS and *NBYTES respectively. */ - int - lisp_string_width (string, precision, nchars, nbytes) - Lisp_Object string; - int precision, *nchars, *nbytes; - { - int len = SCHARS (string); - int len_byte = SBYTES (string); - const unsigned char *str = SDATA (string); - int i = 0, i_byte = 0; - int width = 0; - struct Lisp_Char_Table *dp = buffer_display_table (); - - while (i < len) - { - int chars, bytes, thiswidth; - Lisp_Object val; - int cmp_id; - int ignore, end; + /* Return the first charset in CHARSET_LIST that contains C. + CHARSET_LIST is a list of charset IDs. If it is nil, use + Vcharset_ordered_list. */ - if (find_composition (i, -1, &ignore, &end, &val, string) - && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string)) - >= 0)) - { - thiswidth = composition_table[cmp_id]->width; - chars = end - i; - bytes = string_char_to_byte (string, end) - i_byte; - } - else if (dp) - { - int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes); + struct charset * + char_charset (c, charset_list, code_return) + int c; + Lisp_Object charset_list; + unsigned *code_return; + { + if (NILP (charset_list)) + charset_list = Vcharset_ordered_list; - chars = 1; - val = DISP_CHAR_VECTOR (dp, c); - if (VECTORP (val)) - thiswidth = XVECTOR (val)->size; - else - thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]); - } - else - { - chars = 1; - PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes); - thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]); - } + while (CONSP (charset_list)) + { + struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); + unsigned code = ENCODE_CHAR (charset, c); - if (precision > 0 - && (width + thiswidth > precision)) + if (code != CHARSET_INVALID_CODE (charset)) { - *nchars = i; - *nbytes = i_byte; - return width; + if (code_return) + *code_return = code; + return charset; } - i += chars; - i_byte += bytes; - width += thiswidth; - } - - if (precision > 0) - { - *nchars = i; - *nbytes = i_byte; + charset_list = XCDR (charset_list); } - - return width; + return NULL; } - DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0, - doc: /* Return width of STRING when displayed in the current buffer. - Width is measured by how many columns it occupies on the screen. - When calculating width of a multibyte character in STRING, - only the base leading-code is considered; the validity of - the following bytes is not checked. Tabs in STRING are always - taken to occupy `tab-width' columns. */) - (str) - Lisp_Object str; + + /* Fixme: `unknown' can't happen now? */ + DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, + doc: /*Return list of charset and one to three position-codes of CHAR. + If CHAR is invalid as a character code, return a list `(unknown CHAR)'. */) + (ch) + Lisp_Object ch; { + struct charset *charset; + int c, dimension; + unsigned code; Lisp_Object val; - CHECK_STRING (str); - XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL)); - return val; + CHECK_CHARACTER (ch); + c = XFASTINT (ch); + charset = CHAR_CHARSET (c); + if (! charset) + return Fcons (intern ("unknown"), Fcons (ch, Qnil)); - ++ + code = ENCODE_CHAR (charset, c); + if (code == CHARSET_INVALID_CODE (charset)) + abort (); + dimension = CHARSET_DIMENSION (charset); + val = (dimension == 1 ? Fcons (make_number (code), Qnil) + : dimension == 2 ? Fcons (make_number (code >> 8), + Fcons (make_number (code & 0xFF), Qnil)) + : Fcons (make_number (code >> 16), + Fcons (make_number ((code >> 8) & 0xFF), + Fcons (make_number (code & 0xFF), Qnil)))); + return Fcons (CHARSET_NAME (charset), val); } - DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0, - doc: /* Return the direction of CHAR. - The returned value is 0 for left-to-right and 1 for right-to-left. */) + + DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0, + doc: /* Return the charset of highest priority that contains CHAR. */) (ch) Lisp_Object ch; { @@@ -1770,64 -2073,49 +2073,54 @@@ init_charset_once ( void syms_of_charset () { - Qcharset = intern ("charset"); - staticpro (&Qcharset); - - Qascii = intern ("ascii"); - staticpro (&Qascii); - - Qeight_bit_control = intern ("eight-bit-control"); - staticpro (&Qeight_bit_control); - - Qeight_bit_graphic = intern ("eight-bit-graphic"); - staticpro (&Qeight_bit_graphic); - - /* Define special charsets ascii, eight-bit-control, and - eight-bit-graphic. */ - update_charset_table (make_number (CHARSET_ASCII), - make_number (1), make_number (94), - make_number (1), - make_number (0), - make_number ('B'), - make_number (0), - build_string ("ASCII"), - Qnil, /* same as above */ - build_string ("ASCII (ISO646 IRV)")); - CHARSET_SYMBOL (CHARSET_ASCII) = Qascii; - Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII)); - - update_charset_table (make_number (CHARSET_8_BIT_CONTROL), - make_number (1), make_number (96), - make_number (4), - make_number (0), - make_number (-1), - make_number (-1), - build_string ("8-bit control code (0x80..0x9F)"), - Qnil, /* same as above */ - Qnil); /* same as above */ - CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control; - Fput (Qeight_bit_control, Qcharset, - CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL)); - - update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC), - make_number (1), make_number (96), - make_number (4), - make_number (0), - make_number (-1), - make_number (-1), - build_string ("8-bit graphic char (0xA0..0xFF)"), - Qnil, /* same as above */ - Qnil); /* same as above */ - CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic; - Fput (Qeight_bit_graphic, Qcharset, - CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC)); - - Qauto_fill_chars = intern ("auto-fill-chars"); - staticpro (&Qauto_fill_chars); - Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0)); - - defsubr (&Sdefine_charset); - defsubr (&Sgeneric_character_list); + char *p; + + DEFSYM (Qcharsetp, "charsetp"); + + DEFSYM (Qascii, "ascii"); + DEFSYM (Qunicode, "unicode"); + DEFSYM (Qeight_bit, "eight-bit"); + DEFSYM (Qiso_8859_1, "iso-8859-1"); + + DEFSYM (Qgl, "gl"); + DEFSYM (Qgr, "gr"); + + p = (char *) xmalloc (30000); + + staticpro (&Vcharset_ordered_list); + Vcharset_ordered_list = Qnil; + + staticpro (&Viso_2022_charset_list); + Viso_2022_charset_list = Qnil; + + staticpro (&Vemacs_mule_charset_list); + Vemacs_mule_charset_list = Qnil; + + staticpro (&Vcharset_hash_table); - Vcharset_hash_table = Fmakehash (Qeq); ++ { ++ Lisp_Object args[2]; ++ args[0] = QCtest; ++ args[1] = Qeq; ++ Vcharset_hash_table = Fmake_hash_table (2, args); ++ } + + charset_table_size = 128; + charset_table = ((struct charset *) + xmalloc (sizeof (struct charset) * charset_table_size)); + charset_table_used = 0; + + staticpro (&Vchar_unified_charset_table); + Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1)); + + defsubr (&Scharsetp); + defsubr (&Smap_charset_chars); + defsubr (&Sdefine_charset_internal); + defsubr (&Sdefine_charset_alias); + defsubr (&Sunibyte_charset); + defsubr (&Sset_unibyte_charset); + defsubr (&Scharset_plist); + defsubr (&Sset_charset_plist); + defsubr (&Sunify_charset); defsubr (&Sget_unused_iso_final_char); defsubr (&Sdeclare_equiv_charset); defsubr (&Sfind_charset_region); diff --cc src/charset.h index fd8905e47d8,6db0165274a..1ecbb49027b --- a/src/charset.h +++ b/src/charset.h @@@ -1,7 -1,10 +1,10 @@@ - /* Header for multibyte character handler. + /* Header for charset handler. Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN. -- Licensed to the Free Software Foundation. ++ Licensed to the Free Software Foundation. Copyright (C) 2001 Free Software Foundation, Inc. - Copyright (C) 2001, 2002 ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. diff --cc src/chartab.c index 00000000000,95dd346b86a..1288d49929f mode 000000,100644..100644 --- a/src/chartab.c +++ b/src/chartab.c @@@ -1,0 -1,1032 +1,965 @@@ + /* chartab.c -- char-table support - Copyright (C) 2001, 2002 ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 + + This file is part of GNU Emacs. + + GNU Emacs is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + GNU Emacs is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GNU Emacs; see the file COPYING. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + + #include + #include "lisp.h" + #include "character.h" + #include "charset.h" + #include "ccl.h" + + /* 64/16/32/128 */ + + /* Number of elements in Nth level char-table. */ + const int chartab_size[4] = + { (1 << CHARTAB_SIZE_BITS_0), + (1 << CHARTAB_SIZE_BITS_1), + (1 << CHARTAB_SIZE_BITS_2), + (1 << CHARTAB_SIZE_BITS_3) }; + + /* Number of characters each element of Nth level char-table + covers. */ + const int chartab_chars[4] = + { (1 << (CHARTAB_SIZE_BITS_1 + CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3)), + (1 << (CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3)), + (1 << CHARTAB_SIZE_BITS_3), + 1 }; + + /* Number of characters (in bits) each element of Nth level char-table + covers. */ + const int chartab_bits[4] = + { (CHARTAB_SIZE_BITS_1 + CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3), + (CHARTAB_SIZE_BITS_2 + CHARTAB_SIZE_BITS_3), + CHARTAB_SIZE_BITS_3, + 0 }; + + #define CHARTAB_IDX(c, depth, min_char) \ + (((c) - (min_char)) >> chartab_bits[(depth)]) + + + DEFUN ("make-char-table", Fmake_char_table, Smake_char_table, 1, 2, 0, + doc: /* Return a newly created char-table, with purpose PURPOSE. + Each element is initialized to INIT, which defaults to nil. + + PURPOSE should be a symbol. If it has a `char-table-extra-slots' + property, the property's value should be an integer between 0 and 10 + that specifies how many extra slots the char-table has. Otherwise, + the char-table has no extra slot. */) + (purpose, init) + register Lisp_Object purpose, init; + { + Lisp_Object vector; + Lisp_Object n; + int n_extras; + int size; + + CHECK_SYMBOL (purpose); + n = Fget (purpose, Qchar_table_extra_slots); + if (NILP (n)) + n_extras = 0; + else + { + CHECK_NATNUM (n); + n_extras = XINT (n); + if (n_extras > 10) + args_out_of_range (n, Qnil); + } + + size = VECSIZE (struct Lisp_Char_Table) - 1 + n_extras; + vector = Fmake_vector (make_number (size), init); + XCHAR_TABLE (vector)->parent = Qnil; + XCHAR_TABLE (vector)->purpose = purpose; + XSETCHAR_TABLE (vector, XCHAR_TABLE (vector)); + return vector; + } + + static Lisp_Object + make_sub_char_table (depth, min_char, defalt) + int depth, min_char; + Lisp_Object defalt; + { + Lisp_Object table; + int size = VECSIZE (struct Lisp_Sub_Char_Table) - 1 + chartab_size[depth]; + + table = Fmake_vector (make_number (size), defalt); + XSUB_CHAR_TABLE (table)->depth = make_number (depth); + XSUB_CHAR_TABLE (table)->min_char = make_number (min_char); + XSETSUB_CHAR_TABLE (table, XSUB_CHAR_TABLE (table)); + + return table; + } + + static Lisp_Object + char_table_ascii (table) + Lisp_Object table; + { + Lisp_Object sub; + + sub = XCHAR_TABLE (table)->contents[0]; + if (! SUB_CHAR_TABLE_P (sub)) + return sub; + sub = XSUB_CHAR_TABLE (sub)->contents[0]; + if (! SUB_CHAR_TABLE_P (sub)) + return sub; + return XSUB_CHAR_TABLE (sub)->contents[0]; + } + + Lisp_Object + copy_sub_char_table (table) + Lisp_Object table; + { + Lisp_Object copy; + int depth = XINT (XSUB_CHAR_TABLE (table)->depth); + int min_char = XINT (XSUB_CHAR_TABLE (table)->min_char); + Lisp_Object val; + int i; + + copy = make_sub_char_table (depth, min_char, Qnil); + /* Recursively copy any sub char-tables. */ + for (i = 0; i < chartab_size[depth]; i++) + { + val = XSUB_CHAR_TABLE (table)->contents[i]; + if (SUB_CHAR_TABLE_P (val)) + XSUB_CHAR_TABLE (copy)->contents[i] = copy_sub_char_table (val); + else + XSUB_CHAR_TABLE (copy)->contents[i] = val; + } + + return copy; + } + + + Lisp_Object + copy_char_table (table) + Lisp_Object table; + { + Lisp_Object copy; + int size = XCHAR_TABLE (table)->size & PSEUDOVECTOR_SIZE_MASK; + int i; + + copy = Fmake_vector (make_number (size), Qnil); + XCHAR_TABLE (copy)->defalt = XCHAR_TABLE (table)->defalt; + XCHAR_TABLE (copy)->parent = XCHAR_TABLE (table)->parent; + XCHAR_TABLE (copy)->purpose = XCHAR_TABLE (table)->purpose; + XCHAR_TABLE (copy)->ascii = XCHAR_TABLE (table)->ascii; + for (i = 0; i < chartab_size[0]; i++) + XCHAR_TABLE (copy)->contents[i] + = (SUB_CHAR_TABLE_P (XCHAR_TABLE (table)->contents[i]) + ? copy_sub_char_table (XCHAR_TABLE (table)->contents[i]) + : XCHAR_TABLE (table)->contents[i]); + if (SUB_CHAR_TABLE_P (XCHAR_TABLE (copy)->ascii)) + XCHAR_TABLE (copy)->ascii = char_table_ascii (copy); + size -= VECSIZE (struct Lisp_Char_Table) - 1; + for (i = 0; i < size; i++) + XCHAR_TABLE (copy)->extras[i] = XCHAR_TABLE (table)->extras[i]; + + XSETCHAR_TABLE (copy, XCHAR_TABLE (copy)); + return copy; + } + + Lisp_Object + sub_char_table_ref (table, c) + Lisp_Object table; + int c; + { + struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); + int depth = XINT (tbl->depth); + int min_char = XINT (tbl->min_char); + Lisp_Object val; + + val = tbl->contents[CHARTAB_IDX (c, depth, min_char)]; + if (SUB_CHAR_TABLE_P (val)) + val = sub_char_table_ref (val, c); + return val; + } + + Lisp_Object + char_table_ref (table, c) + Lisp_Object table; + int c; + { + struct Lisp_Char_Table *tbl = XCHAR_TABLE (table); + Lisp_Object val; + + if (ASCII_CHAR_P (c)) + { + val = tbl->ascii; + if (SUB_CHAR_TABLE_P (val)) + val = XSUB_CHAR_TABLE (val)->contents[c]; + } + else + { + val = tbl->contents[CHARTAB_IDX (c, 0, 0)]; + if (SUB_CHAR_TABLE_P (val)) + val = sub_char_table_ref (val, c); + } + if (NILP (val)) + { + val = tbl->defalt; + if (NILP (val) && CHAR_TABLE_P (tbl->parent)) + val = char_table_ref (tbl->parent, c); + } + return val; -} ++} + + static Lisp_Object + sub_char_table_ref_and_range (table, c, from, to, defalt) + Lisp_Object table; + int c; + int *from, *to; + Lisp_Object defalt; + { + struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); + int depth = XINT (tbl->depth); + int min_char = XINT (tbl->min_char); + int max_char = min_char + chartab_chars[depth - 1] - 1; + int index = CHARTAB_IDX (c, depth, min_char); + Lisp_Object val; - ++ + val = tbl->contents[index]; + *from = min_char + index * chartab_chars[depth]; + *to = *from + chartab_chars[depth] - 1; + if (SUB_CHAR_TABLE_P (val)) + val = sub_char_table_ref_and_range (val, c, from, to, defalt); + else if (NILP (val)) + val = defalt; + + while (*from > min_char + && *from == min_char + index * chartab_chars[depth]) + { + Lisp_Object this_val; + int this_from = *from - chartab_chars[depth]; + int this_to = *from - 1; + + index--; + this_val = tbl->contents[index]; + if (SUB_CHAR_TABLE_P (this_val)) + this_val = sub_char_table_ref_and_range (this_val, this_to, + &this_from, &this_to, + defalt); + else if (NILP (this_val)) + this_val = defalt; + + if (! EQ (this_val, val)) + break; + *from = this_from; + } + index = CHARTAB_IDX (c, depth, min_char); + while (*to < max_char + && *to == min_char + (index + 1) * chartab_chars[depth] - 1) + { + Lisp_Object this_val; + int this_from = *to + 1; + int this_to = this_from + chartab_chars[depth] - 1; + + index++; + this_val = tbl->contents[index]; + if (SUB_CHAR_TABLE_P (this_val)) + this_val = sub_char_table_ref_and_range (this_val, this_from, + &this_from, &this_to, + defalt); + else if (NILP (this_val)) + this_val = defalt; + if (! EQ (this_val, val)) + break; + *to = this_to; + } + + return val; + } + + + /* Return the value for C in char-table TABLE. Set *FROM and *TO to + the range of characters (containing C) that have the same value as + C. It is not assured that the value of (*FROM - 1) and (*TO + 1) + is different from that of C. */ + + Lisp_Object + char_table_ref_and_range (table, c, from, to) + Lisp_Object table; + int c; + int *from, *to; + { + struct Lisp_Char_Table *tbl = XCHAR_TABLE (table); + int index = CHARTAB_IDX (c, 0, 0); + Lisp_Object val; + + val = tbl->contents[index]; + *from = index * chartab_chars[0]; + *to = *from + chartab_chars[0] - 1; + if (SUB_CHAR_TABLE_P (val)) + val = sub_char_table_ref_and_range (val, c, from, to, tbl->defalt); + else if (NILP (val)) + val = tbl->defalt; + + while (*from > 0 && *from == index * chartab_chars[0]) + { + Lisp_Object this_val; + int this_from = *from - chartab_chars[0]; + int this_to = *from - 1; + + index--; + this_val = tbl->contents[index]; + if (SUB_CHAR_TABLE_P (this_val)) + this_val = sub_char_table_ref_and_range (this_val, this_to, + &this_from, &this_to, + tbl->defalt); + else if (NILP (this_val)) + this_val = tbl->defalt; + + if (! EQ (this_val, val)) + break; + *from = this_from; + } + while (*to < MAX_CHAR && *to == (index + 1) * chartab_chars[0] - 1) + { + Lisp_Object this_val; + int this_from = *to + 1; + int this_to = this_from + chartab_chars[0] - 1; + + index++; + this_val = tbl->contents[index]; + if (SUB_CHAR_TABLE_P (this_val)) + this_val = sub_char_table_ref_and_range (this_val, this_from, + &this_from, &this_to, + tbl->defalt); + else if (NILP (this_val)) + this_val = tbl->defalt; + if (! EQ (this_val, val)) + break; + *to = this_to; + } + + return val; + } + + + #define ASET_RANGE(ARRAY, FROM, TO, LIMIT, VAL) \ + do { \ + int limit = (TO) < (LIMIT) ? (TO) : (LIMIT); \ + for (; (FROM) < limit; (FROM)++) (ARRAY)->contents[(FROM)] = (VAL); \ + } while (0) + + #define GET_SUB_CHAR_TABLE(TABLE, SUBTABLE, IDX, DEPTH, MIN_CHAR) \ + do { \ + (SUBTABLE) = (TABLE)->contents[(IDX)]; \ + if (!SUB_CHAR_TABLE_P (SUBTABLE)) \ + (SUBTABLE) = make_sub_char_table ((DEPTH), (MIN_CHAR), (SUBTABLE)); \ + } while (0) + + + static void + sub_char_table_set (table, c, val) + Lisp_Object table; + int c; + Lisp_Object val; + { + struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); + int depth = XINT ((tbl)->depth); + int min_char = XINT ((tbl)->min_char); + int i = CHARTAB_IDX (c, depth, min_char); + Lisp_Object sub; - ++ + if (depth == 3) + tbl->contents[i] = val; + else + { + sub = tbl->contents[i]; + if (! SUB_CHAR_TABLE_P (sub)) + { + sub = make_sub_char_table (depth + 1, + min_char + i * chartab_chars[depth], sub); + tbl->contents[i] = sub; + } + sub_char_table_set (sub, c, val); + } + } + + Lisp_Object + char_table_set (table, c, val) + Lisp_Object table; + int c; + Lisp_Object val; + { + struct Lisp_Char_Table *tbl = XCHAR_TABLE (table); + + if (ASCII_CHAR_P (c) + && SUB_CHAR_TABLE_P (tbl->ascii)) + { + XSUB_CHAR_TABLE (tbl->ascii)->contents[c] = val; + } + else + { + int i = CHARTAB_IDX (c, 0, 0); + Lisp_Object sub; + + sub = tbl->contents[i]; + if (! SUB_CHAR_TABLE_P (sub)) + { + sub = make_sub_char_table (1, i * chartab_chars[0], sub); + tbl->contents[i] = sub; + } + sub_char_table_set (sub, c, val); + if (ASCII_CHAR_P (c)) + tbl->ascii = char_table_ascii (table); + } + return val; + } + + static void + sub_char_table_set_range (table, depth, min_char, from, to, val) + Lisp_Object *table; + int depth; + int min_char; + int from, to; + Lisp_Object val; + { + int max_char = min_char + chartab_chars[depth] - 1; + + if (depth == 3 || (from <= min_char && to >= max_char)) + *table = val; + else + { + int i, j; + + depth++; + if (! SUB_CHAR_TABLE_P (*table)) + *table = make_sub_char_table (depth, min_char, *table); + if (from < min_char) + from = min_char; + if (to > max_char) + to = max_char; + i = CHARTAB_IDX (from, depth, min_char); + j = CHARTAB_IDX (to, depth, min_char); + min_char += chartab_chars[depth] * i; + for (; i <= j; i++, min_char += chartab_chars[depth]) + sub_char_table_set_range (XSUB_CHAR_TABLE (*table)->contents + i, + depth, min_char, from, to, val); + } + } + + + Lisp_Object + char_table_set_range (table, from, to, val) + Lisp_Object table; + int from, to; + Lisp_Object val; + { + struct Lisp_Char_Table *tbl = XCHAR_TABLE (table); + Lisp_Object *contents = tbl->contents; + int i, min_char; + + if (from == to) + char_table_set (table, from, val); + else + { + for (i = CHARTAB_IDX (from, 0, 0), min_char = i * chartab_chars[0]; + min_char <= to; + i++, min_char += chartab_chars[0]) + sub_char_table_set_range (contents + i, 0, min_char, from, to, val); + if (ASCII_CHAR_P (from)) + tbl->ascii = char_table_ascii (table); + } + return val; + } + + + DEFUN ("char-table-subtype", Fchar_table_subtype, Schar_table_subtype, + 1, 1, 0, + doc: /* + Return the subtype of char-table CHAR-TABLE. The value is a symbol. */) + (char_table) + Lisp_Object char_table; + { + CHECK_CHAR_TABLE (char_table); + + return XCHAR_TABLE (char_table)->purpose; + } + + DEFUN ("char-table-parent", Fchar_table_parent, Schar_table_parent, + 1, 1, 0, + doc: /* Return the parent char-table of CHAR-TABLE. + The value is either nil or another char-table. + If CHAR-TABLE holds nil for a given character, + then the actual applicable value is inherited from the parent char-table + \(or from its parents, if necessary). */) + (char_table) + Lisp_Object char_table; + { + CHECK_CHAR_TABLE (char_table); + + return XCHAR_TABLE (char_table)->parent; + } + + DEFUN ("set-char-table-parent", Fset_char_table_parent, Sset_char_table_parent, + 2, 2, 0, + doc: /* Set the parent char-table of CHAR-TABLE to PARENT. + PARENT must be either nil or another char-table. */) + (char_table, parent) + Lisp_Object char_table, parent; + { + Lisp_Object temp; + + CHECK_CHAR_TABLE (char_table); + + if (!NILP (parent)) + { + CHECK_CHAR_TABLE (parent); + + for (temp = parent; !NILP (temp); temp = XCHAR_TABLE (temp)->parent) + if (EQ (temp, char_table)) + error ("Attempt to make a chartable be its own parent"); + } + + XCHAR_TABLE (char_table)->parent = parent; + + return parent; + } + + DEFUN ("char-table-extra-slot", Fchar_table_extra_slot, Schar_table_extra_slot, + 2, 2, 0, + doc: /* Return the value of CHAR-TABLE's extra-slot number N. */) + (char_table, n) + Lisp_Object char_table, n; + { + CHECK_CHAR_TABLE (char_table); + CHECK_NUMBER (n); + if (XINT (n) < 0 + || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table))) + args_out_of_range (char_table, n); + + return XCHAR_TABLE (char_table)->extras[XINT (n)]; + } + + DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot, + Sset_char_table_extra_slot, + 3, 3, 0, + doc: /* Set CHAR-TABLE's extra-slot number N to VALUE. */) + (char_table, n, value) + Lisp_Object char_table, n, value; + { + CHECK_CHAR_TABLE (char_table); + CHECK_NUMBER (n); + if (XINT (n) < 0 + || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table))) + args_out_of_range (char_table, n); + + return XCHAR_TABLE (char_table)->extras[XINT (n)] = value; + } + + DEFUN ("char-table-range", Fchar_table_range, Schar_table_range, + 2, 2, 0, + doc: /* Return the value in CHAR-TABLE for a range of characters RANGE. + RANGE should be nil (for the default value), + a cons of character codes (for characters in the range), or a character code. */) + (char_table, range) + Lisp_Object char_table, range; + { + Lisp_Object val; + CHECK_CHAR_TABLE (char_table); + + if (EQ (range, Qnil)) + val = XCHAR_TABLE (char_table)->defalt; + else if (INTEGERP (range)) + val = CHAR_TABLE_REF (char_table, XINT (range)); + else if (CONSP (range)) + { + int from, to; + - CHECK_CHARACTER (XCAR (range)); - CHECK_CHARACTER (XCDR (range)); ++ CHECK_CHARACTER_CAR (range); ++ CHECK_CHARACTER_CDR (range); + val = char_table_ref_and_range (char_table, XINT (XCAR (range)), + &from, &to); + /* Not yet implemented. */ + } + else + error ("Invalid RANGE argument to `char-table-range'"); + return val; + } + + DEFUN ("set-char-table-range", Fset_char_table_range, Sset_char_table_range, + 3, 3, 0, + doc: /* + Set the value in CHAR-TABLE for characters specified by RANGE to VALUE. + RANGE should be t (for all characters), nil (for the default value), + a cons of character codes (for characters in the range), or a character code. */) + (char_table, range, value) + Lisp_Object char_table, range, value; + { + CHECK_CHAR_TABLE (char_table); + if (EQ (range, Qt)) + { + int i; + + XCHAR_TABLE (char_table)->ascii = Qnil; + for (i = 0; i < chartab_size[0]; i++) + XCHAR_TABLE (char_table)->contents[i] = Qnil; + XCHAR_TABLE (char_table)->defalt = value; + } + else if (EQ (range, Qnil)) + XCHAR_TABLE (char_table)->defalt = value; + else if (INTEGERP (range)) + char_table_set (char_table, XINT (range), value); + else if (CONSP (range)) + { - CHECK_CHARACTER (XCAR (range)); - CHECK_CHARACTER (XCDR (range)); ++ CHECK_CHARACTER_CAR (range); ++ CHECK_CHARACTER_CDR (range); + char_table_set_range (char_table, + XINT (XCAR (range)), XINT (XCDR (range)), value); + } + else + error ("Invalid RANGE argument to `set-char-table-range'"); + + return value; + } + + DEFUN ("set-char-table-default", Fset_char_table_default, + Sset_char_table_default, 3, 3, 0, + doc: /* + This function is obsolete and has no effect. */) + (char_table, ch, value) + Lisp_Object char_table, ch, value; + { + return Qnil; + } + + /* Look up the element in TABLE at index CH, and return it as an + integer. If the element is nil, return CH itself. (Actually we do + that for any non-integer.) */ + + int + char_table_translate (table, ch) + Lisp_Object table; + int ch; + { + Lisp_Object value; + value = Faref (table, make_number (ch)); + if (! INTEGERP (value)) /* fixme: use CHARACTERP? */ + return ch; + return XINT (value); + } + + static Lisp_Object + optimize_sub_char_table (table) + Lisp_Object table; + { + struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); + int depth = XINT (tbl->depth); + Lisp_Object elt, this; + int i; + + elt = XSUB_CHAR_TABLE (table)->contents[0]; + if (SUB_CHAR_TABLE_P (elt)) + elt = XSUB_CHAR_TABLE (table)->contents[0] = optimize_sub_char_table (elt); + if (SUB_CHAR_TABLE_P (elt)) + return table; + for (i = 1; i < chartab_size[depth]; i++) + { + this = XSUB_CHAR_TABLE (table)->contents[i]; + if (SUB_CHAR_TABLE_P (this)) + this = XSUB_CHAR_TABLE (table)->contents[i] + = optimize_sub_char_table (this); + if (SUB_CHAR_TABLE_P (this) + || NILP (Fequal (this, elt))) + break; + } + + return (i < chartab_size[depth] ? table : elt); + } + + DEFUN ("optimize-char-table", Foptimize_char_table, Soptimize_char_table, + 1, 1, 0, + doc: /* Optimize CHAR-TABLE. */) + (char_table) + Lisp_Object char_table; + { + Lisp_Object elt; + int i; + + CHECK_CHAR_TABLE (char_table); + + for (i = 0; i < chartab_size[0]; i++) + { + elt = XCHAR_TABLE (char_table)->contents[i]; + if (SUB_CHAR_TABLE_P (elt)) + XCHAR_TABLE (char_table)->contents[i] = optimize_sub_char_table (elt); + } + return Qnil; + } + + + static Lisp_Object -map_sub_char_table (c_function, function, table, arg, val, range, default_val) ++map_sub_char_table (c_function, function, table, arg, val, range, ++ default_val, parent) + void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object)); - Lisp_Object function, table, arg, val, range, default_val; ++ Lisp_Object function, table, arg, val, range, default_val, parent; + { + struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); + int depth = XINT (tbl->depth); + int i, c; + + for (i = 0, c = XINT (tbl->min_char); i < chartab_size[depth]; + i++, c += chartab_chars[depth]) + { + Lisp_Object this; + + this = tbl->contents[i]; + if (SUB_CHAR_TABLE_P (this)) + val = map_sub_char_table (c_function, function, this, arg, val, range, - default_val); ++ default_val, parent); + else + { + if (NILP (this)) + this = default_val; ++ if (NILP (this) && ! NILP (parent)) ++ this = CHAR_TABLE_REF (parent, c); + if (NILP (Fequal (val, this))) + { + if (! NILP (val)) + { - XCDR (range) = make_number (c - 1); ++ XSETCDR (range, make_number (c - 1)); + if (depth == 3 + && EQ (XCAR (range), XCDR (range))) + { + if (c_function) + (*c_function) (arg, XCAR (range), val); + else + call2 (function, XCAR (range), val); + } + else + { + if (c_function) + (*c_function) (arg, range, val); + else + call2 (function, range, val); + } + } + val = this; - XCAR (range) = make_number (c); ++ XSETCAR (range, make_number (c)); + } + } + } + return val; + } + + + /* Map C_FUNCTION or FUNCTION over TABLE, calling it for each + character or group of characters that share a value. + - ARG is passed to C_FUNCTION when that is called. - - DEPTH and INDICES are ignored. They are removed in the new - feature. */ ++ ARG is passed to C_FUNCTION when that is called. */ + + void -map_char_table (c_function, function, table, arg, depth, indices) ++map_char_table (c_function, function, table, arg) + void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object)); - Lisp_Object function, table, arg, *indices; - int depth; ++ Lisp_Object function, table, arg; + { + Lisp_Object range, val; + int c, i; + + range = Fcons (make_number (0), Qnil); + val = XCHAR_TABLE (table)->ascii; + if (SUB_CHAR_TABLE_P (val)) + val = XSUB_CHAR_TABLE (val)->contents[0]; + + for (i = 0, c = 0; i < chartab_size[0]; i++, c += chartab_chars[0]) + { + Lisp_Object this; + + this = XCHAR_TABLE (table)->contents[i]; + if (SUB_CHAR_TABLE_P (this)) + val = map_sub_char_table (c_function, function, this, arg, val, range, - XCHAR_TABLE (table)->defalt); ++ XCHAR_TABLE (table)->defalt, ++ XCHAR_TABLE (table)->parent); + else + { + if (NILP (this)) + this = XCHAR_TABLE (table)->defalt; ++ if (NILP (this) && ! NILP (XCHAR_TABLE (table)->parent)) ++ this = CHAR_TABLE_REF (XCHAR_TABLE (table)->parent, c); + if (NILP (Fequal (val, this))) + { + if (! NILP (val)) + { - XCDR (range) = make_number (c - 1); ++ XSETCDR (range, make_number (c - 1)); + if (c_function) + (*c_function) (arg, range, val); + else + call2 (function, range, val); + } + val = this; - XCAR (range) = make_number (c); ++ XSETCAR (range, make_number (c)); + } + } + } + + if (! NILP (val)) + { - XCDR (range) = make_number (c - 1); ++ XSETCDR (range, make_number (c - 1)); + if (c_function) + (*c_function) (arg, range, val); + else + call2 (function, range, val); + } + } + + DEFUN ("map-char-table", Fmap_char_table, Smap_char_table, + 2, 2, 0, + doc: /* + Call FUNCTION for each character in CHAR-TABLE that has non-nil value. + FUNCTION is called with two arguments--a key and a value. + The key is a character code or a cons of character codes specifying a + range of characters that have the same value. */) + (function, char_table) + Lisp_Object function, char_table; + { + CHECK_CHAR_TABLE (char_table); + - map_char_table (NULL, function, char_table, char_table, 0, NULL); ++ map_char_table (NULL, function, char_table, char_table); + return Qnil; + } + + + static void + map_sub_char_table_for_charset (c_function, function, table, arg, range, + charset, from, to) + void (*c_function) P_ ((Lisp_Object, Lisp_Object)); + Lisp_Object function, table, arg, range; + struct charset *charset; + unsigned from, to; + { + struct Lisp_Sub_Char_Table *tbl = XSUB_CHAR_TABLE (table); + int depth = XINT (tbl->depth); + int c, i; + + if (depth < 3) + for (i = 0, c = XINT (tbl->min_char); i < chartab_size[depth]; + i++, c += chartab_chars[depth]) + { + Lisp_Object this; + + this = tbl->contents[i]; + if (SUB_CHAR_TABLE_P (this)) + map_sub_char_table_for_charset (c_function, function, this, arg, + range, charset, from, to); + else + { + if (! NILP (XCAR (range))) + { + XSETCDR (range, make_number (c - 1)); + if (c_function) + (*c_function) (arg, range); + else + call2 (function, range, arg); + } + XSETCAR (range, Qnil); + } + } + else + for (i = 0, c = XINT (tbl->min_char); i < chartab_size[depth]; i++, c ++) + { + Lisp_Object this; + unsigned code; + + this = tbl->contents[i]; + if (NILP (this) + || (charset + && (code = ENCODE_CHAR (charset, c), + (code < from || code > to)))) + { + if (! NILP (XCAR (range))) + { + XSETCDR (range, make_number (c - 1)); + if (c_function) + (*c_function) (range, arg); + else + call2 (function, range, arg); + XSETCAR (range, Qnil); + } + } + else + { + if (NILP (XCAR (range))) + XSETCAR (range, make_number (c)); + } + } + } + + + void + map_char_table_for_charset (c_function, function, table, arg, + charset, from, to) + void (*c_function) P_ ((Lisp_Object, Lisp_Object)); + Lisp_Object function, table, arg; + struct charset *charset; + unsigned from, to; + { + Lisp_Object range; + int c, i; + - if (NILP (char_table_ref (table, 0))) - range = Fcons (Qnil, Qnil); - else - range = Fcons (make_number (0), make_number (0)); ++ range = Fcons (Qnil, Qnil); + + for (i = 0, c = 0; i < chartab_size[0]; i++, c += chartab_chars[0]) + { + Lisp_Object this; + + this = XCHAR_TABLE (table)->contents[i]; + if (SUB_CHAR_TABLE_P (this)) + map_sub_char_table_for_charset (c_function, function, this, arg, + range, charset, from, to); + else + { + if (! NILP (XCAR (range))) + { + XSETCDR (range, make_number (c - 1)); + if (c_function) + (*c_function) (arg, range); + else + call2 (function, range, arg); + } + XSETCAR (range, Qnil); + } + } + if (! NILP (XCAR (range))) + { + XSETCDR (range, make_number (c - 1)); + if (c_function) + (*c_function) (arg, range); + else + call2 (function, range, arg); + } + } + - - -#if 0 -Lisp_Object -make_class_table (purpose) - Lisp_Object purpose; -{ - Lisp_Object table; - Lisp_Object args[4]; - - args[0] = purpose; - args[1] = Qnil; - args[2] = QCextra_slots; - args[3] = Fmake_vector (make_number (2), Qnil); - ASET (args[3], 0, Fmakehash (Qequal)); - table = Fmake_char_table (4, args); - return table; -} - -Lisp_Object -modify_class_entry (c, val, table, set) - int c; - Lisp_Object val, table, set; -{ - Lisp_Object classes, hash, canon; - int i, ival; - - hash = XCHAR_TABLE (table)->extras[0]; - classes = CHAR_TABLE_REF (table, c); - - if (! BOOL_VECTOR_P (classes)) - classes = (NILP (set) - ? Qnil - : Fmake_bool_vector (make_number ((ival / 8) * 8 + 8), Qnil)); - else if (ival < XBOOL_VECTOR (classes)->size) - { - Lisp_Object old; - old = classes; - classes = Fmake_bool_vector (make_number ((ival / 8) * 8 + 8), Qnil); - for (i = 0; i < XBOOL_VECTOR (classes)->size; i++) - Faset (classes, make_number (i), Faref (old, make_number (i))); - Faset (classes, val, set); - } - else if (NILP (Faref (classes, val)) != NILP (set)) - { - classes = Fcopy_sequence (classes); - Faset (classes, val, set); - } - else - classes = Qnil; - - if (!NILP (classes)) - { - canon = Fgethash (classes, hash, Qnil); - if (NILP (canon)) - { - canon = classes; - Fputhash (canon, canon, hash); - } - char_table_set (table, c, canon); - } - - return val; -} -#endif - + + void + syms_of_chartab () + { + defsubr (&Smake_char_table); + defsubr (&Schar_table_parent); + defsubr (&Schar_table_subtype); + defsubr (&Sset_char_table_parent); + defsubr (&Schar_table_extra_slot); + defsubr (&Sset_char_table_extra_slot); + defsubr (&Schar_table_range); + defsubr (&Sset_char_table_range); + defsubr (&Sset_char_table_default); + defsubr (&Soptimize_char_table); + defsubr (&Smap_char_table); + } diff --cc src/cmds.c index 4d7228e88ad,21706d4ba80..2e63b2fbb52 --- a/src/cmds.c +++ b/src/cmds.c @@@ -326,38 -295,36 +326,38 @@@ Whichever character you type to run thi CHECK_NUMBER (n); /* Barf if the key that invoked this was not a character. */ - if (!INTEGERP (last_command_char)) + if (!CHARACTERP (last_command_char)) bitch_at_user (); - else if (XINT (n) >= 2 && NILP (current_buffer->overwrite_mode)) - { - int modified_char = character; - /* Add the offset to the character, for Finsert_char. - We pass internal_self_insert the unmodified character - because it itself does this offsetting. */ - if (! NILP (current_buffer->enable_multibyte_characters)) - modified_char = unibyte_char_to_multibyte (modified_char); - - XSETFASTINT (n, XFASTINT (n) - 2); - /* The first one might want to expand an abbrev. */ - internal_self_insert (character, 1); - /* The bulk of the copies of this char can be inserted simply. - We don't have to handle a user-specified face specially - because it will get inherited from the first char inserted. */ - Finsert_char (make_number (modified_char), n, Qt); - /* The last one might want to auto-fill. */ - internal_self_insert (character, 0); - } - else - while (XINT (n) > 0) + { + int character = translate_char (Vtranslation_table_for_input, - XINT (last_command_char), 0, 0, 0); ++ XINT (last_command_char)); + if (XINT (n) >= 2 && NILP (current_buffer->overwrite_mode)) { - /* Ok since old and new vals both nonneg */ - XSETFASTINT (n, XFASTINT (n) - 1); - internal_self_insert (character, XFASTINT (n) != 0); + int modified_char = character; + /* Add the offset to the character, for Finsert_char. + We pass internal_self_insert the unmodified character + because it itself does this offsetting. */ + if (! NILP (current_buffer->enable_multibyte_characters)) + modified_char = unibyte_char_to_multibyte (modified_char); + + XSETFASTINT (n, XFASTINT (n) - 2); + /* The first one might want to expand an abbrev. */ + internal_self_insert (character, 1); + /* The bulk of the copies of this char can be inserted simply. + We don't have to handle a user-specified face specially + because it will get inherited from the first char inserted. */ + Finsert_char (make_number (modified_char), n, Qt); + /* The last one might want to auto-fill. */ + internal_self_insert (character, 0); } + else + while (XINT (n) > 0) + { + /* Ok since old and new vals both nonneg */ + XSETFASTINT (n, XFASTINT (n) - 1); + internal_self_insert (character, XFASTINT (n) != 0); + } + } return Qnil; } diff --cc src/coding.c index b06bf79a4bf,e5f1ae82cd5..c3804630d72 --- a/src/coding.c +++ b/src/coding.c @@@ -1,7 -1,10 +1,10 @@@ - /* Coding system handler (conversion, detection, and etc). - Copyright (C) 1995,97,1998,2002,2003 Electrotechnical Laboratory, JAPAN. - Licensed to the Free Software Foundation. - Copyright (C) 2001,2002,2003 Free Software Foundation, Inc. + /* Coding system handler (conversion, detection, etc). + Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN. - Licensed to the Free Software Foundation. ++ Licensed to the Free Software Foundation. + Copyright (C) 2001, 2002 Free Software Foundation, Inc. - Copyright (C) 2001, 2002 ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. @@@ -35,67 -41,86 +41,86 @@@ Boston, MA 02111-1307, USA. * */ - /*** 0. General comments ***/ + /*** 0. General comments *** - /*** GENERAL NOTE on CODING SYSTEMS *** + CODING SYSTEM - A coding system is an encoding mechanism for one or more character - sets. Here's a list of coding systems which Emacs can handle. When - we say "decode", it means converting some other coding system to - Emacs' internal format (emacs-mule), and when we say "encode", - it means converting the coding system emacs-mule to some other + A coding system is an object for an encoding mechanism that contains + information about how to convert byte sequences to character + sequences and vice versa. When we say "decode", it means converting + a byte sequence of a specific coding system into a character + sequence that is represented by Emacs' internal coding system + `emacs-utf-8', and when we say "encode", it means converting a + character sequence of emacs-utf-8 to a byte sequence of a specific coding system. - 0. Emacs' internal format (emacs-mule) + In Emacs Lisp, a coding system is represented by a Lisp symbol. In + C level, a coding system is represented by a vector of attributes + stored in the hash table Vcharset_hash_table. The conversion from + coding system symbol to attributes vector is done by looking up + Vcharset_hash_table by the symbol. - Emacs itself holds a multi-lingual character in buffers and strings - in a special format. Details are described in section 2. + Coding systems are classified into the following types depending on + the encoding mechanism. Here's a brief description of the types. - 1. ISO2022 + o UTF-8 + + o UTF-16 + + o Charset-base coding system + + A coding system defined by one or more (coded) character sets. + Decoding and encoding are done by a code converter defined for each + character set. + + o Old Emacs internal format (emacs-mule) + + The coding system adopted by old versions of Emacs (20 and 21). + + o ISO2022-base coding system The most famous coding system for multiple character sets. X's - Compound Text, various EUCs (Extended Unix Code), and coding - systems used in Internet communication such as ISO-2022-JP are - all variants of ISO2022. Details are described in section 3. + Compound Text, various EUCs (Extended Unix Code), and coding systems + used in the Internet communication such as ISO-2022-JP are all + variants of ISO2022. - 2. SJIS (or Shift-JIS or MS-Kanji-Code) + o SJIS (or Shift-JIS or MS-Kanji-Code) - + A coding system to encode character sets: ASCII, JISX0201, and JISX0208. Widely used for PC's in Japan. Details are described in - section 4. + section 8. - 3. BIG5 + o BIG5 - A coding system to encode the character sets ASCII and Big5. Widely + A coding system to encode character sets: ASCII and Big5. Widely used for Chinese (mainly in Taiwan and Hong Kong). Details are - described in section 4. In this file, when we write "BIG5" - (all uppercase), we mean the coding system, and when we write - "Big5" (capitalized), we mean the character set. + described in section 8. In this file, when we write "big5" (all + lowercase), we mean the coding system, and when we write "Big5" + (capitalized), we mean the character set. - 4. Raw text + o CCL - A coding system for text containing random 8-bit code. Emacs does - no code conversion on such text except for end-of-line format. + If a user wants to decode/encode text encoded in a coding system + not listed above, he can supply a decoder and an encoder for it in + CCL (Code Conversion Language) programs. Emacs executes the CCL + program while decoding/encoding. - 5. Other + o Raw-text - If a user wants to read/write text encoded in a coding system not - listed above, he can supply a decoder and an encoder for it as CCL - (Code Conversion Language) programs. Emacs executes the CCL program - while reading/writing. + A coding system for text containing raw eight-bit data. Emacs + treats each byte of source text as a character (except for + end-of-line conversion). - Emacs represents a coding system by a Lisp symbol that has a property - `coding-system'. But, before actually using the coding system, the - information about it is set in a structure of type `struct - coding_system' for rapid processing. See section 6 for more details. + o No-conversion + + Like raw text, but don't do end-of-line conversion. - */ - /*** GENERAL NOTES on END-OF-LINE FORMAT *** + END-OF-LINE FORMAT - How end-of-line of text is encoded depends on the operating system. - For instance, Unix's format is just one byte of `line-feed' code, + How text end-of-line is encoded depends on operating system. For + instance, Unix's format is just one byte of LF (line-feed) code, whereas DOS's format is two-byte sequence of `carriage-return' and `line-feed' codes. MacOS's format is usually one byte of `carriage-return'. @@@ -366,10 -318,8 +318,10 @@@ Lisp_Object Qcall_process, Qcall_proces Lisp_Object Qstart_process, Qopen_network_stream; Lisp_Object Qtarget_idx; - Lisp_Object Vselect_safe_coding_system_function; - +int coding_system_require_warning; + + Lisp_Object Vselect_safe_coding_system_function; + /* Mnemonic string for each format of end-of-line. */ Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac; /* Mnemonic string to indicate format of end-of-line is not yet @@@ -399,8 -335,6 +337,7 @@@ Lisp_Object Qcoding_system_p, Qcoding_s /* Coding system emacs-mule and raw-text are for converting only end-of-line format. */ Lisp_Object Qemacs_mule, Qraw_text; - - Lisp_Object Qutf_8; ++Lisp_Object Qutf_8_emacs; /* Coding-systems are handed between Emacs Lisp programs and C internal routines by the following three variables. */ @@@ -509,195 -401,376 +407,375 @@@ Lisp_Object Vtranslation_table_for_inpu to avoid infinite recursive call. */ static int inhibit_pre_post_conversion; - Lisp_Object Qchar_coding_system; + /* Two special coding systems. */ + Lisp_Object Vsjis_coding_system; + Lisp_Object Vbig5_coding_system; + + + static int detect_coding_utf_8 P_ ((struct coding_system *, + struct coding_detection_info *info)); + static void decode_coding_utf_8 P_ ((struct coding_system *)); + static int encode_coding_utf_8 P_ ((struct coding_system *)); + + static int detect_coding_utf_16 P_ ((struct coding_system *, + struct coding_detection_info *info)); + static void decode_coding_utf_16 P_ ((struct coding_system *)); + static int encode_coding_utf_16 P_ ((struct coding_system *)); + + static int detect_coding_iso_2022 P_ ((struct coding_system *, + struct coding_detection_info *info)); + static void decode_coding_iso_2022 P_ ((struct coding_system *)); + static int encode_coding_iso_2022 P_ ((struct coding_system *)); + + static int detect_coding_emacs_mule P_ ((struct coding_system *, + struct coding_detection_info *info)); + static void decode_coding_emacs_mule P_ ((struct coding_system *)); + static int encode_coding_emacs_mule P_ ((struct coding_system *)); + + static int detect_coding_sjis P_ ((struct coding_system *, + struct coding_detection_info *info)); + static void decode_coding_sjis P_ ((struct coding_system *)); + static int encode_coding_sjis P_ ((struct coding_system *)); + + static int detect_coding_big5 P_ ((struct coding_system *, + struct coding_detection_info *info)); + static void decode_coding_big5 P_ ((struct coding_system *)); + static int encode_coding_big5 P_ ((struct coding_system *)); + + static int detect_coding_ccl P_ ((struct coding_system *, + struct coding_detection_info *info)); + static void decode_coding_ccl P_ ((struct coding_system *)); + static int encode_coding_ccl P_ ((struct coding_system *)); + + static void decode_coding_raw_text P_ ((struct coding_system *)); + static int encode_coding_raw_text P_ ((struct coding_system *)); + + + /* ISO2022 section */ + + #define CODING_ISO_INITIAL(coding, reg) \ + (XINT (AREF (AREF (CODING_ID_ATTRS ((coding)->id), \ + coding_attr_iso_initial), \ + reg))) + + + #define CODING_ISO_REQUEST(coding, charset_id) \ + ((charset_id <= (coding)->max_charset_id \ + ? (coding)->safe_charsets[charset_id] \ + : -1)) + + + #define CODING_ISO_FLAGS(coding) \ + ((coding)->spec.iso_2022.flags) + #define CODING_ISO_DESIGNATION(coding, reg) \ + ((coding)->spec.iso_2022.current_designation[reg]) + #define CODING_ISO_INVOCATION(coding, plane) \ + ((coding)->spec.iso_2022.current_invocation[plane]) + #define CODING_ISO_SINGLE_SHIFTING(coding) \ + ((coding)->spec.iso_2022.single_shifting) + #define CODING_ISO_BOL(coding) \ + ((coding)->spec.iso_2022.bol) + #define CODING_ISO_INVOKED_CHARSET(coding, plane) \ + CODING_ISO_DESIGNATION ((coding), CODING_ISO_INVOCATION ((coding), (plane))) + + /* Control characters of ISO2022. */ + /* code */ /* function */ + #define ISO_CODE_LF 0x0A /* line-feed */ + #define ISO_CODE_CR 0x0D /* carriage-return */ + #define ISO_CODE_SO 0x0E /* shift-out */ + #define ISO_CODE_SI 0x0F /* shift-in */ + #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */ + #define ISO_CODE_ESC 0x1B /* escape */ + #define ISO_CODE_SS2 0x8E /* single-shift-2 */ + #define ISO_CODE_SS3 0x8F /* single-shift-3 */ + #define ISO_CODE_CSI 0x9B /* control-sequence-introducer */ + + /* All code (1-byte) of ISO2022 is classified into one of the + followings. */ + enum iso_code_class_type + { + ISO_control_0, /* Control codes in the range + 0x00..0x1F and 0x7F, except for the + following 5 codes. */ + ISO_carriage_return, /* ISO_CODE_CR (0x0D) */ + ISO_shift_out, /* ISO_CODE_SO (0x0E) */ + ISO_shift_in, /* ISO_CODE_SI (0x0F) */ + ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ + ISO_escape, /* ISO_CODE_SO (0x1B) */ + ISO_control_1, /* Control codes in the range + 0x80..0x9F, except for the + following 3 codes. */ + ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */ + ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */ + ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */ + ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */ + ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */ + ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */ + ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */ + }; - /* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check - its validity. */ + /** The macros CODING_ISO_FLAG_XXX defines a flag bit of the + `iso-flags' attribute of an iso2022 coding system. */ - Lisp_Object - coding_safe_chars (coding_system) - Lisp_Object coding_system; - { - Lisp_Object coding_spec, plist, safe_chars; + /* If set, produce long-form designation sequence (e.g. ESC $ ( A) + instead of the correct short-form sequence (e.g. ESC $ A). */ + #define CODING_ISO_FLAG_LONG_FORM 0x0001 - coding_spec = Fget (coding_system, Qcoding_system); - plist = XVECTOR (coding_spec)->contents[3]; - safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars); - return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt); - } + /* If set, reset graphic planes and registers at end-of-line to the + initial state. */ + #define CODING_ISO_FLAG_RESET_AT_EOL 0x0002 - #define CODING_SAFE_CHAR_P(safe_chars, c) \ - (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c))) + /* If set, reset graphic planes and registers before any control + characters to the initial state. */ + #define CODING_ISO_FLAG_RESET_AT_CNTL 0x0004 - - /*** 2. Emacs internal format (emacs-mule) handlers ***/ + /* If set, encode by 7-bit environment. */ + #define CODING_ISO_FLAG_SEVEN_BITS 0x0008 - /* Emacs' internal format for representation of multiple character - sets is a kind of multi-byte encoding, i.e. characters are - represented by variable-length sequences of one-byte codes. + /* If set, use locking-shift function. */ + #define CODING_ISO_FLAG_LOCKING_SHIFT 0x0010 - ASCII characters and control characters (e.g. `tab', `newline') are - represented by one-byte sequences which are their ASCII codes, in - the range 0x00 through 0x7F. + /* If set, use single-shift function. Overwrite + CODING_ISO_FLAG_LOCKING_SHIFT. */ + #define CODING_ISO_FLAG_SINGLE_SHIFT 0x0020 - 8-bit characters of the range 0x80..0x9F are represented by - two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit - code + 0x20). + /* If set, use designation escape sequence. */ + #define CODING_ISO_FLAG_DESIGNATION 0x0040 - 8-bit characters of the range 0xA0..0xFF are represented by - one-byte sequences which are their 8-bit code. + /* If set, produce revision number sequence. */ + #define CODING_ISO_FLAG_REVISION 0x0080 - The other characters are represented by a sequence of `base - leading-code', optional `extended leading-code', and one or two - `position-code's. The length of the sequence is determined by the - base leading-code. Leading-code takes the range 0x81 through 0x9D, - whereas extended leading-code and position-code take the range 0xA0 - through 0xFF. See `charset.h' for more details about leading-code - and position-code. + /* If set, produce ISO6429's direction specifying sequence. */ + #define CODING_ISO_FLAG_DIRECTION 0x0100 - --- CODE RANGE of Emacs' internal format --- - character set range - ------------- ----- - ascii 0x00..0x7F - eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF - eight-bit-graphic 0xA0..0xBF - ELSE 0x81..0x9D + [0xA0..0xFF]+ - --------------------------------------------- + /* If set, assume designation states are reset at beginning of line on + output. */ + #define CODING_ISO_FLAG_INIT_AT_BOL 0x0200 - As this is the internal character representation, the format is - usually not used externally (i.e. in a file or in a data sent to a - process). But, it is possible to have a text externally in this - format (i.e. by encoding by the coding system `emacs-mule'). + /* If set, designation sequence should be placed at beginning of line + on output. */ + #define CODING_ISO_FLAG_DESIGNATE_AT_BOL 0x0400 - In that case, a sequence of one-byte codes has a slightly different - form. + /* If set, do not encode unsafe charactes on output. */ + #define CODING_ISO_FLAG_SAFE 0x0800 - Firstly, all characters in eight-bit-control are represented by - one-byte sequences which are their 8-bit code. + /* If set, extra latin codes (128..159) are accepted as a valid code + on input. */ + #define CODING_ISO_FLAG_LATIN_EXTRA 0x1000 - Next, character composition data are represented by the byte - sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ..., - where, - METHOD is 0xF0 plus one of composition method (enum - composition_method), + #define CODING_ISO_FLAG_COMPOSITION 0x2000 - BYTES is 0xA0 plus the byte length of these composition data, + #define CODING_ISO_FLAG_EUC_TW_SHIFT 0x4000 - CHARS is 0xA0 plus the number of characters composed by these - data, + #define CODING_ISO_FLAG_USE_ROMAN 0x8000 - COMPONENTs are characters of multibyte form or composition - rules encoded by two-byte of ASCII codes. + #define CODING_ISO_FLAG_USE_OLDJIS 0x10000 - In addition, for backward compatibility, the following formats are - also recognized as composition data on decoding. + #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000 - 0x80 MSEQ ... - 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ + /* A character to be produced on output if encoding of the original + character is prohibited by CODING_ISO_FLAG_SAFE. */ + #define CODING_INHIBIT_CHARACTER_SUBSTITUTION '?' - Here, - MSEQ is a multibyte form but in these special format: - ASCII: 0xA0 ASCII_CODE+0x80, - other: LEADING_CODE+0x20 FOLLOWING-BYTE ..., - RULE is a one byte code of the range 0xA0..0xF0 that - represents a composition rule. - */ - enum emacs_code_class_type emacs_code_class[256]; + /* UTF-16 section */ + #define CODING_UTF_16_BOM(coding) \ + ((coding)->spec.utf_16.bom) - /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". - Check if a text is encoded in Emacs' internal format. If it is, - return CODING_CATEGORY_MASK_EMACS_MULE, else return 0. */ + #define CODING_UTF_16_ENDIAN(coding) \ + ((coding)->spec.utf_16.endian) - static int - detect_coding_emacs_mule (src, src_end, multibytep) - unsigned char *src, *src_end; - int multibytep; - { - unsigned char c; - int composing = 0; - /* Dummy for ONE_MORE_BYTE. */ - struct coding_system dummy_coding; - struct coding_system *coding = &dummy_coding; + #define CODING_UTF_16_SURROGATE(coding) \ + ((coding)->spec.utf_16.surrogate) - while (1) - { - ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); - if (composing) - { - if (c < 0xA0) - composing = 0; - else if (c == 0xA0) - { - ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); - c &= 0x7F; - } - else - c -= 0x20; - } + /* CCL section */ + #define CODING_CCL_DECODER(coding) \ + AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_decoder) + #define CODING_CCL_ENCODER(coding) \ + AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_encoder) + #define CODING_CCL_VALIDS(coding) \ - (XSTRING (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids)) \ - ->data) ++ (SDATA (AREF (CODING_ID_ATTRS ((coding)->id), coding_attr_ccl_valids))) - if (c < 0x20) - { - if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) - return 0; - } - else if (c >= 0x80 && c < 0xA0) - { - if (c == 0x80) - /* Old leading code for a composite character. */ - composing = 1; - else - { - unsigned char *src_base = src - 1; - int bytes; + /* Index for each coding category in `coding_categories' */ - if (!UNIBYTE_STR_AS_MULTIBYTE_P (src_base, src_end - src_base, - bytes)) - return 0; - src = src_base + bytes; - } - } - } - label_end_of_loop: - return CODING_CATEGORY_MASK_EMACS_MULE; - } + enum coding_category + { + coding_category_iso_7, + coding_category_iso_7_tight, + coding_category_iso_8_1, + coding_category_iso_8_2, + coding_category_iso_7_else, + coding_category_iso_8_else, + coding_category_utf_8, + coding_category_utf_16_auto, + coding_category_utf_16_be, + coding_category_utf_16_le, + coding_category_utf_16_be_nosig, + coding_category_utf_16_le_nosig, + coding_category_charset, + coding_category_sjis, + coding_category_big5, + coding_category_ccl, + coding_category_emacs_mule, + /* All above are targets of code detection. */ + coding_category_raw_text, + coding_category_undecided, + coding_category_max + }; + + /* Definitions of flag bits used in detect_coding_XXXX. */ + #define CATEGORY_MASK_ISO_7 (1 << coding_category_iso_7) + #define CATEGORY_MASK_ISO_7_TIGHT (1 << coding_category_iso_7_tight) + #define CATEGORY_MASK_ISO_8_1 (1 << coding_category_iso_8_1) + #define CATEGORY_MASK_ISO_8_2 (1 << coding_category_iso_8_2) + #define CATEGORY_MASK_ISO_7_ELSE (1 << coding_category_iso_7_else) + #define CATEGORY_MASK_ISO_8_ELSE (1 << coding_category_iso_8_else) + #define CATEGORY_MASK_UTF_8 (1 << coding_category_utf_8) + #define CATEGORY_MASK_UTF_16_AUTO (1 << coding_category_utf_16_auto) + #define CATEGORY_MASK_UTF_16_BE (1 << coding_category_utf_16_be) + #define CATEGORY_MASK_UTF_16_LE (1 << coding_category_utf_16_le) + #define CATEGORY_MASK_UTF_16_BE_NOSIG (1 << coding_category_utf_16_be_nosig) + #define CATEGORY_MASK_UTF_16_LE_NOSIG (1 << coding_category_utf_16_le_nosig) + #define CATEGORY_MASK_CHARSET (1 << coding_category_charset) + #define CATEGORY_MASK_SJIS (1 << coding_category_sjis) + #define CATEGORY_MASK_BIG5 (1 << coding_category_big5) + #define CATEGORY_MASK_CCL (1 << coding_category_ccl) + #define CATEGORY_MASK_EMACS_MULE (1 << coding_category_emacs_mule) + #define CATEGORY_MASK_RAW_TEXT (1 << coding_category_raw_text) + + /* This value is returned if detect_coding_mask () find nothing other + than ASCII characters. */ + #define CATEGORY_MASK_ANY \ + (CATEGORY_MASK_ISO_7 \ + | CATEGORY_MASK_ISO_7_TIGHT \ + | CATEGORY_MASK_ISO_8_1 \ + | CATEGORY_MASK_ISO_8_2 \ + | CATEGORY_MASK_ISO_7_ELSE \ + | CATEGORY_MASK_ISO_8_ELSE \ + | CATEGORY_MASK_UTF_8 \ + | CATEGORY_MASK_UTF_16_BE \ + | CATEGORY_MASK_UTF_16_LE \ + | CATEGORY_MASK_UTF_16_BE_NOSIG \ + | CATEGORY_MASK_UTF_16_LE_NOSIG \ + | CATEGORY_MASK_CHARSET \ + | CATEGORY_MASK_SJIS \ + | CATEGORY_MASK_BIG5 \ + | CATEGORY_MASK_CCL \ + | CATEGORY_MASK_EMACS_MULE) + + + #define CATEGORY_MASK_ISO_7BIT \ + (CATEGORY_MASK_ISO_7 | CATEGORY_MASK_ISO_7_TIGHT) + + #define CATEGORY_MASK_ISO_8BIT \ + (CATEGORY_MASK_ISO_8_1 | CATEGORY_MASK_ISO_8_2) + + #define CATEGORY_MASK_ISO_ELSE \ + (CATEGORY_MASK_ISO_7_ELSE | CATEGORY_MASK_ISO_8_ELSE) + + #define CATEGORY_MASK_ISO_ESCAPE \ + (CATEGORY_MASK_ISO_7 \ + | CATEGORY_MASK_ISO_7_TIGHT \ + | CATEGORY_MASK_ISO_7_ELSE \ + | CATEGORY_MASK_ISO_8_ELSE) + + #define CATEGORY_MASK_ISO \ + ( CATEGORY_MASK_ISO_7BIT \ + | CATEGORY_MASK_ISO_8BIT \ + | CATEGORY_MASK_ISO_ELSE) + + #define CATEGORY_MASK_UTF_16 \ + (CATEGORY_MASK_UTF_16_BE \ + | CATEGORY_MASK_UTF_16_LE \ + | CATEGORY_MASK_UTF_16_BE_NOSIG \ + | CATEGORY_MASK_UTF_16_LE_NOSIG) + + + /* List of symbols `coding-category-xxx' ordered by priority. This + variable is exposed to Emacs Lisp. */ + static Lisp_Object Vcoding_category_list; + + /* Table of coding categories (Lisp symbols). This variable is for + internal use oly. */ + static Lisp_Object Vcoding_category_table; + + /* Table of coding-categories ordered by priority. */ + static enum coding_category coding_priorities[coding_category_max]; + + /* Nth element is a coding context for the coding system bound to the + Nth coding category. */ + static struct coding_system coding_categories[coding_category_max]; + + /*** Commonly used macros and functions ***/ + + #ifndef min + #define min(a, b) ((a) < (b) ? (a) : (b)) + #endif + #ifndef max + #define max(a, b) ((a) > (b) ? (a) : (b)) + #endif + + #define CODING_GET_INFO(coding, attrs, eol_type, charset_list) \ + do { \ + attrs = CODING_ID_ATTRS (coding->id); \ + eol_type = CODING_ID_EOL_TYPE (coding->id); \ + if (VECTORP (eol_type)) \ + eol_type = Qunix; \ + charset_list = CODING_ATTR_CHARSET_LIST (attrs); \ + } while (0) - /* Record the starting position START and METHOD of one composition. */ + /* Safely get one byte from the source text pointed by SRC which ends + at SRC_END, and set C to that byte. If there are not enough bytes + in the source, it jumps to `no_more_source'. The caller + should declare and set these variables appropriately in advance: + src, src_end, multibytep + */ - #define CODING_ADD_COMPOSITION_START(coding, start, method) \ + #define ONE_MORE_BYTE(c) \ do { \ - struct composition_data *cmp_data = coding->cmp_data; \ - int *data = cmp_data->data + cmp_data->used; \ - coding->cmp_data_start = cmp_data->used; \ - data[0] = -1; \ - data[1] = cmp_data->char_offset + start; \ - data[3] = (int) method; \ - cmp_data->used += 4; \ + if (src == src_end) \ + { \ + if (src_base < src) \ + coding->result = CODING_RESULT_INSUFFICIENT_SRC; \ + goto no_more_source; \ + } \ + c = *src++; \ + if (multibytep && (c & 0x80)) \ + { \ + if ((c & 0xFE) != 0xC0) \ + error ("Undecodable char found"); \ + c = ((c & 1) << 6) | *src++; \ + } \ + consumed_chars++; \ } while (0) - /* Record the ending position END of the current composition. */ - #define CODING_ADD_COMPOSITION_END(coding, end) \ - do { \ - struct composition_data *cmp_data = coding->cmp_data; \ - int *data = cmp_data->data + coding->cmp_data_start; \ - data[0] = cmp_data->used - coding->cmp_data_start; \ - data[2] = cmp_data->char_offset + end; \ + #define ONE_MORE_BYTE_NO_CHECK(c) \ + do { \ + c = *src++; \ + if (multibytep && (c & 0x80)) \ + { \ + if ((c & 0xFE) != 0xC0) \ + error ("Undecodable char found"); \ + c = ((c & 1) << 6) | *src++; \ + } \ + consumed_chars++; \ } while (0) - /* Record one COMPONENT (alternate character or composition rule). */ - #define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ - do { \ - coding->cmp_data->data[coding->cmp_data->used++] = component; \ - if (coding->cmp_data->used - coding->cmp_data_start \ - == COMPOSITION_DATA_MAX_BUNCH_LENGTH) \ - { \ - CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \ - coding->composing = COMPOSITION_NO; \ - } \ + /* Store a byte C in the place pointed by DST and increment DST to the + next free point, and increment PRODUCED_CHARS. The caller should + assure that C is 0..127, and declare and set the variable `dst' + appropriately in advance. + */ + + + #define EMIT_ONE_ASCII_BYTE(c) \ + do { \ + produced_chars++; \ + *dst++ = (c); \ } while (0) @@@ -759,425 -830,1325 +835,1324 @@@ } while (0) - /* Decode a composition rule represented as a component of composition - sequence of Emacs 20 style at SRC. Set C to the rule. If not - valid rule is found, set C to -1. */ + #define EMIT_THREE_BYTES(c1, c2, c3) \ + do { \ + EMIT_ONE_BYTE (c1); \ + EMIT_TWO_BYTES (c2, c3); \ + } while (0) - #define DECODE_EMACS_MULE_COMPOSITION_RULE(c) \ - do { \ - c = SAFE_ONE_MORE_BYTE (); \ - c -= 0xA0; \ - if (c < 0 || c >= 81) \ - c = -1; \ - else \ - { \ - gref = c / 9, nref = c % 9; \ - c = COMPOSITION_ENCODE_RULE (gref, nref); \ - } \ + + #define EMIT_FOUR_BYTES(c1, c2, c3, c4) \ + do { \ + EMIT_TWO_BYTES (c1, c2); \ + EMIT_TWO_BYTES (c3, c4); \ + } while (0) + + + #define CODING_DECODE_CHAR(coding, src, src_base, src_end, charset, code, c) \ + do { \ + charset_map_loaded = 0; \ + c = DECODE_CHAR (charset, code); \ + if (charset_map_loaded) \ + { \ - unsigned char *orig = coding->source; \ ++ const unsigned char *orig = coding->source; \ + EMACS_INT offset; \ + \ + coding_set_source (coding); \ + offset = coding->source - orig; \ + src += offset; \ + src_base += offset; \ + src_end += offset; \ + } \ } while (0) - /* Decode composition sequence encoded by `emacs-mule' at the source - pointed by SRC. SRC_END is the end of source. Store information - of the composition in CODING->cmp_data. + #define ASSURE_DESTINATION(bytes) \ + do { \ + if (dst + (bytes) >= dst_end) \ + { \ + int more_bytes = charbuf_end - charbuf + (bytes); \ + \ + dst = alloc_destination (coding, more_bytes, dst); \ + dst_end = coding->destination + coding->dst_bytes; \ + } \ + } while (0) - For backward compatibility, decode also a composition sequence of - Emacs 20 style. In that case, the composition sequence contains - characters that should be extracted into a buffer or string. Store - those characters at *DESTINATION in multibyte form. - If we encounter an invalid byte sequence, return 0. - If we encounter an insufficient source or destination, or - insufficient space in CODING->cmp_data, return 1. - Otherwise, return consumed bytes in the source. - */ - static INLINE int - decode_composition_emacs_mule (coding, src, src_end, - destination, dst_end, dst_bytes) + static void + coding_set_source (coding) struct coding_system *coding; - unsigned char *src, *src_end, **destination, *dst_end; - int dst_bytes; { - unsigned char *dst = *destination; - int method, data_len, nchars; - unsigned char *src_base = src++; - /* Store components of composition. */ - int component[COMPOSITION_DATA_MAX_BUNCH_LENGTH]; - int ncomponent; - /* Store multibyte form of characters to be composed. This is for - Emacs 20 style composition sequence. */ - unsigned char buf[MAX_COMPOSITION_COMPONENTS * MAX_MULTIBYTE_LENGTH]; - unsigned char *bufp = buf; - int c, i, gref, nref; + if (BUFFERP (coding->src_object)) + { + struct buffer *buf = XBUFFER (coding->src_object); - if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH - >= COMPOSITION_DATA_SIZE) + if (coding->src_pos < 0) + coding->source = BUF_GAP_END_ADDR (buf) + coding->src_pos_byte; + else + coding->source = BUF_BYTE_ADDRESS (buf, coding->src_pos_byte); + } + else if (STRINGP (coding->src_object)) { - coding->result = CODING_FINISH_INSUFFICIENT_CMP; - return -1; - coding->source = (XSTRING (coding->src_object)->data - + coding->src_pos_byte); ++ coding->source = SDATA (coding->src_object) + coding->src_pos_byte; } + else + /* Otherwise, the source is C string and is never relocated + automatically. Thus we don't have to update anything. */ + ; + } - ONE_MORE_BYTE (c); - if (c - 0xF0 >= COMPOSITION_RELATIVE - && c - 0xF0 <= COMPOSITION_WITH_RULE_ALTCHARS) + static void + coding_set_destination (coding) + struct coding_system *coding; + { + if (BUFFERP (coding->dst_object)) { - int with_rule; - - method = c - 0xF0; - with_rule = (method == COMPOSITION_WITH_RULE - || method == COMPOSITION_WITH_RULE_ALTCHARS); - ONE_MORE_BYTE (c); - data_len = c - 0xA0; - if (data_len < 4 - || src_base + data_len > src_end) - return 0; - ONE_MORE_BYTE (c); - nchars = c - 0xA0; - if (c < 1) - return 0; - for (ncomponent = 0; src < src_base + data_len; ncomponent++) + if (coding->src_pos < 0) { - /* If it is longer than this, it can't be valid. */ - if (ncomponent >= COMPOSITION_DATA_MAX_BUNCH_LENGTH) - return 0; - - if (ncomponent % 2 && with_rule) - { - ONE_MORE_BYTE (gref); - gref -= 32; - ONE_MORE_BYTE (nref); - nref -= 32; - c = COMPOSITION_ENCODE_RULE (gref, nref); - } - else - { - int bytes; - if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes) - || (coding->flags /* We are recovering a file. */ - && src[0] == LEADING_CODE_8_BIT_CONTROL - && ! CHAR_HEAD_P (src[1]))) - c = STRING_CHAR (src, bytes); - else - c = *src, bytes = 1; - src += bytes; - } - component[ncomponent] = c; + coding->destination = BEG_ADDR + coding->dst_pos_byte - 1; + coding->dst_bytes = (GAP_END_ADDR + - (coding->src_bytes - coding->consumed) + - coding->destination); } + else + { + /* We are sure that coding->dst_pos_byte is before the gap + of the buffer. */ + coding->destination = (BUF_BEG_ADDR (XBUFFER (coding->dst_object)) + + coding->dst_pos_byte - 1); + coding->dst_bytes = (BUF_GAP_END_ADDR (XBUFFER (coding->dst_object)) + - coding->destination); + } + } + else + /* Otherwise, the destination is C string and is never relocated + automatically. Thus we don't have to update anything. */ + ; + } + + + static void + coding_alloc_by_realloc (coding, bytes) + struct coding_system *coding; + EMACS_INT bytes; + { + coding->destination = (unsigned char *) xrealloc (coding->destination, + coding->dst_bytes + bytes); + coding->dst_bytes += bytes; + } + + static void + coding_alloc_by_making_gap (coding, bytes) + struct coding_system *coding; + EMACS_INT bytes; + { + if (BUFFERP (coding->dst_object) + && EQ (coding->src_object, coding->dst_object)) + { + EMACS_INT add = coding->src_bytes - coding->consumed; + + GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; + make_gap (bytes); + GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; + } + else + { + Lisp_Object this_buffer; + + this_buffer = Fcurrent_buffer (); + set_buffer_internal (XBUFFER (coding->dst_object)); + make_gap (bytes); + set_buffer_internal (XBUFFER (this_buffer)); } + } - ++ + + static unsigned char * + alloc_destination (coding, nbytes, dst) + struct coding_system *coding; + int nbytes; + unsigned char *dst; + { + EMACS_INT offset = dst - coding->destination; + + if (BUFFERP (coding->dst_object)) + coding_alloc_by_making_gap (coding, nbytes); else + coding_alloc_by_realloc (coding, nbytes); + coding->result = CODING_RESULT_SUCCESS; + coding_set_destination (coding); + dst = coding->destination + offset; + return dst; + } + + /** Macros for annotations. */ + + /* Maximum length of annotation data (sum of annotations for + composition and charset). */ + #define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5) + + /* An annotation data is stored in the array coding->charbuf in this + format: + [ -LENGTH ANNOTATION_MASK FROM TO ... ] + LENGTH is the number of elements in the annotation. + ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK. + FROM and TO specify the range of text annotated. They are relative + to coding->src_pos (on encoding) or coding->dst_pos (on decoding). + + The format of the following elements depend on ANNOTATION_MASK. + + In the case of CODING_ANNOTATE_COMPOSITION_MASK, these elements + follows: + ... METHOD [ COMPOSITION-COMPONENTS ... ] + METHOD is one of enum composition_method. + Optionnal COMPOSITION-COMPONENTS are characters and composition + rules. + + In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID + follows. */ + + #define ADD_ANNOTATION_DATA(buf, len, mask, from, to) \ + do { \ + *(buf)++ = -(len); \ + *(buf)++ = (mask); \ + *(buf)++ = (from); \ + *(buf)++ = (to); \ + coding->annotated = 1; \ + } while (0); + + #define ADD_COMPOSITION_DATA(buf, from, to, method) \ + do { \ + ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \ + *buf++ = method; \ + } while (0) + + + #define ADD_CHARSET_DATA(buf, from, to, id) \ + do { \ + ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \ + *buf++ = id; \ + } while (0) + + + /*** 2. Emacs' internal format (emacs-utf-8) ***/ + + + + + /*** 3. UTF-8 ***/ + + /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". + Check if a text is encoded in UTF-8. If it is, return 1, else + return 0. */ + + #define UTF_8_1_OCTET_P(c) ((c) < 0x80) + #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80) + #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0) + #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0) + #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0) + #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8) + + static int + detect_coding_utf_8 (coding, detect_info) + struct coding_system *coding; + struct coding_detection_info *detect_info; + { - unsigned char *src = coding->source, *src_base = src; - unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src = coding->source, *src_base = src; ++ const unsigned char *src_end = coding->source + coding->src_bytes; + int multibytep = coding->src_multibyte; + int consumed_chars = 0; + int found = 0; + int incomplete; + + detect_info->checked |= CATEGORY_MASK_UTF_8; + /* A coding system of this category is always ASCII compatible. */ + src += coding->head_ascii; + + while (1) { - /* This may be an old Emacs 20 style format. See the comment at - the section 2 of this file. */ - while (src < src_end && !CHAR_HEAD_P (*src)) src++; - if (src == src_end - && !(coding->mode & CODING_MODE_LAST_BLOCK)) - goto label_end_of_loop; + int c, c1, c2, c3, c4; - src_end = src; - src = src_base + 1; - if (c < 0xC0) + incomplete = 0; + ONE_MORE_BYTE (c); + if (UTF_8_1_OCTET_P (c)) + continue; + incomplete = 1; + ONE_MORE_BYTE (c1); + if (! UTF_8_EXTRA_OCTET_P (c1)) + break; + if (UTF_8_2_OCTET_LEADING_P (c)) { - method = COMPOSITION_RELATIVE; - for (ncomponent = 0; ncomponent < MAX_COMPOSITION_COMPONENTS;) - { - DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp); - if (c < 0) - break; - component[ncomponent++] = c; - } - if (ncomponent < 2) - return 0; - nchars = ncomponent; + found = CATEGORY_MASK_UTF_8; + continue; } - else if (c == 0xFF) + ONE_MORE_BYTE (c2); + if (! UTF_8_EXTRA_OCTET_P (c2)) + break; + if (UTF_8_3_OCTET_LEADING_P (c)) { - method = COMPOSITION_WITH_RULE; - src++; - DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp); - if (c < 0) - return 0; - component[0] = c; - for (ncomponent = 1; - ncomponent < MAX_COMPOSITION_COMPONENTS * 2 - 1;) - { - DECODE_EMACS_MULE_COMPOSITION_RULE (c); - if (c < 0) - break; - component[ncomponent++] = c; - DECODE_EMACS_MULE_COMPOSITION_CHAR (c, bufp); - if (c < 0) - break; - component[ncomponent++] = c; - } - if (ncomponent < 3) - return 0; - nchars = (ncomponent + 1) / 2; + found = CATEGORY_MASK_UTF_8; + continue; } - else - return 0; + ONE_MORE_BYTE (c3); + if (! UTF_8_EXTRA_OCTET_P (c3)) + break; + if (UTF_8_4_OCTET_LEADING_P (c)) + { + found = CATEGORY_MASK_UTF_8; + continue; + } + ONE_MORE_BYTE (c4); + if (! UTF_8_EXTRA_OCTET_P (c4)) + break; + if (UTF_8_5_OCTET_LEADING_P (c)) + { + found = CATEGORY_MASK_UTF_8; + continue; + } + break; } + detect_info->rejected |= CATEGORY_MASK_UTF_8; + return 0; - if (buf == bufp || dst + (bufp - buf) <= (dst_bytes ? dst_end : src)) + no_more_source: + if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) { - CODING_ADD_COMPOSITION_START (coding, coding->produced_char, method); - for (i = 0; i < ncomponent; i++) - CODING_ADD_COMPOSITION_COMPONENT (coding, component[i]); - CODING_ADD_COMPOSITION_END (coding, coding->produced_char + nchars); - if (buf < bufp) - { - unsigned char *p = buf; - EMIT_BYTES (p, bufp); - *destination += bufp - buf; - coding->produced_char += nchars; - } - return (src - src_base); + detect_info->rejected |= CATEGORY_MASK_UTF_8; + return 0; } - label_end_of_loop: - return -1; + detect_info->found |= found; + return 1; } - /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ static void - decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) + decode_coding_utf_8 (coding) struct coding_system *coding; - unsigned char *source, *destination; - int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; - unsigned char *dst = destination; - unsigned char *dst_end = destination + dst_bytes; - /* SRC_BASE remembers the start position in source in each loop. - The loop will be exited when there's not enough source code, or - when there's not enough destination area to produce a - character. */ - unsigned char *src = coding->source + coding->consumed; - unsigned char *src_end = coding->source + coding->src_bytes; -- unsigned char *src_base; ++ const unsigned char *src = coding->source + coding->consumed; ++ const unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src_base; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_size; + int consumed_chars = 0, consumed_chars_base; + int multibytep = coding->src_multibyte; + Lisp_Object attr, eol_type, charset_list; - coding->produced_char = 0; - while ((src_base = src) < src_end) + CODING_GET_INFO (coding, attr, eol_type, charset_list); + + while (1) { - unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p; - int bytes; + int c, c1, c2, c3, c4, c5; - if (*src == '\r') - { - int c = *src++; + src_base = src; + consumed_chars_base = consumed_chars; - if (coding->eol_type == CODING_EOL_CR) - c = '\n'; - else if (coding->eol_type == CODING_EOL_CRLF) + if (charbuf >= charbuf_end) + break; + + ONE_MORE_BYTE (c1); + if (UTF_8_1_OCTET_P(c1)) + { + c = c1; + if (c == '\r') { - ONE_MORE_BYTE (c); - if (c != '\n') + if (EQ (eol_type, Qdos)) { - src--; - c = '\r'; + if (src == src_end) + { + coding->result = CODING_RESULT_INSUFFICIENT_SRC; + goto no_more_source; + } + if (*src == '\n') + ONE_MORE_BYTE (c); } + else if (EQ (eol_type, Qmac)) + c = '\n'; } - *dst++ = c; - coding->produced_char++; - continue; } - else if (*src == '\n') + else { - if ((coding->eol_type == CODING_EOL_CR - || coding->eol_type == CODING_EOL_CRLF) - && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) + ONE_MORE_BYTE (c2); + if (! UTF_8_EXTRA_OCTET_P (c2)) + goto invalid_code; + if (UTF_8_2_OCTET_LEADING_P (c1)) { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; + c = ((c1 & 0x1F) << 6) | (c2 & 0x3F); + /* Reject overlong sequences here and below. Encoders + producing them are incorrect, they can be misleading, + and they mess up read/write invariance. */ + if (c < 128) + goto invalid_code; } - *dst++ = *src++; - coding->produced_char++; - continue; - } - else if (*src == 0x80 && coding->cmp_data) - { - /* Start of composition data. */ - int consumed = decode_composition_emacs_mule (coding, src, src_end, - &dst, dst_end, - dst_bytes); - if (consumed < 0) - goto label_end_of_loop; - else if (consumed > 0) + else { - src += consumed; - continue; + ONE_MORE_BYTE (c3); + if (! UTF_8_EXTRA_OCTET_P (c3)) + goto invalid_code; + if (UTF_8_3_OCTET_LEADING_P (c1)) + { + c = (((c1 & 0xF) << 12) + | ((c2 & 0x3F) << 6) | (c3 & 0x3F)); + if (c < 0x800 + || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */ + goto invalid_code; + } + else + { + ONE_MORE_BYTE (c4); + if (! UTF_8_EXTRA_OCTET_P (c4)) + goto invalid_code; + if (UTF_8_4_OCTET_LEADING_P (c1)) + { + c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12) + | ((c3 & 0x3F) << 6) | (c4 & 0x3F)); + if (c < 0x10000) + goto invalid_code; + } + else + { + ONE_MORE_BYTE (c5); + if (! UTF_8_EXTRA_OCTET_P (c5)) + goto invalid_code; + if (UTF_8_5_OCTET_LEADING_P (c1)) + { + c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18) + | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6) + | (c5 & 0x3F)); + if ((c > MAX_CHAR) || (c < 0x200000)) + goto invalid_code; + } + else + goto invalid_code; + } + } } - bytes = CHAR_STRING (*src, tmp); - p = tmp; - src++; - } - else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes) - || (coding->flags /* We are recovering a file. */ - && src[0] == LEADING_CODE_8_BIT_CONTROL - && ! CHAR_HEAD_P (src[1]))) - { - p = src; - src += bytes; } - else + + *charbuf++ = c; + continue; + + invalid_code: + src = src_base; + consumed_chars = consumed_chars_base; + ONE_MORE_BYTE (c); + *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + coding->errors++; + } + + no_more_source: + coding->consumed_char += consumed_chars_base; + coding->consumed = src_base - coding->source; + coding->charbuf_used = charbuf - coding->charbuf; + } + + + static int + encode_coding_utf_8 (coding) + struct coding_system *coding; + { + int multibytep = coding->dst_multibyte; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_used; + unsigned char *dst = coding->destination + coding->produced; + unsigned char *dst_end = coding->destination + coding->dst_bytes; + int produced_chars = 0; + int c; + + if (multibytep) + { + int safe_room = MAX_MULTIBYTE_LENGTH * 2; + + while (charbuf < charbuf_end) { - bytes = CHAR_STRING (*src, tmp); - p = tmp; - src++; + unsigned char str[MAX_MULTIBYTE_LENGTH], *p, *pend = str; - ++ + ASSURE_DESTINATION (safe_room); + c = *charbuf++; + if (CHAR_BYTE8_P (c)) + { + c = CHAR_TO_BYTE8 (c); + EMIT_ONE_BYTE (c); + } + else + { + CHAR_STRING_ADVANCE (c, pend); + for (p = str; p < pend; p++) + EMIT_ONE_BYTE (*p); + } } - if (dst + bytes >= (dst_bytes ? dst_end : src)) + } + else + { + int safe_room = MAX_MULTIBYTE_LENGTH; + + while (charbuf < charbuf_end) { - coding->result = CODING_FINISH_INSUFFICIENT_DST; - break; + ASSURE_DESTINATION (safe_room); + c = *charbuf++; + dst += CHAR_STRING (c, dst); + produced_chars++; } - while (bytes--) *dst++ = *p++; - coding->produced_char++; } - label_end_of_loop: - coding->consumed = coding->consumed_char = src_base - source; - coding->produced = dst - destination; + coding->result = CODING_RESULT_SUCCESS; + coding->produced_char += produced_chars; + coding->produced = dst - coding->destination; + return 0; } - /* Encode composition data stored at DATA into a special byte sequence - starting by 0x80. Update CODING->cmp_data_start and maybe - CODING->cmp_data for the next call. */ + /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". + Check if a text is encoded in one of UTF-16 based coding systems. + If it is, return 1, else return 0. */ - #define ENCODE_COMPOSITION_EMACS_MULE(coding, data) \ - do { \ - unsigned char buf[1024], *p0 = buf, *p; \ - int len = data[0]; \ - int i; \ - \ - buf[0] = 0x80; \ - buf[1] = 0xF0 + data[3]; /* METHOD */ \ - buf[3] = 0xA0 + (data[2] - data[1]); /* COMPOSED-CHARS */ \ - p = buf + 4; \ - if (data[3] == COMPOSITION_WITH_RULE \ - || data[3] == COMPOSITION_WITH_RULE_ALTCHARS) \ - { \ - p += CHAR_STRING (data[4], p); \ - for (i = 5; i < len; i += 2) \ - { \ - int gref, nref; \ - COMPOSITION_DECODE_RULE (data[i], gref, nref); \ - *p++ = 0x20 + gref; \ - *p++ = 0x20 + nref; \ - p += CHAR_STRING (data[i + 1], p); \ - } \ - } \ - else \ - { \ - for (i = 4; i < len; i++) \ - p += CHAR_STRING (data[i], p); \ - } \ - buf[2] = 0xA0 + (p - buf); /* COMPONENTS-BYTES */ \ - \ - if (dst + (p - buf) + 4 > (dst_bytes ? dst_end : src)) \ - { \ - coding->result = CODING_FINISH_INSUFFICIENT_DST; \ - goto label_end_of_loop; \ - } \ - while (p0 < p) \ - *dst++ = *p0++; \ - coding->cmp_data_start += data[0]; \ - if (coding->cmp_data_start == coding->cmp_data->used \ - && coding->cmp_data->next) \ - { \ - coding->cmp_data = coding->cmp_data->next; \ - coding->cmp_data_start = 0; \ - } \ - } while (0) + #define UTF_16_HIGH_SURROGATE_P(val) \ + (((val) & 0xFC00) == 0xD800) + + #define UTF_16_LOW_SURROGATE_P(val) \ + (((val) & 0xFC00) == 0xDC00) + #define UTF_16_INVALID_P(val) \ + (((val) == 0xFFFE) \ + || ((val) == 0xFFFF) \ + || UTF_16_LOW_SURROGATE_P (val)) - static void encode_eol P_ ((struct coding_system *, const unsigned char *, - unsigned char *, int, int)); - static void - encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes) + static int + detect_coding_utf_16 (coding, detect_info) struct coding_system *coding; - unsigned char *source, *destination; - int src_bytes, dst_bytes; + struct coding_detection_info *detect_info; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; - unsigned char *dst = destination; - unsigned char *dst_end = destination + dst_bytes; - unsigned char *src_base; - int c; - int char_offset; - int *data; - unsigned char *src = coding->source, *src_base = src; - unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src = coding->source, *src_base = src; ++ const unsigned char *src_end = coding->source + coding->src_bytes; + int multibytep = coding->src_multibyte; + int consumed_chars = 0; + int c1, c2; - Lisp_Object translation_table; + detect_info->checked |= CATEGORY_MASK_UTF_16; - translation_table = Qnil; + if (coding->mode & CODING_MODE_LAST_BLOCK + && (coding->src_bytes & 1)) + { + detect_info->rejected |= CATEGORY_MASK_UTF_16; + return 0; + } + ONE_MORE_BYTE (c1); + ONE_MORE_BYTE (c2); - /* Optimization for the case that there's no composition. */ - if (!coding->cmp_data || coding->cmp_data->used == 0) + if ((c1 == 0xFF) && (c2 == 0xFE)) { - encode_eol (coding, source, destination, src_bytes, dst_bytes); - return; + detect_info->found |= (CATEGORY_MASK_UTF_16_LE + | CATEGORY_MASK_UTF_16_AUTO); + detect_info->rejected |= CATEGORY_MASK_UTF_16_BE; } + else if ((c1 == 0xFE) && (c2 == 0xFF)) + { + detect_info->found |= (CATEGORY_MASK_UTF_16_BE + | CATEGORY_MASK_UTF_16_AUTO); + detect_info->rejected |= CATEGORY_MASK_UTF_16_LE; + } + no_more_source: + return 1; + } - char_offset = coding->cmp_data->char_offset; - data = coding->cmp_data->data + coding->cmp_data_start; - while (1) + static void + decode_coding_utf_16 (coding) + struct coding_system *coding; + { - unsigned char *src = coding->source + coding->consumed; - unsigned char *src_end = coding->source + coding->src_bytes; - unsigned char *src_base; ++ const unsigned char *src = coding->source + coding->consumed; ++ const unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src_base; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_size; + int consumed_chars = 0, consumed_chars_base; + int multibytep = coding->src_multibyte; + enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); + enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); + int surrogate = CODING_UTF_16_SURROGATE (coding); + Lisp_Object attr, eol_type, charset_list; + + CODING_GET_INFO (coding, attr, eol_type, charset_list); + + if (bom == utf_16_with_bom) { + int c, c1, c2; + src_base = src; + ONE_MORE_BYTE (c1); + ONE_MORE_BYTE (c2); + c = (c1 << 8) | c2; - /* If SRC starts a composition, encode the information about the - composition in advance. */ - if (coding->cmp_data_start < coding->cmp_data->used - && char_offset + coding->consumed_char == data[1]) + if (endian == utf_16_big_endian + ? c != 0xFEFF : c != 0xFFFE) { - ENCODE_COMPOSITION_EMACS_MULE (coding, data); - char_offset = coding->cmp_data->char_offset; - data = coding->cmp_data->data + coding->cmp_data_start; + /* The first two bytes are not BOM. Treat them as bytes + for a normal character. */ + src = src_base; + coding->errors++; } + CODING_UTF_16_BOM (coding) = utf_16_without_bom; + } + else if (bom == utf_16_detect_bom) + { + /* We have already tried to detect BOM and failed in + detect_coding. */ + CODING_UTF_16_BOM (coding) = utf_16_without_bom; + } - ONE_MORE_CHAR (c); - if (c == '\n' && (coding->eol_type == CODING_EOL_CRLF - || coding->eol_type == CODING_EOL_CR)) - { - if (coding->eol_type == CODING_EOL_CRLF) - EMIT_TWO_BYTES ('\r', c); - else - EMIT_ONE_BYTE ('\r'); - } - else if (SINGLE_BYTE_CHAR_P (c)) + while (1) + { + int c, c1, c2; + + src_base = src; + consumed_chars_base = consumed_chars; + + if (charbuf + 2 >= charbuf_end) + break; + + ONE_MORE_BYTE (c1); + ONE_MORE_BYTE (c2); + c = (endian == utf_16_big_endian + ? ((c1 << 8) | c2) : ((c2 << 8) | c1)); + if (surrogate) { - if (coding->flags && ! ASCII_BYTE_P (c)) + if (! UTF_16_LOW_SURROGATE_P (c)) { - /* As we are auto saving, retain the multibyte form for - 8-bit chars. */ - unsigned char buf[MAX_MULTIBYTE_LENGTH]; - int bytes = CHAR_STRING (c, buf); - - if (bytes == 1) - EMIT_ONE_BYTE (buf[0]); + if (endian == utf_16_big_endian) + c1 = surrogate >> 8, c2 = surrogate & 0xFF; + else + c1 = surrogate & 0xFF, c2 = surrogate >> 8; + *charbuf++ = c1; + *charbuf++ = c2; + coding->errors++; + if (UTF_16_HIGH_SURROGATE_P (c)) + CODING_UTF_16_SURROGATE (coding) = surrogate = c; else - EMIT_TWO_BYTES (buf[0], buf[1]); + *charbuf++ = c; } else - EMIT_ONE_BYTE (c); + { + c = ((surrogate - 0xD800) << 10) | (c - 0xDC00); + CODING_UTF_16_SURROGATE (coding) = surrogate = 0; + *charbuf++ = c; + } } else - EMIT_BYTES (src_base, src); - coding->consumed_char++; + { + if (UTF_16_HIGH_SURROGATE_P (c)) + CODING_UTF_16_SURROGATE (coding) = surrogate = c; + else + *charbuf++ = c; - } ++ } } - label_end_of_loop: - coding->consumed = src_base - source; - coding->produced = coding->produced_char = dst - destination; - return; + + no_more_source: + coding->consumed_char += consumed_chars_base; + coding->consumed = src_base - coding->source; + coding->charbuf_used = charbuf - coding->charbuf; } - - /*** 3. ISO2022 handlers ***/ + static int + encode_coding_utf_16 (coding) + struct coding_system *coding; + { + int multibytep = coding->dst_multibyte; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_used; + unsigned char *dst = coding->destination + coding->produced; + unsigned char *dst_end = coding->destination + coding->dst_bytes; + int safe_room = 8; + enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding); + int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian; + int produced_chars = 0; + Lisp_Object attrs, eol_type, charset_list; + int c; - /* The following note describes the coding system ISO2022 briefly. - Since the intention of this note is to help understand the - functions in this file, some parts are NOT ACCURATE or are OVERLY - SIMPLIFIED. For thorough understanding, please refer to the - original document of ISO2022. This is equivalent to the standard + CODING_GET_INFO (coding, attrs, eol_type, charset_list); + + if (bom != utf_16_without_bom) + { + ASSURE_DESTINATION (safe_room); + if (big_endian) + EMIT_TWO_BYTES (0xFE, 0xFF); + else + EMIT_TWO_BYTES (0xFF, 0xFE); + CODING_UTF_16_BOM (coding) = utf_16_without_bom; + } + + while (charbuf < charbuf_end) + { + ASSURE_DESTINATION (safe_room); + c = *charbuf++; + if (c >= MAX_UNICODE_CHAR) + c = coding->default_char; + + if (c < 0x10000) + { + if (big_endian) + EMIT_TWO_BYTES (c >> 8, c & 0xFF); + else + EMIT_TWO_BYTES (c & 0xFF, c >> 8); + } + else + { + int c1, c2; + + c -= 0x10000; + c1 = (c >> 10) + 0xD800; + c2 = (c & 0x3FF) + 0xDC00; + if (big_endian) + EMIT_FOUR_BYTES (c1 >> 8, c1 & 0xFF, c2 >> 8, c2 & 0xFF); + else + EMIT_FOUR_BYTES (c1 & 0xFF, c1 >> 8, c2 & 0xFF, c2 >> 8); + } + } + coding->result = CODING_RESULT_SUCCESS; + coding->produced = dst - coding->destination; + coding->produced_char += produced_chars; + return 0; + } + + + /*** 6. Old Emacs' internal format (emacs-mule) ***/ + + /* Emacs' internal format for representation of multiple character + sets is a kind of multi-byte encoding, i.e. characters are + represented by variable-length sequences of one-byte codes. + + ASCII characters and control characters (e.g. `tab', `newline') are + represented by one-byte sequences which are their ASCII codes, in + the range 0x00 through 0x7F. + + 8-bit characters of the range 0x80..0x9F are represented by + two-byte sequences of LEADING_CODE_8_BIT_CONTROL and (their 8-bit + code + 0x20). + + 8-bit characters of the range 0xA0..0xFF are represented by + one-byte sequences which are their 8-bit code. + + The other characters are represented by a sequence of `base + leading-code', optional `extended leading-code', and one or two + `position-code's. The length of the sequence is determined by the + base leading-code. Leading-code takes the range 0x81 through 0x9D, + whereas extended leading-code and position-code take the range 0xA0 + through 0xFF. See `charset.h' for more details about leading-code + and position-code. + + --- CODE RANGE of Emacs' internal format --- + character set range + ------------- ----- + ascii 0x00..0x7F + eight-bit-control LEADING_CODE_8_BIT_CONTROL + 0xA0..0xBF + eight-bit-graphic 0xA0..0xBF + ELSE 0x81..0x9D + [0xA0..0xFF]+ + --------------------------------------------- + + As this is the internal character representation, the format is + usually not used externally (i.e. in a file or in a data sent to a + process). But, it is possible to have a text externally in this + format (i.e. by encoding by the coding system `emacs-mule'). + + In that case, a sequence of one-byte codes has a slightly different + form. + + At first, all characters in eight-bit-control are represented by + one-byte sequences which are their 8-bit code. + + Next, character composition data are represented by the byte + sequence of the form: 0x80 METHOD BYTES CHARS COMPONENT ..., + where, + METHOD is 0xF0 plus one of composition method (enum + composition_method), + + BYTES is 0xA0 plus a byte length of this composition data, + + CHARS is 0x20 plus a number of characters composed by this + data, + + COMPONENTs are characters of multibye form or composition + rules encoded by two-byte of ASCII codes. + + In addition, for backward compatibility, the following formats are + also recognized as composition data on decoding. + + 0x80 MSEQ ... + 0x80 0xFF MSEQ RULE MSEQ RULE ... MSEQ + + Here, + MSEQ is a multibyte form but in these special format: + ASCII: 0xA0 ASCII_CODE+0x80, + other: LEADING_CODE+0x20 FOLLOWING-BYTE ..., + RULE is a one byte code of the range 0xA0..0xF0 that + represents a composition rule. + */ + + char emacs_mule_bytes[256]; + + int + emacs_mule_char (coding, src, nbytes, nchars, id) + struct coding_system *coding; + unsigned char *src; + int *nbytes, *nchars, *id; + { - unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src_base = src; + int multibytep = coding->src_multibyte; - unsigned char *src_base = src; + struct charset *charset; + unsigned code; + int c; + int consumed_chars = 0; + + ONE_MORE_BYTE (c); + switch (emacs_mule_bytes[c]) + { + case 2: + if (! (charset = emacs_mule_charset[c])) + goto invalid_code; + ONE_MORE_BYTE (c); + code = c & 0x7F; + break; + + case 3: + if (c == EMACS_MULE_LEADING_CODE_PRIVATE_11 + || c == EMACS_MULE_LEADING_CODE_PRIVATE_12) + { + ONE_MORE_BYTE (c); + if (! (charset = emacs_mule_charset[c])) + goto invalid_code; + ONE_MORE_BYTE (c); + code = c & 0x7F; + } + else + { + if (! (charset = emacs_mule_charset[c])) + goto invalid_code; + ONE_MORE_BYTE (c); + code = (c & 0x7F) << 8; + ONE_MORE_BYTE (c); + code |= c & 0x7F; + } + break; + + case 4: + ONE_MORE_BYTE (c); + if (! (charset = emacs_mule_charset[c])) + goto invalid_code; + ONE_MORE_BYTE (c); + code = (c & 0x7F) << 8; + ONE_MORE_BYTE (c); + code |= c & 0x7F; + break; + + case 1: + code = c; + charset = CHARSET_FROM_ID (ASCII_BYTE_P (code) + ? charset_ascii : charset_eight_bit); + break; + + default: + abort (); + } + c = DECODE_CHAR (charset, code); + if (c < 0) + goto invalid_code; + *nbytes = src - src_base; + *nchars = consumed_chars; + if (id) + *id = charset->id; + return c; + + no_more_source: + return -2; + + invalid_code: + return -1; + } + + + /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". + Check if a text is encoded in `emacs-mule'. If it is, return 1, + else return 0. */ + + static int + detect_coding_emacs_mule (coding, detect_info) + struct coding_system *coding; + struct coding_detection_info *detect_info; + { - unsigned char *src = coding->source, *src_base = src; - unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src = coding->source, *src_base = src; ++ const unsigned char *src_end = coding->source + coding->src_bytes; + int multibytep = coding->src_multibyte; + int consumed_chars = 0; + int c; + int found = 0; + int incomplete; + + detect_info->checked |= CATEGORY_MASK_EMACS_MULE; + /* A coding system of this category is always ASCII compatible. */ + src += coding->head_ascii; + + while (1) + { + incomplete = 0; + ONE_MORE_BYTE (c); + incomplete = 1; + + if (c == 0x80) + { + /* Perhaps the start of composite character. We simple skip + it because analyzing it is too heavy for detecting. But, + at least, we check that the composite character + constitues of more than 4 bytes. */ - unsigned char *src_base; ++ const unsigned char *src_base; + + repeat: + src_base = src; + do + { + ONE_MORE_BYTE (c); + } + while (c >= 0xA0); + + if (src - src_base <= 4) + break; + found = CATEGORY_MASK_EMACS_MULE; + if (c == 0x80) + goto repeat; + } + + if (c < 0x80) + { + if (c < 0x20 + && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)) + break; + } + else + { - unsigned char *src_base = src - 1; ++ const unsigned char *src_base = src - 1; + + do + { + ONE_MORE_BYTE (c); + } + while (c >= 0xA0); + if (src - src_base != emacs_mule_bytes[*src_base]) + break; + found = CATEGORY_MASK_EMACS_MULE; + } + } + detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; + return 0; + + no_more_source: + if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) + { + detect_info->rejected |= CATEGORY_MASK_EMACS_MULE; + return 0; + } + detect_info->found |= found; + return 1; + } + + + /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ + + /* Decode a character represented as a component of composition + sequence of Emacs 20/21 style at SRC. Set C to that character and + update SRC to the head of next character (or an encoded composition + rule). If SRC doesn't points a composition component, set C to -1. + If SRC points an invalid byte sequence, global exit by a return + value 0. */ + + #define DECODE_EMACS_MULE_COMPOSITION_CHAR(buf) \ + if (1) \ + { \ + int c; \ + int nbytes, nchars; \ + \ + if (src == src_end) \ + break; \ + c = emacs_mule_char (coding, src, &nbytes, &nchars, NULL);\ + if (c < 0) \ + { \ + if (c == -2) \ + break; \ + goto invalid_code; \ + } \ + *buf++ = c; \ + src += nbytes; \ + consumed_chars += nchars; \ + } \ + else + + + /* Decode a composition rule represented as a component of composition + sequence of Emacs 20 style at SRC. Store the decoded rule in *BUF, + and increment BUF. If SRC points an invalid byte sequence, set C + to -1. */ + + #define DECODE_EMACS_MULE_COMPOSITION_RULE_20(buf) \ + do { \ + int c, gref, nref; \ + \ + if (src >= src_end) \ + goto invalid_code; \ + ONE_MORE_BYTE_NO_CHECK (c); \ + c -= 0x20; \ + if (c < 0 || c >= 81) \ + goto invalid_code; \ + \ + gref = c / 9, nref = c % 9; \ + *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ + } while (0) + + + /* Decode a composition rule represented as a component of composition + sequence of Emacs 21 style at SRC. Store the decoded rule in *BUF, + and increment BUF. If SRC points an invalid byte sequence, set C + to -1. */ + + #define DECODE_EMACS_MULE_COMPOSITION_RULE_21(buf) \ + do { \ + int gref, nref; \ + \ + if (src + 1>= src_end) \ + goto invalid_code; \ + ONE_MORE_BYTE_NO_CHECK (gref); \ + gref -= 0x20; \ + ONE_MORE_BYTE_NO_CHECK (nref); \ + nref -= 0x20; \ + if (gref < 0 || gref >= 81 \ + || nref < 0 || nref >= 81) \ + goto invalid_code; \ + *buf++ = COMPOSITION_ENCODE_RULE (gref, nref); \ + } while (0) + + + #define DECODE_EMACS_MULE_21_COMPOSITION(c) \ + do { \ + /* Emacs 21 style format. The first three bytes at SRC are \ + (METHOD - 0xF2), (BYTES - 0xA0), (CHARS - 0xA0), where BYTES is \ + the byte length of this composition information, CHARS is the \ + number of characters composed by this composition. */ \ + enum composition_method method = c - 0xF2; \ + int *charbuf_base = charbuf; \ + int from, to; \ + int consumed_chars_limit; \ + int nbytes, nchars; \ + \ + ONE_MORE_BYTE (c); \ + nbytes = c - 0xA0; \ + if (nbytes < 3) \ + goto invalid_code; \ + ONE_MORE_BYTE (c); \ + nchars = c - 0xA0; \ + from = coding->produced + char_offset; \ + to = from + nchars; \ + ADD_COMPOSITION_DATA (charbuf, from, to, method); \ + consumed_chars_limit = consumed_chars_base + nbytes; \ + if (method != COMPOSITION_RELATIVE) \ + { \ + int i = 0; \ + while (consumed_chars < consumed_chars_limit) \ + { \ + if (i % 2 && method != COMPOSITION_WITH_ALTCHARS) \ + DECODE_EMACS_MULE_COMPOSITION_RULE_21 (charbuf); \ + else \ + DECODE_EMACS_MULE_COMPOSITION_CHAR (charbuf); \ + i++; \ + } \ + if (consumed_chars < consumed_chars_limit) \ + goto invalid_code; \ + charbuf_base[0] -= i; \ + } \ + } while (0) + + + #define DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION(c) \ + do { \ + /* Emacs 20 style format for relative composition. */ \ + /* Store multibyte form of characters to be composed. */ \ + enum composition_method method = COMPOSITION_RELATIVE; \ + int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ + int *buf = components; \ + int i, j; \ + int from, to; \ + \ + src = src_base; \ + ONE_MORE_BYTE (c); /* skip 0x80 */ \ + for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ + DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ + if (i < 2) \ + goto invalid_code; \ + from = coding->produced_char + char_offset; \ + to = from + i; \ + ADD_COMPOSITION_DATA (charbuf, from, to, method); \ + for (j = 0; j < i; j++) \ + *charbuf++ = components[j]; \ + } while (0) + + + #define DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION(c) \ + do { \ + /* Emacs 20 style format for rule-base composition. */ \ + /* Store multibyte form of characters to be composed. */ \ + enum composition_method method = COMPOSITION_WITH_RULE; \ + int components[MAX_COMPOSITION_COMPONENTS * 2 - 1]; \ + int *buf = components; \ + int i, j; \ + int from, to; \ + \ + DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ + for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++) \ + { \ + DECODE_EMACS_MULE_COMPOSITION_RULE_20 (buf); \ + DECODE_EMACS_MULE_COMPOSITION_CHAR (buf); \ + } \ + if (i < 1 || (buf - components) % 2 == 0) \ + goto invalid_code; \ + if (charbuf + i + (i / 2) + 1 < charbuf_end) \ + goto no_more_source; \ + from = coding->produced_char + char_offset; \ + to = from + i; \ + ADD_COMPOSITION_DATA (buf, from, to, method); \ + for (j = 0; j < i; j++) \ + *charbuf++ = components[j]; \ + for (j = 0; j < i; j += 2) \ + *charbuf++ = components[j]; \ + } while (0) + + + static void + decode_coding_emacs_mule (coding) + struct coding_system *coding; + { - unsigned char *src = coding->source + coding->consumed; - unsigned char *src_end = coding->source + coding->src_bytes; - unsigned char *src_base; ++ const unsigned char *src = coding->source + coding->consumed; ++ const unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src_base; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; + int consumed_chars = 0, consumed_chars_base; + int multibytep = coding->src_multibyte; + Lisp_Object attrs, eol_type, charset_list; + int char_offset = coding->produced_char; + int last_offset = char_offset; + int last_id = charset_ascii; + + CODING_GET_INFO (coding, attrs, eol_type, charset_list); + + while (1) + { + int c; + + src_base = src; + consumed_chars_base = consumed_chars; + + if (charbuf >= charbuf_end) + break; + + ONE_MORE_BYTE (c); + + if (c < 0x80) + { + if (c == '\r') + { + if (EQ (eol_type, Qdos)) + { + if (src == src_end) + { + coding->result = CODING_RESULT_INSUFFICIENT_SRC; + goto no_more_source; + } + if (*src == '\n') + ONE_MORE_BYTE (c); + } + else if (EQ (eol_type, Qmac)) + c = '\n'; + } + *charbuf++ = c; + char_offset++; + } + else if (c == 0x80) + { + ONE_MORE_BYTE (c); + if (c - 0xF2 >= COMPOSITION_RELATIVE + && c - 0xF2 <= COMPOSITION_WITH_RULE_ALTCHARS) + DECODE_EMACS_MULE_21_COMPOSITION (c); + else if (c < 0xC0) + DECODE_EMACS_MULE_20_RELATIVE_COMPOSITION (c); + else if (c == 0xFF) + DECODE_EMACS_MULE_20_RULEBASE_COMPOSITION (c); + else + goto invalid_code; + } + else if (c < 0xA0 && emacs_mule_bytes[c] > 1) + { + int nbytes, nchars; + int id; + + src = src_base; + consumed_chars = consumed_chars_base; + c = emacs_mule_char (coding, src, &nbytes, &nchars, &id); + if (c < 0) + { + if (c == -2) + break; + goto invalid_code; + } + if (last_id != id) + { + if (last_id != charset_ascii) + ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); + last_id = id; + last_offset = char_offset; + } + *charbuf++ = c; + src += nbytes; + consumed_chars += nchars; + char_offset++; + } + continue; + + invalid_code: + src = src_base; + consumed_chars = consumed_chars_base; + ONE_MORE_BYTE (c); + *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + char_offset++; + coding->errors++; + } + + no_more_source: + if (last_id != charset_ascii) + ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); + coding->consumed_char += consumed_chars_base; + coding->consumed = src_base - coding->source; + coding->charbuf_used = charbuf - coding->charbuf; + } + + + #define EMACS_MULE_LEADING_CODES(id, codes) \ + do { \ + if (id < 0xA0) \ + codes[0] = id, codes[1] = 0; \ + else if (id < 0xE0) \ + codes[0] = 0x9A, codes[1] = id; \ + else if (id < 0xF0) \ + codes[0] = 0x9B, codes[1] = id; \ + else if (id < 0xF5) \ + codes[0] = 0x9C, codes[1] = id; \ + else \ + codes[0] = 0x9D, codes[1] = id; \ + } while (0); + + + static int + encode_coding_emacs_mule (coding) + struct coding_system *coding; + { + int multibytep = coding->dst_multibyte; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_used; + unsigned char *dst = coding->destination + coding->produced; + unsigned char *dst_end = coding->destination + coding->dst_bytes; + int safe_room = 8; + int produced_chars = 0; + Lisp_Object attrs, eol_type, charset_list; + int c; + int preferred_charset_id = -1; + + CODING_GET_INFO (coding, attrs, eol_type, charset_list); + + while (charbuf < charbuf_end) + { + ASSURE_DESTINATION (safe_room); + c = *charbuf++; + + if (c < 0) + { + /* Handle an annotation. */ + switch (*charbuf) + { + case CODING_ANNOTATE_COMPOSITION_MASK: + /* Not yet implemented. */ + break; + case CODING_ANNOTATE_CHARSET_MASK: + preferred_charset_id = charbuf[3]; + if (preferred_charset_id >= 0 + && NILP (Fmemq (make_number (preferred_charset_id), + charset_list))) + preferred_charset_id = -1; + break; + default: + abort (); + } + charbuf += -c - 1; + continue; + } + + if (ASCII_CHAR_P (c)) + EMIT_ONE_ASCII_BYTE (c); + else if (CHAR_BYTE8_P (c)) + { + c = CHAR_TO_BYTE8 (c); + EMIT_ONE_BYTE (c); + } + else + { + struct charset *charset; + unsigned code; + int dimension; + int emacs_mule_id; + unsigned char leading_codes[2]; + + if (preferred_charset_id >= 0) + { + charset = CHARSET_FROM_ID (preferred_charset_id); + if (! CHAR_CHARSET_P (c, charset)) + charset = char_charset (c, charset_list, NULL); + } + else + charset = char_charset (c, charset_list, &code); + if (! charset) + { + c = coding->default_char; + if (ASCII_CHAR_P (c)) + { + EMIT_ONE_ASCII_BYTE (c); + continue; + } + charset = char_charset (c, charset_list, &code); + } + dimension = CHARSET_DIMENSION (charset); + emacs_mule_id = CHARSET_EMACS_MULE_ID (charset); + EMACS_MULE_LEADING_CODES (emacs_mule_id, leading_codes); + EMIT_ONE_BYTE (leading_codes[0]); + if (leading_codes[1]) + EMIT_ONE_BYTE (leading_codes[1]); + if (dimension == 1) + EMIT_ONE_BYTE (code); + else + { + EMIT_ONE_BYTE (code >> 8); + EMIT_ONE_BYTE (code & 0xFF); + } + } + } + coding->result = CODING_RESULT_SUCCESS; + coding->produced_char += produced_chars; + coding->produced = dst - coding->destination; + return 0; + } + + + /*** 7. ISO2022 handlers ***/ + + /* The following note describes the coding system ISO2022 briefly. + Since the intention of this note is to help understand the + functions in this file, some parts are NOT ACCURATE or are OVERLY + SIMPLIFIED. For thorough understanding, please refer to the + original document of ISO2022. This is equivalent to the standard ECMA-35, obtainable from (*). ISO2022 provides many mechanisms to encode several character sets @@@ -1350,52 -2321,121 +2325,121 @@@ enum iso_code_class_type iso_code_class[256]; - #define CHARSET_OK(idx, charset, c) \ - (coding_system_table[idx] \ - && (charset == CHARSET_ASCII \ - || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \ - CODING_SAFE_CHAR_P (safe_chars, c))) \ - && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \ - charset) \ - != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) + #define SAFE_CHARSET_P(coding, id) \ + ((id) <= (coding)->max_charset_id \ + && (coding)->safe_charsets[id] >= 0) + + + #define SHIFT_OUT_OK(category) \ + (CODING_ISO_INITIAL (&coding_categories[category], 1) >= 0) + + static void + setup_iso_safe_charsets (attrs) + Lisp_Object attrs; + { + Lisp_Object charset_list, safe_charsets; + Lisp_Object request; + Lisp_Object reg_usage; + Lisp_Object tail; + int reg94, reg96; + int flags = XINT (AREF (attrs, coding_attr_iso_flags)); + int max_charset_id; + + charset_list = CODING_ATTR_CHARSET_LIST (attrs); + if ((flags & CODING_ISO_FLAG_FULL_SUPPORT) + && ! EQ (charset_list, Viso_2022_charset_list)) + { + CODING_ATTR_CHARSET_LIST (attrs) + = charset_list = Viso_2022_charset_list; + ASET (attrs, coding_attr_safe_charsets, Qnil); + } + + if (STRINGP (AREF (attrs, coding_attr_safe_charsets))) + return; + + max_charset_id = 0; + for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) + { + int id = XINT (XCAR (tail)); + if (max_charset_id < id) + max_charset_id = id; + } - #define SHIFT_OUT_OK(idx) \ - (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) + safe_charsets = Fmake_string (make_number (max_charset_id + 1), + make_number (255)); + request = AREF (attrs, coding_attr_iso_request); + reg_usage = AREF (attrs, coding_attr_iso_usage); + reg94 = XINT (XCAR (reg_usage)); + reg96 = XINT (XCDR (reg_usage)); + + for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) + { + Lisp_Object id; + Lisp_Object reg; + struct charset *charset; + + id = XCAR (tail); + charset = CHARSET_FROM_ID (XINT (id)); + reg = Fcdr (Fassq (id, request)); + if (! NILP (reg)) - XSTRING (safe_charsets)->data[XINT (id)] = XINT (reg); ++ SSET (safe_charsets, XINT (id), XINT (reg)); + else if (charset->iso_chars_96) + { + if (reg96 < 4) - XSTRING (safe_charsets)->data[XINT (id)] = reg96; ++ SSET (safe_charsets, XINT (id), reg96); + } + else + { + if (reg94 < 4) - XSTRING (safe_charsets)->data[XINT (id)] = reg94; ++ SSET (safe_charsets, XINT (id), reg94); + } + } + ASET (attrs, coding_attr_safe_charsets, safe_charsets); + } - #define COMPOSITION_OK(idx) \ - (coding_system_table[idx]->composing != COMPOSITION_DISABLED) /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". - Check if a text is encoded in ISO2022. If it is, return an - integer in which appropriate flag bits any of: - CODING_CATEGORY_MASK_ISO_7 - CODING_CATEGORY_MASK_ISO_7_TIGHT - CODING_CATEGORY_MASK_ISO_8_1 - CODING_CATEGORY_MASK_ISO_8_2 - CODING_CATEGORY_MASK_ISO_7_ELSE - CODING_CATEGORY_MASK_ISO_8_ELSE - are set. If a code which should never appear in ISO2022 is found, - returns 0. */ + Check if a text is encoded in one of ISO-2022 based codig systems. + If it is, return 1, else return 0. */ static int - detect_coding_iso2022 (src, src_end, multibytep) - unsigned char *src, *src_end; - int multibytep; + detect_coding_iso_2022 (coding, detect_info) + struct coding_system *coding; + struct coding_detection_info *detect_info; { - int mask = CODING_CATEGORY_MASK_ISO; - int mask_found = 0; - int reg[4], shift_out = 0, single_shifting = 0; - int c, c1, charset; - /* Dummy for ONE_MORE_BYTE. */ - struct coding_system dummy_coding; - struct coding_system *coding = &dummy_coding; - Lisp_Object safe_chars; - - reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1; - while (mask && src < src_end) - { - ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); - retry: - unsigned char *src = coding->source, *src_base = src; - unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src = coding->source, *src_base = src; ++ const unsigned char *src_end = coding->source + coding->src_bytes; + int multibytep = coding->src_multibyte; + int single_shifting = 0; + int id; + int c, c1; + int consumed_chars = 0; + int i; + int rejected = 0; + int found = 0; + + detect_info->checked |= CATEGORY_MASK_ISO; + + for (i = coding_category_iso_7; i <= coding_category_iso_8_else; i++) + { + struct coding_system *this = &(coding_categories[i]); + Lisp_Object attrs, val; + + attrs = CODING_ID_ATTRS (this->id); + if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT + && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list)) + setup_iso_safe_charsets (attrs); + val = CODING_ATTR_SAFE_CHARSETS (attrs); - this->max_charset_id = XSTRING (val)->size - 1; - this->safe_charsets = (char *) XSTRING (val)->data; ++ this->max_charset_id = SCHARS (val) - 1; ++ this->safe_charsets = (char *) SDATA (val); + } + + /* A coding system of this category is always ASCII compatible. */ + src += coding->head_ascii; + + while (rejected != CATEGORY_MASK_ISO) + { + ONE_MORE_BYTE (c); switch (c) { case ISO_CODE_ESC: @@@ -1512,47 -2520,32 +2524,32 @@@ if (inhibit_iso_escape_detection) break; single_shifting = 0; - if (shift_out == 1) - { - /* Locking shift in. */ - mask &= ~CODING_CATEGORY_MASK_ISO_7BIT; - mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT; - } + rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT; + found |= CATEGORY_MASK_ISO_ELSE; break; - + case ISO_CODE_CSI: + /* Control sequence introducer. */ single_shifting = 0; + rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_7_ELSE; + found |= CATEGORY_MASK_ISO_8_ELSE; + goto check_extra_latin; + + case ISO_CODE_SS2: case ISO_CODE_SS3: - { - int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE; - - if (inhibit_iso_escape_detection) - break; - if (c != ISO_CODE_CSI) - { - if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags - & CODING_FLAG_ISO_SINGLE_SHIFT) - newmask |= CODING_CATEGORY_MASK_ISO_8_1; - if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags - & CODING_FLAG_ISO_SINGLE_SHIFT) - newmask |= CODING_CATEGORY_MASK_ISO_8_2; - single_shifting = 1; - } - if (VECTORP (Vlatin_extra_code_table) - && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) - { - if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags - & CODING_FLAG_ISO_LATIN_EXTRA) - newmask |= CODING_CATEGORY_MASK_ISO_8_1; - if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags - & CODING_FLAG_ISO_LATIN_EXTRA) - newmask |= CODING_CATEGORY_MASK_ISO_8_2; - } - mask &= newmask; - mask_found |= newmask; - } - break; + /* Single shift. */ + if (inhibit_iso_escape_detection) + break; + single_shifting = 1; + rejected |= CATEGORY_MASK_ISO_7BIT; + if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1]) + & CODING_ISO_FLAG_SINGLE_SHIFT) + found |= CATEGORY_MASK_ISO_8_1; + if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_2]) + & CODING_ISO_FLAG_SINGLE_SHIFT) + found |= CATEGORY_MASK_ISO_8_2; + goto check_extra_latin; default: if (c < 0x80) @@@ -1690,73 -2671,81 +2675,81 @@@ /* Handle composition start sequence ESC 0, ESC 2, ESC 3, or ESC 4. ESC 0 : relative composition : ESC 0 CHAR ... ESC 1 ESC 2 : rulebase composition : ESC 2 CHAR RULE CHAR RULE ... CHAR ESC 1 - ESC 3 : altchar composition : ESC 3 ALT ... ESC 0 CHAR ... ESC 1 - ESC 4 : alt&rule composition : ESC 4 ALT RULE .. ALT ESC 0 CHAR ... ESC 1 + ESC 3 : altchar composition : ESC 3 CHAR ... ESC 0 CHAR ... ESC 1 + ESC 4 : alt&rule composition : ESC 4 CHAR RULE ... CHAR ESC 0 CHAR ... ESC 1 */ - #define DECODE_COMPOSITION_START(c1) \ - do { \ - if (coding->composing == COMPOSITION_DISABLED) \ - { \ - *dst++ = ISO_CODE_ESC; \ - *dst++ = c1 & 0x7f; \ - coding->produced_char += 2; \ - } \ - else if (!COMPOSING_P (coding)) \ - { \ - /* This is surely the start of a composition. We must be sure \ - that coding->cmp_data has enough space to store the \ - information about the composition. If not, terminate the \ - current decoding loop, allocate one more memory block for \ - coding->cmp_data in the caller, then start the decoding \ - loop again. We can't allocate memory here directly because \ - it may cause buffer/string relocation. */ \ - if (!coding->cmp_data \ - || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \ - >= COMPOSITION_DATA_SIZE)) \ - { \ - coding->result = CODING_FINISH_INSUFFICIENT_CMP; \ - goto label_end_of_loop; \ - } \ - coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \ - : c1 == '2' ? COMPOSITION_WITH_RULE \ - : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \ - : COMPOSITION_WITH_RULE_ALTCHARS); \ - CODING_ADD_COMPOSITION_START (coding, coding->produced_char, \ - coding->composing); \ - coding->composition_rule_follows = 0; \ - } \ - else \ - { \ - /* We are already handling a composition. If the method is \ - the following two, the codes following the current escape \ - sequence are actual characters stored in a buffer. */ \ - if (coding->composing == COMPOSITION_WITH_ALTCHARS \ - || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) \ - { \ - coding->composing = COMPOSITION_RELATIVE; \ - coding->composition_rule_follows = 0; \ - } \ - } \ + #define DECODE_COMPOSITION_START(c1) \ + do { \ + if (c1 == '0' \ + && composition_state == COMPOSING_COMPONENT_RULE) \ + { \ + component_len = component_idx; \ + composition_state = COMPOSING_CHAR; \ + } \ + else \ + { \ - unsigned char *p; \ ++ const unsigned char *p; \ + \ + MAYBE_FINISH_COMPOSITION (); \ + if (charbuf + MAX_COMPOSITION_COMPONENTS > charbuf_end) \ + goto no_more_source; \ + for (p = src; p < src_end - 1; p++) \ + if (*p == ISO_CODE_ESC && p[1] == '1') \ + break; \ + if (p == src_end - 1) \ + { \ + if (coding->mode & CODING_MODE_LAST_BLOCK) \ + goto invalid_code; \ + goto no_more_source; \ + } \ + \ + /* This is surely the start of a composition. */ \ + method = (c1 == '0' ? COMPOSITION_RELATIVE \ + : c1 == '2' ? COMPOSITION_WITH_RULE \ + : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \ + : COMPOSITION_WITH_RULE_ALTCHARS); \ + composition_state = (c1 <= '2' ? COMPOSING_CHAR \ + : COMPOSING_COMPONENT_CHAR); \ + component_idx = component_len = 0; \ + } \ } while (0) - /* Handle composition end sequence ESC 1. */ - #define DECODE_COMPOSITION_END(c1) \ + /* Handle compositoin end sequence ESC 1. */ + + #define DECODE_COMPOSITION_END() \ do { \ - if (! COMPOSING_P (coding)) \ + int nchars = (component_len > 0 ? component_idx - component_len \ + : method == COMPOSITION_RELATIVE ? component_idx \ + : (component_idx + 1) / 2); \ + int i; \ + int *saved_charbuf = charbuf; \ - int from = coding->produced_char + char_offset; \ ++ int from = char_offset; \ + int to = from + nchars; \ + \ + ADD_COMPOSITION_DATA (charbuf, from, to, method); \ + if (method != COMPOSITION_RELATIVE) \ { \ - *dst++ = ISO_CODE_ESC; \ - *dst++ = c1; \ - coding->produced_char += 2; \ + if (component_len == 0) \ + for (i = 0; i < component_idx; i++) \ + *charbuf++ = components[i]; \ + else \ + for (i = 0; i < component_len; i++) \ + *charbuf++ = components[i]; \ + *saved_charbuf = saved_charbuf - charbuf; \ } \ + if (method == COMPOSITION_WITH_RULE) \ + for (i = 0; i < component_idx; i += 2, char_offset++) \ + *charbuf++ = components[i]; \ else \ - { \ - CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \ - coding->composing = COMPOSITION_NO; \ - } \ + for (i = component_len; i < component_idx; i++, char_offset++) \ + *charbuf++ = components[i]; \ + coding->annotated = 1; \ + composition_state = COMPOSING_NO; \ } while (0) + /* Decode a composition rule from the byte C1 (and maybe one more byte from SRC) and store one encoded composition rule in coding->cmp_data. */ @@@ -1786,40 -2774,41 +2778,41 @@@ /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ static void - decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) + decode_coding_iso_2022 (coding) struct coding_system *coding; - unsigned char *source, *destination; - int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; - unsigned char *dst = destination; - unsigned char *dst_end = destination + dst_bytes; - unsigned char *src = coding->source + coding->consumed; - unsigned char *src_end = coding->source + coding->src_bytes; - unsigned char *src_base; ++ const unsigned char *src = coding->source + coding->consumed; ++ const unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src_base; + int *charbuf = coding->charbuf; + int *charbuf_end + = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH; + int consumed_chars = 0, consumed_chars_base; + int multibytep = coding->src_multibyte; /* Charsets invoked to graphic plane 0 and 1 respectively. */ - int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); - int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1); - /* SRC_BASE remembers the start position in source in each loop. - The loop will be exited when there's not enough source code - (within macro ONE_MORE_BYTE), or when there's not enough - destination area to produce a character (within macro - EMIT_CHAR). */ - unsigned char *src_base; - int c, charset; - Lisp_Object translation_table; - Lisp_Object safe_chars; - - safe_chars = coding_safe_chars (coding->symbol); - - if (NILP (Venable_character_translation)) - translation_table = Qnil; - else - { - translation_table = coding->translation_table_for_decode; - if (NILP (translation_table)) - translation_table = Vstandard_translation_table_for_decode; - } - - coding->result = CODING_FINISH_NORMAL; + int charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); + int charset_id_1 = CODING_ISO_INVOKED_CHARSET (coding, 1); + struct charset *charset; + int c; + /* For handling composition sequence. */ + #define COMPOSING_NO 0 + #define COMPOSING_CHAR 1 + #define COMPOSING_RULE 2 + #define COMPOSING_COMPONENT_CHAR 3 + #define COMPOSING_COMPONENT_RULE 4 + + int composition_state = COMPOSING_NO; + enum composition_method method; + int components[MAX_COMPOSITION_COMPONENTS * 2 + 1]; + int component_idx; + int component_len; + Lisp_Object attrs, eol_type, charset_list; + int char_offset = coding->produced_char; + int last_offset = char_offset; + int last_id = charset_ascii; + + CODING_GET_INFO (coding, attrs, eol_type, charset_list); + setup_iso_safe_charsets (attrs); while (1) { @@@ -1861,57 -2868,44 +2872,44 @@@ /* This is a graphic character, we fall down ... */ case ISO_graphic_plane_1: - if (charset1 < 0) - goto label_invalid_code; - charset = charset1; - break; - - case ISO_control_0: - if (COMPOSING_P (coding)) - DECODE_COMPOSITION_END ('1'); - - /* All ISO2022 control characters in this class have the - same representation in Emacs internal format. */ - if (c1 == '\n' - && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) - && (coding->eol_type == CODING_EOL_CR - || coding->eol_type == CODING_EOL_CRLF)) - { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; - } - charset = CHARSET_ASCII; + if (charset_id_1 < 0) + goto invalid_code; + charset = CHARSET_FROM_ID (charset_id_1); break; - case ISO_control_1: - if (COMPOSING_P (coding)) - DECODE_COMPOSITION_END ('1'); - goto label_invalid_code; - case ISO_carriage_return: - if (COMPOSING_P (coding)) - DECODE_COMPOSITION_END ('1'); - - if (coding->eol_type == CODING_EOL_CR) - c1 = '\n'; - else if (coding->eol_type == CODING_EOL_CRLF) + if (c1 == '\r') { - ONE_MORE_BYTE (c1); - if (c1 != ISO_CODE_LF) + if (EQ (eol_type, Qdos)) { - src--; - c1 = '\r'; + if (src == src_end) + { + coding->result = CODING_RESULT_INSUFFICIENT_SRC; + goto no_more_source; - } ++ } + if (*src == '\n') + ONE_MORE_BYTE (c1); } + else if (EQ (eol_type, Qmac)) + c1 = '\n'; } - charset = CHARSET_ASCII; + /* fall through */ + + case ISO_control_0: + MAYBE_FINISH_COMPOSITION (); + charset = CHARSET_FROM_ID (charset_ascii); break; + case ISO_control_1: + MAYBE_FINISH_COMPOSITION (); + goto invalid_code; + case ISO_shift_out: - if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT) - || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0) - goto label_invalid_code; - CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; - charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0); + if (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LOCKING_SHIFT) + || CODING_ISO_DESIGNATION (coding, 1) < 0) + goto invalid_code; + CODING_ISO_INVOCATION (coding, 0) = 1; + charset_id_0 = CODING_ISO_INVOKED_CHARSET (coding, 0); continue; case ISO_shift_in: @@@ -2074,9 -3070,8 +3074,8 @@@ We keep these bytes as is for the moment. They may be decoded by post-read-conversion. */ int dim, M, L; - int size, required; - int produced_chars; - + int size; - ++ ONE_MORE_BYTE (dim); ONE_MORE_BYTE (M); ONE_MORE_BYTE (L); @@@ -2609,123 -3625,73 +3629,72 @@@ encode_designation_at_bol (coding, char /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions". */ - static void - encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) + static int + encode_coding_iso_2022 (coding) struct coding_system *coding; - unsigned char *source, *destination; - int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; - unsigned char *dst = destination; - unsigned char *dst_end = destination + dst_bytes; - /* Since the maximum bytes produced by each loop is 20, we subtract 19 - from DST_END to assure overflow checking is necessary only at the - head of loop. */ - unsigned char *adjusted_dst_end = dst_end - 19; - /* SRC_BASE remembers the start position in source in each loop. - The loop will be exited when there's not enough source text to - analyze multi-byte codes (within macro ONE_MORE_CHAR), or when - there's not enough destination area to produce encoded codes - (within macro EMIT_BYTES). */ - unsigned char *src_base; + int multibytep = coding->dst_multibyte; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_used; + unsigned char *dst = coding->destination + coding->produced; + unsigned char *dst_end = coding->destination + coding->dst_bytes; + int safe_room = 16; + int bol_designation + = (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_DESIGNATE_AT_BOL + && CODING_ISO_BOL (coding)); + int produced_chars = 0; + Lisp_Object attrs, eol_type, charset_list; + int ascii_compatible; int c; - Lisp_Object translation_table; - Lisp_Object safe_chars; + int preferred_charset_id = -1; - if (coding->flags & CODING_FLAG_ISO_SAFE) - coding->mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR; + CODING_GET_INFO (coding, attrs, eol_type, charset_list); + setup_iso_safe_charsets (attrs); + /* Charset list may have been changed. */ + charset_list = CODING_ATTR_CHARSET_LIST (attrs); \ - coding->safe_charsets - = (char *) XSTRING (CODING_ATTR_SAFE_CHARSETS(attrs))->data; ++ coding->safe_charsets = (char *) SDATA (CODING_ATTR_SAFE_CHARSETS(attrs)); - safe_chars = coding_safe_chars (coding->symbol); + ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); - if (NILP (Venable_character_translation)) - translation_table = Qnil; - else - { - translation_table = coding->translation_table_for_encode; - if (NILP (translation_table)) - translation_table = Vstandard_translation_table_for_encode; - } - - coding->consumed_char = 0; - coding->errors = 0; - while (1) + while (charbuf < charbuf_end) { - src_base = src; + ASSURE_DESTINATION (safe_room); - if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19))) + if (bol_designation) { - coding->result = CODING_FINISH_INSUFFICIENT_DST; - break; - } + unsigned char *dst_prev = dst; - if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL - && CODING_SPEC_ISO_BOL (coding)) - { /* We have to produce designation sequences if any now. */ - dst = encode_designation_at_bol (coding, translation_table, - src, src_end, dst); - CODING_SPEC_ISO_BOL (coding) = 0; + dst = encode_designation_at_bol (coding, charbuf, charbuf_end, dst); + bol_designation = 0; + /* We are sure that designation sequences are all ASCII bytes. */ + produced_chars += dst - dst_prev; } - /* Check composition start and end. */ - if (coding->composing != COMPOSITION_DISABLED - && coding->cmp_data_start < coding->cmp_data->used) - { - struct composition_data *cmp_data = coding->cmp_data; - int *data = cmp_data->data + coding->cmp_data_start; - int this_pos = cmp_data->char_offset + coding->consumed_char; + c = *charbuf++; - if (coding->composing == COMPOSITION_RELATIVE) - { - if (this_pos == data[2]) - { - ENCODE_COMPOSITION_END (coding, data); - cmp_data = coding->cmp_data; - data = cmp_data->data + coding->cmp_data_start; - } - } - else if (COMPOSING_P (coding)) - { - /* COMPOSITION_WITH_ALTCHARS or COMPOSITION_WITH_RULE_ALTCHAR */ - if (coding->cmp_data_index == coding->cmp_data_start + data[0]) - /* We have consumed components of the composition. - What follows in SRC is the composition's base - text. */ - ENCODE_COMPOSITION_FAKE_START (coding); - else - { - int c = cmp_data->data[coding->cmp_data_index++]; - if (coding->composition_rule_follows) - { - ENCODE_COMPOSITION_RULE (c); - coding->composition_rule_follows = 0; - } - else - { - if (coding->mode & CODING_MODE_INHIBIT_UNENCODABLE_CHAR - && ! CODING_SAFE_CHAR_P (safe_chars, c)) - ENCODE_UNSAFE_CHARACTER (c); - else - ENCODE_ISO_CHARACTER (c); - if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) - coding->composition_rule_follows = 1; - } - continue; - } - } - if (!COMPOSING_P (coding)) + if (c < 0) + { + /* Handle an annotation. */ + switch (*charbuf) { - if (this_pos == data[1]) - { - ENCODE_COMPOSITION_START (coding, data); - continue; - } + case CODING_ANNOTATE_COMPOSITION_MASK: + /* Not yet implemented. */ + break; + case CODING_ANNOTATE_CHARSET_MASK: + preferred_charset_id = charbuf[3]; + if (preferred_charset_id >= 0 + && NILP (Fmemq (make_number (preferred_charset_id), + charset_list))) + preferred_charset_id = -1; + break; + default: + abort (); } + charbuf += -c - 1; + continue; } - ONE_MORE_CHAR (c); - /* Now encode the character C. */ if (c < 0x20 || c == 0x7F) { @@@ -2863,17 -3811,24 +3814,24 @@@ /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". Check if a text is encoded in SJIS. If it is, return - CODING_CATEGORY_MASK_SJIS, else return 0. */ + CATEGORY_MASK_SJIS, else return 0. */ static int - detect_coding_sjis (src, src_end, multibytep) - unsigned char *src, *src_end; - int multibytep; + detect_coding_sjis (coding, detect_info) + struct coding_system *coding; + struct coding_detection_info *detect_info; { - unsigned char *src = coding->source, *src_base = src; - unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src = coding->source, *src_base = src; ++ const unsigned char *src_end = coding->source + coding->src_bytes; + int multibytep = coding->src_multibyte; + int consumed_chars = 0; + int found = 0; int c; - /* Dummy for ONE_MORE_BYTE. */ - struct coding_system dummy_coding; - struct coding_system *coding = &dummy_coding; + int incomplete; + + detect_info->checked |= CATEGORY_MASK_SJIS; + /* A coding system of this category is always ASCII compatible. */ + src += coding->head_ascii; while (1) { @@@ -2895,214 -3864,223 +3867,223 @@@ /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". Check if a text is encoded in BIG5. If it is, return - CODING_CATEGORY_MASK_BIG5, else return 0. */ + CATEGORY_MASK_BIG5, else return 0. */ static int - detect_coding_big5 (src, src_end, multibytep) - unsigned char *src, *src_end; - int multibytep; + detect_coding_big5 (coding, detect_info) + struct coding_system *coding; + struct coding_detection_info *detect_info; { - unsigned char *src = coding->source, *src_base = src; - unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src = coding->source, *src_base = src; ++ const unsigned char *src_end = coding->source + coding->src_bytes; + int multibytep = coding->src_multibyte; + int consumed_chars = 0; + int found = 0; int c; - /* Dummy for ONE_MORE_BYTE. */ - struct coding_system dummy_coding; - struct coding_system *coding = &dummy_coding; - - while (1) - { - ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); - if (c < 0x80) - continue; - if (c < 0xA1 || c > 0xFE) - return 0; - ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); - if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE) - return 0; - } - label_end_of_loop: - return CODING_CATEGORY_MASK_BIG5; - } - - /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". - Check if a text is encoded in UTF-8. If it is, return - CODING_CATEGORY_MASK_UTF_8, else return 0. */ - - #define UTF_8_1_OCTET_P(c) ((c) < 0x80) - #define UTF_8_EXTRA_OCTET_P(c) (((c) & 0xC0) == 0x80) - #define UTF_8_2_OCTET_LEADING_P(c) (((c) & 0xE0) == 0xC0) - #define UTF_8_3_OCTET_LEADING_P(c) (((c) & 0xF0) == 0xE0) - #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0) - #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8) - #define UTF_8_6_OCTET_LEADING_P(c) (((c) & 0xFE) == 0xFC) + int incomplete; - static int - detect_coding_utf_8 (src, src_end, multibytep) - unsigned char *src, *src_end; - int multibytep; - { - unsigned char c; - int seq_maybe_bytes; - /* Dummy for ONE_MORE_BYTE. */ - struct coding_system dummy_coding; - struct coding_system *coding = &dummy_coding; + detect_info->checked |= CATEGORY_MASK_BIG5; + /* A coding system of this category is always ASCII compatible. */ + src += coding->head_ascii; while (1) { - ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); - if (UTF_8_1_OCTET_P (c)) + incomplete = 0; + ONE_MORE_BYTE (c); + incomplete = 1; + if (c < 0x80) continue; - else if (UTF_8_2_OCTET_LEADING_P (c)) - seq_maybe_bytes = 1; - else if (UTF_8_3_OCTET_LEADING_P (c)) - seq_maybe_bytes = 2; - else if (UTF_8_4_OCTET_LEADING_P (c)) - seq_maybe_bytes = 3; - else if (UTF_8_5_OCTET_LEADING_P (c)) - seq_maybe_bytes = 4; - else if (UTF_8_6_OCTET_LEADING_P (c)) - seq_maybe_bytes = 5; - else - return 0; - - do + if (c >= 0xA1) { - ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); - if (!UTF_8_EXTRA_OCTET_P (c)) + ONE_MORE_BYTE (c); + if (c < 0x40 || (c >= 0x7F && c <= 0xA0)) return 0; - seq_maybe_bytes--; + found = CATEGORY_MASK_BIG5; } - while (seq_maybe_bytes > 0); + else + break; } + detect_info->rejected |= CATEGORY_MASK_BIG5; + return 0; - label_end_of_loop: - return CODING_CATEGORY_MASK_UTF_8; + no_more_source: + if (incomplete && coding->mode & CODING_MODE_LAST_BLOCK) + { + detect_info->rejected |= CATEGORY_MASK_BIG5; + return 0; + } + detect_info->found |= found; + return 1; } - /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". - Check if a text is encoded in UTF-16 Big Endian (endian == 1) or - Little Endian (otherwise). If it is, return - CODING_CATEGORY_MASK_UTF_16_BE or CODING_CATEGORY_MASK_UTF_16_LE, - else return 0. */ + /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". + If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */ - #define UTF_16_INVALID_P(val) \ - (((val) == 0xFFFE) \ - || ((val) == 0xFFFF)) + static void + decode_coding_sjis (coding) + struct coding_system *coding; + { - unsigned char *src = coding->source + coding->consumed; - unsigned char *src_end = coding->source + coding->src_bytes; - unsigned char *src_base; ++ const unsigned char *src = coding->source + coding->consumed; ++ const unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src_base; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; + int consumed_chars = 0, consumed_chars_base; + int multibytep = coding->src_multibyte; + struct charset *charset_roman, *charset_kanji, *charset_kana; + Lisp_Object attrs, eol_type, charset_list, val; + int char_offset = coding->produced_char; + int last_offset = char_offset; + int last_id = charset_ascii; + + CODING_GET_INFO (coding, attrs, eol_type, charset_list); + + val = charset_list; + charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); + charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); + charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); - #define UTF_16_HIGH_SURROGATE_P(val) \ - (((val) & 0xD800) == 0xD800) + while (1) + { + int c, c1; - #define UTF_16_LOW_SURROGATE_P(val) \ - (((val) & 0xDC00) == 0xDC00) + src_base = src; + consumed_chars_base = consumed_chars; - static int - detect_coding_utf_16 (src, src_end, multibytep) - unsigned char *src, *src_end; - int multibytep; - { - unsigned char c1, c2; - /* Dummy for ONE_MORE_BYTE_CHECK_MULTIBYTE. */ - struct coding_system dummy_coding; - struct coding_system *coding = &dummy_coding; + if (charbuf >= charbuf_end) + break; + + ONE_MORE_BYTE (c); + + if (c == '\r') + { + if (EQ (eol_type, Qdos)) + { + if (src == src_end) + { + coding->result = CODING_RESULT_INSUFFICIENT_SRC; + goto no_more_source; + } + if (*src == '\n') + ONE_MORE_BYTE (c); + } + else if (EQ (eol_type, Qmac)) + c = '\n'; + } + else + { + struct charset *charset; - ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep); - ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep); + if (c < 0x80) + charset = charset_roman; + else + { + if (c >= 0xF0) + goto invalid_code; + if (c < 0xA0 || c >= 0xE0) + { + /* SJIS -> JISX0208 */ + ONE_MORE_BYTE (c1); + if (c1 < 0x40 || c1 == 0x7F || c1 > 0xFC) + goto invalid_code; + c = (c << 8) | c1; + SJIS_TO_JIS (c); + charset = charset_kanji; + } + else if (c > 0xA0) + { + /* SJIS -> JISX0201-Kana */ + c &= 0x7F; + charset = charset_kana; + } + else + goto invalid_code; + } + if (charset->id != charset_ascii + && last_id != charset->id) + { + if (last_id != charset_ascii) + ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); + last_id = charset->id; + last_offset = char_offset; + } + CODING_DECODE_CHAR (coding, src, src_base, src_end, charset, c, c); + } + *charbuf++ = c; + char_offset++; + continue; - if ((c1 == 0xFF) && (c2 == 0xFE)) - return CODING_CATEGORY_MASK_UTF_16_LE; - else if ((c1 == 0xFE) && (c2 == 0xFF)) - return CODING_CATEGORY_MASK_UTF_16_BE; + invalid_code: + src = src_base; + consumed_chars = consumed_chars_base; + ONE_MORE_BYTE (c); + *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + char_offset++; + coding->errors++; + } - label_end_of_loop: - return 0; + no_more_source: + if (last_id != charset_ascii) + ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); + coding->consumed_char += consumed_chars_base; + coding->consumed = src_base - coding->source; + coding->charbuf_used = charbuf - coding->charbuf; } - /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". - If SJIS_P is 1, decode SJIS text, else decode BIG5 test. */ - static void - decode_coding_sjis_big5 (coding, source, destination, - src_bytes, dst_bytes, sjis_p) + decode_coding_big5 (coding) struct coding_system *coding; - unsigned char *source, *destination; - int src_bytes, dst_bytes; - int sjis_p; { - unsigned char *src = source; - unsigned char *src_end = source + src_bytes; - unsigned char *dst = destination; - unsigned char *dst_end = destination + dst_bytes; - /* SRC_BASE remembers the start position in source in each loop. - The loop will be exited when there's not enough source code - (within macro ONE_MORE_BYTE), or when there's not enough - destination area to produce a character (within macro - EMIT_CHAR). */ - unsigned char *src = coding->source + coding->consumed; - unsigned char *src_end = coding->source + coding->src_bytes; -- unsigned char *src_base; - Lisp_Object translation_table; - - if (NILP (Venable_character_translation)) - translation_table = Qnil; - else - { - translation_table = coding->translation_table_for_decode; - if (NILP (translation_table)) - translation_table = Vstandard_translation_table_for_decode; - } ++ const unsigned char *src = coding->source + coding->consumed; ++ const unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src_base; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; + int consumed_chars = 0, consumed_chars_base; + int multibytep = coding->src_multibyte; + struct charset *charset_roman, *charset_big5; + Lisp_Object attrs, eol_type, charset_list, val; + int char_offset = coding->produced_char; + int last_offset = char_offset; + int last_id = charset_ascii; + + CODING_GET_INFO (coding, attrs, eol_type, charset_list); + val = charset_list; + charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); + charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); - coding->produced_char = 0; while (1) { - int c, charset, c1, c2; + int c, c1; src_base = src; - ONE_MORE_BYTE (c1); + consumed_chars_base = consumed_chars; + + if (charbuf >= charbuf_end) + break; + + ONE_MORE_BYTE (c); - if (c1 < 0x80) + if (c == '\r') { - charset = CHARSET_ASCII; - if (c1 < 0x20) + if (EQ (eol_type, Qdos)) { - if (c1 == '\r') - { - if (coding->eol_type == CODING_EOL_CRLF) - { - ONE_MORE_BYTE (c2); - if (c2 == '\n') - c1 = c2; - else - /* To process C2 again, SRC is subtracted by 1. */ - src--; - } - else if (coding->eol_type == CODING_EOL_CR) - c1 = '\n'; - } - else if (c1 == '\n' - && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) - && (coding->eol_type == CODING_EOL_CR - || coding->eol_type == CODING_EOL_CRLF)) + if (src == src_end) { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; + coding->result = CODING_RESULT_INSUFFICIENT_SRC; + goto no_more_source; } + if (*src == '\n') + ONE_MORE_BYTE (c); } + else if (EQ (eol_type, Qmac)) + c = '\n'; } else - { - if (sjis_p) - { - if (c1 == 0x80 || c1 == 0xA0 || c1 > 0xEF) - goto label_invalid_code; - if (c1 <= 0x9F || c1 >= 0xE0) - { - /* SJIS -> JISX0208 */ - ONE_MORE_BYTE (c2); - if (c2 < 0x40 || c2 == 0x7F || c2 > 0xFC) - goto label_invalid_code; - DECODE_SJIS (c1, c2, c1, c2); - charset = charset_jisx0208; - } - else - /* SJIS -> JISX0201-Kana */ - charset = charset_katakana_jisx0201; - } + { + struct charset *charset; + if (c < 0x80) + charset = charset_roman; else { /* BIG5 -> Big5 */ @@@ -3259,181 -4284,485 +4287,485 @@@ encode_coding_big5 (coding /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". Check if a text is encoded in a coding system of which encoder/decoder are written in CCL program. If it is, return - CODING_CATEGORY_MASK_CCL, else return 0. */ + CATEGORY_MASK_CCL, else return 0. */ static int - detect_coding_ccl (src, src_end, multibytep) - unsigned char *src, *src_end; - int multibytep; + detect_coding_ccl (coding, detect_info) + struct coding_system *coding; + struct coding_detection_info *detect_info; { - unsigned char *valid; - int c; - /* Dummy for ONE_MORE_BYTE. */ - struct coding_system dummy_coding; - struct coding_system *coding = &dummy_coding; - unsigned char *src = coding->source, *src_base = src; - unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src = coding->source, *src_base = src; ++ const unsigned char *src_end = coding->source + coding->src_bytes; + int multibytep = coding->src_multibyte; + int consumed_chars = 0; + int found = 0; + unsigned char *valids = CODING_CCL_VALIDS (coding); + int head_ascii = coding->head_ascii; + Lisp_Object attrs; + + detect_info->checked |= CATEGORY_MASK_CCL; + + coding = &coding_categories[coding_category_ccl]; + attrs = CODING_ID_ATTRS (coding->id); + if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) + src += head_ascii; - /* No coding system is assigned to coding-category-ccl. */ - if (!coding_system_table[CODING_CATEGORY_IDX_CCL]) - return 0; - - valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes; while (1) { - ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep); - if (! valid[c]) - return 0; + int c; + ONE_MORE_BYTE (c); + if (! valids[c]) + break; + if ((valids[c] > 1)) + found = CATEGORY_MASK_CCL; + } + detect_info->rejected |= CATEGORY_MASK_CCL; + return 0; + + no_more_source: + detect_info->found |= found; + return 1; + } + + static void + decode_coding_ccl (coding) + struct coding_system *coding; + { + const unsigned char *src = coding->source + coding->consumed; - unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src_end = coding->source + coding->src_bytes; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_size; + int consumed_chars = 0; + int multibytep = coding->src_multibyte; + struct ccl_program ccl; + int source_charbuf[1024]; + int source_byteidx[1024]; + Lisp_Object attrs, eol_type, charset_list; + + CODING_GET_INFO (coding, attrs, eol_type, charset_list); + setup_ccl_program (&ccl, CODING_CCL_DECODER (coding)); + + while (src < src_end) + { + const unsigned char *p = src; + int *source, *source_end; + int i = 0; + + if (multibytep) + while (i < 1024 && p < src_end) + { + source_byteidx[i] = p - src; + source_charbuf[i++] = STRING_CHAR_ADVANCE (p); + } + else + while (i < 1024 && p < src_end) + source_charbuf[i++] = *p++; - ++ + if (p == src_end && coding->mode & CODING_MODE_LAST_BLOCK) + ccl.last_block = 1; + + source = source_charbuf; + source_end = source + i; + while (source < source_end) + { + ccl_driver (&ccl, source, charbuf, + source_end - source, charbuf_end - charbuf, + charset_list); + source += ccl.consumed; + charbuf += ccl.produced; + if (ccl.status != CCL_STAT_SUSPEND_BY_DST) + break; + } + if (source < source_end) + src += source_byteidx[source - source_charbuf]; + else + src = p; + consumed_chars += source - source_charbuf; + + if (ccl.status != CCL_STAT_SUSPEND_BY_SRC + && ccl.status != CODING_RESULT_INSUFFICIENT_SRC) + break; + } + + switch (ccl.status) + { + case CCL_STAT_SUSPEND_BY_SRC: + coding->result = CODING_RESULT_INSUFFICIENT_SRC; + break; + case CCL_STAT_SUSPEND_BY_DST: + break; + case CCL_STAT_QUIT: + case CCL_STAT_INVALID_CMD: + coding->result = CODING_RESULT_INTERRUPT; + break; + default: + coding->result = CODING_RESULT_SUCCESS; + break; + } + coding->consumed_char += consumed_chars; + coding->consumed = src - coding->source; + coding->charbuf_used = charbuf - coding->charbuf; + } + + static int + encode_coding_ccl (coding) + struct coding_system *coding; + { + struct ccl_program ccl; + int multibytep = coding->dst_multibyte; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_used; + unsigned char *dst = coding->destination + coding->produced; + unsigned char *dst_end = coding->destination + coding->dst_bytes; + unsigned char *adjusted_dst_end = dst_end - 1; + int destination_charbuf[1024]; + int i, produced_chars = 0; + Lisp_Object attrs, eol_type, charset_list; + + CODING_GET_INFO (coding, attrs, eol_type, charset_list); + setup_ccl_program (&ccl, CODING_CCL_ENCODER (coding)); + + ccl.last_block = coding->mode & CODING_MODE_LAST_BLOCK; + ccl.dst_multibyte = coding->dst_multibyte; + + while (charbuf < charbuf_end && dst < adjusted_dst_end) + { + int dst_bytes = dst_end - dst; + if (dst_bytes > 1024) + dst_bytes = 1024; + + ccl_driver (&ccl, charbuf, destination_charbuf, + charbuf_end - charbuf, dst_bytes, charset_list); + charbuf += ccl.consumed; + if (multibytep) + for (i = 0; i < ccl.produced; i++) + EMIT_ONE_BYTE (destination_charbuf[i] & 0xFF); + else + { + for (i = 0; i < ccl.produced; i++) + *dst++ = destination_charbuf[i] & 0xFF; + produced_chars += ccl.produced; + } + } + + switch (ccl.status) + { + case CCL_STAT_SUSPEND_BY_SRC: + coding->result = CODING_RESULT_INSUFFICIENT_SRC; + break; + case CCL_STAT_SUSPEND_BY_DST: + coding->result = CODING_RESULT_INSUFFICIENT_DST; + break; + case CCL_STAT_QUIT: + case CCL_STAT_INVALID_CMD: + coding->result = CODING_RESULT_INTERRUPT; + break; + default: + coding->result = CODING_RESULT_SUCCESS; + break; } - label_end_of_loop: - return CODING_CATEGORY_MASK_CCL; + + coding->produced_char += produced_chars; + coding->produced = dst - coding->destination; + return 0; } + - /*** 6. End-of-line handlers ***/ + /*** 10, 11. no-conversion handlers ***/ /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */ static void - decode_eol (coding, source, destination, src_bytes, dst_bytes) + decode_coding_raw_text (coding) struct coding_system *coding; - unsigned char *source, *destination; - int src_bytes, dst_bytes; { - unsigned char *src = source; - unsigned char *dst = destination; - unsigned char *src_end = src + src_bytes; - unsigned char *dst_end = dst + dst_bytes; - Lisp_Object translation_table; - /* SRC_BASE remembers the start position in source in each loop. - The loop will be exited when there's not enough source code - (within macro ONE_MORE_BYTE), or when there's not enough - destination area to produce a character (within macro - EMIT_CHAR). */ - unsigned char *src_base; + coding->chars_at_source = 1; + coding->consumed_char = 0; + coding->consumed = 0; + coding->result = CODING_RESULT_SUCCESS; + } + + static int + encode_coding_raw_text (coding) + struct coding_system *coding; + { + int multibytep = coding->dst_multibyte; + int *charbuf = coding->charbuf; + int *charbuf_end = coding->charbuf + coding->charbuf_used; + unsigned char *dst = coding->destination + coding->produced; + unsigned char *dst_end = coding->destination + coding->dst_bytes; + int produced_chars = 0; int c; - translation_table = Qnil; - switch (coding->eol_type) + if (multibytep) + { + int safe_room = MAX_MULTIBYTE_LENGTH * 2; + + if (coding->src_multibyte) + while (charbuf < charbuf_end) + { + ASSURE_DESTINATION (safe_room); + c = *charbuf++; + if (ASCII_CHAR_P (c)) + EMIT_ONE_ASCII_BYTE (c); + else if (CHAR_BYTE8_P (c)) + { + c = CHAR_TO_BYTE8 (c); + EMIT_ONE_BYTE (c); + } + else + { + unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str; + + CHAR_STRING_ADVANCE (c, p1); + while (p0 < p1) + { + EMIT_ONE_BYTE (*p0); + p0++; + } + } + } + else + while (charbuf < charbuf_end) + { + ASSURE_DESTINATION (safe_room); + c = *charbuf++; + EMIT_ONE_BYTE (c); + } + } + else { - case CODING_EOL_CRLF: - while (1) + if (coding->src_multibyte) { - src_base = src; - ONE_MORE_BYTE (c); - if (c == '\r') - { - ONE_MORE_BYTE (c); - if (c != '\n') - { - src--; - c = '\r'; - } - } - else if (c == '\n' - && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)) + int safe_room = MAX_MULTIBYTE_LENGTH; + + while (charbuf < charbuf_end) { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; + ASSURE_DESTINATION (safe_room); + c = *charbuf++; + if (ASCII_CHAR_P (c)) + *dst++ = c; + else if (CHAR_BYTE8_P (c)) + *dst++ = CHAR_TO_BYTE8 (c); + else + CHAR_STRING_ADVANCE (c, dst); + produced_chars++; } - EMIT_CHAR (c); } - break; + else + { + ASSURE_DESTINATION (charbuf_end - charbuf); + while (charbuf < charbuf_end && dst < dst_end) + *dst++ = *charbuf++; + produced_chars = dst - (coding->destination + coding->dst_bytes); - } ++ } + } + coding->result = CODING_RESULT_SUCCESS; + coding->produced_char += produced_chars; + coding->produced = dst - coding->destination; + return 0; + } + + /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". + Check if a text is encoded in a charset-based coding system. If it + is, return 1, else return 0. */ + + static int + detect_coding_charset (coding, detect_info) + struct coding_system *coding; + struct coding_detection_info *detect_info; + { - unsigned char *src = coding->source, *src_base = src; - unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src = coding->source, *src_base = src; ++ const unsigned char *src_end = coding->source + coding->src_bytes; + int multibytep = coding->src_multibyte; + int consumed_chars = 0; + Lisp_Object attrs, valids; + int found = 0; + + detect_info->checked |= CATEGORY_MASK_CHARSET; + + coding = &coding_categories[coding_category_charset]; + attrs = CODING_ID_ATTRS (coding->id); + valids = AREF (attrs, coding_attr_charset_valids); + + if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) + src += coding->head_ascii; + + while (1) + { + int c; + + ONE_MORE_BYTE (c); + if (NILP (AREF (valids, c))) + break; + if (c >= 0x80) + found = CATEGORY_MASK_CHARSET; + } + detect_info->rejected |= CATEGORY_MASK_CHARSET; + return 0; + + no_more_source: + detect_info->found |= found; + return 1; + } - case CODING_EOL_CR: - while (1) + static void + decode_coding_charset (coding) + struct coding_system *coding; + { - unsigned char *src = coding->source + coding->consumed; - unsigned char *src_end = coding->source + coding->src_bytes; - unsigned char *src_base; ++ const unsigned char *src = coding->source + coding->consumed; ++ const unsigned char *src_end = coding->source + coding->src_bytes; ++ const unsigned char *src_base; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH; + int consumed_chars = 0, consumed_chars_base; + int multibytep = coding->src_multibyte; + Lisp_Object attrs, eol_type, charset_list, valids; + int char_offset = coding->produced_char; + int last_offset = char_offset; + int last_id = charset_ascii; + + CODING_GET_INFO (coding, attrs, eol_type, charset_list); + valids = AREF (attrs, coding_attr_charset_valids); + + while (1) + { + int c; + + src_base = src; + consumed_chars_base = consumed_chars; + + if (charbuf >= charbuf_end) + break; + + ONE_MORE_BYTE (c); + if (c == '\r') { - src_base = src; - ONE_MORE_BYTE (c); - if (c == '\n') + /* Here we assume that no charset maps '\r' to something + else. */ + if (EQ (eol_type, Qdos)) { - if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) + if (src == src_end) { - coding->result = CODING_FINISH_INCONSISTENT_EOL; - goto label_end_of_loop; + coding->result = CODING_RESULT_INSUFFICIENT_SRC; + goto no_more_source; } + if (*src == '\n') + ONE_MORE_BYTE (c); } - else if (c == '\r') + else if (EQ (eol_type, Qmac)) c = '\n'; - EMIT_CHAR (c); } - break; - - default: /* no need for EOL handling */ - while (1) + else { - src_base = src; - ONE_MORE_BYTE (c); - EMIT_CHAR (c); + Lisp_Object val; + struct charset *charset; + int dim; + int len = 1; + unsigned code = c; + + val = AREF (valids, c); + if (NILP (val)) + goto invalid_code; + if (INTEGERP (val)) + { + charset = CHARSET_FROM_ID (XFASTINT (val)); + dim = CHARSET_DIMENSION (charset); + while (len < dim) + { + ONE_MORE_BYTE (c); + code = (code << 8) | c; + len++; + } + CODING_DECODE_CHAR (coding, src, src_base, src_end, + charset, code, c); + } + else + { + /* VAL is a list of charset IDs. It is assured that the + list is sorted by charset dimensions (smaller one + comes first). */ + while (CONSP (val)) + { + charset = CHARSET_FROM_ID (XFASTINT (XCAR (val))); + dim = CHARSET_DIMENSION (charset); + while (len < dim) + { + ONE_MORE_BYTE (c); + code = (code << 8) | c; + len++; + } + CODING_DECODE_CHAR (coding, src, src_base, + src_end, charset, code, c); + if (c >= 0) + break; + val = XCDR (val); + } + } + if (c < 0) + goto invalid_code; + if (charset->id != charset_ascii + && last_id != charset->id) + { + if (last_id != charset_ascii) + ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); + last_id = charset->id; + last_offset = char_offset; + } } + *charbuf++ = c; + char_offset++; + continue; + + invalid_code: + src = src_base; + consumed_chars = consumed_chars_base; + ONE_MORE_BYTE (c); + *charbuf++ = ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c); + char_offset++; + coding->errors++; } - label_end_of_loop: - coding->consumed = coding->consumed_char = src_base - source; - coding->produced = dst - destination; - return; + no_more_source: + if (last_id != charset_ascii) + ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id); + coding->consumed_char += consumed_chars_base; + coding->consumed = src_base - coding->source; + coding->charbuf_used = charbuf - coding->charbuf; } - /* See "GENERAL NOTES about `encode_coding_XXX ()' functions". Encode - format of end-of-line according to `coding->eol_type'. It also - convert multibyte form 8-bit characters to unibyte if - CODING->src_multibyte is nonzero. If `coding->mode & - CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code '\r' in source text - also means end-of-line. */ - - static void - encode_eol (coding, source, destination, src_bytes, dst_bytes) + static int + encode_coding_charset (coding) struct coding_system *coding; - const unsigned char *source; - unsigned char *destination; - int src_bytes, dst_bytes; { - const unsigned char *src = source; - unsigned char *dst = destination; - const unsigned char *src_end = src + src_bytes; - unsigned char *dst_end = dst + dst_bytes; - Lisp_Object translation_table; - /* SRC_BASE remembers the start position in source in each loop. - The loop will be exited when there's not enough source text to - analyze multi-byte codes (within macro ONE_MORE_CHAR), or when - there's not enough destination area to produce encoded codes - (within macro EMIT_BYTES). */ - const unsigned char *src_base; - unsigned char *tmp; + int multibytep = coding->dst_multibyte; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_used; + unsigned char *dst = coding->destination + coding->produced; + unsigned char *dst_end = coding->destination + coding->dst_bytes; + int safe_room = MAX_MULTIBYTE_LENGTH; + int produced_chars = 0; + Lisp_Object attrs, eol_type, charset_list; + int ascii_compatible; int c; - int selective_display = coding->mode & CODING_MODE_SELECTIVE_DISPLAY; - translation_table = Qnil; - if (coding->src_multibyte - && *(src_end - 1) == LEADING_CODE_8_BIT_CONTROL) - { - src_end--; - src_bytes--; - coding->result = CODING_FINISH_INSUFFICIENT_SRC; - } + CODING_GET_INFO (coding, attrs, eol_type, charset_list); + ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); - if (coding->eol_type == CODING_EOL_CRLF) - { - while (src < src_end) - { - src_base = src; - c = *src++; - if (c >= 0x20) - EMIT_ONE_BYTE (c); - else if (c == '\n' || (c == '\r' && selective_display)) - EMIT_TWO_BYTES ('\r', '\n'); - else - EMIT_ONE_BYTE (c); - } - src_base = src; - label_end_of_loop: - ; - } - else + while (charbuf < charbuf_end) { - if (!dst_bytes || src_bytes <= dst_bytes) + struct charset *charset; + unsigned code; - ++ + ASSURE_DESTINATION (safe_room); + c = *charbuf++; + if (ascii_compatible && ASCII_CHAR_P (c)) + EMIT_ONE_ASCII_BYTE (c); + else if (CHAR_BYTE8_P (c)) { - safe_bcopy (src, dst, src_bytes); - src_base = src_end; - dst += src_bytes; + c = CHAR_TO_BYTE8 (c); + EMIT_ONE_BYTE (c); } else { @@@ -3538,417 -4808,236 +4811,237 @@@ setup_coding_system (coding_system, cod Lisp_Object coding_system; struct coding_system *coding; { - Lisp_Object coding_spec, coding_type, eol_type, plist; + Lisp_Object attrs; + Lisp_Object eol_type; + Lisp_Object coding_type; Lisp_Object val; - /* At first, zero clear all members. */ - bzero (coding, sizeof (struct coding_system)); + if (NILP (coding_system)) + coding_system = Qno_conversion; - /* Initialize some fields required for all kinds of coding systems. */ - coding->symbol = coding_system; - coding->heading_ascii = -1; - coding->post_read_conversion = coding->pre_write_conversion = Qnil; - coding->composing = COMPOSITION_DISABLED; - coding->cmp_data = NULL; + CHECK_CODING_SYSTEM_GET_ID (coding_system, coding->id); - if (NILP (coding_system)) - goto label_invalid_coding_system; + attrs = CODING_ID_ATTRS (coding->id); + eol_type = CODING_ID_EOL_TYPE (coding->id); - coding_spec = Fget (coding_system, Qcoding_system); + coding->mode = 0; + coding->head_ascii = -1; + coding->common_flags + = (VECTORP (eol_type) ? CODING_REQUIRE_DETECTION_MASK : 0); + if (! NILP (CODING_ATTR_POST_READ (attrs))) + coding->common_flags |= CODING_REQUIRE_DECODING_MASK; + if (! NILP (CODING_ATTR_PRE_WRITE (attrs))) + coding->common_flags |= CODING_REQUIRE_ENCODING_MASK; ++ if (! NILP (CODING_ATTR_FOR_UNIBYTE (attrs))) ++ coding->common_flags |= CODING_FOR_UNIBYTE_MASK; - if (!VECTORP (coding_spec) - || XVECTOR (coding_spec)->size != 5 - || !CONSP (XVECTOR (coding_spec)->contents[3])) - goto label_invalid_coding_system; + val = CODING_ATTR_SAFE_CHARSETS (attrs); - coding->max_charset_id = XSTRING (val)->size - 1; - coding->safe_charsets = (char *) XSTRING (val)->data; ++ coding->max_charset_id = SCHARS (val) - 1; ++ coding->safe_charsets = (char *) SDATA (val); + coding->default_char = XINT (CODING_ATTR_DEFAULT_CHAR (attrs)); - eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type); - if (VECTORP (eol_type)) + coding_type = CODING_ATTR_TYPE (attrs); + if (EQ (coding_type, Qundecided)) { - coding->eol_type = CODING_EOL_UNDECIDED; - coding->common_flags = CODING_REQUIRE_DETECTION_MASK; + coding->detector = NULL; + coding->decoder = decode_coding_raw_text; + coding->encoder = encode_coding_raw_text; + coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; } - else if (XFASTINT (eol_type) == 1) + else if (EQ (coding_type, Qiso_2022)) { - coding->eol_type = CODING_EOL_CRLF; + int i; + int flags = XINT (AREF (attrs, coding_attr_iso_flags)); + + /* Invoke graphic register 0 to plane 0. */ + CODING_ISO_INVOCATION (coding, 0) = 0; + /* Invoke graphic register 1 to plane 1 if we can use 8-bit. */ + CODING_ISO_INVOCATION (coding, 1) + = (flags & CODING_ISO_FLAG_SEVEN_BITS ? -1 : 1); + /* Setup the initial status of designation. */ + for (i = 0; i < 4; i++) + CODING_ISO_DESIGNATION (coding, i) = CODING_ISO_INITIAL (coding, i); + /* Not single shifting initially. */ + CODING_ISO_SINGLE_SHIFTING (coding) = 0; + /* Beginning of buffer should also be regarded as bol. */ + CODING_ISO_BOL (coding) = 1; + coding->detector = detect_coding_iso_2022; + coding->decoder = decode_coding_iso_2022; + coding->encoder = encode_coding_iso_2022; + if (flags & CODING_ISO_FLAG_SAFE) + coding->mode |= CODING_MODE_SAFE_ENCODING; coding->common_flags - = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; + |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK + | CODING_REQUIRE_FLUSHING_MASK); + if (flags & CODING_ISO_FLAG_COMPOSITION) + coding->common_flags |= CODING_ANNOTATE_COMPOSITION_MASK; + if (flags & CODING_ISO_FLAG_DESIGNATION) + coding->common_flags |= CODING_ANNOTATE_CHARSET_MASK; + if (flags & CODING_ISO_FLAG_FULL_SUPPORT) + { + setup_iso_safe_charsets (attrs); + val = CODING_ATTR_SAFE_CHARSETS (attrs); - coding->max_charset_id = XSTRING (val)->size - 1; - coding->safe_charsets = (char *) XSTRING (val)->data; ++ coding->max_charset_id = SCHARS (val) - 1; ++ coding->safe_charsets = (char *) SDATA (val); + } + CODING_ISO_FLAGS (coding) = flags; } - else if (XFASTINT (eol_type) == 2) + else if (EQ (coding_type, Qcharset)) { - coding->eol_type = CODING_EOL_CR; + coding->detector = detect_coding_charset; + coding->decoder = decode_coding_charset; + coding->encoder = encode_coding_charset; coding->common_flags - = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; + |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); } - else - coding->eol_type = CODING_EOL_LF; - - coding_type = XVECTOR (coding_spec)->contents[0]; - /* Try short cut. */ - if (SYMBOLP (coding_type)) + else if (EQ (coding_type, Qutf_8)) { - if (EQ (coding_type, Qt)) - { - coding->type = coding_type_undecided; - coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; - } - else - coding->type = coding_type_no_conversion; - /* Initialize this member. Any thing other than - CODING_CATEGORY_IDX_UTF_16_BE and - CODING_CATEGORY_IDX_UTF_16_LE are ok because they have - special treatment in detect_eol. */ - coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE; - - return 0; - } - - /* Get values of coding system properties: - `post-read-conversion', `pre-write-conversion', - `translation-table-for-decode', `translation-table-for-encode'. */ - plist = XVECTOR (coding_spec)->contents[3]; - /* Pre & post conversion functions should be disabled if - inhibit_eol_conversion is nonzero. This is the case that a code - conversion function is called while those functions are running. */ - if (! inhibit_pre_post_conversion) - { - coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion); - coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion); - } - val = Fplist_get (plist, Qtranslation_table_for_decode); - if (SYMBOLP (val)) - val = Fget (val, Qtranslation_table_for_decode); - coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil; - val = Fplist_get (plist, Qtranslation_table_for_encode); - if (SYMBOLP (val)) - val = Fget (val, Qtranslation_table_for_encode); - coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil; - val = Fplist_get (plist, Qcoding_category); - if (!NILP (val)) - { - val = Fget (val, Qcoding_category_index); - if (INTEGERP (val)) - coding->category_idx = XINT (val); - else - goto label_invalid_coding_system; + coding->detector = detect_coding_utf_8; + coding->decoder = decode_coding_utf_8; + coding->encoder = encode_coding_utf_8; + coding->common_flags + |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); + } + else if (EQ (coding_type, Qutf_16)) + { + val = AREF (attrs, coding_attr_utf_16_bom); + CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom + : EQ (val, Qt) ? utf_16_with_bom + : utf_16_without_bom); + val = AREF (attrs, coding_attr_utf_16_endian); + CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian + : utf_16_little_endian); + CODING_UTF_16_SURROGATE (coding) = 0; + coding->detector = detect_coding_utf_16; + coding->decoder = decode_coding_utf_16; + coding->encoder = encode_coding_utf_16; + coding->common_flags + |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); + if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom) + coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; } - else - goto label_invalid_coding_system; - - /* If the coding system has non-nil `composition' property, enable - composition handling. */ - val = Fplist_get (plist, Qcomposition); - if (!NILP (val)) - coding->composing = COMPOSITION_NO; - - switch (XFASTINT (coding_type)) + else if (EQ (coding_type, Qccl)) { - case 0: - coding->type = coding_type_emacs_mule; - coding->common_flags - |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; - if (!NILP (coding->post_read_conversion)) - coding->common_flags |= CODING_REQUIRE_DECODING_MASK; - if (!NILP (coding->pre_write_conversion)) - coding->common_flags |= CODING_REQUIRE_ENCODING_MASK; - break; - - case 1: - coding->type = coding_type_sjis; + coding->detector = detect_coding_ccl; + coding->decoder = decode_coding_ccl; + coding->encoder = encode_coding_ccl; coding->common_flags - |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; - break; - - case 2: - coding->type = coding_type_iso2022; + |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK + | CODING_REQUIRE_FLUSHING_MASK); + } + else if (EQ (coding_type, Qemacs_mule)) + { + coding->detector = detect_coding_emacs_mule; + coding->decoder = decode_coding_emacs_mule; + coding->encoder = encode_coding_emacs_mule; coding->common_flags - |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; - { - Lisp_Object val, temp; - Lisp_Object *flags; - int i, charset, reg_bits = 0; - - val = XVECTOR (coding_spec)->contents[4]; - - if (!VECTORP (val) || XVECTOR (val)->size != 32) - goto label_invalid_coding_system; - - flags = XVECTOR (val)->contents; - coding->flags - = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM) - | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL) - | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL) - | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS) - | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT) - | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT) - | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN) - | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS) - | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION) - | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL) - | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL) - | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE) - | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA) - ); - - /* Invoke graphic register 0 to plane 0. */ - CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; - /* Invoke graphic register 1 to plane 1 if we can use full 8-bit. */ - CODING_SPEC_ISO_INVOCATION (coding, 1) - = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1); - /* Not single shifting at first. */ - CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0; - /* Beginning of buffer should also be regarded as bol. */ - CODING_SPEC_ISO_BOL (coding) = 1; - - for (charset = 0; charset <= MAX_CHARSET; charset++) - CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255; - val = Vcharset_revision_alist; - while (CONSP (val)) - { - charset = get_charset_id (Fcar_safe (XCAR (val))); - if (charset >= 0 - && (temp = Fcdr_safe (XCAR (val)), INTEGERP (temp)) - && (i = XINT (temp), (i >= 0 && (i + '@') < 128))) - CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i; - val = XCDR (val); - } - - /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations. - FLAGS[REG] can be one of below: - integer CHARSET: CHARSET occupies register I, - t: designate nothing to REG initially, but can be used - by any charsets, - list of integer, nil, or t: designate the first - element (if integer) to REG initially, the remaining - elements (if integer) is designated to REG on request, - if an element is t, REG can be used by any charsets, - nil: REG is never used. */ - for (charset = 0; charset <= MAX_CHARSET; charset++) - CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) - = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION; - for (i = 0; i < 4; i++) - { - if ((INTEGERP (flags[i]) - && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))) - || (charset = get_charset_id (flags[i])) >= 0) - { - CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; - CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i; - } - else if (EQ (flags[i], Qt)) - { - CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; - reg_bits |= 1 << i; - coding->flags |= CODING_FLAG_ISO_DESIGNATION; - } - else if (CONSP (flags[i])) - { - Lisp_Object tail; - tail = flags[i]; - - coding->flags |= CODING_FLAG_ISO_DESIGNATION; - if ((INTEGERP (XCAR (tail)) - && (charset = XINT (XCAR (tail)), - CHARSET_VALID_P (charset))) - || (charset = get_charset_id (XCAR (tail))) >= 0) - { - CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset; - CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i; - } - else - CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; - tail = XCDR (tail); - while (CONSP (tail)) - { - if ((INTEGERP (XCAR (tail)) - && (charset = XINT (XCAR (tail)), - CHARSET_VALID_P (charset))) - || (charset = get_charset_id (XCAR (tail))) >= 0) - CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) - = i; - else if (EQ (XCAR (tail), Qt)) - reg_bits |= 1 << i; - tail = XCDR (tail); - } - } - else - CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1; - - CODING_SPEC_ISO_DESIGNATION (coding, i) - = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i); - } - - if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)) - { - /* REG 1 can be used only by locking shift in 7-bit env. */ - if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS) - reg_bits &= ~2; - if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)) - /* Without any shifting, only REG 0 and 1 can be used. */ - reg_bits &= 3; - } - - if (reg_bits) - for (charset = 0; charset <= MAX_CHARSET; charset++) - { - if (CHARSET_DEFINED_P (charset) - && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) - == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) - { - /* There exist some default graphic registers to be - used by CHARSET. */ - - /* We had better avoid designating a charset of - CHARS96 to REG 0 as far as possible. */ - if (CHARSET_CHARS (charset) == 96) - CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) - = (reg_bits & 2 - ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0))); - else - CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) - = (reg_bits & 1 - ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3))); - } - } - } - coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK; - coding->spec.iso2022.last_invalid_designation_register = -1; - break; - - case 3: - coding->type = coding_type_big5; + |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); + if (! NILP (AREF (attrs, coding_attr_emacs_mule_full)) + && ! EQ (CODING_ATTR_CHARSET_LIST (attrs), Vemacs_mule_charset_list)) + { + Lisp_Object tail, safe_charsets; + int max_charset_id = 0; + + for (tail = Vemacs_mule_charset_list; CONSP (tail); + tail = XCDR (tail)) + if (max_charset_id < XFASTINT (XCAR (tail))) + max_charset_id = XFASTINT (XCAR (tail)); + safe_charsets = Fmake_string (make_number (max_charset_id + 1), + make_number (255)); + for (tail = Vemacs_mule_charset_list; CONSP (tail); + tail = XCDR (tail)) - XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0; ++ SSET (safe_charsets, XFASTINT (XCAR (tail)), 0); + coding->max_charset_id = max_charset_id; - coding->safe_charsets = (char *) XSTRING (safe_charsets)->data; ++ coding->safe_charsets = (char *) SDATA (safe_charsets); + } + } + else if (EQ (coding_type, Qshift_jis)) + { + coding->detector = detect_coding_sjis; + coding->decoder = decode_coding_sjis; + coding->encoder = encode_coding_sjis; coding->common_flags - |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; - coding->flags - = (NILP (XVECTOR (coding_spec)->contents[4]) - ? CODING_FLAG_BIG5_HKU - : CODING_FLAG_BIG5_ETEN); - break; - - case 4: - coding->type = coding_type_ccl; + |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); + } + else if (EQ (coding_type, Qbig5)) + { + coding->detector = detect_coding_big5; + coding->decoder = decode_coding_big5; + coding->encoder = encode_coding_big5; coding->common_flags - |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK; - { - val = XVECTOR (coding_spec)->contents[4]; - if (! CONSP (val) - || setup_ccl_program (&(coding->spec.ccl.decoder), - XCAR (val)) < 0 - || setup_ccl_program (&(coding->spec.ccl.encoder), - XCDR (val)) < 0) - goto label_invalid_coding_system; - - bzero (coding->spec.ccl.valid_codes, 256); - val = Fplist_get (plist, Qvalid_codes); - if (CONSP (val)) - { - Lisp_Object this; - - for (; CONSP (val); val = XCDR (val)) - { - this = XCAR (val); - if (INTEGERP (this) - && XINT (this) >= 0 && XINT (this) < 256) - coding->spec.ccl.valid_codes[XINT (this)] = 1; - else if (CONSP (this) - && INTEGERP (XCAR (this)) - && INTEGERP (XCDR (this))) - { - int start = XINT (XCAR (this)); - int end = XINT (XCDR (this)); - - if (start >= 0 && start <= end && end < 256) - while (start <= end) - coding->spec.ccl.valid_codes[start++] = 1; - } - } - } - } - coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK; - coding->spec.ccl.cr_carryover = 0; - coding->spec.ccl.eight_bit_carryover[0] = 0; - break; - - case 5: - coding->type = coding_type_raw_text; - break; - - default: - goto label_invalid_coding_system; + |= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK); + } + else /* EQ (coding_type, Qraw_text) */ + { + coding->detector = NULL; + coding->decoder = decode_coding_raw_text; + coding->encoder = encode_coding_raw_text; - coding->common_flags |= CODING_FOR_UNIBYTE_MASK; } - return 0; - label_invalid_coding_system: - coding->type = coding_type_no_conversion; - coding->category_idx = CODING_CATEGORY_IDX_BINARY; - coding->common_flags = 0; - coding->eol_type = CODING_EOL_LF; - coding->pre_write_conversion = coding->post_read_conversion = Qnil; - return -1; + return; } - /* Free memory blocks allocated for storing composition information. */ + /* Return raw-text or one of its subsidiaries that has the same + eol_type as CODING-SYSTEM. */ - void - coding_free_composition_data (coding) - struct coding_system *coding; + Lisp_Object + raw_text_coding_system (coding_system) + Lisp_Object coding_system; { - struct composition_data *cmp_data = coding->cmp_data, *next; - - if (!cmp_data) - return; - /* Memory blocks are chained. At first, rewind to the first, then, - free blocks one by one. */ - while (cmp_data->prev) - cmp_data = cmp_data->prev; - while (cmp_data) - { - next = cmp_data->next; - xfree (cmp_data); - cmp_data = next; - } - coding->cmp_data = NULL; - } + Lisp_Object spec, attrs; + Lisp_Object eol_type, raw_text_eol_type; - /* Set `char_offset' member of all memory blocks pointed by - coding->cmp_data to POS. */ + if (NILP (coding_system)) + return Qraw_text; + spec = CODING_SYSTEM_SPEC (coding_system); + attrs = AREF (spec, 0); - + - void - coding_adjust_composition_offset (coding, pos) - struct coding_system *coding; - int pos; - { - struct composition_data *cmp_data; + if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text)) + return coding_system; - for (cmp_data = coding->cmp_data; cmp_data; cmp_data = cmp_data->next) - cmp_data->char_offset = pos; + eol_type = AREF (spec, 2); + if (VECTORP (eol_type)) + return Qraw_text; + spec = CODING_SYSTEM_SPEC (Qraw_text); + raw_text_eol_type = AREF (spec, 2); + return (EQ (eol_type, Qunix) ? AREF (raw_text_eol_type, 0) + : EQ (eol_type, Qdos) ? AREF (raw_text_eol_type, 1) + : AREF (raw_text_eol_type, 2)); } - /* Setup raw-text or one of its subsidiaries in the structure - coding_system CODING according to the already setup value eol_type - in CODING. CODING should be setup for some coding system in - advance. */ - void - setup_raw_text_coding_system (coding) - struct coding_system *coding; + /* If CODING_SYSTEM doesn't specify end-of-line format but PARENT + does, return one of the subsidiary that has the same eol-spec as + PARENT. Otherwise, return CODING_SYSTEM. */ + + Lisp_Object + coding_inherit_eol_type (coding_system, parent) + Lisp_Object coding_system, parent; { - if (coding->type != coding_type_raw_text) - { - coding->symbol = Qraw_text; - coding->type = coding_type_raw_text; - if (coding->eol_type != CODING_EOL_UNDECIDED) - { - Lisp_Object subsidiaries; - subsidiaries = Fget (Qraw_text, Qeol_type); + Lisp_Object spec, attrs, eol_type; - if (VECTORP (subsidiaries) - && XVECTOR (subsidiaries)->size == 3) - coding->symbol - = XVECTOR (subsidiaries)->contents[coding->eol_type]; - } - setup_coding_system (coding->symbol, coding); - } - return; + if (NILP (coding_system)) + coding_system = Qraw_text; + spec = CODING_SYSTEM_SPEC (coding_system); + attrs = AREF (spec, 0); + eol_type = AREF (spec, 2); + if (VECTORP (eol_type) + && ! NILP (parent)) + { + Lisp_Object parent_spec; + Lisp_Object parent_eol_type; + + parent_spec + = CODING_SYSTEM_SPEC (buffer_defaults.buffer_file_coding_system); + parent_eol_type = AREF (parent_spec, 2); + if (EQ (parent_eol_type, Qunix)) + coding_system = AREF (eol_type, 0); + else if (EQ (parent_eol_type, Qdos)) + coding_system = AREF (eol_type, 1); + else if (EQ (parent_eol_type, Qmac)) + coding_system = AREF (eol_type, 2); + } + return coding_system; } /* Emacs has a mechanism to automatically detect a coding system if it @@@ -4060,889 -5149,1138 +5153,1135 @@@ */ - static - int ascii_skip_code[256]; + #define EOL_SEEN_NONE 0 + #define EOL_SEEN_LF 1 + #define EOL_SEEN_CR 2 + #define EOL_SEEN_CRLF 4 - /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded. - If it detects possible coding systems, return an integer in which - appropriate flag bits are set. Flag bits are defined by macros - CODING_CATEGORY_MASK_XXX in `coding.h'. If PRIORITIES is non-NULL, - it should point the table `coding_priorities'. In that case, only - the flag bit for a coding system of the highest priority is set in - the returned value. If MULTIBYTEP is nonzero, 8-bit codes of the - range 0x80..0x9F are in multibyte form. + /* Detect how end-of-line of a text of length SRC_BYTES pointed by + SOURCE is encoded. If CATEGORY is one of + coding_category_utf_16_XXXX, assume that CR and LF are encoded by + two-byte, else they are encoded by one-byte. + + Return one of EOL_SEEN_XXX. */ - How many ASCII characters are at the head is returned as *SKIP. */ + #define MAX_EOL_CHECK_COUNT 3 static int - detect_coding_mask (source, src_bytes, priorities, skip, multibytep) + detect_eol (source, src_bytes, category) unsigned char *source; - int src_bytes, *priorities, *skip; - int multibytep; + EMACS_INT src_bytes; + enum coding_category category; { - register unsigned char c; - unsigned char *src = source, *src_end = source + src_bytes; - unsigned int mask, utf16_examined_p, iso2022_examined_p; - int i; + unsigned char *src = source, *src_end = src + src_bytes; + unsigned char c; + int total = 0; + int eol_seen = EOL_SEEN_NONE; - /* At first, skip all ASCII characters and control characters except - for three ISO2022 specific control characters. */ - ascii_skip_code[ISO_CODE_SO] = 0; - ascii_skip_code[ISO_CODE_SI] = 0; - ascii_skip_code[ISO_CODE_ESC] = 0; - - label_loop_detect_coding: - while (src < src_end && ascii_skip_code[*src]) src++; - *skip = src - source; - - if (src >= src_end) - /* We found nothing other than ASCII. There's nothing to do. */ - return 0; - - c = *src; - /* The text seems to be encoded in some multilingual coding system. - Now, try to find in which coding system the text is encoded. */ - if (c < 0x80) - { - /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */ - /* C is an ISO2022 specific control code of C0. */ - mask = detect_coding_iso2022 (src, src_end, multibytep); - if (mask == 0) - { - /* No valid ISO2022 code follows C. Try again. */ - src++; - if (c == ISO_CODE_ESC) - ascii_skip_code[ISO_CODE_ESC] = 1; - else - ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1; - goto label_loop_detect_coding; - } - if (priorities) + if ((1 << category) & CATEGORY_MASK_UTF_16) + { + int msb, lsb; + + msb = category == (coding_category_utf_16_le + | coding_category_utf_16_le_nosig); + lsb = 1 - msb; + + while (src + 1 < src_end) { - for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++) + c = src[lsb]; + if (src[msb] == 0 && (c == '\n' || c == '\r')) { - if (mask & priorities[i]) - return priorities[i]; + int this_eol; + + if (c == '\n') + this_eol = EOL_SEEN_LF; + else if (src + 3 >= src_end + || src[msb + 2] != 0 + || src[lsb + 2] != '\n') + this_eol = EOL_SEEN_CR; + else - this_eol = EOL_SEEN_CRLF; ++ this_eol = EOL_SEEN_CRLF; + + if (eol_seen == EOL_SEEN_NONE) + /* This is the first end-of-line. */ + eol_seen = this_eol; + else if (eol_seen != this_eol) + { + /* The found type is different from what found before. */ + eol_seen = EOL_SEEN_LF; + break; + } + if (++total == MAX_EOL_CHECK_COUNT) + break; } - return CODING_CATEGORY_MASK_RAW_TEXT; + src += 2; } - } + } else { - int try; + while (src < src_end) + { + c = *src++; + if (c == '\n' || c == '\r') + { + int this_eol; + + if (c == '\n') + this_eol = EOL_SEEN_LF; + else if (src >= src_end || *src != '\n') + this_eol = EOL_SEEN_CR; + else + this_eol = EOL_SEEN_CRLF, src++; + + if (eol_seen == EOL_SEEN_NONE) + /* This is the first end-of-line. */ + eol_seen = this_eol; + else if (eol_seen != this_eol) + { + /* The found type is different from what found before. */ + eol_seen = EOL_SEEN_LF; + break; + } + if (++total == MAX_EOL_CHECK_COUNT) + break; + } + } + } + return eol_seen; + } + + + static void + adjust_coding_eol_type (coding, eol_seen) + struct coding_system *coding; + int eol_seen; + { + Lisp_Object eol_type; - ++ + eol_type = CODING_ID_EOL_TYPE (coding->id); + if (eol_seen & EOL_SEEN_LF) + coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0)); + else if (eol_seen & EOL_SEEN_CRLF) + coding->id = CODING_SYSTEM_ID (AREF (eol_type, 1)); + else if (eol_seen & EOL_SEEN_CR) + coding->id = CODING_SYSTEM_ID (AREF (eol_type, 2)); + } - if (multibytep && c == LEADING_CODE_8_BIT_CONTROL) - c = src[1] - 0x20; + /* Detect how a text specified in CODING is encoded. If a coding + system is detected, update fields of CODING by the detected coding + system. */ - if (c < 0xA0) + void + detect_coding (coding) + struct coding_system *coding; + { - unsigned char *src, *src_end; ++ const unsigned char *src, *src_end; + Lisp_Object attrs, coding_type; + + coding->consumed = coding->consumed_char = 0; + coding->produced = coding->produced_char = 0; + coding_set_source (coding); + + src_end = coding->source + coding->src_bytes; + + /* If we have not yet decided the text encoding type, detect it + now. */ + if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided)) + { + int c, i; + + for (src = coding->source; src < src_end; src++) { - /* C is the first byte of SJIS character code, - or a leading-code of Emacs' internal format (emacs-mule), - or the first byte of UTF-16. */ - try = (CODING_CATEGORY_MASK_SJIS - | CODING_CATEGORY_MASK_EMACS_MULE - | CODING_CATEGORY_MASK_UTF_16_BE - | CODING_CATEGORY_MASK_UTF_16_LE); - - /* Or, if C is a special latin extra code, - or is an ISO2022 specific control code of C1 (SS2 or SS3), - or is an ISO2022 control-sequence-introducer (CSI), - we should also consider the possibility of ISO2022 codings. */ - if ((VECTORP (Vlatin_extra_code_table) - && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) - || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3) - || (c == ISO_CODE_CSI - && (src < src_end - && (*src == ']' - || ((*src == '0' || *src == '1' || *src == '2') - && src + 1 < src_end - && src[1] == ']'))))) - try |= (CODING_CATEGORY_MASK_ISO_8_ELSE - | CODING_CATEGORY_MASK_ISO_8BIT); + c = *src; + if (c & 0x80 || (c < 0x20 && (c == ISO_CODE_ESC + || c == ISO_CODE_SI + || c == ISO_CODE_SO))) + break; } - else - /* C is a character of ISO2022 in graphic plane right, - or a SJIS's 1-byte character code (i.e. JISX0201), - or the first byte of BIG5's 2-byte code, - or the first byte of UTF-8/16. */ - try = (CODING_CATEGORY_MASK_ISO_8_ELSE - | CODING_CATEGORY_MASK_ISO_8BIT - | CODING_CATEGORY_MASK_SJIS - | CODING_CATEGORY_MASK_BIG5 - | CODING_CATEGORY_MASK_UTF_8 - | CODING_CATEGORY_MASK_UTF_16_BE - | CODING_CATEGORY_MASK_UTF_16_LE); - - /* Or, we may have to consider the possibility of CCL. */ - if (coding_system_table[CODING_CATEGORY_IDX_CCL] - && (coding_system_table[CODING_CATEGORY_IDX_CCL] - ->spec.ccl.valid_codes)[c]) - try |= CODING_CATEGORY_MASK_CCL; - - mask = 0; - utf16_examined_p = iso2022_examined_p = 0; - if (priorities) + coding->head_ascii = src - (coding->source + coding->consumed); + + if (coding->head_ascii < coding->src_bytes) { - for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++) + struct coding_detection_info detect_info; + enum coding_category category; + struct coding_system *this; + + detect_info.checked = detect_info.found = detect_info.rejected = 0; + for (i = 0; i < coding_category_raw_text; i++) { - if (!iso2022_examined_p - && (priorities[i] & try & CODING_CATEGORY_MASK_ISO)) + category = coding_priorities[i]; + this = coding_categories + category; + if (this->id < 0) { - mask |= detect_coding_iso2022 (src, src_end, multibytep); - iso2022_examined_p = 1; + /* No coding system of this category is defined. */ + detect_info.rejected |= (1 << category); } - else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS) - mask |= detect_coding_sjis (src, src_end, multibytep); - else if (priorities[i] & try & CODING_CATEGORY_MASK_UTF_8) - mask |= detect_coding_utf_8 (src, src_end, multibytep); - else if (!utf16_examined_p - && (priorities[i] & try & - CODING_CATEGORY_MASK_UTF_16_BE_LE)) + else if (category >= coding_category_raw_text) + continue; + else if (detect_info.checked & (1 << category)) { - mask |= detect_coding_utf_16 (src, src_end, multibytep); - utf16_examined_p = 1; + if (detect_info.found & (1 << category)) + break; } - else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5) - mask |= detect_coding_big5 (src, src_end, multibytep); - else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE) - mask |= detect_coding_emacs_mule (src, src_end, multibytep); - else if (priorities[i] & try & CODING_CATEGORY_MASK_CCL) - mask |= detect_coding_ccl (src, src_end, multibytep); - else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT) - mask |= CODING_CATEGORY_MASK_RAW_TEXT; - else if (priorities[i] & CODING_CATEGORY_MASK_BINARY) - mask |= CODING_CATEGORY_MASK_BINARY; - if (mask & priorities[i]) - return priorities[i]; + else if ((*(this->detector)) (coding, &detect_info) + && detect_info.found & (1 << category)) + break; } - return CODING_CATEGORY_MASK_RAW_TEXT; + if (i < coding_category_raw_text) + setup_coding_system (CODING_ID_NAME (this->id), coding); + else if (detect_info.rejected == CATEGORY_MASK_ANY) + setup_coding_system (Qraw_text, coding); + else if (detect_info.rejected) + for (i = 0; i < coding_category_raw_text; i++) + if (! (detect_info.rejected & (1 << coding_priorities[i]))) + { + this = coding_categories + coding_priorities[i]; + setup_coding_system (CODING_ID_NAME (this->id), coding); + break; + } } - if (try & CODING_CATEGORY_MASK_ISO) - mask |= detect_coding_iso2022 (src, src_end, multibytep); - if (try & CODING_CATEGORY_MASK_SJIS) - mask |= detect_coding_sjis (src, src_end, multibytep); - if (try & CODING_CATEGORY_MASK_BIG5) - mask |= detect_coding_big5 (src, src_end, multibytep); - if (try & CODING_CATEGORY_MASK_UTF_8) - mask |= detect_coding_utf_8 (src, src_end, multibytep); - if (try & CODING_CATEGORY_MASK_UTF_16_BE_LE) - mask |= detect_coding_utf_16 (src, src_end, multibytep); - if (try & CODING_CATEGORY_MASK_EMACS_MULE) - mask |= detect_coding_emacs_mule (src, src_end, multibytep); - if (try & CODING_CATEGORY_MASK_CCL) - mask |= detect_coding_ccl (src, src_end, multibytep); - } - return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY); - } + } + else if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qutf_16)) + { + Lisp_Object coding_systems; + struct coding_detection_info detect_info; + + coding_systems + = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom); + detect_info.found = detect_info.rejected = 0; + if (CONSP (coding_systems) + && detect_coding_utf_16 (coding, &detect_info) + && (detect_info.found & (CATEGORY_MASK_UTF_16_LE + | CATEGORY_MASK_UTF_16_BE))) + { + if (detect_info.found & CATEGORY_MASK_UTF_16_LE) + setup_coding_system (XCAR (coding_systems), coding); + else + setup_coding_system (XCDR (coding_systems), coding); + } + } - /* Detect how a text of length SRC_BYTES pointed by SRC is encoded. - The information of the detected coding system is set in CODING. */ + attrs = CODING_ID_ATTRS (coding->id); + coding_type = CODING_ATTR_TYPE (attrs); - void - detect_coding (coding, src, src_bytes) - struct coding_system *coding; - const unsigned char *src; - int src_bytes; - { - unsigned int idx; - int skip, mask; - Lisp_Object val; + /* If we have not yet decided the EOL type, detect it now. But, the + detection is impossible for a CCL based coding system, in which + case, we detct the EOL type after decoding. */ + if (VECTORP (CODING_ID_EOL_TYPE (coding->id)) + && ! EQ (coding_type, Qccl)) + { + int eol_seen = detect_eol (coding->source, coding->src_bytes, + XINT (CODING_ATTR_CATEGORY (attrs))); - val = Vcoding_category_list; - mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip, - coding->src_multibyte); - coding->heading_ascii = skip; + if (eol_seen != EOL_SEEN_NONE) + adjust_coding_eol_type (coding, eol_seen); + } + } - if (!mask) return; - /* We found a single coding system of the highest priority in MASK. */ - idx = 0; - while (mask && ! (mask & 1)) mask >>= 1, idx++; - if (! mask) - idx = CODING_CATEGORY_IDX_RAW_TEXT; + static void + decode_eol (coding) + struct coding_system *coding; + { + if (VECTORP (CODING_ID_EOL_TYPE (coding->id))) + { + unsigned char *p = CHAR_POS_ADDR (coding->dst_pos); + unsigned char *pend = p + coding->produced; + int eol_seen = EOL_SEEN_NONE; - val = SYMBOL_VALUE (XVECTOR (Vcoding_category_table)->contents[idx]); + for (; p < pend; p++) + { + if (*p == '\n') + eol_seen |= EOL_SEEN_LF; + else if (*p == '\r') + { + if (p + 1 < pend && *(p + 1) == '\n') + { + eol_seen |= EOL_SEEN_CRLF; + p++; + } + else + eol_seen |= EOL_SEEN_CR; + } + } + if (eol_seen != EOL_SEEN_NONE) + adjust_coding_eol_type (coding, eol_seen); + } - if (coding->eol_type != CODING_EOL_UNDECIDED) + if (EQ (CODING_ID_EOL_TYPE (coding->id), Qmac)) { - Lisp_Object tmp; + unsigned char *p = CHAR_POS_ADDR (coding->dst_pos); + unsigned char *pend = p + coding->produced; - + - tmp = Fget (val, Qeol_type); - if (VECTORP (tmp)) - val = XVECTOR (tmp)->contents[coding->eol_type]; + for (; p < pend; p++) + if (*p == '\r') + *p = '\n'; } + else if (EQ (CODING_ID_EOL_TYPE (coding->id), Qdos)) + { + unsigned char *p, *pbeg, *pend; + Lisp_Object undo_list; - /* Setup this new coding system while preserving some slots. */ - { - int src_multibyte = coding->src_multibyte; - int dst_multibyte = coding->dst_multibyte; + move_gap_both (coding->dst_pos + coding->produced_char, + coding->dst_pos_byte + coding->produced); + undo_list = current_buffer->undo_list; + current_buffer->undo_list = Qt; + del_range_2 (coding->dst_pos, coding->dst_pos_byte, GPT, GPT_BYTE, 0); + current_buffer->undo_list = undo_list; + pbeg = GPT_ADDR; + pend = pbeg + coding->produced; - setup_coding_system (val, coding); - coding->src_multibyte = src_multibyte; - coding->dst_multibyte = dst_multibyte; - coding->heading_ascii = skip; - } + for (p = pend - 1; p >= pbeg; p--) + if (*p == '\r') + { + safe_bcopy ((char *) (p + 1), (char *) p, pend - p - 1); + pend--; + } + coding->produced_char -= coding->produced - (pend - pbeg); + coding->produced = pend - pbeg; + insert_from_gap (coding->produced_char, coding->produced); + } } - /* Detect how end-of-line of a text of length SRC_BYTES pointed by - SOURCE is encoded. Return one of CODING_EOL_LF, CODING_EOL_CRLF, - CODING_EOL_CR, and CODING_EOL_UNDECIDED. + static void + translate_chars (coding, table) + struct coding_system *coding; + Lisp_Object table; + { + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_used; + int c; - How many non-eol characters are at the head is returned as *SKIP. */ + if (coding->chars_at_source) + return; - #define MAX_EOL_CHECK_COUNT 3 + while (charbuf < charbuf_end) + { + c = *charbuf; + if (c < 0) + charbuf += c; + else + *charbuf++ = translate_char (table, c); + } + } static int - detect_eol_type (source, src_bytes, skip) - unsigned char *source; - int src_bytes, *skip; + produce_chars (coding) + struct coding_system *coding; { - unsigned char *src = source, *src_end = src + src_bytes; - unsigned char c; - int total = 0; /* How many end-of-lines are found so far. */ - int eol_type = CODING_EOL_UNDECIDED; - int this_eol_type; + unsigned char *dst = coding->destination + coding->produced; + unsigned char *dst_end = coding->destination + coding->dst_bytes; + int produced; + int produced_chars = 0; - *skip = 0; - - while (src < src_end && total < MAX_EOL_CHECK_COUNT) + if (! coding->chars_at_source) { - c = *src++; - if (c == '\n' || c == '\r') + /* Characters are in coding->charbuf. */ + int *buf = coding->charbuf; + int *buf_end = buf + coding->charbuf_used; + unsigned char *adjusted_dst_end; + + if (BUFFERP (coding->src_object) + && EQ (coding->src_object, coding->dst_object)) - dst_end = coding->source + coding->consumed; ++ dst_end = ((unsigned char *) coding->source) + coding->consumed; + adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH; + + while (buf < buf_end) { - if (*skip == 0) - *skip = src - 1 - source; - total++; - if (c == '\n') - this_eol_type = CODING_EOL_LF; - else if (src >= src_end || *src != '\n') - this_eol_type = CODING_EOL_CR; - else - this_eol_type = CODING_EOL_CRLF, src++; + int c = *buf++; - + - if (eol_type == CODING_EOL_UNDECIDED) - /* This is the first end-of-line. */ - eol_type = this_eol_type; - else if (eol_type != this_eol_type) + if (dst >= adjusted_dst_end) { - /* The found type is different from what found before. */ - eol_type = CODING_EOL_INCONSISTENT; - break; + dst = alloc_destination (coding, + buf_end - buf + MAX_MULTIBYTE_LENGTH, + dst); + dst_end = coding->destination + coding->dst_bytes; + adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH; + } + if (c >= 0) + { + if (coding->dst_multibyte + || ! CHAR_BYTE8_P (c)) + CHAR_STRING_ADVANCE (c, dst); + else + *dst++ = CHAR_TO_BYTE8 (c); + produced_chars++; } + else + /* This is an annotation datum. (-C) is the length of + it. */ + buf += -c - 1; } } - - if (*skip == 0) - *skip = src_end - source; - return eol_type; - } - - /* Like detect_eol_type, but detect EOL type in 2-octet - big-endian/little-endian format for coding systems utf-16-be and - utf-16-le. */ - - static int - detect_eol_type_in_2_octet_form (source, src_bytes, skip, big_endian_p) - unsigned char *source; - int src_bytes, *skip, big_endian_p; - { - unsigned char *src = source, *src_end = src + src_bytes; - unsigned int c1, c2; - int total = 0; /* How many end-of-lines are found so far. */ - int eol_type = CODING_EOL_UNDECIDED; - int this_eol_type; - int msb, lsb; - - if (big_endian_p) - msb = 0, lsb = 1; else - msb = 1, lsb = 0; - - *skip = 0; - - while ((src + 1) < src_end && total < MAX_EOL_CHECK_COUNT) { - c1 = (src[msb] << 8) | (src[lsb]); - src += 2; - unsigned char *src = coding->source; - unsigned char *src_end = src + coding->src_bytes; ++ const unsigned char *src = coding->source; ++ const unsigned char *src_end = src + coding->src_bytes; + Lisp_Object eol_type; - if (c1 == '\n' || c1 == '\r') + eol_type = CODING_ID_EOL_TYPE (coding->id); + + if (coding->src_multibyte != coding->dst_multibyte) { - if (*skip == 0) - *skip = src - 2 - source; - total++; - if (c1 == '\n') + if (coding->src_multibyte) { - this_eol_type = CODING_EOL_LF; + int multibytep = 1; + int consumed_chars; + + while (1) + { - unsigned char *src_base = src; ++ const unsigned char *src_base = src; + int c; + + ONE_MORE_BYTE (c); + if (c == '\r') + { + if (EQ (eol_type, Qdos)) + { + if (src == src_end) + { + coding->result = CODING_RESULT_INSUFFICIENT_SRC; + goto no_more_source; + } + if (*src == '\n') + c = *src++; + } + else if (EQ (eol_type, Qmac)) + c = '\n'; + } + if (dst == dst_end) + { + coding->consumed = src - coding->source; + + if (EQ (coding->src_object, coding->dst_object)) - dst_end = src; ++ dst_end = (unsigned char *) src; + if (dst == dst_end) + { + dst = alloc_destination (coding, src_end - src + 1, + dst); + dst_end = coding->destination + coding->dst_bytes; + coding_set_source (coding); + src = coding->source + coding->consumed; + src_end = coding->source + coding->src_bytes; + } + } + *dst++ = c; + produced_chars++; + } + no_more_source: + ; } else + while (src < src_end) + { + int multibytep = 1; + int c = *src++; + + if (c == '\r') + { + if (EQ (eol_type, Qdos)) + { + if (src < src_end + && *src == '\n') + c = *src++; + } + else if (EQ (eol_type, Qmac)) + c = '\n'; + } + if (dst >= dst_end - 1) + { + coding->consumed = src - coding->source; + + if (EQ (coding->src_object, coding->dst_object)) - dst_end = src; ++ dst_end = (unsigned char *) src; + if (dst >= dst_end - 1) + { + dst = alloc_destination (coding, src_end - src + 2, + dst); + dst_end = coding->destination + coding->dst_bytes; + coding_set_source (coding); + src = coding->source + coding->consumed; + src_end = coding->source + coding->src_bytes; + } + } + EMIT_ONE_BYTE (c); + } + } + else + { + if (!EQ (coding->src_object, coding->dst_object)) { - if ((src + 1) >= src_end) - { - this_eol_type = CODING_EOL_CR; - } - else + int require = coding->src_bytes - coding->dst_bytes; + + if (require > 0) { - c2 = (src[msb] << 8) | (src[lsb]); - if (c2 == '\n') - this_eol_type = CODING_EOL_CRLF, src += 2; - else - this_eol_type = CODING_EOL_CR; + EMACS_INT offset = src - coding->source; + + dst = alloc_destination (coding, require, dst); + coding_set_source (coding); + src = coding->source + offset; + src_end = coding->source + coding->src_bytes; } } - - if (eol_type == CODING_EOL_UNDECIDED) - /* This is the first end-of-line. */ - eol_type = this_eol_type; - else if (eol_type != this_eol_type) + produced_chars = coding->src_chars; + while (src < src_end) { - /* The found type is different from what found before. */ - eol_type = CODING_EOL_INCONSISTENT; - break; + int c = *src++; + + if (c == '\r') + { + if (EQ (eol_type, Qdos)) + { + if (src < src_end + && *src == '\n') + c = *src++; + produced_chars--; + } + else if (EQ (eol_type, Qmac)) + c = '\n'; + } + *dst++ = c; } } + coding->consumed = coding->src_bytes; + coding->consumed_char = coding->src_chars; } - if (*skip == 0) - *skip = src_end - source; - return eol_type; + produced = dst - (coding->destination + coding->produced); + if (BUFFERP (coding->dst_object)) + insert_from_gap (produced_chars, produced); + coding->produced += produced; + coding->produced_char += produced_chars; + return produced_chars; } - /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC - is encoded. If it detects an appropriate format of end-of-line, it - sets the information in *CODING. */ + /* Compose text in CODING->object according to the annotation data at + CHARBUF. CHARBUF is an array: + [ -LENGTH ANNOTATION_MASK FROM TO METHOD COMP_LEN [ COMPONENTS... ] ] + */ - void - detect_eol (coding, src, src_bytes) + static INLINE void + produce_composition (coding, charbuf) struct coding_system *coding; - const unsigned char *src; - int src_bytes; + int *charbuf; { - Lisp_Object val; - int skip; - int eol_type; + int len; + EMACS_INT from, to; + enum composition_method method; + Lisp_Object components; - switch (coding->category_idx) - { - case CODING_CATEGORY_IDX_UTF_16_BE: - eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 1); - break; - case CODING_CATEGORY_IDX_UTF_16_LE: - eol_type = detect_eol_type_in_2_octet_form (src, src_bytes, &skip, 0); - break; - default: - eol_type = detect_eol_type (src, src_bytes, &skip); - break; - } + len = -charbuf[0]; + from = coding->dst_pos + charbuf[2]; + to = coding->dst_pos + charbuf[3]; + method = (enum composition_method) (charbuf[4]); - if (coding->heading_ascii > skip) - coding->heading_ascii = skip; + if (method == COMPOSITION_RELATIVE) + components = Qnil; else - skip = coding->heading_ascii; - - if (eol_type == CODING_EOL_UNDECIDED) - return; - if (eol_type == CODING_EOL_INCONSISTENT) - { - #if 0 - /* This code is suppressed until we find a better way to - distinguish raw text file and binary file. */ - - /* If we have already detected that the coding is raw-text, the - coding should actually be no-conversion. */ - if (coding->type == coding_type_raw_text) - { - setup_coding_system (Qno_conversion, coding); - return; - } - /* Else, let's decode only text code anyway. */ - #endif /* 0 */ - eol_type = CODING_EOL_LF; - } - - val = Fget (coding->symbol, Qeol_type); - if (VECTORP (val) && XVECTOR (val)->size == 3) { - int src_multibyte = coding->src_multibyte; - int dst_multibyte = coding->dst_multibyte; - struct composition_data *cmp_data = coding->cmp_data; + Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1]; + int i; - setup_coding_system (XVECTOR (val)->contents[eol_type], coding); - coding->src_multibyte = src_multibyte; - coding->dst_multibyte = dst_multibyte; - coding->heading_ascii = skip; - coding->cmp_data = cmp_data; + len -= 5; + charbuf += 5; + for (i = 0; i < len; i++) + args[i] = make_number (charbuf[i]); + components = (method == COMPOSITION_WITH_ALTCHARS + ? Fstring (len, args) : Fvector (len, args)); } + compose_text (from, to, components, Qnil, coding->dst_object); } - #define CONVERSION_BUFFER_EXTRA_ROOM 256 - - #define DECODING_BUFFER_MAG(coding) \ - (coding->type == coding_type_iso2022 \ - ? 3 \ - : (coding->type == coding_type_ccl \ - ? coding->spec.ccl.decoder.buf_magnification \ - : 2)) - /* Return maximum size (bytes) of a buffer enough for decoding - SRC_BYTES of text encoded in CODING. */ + /* Put `charset' property on text in CODING->object according to + the annotation data at CHARBUF. CHARBUF is an array: + [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ] + */ - int - decoding_buffer_size (coding, src_bytes) + static INLINE void + produce_charset (coding, charbuf) struct coding_system *coding; - int src_bytes; + int *charbuf; { - return (src_bytes * DECODING_BUFFER_MAG (coding) - + CONVERSION_BUFFER_EXTRA_ROOM); + EMACS_INT from = coding->dst_pos + charbuf[2]; + EMACS_INT to = coding->dst_pos + charbuf[3]; + struct charset *charset = CHARSET_FROM_ID (charbuf[4]); + + Fput_text_property (make_number (from), make_number (to), + Qcharset, CHARSET_NAME (charset), + coding->dst_object); } - /* Return maximum size (bytes) of a buffer enough for encoding - SRC_BYTES of text to CODING. */ - int - encoding_buffer_size (coding, src_bytes) + #define CHARBUF_SIZE 0x4000 + + #define ALLOC_CONVERSION_WORK_AREA(coding) \ + do { \ + int size = CHARBUF_SIZE;; \ + \ + coding->charbuf = NULL; \ + while (size > 1024) \ + { \ + coding->charbuf = (int *) alloca (sizeof (int) * size); \ + if (coding->charbuf) \ + break; \ + size >>= 1; \ + } \ + if (! coding->charbuf) \ + { \ + coding->result = CODING_RESULT_INSUFFICIENT_MEM; \ + return coding->result; \ + } \ + coding->charbuf_size = size; \ + } while (0) + + + static void + produce_annotation (coding) struct coding_system *coding; - int src_bytes; { - int magnification; + int *charbuf = coding->charbuf; + int *charbuf_end = charbuf + coding->charbuf_used; + + if (NILP (coding->dst_object)) + return; - if (coding->type == coding_type_ccl) + while (charbuf < charbuf_end) { - magnification = coding->spec.ccl.encoder.buf_magnification; - if (coding->eol_type == CODING_EOL_CRLF) - magnification *= 2; + if (*charbuf >= 0) + charbuf++; + else + { + int len = -*charbuf; + switch (charbuf[1]) + { + case CODING_ANNOTATE_COMPOSITION_MASK: + produce_composition (coding, charbuf); + break; + case CODING_ANNOTATE_CHARSET_MASK: + produce_charset (coding, charbuf); + break; + default: + abort (); + } + charbuf += len; + } } - else if (CODING_REQUIRE_ENCODING (coding)) - magnification = 3; - else - magnification = 1; - - return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM); } - /* Working buffer for code conversion. */ - struct conversion_buffer - { - int size; /* size of data. */ - int on_stack; /* 1 if allocated by alloca. */ - unsigned char *data; - }; + /* Decode the data at CODING->src_object into CODING->dst_object. + CODING->src_object is a buffer, a string, or nil. + CODING->dst_object is a buffer. - /* Don't use alloca for allocating memory space larger than this, lest - we overflow their stack. */ - #define MAX_ALLOCA 16*1024 + If CODING->src_object is a buffer, it must be the current buffer. + In this case, if CODING->src_pos is positive, it is a position of + the source text in the buffer, otherwise, the source text is in the + gap area of the buffer, and CODING->src_pos specifies the offset of + the text from GPT (which must be the same as PT). If this is the + same buffer as CODING->dst_object, CODING->src_pos must be + negative. - /* Allocate LEN bytes of memory for BUF (struct conversion_buffer). */ - #define allocate_conversion_buffer(buf, len) \ - do { \ - if (len < MAX_ALLOCA) \ - { \ - buf.data = (unsigned char *) alloca (len); \ - buf.on_stack = 1; \ - } \ - else \ - { \ - buf.data = (unsigned char *) xmalloc (len); \ - buf.on_stack = 0; \ - } \ - buf.size = len; \ - } while (0) + If CODING->src_object is a string, CODING->src_pos in an index to + that string. - /* Double the allocated memory for *BUF. */ - static void - extend_conversion_buffer (buf) - struct conversion_buffer *buf; - { - if (buf->on_stack) - { - unsigned char *save = buf->data; - buf->data = (unsigned char *) xmalloc (buf->size * 2); - bcopy (save, buf->data, buf->size); - buf->on_stack = 0; - } - else - { - buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2); - } - buf->size *= 2; - } + If CODING->src_object is nil, CODING->source must already point to + the non-relocatable memory area. In this case, CODING->src_pos is + an offset from CODING->source. - /* Free the allocated memory for BUF if it is not on stack. */ - static void - free_conversion_buffer (buf) - struct conversion_buffer *buf; - { - if (!buf->on_stack) - xfree (buf->data); - } + The decoded data is inserted at the current point of the buffer + CODING->dst_object. + */ - int - ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep) + static int + decode_coding (coding) struct coding_system *coding; - unsigned char *source, *destination; - int src_bytes, dst_bytes, encodep; { - struct ccl_program *ccl - = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder; - unsigned char *dst = destination; + Lisp_Object attrs; - ccl->suppress_error = coding->suppress_error; - ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK; - if (encodep) - { - /* On encoding, EOL format is converted within ccl_driver. For - that, setup proper information in the structure CCL. */ - ccl->eol_type = coding->eol_type; - if (ccl->eol_type ==CODING_EOL_UNDECIDED) - ccl->eol_type = CODING_EOL_LF; - ccl->cr_consumed = coding->spec.ccl.cr_carryover; - ccl->eight_bit_control = coding->dst_multibyte; - } - else - ccl->eight_bit_control = 1; - ccl->multibyte = coding->src_multibyte; - if (coding->spec.ccl.eight_bit_carryover[0] != 0) + if (BUFFERP (coding->src_object) + && coding->src_pos > 0 + && coding->src_pos < GPT + && coding->src_pos + coding->src_chars > GPT) + move_gap_both (coding->src_pos, coding->src_pos_byte); + + if (BUFFERP (coding->dst_object)) { - /* Move carryover bytes to DESTINATION. */ - unsigned char *p = coding->spec.ccl.eight_bit_carryover; - while (*p) - *dst++ = *p++; - coding->spec.ccl.eight_bit_carryover[0] = 0; - if (dst_bytes) - dst_bytes -= dst - destination; + if (current_buffer != XBUFFER (coding->dst_object)) + set_buffer_internal (XBUFFER (coding->dst_object)); + if (GPT != PT) + move_gap_both (PT, PT_BYTE); } - coding->produced = (ccl_driver (ccl, source, dst, src_bytes, dst_bytes, - &(coding->consumed)) - + dst - destination); + coding->consumed = coding->consumed_char = 0; + coding->produced = coding->produced_char = 0; + coding->chars_at_source = 0; + coding->result = CODING_RESULT_SUCCESS; + coding->errors = 0; - if (encodep) - { - coding->produced_char = coding->produced; - coding->spec.ccl.cr_carryover = ccl->cr_consumed; - } - else if (!ccl->eight_bit_control) - { - /* The produced bytes forms a valid multibyte sequence. */ - coding->produced_char - = multibyte_chars_in_text (destination, coding->produced); - coding->spec.ccl.eight_bit_carryover[0] = 0; - } - else + ALLOC_CONVERSION_WORK_AREA (coding); + + attrs = CODING_ID_ATTRS (coding->id); + + do { - /* On decoding, the destination should always multibyte. But, - CCL program might have been generated an invalid multibyte - sequence. Here we make such a sequence valid as - multibyte. */ - int bytes - = dst_bytes ? dst_bytes : source + coding->consumed - destination; - - if ((coding->consumed < src_bytes - || !ccl->last_block) - && coding->produced >= 1 - && destination[coding->produced - 1] >= 0x80) + coding_set_source (coding); + coding->annotated = 0; + (*(coding->decoder)) (coding); + if (!NILP (CODING_ATTR_DECODE_TBL (attrs))) + translate_chars (coding, CODING_ATTR_DECODE_TBL (attrs)); + else if (!NILP (Vstandard_translation_table_for_decode)) + translate_chars (coding, Vstandard_translation_table_for_decode); + coding_set_destination (coding); + produce_chars (coding); + if (coding->annotated) + produce_annotation (coding); + } + while (coding->consumed < coding->src_bytes + && ! coding->result); + + if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qccl) + && SYMBOLP (CODING_ID_EOL_TYPE (coding->id)) + && ! EQ (CODING_ID_EOL_TYPE (coding->id), Qunix)) + decode_eol (coding); + + coding->carryover_bytes = 0; + if (coding->consumed < coding->src_bytes) + { + int nbytes = coding->src_bytes - coding->consumed; - unsigned char *src; ++ const unsigned char *src; + + coding_set_source (coding); + coding_set_destination (coding); + src = coding->source + coding->consumed; + + if (coding->mode & CODING_MODE_LAST_BLOCK) { - /* We should not convert the tailing 8-bit codes to - multibyte form even if they doesn't form a valid - multibyte sequence. They may form a valid sequence in - the next call. */ - int carryover = 0; - - if (destination[coding->produced - 1] < 0xA0) - carryover = 1; - else if (coding->produced >= 2) - { - if (destination[coding->produced - 2] >= 0x80) - { - if (destination[coding->produced - 2] < 0xA0) - carryover = 2; - else if (coding->produced >= 3 - && destination[coding->produced - 3] >= 0x80 - && destination[coding->produced - 3] < 0xA0) - carryover = 3; - } - } - if (carryover > 0) + /* Flush out unprocessed data as binary chars. We are sure + that the number of data is less than the size of + coding->charbuf. */ + while (nbytes-- > 0) { - BCOPY_SHORT (destination + coding->produced - carryover, - coding->spec.ccl.eight_bit_carryover, - carryover); - coding->spec.ccl.eight_bit_carryover[carryover] = 0; - coding->produced -= carryover; + int c = *src++; + + coding->charbuf[coding->charbuf_used++] = (c & 0x80 ? - c : c); } + produce_chars (coding); + } + else + { + /* Record unprocessed bytes in coding->carryover. We are + sure that the number of data is less than the size of + coding->carryover. */ + unsigned char *p = coding->carryover; + + coding->carryover_bytes = nbytes; + while (nbytes-- > 0) + *p++ = *src++; } - coding->produced = str_as_multibyte (destination, bytes, - coding->produced, - &(coding->produced_char)); + coding->consumed = coding->src_bytes; } - switch (ccl->status) - { - case CCL_STAT_SUSPEND_BY_SRC: - coding->result = CODING_FINISH_INSUFFICIENT_SRC; - break; - case CCL_STAT_SUSPEND_BY_DST: - coding->result = CODING_FINISH_INSUFFICIENT_DST; - break; - case CCL_STAT_QUIT: - case CCL_STAT_INVALID_CMD: - coding->result = CODING_FINISH_INTERRUPT; - break; - default: - coding->result = CODING_FINISH_NORMAL; - break; - } return coding->result; } - /* Decode EOL format of the text at PTR of BYTES length destructively - according to CODING->eol_type. This is called after the CCL - program produced a decoded text at PTR. If we do CRLF->LF - conversion, update CODING->produced and CODING->produced_char. */ - static void - decode_eol_post_ccl (coding, ptr, bytes) + /* Extract an annotation datum from a composition starting at POS and + ending before LIMIT of CODING->src_object (buffer or string), store + the data in BUF, set *STOP to a starting position of the next + composition (if any) or to LIMIT, and return the address of the + next element of BUF. + + If such an annotation is not found, set *STOP to a starting + position of a composition after POS (if any) or to LIMIT, and + return BUF. */ + + static INLINE int * + handle_composition_annotation (pos, limit, coding, buf, stop) + EMACS_INT pos, limit; struct coding_system *coding; - unsigned char *ptr; - int bytes; + int *buf; + EMACS_INT *stop; { - Lisp_Object val, saved_coding_symbol; - unsigned char *pend = ptr + bytes; - int dummy; - - /* Remember the current coding system symbol. We set it back when - an inconsistent EOL is found so that `last-coding-system-used' is - set to the coding system that doesn't specify EOL conversion. */ - saved_coding_symbol = coding->symbol; - - coding->spec.ccl.cr_carryover = 0; - if (coding->eol_type == CODING_EOL_UNDECIDED) - { - /* Here, to avoid the call of setup_coding_system, we directly - call detect_eol_type. */ - coding->eol_type = detect_eol_type (ptr, bytes, &dummy); - if (coding->eol_type == CODING_EOL_INCONSISTENT) - coding->eol_type = CODING_EOL_LF; - if (coding->eol_type != CODING_EOL_UNDECIDED) - { - val = Fget (coding->symbol, Qeol_type); - if (VECTORP (val) && XVECTOR (val)->size == 3) - coding->symbol = XVECTOR (val)->contents[coding->eol_type]; - } - coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; - } + EMACS_INT start, end; + Lisp_Object prop; - if (coding->eol_type == CODING_EOL_LF - || coding->eol_type == CODING_EOL_UNDECIDED) - { - /* We have nothing to do. */ - ptr = pend; - } - else if (coding->eol_type == CODING_EOL_CRLF) + if (! find_composition (pos, limit, &start, &end, &prop, coding->src_object) + || end > limit) + *stop = limit; + else if (start > pos) + *stop = start; + else { - unsigned char *pstart = ptr, *p = ptr; - - if (! (coding->mode & CODING_MODE_LAST_BLOCK) - && *(pend - 1) == '\r') - { - /* If the last character is CR, we can't handle it here - because LF will be in the not-yet-decoded source text. - Record that the CR is not yet processed. */ - coding->spec.ccl.cr_carryover = 1; - coding->produced--; - coding->produced_char--; - pend--; - } - while (ptr < pend) + if (start == pos) { - if (*ptr == '\r') + /* We found a composition. Store the corresponding + annotation data in BUF. */ + int *head = buf; + enum composition_method method = COMPOSITION_METHOD (prop); + int nchars = COMPOSITION_LENGTH (prop); + + ADD_COMPOSITION_DATA (buf, 0, nchars, method); + if (method != COMPOSITION_RELATIVE) { - if (ptr + 1 < pend && *(ptr + 1) == '\n') + Lisp_Object components; + int len, i, i_byte; + + components = COMPOSITION_COMPONENTS (prop); + if (VECTORP (components)) { - *p++ = '\n'; - ptr += 2; + len = XVECTOR (components)->size; + for (i = 0; i < len; i++) + *buf++ = XINT (AREF (components, i)); } - else + else if (STRINGP (components)) { - if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) - goto undo_eol_conversion; - *p++ = *ptr++; - len = XSTRING (components)->size; ++ len = SCHARS (components); + i = i_byte = 0; + while (i < len) + { + FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte); + buf++; + } + } + else if (INTEGERP (components)) + { + len = 1; + *buf++ = XINT (components); + } + else if (CONSP (components)) + { + for (len = 0; CONSP (components); + len++, components = XCDR (components)) + *buf++ = XINT (XCAR (components)); } - } - else if (*ptr == '\n' - && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) - goto undo_eol_conversion; - else - *p++ = *ptr++; - continue; - - undo_eol_conversion: - /* We have faced with inconsistent EOL format at PTR. - Convert all LFs before PTR back to CRLFs. */ - for (p--, ptr--; p >= pstart; p--) - { - if (*p == '\n') - *ptr-- = '\n', *ptr-- = '\r'; else - *ptr-- = *p; - } - /* If carryover is recorded, cancel it because we don't - convert CRLF anymore. */ - if (coding->spec.ccl.cr_carryover) - { - coding->spec.ccl.cr_carryover = 0; - coding->produced++; - coding->produced_char++; - pend++; + abort (); + *head -= len; } - p = ptr = pend; - coding->eol_type = CODING_EOL_LF; - coding->symbol = saved_coding_symbol; - } - if (p < pend) - { - /* As each two-byte sequence CRLF was converted to LF, (PEND - - P) is the number of deleted characters. */ - coding->produced -= pend - p; - coding->produced_char -= pend - p; } + + if (find_composition (end, limit, &start, &end, &prop, + coding->src_object) + && end <= limit) + *stop = start; + else + *stop = limit; } - else /* i.e. coding->eol_type == CODING_EOL_CR */ + return buf; + } + + + /* Extract an annotation datum from a text property `charset' at POS of + CODING->src_object (buffer of string), store the data in BUF, set + *STOP to the position where the value of `charset' property changes + (limiting by LIMIT), and return the address of the next element of + BUF. + + If the property value is nil, set *STOP to the position where the + property value is non-nil (limiting by LIMIT), and return BUF. */ + + static INLINE int * + handle_charset_annotation (pos, limit, coding, buf, stop) + EMACS_INT pos, limit; + struct coding_system *coding; + int *buf; + EMACS_INT *stop; + { + Lisp_Object val, next; + int id; + + val = Fget_text_property (make_number (pos), Qcharset, coding->src_object); + if (! NILP (val) && CHARSETP (val)) + id = XINT (CHARSET_SYMBOL_ID (val)); + else + id = -1; + ADD_CHARSET_DATA (buf, 0, 0, id); + next = Fnext_single_property_change (make_number (pos), Qcharset, + coding->src_object, + make_number (limit)); + *stop = XINT (next); + return buf; + } + + + static void + consume_chars (coding) + struct coding_system *coding; + { + int *buf = coding->charbuf; + int *buf_end = coding->charbuf + coding->charbuf_size; + const unsigned char *src = coding->source + coding->consumed; + const unsigned char *src_end = coding->source + coding->src_bytes; + EMACS_INT pos = coding->src_pos + coding->consumed_char; + EMACS_INT end_pos = coding->src_pos + coding->src_chars; + int multibytep = coding->src_multibyte; + Lisp_Object eol_type; + int c; + EMACS_INT stop, stop_composition, stop_charset; + + eol_type = CODING_ID_EOL_TYPE (coding->id); + if (VECTORP (eol_type)) + eol_type = Qunix; + + /* Note: composition handling is not yet implemented. */ + coding->common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; + + if (coding->common_flags & CODING_ANNOTATE_COMPOSITION_MASK) + stop = stop_composition = pos; + else + stop = stop_composition = end_pos; + if (coding->common_flags & CODING_ANNOTATE_CHARSET_MASK) + stop = stop_charset = pos; + else + stop_charset = end_pos; + + /* Compensate for CRLF and annotation. */ + buf_end -= 1 + MAX_ANNOTATION_LENGTH; + while (buf < buf_end) { - unsigned char *p = ptr; + if (pos == stop) + { + if (pos == end_pos) + break; + if (pos == stop_composition) + buf = handle_composition_annotation (pos, end_pos, coding, + buf, &stop_composition); + if (pos == stop_charset) + buf = handle_charset_annotation (pos, end_pos, coding, + buf, &stop_charset); + stop = (stop_composition < stop_charset + ? stop_composition : stop_charset); + } + + if (! multibytep) + { + EMACS_INT bytes; - for (; ptr < pend; ptr++) + if (! CODING_FOR_UNIBYTE (coding) + && (bytes = MULTIBYTE_LENGTH (src, src_end)) > 0) + c = STRING_CHAR_ADVANCE (src), pos += bytes; + else + c = *src++, pos++; + } + else + c = STRING_CHAR_ADVANCE (src), pos++; + if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)) + c = '\n'; + if (! EQ (eol_type, Qunix)) { - if (*ptr == '\r') - *ptr = '\n'; - else if (*ptr == '\n' - && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) + if (c == '\n') { - for (; p < ptr; p++) - { - if (*p == '\n') - *p = '\r'; - } - ptr = pend; - coding->eol_type = CODING_EOL_LF; - coding->symbol = saved_coding_symbol; + if (EQ (eol_type, Qdos)) + *buf++ = '\r'; + else + c = '\r'; } } + *buf++ = c; } + + coding->consumed = src - coding->source; + coding->consumed_char = pos - coding->src_pos; + coding->charbuf_used = buf - coding->charbuf; + coding->chars_at_source = 0; } - /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before - decoding, it may detect coding system and format of end-of-line if - those are not yet decided. The source should be unibyte, the - result is multibyte if CODING->dst_multibyte is nonzero, else - unibyte. */ - int - decode_coding (coding, source, destination, src_bytes, dst_bytes) + /* Encode the text at CODING->src_object into CODING->dst_object. + CODING->src_object is a buffer or a string. + CODING->dst_object is a buffer or nil. + + If CODING->src_object is a buffer, it must be the current buffer. + In this case, if CODING->src_pos is positive, it is a position of + the source text in the buffer, otherwise. the source text is in the + gap area of the buffer, and coding->src_pos specifies the offset of + the text from GPT (which must be the same as PT). If this is the + same buffer as CODING->dst_object, CODING->src_pos must be + negative and CODING should not have `pre-write-conversion'. + + If CODING->src_object is a string, CODING should not have + `pre-write-conversion'. + + If CODING->dst_object is a buffer, the encoded data is inserted at + the current point of that buffer. + + If CODING->dst_object is nil, the encoded data is placed at the + memory area specified by CODING->destination. */ + + static int + encode_coding (coding) struct coding_system *coding; - const unsigned char *source; - unsigned char *destination; - int src_bytes, dst_bytes; { - int extra = 0; + Lisp_Object attrs; - if (coding->type == coding_type_undecided) - detect_coding (coding, source, src_bytes); + attrs = CODING_ID_ATTRS (coding->id); - if (coding->eol_type == CODING_EOL_UNDECIDED - && coding->type != coding_type_ccl) + if (BUFFERP (coding->dst_object)) { - detect_eol (coding, source, src_bytes); - /* We had better recover the original eol format if we - encounter an inconsistent eol format while decoding. */ - coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; + set_buffer_internal (XBUFFER (coding->dst_object)); + coding->dst_multibyte + = ! NILP (current_buffer->enable_multibyte_characters); } - coding->produced = coding->produced_char = 0; coding->consumed = coding->consumed_char = 0; + coding->produced = coding->produced_char = 0; + coding->result = CODING_RESULT_SUCCESS; coding->errors = 0; - coding->result = CODING_FINISH_NORMAL; - switch (coding->type) - { - case coding_type_sjis: - decode_coding_sjis_big5 (coding, source, destination, - src_bytes, dst_bytes, 1); - break; + ALLOC_CONVERSION_WORK_AREA (coding); - case coding_type_iso2022: - decode_coding_iso2022 (coding, source, destination, - src_bytes, dst_bytes); - break; + do { + coding_set_source (coding); + consume_chars (coding); - case coding_type_big5: - decode_coding_sjis_big5 (coding, source, destination, - src_bytes, dst_bytes, 0); - break; + if (!NILP (CODING_ATTR_ENCODE_TBL (attrs))) + translate_chars (coding, CODING_ATTR_ENCODE_TBL (attrs)); + else if (!NILP (Vstandard_translation_table_for_encode)) + translate_chars (coding, Vstandard_translation_table_for_encode); - case coding_type_emacs_mule: - decode_coding_emacs_mule (coding, source, destination, - src_bytes, dst_bytes); - break; + coding_set_destination (coding); + (*(coding->encoder)) (coding); + } while (coding->consumed_char < coding->src_chars); + + if (BUFFERP (coding->dst_object)) + insert_from_gap (coding->produced_char, coding->produced); + + return (coding->result); + } + + + /* Stack of working buffers used in code conversion. An nil element + means that the code conversion of that level is not using a working + buffer. */ + Lisp_Object Vcode_conversion_work_buf_list; + + /* A working buffer used by the top level conversion. */ + Lisp_Object Vcode_conversion_reused_work_buf; + + + /* Return a working buffer that can be freely used by the following + code conversion. MULTIBYTEP specifies the multibyteness of the + buffer. */ + + Lisp_Object + make_conversion_work_buffer (multibytep, depth) + int multibytep, depth; + { + struct buffer *current = current_buffer; + Lisp_Object buf, name; - case coding_type_ccl: - if (coding->spec.ccl.cr_carryover) + if (depth == 0) + { + if (NILP (Vcode_conversion_reused_work_buf)) + Vcode_conversion_reused_work_buf + = Fget_buffer_create (build_string (" *code-converting-work<0>*")); + buf = Vcode_conversion_reused_work_buf; + } + else + { + if (depth < 0) { - /* Put the CR which was not processed by the previous call - of decode_eol_post_ccl in DESTINATION. It will be - decoded together with the following LF by the call to - decode_eol_post_ccl below. */ - *destination = '\r'; - coding->produced++; - coding->produced_char++; - dst_bytes--; - extra = coding->spec.ccl.cr_carryover; + name = build_string (" *code-converting-work*"); + name = Fgenerate_new_buffer_name (name, Qnil); } - ccl_coding_driver (coding, source, destination + extra, - src_bytes, dst_bytes, 0); - if (coding->eol_type != CODING_EOL_LF) + else { - coding->produced += extra; - coding->produced_char += extra; - decode_eol_post_ccl (coding, destination, coding->produced); - } - break; + char str[128]; - default: - decode_eol (coding, source, destination, src_bytes, dst_bytes); + sprintf (str, " *code-converting-work*<%d>", depth); + name = build_string (str); + } + buf = Fget_buffer_create (name); } + set_buffer_internal (XBUFFER (buf)); + current_buffer->undo_list = Qt; + Ferase_buffer (); - Fset_buffer_multibyte (multibytep ? Qt : Qnil, Qnil); ++ Fset_buffer_multibyte (multibytep ? Qt : Qnil); + set_buffer_internal (current); + return buf; + } + + static Lisp_Object + code_conversion_restore (buffer) + Lisp_Object buffer; + { + Lisp_Object workbuf; + + workbuf = XCAR (Vcode_conversion_work_buf_list); + if (! NILP (workbuf) + && ! EQ (workbuf, Vcode_conversion_reused_work_buf) + && ! NILP (Fbuffer_live_p (workbuf))) + Fkill_buffer (workbuf); + Vcode_conversion_work_buf_list = XCDR (Vcode_conversion_work_buf_list); + set_buffer_internal (XBUFFER (buffer)); + return Qnil; + } - if (coding->result == CODING_FINISH_INSUFFICIENT_SRC - && coding->mode & CODING_MODE_LAST_BLOCK - && coding->consumed == src_bytes) - coding->result = CODING_FINISH_NORMAL; + static Lisp_Object + code_conversion_save (buffer, with_work_buf, multibyte) + Lisp_Object buffer; + int with_work_buf, multibyte; + { + Lisp_Object workbuf; - if (coding->mode & CODING_MODE_LAST_BLOCK - && coding->result == CODING_FINISH_INSUFFICIENT_SRC) + if (with_work_buf) { - const unsigned char *src = source + coding->consumed; - unsigned char *dst = destination + coding->produced; + int depth = XINT (Flength (Vcode_conversion_work_buf_list)); - src_bytes -= coding->consumed; - coding->errors++; - if (COMPOSING_P (coding)) - DECODE_COMPOSITION_END ('1'); - while (src_bytes--) - { - int c = *src++; - dst += CHAR_STRING (c, dst); - coding->produced_char++; - } - coding->consumed = coding->consumed_char = src - source; - coding->produced = dst - destination; - coding->result = CODING_FINISH_NORMAL; + workbuf = make_conversion_work_buffer (multibyte, depth); } + else + workbuf = Qnil; + Vcode_conversion_work_buf_list + = Fcons (workbuf, Vcode_conversion_work_buf_list); + record_unwind_protect (code_conversion_restore, buffer); + return workbuf; + } + + int + decode_coding_gap (coding, chars, bytes) + struct coding_system *coding; + EMACS_INT chars, bytes; + { + int count = specpdl_ptr - specpdl; + Lisp_Object attrs; + Lisp_Object buffer; + + buffer = Fcurrent_buffer (); + code_conversion_save (buffer, 0, 0); + + coding->src_object = buffer; + coding->src_chars = chars; + coding->src_bytes = bytes; + coding->src_pos = -chars; + coding->src_pos_byte = -bytes; + coding->src_multibyte = chars < bytes; + coding->dst_object = buffer; + coding->dst_pos = PT; + coding->dst_pos_byte = PT_BYTE; + coding->dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters); + coding->mode |= CODING_MODE_LAST_BLOCK; - if (!coding->dst_multibyte) + if (CODING_REQUIRE_DETECTION (coding)) + detect_coding (coding); - ++ + decode_coding (coding); + + attrs = CODING_ID_ATTRS (coding->id); + if (! NILP (CODING_ATTR_POST_READ (attrs))) { - coding->produced = str_as_unibyte (destination, coding->produced); - coding->produced_char = coding->produced; - struct gcpro gcpro1; + EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE; + Lisp_Object val; + + TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte); - GCPRO1 (buffer); + val = call1 (CODING_ATTR_POST_READ (attrs), + make_number (coding->produced_char)); - UNGCPRO; + CHECK_NATNUM (val); + coding->produced_char += Z - prev_Z; + coding->produced += Z_BYTE - prev_Z_BYTE; } + unbind_to (count, Qnil); return coding->result; } @@@ -5254,2055 -6603,1811 +6604,1923 @@@ encode_coding_object (coding, src_objec } else { - begp_orig = begp = BYTE_POS_ADDR (*beg); - endp_orig = endp = begp + *end - *beg; + coding->dst_object = Qnil; + coding->dst_multibyte = 0; } - eol_conversion = (coding->eol_type == CODING_EOL_CR - || coding->eol_type == CODING_EOL_CRLF); + encode_coding (coding); - /* Here, we don't have to check coding->pre_write_conversion because - the caller is expected to have handled it already. */ - switch (coding->type) + if (EQ (dst_object, Qt)) { - case coding_type_iso2022: - if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, 0) != CHARSET_ASCII) - /* We can't skip any data. */ - break; - if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL) + if (BUFFERP (coding->dst_object)) + coding->dst_object = Fbuffer_string (); + else { - unsigned char *bol = begp; - while (begp < endp && *begp < 0x80) - { - begp++; - if (begp[-1] == '\n') - bol = begp; - } - begp = bol; - goto label_skip_tail; + coding->dst_object + = make_unibyte_string ((char *) coding->destination, + coding->produced); + xfree (coding->destination); } - /* fall down ... */ + } - case coding_type_sjis: - case coding_type_big5: - /* We can skip all ASCII characters at the head and tail. */ - if (eol_conversion) - while (begp < endp && *begp < 0x80 && *begp != '\n') begp++; - else - while (begp < endp && *begp < 0x80) begp++; - label_skip_tail: - if (eol_conversion) - while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--; + if (saved_pt >= 0) + { + /* This is the case of: + (BUFFERP (src_object) && EQ (src_object, dst_object)) + As we have moved PT while replacing the original buffer + contents, we must recover it now. */ + set_buffer_internal (XBUFFER (src_object)); + if (saved_pt < from) + TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte); + else if (saved_pt < from + chars) + TEMP_SET_PT_BOTH (from, from_byte); + else if (! NILP (current_buffer->enable_multibyte_characters)) + TEMP_SET_PT_BOTH (saved_pt + (coding->produced_char - chars), + saved_pt_byte + (coding->produced - bytes)); else - while (begp < endp && *(endp - 1) < 0x80) endp--; - break; + TEMP_SET_PT_BOTH (saved_pt + (coding->produced - bytes), + saved_pt_byte + (coding->produced - bytes)); + } + + unbind_to (count, Qnil); + } + + + Lisp_Object + preferred_coding_system () + { + int id = coding_categories[coding_priorities[0]].id; + + return CODING_ID_NAME (id); + } + + + #ifdef emacs + /*** 8. Emacs Lisp library functions ***/ + + DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0, + doc: /* Return t if OBJECT is nil or a coding-system. + See the documentation of `define-coding-system' for information + about coding-system objects. */) + (obj) + Lisp_Object obj; + { + return ((NILP (obj) || CODING_SYSTEM_P (obj)) ? Qt : Qnil); + } + + DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system, + Sread_non_nil_coding_system, 1, 1, 0, + doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */) + (prompt) + Lisp_Object prompt; + { + Lisp_Object val; + do + { + val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, + Qt, Qnil, Qcoding_system_history, Qnil, Qnil); + } - while (XSTRING (val)->size == 0); ++ while (SCHARS (val) == 0); + return (Fintern (val, Qnil)); + } + + DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0, + doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. + If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */) + (prompt, default_coding_system) + Lisp_Object prompt, default_coding_system; + { + Lisp_Object val; + if (SYMBOLP (default_coding_system)) - XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name); ++ XSETSTRING (default_coding_system, SYMBOL_NAME (default_coding_system)); + val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, + Qt, Qnil, Qcoding_system_history, + default_coding_system, Qnil); - return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil)); ++ return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil)); + } + + DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system, + 1, 1, 0, + doc: /* Check validity of CODING-SYSTEM. + If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. */) + (coding_system) + Lisp_Object coding_system; + { + CHECK_SYMBOL (coding_system); + if (!NILP (Fcoding_system_p (coding_system))) + return coding_system; + while (1) + Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil)); + } + + + /* Detect how the bytes at SRC of length SRC_BYTES are encoded. If + HIGHEST is nonzero, return the coding system of the highest + priority among the detected coding systems. Otherwize return a + list of detected coding systems sorted by their priorities. If + MULTIBYTEP is nonzero, it is assumed that the bytes are in correct + multibyte form but contains only ASCII and eight-bit chars. + Otherwise, the bytes are raw bytes. + + CODING-SYSTEM controls the detection as below: + + If it is nil, detect both text-format and eol-format. If the + text-format part of CODING-SYSTEM is already specified + (e.g. `iso-latin-1'), detect only eol-format. If the eol-format + part of CODING-SYSTEM is already specified (e.g. `undecided-unix'), + detect only text-format. */ + + Lisp_Object + detect_coding_system (src, src_bytes, highest, multibytep, coding_system) - unsigned char *src; ++ const unsigned char *src; + int src_bytes, highest; + int multibytep; + Lisp_Object coding_system; + { - unsigned char *src_end = src + src_bytes; ++ const unsigned char *src_end = src + src_bytes; + Lisp_Object attrs, eol_type; + Lisp_Object val; + struct coding_system coding; + int id; + struct coding_detection_info detect_info; - default: - abort (); - } + if (NILP (coding_system)) + coding_system = Qundecided; + setup_coding_system (coding_system, &coding); + attrs = CODING_ID_ATTRS (coding.id); + eol_type = CODING_ID_EOL_TYPE (coding.id); + coding_system = CODING_ATTR_BASE_NAME (attrs); + + coding.source = src; + coding.src_bytes = src_bytes; + coding.src_multibyte = multibytep; + coding.consumed = 0; + coding.mode |= CODING_MODE_LAST_BLOCK; - *beg += begp - begp_orig; - *end += endp - endp_orig; - return; - } + detect_info.checked = detect_info.found = detect_info.rejected = 0; - /* As shrinking conversion region requires some overhead, we don't try - shrinking if the length of conversion region is less than this - value. */ - static int shrink_conversion_region_threshhold = 1024; + /* At first, detect text-format if necessary. */ + if (XINT (CODING_ATTR_CATEGORY (attrs)) == coding_category_undecided) + { + enum coding_category category; + struct coding_system *this; + int c, i; - #define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep) \ - do { \ - if (*(end) - *(beg) > shrink_conversion_region_threshhold) \ - { \ - if (encodep) shrink_encoding_region (beg, end, coding, str); \ - else shrink_decoding_region (beg, end, coding, str); \ - } \ - } while (0) + for (; src < src_end; src++) + { + c = *src; + if (c & 0x80 + || (c < 0x20 && (c == ISO_CODE_ESC + || c == ISO_CODE_SI + || c == ISO_CODE_SO))) + break; + } + coding.head_ascii = src - coding.source; - static Lisp_Object - code_convert_region_unwind (arg) - Lisp_Object arg; - { - inhibit_pre_post_conversion = 0; - Vlast_coding_system_used = arg; - return Qnil; - } + if (src < src_end) + for (i = 0; i < coding_category_raw_text; i++) + { + category = coding_priorities[i]; + this = coding_categories + category; - /* Store information about all compositions in the range FROM and TO - of OBJ in memory blocks pointed by CODING->cmp_data. OBJ is a - buffer or a string, defaults to the current buffer. */ + if (this->id < 0) + { + /* No coding system of this category is defined. */ + detect_info.rejected |= (1 << category); + } + else if (category >= coding_category_raw_text) + continue; + else if (detect_info.checked & (1 << category)) + { + if (highest + && (detect_info.found & (1 << category))) + break; + } + else + { + if ((*(this->detector)) (&coding, &detect_info) + && highest + && (detect_info.found & (1 << category))) + break; + } + } - void - coding_save_composition (coding, from, to, obj) - struct coding_system *coding; - int from, to; - Lisp_Object obj; - { - Lisp_Object prop; - int start, end; - if (coding->composing == COMPOSITION_DISABLED) - return; - if (!coding->cmp_data) - coding_allocate_composition_data (coding, from); - if (!find_composition (from, to, &start, &end, &prop, obj) - || end > to) - return; - if (start < from - && (!find_composition (end, to, &start, &end, &prop, obj) - || end > to)) - return; - coding->composing = COMPOSITION_NO; - do - { - if (COMPOSITION_VALID_P (start, end, prop)) + if (detect_info.rejected == CATEGORY_MASK_ANY) { - enum composition_method method = COMPOSITION_METHOD (prop); - if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH - >= COMPOSITION_DATA_SIZE) - coding_allocate_composition_data (coding, from); - /* For relative composition, we remember start and end - positions, for the other compositions, we also remember - components. */ - CODING_ADD_COMPOSITION_START (coding, start - from, method); - if (method != COMPOSITION_RELATIVE) + detect_info.found = CATEGORY_MASK_RAW_TEXT; + id = coding_categories[coding_category_raw_text].id; + val = Fcons (make_number (id), Qnil); + } + else if (! detect_info.rejected && ! detect_info.found) + { + detect_info.found = CATEGORY_MASK_ANY; + id = coding_categories[coding_category_undecided].id; + val = Fcons (make_number (id), Qnil); + } + else if (highest) + { + if (detect_info.found) { - /* We must store a*/ - Lisp_Object val, ch; + detect_info.found = 1 << category; + val = Fcons (make_number (this->id), Qnil); + } + else + for (i = 0; i < coding_category_raw_text; i++) + if (! (detect_info.rejected & (1 << coding_priorities[i]))) + { + detect_info.found = 1 << coding_priorities[i]; + id = coding_categories[coding_priorities[i]].id; + val = Fcons (make_number (id), Qnil); + break; + } + } + else + { + int mask = detect_info.rejected | detect_info.found; + int found = 0; + val = Qnil; - val = COMPOSITION_COMPONENTS (prop); - if (CONSP (val)) - while (CONSP (val)) - { - ch = XCAR (val), val = XCDR (val); - CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch)); - } - else if (VECTORP (val) || STRINGP (val)) + for (i = coding_category_raw_text - 1; i >= 0; i--) + { + category = coding_priorities[i]; + if (! (mask & (1 << category))) { - int len = (VECTORP (val) - ? XVECTOR (val)->size : SCHARS (val)); - int i; - for (i = 0; i < len; i++) - { - ch = (STRINGP (val) - ? Faref (val, make_number (i)) - : XVECTOR (val)->contents[i]); - CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (ch)); - } + found |= 1 << category; + id = coding_categories[category].id; + val = Fcons (make_number (id), val); + } + } + for (i = coding_category_raw_text - 1; i >= 0; i--) + { + category = coding_priorities[i]; + if (detect_info.found & (1 << category)) + { + id = coding_categories[category].id; + val = Fcons (make_number (id), val); } - else /* INTEGERP (val) */ - CODING_ADD_COMPOSITION_COMPONENT (coding, XINT (val)); } - CODING_ADD_COMPOSITION_END (coding, end - from); + detect_info.found |= found; } - start = end; } - while (start < to - && find_composition (start, to, &start, &end, &prop, obj) - && end <= to); + else + { + detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); + val = Fcons (make_number (coding.id), Qnil); + } + + /* Then, detect eol-format if necessary. */ + { + int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol; + Lisp_Object tail; + + if (VECTORP (eol_type)) + { + if (detect_info.found & ~CATEGORY_MASK_UTF_16) + normal_eol = detect_eol (coding.source, src_bytes, + coding_category_raw_text); + if (detect_info.found & (CATEGORY_MASK_UTF_16_BE + | CATEGORY_MASK_UTF_16_BE_NOSIG)) + utf_16_be_eol = detect_eol (coding.source, src_bytes, + coding_category_utf_16_be); + if (detect_info.found & (CATEGORY_MASK_UTF_16_LE + | CATEGORY_MASK_UTF_16_LE_NOSIG)) + utf_16_le_eol = detect_eol (coding.source, src_bytes, + coding_category_utf_16_le); + } + else + { + if (EQ (eol_type, Qunix)) + normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_LF; + else if (EQ (eol_type, Qdos)) + normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CRLF; + else + normal_eol = utf_16_be_eol = utf_16_le_eol = EOL_SEEN_CR; + } + + for (tail = val; CONSP (tail); tail = XCDR (tail)) + { + enum coding_category category; + int this_eol; + + id = XINT (XCAR (tail)); + attrs = CODING_ID_ATTRS (id); + category = XINT (CODING_ATTR_CATEGORY (attrs)); + eol_type = CODING_ID_EOL_TYPE (id); + if (VECTORP (eol_type)) + { + if (category == coding_category_utf_16_be + || category == coding_category_utf_16_be_nosig) + this_eol = utf_16_be_eol; + else if (category == coding_category_utf_16_le + || category == coding_category_utf_16_le_nosig) + this_eol = utf_16_le_eol; + else + this_eol = normal_eol; + + if (this_eol == EOL_SEEN_LF) + XSETCAR (tail, AREF (eol_type, 0)); + else if (this_eol == EOL_SEEN_CRLF) + XSETCAR (tail, AREF (eol_type, 1)); + else if (this_eol == EOL_SEEN_CR) + XSETCAR (tail, AREF (eol_type, 2)); + else + XSETCAR (tail, CODING_ID_NAME (id)); + } + else + XSETCAR (tail, CODING_ID_NAME (id)); + } + } - /* Make coding->cmp_data point to the first memory block. */ - while (coding->cmp_data->prev) - coding->cmp_data = coding->cmp_data->prev; - coding->cmp_data_start = 0; + return (highest ? XCAR (val) : val); } - /* Reflect the saved information about compositions to OBJ. - CODING->cmp_data points to a memory block for the information. OBJ - is a buffer or a string, defaults to the current buffer. */ - void - coding_restore_composition (coding, obj) - struct coding_system *coding; - Lisp_Object obj; - { - struct composition_data *cmp_data = coding->cmp_data; + DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, + 2, 3, 0, + doc: /* Detect coding system of the text in the region between START and END. + Return a list of possible coding systems ordered by priority. - if (!cmp_data) - return; + If only ASCII characters are found, it returns a list of single element + `undecided' or its subsidiary coding system according to a detected + end-of-line format. - while (cmp_data->prev) - cmp_data = cmp_data->prev; + If optional argument HIGHEST is non-nil, return the coding system of + highest priority. */) + (start, end, highest) + Lisp_Object start, end, highest; + { + int from, to; + int from_byte, to_byte; - while (cmp_data) - { - int i; + CHECK_NUMBER_COERCE_MARKER (start); + CHECK_NUMBER_COERCE_MARKER (end); - for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0; - i += cmp_data->data[i]) - { - int *data = cmp_data->data + i; - enum composition_method method = (enum composition_method) data[3]; - Lisp_Object components; + validate_region (&start, &end); + from = XINT (start), to = XINT (end); + from_byte = CHAR_TO_BYTE (from); + to_byte = CHAR_TO_BYTE (to); - if (method == COMPOSITION_RELATIVE) - components = Qnil; - else - { - int len = data[0] - 4, j; - Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1]; - - if (method == COMPOSITION_WITH_RULE_ALTCHARS - && len % 2 == 0) - len --; - for (j = 0; j < len; j++) - args[j] = make_number (data[4 + j]); - components = (method == COMPOSITION_WITH_ALTCHARS - ? Fstring (len, args) : Fvector (len, args)); - } - compose_text (data[1], data[2], components, Qnil, obj); - } - cmp_data = cmp_data->next; - } + if (from < GPT && to >= GPT) + move_gap_both (to, to_byte); + + return detect_coding_system (BYTE_POS_ADDR (from_byte), + to_byte - from_byte, + !NILP (highest), + !NILP (current_buffer + ->enable_multibyte_characters), + Qnil); } - /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the - text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by - coding system CODING, and return the status code of code conversion - (currently, this value has no meaning). + DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string, + 1, 2, 0, + doc: /* Detect coding system of the text in STRING. + Return a list of possible coding systems ordered by priority. - How many characters (and bytes) are converted to how many - characters (and bytes) are recorded in members of the structure - CODING. + If only ASCII characters are found, it returns a list of single element + `undecided' or its subsidiary coding system according to a detected + end-of-line format. - If REPLACE is nonzero, we do various things as if the original text - is deleted and a new text is inserted. See the comments in - replace_range (insdel.c) to know what we are doing. + If optional argument HIGHEST is non-nil, return the coding system of + highest priority. */) + (string, highest) + Lisp_Object string, highest; + { + CHECK_STRING (string); - If REPLACE is zero, it is assumed that the source text is unibyte. - Otherwise, it is assumed that the source text is multibyte. */ - return detect_coding_system (XSTRING (string)->data, - STRING_BYTES (XSTRING (string)), - !NILP (highest), - STRING_MULTIBYTE (string), ++ return detect_coding_system (SDATA (string), SBYTES (string), ++ !NILP (highest), STRING_MULTIBYTE (string), + Qnil); + } - int - code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace) - int from, from_byte, to, to_byte, encodep, replace; - struct coding_system *coding; - { - int len = to - from, len_byte = to_byte - from_byte; - int nchars_del = 0, nbytes_del = 0; - int require, inserted, inserted_byte; - int head_skip, tail_skip, total_skip = 0; - Lisp_Object saved_coding_symbol; - int first = 1; - unsigned char *src, *dst; - Lisp_Object deletion; - int orig_point = PT, orig_len = len; - int prev_Z; - int multibyte_p = !NILP (current_buffer->enable_multibyte_characters); - deletion = Qnil; - saved_coding_symbol = coding->symbol; + static INLINE int + char_encodable_p (c, attrs) + int c; + Lisp_Object attrs; + { + Lisp_Object tail; + struct charset *charset; - if (from < PT && PT < to) + for (tail = CODING_ATTR_CHARSET_LIST (attrs); + CONSP (tail); tail = XCDR (tail)) { - TEMP_SET_PT_BOTH (from, from_byte); - orig_point = from; + charset = CHARSET_FROM_ID (XINT (XCAR (tail))); + if (CHAR_CHARSET_P (c, charset)) + break; } + return (! NILP (tail)); + } - if (replace) - { - int saved_from = from; - int saved_inhibit_modification_hooks; - prepare_to_modify_buffer (from, to, &from); - if (saved_from != from) - { - to = from + len; - from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to); - len_byte = to_byte - from_byte; - } + /* Return a list of coding systems that safely encode the text between + START and END. If EXCLUDE is non-nil, it is a list of coding + systems not to check. The returned list doesn't contain any such + coding systems. In any case, if the text contains only ASCII or is + unibyte, return t. */ - /* The code conversion routine can not preserve text properties - for now. So, we must remove all text properties in the - region. Here, we must suppress all modification hooks. */ - saved_inhibit_modification_hooks = inhibit_modification_hooks; - inhibit_modification_hooks = 1; - Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil); - inhibit_modification_hooks = saved_inhibit_modification_hooks; - } + DEFUN ("find-coding-systems-region-internal", + Ffind_coding_systems_region_internal, + Sfind_coding_systems_region_internal, 2, 3, 0, + doc: /* Internal use only. */) + (start, end, exclude) + Lisp_Object start, end, exclude; + { + Lisp_Object coding_attrs_list, safe_codings; + EMACS_INT start_byte, end_byte; + const unsigned char *p, *pbeg, *pend; + int c; + Lisp_Object tail, elt; - if (! encodep && CODING_REQUIRE_DETECTION (coding)) + if (STRINGP (start)) + { + if (!STRING_MULTIBYTE (start) - || XSTRING (start)->size == STRING_BYTES (XSTRING (start))) ++ || SCHARS (start) == SBYTES (start)) + return Qt; + start_byte = 0; - end_byte = STRING_BYTES (XSTRING (start)); ++ end_byte = SBYTES (start); + } + else { - /* We must detect encoding of text and eol format. */ + CHECK_NUMBER_COERCE_MARKER (start); + CHECK_NUMBER_COERCE_MARKER (end); + if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end)) + args_out_of_range (start, end); + if (NILP (current_buffer->enable_multibyte_characters)) + return Qt; + start_byte = CHAR_TO_BYTE (XINT (start)); + end_byte = CHAR_TO_BYTE (XINT (end)); + if (XINT (end) - XINT (start) == end_byte - start_byte) + return Qt; - if (from < GPT && to > GPT) - move_gap_both (from, from_byte); - if (coding->type == coding_type_undecided) - { - detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte); - if (coding->type == coding_type_undecided) - { - /* It seems that the text contains only ASCII, but we - should not leave it undecided because the deeper - decoding routine (decode_coding) tries to detect the - encodings again in vain. */ - coding->type = coding_type_emacs_mule; - coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE; - /* As emacs-mule decoder will handle composition, we - need this setting to allocate coding->cmp_data - later. */ - coding->composing = COMPOSITION_NO; - } - } - if (coding->eol_type == CODING_EOL_UNDECIDED - && coding->type != coding_type_ccl) + if (XINT (start) < GPT && XINT (end) > GPT) { - detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte); - if (coding->eol_type == CODING_EOL_UNDECIDED) - coding->eol_type = CODING_EOL_LF; - /* We had better recover the original eol format if we - encounter an inconsistent eol format while decoding. */ - coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; + if ((GPT - XINT (start)) < (XINT (end) - GPT)) + move_gap_both (XINT (start), start_byte); + else + move_gap_both (XINT (end), end_byte); } } - /* Now we convert the text. */ + coding_attrs_list = Qnil; + for (tail = Vcoding_system_list; CONSP (tail); tail = XCDR (tail)) + if (NILP (exclude) + || NILP (Fmemq (XCAR (tail), exclude))) + { + Lisp_Object attrs; - /* For encoding, we must process pre-write-conversion in advance. */ - if (! inhibit_pre_post_conversion - && encodep - && SYMBOLP (coding->pre_write_conversion) - && ! NILP (Ffboundp (coding->pre_write_conversion))) - { - /* The function in pre-write-conversion may put a new text in a - new buffer. */ - struct buffer *prev = current_buffer; - Lisp_Object new; + attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0); + if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)) + && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided)) + coding_attrs_list = Fcons (attrs, coding_attrs_list); + } - record_unwind_protect (code_convert_region_unwind, - Vlast_coding_system_used); - /* We should not call any more pre-write/post-read-conversion - functions while this pre-write-conversion is running. */ - inhibit_pre_post_conversion = 1; - call2 (coding->pre_write_conversion, - make_number (from), make_number (to)); - inhibit_pre_post_conversion = 0; - /* Discard the unwind protect. */ - specpdl_ptr--; + if (STRINGP (start)) - p = pbeg = XSTRING (start)->data; ++ p = pbeg = SDATA (start); + else + p = pbeg = BYTE_POS_ADDR (start_byte); + pend = p + (end_byte - start_byte); - if (current_buffer != prev) - { - len = ZV - BEGV; - new = Fcurrent_buffer (); - set_buffer_internal_1 (prev); - del_range_2 (from, from_byte, to, to_byte, 0); - TEMP_SET_PT_BOTH (from, from_byte); - insert_from_buffer (XBUFFER (new), 1, len, 0); - Fkill_buffer (new); - if (orig_point >= to) - orig_point += len - orig_len; - else if (orig_point > from) - orig_point = from; - orig_len = len; - to = from + len; - from_byte = CHAR_TO_BYTE (from); - to_byte = CHAR_TO_BYTE (to); - len_byte = to_byte - from_byte; - TEMP_SET_PT_BOTH (from, from_byte); - } - } + while (p < pend && ASCII_BYTE_P (*p)) p++; + while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--; - if (replace) + while (p < pend) { - if (! EQ (current_buffer->undo_list, Qt)) - deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1); + if (ASCII_BYTE_P (*p)) + p++; else { - nchars_del = to - from; - nbytes_del = to_byte - from_byte; - } - } + c = STRING_CHAR_ADVANCE (p); - if (coding->composing != COMPOSITION_DISABLED) - { - if (encodep) - coding_save_composition (coding, from, to, Fcurrent_buffer ()); - else - coding_allocate_composition_data (coding, from); + charset_map_loaded = 0; + for (tail = coding_attrs_list; CONSP (tail);) + { + elt = XCAR (tail); + if (NILP (elt)) + tail = XCDR (tail); + else if (char_encodable_p (c, elt)) + tail = XCDR (tail); + else if (CONSP (XCDR (tail))) + { + XSETCAR (tail, XCAR (XCDR (tail))); + XSETCDR (tail, XCDR (XCDR (tail))); + } + else + { + XSETCAR (tail, Qnil); + tail = XCDR (tail); + } + } + if (charset_map_loaded) + { + EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg; + + if (STRINGP (start)) - pbeg = XSTRING (start)->data; ++ pbeg = SDATA (start); + else + pbeg = BYTE_POS_ADDR (start_byte); + p = pbeg + p_offset; + pend = pbeg + pend_offset; + } + } } - /* Try to skip the heading and tailing ASCIIs. */ - if (coding->type != coding_type_ccl) - { - int from_byte_orig = from_byte, to_byte_orig = to_byte; + safe_codings = Qnil; + for (tail = coding_attrs_list; CONSP (tail); tail = XCDR (tail)) + if (! NILP (XCAR (tail))) + safe_codings = Fcons (CODING_ATTR_BASE_NAME (XCAR (tail)), safe_codings); - if (from < GPT && GPT < to) - move_gap_both (from, from_byte); - SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep); - if (from_byte == to_byte - && (encodep || NILP (coding->post_read_conversion)) - && ! CODING_REQUIRE_FLUSHING (coding)) - { - coding->produced = len_byte; - coding->produced_char = len; - if (!replace) - /* We must record and adjust for this new text now. */ - adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len); - return 0; - } + return safe_codings; + } - head_skip = from_byte - from_byte_orig; - tail_skip = to_byte_orig - to_byte; - total_skip = head_skip + tail_skip; - from += head_skip; - to -= tail_skip; - len -= total_skip; len_byte -= total_skip; - } - /* For conversion, we must put the gap before the text in addition to - making the gap larger for efficient decoding. The required gap - size starts from 2000 which is the magic number used in make_gap. - But, after one batch of conversion, it will be incremented if we - find that it is not enough . */ - require = 2000; ++DEFUN ("unencodable-char-position", Funencodable_char_position, ++ Sunencodable_char_position, 3, 5, 0, ++ doc: /* ++Return position of first un-encodable character in a region. ++START and END specfiy the region and CODING-SYSTEM specifies the ++encoding to check. Return nil if CODING-SYSTEM does encode the region. + - if (GAP_SIZE < require) - make_gap (require - GAP_SIZE); - move_gap_both (from, from_byte); ++If optional 4th argument COUNT is non-nil, it specifies at most how ++many un-encodable characters to search. In this case, the value is a ++list of positions. + - inserted = inserted_byte = 0; ++If optional 5th argument STRING is non-nil, it is a string to search ++for un-encodable characters. In that case, START and END are indexes ++to the string. */) ++ (start, end, coding_system, count, string) ++ Lisp_Object start, end, coding_system, count, string; ++{ ++ int n; ++ struct coding_system coding; ++ Lisp_Object attrs, charset_list; ++ Lisp_Object positions; ++ int from, to; ++ const unsigned char *p, *stop, *pend; ++ int ascii_compatible; + - GAP_SIZE += len_byte; - ZV -= len; - Z -= len; - ZV_BYTE -= len_byte; - Z_BYTE -= len_byte; ++ setup_coding_system (Fcheck_coding_system (coding_system), &coding); ++ attrs = CODING_ID_ATTRS (coding.id); ++ if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text)) ++ return Qnil; ++ ascii_compatible = ! NILP (CODING_ATTR_ASCII_COMPAT (attrs)); ++ charset_list = CODING_ATTR_CHARSET_LIST (attrs); + - if (GPT - BEG < BEG_UNCHANGED) - BEG_UNCHANGED = GPT - BEG; - if (Z - GPT < END_UNCHANGED) - END_UNCHANGED = Z - GPT; ++ if (NILP (string)) ++ { ++ validate_region (&start, &end); ++ from = XINT (start); ++ to = XINT (end); ++ if (NILP (current_buffer->enable_multibyte_characters) ++ || (ascii_compatible ++ && (to - from) == (CHAR_TO_BYTE (to) - (CHAR_TO_BYTE (from))))) ++ return Qnil; ++ p = CHAR_POS_ADDR (from); ++ pend = CHAR_POS_ADDR (to); ++ if (from < GPT && to >= GPT) ++ stop = GPT_ADDR; ++ else ++ stop = pend; ++ } ++ else ++ { ++ CHECK_STRING (string); ++ CHECK_NATNUM (start); ++ CHECK_NATNUM (end); ++ from = XINT (start); ++ to = XINT (end); ++ if (from > to ++ || to > SCHARS (string)) ++ args_out_of_range_3 (string, start, end); ++ if (! STRING_MULTIBYTE (string)) ++ return Qnil; ++ p = SDATA (string) + string_char_to_byte (string, from); ++ stop = pend = SDATA (string) + string_char_to_byte (string, to); ++ if (ascii_compatible && (to - from) == (pend - p)) ++ return Qnil; ++ } + - if (!encodep && coding->src_multibyte) ++ if (NILP (count)) ++ n = 1; ++ else + { - /* Decoding routines expects that the source text is unibyte. - We must convert 8-bit characters of multibyte form to - unibyte. */ - int len_byte_orig = len_byte; - len_byte = str_as_unibyte (GAP_END_ADDR - len_byte, len_byte); - if (len_byte < len_byte_orig) - safe_bcopy (GAP_END_ADDR - len_byte_orig, GAP_END_ADDR - len_byte, - len_byte); - coding->src_multibyte = 0; ++ CHECK_NATNUM (count); ++ n = XINT (count); + } + - for (;;) ++ positions = Qnil; ++ while (1) + { - int result; - - /* The buffer memory is now: - +--------+converted-text+---------+-------original-text-------+---+ - |<-from->|<--inserted-->|---------|<--------len_byte--------->|---| - |<---------------------- GAP ----------------------->| */ - src = GAP_END_ADDR - len_byte; - dst = GPT_ADDR + inserted_byte; ++ int c; + - if (encodep) - result = encode_coding (coding, src, dst, len_byte, 0); - else ++ if (ascii_compatible) ++ while (p < stop && ASCII_BYTE_P (*p)) ++ p++, from++; ++ if (p >= stop) + { - if (coding->composing != COMPOSITION_DISABLED) - coding->cmp_data->char_offset = from + inserted; - result = decode_coding (coding, src, dst, len_byte, 0); ++ if (p >= pend) ++ break; ++ stop = pend; ++ p = GAP_END_ADDR; + } + - /* The buffer memory is now: - +--------+-------converted-text----+--+------original-text----+---+ - |<-from->|<-inserted->|<-produced->|--|<-(len_byte-consumed)->|---| - |<---------------------- GAP ----------------------->| */ - - inserted += coding->produced_char; - inserted_byte += coding->produced; - len_byte -= coding->consumed; - - if (result == CODING_FINISH_INSUFFICIENT_CMP) ++ c = STRING_CHAR_ADVANCE (p); ++ if (! (ASCII_CHAR_P (c) && ascii_compatible) ++ && ! char_charset (c, charset_list, NULL)) + { - coding_allocate_composition_data (coding, from + inserted); - continue; ++ positions = Fcons (make_number (from), positions); ++ n--; ++ if (n == 0) ++ break; + } + - src += coding->consumed; - dst += coding->produced; ++ from++; ++ } + - if (result == CODING_FINISH_NORMAL) - { - src += len_byte; - break; - } - if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL) - { - unsigned char *pend = dst, *p = pend - inserted_byte; - Lisp_Object eol_type; ++ return (NILP (count) ? Fcar (positions) : Fnreverse (positions)); ++} + - /* Encode LFs back to the original eol format (CR or CRLF). */ - if (coding->eol_type == CODING_EOL_CR) - { - while (p < pend) if (*p++ == '\n') p[-1] = '\r'; - } - else - { - int count = 0; + - while (p < pend) if (*p++ == '\n') count++; - if (src - dst < count) - { - /* We don't have sufficient room for encoding LFs - back to CRLF. We must record converted and - not-yet-converted text back to the buffer - content, enlarge the gap, then record them out of - the buffer contents again. */ - int add = len_byte + inserted_byte; - - GAP_SIZE -= add; - ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; - GPT += inserted_byte; GPT_BYTE += inserted_byte; - make_gap (count - GAP_SIZE); - GAP_SIZE += add; - ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; - GPT -= inserted_byte; GPT_BYTE -= inserted_byte; - /* Don't forget to update SRC, DST, and PEND. */ - src = GAP_END_ADDR - len_byte; - dst = GPT_ADDR + inserted_byte; - pend = dst; - } - inserted += count; - inserted_byte += count; - coding->produced += count; - p = dst = pend + count; - while (count) - { - *--p = *--pend; - if (*p == '\n') count--, *--p = '\r'; - } - } + DEFUN ("check-coding-systems-region", Fcheck_coding_systems_region, + Scheck_coding_systems_region, 3, 3, 0, + doc: /* Check if the region is encodable by coding systems. - /* Suppress eol-format conversion in the further conversion. */ - coding->eol_type = CODING_EOL_LF; + START and END are buffer positions specifying the region. + CODING-SYSTEM-LIST is a list of coding systems to check. - /* Set the coding system symbol to that for Unix-like EOL. */ - eol_type = Fget (saved_coding_symbol, Qeol_type); - if (VECTORP (eol_type) - && XVECTOR (eol_type)->size == 3 - && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF])) - coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF]; - else - coding->symbol = saved_coding_symbol; + The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where + CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the + whole region, POS0, POS1, ... are buffer positions where non-encodable + characters are found. - continue; - } - if (len_byte <= 0) - { - if (coding->type != coding_type_ccl - || coding->mode & CODING_MODE_LAST_BLOCK) - break; - coding->mode |= CODING_MODE_LAST_BLOCK; - continue; - } - if (result == CODING_FINISH_INSUFFICIENT_SRC) - { - /* The source text ends in invalid codes. Let's just - make them valid buffer contents, and finish conversion. */ - if (multibyte_p) - { - unsigned char *start = dst; + If all coding systems in CODING-SYSTEM-LIST can encode the region, the + value is nil. - inserted += len_byte; - while (len_byte--) - { - int c = *src++; - dst += CHAR_STRING (c, dst); - } + START may be a string. In that case, check if the string is + encodable, and the value contains indices to the string instead of + buffer positions. END is ignored. */) + (start, end, coding_system_list) + Lisp_Object start, end, coding_system_list; + { + Lisp_Object list; + EMACS_INT start_byte, end_byte; + int pos; + const unsigned char *p, *pbeg, *pend; + int c; + Lisp_Object tail, elt; - inserted_byte += dst - start; - } - else - { - inserted += len_byte; - inserted_byte += len_byte; - while (len_byte--) - *dst++ = *src++; - } - break; - } - if (result == CODING_FINISH_INTERRUPT) - { - /* The conversion procedure was interrupted by a user. */ - break; - } - /* Now RESULT == CODING_FINISH_INSUFFICIENT_DST */ - if (coding->consumed < 1) - { - /* It's quite strange to require more memory without - consuming any bytes. Perhaps CCL program bug. */ - break; - } - if (first) - { - /* We have just done the first batch of conversion which was - stopped because of insufficient gap. Let's reconsider the - required gap size (i.e. SRT - DST) now. - - We have converted ORIG bytes (== coding->consumed) into - NEW bytes (coding->produced). To convert the remaining - LEN bytes, we may need REQUIRE bytes of gap, where: - REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG) - REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG - Here, we are sure that NEW >= ORIG. */ - float ratio; - - if (coding->produced <= coding->consumed) - { - /* This happens because of CCL-based coding system with - eol-type CRLF. */ - require = 0; - } - else - { - ratio = (coding->produced - coding->consumed) / coding->consumed; - require = len_byte * ratio; - } - first = 0; - } - if ((src - dst) < (require + 2000)) - { - /* See the comment above the previous call of make_gap. */ - int add = len_byte + inserted_byte; - - GAP_SIZE -= add; - ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add; - GPT += inserted_byte; GPT_BYTE += inserted_byte; - make_gap (require + 2000); - GAP_SIZE += add; - ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add; - GPT -= inserted_byte; GPT_BYTE -= inserted_byte; - } + if (STRINGP (start)) + { + if (!STRING_MULTIBYTE (start) - && XSTRING (start)->size != STRING_BYTES (XSTRING (start))) ++ && SCHARS (start) != SBYTES (start)) + return Qnil; + start_byte = 0; - end_byte = STRING_BYTES (XSTRING (start)); ++ end_byte = SBYTES (start); + pos = 0; } - if (src - dst > 0) *dst = 0; /* Put an anchor. */ - - if (encodep && coding->dst_multibyte) + else { - /* The output is unibyte. We must convert 8-bit characters to - multibyte form. */ - if (inserted_byte * 2 > GAP_SIZE) + CHECK_NUMBER_COERCE_MARKER (start); + CHECK_NUMBER_COERCE_MARKER (end); + if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end)) + args_out_of_range (start, end); + if (NILP (current_buffer->enable_multibyte_characters)) + return Qnil; + start_byte = CHAR_TO_BYTE (XINT (start)); + end_byte = CHAR_TO_BYTE (XINT (end)); + if (XINT (end) - XINT (start) == end_byte - start_byte) + return Qt; + + if (XINT (start) < GPT && XINT (end) > GPT) { - GAP_SIZE -= inserted_byte; - ZV += inserted_byte; Z += inserted_byte; - ZV_BYTE += inserted_byte; Z_BYTE += inserted_byte; - GPT += inserted_byte; GPT_BYTE += inserted_byte; - make_gap (inserted_byte - GAP_SIZE); - GAP_SIZE += inserted_byte; - ZV -= inserted_byte; Z -= inserted_byte; - ZV_BYTE -= inserted_byte; Z_BYTE -= inserted_byte; - GPT -= inserted_byte; GPT_BYTE -= inserted_byte; + if ((GPT - XINT (start)) < (XINT (end) - GPT)) + move_gap_both (XINT (start), start_byte); + else + move_gap_both (XINT (end), end_byte); } - inserted_byte = str_to_multibyte (GPT_ADDR, GAP_SIZE, inserted_byte); + pos = XINT (start); } - /* If we shrank the conversion area, adjust it now. */ - if (total_skip > 0) + list = Qnil; + for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail)) { - if (tail_skip > 0) - safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip); - inserted += total_skip; inserted_byte += total_skip; - GAP_SIZE += total_skip; - GPT -= head_skip; GPT_BYTE -= head_skip; - ZV -= total_skip; ZV_BYTE -= total_skip; - Z -= total_skip; Z_BYTE -= total_skip; - from -= head_skip; from_byte -= head_skip; - to += tail_skip; to_byte += tail_skip; + elt = XCAR (tail); + list = Fcons (Fcons (elt, Fcons (AREF (CODING_SYSTEM_SPEC (elt), 0), + Qnil)), + list); } - prev_Z = Z; - if (! EQ (current_buffer->undo_list, Qt)) - adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte); + if (STRINGP (start)) - p = pbeg = XSTRING (start)->data; ++ p = pbeg = SDATA (start); else - adjust_after_replace_noundo (from, from_byte, nchars_del, nbytes_del, - inserted, inserted_byte); - inserted = Z - prev_Z; + p = pbeg = BYTE_POS_ADDR (start_byte); + pend = p + (end_byte - start_byte); - if (!encodep && coding->cmp_data && coding->cmp_data->used) - coding_restore_composition (coding, Fcurrent_buffer ()); - coding_free_composition_data (coding); + while (p < pend && ASCII_BYTE_P (*p)) p++, pos++; + while (p < pend && ASCII_BYTE_P (*(pend - 1))) pend--; - if (! inhibit_pre_post_conversion - && ! encodep && ! NILP (coding->post_read_conversion)) + while (p < pend) { - Lisp_Object val; - Lisp_Object saved_coding_system; - - if (from != PT) - TEMP_SET_PT_BOTH (from, from_byte); - prev_Z = Z; - record_unwind_protect (code_convert_region_unwind, - Vlast_coding_system_used); - saved_coding_system = Vlast_coding_system_used; - Vlast_coding_system_used = coding->symbol; - /* We should not call any more pre-write/post-read-conversion - functions while this post-read-conversion is running. */ - inhibit_pre_post_conversion = 1; - val = call1 (coding->post_read_conversion, make_number (inserted)); - inhibit_pre_post_conversion = 0; - coding->symbol = Vlast_coding_system_used; - Vlast_coding_system_used = saved_coding_system; - /* Discard the unwind protect. */ - specpdl_ptr--; - CHECK_NUMBER (val); - inserted += Z - prev_Z; - } - - if (orig_point >= from) - { - if (orig_point >= from + orig_len) - orig_point += inserted - orig_len; + if (ASCII_BYTE_P (*p)) + p++; else - orig_point = from; - TEMP_SET_PT (orig_point); + { + c = STRING_CHAR_ADVANCE (p); + + charset_map_loaded = 0; + for (tail = list; CONSP (tail); tail = XCDR (tail)) + { + elt = XCDR (XCAR (tail)); + if (! char_encodable_p (c, XCAR (elt))) + XSETCDR (elt, Fcons (make_number (pos), XCDR (elt))); + } + if (charset_map_loaded) + { + EMACS_INT p_offset = p - pbeg, pend_offset = pend - pbeg; + + if (STRINGP (start)) - pbeg = XSTRING (start)->data; ++ pbeg = SDATA (start); + else + pbeg = BYTE_POS_ADDR (start_byte); + p = pbeg + p_offset; + pend = pbeg + pend_offset; + } + } + pos++; } - if (replace) + tail = list; + list = Qnil; + for (; CONSP (tail); tail = XCDR (tail)) { - signal_after_change (from, to - from, inserted); - update_compositions (from, from + inserted, CHECK_BORDER); + elt = XCAR (tail); + if (CONSP (XCDR (XCDR (elt)))) + list = Fcons (Fcons (XCAR (elt), Fnreverse (XCDR (XCDR (elt)))), + list); } - { - coding->consumed = to_byte - from_byte; - coding->consumed_char = to - from; - coding->produced = inserted_byte; - coding->produced_char = inserted; - } - - return 0; + return list; } - Lisp_Object - run_pre_post_conversion_on_str (str, coding, encodep) - Lisp_Object str; - struct coding_system *coding; - int encodep; - { - int count = SPECPDL_INDEX (); - struct gcpro gcpro1, gcpro2; - int multibyte = STRING_MULTIBYTE (str); - Lisp_Object buffer; - struct buffer *buf; - Lisp_Object old_deactivate_mark; - - record_unwind_protect (Fset_buffer, Fcurrent_buffer ()); - record_unwind_protect (code_convert_region_unwind, - Vlast_coding_system_used); - /* It is not crucial to specbind this. */ - old_deactivate_mark = Vdeactivate_mark; - GCPRO2 (str, old_deactivate_mark); - - buffer = Fget_buffer_create (build_string (" *code-converting-work*")); - buf = XBUFFER (buffer); - - delete_all_overlays (buf); - buf->directory = current_buffer->directory; - buf->read_only = Qnil; - buf->filename = Qnil; - buf->undo_list = Qt; - eassert (buf->overlays_before == NULL); - eassert (buf->overlays_after == NULL); - - set_buffer_internal (buf); - /* We must insert the contents of STR as is without - unibyte<->multibyte conversion. For that, we adjust the - multibyteness of the working buffer to that of STR. */ - Ferase_buffer (); - buf->enable_multibyte_characters = multibyte ? Qt : Qnil; - insert_from_string (str, 0, 0, - SCHARS (str), SBYTES (str), 0); - UNGCPRO; - inhibit_pre_post_conversion = 1; - if (encodep) - call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z)); - else - { - Vlast_coding_system_used = coding->symbol; - TEMP_SET_PT_BOTH (BEG, BEG_BYTE); - call1 (coding->post_read_conversion, make_number (Z - BEG)); - coding->symbol = Vlast_coding_system_used; - } - inhibit_pre_post_conversion = 0; - Vdeactivate_mark = old_deactivate_mark; - str = make_buffer_string (BEG, Z, 1); - return unbind_to (count, str); - } Lisp_Object - decode_coding_string (str, coding, nocopy) - Lisp_Object str; - struct coding_system *coding; - int nocopy; + code_convert_region (start, end, coding_system, dst_object, encodep, norecord) + Lisp_Object start, end, coding_system, dst_object; + int encodep, norecord; { - int len; - struct conversion_buffer buf; - int from, to_byte; - Lisp_Object saved_coding_symbol; - int result; - int require_decoding; - int shrinked_bytes = 0; - Lisp_Object newstr; - int consumed, consumed_char, produced, produced_char; - - from = 0; - to_byte = SBYTES (str); - - saved_coding_symbol = coding->symbol; - coding->src_multibyte = STRING_MULTIBYTE (str); - coding->dst_multibyte = 1; - if (CODING_REQUIRE_DETECTION (coding)) - { - /* See the comments in code_convert_region. */ - if (coding->type == coding_type_undecided) - { - detect_coding (coding, SDATA (str), to_byte); - if (coding->type == coding_type_undecided) - { - coding->type = coding_type_emacs_mule; - coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE; - /* As emacs-mule decoder will handle composition, we - need this setting to allocate coding->cmp_data - later. */ - coding->composing = COMPOSITION_NO; - } - } - if (coding->eol_type == CODING_EOL_UNDECIDED - && coding->type != coding_type_ccl) - { - saved_coding_symbol = coding->symbol; - detect_eol (coding, SDATA (str), to_byte); - if (coding->eol_type == CODING_EOL_UNDECIDED) - coding->eol_type = CODING_EOL_LF; - /* We had better recover the original eol format if we - encounter an inconsistent eol format while decoding. */ - coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; - } - } + struct coding_system coding; + EMACS_INT from, from_byte, to, to_byte; + Lisp_Object src_object; - if (coding->type == coding_type_no_conversion - || coding->type == coding_type_raw_text) - coding->dst_multibyte = 0; + CHECK_NUMBER_COERCE_MARKER (start); + CHECK_NUMBER_COERCE_MARKER (end); + if (NILP (coding_system)) + coding_system = Qno_conversion; + else + CHECK_CODING_SYSTEM (coding_system); + src_object = Fcurrent_buffer (); + if (NILP (dst_object)) + dst_object = src_object; + else if (! EQ (dst_object, Qt)) + CHECK_BUFFER (dst_object); + + validate_region (&start, &end); + from = XFASTINT (start); + from_byte = CHAR_TO_BYTE (from); + to = XFASTINT (end); + to_byte = CHAR_TO_BYTE (to); - require_decoding = CODING_REQUIRE_DECODING (coding); + setup_coding_system (coding_system, &coding); + coding.mode |= CODING_MODE_LAST_BLOCK; - if (STRING_MULTIBYTE (str)) - { - /* Decoding routines expect the source text to be unibyte. */ - str = Fstring_as_unibyte (str); - to_byte = SBYTES (str); - nocopy = 1; - coding->src_multibyte = 0; - } + if (encodep) + encode_coding_object (&coding, src_object, from, from_byte, to, to_byte, + dst_object); + else + decode_coding_object (&coding, src_object, from, from_byte, to, to_byte, + dst_object); + if (! norecord) + Vlast_coding_system_used = CODING_ID_NAME (coding.id); - /* Try to skip the heading and tailing ASCIIs. */ - if (require_decoding && coding->type != coding_type_ccl) - { - SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str), - 0); - if (from == to_byte) - require_decoding = 0; - shrinked_bytes = from + (SBYTES (str) - to_byte); - } + if (coding.result != CODING_RESULT_SUCCESS) + error ("Code conversion error: %d", coding.result); - if (!require_decoding - && !(SYMBOLP (coding->post_read_conversion) - && !NILP (Ffboundp (coding->post_read_conversion)))) - { - coding->consumed = SBYTES (str); - coding->consumed_char = SCHARS (str); - if (coding->dst_multibyte) - { - str = Fstring_as_multibyte (str); - nocopy = 1; - } - coding->produced = SBYTES (str); - coding->produced_char = SCHARS (str); - return (nocopy ? str : Fcopy_sequence (str)); - } + return (BUFFERP (dst_object) + ? make_number (coding.produced_char) + : coding.dst_object); + } - if (coding->composing != COMPOSITION_DISABLED) - coding_allocate_composition_data (coding, from); - len = decoding_buffer_size (coding, to_byte - from); - allocate_conversion_buffer (buf, len); - consumed = consumed_char = produced = produced_char = 0; - while (1) - { - result = decode_coding (coding, SDATA (str) + from + consumed, - buf.data + produced, to_byte - from - consumed, - buf.size - produced); - consumed += coding->consumed; - consumed_char += coding->consumed_char; - produced += coding->produced; - produced_char += coding->produced_char; - if (result == CODING_FINISH_NORMAL - || (result == CODING_FINISH_INSUFFICIENT_SRC - && coding->consumed == 0)) - break; - if (result == CODING_FINISH_INSUFFICIENT_CMP) - coding_allocate_composition_data (coding, from + produced_char); - else if (result == CODING_FINISH_INSUFFICIENT_DST) - extend_conversion_buffer (&buf); - else if (result == CODING_FINISH_INCONSISTENT_EOL) - { - Lisp_Object eol_type; + DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, + 3, 4, "r\nzCoding system: ", + doc: /* Decode the current region from the specified coding system. + When called from a program, takes four arguments: + START, END, CODING-SYSTEM, and DESTINATION. + START and END are buffer positions. - /* Recover the original EOL format. */ - if (coding->eol_type == CODING_EOL_CR) - { - unsigned char *p; - for (p = buf.data; p < buf.data + produced; p++) - if (*p == '\n') *p = '\r'; - } - else if (coding->eol_type == CODING_EOL_CRLF) - { - int num_eol = 0; - unsigned char *p0, *p1; - for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++) - if (*p0 == '\n') num_eol++; - if (produced + num_eol >= buf.size) - extend_conversion_buffer (&buf); - for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;) - { - *--p1 = *--p0; - if (*p0 == '\n') *--p1 = '\r'; - } - produced += num_eol; - produced_char += num_eol; - } - /* Suppress eol-format conversion in the further conversion. */ - coding->eol_type = CODING_EOL_LF; - - /* Set the coding system symbol to that for Unix-like EOL. */ - eol_type = Fget (saved_coding_symbol, Qeol_type); - if (VECTORP (eol_type) - && XVECTOR (eol_type)->size == 3 - && SYMBOLP (XVECTOR (eol_type)->contents[CODING_EOL_LF])) - coding->symbol = XVECTOR (eol_type)->contents[CODING_EOL_LF]; - else - coding->symbol = saved_coding_symbol; + Optional 4th arguments DESTINATION specifies where the decoded text goes. + If nil, the region between START and END is replace by the decoded text. + If buffer, the decoded text is inserted in the buffer. + If t, the decoded text is returned. + This function sets `last-coding-system-used' to the precise coding system + used (which may be different from CODING-SYSTEM if CODING-SYSTEM is + not fully specified.) + It returns the length of the decoded text. */) + (start, end, coding_system, destination) + Lisp_Object start, end, coding_system, destination; + { + return code_convert_region (start, end, coding_system, destination, 0, 0); + } - } - } + DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region, + 3, 4, "r\nzCoding system: ", + doc: /* Encode the current region by specified coding system. + When called from a program, takes three arguments: + START, END, and CODING-SYSTEM. START and END are buffer positions. - coding->consumed = consumed; - coding->consumed_char = consumed_char; - coding->produced = produced; - coding->produced_char = produced_char; + Optional 4th arguments DESTINATION specifies where the encoded text goes. + If nil, the region between START and END is replace by the encoded text. + If buffer, the encoded text is inserted in the buffer. + If t, the encoded text is returned. - if (coding->dst_multibyte) - newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes, - produced + shrinked_bytes); - else - newstr = make_uninit_string (produced + shrinked_bytes); - if (from > 0) - STRING_COPYIN (newstr, 0, SDATA (str), from); - STRING_COPYIN (newstr, from, buf.data, produced); - if (shrinked_bytes > from) - STRING_COPYIN (newstr, from + produced, - SDATA (str) + to_byte, - shrinked_bytes - from); - free_conversion_buffer (&buf); - - if (coding->cmp_data && coding->cmp_data->used) - coding_restore_composition (coding, newstr); - coding_free_composition_data (coding); - - if (SYMBOLP (coding->post_read_conversion) - && !NILP (Ffboundp (coding->post_read_conversion))) - newstr = run_pre_post_conversion_on_str (newstr, coding, 0); - - return newstr; + This function sets `last-coding-system-used' to the precise coding system + used (which may be different from CODING-SYSTEM if CODING-SYSTEM is + not fully specified.) + It returns the length of the encoded text. */) + (start, end, coding_system, destination) + Lisp_Object start, end, coding_system, destination; + { + return code_convert_region (start, end, coding_system, destination, 1, 0); } Lisp_Object - encode_coding_string (str, coding, nocopy) - Lisp_Object str; - struct coding_system *coding; - int nocopy; + code_convert_string (string, coding_system, dst_object, + encodep, nocopy, norecord) + Lisp_Object string, coding_system, dst_object; + int encodep, nocopy, norecord; { - int len; - struct conversion_buffer buf; - int from, to, to_byte; - int result; - int shrinked_bytes = 0; - Lisp_Object newstr; - int consumed, consumed_char, produced, produced_char; - - if (SYMBOLP (coding->pre_write_conversion) - && !NILP (Ffboundp (coding->pre_write_conversion))) - str = run_pre_post_conversion_on_str (str, coding, 1); - - from = 0; - to = SCHARS (str); - to_byte = SBYTES (str); - - /* Encoding routines determine the multibyteness of the source text - by coding->src_multibyte. */ - coding->src_multibyte = STRING_MULTIBYTE (str); - coding->dst_multibyte = 0; - if (! CODING_REQUIRE_ENCODING (coding)) - { - coding->consumed = SBYTES (str); - coding->consumed_char = SCHARS (str); - if (STRING_MULTIBYTE (str)) - { - str = Fstring_as_unibyte (str); - nocopy = 1; - } - coding->produced = SBYTES (str); - coding->produced_char = SCHARS (str); - return (nocopy ? str : Fcopy_sequence (str)); - } - - if (coding->composing != COMPOSITION_DISABLED) - coding_save_composition (coding, from, to, str); + struct coding_system coding; + EMACS_INT chars, bytes; - /* Try to skip the heading and tailing ASCIIs. */ - if (coding->type != coding_type_ccl) + CHECK_STRING (string); + if (NILP (coding_system)) { - SHRINK_CONVERSION_REGION (&from, &to_byte, coding, SDATA (str), - 1); - if (from == to_byte) - return (nocopy ? str : Fcopy_sequence (str)); - shrinked_bytes = from + (SBYTES (str) - to_byte); + if (! norecord) + Vlast_coding_system_used = Qno_conversion; + if (NILP (dst_object)) + return (nocopy ? Fcopy_sequence (string) : string); } - len = encoding_buffer_size (coding, to_byte - from); - allocate_conversion_buffer (buf, len); + if (NILP (coding_system)) + coding_system = Qno_conversion; + else + CHECK_CODING_SYSTEM (coding_system); + if (NILP (dst_object)) + dst_object = Qt; + else if (! EQ (dst_object, Qt)) + CHECK_BUFFER (dst_object); - consumed = consumed_char = produced = produced_char = 0; - while (1) - { - result = encode_coding (coding, SDATA (str) + from + consumed, - buf.data + produced, to_byte - from - consumed, - buf.size - produced); - consumed += coding->consumed; - consumed_char += coding->consumed_char; - produced += coding->produced; - produced_char += coding->produced_char; - if (result == CODING_FINISH_NORMAL - || (result == CODING_FINISH_INSUFFICIENT_SRC - && coding->consumed == 0)) - break; - /* Now result should be CODING_FINISH_INSUFFICIENT_DST. */ - extend_conversion_buffer (&buf); - } + setup_coding_system (coding_system, &coding); + coding.mode |= CODING_MODE_LAST_BLOCK; - chars = XSTRING (string)->size; - bytes = STRING_BYTES (XSTRING (string)); ++ chars = SCHARS (string); ++ bytes = SBYTES (string); + if (encodep) + encode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object); + else + decode_coding_object (&coding, string, 0, 0, chars, bytes, dst_object); + if (! norecord) + Vlast_coding_system_used = CODING_ID_NAME (coding.id); - coding->consumed = consumed; - coding->consumed_char = consumed_char; - coding->produced = produced; - coding->produced_char = produced_char; + if (coding.result != CODING_RESULT_SUCCESS) + error ("Code conversion error: %d", coding.result); - newstr = make_uninit_string (produced + shrinked_bytes); - if (from > 0) - STRING_COPYIN (newstr, 0, SDATA (str), from); - STRING_COPYIN (newstr, from, buf.data, produced); - if (shrinked_bytes > from) - STRING_COPYIN (newstr, from + produced, - SDATA (str) + to_byte, - shrinked_bytes - from); + return (BUFFERP (dst_object) + ? make_number (coding.produced_char) + : coding.dst_object); + } - free_conversion_buffer (&buf); - coding_free_composition_data (coding); - return newstr; - } + /* Encode or decode STRING according to CODING_SYSTEM. + Do not set Vlast_coding_system_used. - - #ifdef emacs - /*** 8. Emacs Lisp library functions ***/ + This function is called only from macros DECODE_FILE and + ENCODE_FILE, thus we ignore character composition. */ - DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0, - doc: /* Return t if OBJECT is nil or a coding-system. - See the documentation of `make-coding-system' for information - about coding-system objects. */) - (obj) - Lisp_Object obj; + Lisp_Object + code_convert_string_norecord (string, coding_system, encodep) + Lisp_Object string, coding_system; + int encodep; { - if (NILP (obj)) - return Qt; - if (!SYMBOLP (obj)) - return Qnil; - /* Get coding-spec vector for OBJ. */ - obj = Fget (obj, Qcoding_system); - return ((VECTORP (obj) && XVECTOR (obj)->size == 5) - ? Qt : Qnil); + return code_convert_string (string, coding_system, Qt, encodep, 0, 1); } - DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system, - Sread_non_nil_coding_system, 1, 1, 0, - doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. */) - (prompt) - Lisp_Object prompt; - { - Lisp_Object val; - do - { - val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, - Qt, Qnil, Qcoding_system_history, Qnil, Qnil); - } - while (SCHARS (val) == 0); - return (Fintern (val, Qnil)); - } - DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0, - doc: /* Read a coding system from the minibuffer, prompting with string PROMPT. - If the user enters null input, return second argument DEFAULT-CODING-SYSTEM. */) - (prompt, default_coding_system) - Lisp_Object prompt, default_coding_system; + DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, + 2, 4, 0, + doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result. + + Optional third arg NOCOPY non-nil means it is OK to return STRING itself + if the decoding operation is trivial. + + Optional fourth arg BUFFER non-nil meant that the decoded text is + inserted in BUFFER instead of returned as a string. In this case, + the return value is BUFFER. + + This function sets `last-coding-system-used' to the precise coding system + used (which may be different from CODING-SYSTEM if CODING-SYSTEM is + not fully specified. */) + (string, coding_system, nocopy, buffer) + Lisp_Object string, coding_system, nocopy, buffer; { - Lisp_Object val; - if (SYMBOLP (default_coding_system)) - default_coding_system = SYMBOL_NAME (default_coding_system); - val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil, - Qt, Qnil, Qcoding_system_history, - default_coding_system, Qnil); - return (SCHARS (val) == 0 ? Qnil : Fintern (val, Qnil)); + return code_convert_string (string, coding_system, buffer, + 0, ! NILP (nocopy), 0); } - DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system, - 1, 1, 0, - doc: /* Check validity of CODING-SYSTEM. - If valid, return CODING-SYSTEM, else signal a `coding-system-error' error. - It is valid if it is a symbol with a non-nil `coding-system' property. - The value of property should be a vector of length 5. */) - (coding_system) - Lisp_Object coding_system; + DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string, + 2, 4, 0, + doc: /* Encode STRING to CODING-SYSTEM, and return the result. + + Optional third arg NOCOPY non-nil means it is OK to return STRING + itself if the encoding operation is trivial. + + Optional fourth arg BUFFER non-nil meant that the encoded text is + inserted in BUFFER instead of returned as a string. In this case, + the return value is BUFFER. + + This function sets `last-coding-system-used' to the precise coding system + used (which may be different from CODING-SYSTEM if CODING-SYSTEM is + not fully specified.) */) + (string, coding_system, nocopy, buffer) + Lisp_Object string, coding_system, nocopy, buffer; { - CHECK_SYMBOL (coding_system); - if (!NILP (Fcoding_system_p (coding_system))) - return coding_system; - while (1) - Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil)); + return code_convert_string (string, coding_system, buffer, + 1, ! NILP (nocopy), 1); } + - Lisp_Object - detect_coding_system (src, src_bytes, highest, multibytep) - const unsigned char *src; - int src_bytes, highest; - int multibytep; + DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0, + doc: /* Decode a Japanese character which has CODE in shift_jis encoding. + Return the corresponding character. */) + (code) + Lisp_Object code; { - int coding_mask, eol_type; - Lisp_Object val, tmp; - int dummy; + Lisp_Object spec, attrs, val; + struct charset *charset_roman, *charset_kanji, *charset_kana, *charset; + int c; - coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy, multibytep); - eol_type = detect_eol_type (src, src_bytes, &dummy); - if (eol_type == CODING_EOL_INCONSISTENT) - eol_type = CODING_EOL_UNDECIDED; + CHECK_NATNUM (code); + c = XFASTINT (code); + CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec); + attrs = AREF (spec, 0); - if (!coding_mask) - { - val = Qundecided; - if (eol_type != CODING_EOL_UNDECIDED) - { - Lisp_Object val2; - val2 = Fget (Qundecided, Qeol_type); - if (VECTORP (val2)) - val = XVECTOR (val2)->contents[eol_type]; - } - return (highest ? val : Fcons (val, Qnil)); - } + if (ASCII_BYTE_P (c) + && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) + return code; - /* At first, gather possible coding systems in VAL. */ - val = Qnil; - for (tmp = Vcoding_category_list; CONSP (tmp); tmp = XCDR (tmp)) - { - Lisp_Object category_val, category_index; + val = CODING_ATTR_CHARSET_LIST (attrs); + charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); + charset_kana = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); + charset_kanji = CHARSET_FROM_ID (XINT (XCAR (val))); - category_index = Fget (XCAR (tmp), Qcoding_category_index); - category_val = Fsymbol_value (XCAR (tmp)); - if (!NILP (category_val) - && NATNUMP (category_index) - && (coding_mask & (1 << XFASTINT (category_index)))) - { - val = Fcons (category_val, val); - if (highest) - break; - } + if (c <= 0x7F) + charset = charset_roman; + else if (c >= 0xA0 && c < 0xDF) + { + charset = charset_kana; + c -= 0x80; } - if (!highest) - val = Fnreverse (val); - - /* Then, replace the elements with subsidiary coding systems. */ - for (tmp = val; CONSP (tmp); tmp = XCDR (tmp)) + else { - if (eol_type != CODING_EOL_UNDECIDED - && eol_type != CODING_EOL_INCONSISTENT) - { - Lisp_Object eol; - eol = Fget (XCAR (tmp), Qeol_type); - if (VECTORP (eol)) - XSETCAR (tmp, XVECTOR (eol)->contents[eol_type]); - } + int s1 = c >> 8, s2 = c & 0xFF; + + if (s1 < 0x81 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF + || s2 < 0x40 || s2 == 0x7F || s2 > 0xFC) + error ("Invalid code: %d", code); + SJIS_TO_JIS (c); + charset = charset_kanji; } - return (highest ? XCAR (val) : val); + c = DECODE_CHAR (charset, c); + if (c < 0) + error ("Invalid code: %d", code); + return make_number (c); } - DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, - 2, 3, 0, - doc: /* Detect how the byte sequence in the region is encoded. - Return a list of possible coding systems used on decoding a byte - sequence containing the bytes in the region between START and END when - the coding system `undecided' is specified. The list is ordered by - priority decided in the current language environment. - If only ASCII characters are found, it returns a list of single element - `undecided' or its subsidiary coding system according to a detected - end-of-line format. + DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0, + doc: /* Encode a Japanese character CHAR to shift_jis encoding. + Return the corresponding code in SJIS. */) + (ch) + Lisp_Object ch; + { + Lisp_Object spec, attrs, charset_list; + int c; + struct charset *charset; + unsigned code; - If optional argument HIGHEST is non-nil, return the coding system of - highest priority. */) - (start, end, highest) - Lisp_Object start, end, highest; + CHECK_CHARACTER (ch); + c = XFASTINT (ch); + CHECK_CODING_SYSTEM_GET_SPEC (Vsjis_coding_system, spec); + attrs = AREF (spec, 0); + + if (ASCII_CHAR_P (c) + && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) + return ch; + + charset_list = CODING_ATTR_CHARSET_LIST (attrs); + charset = char_charset (c, charset_list, &code); + if (code == CHARSET_INVALID_CODE (charset)) + error ("Can't encode by shift_jis encoding: %d", c); + JIS_TO_SJIS (code); + + return make_number (code); + } + + DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0, + doc: /* Decode a Big5 character which has CODE in BIG5 coding system. + Return the corresponding character. */) + (code) + Lisp_Object code; { - int from, to; - int from_byte, to_byte; - int include_anchor_byte = 0; + Lisp_Object spec, attrs, val; + struct charset *charset_roman, *charset_big5, *charset; + int c; - CHECK_NUMBER_COERCE_MARKER (start); - CHECK_NUMBER_COERCE_MARKER (end); + CHECK_NATNUM (code); + c = XFASTINT (code); + CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec); + attrs = AREF (spec, 0); - validate_region (&start, &end); - from = XINT (start), to = XINT (end); - from_byte = CHAR_TO_BYTE (from); - to_byte = CHAR_TO_BYTE (to); + if (ASCII_BYTE_P (c) + && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) + return code; - if (from < GPT && to >= GPT) - move_gap_both (to, to_byte); - /* If we an anchor byte `\0' follows the region, we include it in - the detecting source. Then code detectors can handle the tailing - byte sequence more accurately. + val = CODING_ATTR_CHARSET_LIST (attrs); + charset_roman = CHARSET_FROM_ID (XINT (XCAR (val))), val = XCDR (val); + charset_big5 = CHARSET_FROM_ID (XINT (XCAR (val))); - Fix me: This is not a perfect solution. It is better that we - add one more argument, say LAST_BLOCK, to all detect_coding_XXX. - */ - if (to == Z || (to == GPT && GAP_SIZE > 0)) - include_anchor_byte = 1; - return detect_coding_system (BYTE_POS_ADDR (from_byte), - to_byte - from_byte + include_anchor_byte, - !NILP (highest), - !NILP (current_buffer - ->enable_multibyte_characters)); + if (c <= 0x7F) + charset = charset_roman; + else + { + int b1 = c >> 8, b2 = c & 0x7F; + if (b1 < 0xA1 || b1 > 0xFE + || b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE) + error ("Invalid code: %d", code); + charset = charset_big5; + } + c = DECODE_CHAR (charset, (unsigned )c); + if (c < 0) + error ("Invalid code: %d", code); + return make_number (c); } - DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string, - 1, 2, 0, - doc: /* Detect how the byte sequence in STRING is encoded. - Return a list of possible coding systems used on decoding a byte - sequence containing the bytes in STRING when the coding system - `undecided' is specified. The list is ordered by priority decided in - the current language environment. + DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0, + doc: /* Encode the Big5 character CHAR to BIG5 coding system. + Return the corresponding character code in Big5. */) + (ch) + Lisp_Object ch; + { + Lisp_Object spec, attrs, charset_list; + struct charset *charset; + int c; + unsigned code; + + CHECK_CHARACTER (ch); + c = XFASTINT (ch); + CHECK_CODING_SYSTEM_GET_SPEC (Vbig5_coding_system, spec); + attrs = AREF (spec, 0); + if (ASCII_CHAR_P (c) + && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) + return ch; + + charset_list = CODING_ATTR_CHARSET_LIST (attrs); + charset = char_charset (c, charset_list, &code); + if (code == CHARSET_INVALID_CODE (charset)) + error ("Can't encode by Big5 encoding: %d", c); + + return make_number (code); + } - If only ASCII characters are found, it returns a list of single element - `undecided' or its subsidiary coding system according to a detected - end-of-line format. + + DEFUN ("set-terminal-coding-system-internal", + Fset_terminal_coding_system_internal, + Sset_terminal_coding_system_internal, 1, 1, 0, + doc: /* Internal use only. */) + (coding_system) + Lisp_Object coding_system; + { + CHECK_SYMBOL (coding_system); + setup_coding_system (Fcheck_coding_system (coding_system), + &terminal_coding); - + - If optional argument HIGHEST is non-nil, return the coding system of - highest priority. */) - (string, highest) - Lisp_Object string, highest; + /* We had better not send unsafe characters to terminal. */ + terminal_coding.mode |= CODING_MODE_SAFE_ENCODING; + /* Characer composition should be disabled. */ + terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; + terminal_coding.src_multibyte = 1; + terminal_coding.dst_multibyte = 0; + return Qnil; + } + + DEFUN ("set-safe-terminal-coding-system-internal", + Fset_safe_terminal_coding_system_internal, + Sset_safe_terminal_coding_system_internal, 1, 1, 0, + doc: /* Internal use only. */) + (coding_system) + Lisp_Object coding_system; { - CHECK_STRING (string); + CHECK_SYMBOL (coding_system); + setup_coding_system (Fcheck_coding_system (coding_system), + &safe_terminal_coding); + /* Characer composition should be disabled. */ + safe_terminal_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; + safe_terminal_coding.src_multibyte = 1; + safe_terminal_coding.dst_multibyte = 0; + return Qnil; + } - return detect_coding_system (SDATA (string), - /* "+ 1" is to include the anchor byte - `\0'. With this, code detectors can - handle the tailing bytes more - accurately. */ - SBYTES (string) + 1, - !NILP (highest), - STRING_MULTIBYTE (string)); + DEFUN ("terminal-coding-system", + Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0, + doc: /* Return coding system specified for terminal output. */) + () + { + return CODING_ID_NAME (terminal_coding.id); + } + + DEFUN ("set-keyboard-coding-system-internal", + Fset_keyboard_coding_system_internal, + Sset_keyboard_coding_system_internal, 1, 1, 0, + doc: /* Internal use only. */) + (coding_system) + Lisp_Object coding_system; + { + CHECK_SYMBOL (coding_system); + setup_coding_system (Fcheck_coding_system (coding_system), + &keyboard_coding); + /* Characer composition should be disabled. */ + keyboard_coding.common_flags &= ~CODING_ANNOTATE_COMPOSITION_MASK; + return Qnil; + } + + DEFUN ("keyboard-coding-system", + Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0, + doc: /* Return coding system specified for decoding keyboard input. */) + () + { + return CODING_ID_NAME (keyboard_coding.id); } - /* Subroutine for Fsafe_coding_systems_region_internal. + + DEFUN ("find-operation-coding-system", Ffind_operation_coding_system, + Sfind_operation_coding_system, 1, MANY, 0, + doc: /* Choose a coding system for an operation based on the target name. + The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM). + DECODING-SYSTEM is the coding system to use for decoding + \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system + for encoding (in case OPERATION does encoding). - Return a list of coding systems that safely encode the multibyte - text between P and PEND. SAFE_CODINGS, if non-nil, is an alist of - possible coding systems. If it is nil, it means that we have not - yet found any coding systems. + The first argument OPERATION specifies an I/O primitive: + For file I/O, `insert-file-contents' or `write-region'. + For process I/O, `call-process', `call-process-region', or `start-process'. + For network I/O, `open-network-stream'. - WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An - element of WORK_TABLE is set to t once the element is looked up. + The remaining arguments should be the same arguments that were passed + to the primitive. Depending on which primitive, one of those arguments + is selected as the TARGET. For example, if OPERATION does file I/O, + whichever argument specifies the file name is TARGET. - If a non-ASCII single byte char is found, set - *single_byte_char_found to 1. */ + TARGET has a meaning which depends on OPERATION: + For file I/O, TARGET is a file name. + For process I/O, TARGET is a process name. + For network I/O, TARGET is a service name or a port number - static Lisp_Object - find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) - unsigned char *p, *pend; - Lisp_Object safe_codings, work_table; - int *single_byte_char_found; + This function looks up what specified for TARGET in, + `file-coding-system-alist', `process-coding-system-alist', + or `network-coding-system-alist' depending on OPERATION. + They may specify a coding system, a cons of coding systems, + or a function symbol to call. + In the last case, we call the function with one argument, + which is a list of all the arguments given to this function. + + usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */) + (nargs, args) + int nargs; + Lisp_Object *args; { - int c, len; - Lisp_Object val, ch; - Lisp_Object prev, tail; + Lisp_Object operation, target_idx, target, val; + register Lisp_Object chain; - while (p < pend) + if (nargs < 2) + error ("Too few arguments"); + operation = args[0]; + if (!SYMBOLP (operation) + || !INTEGERP (target_idx = Fget (operation, Qtarget_idx))) + error ("Invalid first arguement"); + if (nargs < 1 + XINT (target_idx)) + error ("Too few arguments for operation: %s", - XSYMBOL (operation)->name->data); ++ SDATA (SYMBOL_NAME (operation))); + target = args[XINT (target_idx) + 1]; + if (!(STRINGP (target) + || (EQ (operation, Qopen_network_stream) && INTEGERP (target)))) + error ("Invalid %dth argument", XINT (target_idx) + 1); + + chain = ((EQ (operation, Qinsert_file_contents) + || EQ (operation, Qwrite_region)) + ? Vfile_coding_system_alist + : (EQ (operation, Qopen_network_stream) + ? Vnetwork_coding_system_alist + : Vprocess_coding_system_alist)); + if (NILP (chain)) + return Qnil; + + for (; CONSP (chain); chain = XCDR (chain)) { - c = STRING_CHAR_AND_LENGTH (p, pend - p, len); - p += len; - if (ASCII_BYTE_P (c)) - /* We can ignore ASCII characters here. */ - continue; - if (SINGLE_BYTE_CHAR_P (c)) - *single_byte_char_found = 1; - if (NILP (safe_codings)) - /* Already all coding systems are excluded. But, we can't - terminate the loop here because non-ASCII single-byte char - must be found. */ - continue; - /* Check the safe coding systems for C. */ - ch = make_number (c); - val = Faref (work_table, ch); - if (EQ (val, Qt)) - /* This element was already checked. Ignore it. */ - continue; - /* Remember that we checked this element. */ - Faset (work_table, ch, Qt); + Lisp_Object elt; - for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail)) + elt = XCAR (chain); + if (CONSP (elt) + && ((STRINGP (target) + && STRINGP (XCAR (elt)) + && fast_string_match (XCAR (elt), target) >= 0) + || (INTEGERP (target) && EQ (target, XCAR (elt))))) { - Lisp_Object elt, translation_table, hash_table, accept_latin_extra; - int encodable; - - elt = XCAR (tail); - if (CONSP (XCDR (elt))) - { - /* This entry has this format now: - ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE - ACCEPT-LATIN-EXTRA ) */ - val = XCDR (elt); - encodable = ! NILP (Faref (XCAR (val), ch)); - if (! encodable) - { - val = XCDR (val); - translation_table = XCAR (val); - hash_table = XCAR (XCDR (val)); - accept_latin_extra = XCAR (XCDR (XCDR (val))); - } - } - else - { - /* This entry has this format now: ( CODING . SAFE-CHARS) */ - encodable = ! NILP (Faref (XCDR (elt), ch)); - if (! encodable) - { - /* Transform the format to: - ( CODING SAFE-CHARS TRANSLATION-TABLE HASH-TABLE - ACCEPT-LATIN-EXTRA ) */ - val = Fget (XCAR (elt), Qcoding_system); - translation_table - = Fplist_get (AREF (val, 3), - Qtranslation_table_for_encode); - if (SYMBOLP (translation_table)) - translation_table = Fget (translation_table, - Qtranslation_table); - hash_table - = (CHAR_TABLE_P (translation_table) - ? XCHAR_TABLE (translation_table)->extras[1] - : Qnil); - accept_latin_extra - = ((EQ (AREF (val, 0), make_number (2)) - && VECTORP (AREF (val, 4))) - ? AREF (AREF (val, 4), 16) - : Qnil); - XSETCAR (tail, list5 (XCAR (elt), XCDR (elt), - translation_table, hash_table, - accept_latin_extra)); - } - } - - if (! encodable - && ((CHAR_TABLE_P (translation_table) - && ! NILP (Faref (translation_table, ch))) - || (HASH_TABLE_P (hash_table) - && ! NILP (Fgethash (ch, hash_table, Qnil))) - || (SINGLE_BYTE_CHAR_P (c) - && ! NILP (accept_latin_extra) - && VECTORP (Vlatin_extra_code_table) - && ! NILP (AREF (Vlatin_extra_code_table, c))))) - encodable = 1; - if (encodable) - prev = tail; - else + val = XCDR (elt); + /* Here, if VAL is both a valid coding system and a valid + function symbol, we return VAL as a coding system. */ + if (CONSP (val)) + return val; + if (! SYMBOLP (val)) + return Qnil; + if (! NILP (Fcoding_system_p (val))) + return Fcons (val, val); + if (! NILP (Ffboundp (val))) { - /* Exclude this coding system from SAFE_CODINGS. */ - if (EQ (tail, safe_codings)) - safe_codings = XCDR (safe_codings); - else - XSETCDR (prev, XCDR (tail)); + val = call1 (val, Flist (nargs, args)); + if (CONSP (val)) + return val; + if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val))) + return Fcons (val, val); } + return Qnil; } } - return safe_codings; + return Qnil; } - DEFUN ("find-coding-systems-region-internal", - Ffind_coding_systems_region_internal, - Sfind_coding_systems_region_internal, 2, 2, 0, - doc: /* Internal use only. */) - (start, end) - Lisp_Object start, end; - { - Lisp_Object work_table, safe_codings; - int non_ascii_p = 0; - int single_byte_char_found = 0; - const unsigned char *p1, *p1end, *p2, *p2end, *p; - - if (STRINGP (start)) - { - if (!STRING_MULTIBYTE (start)) - return Qt; - p1 = SDATA (start), p1end = p1 + SBYTES (start); - p2 = p2end = p1end; - if (SCHARS (start) != SBYTES (start)) - non_ascii_p = 1; - } - else - { - int from, to, stop; - - CHECK_NUMBER_COERCE_MARKER (start); - CHECK_NUMBER_COERCE_MARKER (end); - if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end)) - args_out_of_range (start, end); - if (NILP (current_buffer->enable_multibyte_characters)) - return Qt; - from = CHAR_TO_BYTE (XINT (start)); - to = CHAR_TO_BYTE (XINT (end)); - stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to; - p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from); - if (stop == to) - p2 = p2end = p1end; - else - p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop); - if (XINT (end) - XINT (start) != to - from) - non_ascii_p = 1; - } + DEFUN ("set-coding-system-priority", Fset_coding_system_priority, + Sset_coding_system_priority, 0, MANY, 0, + doc: /* Assign higher priority to the coding systems given as arguments. + If multiple coding systems belongs to the same category, + all but the first one are ignored. */) + (nargs, args) + int nargs; + Lisp_Object *args; + { + int i, j; + int changed[coding_category_max]; + enum coding_category priorities[coding_category_max]; + + bzero (changed, sizeof changed); - if (!non_ascii_p) + for (i = j = 0; i < nargs; i++) { - /* We are sure that the text contains no multibyte character. - Check if it contains eight-bit-graphic. */ - p = p1; - for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++); - if (p == p1end) - { - for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++); - if (p == p2end) - return Qt; - } - } + enum coding_category category; + Lisp_Object spec, attrs; - /* The text contains non-ASCII characters. */ + CHECK_CODING_SYSTEM_GET_SPEC (args[i], spec); + attrs = AREF (spec, 0); + category = XINT (CODING_ATTR_CATEGORY (attrs)); + if (changed[category]) + /* Ignore this coding system because a coding system of the + same category already had a higher priority. */ + continue; + changed[category] = 1; + priorities[j++] = category; + if (coding_categories[category].id >= 0 + && ! EQ (args[i], CODING_ID_NAME (coding_categories[category].id))) + setup_coding_system (args[i], &coding_categories[category]); + Fset (AREF (Vcoding_category_table, category), args[i]); + } - work_table = Fmake_char_table (Qchar_coding_system, Qnil); - safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars)); + /* Now we have decided top J priorities. Reflect the order of the + original priorities to the remaining priorities. */ - safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table, - &single_byte_char_found); - if (p2 < p2end) - safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table, - &single_byte_char_found); - if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars))) - safe_codings = Qt; - else + for (i = j, j = 0; i < coding_category_max; i++, j++) { - /* Turn safe_codings to a list of coding systems... */ - Lisp_Object val; + while (j < coding_category_max + && changed[coding_priorities[j]]) + j++; + if (j == coding_category_max) + abort (); + priorities[i] = coding_priorities[j]; + } - if (single_byte_char_found) - /* ... and append these for eight-bit chars. */ - val = Fcons (Qraw_text, - Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil))); - else - /* ... and append generic coding systems. */ - val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars)); + bcopy (priorities, coding_priorities, sizeof priorities); - for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings)) - val = Fcons (XCAR (XCAR (safe_codings)), val); - safe_codings = val; - } + /* Update `coding-category-list'. */ + Vcoding_category_list = Qnil; + for (i = coding_category_max - 1; i >= 0; i--) + Vcoding_category_list + = Fcons (AREF (Vcoding_category_table, priorities[i]), + Vcoding_category_list); - return safe_codings; + return Qnil; } + DEFUN ("coding-system-priority-list", Fcoding_system_priority_list, + Scoding_system_priority_list, 0, 1, 0, + doc: /* Return a list of coding systems ordered by their priorities. + HIGHESTP non-nil means just return the highest priority one. */) + (highestp) + Lisp_Object highestp; + { + int i; + Lisp_Object val; - /* Search from position POS for such characters that are unencodable - accoding to SAFE_CHARS, and return a list of their positions. P - points where in the memory the character at POS exists. Limit the - search at PEND or when Nth unencodable characters are found. - - If SAFE_CHARS is a char table, an element for an unencodable - character is nil. + for (i = 0, val = Qnil; i < coding_category_max; i++) + { + enum coding_category category = coding_priorities[i]; + int id = coding_categories[category].id; + Lisp_Object attrs; - If SAFE_CHARS is nil, all non-ASCII characters are unencodable. + if (id < 0) + continue; + attrs = CODING_ID_ATTRS (id); + if (! NILP (highestp)) + return CODING_ATTR_BASE_NAME (attrs); + val = Fcons (CODING_ATTR_BASE_NAME (attrs), val); + } + return Fnreverse (val); + } - Otherwise, SAFE_CHARS is t, and only eight-bit-contrl and - eight-bit-graphic characters are unencodable. */ + static char *suffixes[] = { "-unix", "-dos", "-mac" }; static Lisp_Object - unencodable_char_position (safe_chars, pos, p, pend, n) - Lisp_Object safe_chars; - int pos; - unsigned char *p, *pend; - int n; + make_subsidiaries (base) + Lisp_Object base; { - Lisp_Object pos_list; + Lisp_Object subsidiaries; - int base_name_len = STRING_BYTES (XSYMBOL (base)->name); ++ int base_name_len = SBYTES (SYMBOL_NAME (base)); + char *buf = (char *) alloca (base_name_len + 6); + int i; - - bcopy (XSYMBOL (base)->name->data, buf, base_name_len); + - pos_list = Qnil; - while (p < pend) ++ bcopy (SDATA (SYMBOL_NAME (base)), buf, base_name_len); + subsidiaries = Fmake_vector (make_number (3), Qnil); + for (i = 0; i < 3; i++) { - int len; - int c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, len); - - if (c >= 128 - && (CHAR_TABLE_P (safe_chars) - ? NILP (CHAR_TABLE_REF (safe_chars, c)) - : (NILP (safe_chars) || c < 256))) - { - pos_list = Fcons (make_number (pos), pos_list); - if (--n <= 0) - break; - } - pos++; - p += len; + bcopy (suffixes[i], buf + base_name_len, strlen (suffixes[i]) + 1); + ASET (subsidiaries, i, intern (buf)); } - return Fnreverse (pos_list); + return subsidiaries; } - DEFUN ("unencodable-char-position", Funencodable_char_position, - Sunencodable_char_position, 3, 5, 0, - doc: /* - Return position of first un-encodable character in a region. - START and END specfiy the region and CODING-SYSTEM specifies the - encoding to check. Return nil if CODING-SYSTEM does encode the region. - - If optional 4th argument COUNT is non-nil, it specifies at most how - many un-encodable characters to search. In this case, the value is a - list of positions. - - If optional 5th argument STRING is non-nil, it is a string to search - for un-encodable characters. In that case, START and END are indexes - to the string. */) - (start, end, coding_system, count, string) - Lisp_Object start, end, coding_system, count, string; + DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal, + Sdefine_coding_system_internal, coding_arg_max, MANY, 0, + doc: /* For internal use only. + usage: (define-coding-system-internal ...) */) + (nargs, args) + int nargs; + Lisp_Object *args; { - int n; - Lisp_Object safe_chars; - struct coding_system coding; - Lisp_Object positions; - int from, to; - unsigned char *p, *pend; + Lisp_Object name; + Lisp_Object spec_vec; /* [ ATTRS ALIASE EOL_TYPE ] */ + Lisp_Object attrs; /* Vector of attributes. */ + Lisp_Object eol_type; + Lisp_Object aliases; + Lisp_Object coding_type, charset_list, safe_charsets; + enum coding_category category; + Lisp_Object tail, val; + int max_charset_id = 0; + int i; - if (NILP (string)) - { - validate_region (&start, &end); - from = XINT (start); - to = XINT (end); - if (NILP (current_buffer->enable_multibyte_characters)) - return Qnil; - p = CHAR_POS_ADDR (from); - if (to == GPT) - pend = GPT_ADDR; - else - pend = CHAR_POS_ADDR (to); - } - else - { - CHECK_STRING (string); - CHECK_NATNUM (start); - CHECK_NATNUM (end); - from = XINT (start); - to = XINT (end); - if (from > to - || to > SCHARS (string)) - args_out_of_range_3 (string, start, end); - if (! STRING_MULTIBYTE (string)) - return Qnil; - p = SDATA (string) + string_char_to_byte (string, from); - pend = SDATA (string) + string_char_to_byte (string, to); - } + if (nargs < coding_arg_max) + goto short_args; - setup_coding_system (Fcheck_coding_system (coding_system), &coding); + attrs = Fmake_vector (make_number (coding_attr_last_index), Qnil); - if (NILP (count)) - n = 1; - else - { - CHECK_NATNUM (count); - n = XINT (count); - } + name = args[coding_arg_name]; + CHECK_SYMBOL (name); + CODING_ATTR_BASE_NAME (attrs) = name; - if (coding.type == coding_type_no_conversion - || coding.type == coding_type_raw_text) - return Qnil; + val = args[coding_arg_mnemonic]; + if (! STRINGP (val)) + CHECK_CHARACTER (val); + CODING_ATTR_MNEMONIC (attrs) = val; - if (coding.type == coding_type_undecided) - safe_chars = Qnil; - else - safe_chars = coding_safe_chars (coding_system); + coding_type = args[coding_arg_coding_type]; + CHECK_SYMBOL (coding_type); + CODING_ATTR_TYPE (attrs) = coding_type; - if (STRINGP (string) - || from >= GPT || to <= GPT) - positions = unencodable_char_position (safe_chars, from, p, pend, n); + charset_list = args[coding_arg_charset_list]; + if (SYMBOLP (charset_list)) + { + if (EQ (charset_list, Qiso_2022)) + { + if (! EQ (coding_type, Qiso_2022)) + error ("Invalid charset-list"); + charset_list = Viso_2022_charset_list; + } + else if (EQ (charset_list, Qemacs_mule)) + { + if (! EQ (coding_type, Qemacs_mule)) + error ("Invalid charset-list"); + charset_list = Vemacs_mule_charset_list; + } + for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) + if (max_charset_id < XFASTINT (XCAR (tail))) + max_charset_id = XFASTINT (XCAR (tail)); + } else { - Lisp_Object args[2]; - - args[0] = unencodable_char_position (safe_chars, from, p, GPT_ADDR, n); - n -= XINT (Flength (args[0])); - if (n <= 0) - positions = args[0]; - else + charset_list = Fcopy_sequence (charset_list); + for (tail = charset_list; !NILP (tail); tail = Fcdr (tail)) { - args[1] = unencodable_char_position (safe_chars, GPT, GAP_END_ADDR, - pend, n); - positions = Fappend (2, args); + struct charset *charset; + + val = Fcar (tail); + CHECK_CHARSET_GET_CHARSET (val, charset); + if (EQ (coding_type, Qiso_2022) + ? CHARSET_ISO_FINAL (charset) < 0 + : EQ (coding_type, Qemacs_mule) + ? CHARSET_EMACS_MULE_ID (charset) < 0 + : 0) + error ("Can't handle charset `%s'", - XSYMBOL (CHARSET_NAME (charset))->name->data); ++ SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); + - XCAR (tail) = make_number (charset->id); ++ XSETCAR (tail, make_number (charset->id)); + if (max_charset_id < charset->id) + max_charset_id = charset->id; } } + CODING_ATTR_CHARSET_LIST (attrs) = charset_list; - return (NILP (count) ? Fcar (positions) : positions); - } - + safe_charsets = Fmake_string (make_number (max_charset_id + 1), + make_number (255)); + for (tail = charset_list; CONSP (tail); tail = XCDR (tail)) - XSTRING (safe_charsets)->data[XFASTINT (XCAR (tail))] = 0; ++ SSET (safe_charsets, XFASTINT (XCAR (tail)), 0); + CODING_ATTR_SAFE_CHARSETS (attrs) = safe_charsets; - Lisp_Object - code_convert_region1 (start, end, coding_system, encodep) - Lisp_Object start, end, coding_system; - int encodep; - { - struct coding_system coding; - int from, to; + CODING_ATTR_ASCII_COMPAT (attrs) = args[coding_arg_ascii_compatible_p]; - CHECK_NUMBER_COERCE_MARKER (start); - CHECK_NUMBER_COERCE_MARKER (end); - CHECK_SYMBOL (coding_system); + val = args[coding_arg_decode_translation_table]; + if (! NILP (val)) + CHECK_CHAR_TABLE (val); + CODING_ATTR_DECODE_TBL (attrs) = val; - validate_region (&start, &end); - from = XFASTINT (start); - to = XFASTINT (end); + val = args[coding_arg_encode_translation_table]; + if (! NILP (val)) + CHECK_CHAR_TABLE (val); + CODING_ATTR_ENCODE_TBL (attrs) = val; - if (NILP (coding_system)) - return make_number (to - from); + val = args[coding_arg_post_read_conversion]; + CHECK_SYMBOL (val); + CODING_ATTR_POST_READ (attrs) = val; - if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) - error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system))); + val = args[coding_arg_pre_write_conversion]; + CHECK_SYMBOL (val); + CODING_ATTR_PRE_WRITE (attrs) = val; - coding.mode |= CODING_MODE_LAST_BLOCK; - coding.src_multibyte = coding.dst_multibyte - = !NILP (current_buffer->enable_multibyte_characters); - code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to), - &coding, encodep, 1); - Vlast_coding_system_used = coding.symbol; - return make_number (coding.produced_char); - } + val = args[coding_arg_default_char]; + if (NILP (val)) + CODING_ATTR_DEFAULT_CHAR (attrs) = make_number (' '); + else + { - CHECK_CHARACTER (val); ++ CHECK_CHARACTER (val); + CODING_ATTR_DEFAULT_CHAR (attrs) = val; + } - DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region, - 3, 3, "r\nzCoding system: ", - doc: /* Decode the current region from the specified coding system. - When called from a program, takes three arguments: - START, END, and CODING-SYSTEM. START and END are buffer positions. - This function sets `last-coding-system-used' to the precise coding system - used (which may be different from CODING-SYSTEM if CODING-SYSTEM is - not fully specified.) - It returns the length of the decoded text. */) - (start, end, coding_system) - Lisp_Object start, end, coding_system; - { - return code_convert_region1 (start, end, coding_system, 0); - } ++ val = args[coding_arg_for_unibyte]; ++ CODING_ATTR_FOR_UNIBYTE (attrs) = NILP (val) ? Qnil : Qt; + - DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region, - 3, 3, "r\nzCoding system: ", - doc: /* Encode the current region into the specified coding system. - When called from a program, takes three arguments: - START, END, and CODING-SYSTEM. START and END are buffer positions. - This function sets `last-coding-system-used' to the precise coding system - used (which may be different from CODING-SYSTEM if CODING-SYSTEM is - not fully specified.) - It returns the length of the encoded text. */) - (start, end, coding_system) - Lisp_Object start, end, coding_system; - { - return code_convert_region1 (start, end, coding_system, 1); - } + val = args[coding_arg_plist]; + CHECK_LIST (val); + CODING_ATTR_PLIST (attrs) = val; - Lisp_Object - code_convert_string1 (string, coding_system, nocopy, encodep) - Lisp_Object string, coding_system, nocopy; - int encodep; - { - struct coding_system coding; + if (EQ (coding_type, Qcharset)) + { + Lisp_Object list; + /* Generate a lisp vector of 256 elements. Each element is nil, + integer, or a list of charset IDs. - CHECK_STRING (string); - CHECK_SYMBOL (coding_system); + If Nth element is nil, the byte code N is invalid in this + coding system. - if (NILP (coding_system)) - return (NILP (nocopy) ? Fcopy_sequence (string) : string); + If Nth element is a number NUM, N is the first byte of a + charset whose ID is NUM. - if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) - error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system))); + If Nth element is a list of charset IDs, N is the first byte + of one of them. The list is sorted by dimensions of the + charsets. A charset of smaller dimension comes firtst. + */ + for (list = Qnil, tail = charset_list; CONSP (tail); tail = XCDR (tail)) + { + struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail))); - coding.mode |= CODING_MODE_LAST_BLOCK; - string = (encodep - ? encode_coding_string (string, &coding, !NILP (nocopy)) - : decode_coding_string (string, &coding, !NILP (nocopy))); - Vlast_coding_system_used = coding.symbol; + if (charset->method == CHARSET_METHOD_SUPERSET) + { + val = CHARSET_SUPERSET (charset); + for (; CONSP (val); val = XCDR (val)) - list = Fcons (XCAR (XCAR (val)), list); ++ list = Fcons (XCAR (XCAR (val)), list); + } + else + list = Fcons (XCAR (tail), list); + } - return string; - } + val = Fmake_vector (make_number (256), Qnil); - DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string, - 2, 3, 0, - doc: /* Decode STRING which is encoded in CODING-SYSTEM, and return the result. - Optional arg NOCOPY non-nil means it is OK to return STRING itself - if the decoding operation is trivial. - This function sets `last-coding-system-used' to the precise coding system - used (which may be different from CODING-SYSTEM if CODING-SYSTEM is - not fully specified.) */) - (string, coding_system, nocopy) - Lisp_Object string, coding_system, nocopy; - { - return code_convert_string1 (string, coding_system, nocopy, 0); - } + for (tail = Fnreverse (list); CONSP (tail); tail = XCDR (tail)) + { + struct charset *charset = CHARSET_FROM_ID (XFASTINT (XCAR (tail))); + int dim = CHARSET_DIMENSION (charset); + int idx = (dim - 1) * 4; - + - DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string, - 2, 3, 0, - doc: /* Encode STRING to CODING-SYSTEM, and return the result. - Optional arg NOCOPY non-nil means it is OK to return STRING itself - if the encoding operation is trivial. - This function sets `last-coding-system-used' to the precise coding system - used (which may be different from CODING-SYSTEM if CODING-SYSTEM is - not fully specified.) */) - (string, coding_system, nocopy) - Lisp_Object string, coding_system, nocopy; - { - return code_convert_string1 (string, coding_system, nocopy, 1); - } + if (CHARSET_ASCII_COMPATIBLE_P (charset)) + CODING_ATTR_ASCII_COMPAT (attrs) = Qt; - /* Encode or decode STRING according to CODING_SYSTEM. - Do not set Vlast_coding_system_used. + for (i = charset->code_space[idx]; + i <= charset->code_space[idx + 1]; i++) + { + Lisp_Object tmp, tmp2; + int dim2; - This function is called only from macros DECODE_FILE and - ENCODE_FILE, thus we ignore character composition. */ + tmp = AREF (val, i); + if (NILP (tmp)) + tmp = XCAR (tail); + else if (NUMBERP (tmp)) + { + dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp))); + if (dim < dim2) + tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil)); + else + tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil)); + } + else + { + for (tmp2 = tmp; CONSP (tmp2); tmp2 = XCDR (tmp2)) + { + dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (XCAR (tmp2)))); + if (dim < dim2) + break; + } + if (NILP (tmp2)) + tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil)); + else + { + XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2))); + XSETCAR (tmp2, XCAR (tail)); + } + } + ASET (val, i, tmp); + } + } + ASET (attrs, coding_attr_charset_valids, val); + category = coding_category_charset; + } + else if (EQ (coding_type, Qccl)) + { + Lisp_Object valids; - + - Lisp_Object - code_convert_string_norecord (string, coding_system, encodep) - Lisp_Object string, coding_system; - int encodep; - { - struct coding_system coding; + if (nargs < coding_arg_ccl_max) + goto short_args; - CHECK_STRING (string); - CHECK_SYMBOL (coding_system); + val = args[coding_arg_ccl_decoder]; + CHECK_CCL_PROGRAM (val); + if (VECTORP (val)) + val = Fcopy_sequence (val); + ASET (attrs, coding_attr_ccl_decoder, val); - if (NILP (coding_system)) - return string; + val = args[coding_arg_ccl_encoder]; + CHECK_CCL_PROGRAM (val); + if (VECTORP (val)) + val = Fcopy_sequence (val); + ASET (attrs, coding_attr_ccl_encoder, val); - if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0) - error ("Invalid coding system: %s", SDATA (SYMBOL_NAME (coding_system))); + val = args[coding_arg_ccl_valids]; + valids = Fmake_string (make_number (256), make_number (0)); + for (tail = val; !NILP (tail); tail = Fcdr (tail)) + { + int from, to; - coding.composing = COMPOSITION_DISABLED; - coding.mode |= CODING_MODE_LAST_BLOCK; - return (encodep - ? encode_coding_string (string, &coding, 1) - : decode_coding_string (string, &coding, 1)); - } - - DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0, - doc: /* Decode a Japanese character which has CODE in shift_jis encoding. - Return the corresponding character. */) - (code) - Lisp_Object code; - { - unsigned char c1, c2, s1, s2; - Lisp_Object val; + val = Fcar (tail); + if (INTEGERP (val)) + { + from = to = XINT (val); + if (from < 0 || from > 255) + args_out_of_range_3 (val, make_number (0), make_number (255)); + } + else + { + CHECK_CONS (val); - CHECK_NUMBER (XCAR (val)); - CHECK_NUMBER (XCDR (val)); ++ CHECK_NATNUM_CAR (val); ++ CHECK_NATNUM_CDR (val); + from = XINT (XCAR (val)); - if (from < 0 || from > 255) ++ if (from > 255) + args_out_of_range_3 (XCAR (val), + make_number (0), make_number (255)); + to = XINT (XCDR (val)); + if (to < from || to > 255) + args_out_of_range_3 (XCDR (val), + XCAR (val), make_number (255)); + } + for (i = from; i <= to; i++) - XSTRING (valids)->data[i] = 1; ++ SSET (valids, i, 1); + } + ASET (attrs, coding_attr_ccl_valids, valids); - + - CHECK_NUMBER (code); - s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF; - if (s1 == 0) - { - if (s2 < 0x80) - XSETFASTINT (val, s2); - else if (s2 >= 0xA0 || s2 <= 0xDF) - XSETFASTINT (val, MAKE_CHAR (charset_katakana_jisx0201, s2, 0)); - else - error ("Invalid Shift JIS code: %x", XFASTINT (code)); + category = coding_category_ccl; } - else + else if (EQ (coding_type, Qutf_16)) { - if ((s1 < 0x80 || (s1 > 0x9F && s1 < 0xE0) || s1 > 0xEF) - || (s2 < 0x40 || s2 == 0x7F || s2 > 0xFC)) - error ("Invalid Shift JIS code: %x", XFASTINT (code)); - DECODE_SJIS (s1, s2, c1, c2); - XSETFASTINT (val, MAKE_CHAR (charset_jisx0208, c1, c2)); - } - return val; - } + Lisp_Object bom, endian; - DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0, - doc: /* Encode a Japanese character CHAR to shift_jis encoding. - Return the corresponding code in SJIS. */) - (ch) - Lisp_Object ch; - { - int charset, c1, c2, s1, s2; - Lisp_Object val; + CODING_ATTR_ASCII_COMPAT (attrs) = Qnil; - CHECK_NUMBER (ch); - SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); - if (charset == CHARSET_ASCII) - { - val = ch; - } - else if (charset == charset_jisx0208 - && c1 > 0x20 && c1 < 0x7F && c2 > 0x20 && c2 < 0x7F) - { - ENCODE_SJIS (c1, c2, s1, s2); - XSETFASTINT (val, (s1 << 8) | s2); - } - else if (charset == charset_katakana_jisx0201 - && c1 > 0x20 && c2 < 0xE0) - { - XSETFASTINT (val, c1 | 0x80); - } - else - error ("Can't encode to shift_jis: %d", XFASTINT (ch)); - return val; - } + if (nargs < coding_arg_utf16_max) + goto short_args; - DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0, - doc: /* Decode a Big5 character which has CODE in BIG5 coding system. - Return the corresponding character. */) - (code) - Lisp_Object code; - { - int charset; - unsigned char b1, b2, c1, c2; - Lisp_Object val; + bom = args[coding_arg_utf16_bom]; + if (! NILP (bom) && ! EQ (bom, Qt)) + { + CHECK_CONS (bom); - CHECK_CODING_SYSTEM (XCAR (bom)); - CHECK_CODING_SYSTEM (XCDR (bom)); ++ val = XCAR (bom); ++ CHECK_CODING_SYSTEM (val); ++ val = XCDR (bom); ++ CHECK_CODING_SYSTEM (val); + } + ASET (attrs, coding_attr_utf_16_bom, bom); + + endian = args[coding_arg_utf16_endian]; + CHECK_SYMBOL (endian); + if (NILP (endian)) + endian = Qbig; + else if (! EQ (endian, Qbig) && ! EQ (endian, Qlittle)) - error ("Invalid endian: %s", XSYMBOL (endian)->name->data); ++ error ("Invalid endian: %s", SDATA (SYMBOL_NAME (endian))); + ASET (attrs, coding_attr_utf_16_endian, endian); + + category = (CONSP (bom) + ? coding_category_utf_16_auto + : NILP (bom) + ? (EQ (endian, Qbig) + ? coding_category_utf_16_be_nosig + : coding_category_utf_16_le_nosig) + : (EQ (endian, Qbig) + ? coding_category_utf_16_be + : coding_category_utf_16_le)); + } + else if (EQ (coding_type, Qiso_2022)) + { + Lisp_Object initial, reg_usage, request, flags; + int i; + + if (nargs < coding_arg_iso2022_max) + goto short_args; + + initial = Fcopy_sequence (args[coding_arg_iso2022_initial]); + CHECK_VECTOR (initial); + for (i = 0; i < 4; i++) + { + val = Faref (initial, make_number (i)); + if (! NILP (val)) + { + struct charset *charset; + + CHECK_CHARSET_GET_CHARSET (val, charset); + ASET (initial, i, make_number (CHARSET_ID (charset))); + if (i == 0 && CHARSET_ASCII_COMPATIBLE_P (charset)) + CODING_ATTR_ASCII_COMPAT (attrs) = Qt; + } + else + ASET (initial, i, make_number (-1)); + } + + reg_usage = args[coding_arg_iso2022_reg_usage]; + CHECK_CONS (reg_usage); - CHECK_NATNUM (XCAR (reg_usage)); - CHECK_NATNUM (XCDR (reg_usage)); ++ CHECK_NUMBER_CAR (reg_usage); ++ CHECK_NUMBER_CDR (reg_usage); + + request = Fcopy_sequence (args[coding_arg_iso2022_request]); + for (tail = request; ! NILP (tail); tail = Fcdr (tail)) + { + int id; ++ Lisp_Object tmp; + + val = Fcar (tail); + CHECK_CONS (val); - CHECK_CHARSET_GET_ID (XCAR (val), id); - CHECK_NATNUM (XCDR (val)); ++ tmp = XCAR (val); ++ CHECK_CHARSET_GET_ID (tmp, id); ++ CHECK_NATNUM_CDR (val); + if (XINT (XCDR (val)) >= 4) + error ("Invalid graphic register number: %d", XINT (XCDR (val))); - XCAR (val) = make_number (id); ++ XSETCAR (val, make_number (id)); + } - CHECK_NUMBER (code); - b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF; - if (b1 == 0) + flags = args[coding_arg_iso2022_flags]; + CHECK_NATNUM (flags); + i = XINT (flags); + if (EQ (args[coding_arg_charset_list], Qiso_2022)) + flags = make_number (i | CODING_ISO_FLAG_FULL_SUPPORT); + + ASET (attrs, coding_attr_iso_initial, initial); + ASET (attrs, coding_attr_iso_usage, reg_usage); + ASET (attrs, coding_attr_iso_request, request); + ASET (attrs, coding_attr_iso_flags, flags); + setup_iso_safe_charsets (attrs); + + if (i & CODING_ISO_FLAG_SEVEN_BITS) + category = ((i & (CODING_ISO_FLAG_LOCKING_SHIFT + | CODING_ISO_FLAG_SINGLE_SHIFT)) + ? coding_category_iso_7_else + : EQ (args[coding_arg_charset_list], Qiso_2022) + ? coding_category_iso_7 + : coding_category_iso_7_tight); + else + { + int id = XINT (AREF (initial, 1)); + + category = (((i & CODING_ISO_FLAG_LOCKING_SHIFT) + || EQ (args[coding_arg_charset_list], Qiso_2022) + || id < 0) + ? coding_category_iso_8_else + : (CHARSET_DIMENSION (CHARSET_FROM_ID (id)) == 1) + ? coding_category_iso_8_1 + : coding_category_iso_8_2); + } + if (category != coding_category_iso_8_1 + && category != coding_category_iso_8_2) + CODING_ATTR_ASCII_COMPAT (attrs) = Qnil; + } + else if (EQ (coding_type, Qemacs_mule)) { - if (b2 >= 0x80) - error ("Invalid BIG5 code: %x", XFASTINT (code)); - val = code; + if (EQ (args[coding_arg_charset_list], Qemacs_mule)) + ASET (attrs, coding_attr_emacs_mule_full, Qt); + CODING_ATTR_ASCII_COMPAT (attrs) = Qt; + category = coding_category_emacs_mule; } - else + else if (EQ (coding_type, Qshift_jis)) { - if ((b1 < 0xA1 || b1 > 0xFE) - || (b2 < 0x40 || (b2 > 0x7E && b2 < 0xA1) || b2 > 0xFE)) - error ("Invalid BIG5 code: %x", XFASTINT (code)); - DECODE_BIG5 (b1, b2, charset, c1, c2); - XSETFASTINT (val, MAKE_CHAR (charset, c1, c2)); + + struct charset *charset; + + if (XINT (Flength (charset_list)) != 3) + error ("There should be just three charsets"); + + charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); + if (CHARSET_DIMENSION (charset) != 1) + error ("Dimension of charset %s is not one", - XSYMBOL (CHARSET_NAME (charset))->name->data); ++ SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); + if (CHARSET_ASCII_COMPATIBLE_P (charset)) + CODING_ATTR_ASCII_COMPAT (attrs) = Qt; + + charset_list = XCDR (charset_list); + charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); + if (CHARSET_DIMENSION (charset) != 1) + error ("Dimension of charset %s is not one", - XSYMBOL (CHARSET_NAME (charset))->name->data); ++ SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); + + charset_list = XCDR (charset_list); + charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); + if (CHARSET_DIMENSION (charset) != 2) + error ("Dimension of charset %s is not two", - XSYMBOL (CHARSET_NAME (charset))->name->data); ++ SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); + + category = coding_category_sjis; + Vsjis_coding_system = name; } - return val; - } + else if (EQ (coding_type, Qbig5)) + { + struct charset *charset; - DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0, - doc: /* Encode the Big5 character CHAR to BIG5 coding system. - Return the corresponding character code in Big5. */) - (ch) - Lisp_Object ch; - { - int charset, c1, c2, b1, b2; - Lisp_Object val; + if (XINT (Flength (charset_list)) != 2) + error ("There should be just two charsets"); + + charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); + if (CHARSET_DIMENSION (charset) != 1) + error ("Dimension of charset %s is not one", - XSYMBOL (CHARSET_NAME (charset))->name->data); ++ SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); + if (CHARSET_ASCII_COMPATIBLE_P (charset)) + CODING_ATTR_ASCII_COMPAT (attrs) = Qt; + + charset_list = XCDR (charset_list); + charset = CHARSET_FROM_ID (XINT (XCAR (charset_list))); + if (CHARSET_DIMENSION (charset) != 2) + error ("Dimension of charset %s is not two", - XSYMBOL (CHARSET_NAME (charset))->name->data); ++ SDATA (SYMBOL_NAME (CHARSET_NAME (charset)))); - CHECK_NUMBER (ch); - SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); - if (charset == CHARSET_ASCII) + category = coding_category_big5; + Vbig5_coding_system = name; + } + else if (EQ (coding_type, Qraw_text)) { - val = ch; + category = coding_category_raw_text; + CODING_ATTR_ASCII_COMPAT (attrs) = Qt; } - else if ((charset == charset_big5_1 - && (XFASTINT (ch) >= 0x250a1 && XFASTINT (ch) <= 0x271ec)) - || (charset == charset_big5_2 - && XFASTINT (ch) >= 0x290a1 && XFASTINT (ch) <= 0x2bdb2)) + else if (EQ (coding_type, Qutf_8)) { - ENCODE_BIG5 (charset, c1, c2, b1, b2); - XSETFASTINT (val, (b1 << 8) | b2); + category = coding_category_utf_8; + CODING_ATTR_ASCII_COMPAT (attrs) = Qt; } + else if (EQ (coding_type, Qundecided)) + category = coding_category_undecided; else - error ("Can't encode to Big5: %d", XFASTINT (ch)); - return val; - } - - DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal, - Sset_terminal_coding_system_internal, 1, 1, 0, - doc: /* Internal use only. */) - (coding_system) - Lisp_Object coding_system; - { - CHECK_SYMBOL (coding_system); - setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding); - /* We had better not send unsafe characters to terminal. */ - terminal_coding.mode |= CODING_MODE_INHIBIT_UNENCODABLE_CHAR; - /* Character composition should be disabled. */ - terminal_coding.composing = COMPOSITION_DISABLED; - /* Error notification should be suppressed. */ - terminal_coding.suppress_error = 1; - terminal_coding.src_multibyte = 1; - terminal_coding.dst_multibyte = 0; - return Qnil; - } + error ("Invalid coding system type: %s", - XSYMBOL (coding_type)->name->data); ++ SDATA (SYMBOL_NAME (coding_type))); - DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal, - Sset_safe_terminal_coding_system_internal, 1, 1, 0, - doc: /* Internal use only. */) - (coding_system) - Lisp_Object coding_system; - { - CHECK_SYMBOL (coding_system); - setup_coding_system (Fcheck_coding_system (coding_system), - &safe_terminal_coding); - /* Character composition should be disabled. */ - safe_terminal_coding.composing = COMPOSITION_DISABLED; - /* Error notification should be suppressed. */ - terminal_coding.suppress_error = 1; - safe_terminal_coding.src_multibyte = 1; - safe_terminal_coding.dst_multibyte = 0; - return Qnil; - } + CODING_ATTR_CATEGORY (attrs) = make_number (category); + CODING_ATTR_PLIST (attrs) + = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category), + CODING_ATTR_PLIST (attrs))); - DEFUN ("terminal-coding-system", Fterminal_coding_system, - Sterminal_coding_system, 0, 0, 0, - doc: /* Return coding system specified for terminal output. */) - () - { - return terminal_coding.symbol; - } + eol_type = args[coding_arg_eol_type]; + if (! NILP (eol_type) + && ! EQ (eol_type, Qunix) + && ! EQ (eol_type, Qdos) + && ! EQ (eol_type, Qmac)) + error ("Invalid eol-type"); - DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal, - Sset_keyboard_coding_system_internal, 1, 1, 0, - doc: /* Internal use only. */) - (coding_system) - Lisp_Object coding_system; - { - CHECK_SYMBOL (coding_system); - setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding); - /* Character composition should be disabled. */ - keyboard_coding.composing = COMPOSITION_DISABLED; - return Qnil; - } + aliases = Fcons (name, Qnil); - DEFUN ("keyboard-coding-system", Fkeyboard_coding_system, - Skeyboard_coding_system, 0, 0, 0, - doc: /* Return coding system specified for decoding keyboard input. */) - () - { - return keyboard_coding.symbol; - } + if (NILP (eol_type)) + { + eol_type = make_subsidiaries (name); + for (i = 0; i < 3; i++) + { + Lisp_Object this_spec, this_name, this_aliases, this_eol_type; + + this_name = AREF (eol_type, i); + this_aliases = Fcons (this_name, Qnil); + this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); + this_spec = Fmake_vector (make_number (3), attrs); + ASET (this_spec, 1, this_aliases); + ASET (this_spec, 2, this_eol_type); + Fputhash (this_name, this_spec, Vcoding_system_hash_table); + Vcoding_system_list = Fcons (this_name, Vcoding_system_list); + Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (this_name), Qnil), + Vcoding_system_alist); + } + } - - DEFUN ("find-operation-coding-system", Ffind_operation_coding_system, - Sfind_operation_coding_system, 1, MANY, 0, - doc: /* Choose a coding system for an operation based on the target name. - The value names a pair of coding systems: (DECODING-SYSTEM . ENCODING-SYSTEM). - DECODING-SYSTEM is the coding system to use for decoding - \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system - for encoding (in case OPERATION does encoding). + spec_vec = Fmake_vector (make_number (3), attrs); + ASET (spec_vec, 1, aliases); + ASET (spec_vec, 2, eol_type); - The first argument OPERATION specifies an I/O primitive: - For file I/O, `insert-file-contents' or `write-region'. - For process I/O, `call-process', `call-process-region', or `start-process'. - For network I/O, `open-network-stream'. + Fputhash (name, spec_vec, Vcoding_system_hash_table); + Vcoding_system_list = Fcons (name, Vcoding_system_list); + Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (name), Qnil), + Vcoding_system_alist); - The remaining arguments should be the same arguments that were passed - to the primitive. Depending on which primitive, one of those arguments - is selected as the TARGET. For example, if OPERATION does file I/O, - whichever argument specifies the file name is TARGET. + { + int id = coding_categories[category].id; - TARGET has a meaning which depends on OPERATION: - For file I/O, TARGET is a file name. - For process I/O, TARGET is a process name. - For network I/O, TARGET is a service name or a port number + if (id < 0 || EQ (name, CODING_ID_NAME (id))) + setup_coding_system (name, &coding_categories[category]); + } - This function looks up what specified for TARGET in, - `file-coding-system-alist', `process-coding-system-alist', - or `network-coding-system-alist' depending on OPERATION. - They may specify a coding system, a cons of coding systems, - or a function symbol to call. - In the last case, we call the function with one argument, - which is a list of all the arguments given to this function. + return Qnil; - usage: (find-operation-coding-system OPERATION ARGUMENTS ...) */) - (nargs, args) - int nargs; - Lisp_Object *args; - { - Lisp_Object operation, target_idx, target, val; - register Lisp_Object chain; + short_args: + return Fsignal (Qwrong_number_of_arguments, + Fcons (intern ("define-coding-system-internal"), + make_number (nargs))); + } - if (nargs < 2) - error ("Too few arguments"); - operation = args[0]; - if (!SYMBOLP (operation) - || !INTEGERP (target_idx = Fget (operation, Qtarget_idx))) - error ("Invalid first argument"); - if (nargs < 1 + XINT (target_idx)) - error ("Too few arguments for operation: %s", - SDATA (SYMBOL_NAME (operation))); - /* For write-region, if the 6th argument (i.e. VISIT, the 5th - argument to write-region) is string, it must be treated as a - target file name. */ - if (EQ (operation, Qwrite_region) - && nargs > 5 - && STRINGP (args[5])) - target_idx = make_number (4); - target = args[XINT (target_idx) + 1]; - if (!(STRINGP (target) - || (EQ (operation, Qopen_network_stream) && INTEGERP (target)))) - error ("Invalid argument %d", XINT (target_idx) + 1); + /* Fixme: should this record the alias relationships for + diagnostics? Should it update coding-system-list? */ + DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, + Sdefine_coding_system_alias, 2, 2, 0, + doc: /* Define ALIAS as an alias for CODING-SYSTEM. */) + (alias, coding_system) + Lisp_Object alias, coding_system; + { + Lisp_Object spec, aliases, eol_type; - chain = ((EQ (operation, Qinsert_file_contents) - || EQ (operation, Qwrite_region)) - ? Vfile_coding_system_alist - : (EQ (operation, Qopen_network_stream) - ? Vnetwork_coding_system_alist - : Vprocess_coding_system_alist)); - if (NILP (chain)) - return Qnil; + CHECK_SYMBOL (alias); + CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec); + aliases = AREF (spec, 1); + while (!NILP (XCDR (aliases))) + aliases = XCDR (aliases); - XCDR (aliases) = Fcons (alias, Qnil); ++ XSETCDR (aliases, Fcons (alias, Qnil)); - for (; CONSP (chain); chain = XCDR (chain)) + eol_type = AREF (spec, 2); + if (VECTORP (eol_type)) { - Lisp_Object elt; - elt = XCAR (chain); + Lisp_Object subsidiaries; + int i; - if (CONSP (elt) - && ((STRINGP (target) - && STRINGP (XCAR (elt)) - && fast_string_match (XCAR (elt), target) >= 0) - || (INTEGERP (target) && EQ (target, XCAR (elt))))) - { - val = XCDR (elt); - /* Here, if VAL is both a valid coding system and a valid - function symbol, we return VAL as a coding system. */ - if (CONSP (val)) - return val; - if (! SYMBOLP (val)) - return Qnil; - if (! NILP (Fcoding_system_p (val))) - return Fcons (val, val); - if (! NILP (Ffboundp (val))) - { - val = call1 (val, Flist (nargs, args)); - if (CONSP (val)) - return val; - if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val))) - return Fcons (val, val); - } - return Qnil; - } + subsidiaries = make_subsidiaries (alias); + for (i = 0; i < 3; i++) + Fdefine_coding_system_alias (AREF (subsidiaries, i), + AREF (eol_type, i)); + + ASET (spec, 2, subsidiaries); } + + Fputhash (alias, spec, Vcoding_system_hash_table); + Vcoding_system_alist = Fcons (Fcons (Fsymbol_name (alias), Qnil), + Vcoding_system_alist); + return Qnil; } @@@ -7471,11 -8539,24 +8652,29 @@@ init_coding_once ( void syms_of_coding () { - Qtarget_idx = intern ("target-idx"); - staticpro (&Qtarget_idx); + staticpro (&Vcoding_system_hash_table); - Vcoding_system_hash_table = Fmakehash (Qeq); ++ { ++ Lisp_Object args[2]; ++ args[0] = QCtest; ++ args[1] = Qeq; ++ Vcoding_system_hash_table = Fmake_hash_table (2, args); ++ } + + staticpro (&Vsjis_coding_system); + Vsjis_coding_system = Qnil; - Qcoding_system_history = intern ("coding-system-history"); - staticpro (&Qcoding_system_history); + staticpro (&Vbig5_coding_system); + Vbig5_coding_system = Qnil; + + staticpro (&Vcode_conversion_work_buf_list); + Vcode_conversion_work_buf_list = Qnil; + + staticpro (&Vcode_conversion_reused_work_buf); + Vcode_conversion_reused_work_buf = Qnil; + + DEFSYM (Qcharset, "charset"); + DEFSYM (Qtarget_idx, "target-idx"); + DEFSYM (Qcoding_system_history, "coding-system-history"); Fset (Qcoding_system_history, Qnil); /* Target FILENAME is the first argument. */ @@@ -7503,33 -8580,35 +8698,36 @@@ /* Target SERVICE is the fourth argument. */ Fput (Qopen_network_stream, Qtarget_idx, make_number (3)); - Qcoding_system = intern ("coding-system"); - staticpro (&Qcoding_system); - - Qeol_type = intern ("eol-type"); - staticpro (&Qeol_type); + DEFSYM (Qcoding_system, "coding-system"); + DEFSYM (Qcoding_aliases, "coding-aliases"); - Qbuffer_file_coding_system = intern ("buffer-file-coding-system"); - staticpro (&Qbuffer_file_coding_system); + DEFSYM (Qeol_type, "eol-type"); + DEFSYM (Qunix, "unix"); + DEFSYM (Qdos, "dos"); - Qpost_read_conversion = intern ("post-read-conversion"); - staticpro (&Qpost_read_conversion); + DEFSYM (Qbuffer_file_coding_system, "buffer-file-coding-system"); + DEFSYM (Qpost_read_conversion, "post-read-conversion"); + DEFSYM (Qpre_write_conversion, "pre-write-conversion"); + DEFSYM (Qdefault_char, "default-char"); + DEFSYM (Qundecided, "undecided"); + DEFSYM (Qno_conversion, "no-conversion"); + DEFSYM (Qraw_text, "raw-text"); - Qpre_write_conversion = intern ("pre-write-conversion"); - staticpro (&Qpre_write_conversion); + DEFSYM (Qiso_2022, "iso-2022"); - Qno_conversion = intern ("no-conversion"); - staticpro (&Qno_conversion); + DEFSYM (Qutf_8, "utf-8"); ++ DEFSYM (Qutf_8_emacs, "utf-8-emacs"); - Qundecided = intern ("undecided"); - staticpro (&Qundecided); + DEFSYM (Qutf_16, "utf-16"); + DEFSYM (Qbig, "big"); + DEFSYM (Qlittle, "little"); - Qcoding_system_p = intern ("coding-system-p"); - staticpro (&Qcoding_system_p); + DEFSYM (Qshift_jis, "shift-jis"); + DEFSYM (Qbig5, "big5"); - Qcoding_system_error = intern ("coding-system-error"); - staticpro (&Qcoding_system_error); + DEFSYM (Qcoding_system_p, "coding-system-p"); + DEFSYM (Qcoding_system_error, "coding-system-error"); Fput (Qcoding_system_error, Qerror_conditions, Fcons (Qcoding_system_error, Fcons (Qerror, Qnil))); Fput (Qcoding_system_error, Qerror_message, @@@ -7602,7 -8682,7 +8801,8 @@@ defsubr (&Sdetect_coding_region); defsubr (&Sdetect_coding_string); defsubr (&Sfind_coding_systems_region_internal); + defsubr (&Sunencodable_char_position); + defsubr (&Scheck_coding_systems_region); defsubr (&Sdecode_coding_region); defsubr (&Sencode_coding_region); defsubr (&Sdecode_coding_string); @@@ -7816,18 -8913,10 +9033,19 @@@ coding system used in each operation ca The default value is `select-safe-coding-system' (which see). */); Vselect_safe_coding_system_function = Qnil; + DEFVAR_BOOL ("coding-system-require-warning", + &coding_system_require_warning, + doc: /* Internal use only. +If non-nil, on writing a file, `select-safe-coding-system-function' is +called even if `coding-system-for-write' is non-nil. The command +`universal-coding-system-argument' binds this variable to t temporarily. */); + coding_system_require_warning = 0; + + DEFVAR_BOOL ("inhibit-iso-escape-detection", &inhibit_iso_escape_detection, - doc: /* If non-nil, Emacs ignores ISO2022's escape sequence on code detection. + doc: /* + If non-nil, Emacs ignores ISO2022's escape sequence on code detection. By default, on reading a file, Emacs tries to detect how the text is encoded. This code detection is sensitive to escape sequences. If @@@ -7852,11 -8941,46 +9070,54 @@@ to explicitly specify some coding syste escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */); inhibit_iso_escape_detection = 0; + DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input, + doc: /* Char table for translating self-inserting characters. +This is applied to the result of input methods, not their input. See also +`keyboard-translate-table'. */); + Vtranslation_table_for_input = Qnil; ++ + { + Lisp_Object args[coding_arg_max]; - Lisp_Object plist[14]; ++ Lisp_Object plist[16]; + int i; + + for (i = 0; i < coding_arg_max; i++) + args[i] = Qnil; + + plist[0] = intern (":name"); + plist[1] = args[coding_arg_name] = Qno_conversion; + plist[2] = intern (":mnemonic"); + plist[3] = args[coding_arg_mnemonic] = make_number ('='); + plist[4] = intern (":coding-type"); + plist[5] = args[coding_arg_coding_type] = Qraw_text; + plist[6] = intern (":ascii-compatible-p"); + plist[7] = args[coding_arg_ascii_compatible_p] = Qt; + plist[8] = intern (":default-char"); + plist[9] = args[coding_arg_default_char] = make_number (0); - plist[10] = intern (":docstring"); - plist[11] = build_string ("Do no conversion.\n\ ++ plist[10] = intern (":for-unibyte"); ++ plist[11] = args[coding_arg_for_unibyte] = Qt; ++ plist[12] = intern (":docstring"); ++ plist[13] = build_string ("Do no conversion.\n\ + \n\ + When you visit a file with this coding, the file is read into a\n\ + unibyte buffer as is, thus each byte of a file is treated as a\n\ + character."); - plist[12] = intern (":eol-type"); - plist[13] = args[coding_arg_eol_type] = Qunix; - args[coding_arg_plist] = Flist (14, plist); ++ plist[14] = intern (":eol-type"); ++ plist[15] = args[coding_arg_eol_type] = Qunix; ++ args[coding_arg_plist] = Flist (16, plist); + Fdefine_coding_system_internal (coding_arg_max, args); + } + + setup_coding_system (Qno_conversion, &keyboard_coding); + setup_coding_system (Qno_conversion, &terminal_coding); + setup_coding_system (Qno_conversion, &safe_terminal_coding); + + { + int i; + + for (i = 0; i < coding_category_max; i++) + Fset (AREF (Vcoding_category_table, i), Qno_conversion); + } } char * diff --cc src/coding.h index 4d020d31521,8522e632974..78a7d4aac04 --- a/src/coding.h +++ b/src/coding.h @@@ -1,6 -1,9 +1,9 @@@ /* Header for coding system handler. Copyright (C) 1995, 1997 Electrotechnical Laboratory, JAPAN. -- Licensed to the Free Software Foundation. - Copyright (C) 2001, 2002 ++ Licensed to the Free Software Foundation. ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. @@@ -22,203 -25,188 +25,190 @@@ Boston, MA 02111-1307, USA. * #ifndef EMACS_CODING_H #define EMACS_CODING_H - #include "ccl.h" + /* Index to arguments of Fdefine_coding_system_internal. */ - /*** EMACS' INTERNAL FORMAT (emacs-mule) section ***/ + enum define_coding_system_arg_index + { + coding_arg_name, + coding_arg_mnemonic, + coding_arg_coding_type, + coding_arg_charset_list, + coding_arg_ascii_compatible_p, + coding_arg_decode_translation_table, + coding_arg_encode_translation_table, + coding_arg_post_read_conversion, + coding_arg_pre_write_conversion, + coding_arg_default_char, ++ coding_arg_for_unibyte, + coding_arg_plist, + coding_arg_eol_type, + coding_arg_max + }; - /* All code (1-byte) of Emacs' internal format is classified into one - of the followings. See also `charset.h'. */ - enum emacs_code_class_type + enum define_coding_iso2022_arg_index { - EMACS_control_code, /* Control codes in the range - 0x00..0x1F and 0x7F except for the - following two codes. */ - EMACS_linefeed_code, /* 0x0A (linefeed) to denote - end-of-line. */ - EMACS_carriage_return_code, /* 0x0D (carriage-return) to be used - in selective display mode. */ - EMACS_ascii_code, /* ASCII characters. */ - EMACS_leading_code_2, /* Base leading code of official - TYPE9N character. */ - EMACS_leading_code_3, /* Base leading code of private TYPE9N - or official TYPE9Nx9N character. */ - EMACS_leading_code_4, /* Base leading code of private - TYPE9Nx9N character. */ - EMACS_invalid_code /* Invalid code, i.e. a base leading - code not yet assigned to any - charset, or a code of the range - 0xA0..0xFF. */ + coding_arg_iso2022_initial = coding_arg_max, + coding_arg_iso2022_reg_usage, + coding_arg_iso2022_request, + coding_arg_iso2022_flags, + coding_arg_iso2022_max }; - extern enum emacs_code_class_type emacs_code_class[256]; - - /*** ISO2022 section ***/ - - /* Macros to define code of control characters for ISO2022's functions. */ - /* code */ /* function */ - #define ISO_CODE_LF 0x0A /* line-feed */ - #define ISO_CODE_CR 0x0D /* carriage-return */ - #define ISO_CODE_SO 0x0E /* shift-out */ - #define ISO_CODE_SI 0x0F /* shift-in */ - #define ISO_CODE_SS2_7 0x19 /* single-shift-2 for 7-bit code */ - #define ISO_CODE_ESC 0x1B /* escape */ - #define ISO_CODE_SS2 0x8E /* single-shift-2 */ - #define ISO_CODE_SS3 0x8F /* single-shift-3 */ - #define ISO_CODE_CSI 0x9B /* control-sequence-introduce */ - - /* All code (1-byte) of ISO2022 is classified into one of the - followings. */ - enum iso_code_class_type + enum define_coding_utf16_arg_index { - ISO_control_0, /* Control codes in the range - 0x00..0x1F and 0x7F, except for the - following 5 codes. */ - ISO_carriage_return, /* ISO_CODE_CR (0x0D) */ - ISO_shift_out, /* ISO_CODE_SO (0x0E) */ - ISO_shift_in, /* ISO_CODE_SI (0x0F) */ - ISO_single_shift_2_7, /* ISO_CODE_SS2_7 (0x19) */ - ISO_escape, /* ISO_CODE_SO (0x1B) */ - ISO_control_1, /* Control codes in the range - 0x80..0x9F, except for the - following 3 codes. */ - ISO_single_shift_2, /* ISO_CODE_SS2 (0x8E) */ - ISO_single_shift_3, /* ISO_CODE_SS3 (0x8F) */ - ISO_control_sequence_introducer, /* ISO_CODE_CSI (0x9B) */ - ISO_0x20_or_0x7F, /* Codes of the values 0x20 or 0x7F. */ - ISO_graphic_plane_0, /* Graphic codes in the range 0x21..0x7E. */ - ISO_0xA0_or_0xFF, /* Codes of the values 0xA0 or 0xFF. */ - ISO_graphic_plane_1 /* Graphic codes in the range 0xA1..0xFE. */ + coding_arg_utf16_bom = coding_arg_max, + coding_arg_utf16_endian, + coding_arg_utf16_max }; - /** The macros CODING_FLAG_ISO_XXX defines a flag bit of the `flags' - element in the structure `coding_system'. This information is used - while encoding a text to ISO2022. **/ + enum define_coding_ccl_arg_index + { + coding_arg_ccl_decoder = coding_arg_max, + coding_arg_ccl_encoder, + coding_arg_ccl_valids, + coding_arg_ccl_max + }; - /* If set, produce short-form designation sequence (e.g. ESC $ A) - instead of long-form sequence (e.g. ESC $ ( A). */ - #define CODING_FLAG_ISO_SHORT_FORM 0x0001 + extern Lisp_Object Vcoding_system_hash_table; - /* If set, reset graphic planes and registers at end-of-line to the - initial state. */ - #define CODING_FLAG_ISO_RESET_AT_EOL 0x0002 + /* Enumeration of coding system type. */ - /* If set, reset graphic planes and registers before any control - characters to the initial state. */ - #define CODING_FLAG_ISO_RESET_AT_CNTL 0x0004 + enum coding_system_type + { + coding_type_charset, + coding_type_utf_8, + coding_type_utf_16, + coding_type_iso_2022, + coding_type_emacs_mule, + coding_type_sjis, + coding_type_ccl, + coding_type_raw_text, + coding_type_undecided, + coding_type_max + }; - /* If set, encode by 7-bit environment. */ - #define CODING_FLAG_ISO_SEVEN_BITS 0x0008 - /* If set, use locking-shift function. */ - #define CODING_FLAG_ISO_LOCKING_SHIFT 0x0010 + /* Enumeration of end-of-line format type. */ - /* If set, use single-shift function. Overwrite - CODING_FLAG_ISO_LOCKING_SHIFT. */ - #define CODING_FLAG_ISO_SINGLE_SHIFT 0x0020 + enum end_of_line_type + { + eol_lf, /* Line-feed only, same as Emacs' internal + format. */ + eol_crlf, /* Sequence of carriage-return and + line-feed. */ + eol_cr, /* Carriage-return only. */ + eol_any, /* Accept any of above. Produce line-feed + only. */ + eol_undecided, /* This value is used to denote that the + eol-type is not yet undecided. */ + eol_type_max + }; - /* If set, designate JISX0201-Roman instead of ASCII. */ - #define CODING_FLAG_ISO_USE_ROMAN 0x0040 + /* Enumeration of index to an attribute vector of a coding system. */ - /* If set, designate JISX0208-1978 instead of JISX0208-1983. */ - #define CODING_FLAG_ISO_USE_OLDJIS 0x0080 + enum coding_attr_index + { + coding_attr_base_name, + coding_attr_docstring, + coding_attr_mnemonic, + coding_attr_type, + coding_attr_charset_list, + coding_attr_ascii_compat, + coding_attr_decode_tbl, + coding_attr_encode_tbl, + coding_attr_post_read, + coding_attr_pre_write, + coding_attr_default_char, ++ coding_attr_for_unibyte, + coding_attr_plist, + + coding_attr_category, + coding_attr_safe_charsets, + + /* The followings are extra attributes for each type. */ + coding_attr_charset_valids, + + coding_attr_ccl_decoder, + coding_attr_ccl_encoder, + coding_attr_ccl_valids, + + coding_attr_iso_initial, + coding_attr_iso_usage, + coding_attr_iso_request, + coding_attr_iso_flags, + + coding_attr_utf_16_bom, + coding_attr_utf_16_endian, + + coding_attr_emacs_mule_full, + + coding_attr_last_index + }; - /* If set, do not produce ISO6429's direction specifying sequence. */ - #define CODING_FLAG_ISO_NO_DIRECTION 0x0100 - /* If set, assume designation states are reset at beginning of line on - output. */ - #define CODING_FLAG_ISO_INIT_AT_BOL 0x0200 + #define CODING_ATTR_BASE_NAME(attrs) AREF (attrs, coding_attr_base_name) + #define CODING_ATTR_TYPE(attrs) AREF (attrs, coding_attr_type) + #define CODING_ATTR_CHARSET_LIST(attrs) AREF (attrs, coding_attr_charset_list) + #define CODING_ATTR_MNEMONIC(attrs) AREF (attrs, coding_attr_mnemonic) + #define CODING_ATTR_DOCSTRING(attrs) AREF (attrs, coding_attr_docstring) + #define CODING_ATTR_ASCII_COMPAT(attrs) AREF (attrs, coding_attr_ascii_compat) + #define CODING_ATTR_DECODE_TBL(attrs) AREF (attrs, coding_attr_decode_tbl) + #define CODING_ATTR_ENCODE_TBL(attrs) AREF (attrs, coding_attr_encode_tbl) + #define CODING_ATTR_POST_READ(attrs) AREF (attrs, coding_attr_post_read) + #define CODING_ATTR_PRE_WRITE(attrs) AREF (attrs, coding_attr_pre_write) + #define CODING_ATTR_DEFAULT_CHAR(attrs) AREF (attrs, coding_attr_default_char) -#define CODING_ATTR_DIRECTION(attrs) AREF (attrs, coding_attr_direction) ++#define CODING_ATTR_FOR_UNIBYTE(attrs) AREF (attrs, coding_attr_for_unibyte) + #define CODING_ATTR_FLUSHING(attrs) AREF (attrs, coding_attr_flushing) + #define CODING_ATTR_PLIST(attrs) AREF (attrs, coding_attr_plist) + #define CODING_ATTR_CATEGORY(attrs) AREF (attrs, coding_attr_category) + #define CODING_ATTR_SAFE_CHARSETS(attrs)AREF (attrs, coding_attr_safe_charsets) - /* If set, designation sequence should be placed at beginning of line - on output. */ - #define CODING_FLAG_ISO_DESIGNATE_AT_BOL 0x0400 - /* If set, do not encode unsafe characters on output. */ - #define CODING_FLAG_ISO_SAFE 0x0800 + #define CODING_ID_ATTRS(id) \ + (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 0)) - /* If set, extra latin codes (128..159) are accepted as a valid code - on input. */ - #define CODING_FLAG_ISO_LATIN_EXTRA 0x1000 + #define CODING_ID_ALIASES(id) \ + (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 1)) - /* If set, use designation escape sequence. */ - #define CODING_FLAG_ISO_DESIGNATION 0x10000 + #define CODING_ID_EOL_TYPE(id) \ + (AREF (HASH_VALUE (XHASH_TABLE (Vcoding_system_hash_table), id), 2)) - /* A character to be produced on output if encoding of the original - character is inhibitted by CODING_MODE_INHIBIT_UNENCODABLE_CHAR. - It must be an ASCII character. */ - #define CODING_REPLACEMENT_CHARACTER '?' + #define CODING_ID_NAME(id) \ + (HASH_KEY (XHASH_TABLE (Vcoding_system_hash_table), id)) - /* Structure of the field `spec.iso2022' in the structure `coding_system'. */ - struct iso2022_spec - { - /* The current graphic register invoked to each graphic plane. */ - int current_invocation[2]; + #define CODING_SYSTEM_SPEC(coding_system_symbol) \ + (Fgethash (coding_system_symbol, Vcoding_system_hash_table, Qnil)) - /* The current charset designated to each graphic register. */ - int current_designation[4]; + #define CODING_SYSTEM_ID(coding_system_symbol) \ + hash_lookup (XHASH_TABLE (Vcoding_system_hash_table), \ + coding_system_symbol, NULL) - /* A charset initially designated to each graphic register. */ - int initial_designation[4]; + #define CODING_SYSTEM_P(coding_system_symbol) \ + (! NILP (CODING_SYSTEM_SPEC (coding_system_symbol))) - /* If not -1, it is a graphic register specified in an invalid - designation sequence. */ - int last_invalid_designation_register; -#define CHECK_CODING_SYSTEM(x) \ ++#define CHECK_CODING_SYSTEM(x) \ + do { \ + if (!CODING_SYSTEM_P (x)) \ - x = wrong_type_argument (Qcoding_system_p, (x)); \ ++ wrong_type_argument (Qcoding_system_p, (x)); \ + } while (0) - /* A graphic register to which each charset should be designated. */ - unsigned char requested_designation[MAX_CHARSET + 1]; - /* A revision number to be specified for each charset on encoding. - The value 255 means no revision number for the corresponding - charset. */ - unsigned char charset_revision_number[MAX_CHARSET + 1]; + #define CHECK_CODING_SYSTEM_GET_SPEC(x, spec) \ + do { \ + spec = CODING_SYSTEM_SPEC (x); \ + if (NILP (spec)) \ + x = wrong_type_argument (Qcoding_system_p, (x)); \ + } while (0) - /* Set to 1 temporarily only when graphic register 2 or 3 is invoked - by single-shift while encoding. */ - int single_shifting; - /* Set to 1 temporarily only when processing at beginning of line. */ - int bol; - }; + #define CHECK_CODING_SYSTEM_GET_ID(x, id) \ + do \ + { \ + id = CODING_SYSTEM_ID (x); \ + if (id < 0) \ + x = wrong_type_argument (Qcoding_system_p, (x)); \ + } while (0) - /* Macros to access each field in the structure `spec.iso2022'. */ - #define CODING_SPEC_ISO_INVOCATION(coding, plane) \ - (coding)->spec.iso2022.current_invocation[plane] - #define CODING_SPEC_ISO_DESIGNATION(coding, reg) \ - (coding)->spec.iso2022.current_designation[reg] - #define CODING_SPEC_ISO_INITIAL_DESIGNATION(coding, reg) \ - (coding)->spec.iso2022.initial_designation[reg] - #define CODING_SPEC_ISO_REQUESTED_DESIGNATION(coding, charset) \ - (coding)->spec.iso2022.requested_designation[charset] - #define CODING_SPEC_ISO_REVISION_NUMBER(coding, charset) \ - (coding)->spec.iso2022.charset_revision_number[charset] - #define CODING_SPEC_ISO_SINGLE_SHIFTING(coding) \ - (coding)->spec.iso2022.single_shifting - #define CODING_SPEC_ISO_BOL(coding) \ - (coding)->spec.iso2022.bol - - /* A value which may appear in - coding->spec.iso2022.requested_designation indicating that the - corresponding charset does not request any graphic register to be - designated. */ - #define CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION 4 - - /* Return a charset which is currently designated to the graphic plane - PLANE in the coding-system CODING. */ - #define CODING_SPEC_ISO_PLANE_CHARSET(coding, plane) \ - ((CODING_SPEC_ISO_INVOCATION (coding, plane) < 0) \ - ? -1 \ - : CODING_SPEC_ISO_DESIGNATION (coding, \ - CODING_SPEC_ISO_INVOCATION (coding, plane))) - - /*** BIG5 section ***/ - - /* Macros to denote each type of BIG5 coding system. */ - #define CODING_FLAG_BIG5_HKU 0x00 /* BIG5-HKU is one of variants of - BIG5 developed by Hong Kong - University. */ - #define CODING_FLAG_BIG5_ETEN 0x01 /* BIG5_ETen is one of variants - of BIG5 developed by the - company ETen in Taiwan. */ /*** GENERAL section ***/ @@@ -424,28 -362,69 +364,69 @@@ struct coding_syste doesn't relocate Lisp symbols. But, when it is changed, we must find a way to protect them. */ - /* Backward pointer to the Lisp symbol of the coding system. */ - Lisp_Object symbol; + EMACS_INT src_pos, src_pos_byte, src_chars, src_bytes; + Lisp_Object src_object; - unsigned char *source; ++ const unsigned char *source; - /* Lisp function (symbol) to be called after decoding to do - additional conversion, or nil. */ - Lisp_Object post_read_conversion; + EMACS_INT dst_pos, dst_pos_byte, dst_bytes; + Lisp_Object dst_object; + unsigned char *destination; - /* Lisp function (symbol) to be called before encoding to do - additional conversion, or nil. */ - Lisp_Object pre_write_conversion; + int chars_at_source; - /* Character translation tables to look up, or nil. */ - Lisp_Object translation_table_for_decode; - Lisp_Object translation_table_for_encode; - }; + /* If an element is non-negative, it is a character code. + + If it is in the range -128..-1, it is a 8-bit character code + minus 256. + + If it is less than -128, it specifies the start of an annotation + chunk. The length of the chunk is -128 minus the value of the + element. The following elements are OFFSET, ANNOTATION-TYPE, and + a sequence of actual data for the annotation. OFFSET is a + character position offset from dst_pos or src_pos, + ANNOTATION-TYPE specfies the meaning of the annotation and how to + handle the following data.. */ + int *charbuf; + int charbuf_size, charbuf_used; + + /* Set to 1 if charbuf contains an annotation. */ + int annotated; - #define CODING_REQUIRE_FLUSHING_MASK 1 - #define CODING_REQUIRE_DECODING_MASK 2 - #define CODING_REQUIRE_ENCODING_MASK 4 - #define CODING_REQUIRE_DETECTION_MASK 8 + unsigned char carryover[64]; + int carryover_bytes; - /* Return 1 if the coding system CODING requires specific code to be + int default_char; + + int (*detector) P_ ((struct coding_system *, + struct coding_detection_info *)); + void (*decoder) P_ ((struct coding_system *)); + int (*encoder) P_ ((struct coding_system *)); + }; + + /* Meanings of bits in the member `common_flags' of the structure + coding_system. The lowest 8 bits are reserved for various kind of + annotations (currently two of them are used). */ + #define CODING_ANNOTATION_MASK 0x00FF + #define CODING_ANNOTATE_COMPOSITION_MASK 0x0001 + #define CODING_ANNOTATE_DIRECTION_MASK 0x0002 + #define CODING_ANNOTATE_CHARSET_MASK 0x0003 + #define CODING_FOR_UNIBYTE_MASK 0x0100 + #define CODING_REQUIRE_FLUSHING_MASK 0x0200 + #define CODING_REQUIRE_DECODING_MASK 0x0400 + #define CODING_REQUIRE_ENCODING_MASK 0x0800 + #define CODING_REQUIRE_DETECTION_MASK 0x1000 + #define CODING_RESET_AT_BOL_MASK 0x2000 + + /* Return 1 if the coding context CODING requires annotaion + handling. */ + #define CODING_REQUIRE_ANNOTATION(coding) \ + ((coding)->common_flags & CODING_ANNOTATION_MASK) + + /* Return 1 if the coding context CODING prefers decoding into unibyte. */ + #define CODING_FOR_UNIBYTE(coding) \ + ((coding)->common_flags & CODING_FOR_UNIBYTE_MASK) + + /* Return 1 if the coding context CODING requires specific code to be attached at the tail of converted text. */ #define CODING_REQUIRE_FLUSHING(coding) \ ((coding)->common_flags & CODING_REQUIRE_FLUSHING_MASK) @@@ -614,50 -539,74 +541,76 @@@ #endif /* !WINDOWSNT */ - #define ENCODE_UTF_8(str) code_convert_string_norecord (str, Qutf_8, 1) - /* Extern declarations. */ - extern int decode_coding P_ ((struct coding_system *, const unsigned char *, - unsigned char *, int, int)); - extern int encode_coding P_ ((struct coding_system *, const unsigned char *, - unsigned char *, int, int)); - extern void coding_save_composition P_ ((struct coding_system *, int, int, - Lisp_Object)); - extern void coding_free_composition_data P_ ((struct coding_system *)); - extern void coding_adjust_composition_offset P_ ((struct coding_system *, - int)); - extern void coding_allocate_composition_data P_ ((struct coding_system *, - int)); - extern void coding_restore_composition P_ ((struct coding_system *, - Lisp_Object)); - extern int code_convert_region P_ ((int, int, int, int, struct coding_system *, - int, int)); - extern Lisp_Object run_pre_post_conversion_on_str P_ ((Lisp_Object, - struct coding_system *, - int)); + extern Lisp_Object make_conversion_work_buffer P_ ((int, int)); extern int decoding_buffer_size P_ ((struct coding_system *, int)); extern int encoding_buffer_size P_ ((struct coding_system *, int)); - extern void detect_coding P_ ((struct coding_system *, const unsigned char *, - int)); - extern void detect_eol P_ ((struct coding_system *, const unsigned char *, - int)); - extern int setup_coding_system P_ ((Lisp_Object, struct coding_system *)); - extern Lisp_Object code_convert_string P_ ((Lisp_Object, - struct coding_system *, int, int)); - extern Lisp_Object code_convert_string1 P_ ((Lisp_Object, Lisp_Object, - Lisp_Object, int)); + extern void setup_coding_system P_ ((Lisp_Object, struct coding_system *)); + extern void detect_coding P_ ((struct coding_system *)); + extern Lisp_Object code_convert_region P_ ((Lisp_Object, Lisp_Object, + Lisp_Object, Lisp_Object, + int, int)); + extern Lisp_Object code_convert_string P_ ((Lisp_Object, Lisp_Object, + Lisp_Object, int, int, int)); extern Lisp_Object code_convert_string_norecord P_ ((Lisp_Object, Lisp_Object, int)); - extern void setup_raw_text_coding_system P_ ((struct coding_system *)); - extern Lisp_Object encode_coding_string P_ ((Lisp_Object, - struct coding_system *, int)); - extern Lisp_Object decode_coding_string P_ ((Lisp_Object, - struct coding_system *, int)); + extern Lisp_Object raw_text_coding_system P_ ((Lisp_Object)); + extern Lisp_Object coding_inherit_eol_type P_ ((Lisp_Object, Lisp_Object)); + + extern int decode_coding_gap P_ ((struct coding_system *, + EMACS_INT, EMACS_INT)); + extern int encode_coding_gap P_ ((struct coding_system *, + EMACS_INT, EMACS_INT)); + extern void decode_coding_object P_ ((struct coding_system *, + Lisp_Object, EMACS_INT, EMACS_INT, + EMACS_INT, EMACS_INT, Lisp_Object)); + extern void encode_coding_object P_ ((struct coding_system *, + Lisp_Object, EMACS_INT, EMACS_INT, + EMACS_INT, EMACS_INT, Lisp_Object)); + + #define decode_coding_region(coding, from, to) \ + decode_coding_object (coding, Fcurrent_buffer (), \ + from, CHAR_TO_BYTE (from), \ + to, CHAR_TO_BYTE (to), Fcurrent_buffer ()) + + + #define encode_coding_region(coding, from, to) \ + encode_coding_object (coding, Fcurrent_buffer (), \ + from, CHAR_TO_BYTE (from), \ + to, CHAR_TO_BYTE (to), Fcurrent_buffer ()) + + + #define decode_coding_string(coding, string, nocopy) \ + decode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \ + STRING_BYTES (XSTRING (string)), Qt) + + #define encode_coding_string(coding, string, nocopy) \ + (encode_coding_object (coding, string, 0, 0, XSTRING (string)->size, \ + STRING_BYTES (XSTRING (string)), Qt), \ + (coding)->dst_object) + + + #define decode_coding_c_string(coding, src, bytes, dst_object) \ + do { \ + (coding)->source = (src); \ + (coding)->src_chars = (coding)->src_bytes = (bytes); \ + decode_coding_object ((coding), Qnil, 0, 0, (bytes), (bytes), \ + (dst_object)); \ + } while (0) + + + extern Lisp_Object preferred_coding_system P_ (()); + + ++extern Lisp_Object Qutf_8, Qutf_8_emacs; ++ extern Lisp_Object Qcoding_system, Qeol_type, Qcoding_category_index; - extern Lisp_Object Qraw_text, Qemacs_mule; + extern Lisp_Object Qcoding_system_p; + extern Lisp_Object Qraw_text, Qemacs_mule, Qno_conversion, Qundecided; + extern Lisp_Object Qiso_2022; extern Lisp_Object Qbuffer_file_coding_system; - extern Lisp_Object Vcoding_category_list; - extern Lisp_Object Qutf_8; + + extern Lisp_Object Qunix, Qdos, Qmac; extern Lisp_Object Qtranslation_table; extern Lisp_Object Qtranslation_table_id; diff --cc src/composite.c index cc05a869126,de4ed7335aa..43da8887176 --- a/src/composite.c +++ b/src/composite.c @@@ -1,7 -1,10 +1,10 @@@ /* Composite sequence support. Copyright (C) 1999 Electrotechnical Laboratory, JAPAN. -- Licensed to the Free Software Foundation. ++ Licensed to the Free Software Foundation. Copyright (C) 2001 Free Software Foundation, Inc. - Copyright (C) 2001, 2002 ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. @@@ -462,17 -459,18 +459,18 @@@ run_composition_function (from, to, pro && find_composition (to, -1, &start, &end, &prop, Qnil) && !COMPOSITION_VALID_P (start, end, prop)) to = end; - if (!NILP (func)) + if (!NILP (Ffboundp (func))) call2 (func, make_number (from), make_number (to)); - else if (!NILP (Ffboundp (Vcompose_chars_after_function))) - call3 (Vcompose_chars_after_function, - make_number (from), make_number (to), Qnil); } /* Make invalid compositions adjacent to or inside FROM and TO valid. CHECK_MASK is bitwise `or' of mask bits defined by macros CHECK_XXX (see the comment in composite.h). + It also resets the text-property `auto-composed' to a proper region + so that automatic character composition works correctly later while + displaying the region. - ++ This function is called when a buffer text is changed. If the change is deletion, FROM == TO. Otherwise, FROM < TO. */ @@@ -541,8 -552,15 +552,15 @@@ update_compositions (from, to, check_ma } else if (to < ZV && find_composition (to, -1, &start, &end, &prop, Qnil)) - run_composition_function (start, end, prop); + { + run_composition_function (start, end, prop); + max_pos = end; + } } - + if (min_pos < max_pos) - Fput_text_property (make_number (min_pos), make_number (max_pos), - Qauto_composed, Qnil, Qnil); ++ Fremove_list_of_text_properties (make_number (min_pos), ++ make_number (max_pos), ++ Fcons (Qauto_composed, Qnil), Qnil); } @@@ -588,124 -606,7 +606,6 @@@ compose_text (start, end, components, m Fput_text_property (make_number (start), make_number (end), Qcomposition, prop, string); } - - /* Compose sequences of characters in the region between START and END - by functions registered in Vcomposition_function_table. If STRING - is non-nil, operate on characters contained between indices START - and END in STRING. */ - - void - compose_chars_in_text (start, end, string) - int start, end; - Lisp_Object string; - { - int count = 0; - struct gcpro gcpro1; - Lisp_Object tail, elt, val, to; - /* Set to nonzero if we don't have to compose ASCII characters. */ - int skip_ascii; - int i, len, stop, c; - const unsigned char *ptr, *pend; - - if (! CHAR_TABLE_P (Vcomposition_function_table)) - return; - - if (STRINGP (string)) - { - count = SPECPDL_INDEX (); - GCPRO1 (string); - stop = end; - ptr = SDATA (string) + string_char_to_byte (string, start); - pend = ptr + SBYTES (string); - } - else - { - record_unwind_protect (save_excursion_restore, save_excursion_save ()); - TEMP_SET_PT (start); - stop = (start < GPT && GPT < end ? GPT : end); - ptr = CHAR_POS_ADDR (start); - pend = CHAR_POS_ADDR (end); - } - - /* Preserve the match data. */ - record_unwind_protect (Fset_match_data, Fmatch_data (Qnil, Qnil)); - - /* If none of ASCII characters have composition functions, we can - skip them quickly. */ - for (i = 0; i < 128; i++) - if (!NILP (CHAR_TABLE_REF (Vcomposition_function_table, i))) - break; - skip_ascii = (i == 128); - - - while (1) - { - if (skip_ascii) - while (start < stop && ASCII_BYTE_P (*ptr)) - start++, ptr++; - - if (start >= stop) - { - if (stop == end || start >= end) - break; - stop = end; - if (STRINGP (string)) - ptr = SDATA (string) + string_char_to_byte (string, start); - else - ptr = CHAR_POS_ADDR (start); - } - - c = STRING_CHAR_AND_LENGTH (ptr, pend - ptr, len); - tail = CHAR_TABLE_REF (Vcomposition_function_table, c); - while (CONSP (tail)) - { - elt = XCAR (tail); - if (CONSP (elt) - && STRINGP (XCAR (elt)) - && !NILP (Ffboundp (XCDR (elt)))) - { - if (STRINGP (string)) - val = Fstring_match (XCAR (elt), string, make_number (start)); - else - { - val = Flooking_at (XCAR (elt)); - if (!NILP (val)) - val = make_number (start); - } - if (INTEGERP (val) && XFASTINT (val) == start) - { - to = Fmatch_end (make_number (0)); - val = call4 (XCDR (elt), val, to, XCAR (elt), string); - if (INTEGERP (val) && XINT (val) > 1) - { - start += XINT (val); - if (STRINGP (string)) - ptr = SDATA (string) + string_char_to_byte (string, start); - else - ptr = CHAR_POS_ADDR (start); - } - else - { - start++; - ptr += len; - } - break; - } - } - tail = XCDR (tail); - } - if (!CONSP (tail)) - { - /* No composition done. Try the next character. */ - start++; - ptr += len; - } - } -- - unbind_to (count, Qnil); - if (STRINGP (string)) - UNGCPRO; - } /* Emacs Lisp APIs. */ diff --cc src/composite.h index 620d5d4ce28,3dd7306fc6e..d061b484ea7 --- a/src/composite.h +++ b/src/composite.h @@@ -1,7 -1,10 +1,10 @@@ /* Header for composite sequence handler. Copyright (C) 1999 Electrotechnical Laboratory, JAPAN. -- Licensed to the Free Software Foundation. ++ Licensed to the Free Software Foundation. Copyright (C) 2001 Free Software Foundation, Inc. - Copyright (C) 2001, 2002 ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. diff --cc src/data.c index d18cb187f62,2d7ca8e2037..8ee564db821 --- a/src/data.c +++ b/src/data.c @@@ -2071,8 -1955,8 +1966,8 @@@ bool-vector. IDX starts at 0. */ args_out_of_range (array, idx); CHECK_NUMBER (newelt); - if (XINT (newelt) < 0 || SINGLE_BYTE_CHAR_P (XINT (newelt))) + if (XINT (newelt) < 0 || ASCII_CHAR_P (XINT (newelt))) - XSTRING (array)->data[idxval] = XINT (newelt); + SSET (array, idxval, XINT (newelt)); else { /* We must relocate the string data while converting it to diff --cc src/dispextern.h index 3fc33f55a67,f807a2aa61e..7e917908e19 --- a/src/dispextern.h +++ b/src/dispextern.h @@@ -1364,15 -1266,13 +1365,13 @@@ struct fac reallocated. */ int font_info_id; - /* Fontset ID if this face uses a fontset, or -1. This is only >= 0 - if the face was realized for a composition sequence. - Otherwise, a specific font is loaded from the set of fonts - specified by the fontset given by the family attribute of the face. */ + /* Fontset ID if for this face's fontset. Non-ASCII faces derived + from the same ASCII face have the same fontset. */ int fontset; - + /* Pixmap width and height. */ unsigned int pixmap_w, pixmap_h; - + /* Non-zero means characters in this face have a box that thickness around them. If it is negative, the absolute value indicates the thickness, and the horizontal lines of box (top and bottom) are @@@ -1539,16 -1430,16 +1532,16 @@@ struct face_cach /* Non-zero if FACE is suitable for displaying character CHAR. */ #define FACE_SUITABLE_FOR_CHAR_P(FACE, CHAR) \ - (SINGLE_BYTE_CHAR_P (CHAR) \ - (ASCII_CHAR_P (CHAR) \ ++ (ASCII_CHAR_P (CHAR) \ ? (FACE) == (FACE)->ascii_face \ : face_suitable_for_char_p ((FACE), (CHAR))) /* Return the id of the realized face on frame F that is like the face with id ID but is suitable for displaying character CHAR. This macro is only meaningful for multibyte character CHAR. */ - + #define FACE_FOR_CHAR(F, FACE, CHAR) \ - (SINGLE_BYTE_CHAR_P (CHAR) \ - (ASCII_CHAR_P (CHAR) \ ++ (ASCII_CHAR_P (CHAR) \ ? (FACE)->ascii_face->id \ : face_for_char ((F), (FACE), (CHAR))) @@@ -2120,60 -1949,6 +2114,62 @@@ struct redisplay_interfac desired rows have been made current. */ void (*fix_overlapping_area) P_ ((struct window *w, struct glyph_row *row, enum glyph_row_area area)); + +#ifdef HAVE_WINDOW_SYSTEM + + /* Draw a fringe bitmap in window W of row ROW using parameters P. */ + void (*draw_fringe_bitmap) P_ ((struct window *w, struct glyph_row *row, + struct draw_fringe_bitmap_params *p)); + +/* Get metrics of character CHAR2B in FONT of type FONT_TYPE. + Value is null if CHAR2B is not contained in the font. */ + XCharStruct * (*per_char_metric) P_ ((XFontStruct *font, XChar2b *char2b, + int font_type)); + +/* Encode CHAR2B using encoding information from FONT_INFO. CHAR2B is + the two-byte form of C. Encoding is returned in *CHAR2B. If + TWO_BYTE_P is non-null, return non-zero there if font is two-byte. */ + int (*encode_char) P_ ((int c, XChar2b *char2b, - struct font_info *font_into, int *two_byte_p)); ++ struct font_info *font_into, ++ struct charset *charset, ++ int *two_byte_p)); + +/* Compute left and right overhang of glyph string S. + A NULL pointer if platform does not support this. */ + void (*compute_glyph_string_overhangs) P_ ((struct glyph_string *s)); + +/* Draw a glyph string S. */ + void (*draw_glyph_string) P_ ((struct glyph_string *s)); + +/* Define cursor CURSOR on frame F. */ + void (*define_frame_cursor) P_ ((struct frame *f, Cursor cursor)); + +/* Clear the area at (X,Y,WIDTH,HEIGHT) of frame F. */ + void (*clear_frame_area) P_ ((struct frame *f, int x, int y, + int width, int height)); + +/* Draw specified cursor CURSOR_TYPE of width CURSOR_WIDTH + at row GLYPH_ROW on window W if ON_P is 1. If ON_P is + 0, don't draw cursor. If ACTIVE_P is 1, system caret + should track this cursor (when applicable). */ + void (*draw_window_cursor) P_ ((struct window *w, + struct glyph_row *glyph_row, + int x, int y, + int cursor_type, int cursor_width, + int on_p, int active_p)); + +/* Draw vertical border for window W from (X,Y0) to (X,Y1). */ + void (*draw_vertical_window_border) P_ ((struct window *w, + int x, int y0, int y1)); + +/* Shift display of frame F to make room for inserted glyphs. + The area at pixel (X,Y) of width WIDTH and height HEIGHT is + shifted right by SHIFT_BY pixels. */ + void (*shift_glyphs_for_insert) P_ ((struct frame *f, + int x, int y, int width, + int height, int shift_by)); + +#endif /* HAVE_WINDOW_SYSTEM */ }; /* The current interface for window-based redisplay. */ @@@ -2566,14 -2269,17 +2562,17 @@@ void clear_face_cache P_ ((int)) unsigned long load_color P_ ((struct frame *, struct face *, Lisp_Object, enum lface_attribute_index)); void unload_color P_ ((struct frame *, unsigned long)); -int frame_update_line_height P_ ((struct frame *)); -char *choose_face_font P_ ((struct frame *, Lisp_Object *, Lisp_Object)); ++char *choose_face_font P_ ((struct frame *, Lisp_Object *, Lisp_Object, ++ int *)); int ascii_face_of_lisp_face P_ ((struct frame *, int)); void prepare_face_for_display P_ ((struct frame *, struct face *)); -int xstricmp P_ ((unsigned char *, unsigned char *)); +int xstricmp P_ ((const unsigned char *, const unsigned char *)); - int lookup_face P_ ((struct frame *, Lisp_Object *, int, struct face *)); - int lookup_named_face P_ ((struct frame *, Lisp_Object, int)); + int lookup_face P_ ((struct frame *, Lisp_Object *)); + int lookup_non_ascii_face P_ ((struct frame *, int, struct face *)); + int lookup_named_face P_ ((struct frame *, Lisp_Object)); int smaller_face P_ ((struct frame *, int, int)); int face_with_height P_ ((struct frame *, int, int)); - int lookup_derived_face P_ ((struct frame *, Lisp_Object, int, int)); + int lookup_derived_face P_ ((struct frame *, Lisp_Object, int)); void init_frame_faces P_ ((struct frame *)); void free_frame_faces P_ ((struct frame *)); void recompute_basic_faces P_ ((struct frame *)); @@@ -2583,9 -2289,12 +2582,11 @@@ int face_at_string_position P_ ((struc int, int *, enum face_id, int)); int compute_char_face P_ ((struct frame *, int, Lisp_Object)); void free_all_realized_faces P_ ((Lisp_Object)); + void free_realized_face P_ ((struct frame *, struct face *)); - extern Lisp_Object Qforeground_color, Qbackground_color; extern char unspecified_fg[], unspecified_bg[]; - void free_realized_multibyte_face P_ ((struct frame *, int)); + extern Lisp_Object split_font_name_into_vector P_ ((Lisp_Object)); + extern Lisp_Object build_font_name_from_vector P_ ((Lisp_Object)); /* Defined in xfns.c */ diff --cc src/editfns.c index 97a939ce43b,d4fd545d0c3..e58cf8a5dd9 --- a/src/editfns.c +++ b/src/editfns.c @@@ -3397,29 -3336,17 +3418,29 @@@ usage: (format STRING &rest OBJECTS) * error ("Invalid format operation %%%c", *format); thissize = 30; - if (*format == 'c' - && (! ASCII_CHAR_P (XINT (args[n])) - || XINT (args[n]) == 0)) + if (*format == 'c') { - if (! SINGLE_BYTE_CHAR_P (XINT (args[n])) - if (! multibyte) ++ if (! ASCII_CHAR_P (XINT (args[n])) + /* Note: No one can remeber why we have to treat + the character 0 as a multibyte character here. + But, until it causes a real problem, let's + don't change it. */ + || XINT (args[n]) == 0) + { + if (! multibyte) + { + multibyte = 1; + goto retry; + } + args[n] = Fchar_to_string (args[n]); + thissize = SBYTES (args[n]); + } + else if (! ASCII_BYTE_P (XINT (args[n])) && multibyte) { - multibyte = 1; - goto retry; + args[n] + = Fchar_to_string (Funibyte_char_to_multibyte (args[n])); + thissize = SBYTES (args[n]); } - args[n] = Fchar_to_string (args[n]); - thissize = STRING_BYTES (XSTRING (args[n])); } } else if (FLOATP (args[n]) && *format != 's') diff --cc src/emacs.c index 05897e9bb44,afbad1cd58e..eca2930419e --- a/src/emacs.c +++ b/src/emacs.c @@@ -1302,12 -1270,15 +1303,15 @@@ main (argc, arg Lisp_Object buffer; buffer = Fcdr (XCAR (tail)); - /* Verify that all buffers are empty now, as they - ought to be. */ - if (BUF_Z (XBUFFER (buffer)) > BUF_BEG (XBUFFER (buffer))) - abort (); - /* It is safe to do this crudely in an empty buffer. */ - XBUFFER (buffer)->enable_multibyte_characters = Qnil; - /* Make all multibyte buffers unibyte. */ ++ /* Make a multibyte buffer unibyte. */ + if (BUF_Z_BYTE (XBUFFER (buffer)) > BUF_Z (XBUFFER (buffer))) + { + struct buffer *current = current_buffer; + + set_buffer_temp (XBUFFER (buffer)); - Fset_buffer_multibyte (Qnil, Qnil); ++ Fset_buffer_multibyte (Qnil); + set_buffer_temp (current); + } } } } @@@ -1450,10 -1417,11 +1456,11 @@@ syms_of_casetab (); syms_of_callproc (); syms_of_category (); -#ifndef macintosh - /* Called before init_window_once for Mac OS. */ +#ifndef MAC_OS8 + /* Called before init_window_once for Mac OS Classic. */ syms_of_ccl (); #endif + syms_of_character (); syms_of_charset (); syms_of_cmds (); #ifndef NO_DIR_LIBRARY diff --cc src/fileio.c index a44552010c7,c0a5c75f95b..1103a51bd65 --- a/src/fileio.c +++ b/src/fileio.c @@@ -295,6 -289,18 +295,17 @@@ restore_point_unwind (location Fset_marker (location, Qnil, Qnil); return Qnil; } + + /* Kill the working buffer for code conversion. */ + + static Lisp_Object + kill_workbuf_unwind (workbuf) + Lisp_Object workbuf; + { + if (! NILP (workbuf) && ! NILP (Fbuffer_live_p (workbuf))) + Fkill_buffer (workbuf); + return Qnil; + } - Lisp_Object Qexpand_file_name; Lisp_Object Qsubstitute_in_file_name; @@@ -3772,20 -3729,7 +3787,9 @@@ actually used. */ } } - if (BEG < Z) + if (EQ (Vcoding_system_for_read, Qauto_save_coding)) - { - /* We use emacs-mule for auto saving... */ - setup_coding_system (Qemacs_mule, &coding); - /* ... but with the special flag to indicate to read in a - multibyte sequence for eight-bit-control char as is. */ - coding.flags = 1; - coding.src_multibyte = 0; - coding.dst_multibyte - = !NILP (current_buffer->enable_multibyte_characters); - coding.eol_type = CODING_EOL_LF; - coding_system_decided = 1; - } ++ coding_system = Qutf_8_emacs; + else if (BEG < Z) { /* Decide the coding system to use for reading the file now because we can't use an optimized method for handling @@@ -3837,27 -3777,22 +3838,27 @@@ record_unwind_protect (Fset_buffer, Fcurrent_buffer ()); - /* The call to temp_output_buffer_setup binds - standard-output. */ - count1 = specpdl_ptr - specpdl; - temp_output_buffer_setup (" *code-converting-work*"); - - set_buffer_internal (XBUFFER (Vstandard_output)); - current_buffer->enable_multibyte_characters = Qnil; + buffer = Fget_buffer_create (build_string (" *code-converting-work*")); + buf = XBUFFER (buffer); + + delete_all_overlays (buf); + buf->directory = current_buffer->directory; + buf->read_only = Qnil; + buf->filename = Qnil; + buf->undo_list = Qt; + eassert (buf->overlays_before == NULL); + eassert (buf->overlays_after == NULL); + + set_buffer_internal (buf); + Ferase_buffer (); + buf->enable_multibyte_characters = Qnil; + insert_1_both (read_buf, nread, nread, 0, 0, 0); TEMP_SET_PT_BOTH (BEG, BEG_BYTE); - val = call2 (Vset_auto_coding_function, - filename, make_number (nread)); + coding_system = call2 (Vset_auto_coding_function, - filename, make_number (nread)); ++ filename, make_number (nread)); set_buffer_internal (prev); - /* Remove the binding for standard-output. */ - unbind_to (count1, Qnil); - /* Discard the unwind protect for recovering the current buffer. */ specpdl_ptr--; @@@ -3873,13 -3808,13 +3874,13 @@@ { /* If we have not yet decided a coding system, check file-coding-system-alist. */ -- Lisp_Object args[6], coding_systems; ++ Lisp_Object args[6]; args[0] = Qinsert_file_contents, args[1] = orig_filename; args[2] = visit, args[3] = beg, args[4] = end, args[5] = replace; -- coding_systems = Ffind_operation_coding_system (6, args); -- if (CONSP (coding_systems)) - val = XCAR (coding_systems); - coding_system = XCAR (coding_systems); ++ coding_system = Ffind_operation_coding_system (6, args); ++ if (CONSP (coding_system)) ++ coding_system = XCAR (coding_system); } } @@@ -3950,21 -3885,14 +3951,15 @@@ else if (nread == 0) break; - if (coding.type == coding_type_undecided) - detect_coding (&coding, buffer, nread); - if (coding.common_flags & CODING_REQUIRE_DECODING_MASK) - /* We found that the file should be decoded somehow. - Let's give up here. */ + if (CODING_REQUIRE_DETECTION (&coding)) { - giveup_match_end = 1; - break; + coding_system = detect_coding_system (buffer, nread, 1, 0, + coding_system); + setup_coding_system (coding_system, &coding); } + - if (coding.eol_type == CODING_EOL_UNDECIDED) - detect_eol (&coding, buffer, nread); - if (coding.eol_type != CODING_EOL_UNDECIDED - && coding.eol_type != CODING_EOL_LF) - /* We found that the format of eol should be decoded. + if (CODING_REQUIRE_DECODING (&coding)) + /* We found that the file should be decoded somehow. Let's give up here. */ { giveup_match_end = 1; @@@ -4109,22 -4037,28 +4104,26 @@@ { int same_at_start = BEGV_BYTE; int same_at_end = ZV_BYTE; + int same_at_start_charpos; + int inserted_chars; int overlap; int bufpos; - /* Make sure that the gap is large enough. */ - int bufsize = 2 * st.st_size; - unsigned char *conversion_buffer = (unsigned char *) xmalloc (bufsize); + unsigned char *decoded; int temp; - int this_count = BINDING_STACK_SIZE (); ++ int this_count = SPECPDL_INDEX (); + int multibyte = ! NILP (current_buffer->enable_multibyte_characters); + Lisp_Object conversion_buffer + = make_conversion_work_buffer (-1, multibyte); + struct gcpro1; + + record_unwind_protect (kill_workbuf_unwind, conversion_buffer); /* First read the whole file, performing code conversion into CONVERSION_BUFFER. */ if (lseek (fd, XINT (beg), 0) < 0) -- { - xfree (conversion_buffer); -- report_file_error ("Setting file position", -- Fcons (orig_filename, Qnil)); -- } ++ report_file_error ("Setting file position", ++ Fcons (orig_filename, Qnil)); total = st.st_size; /* Total bytes in the file. */ how_much = 0; /* Bytes read from file so far. */ @@@ -4152,55 -4089,25 +4154,25 @@@ how_much += this; - if (CODING_MAY_REQUIRE_DECODING (&coding)) - { - int require, result; - - this += unprocessed; - - /* If we are using more space than estimated, - make CONVERSION_BUFFER bigger. */ - require = decoding_buffer_size (&coding, this); - if (inserted + require + 2 * (total - how_much) > bufsize) - { - bufsize = inserted + require + 2 * (total - how_much); - conversion_buffer = (unsigned char *) xrealloc (conversion_buffer, bufsize); - } - - /* Convert this batch with results in CONVERSION_BUFFER. */ - if (how_much >= total) /* This is the last block. */ - coding.mode |= CODING_MODE_LAST_BLOCK; - if (coding.composing != COMPOSITION_DISABLED) - coding_allocate_composition_data (&coding, BEGV); - result = decode_coding (&coding, read_buf, - conversion_buffer + inserted, - this, bufsize - inserted); - - /* Save for next iteration whatever we didn't convert. */ - unprocessed = this - coding.consumed; - bcopy (read_buf + coding.consumed, read_buf, unprocessed); - if (!NILP (current_buffer->enable_multibyte_characters)) - this = coding.produced; - else - this = str_as_unibyte (conversion_buffer + inserted, - coding.produced); - } - - inserted += this; + BUF_SET_PT (XBUFFER (conversion_buffer), + BUF_Z (XBUFFER (conversion_buffer))); + decode_coding_c_string (&coding, read_buf, unprocessed + this, + conversion_buffer); + unprocessed = coding.carryover_bytes; + if (coding.carryover_bytes > 0) + bcopy (coding.carryover, read_buf, unprocessed); - } + } + UNGCPRO; + emacs_close (fd); - /* At this point, INSERTED is how many characters (i.e. bytes) - are present in CONVERSION_BUFFER. - HOW_MUCH should equal TOTAL, - or should be <= 0 if we couldn't read the file. */ + /* At this point, HOW_MUCH should equal TOTAL, or should be <= 0 + if we couldn't read the file. */ if (how_much < 0) { - xfree (conversion_buffer); - if (how_much == -1) error ("IO error reading %s: %s", - XSTRING (orig_filename)->data, emacs_strerror (errno)); + SDATA (orig_filename), emacs_strerror (errno)); else if (how_much == -2) error ("maximum buffer size exceeded"); } @@@ -4328,15 -4246,15 +4311,15 @@@ before exiting the loop, it is set to a negative value if I/O error occurs. */ how_much = 0; - + /* Total bytes inserted. */ inserted = 0; - + /* Here, we don't do code conversion in the loop. It is done by - code_convert_region after all data are read into the buffer. */ + decode_coding_gap after all data are read into the buffer. */ { int gap_size = GAP_SIZE; - + while (how_much < total) { /* try is reserved in some compilers (Microsoft C) */ @@@ -4456,82 -4372,85 +4437,87 @@@ if (inserted > 0 && ! NILP (Vset_auto_coding_function)) { - val = call2 (Vset_auto_coding_function, - filename, make_number (inserted)); + coding_system = call2 (Vset_auto_coding_function, - filename, make_number (inserted)); ++ filename, make_number (inserted)); } - if (NILP (val)) + if (NILP (coding_system)) { /* If the coding system is not yet decided, check file-coding-system-alist. */ -- Lisp_Object args[6], coding_systems; ++ Lisp_Object args[6]; args[0] = Qinsert_file_contents, args[1] = orig_filename; args[2] = visit, args[3] = beg, args[4] = end, args[5] = Qnil; -- coding_systems = Ffind_operation_coding_system (6, args); -- if (CONSP (coding_systems)) - val = XCAR (coding_systems); - coding_system = XCAR (coding_systems); ++ coding_system = Ffind_operation_coding_system (6, args); ++ if (CONSP (coding_system)) ++ coding_system = XCAR (coding_system); } unbind_to (count, Qnil); inserted = Z_BYTE - BEG_BYTE; } - /* The following kludgy code is to avoid some compiler bug. - We can't simply do - setup_coding_system (val, &coding); - on some system. */ - { - struct coding_system temp_coding; - setup_coding_system (val, &temp_coding); - bcopy (&temp_coding, &coding, sizeof coding); - } - /* Ensure we set Vlast_coding_system_used. */ - set_coding_system = 1; + if (NILP (coding_system)) + coding_system = Qundecided; + else + CHECK_CODING_SYSTEM (coding_system); - if (NILP (current_buffer->enable_multibyte_characters) - && ! NILP (val)) + if (NILP (current_buffer->enable_multibyte_characters)) /* We must suppress all character code conversion except for end-of-line conversion. */ - setup_raw_text_coding_system (&coding); - coding.src_multibyte = 0; - coding.dst_multibyte - = !NILP (current_buffer->enable_multibyte_characters); + coding_system = raw_text_coding_system (coding_system); - + setup_coding_system (coding_system, &coding); + /* Ensure we set Vlast_coding_system_used. */ + set_coding_system = 1; } - if (!NILP (visit) - /* Can't do this if part of the buffer might be preserved. */ - && NILP (replace) - && (coding.type == coding_type_no_conversion - || coding.type == coding_type_raw_text)) + if (!NILP (visit)) { - /* Visiting a file with these coding system makes the buffer - unibyte. */ - current_buffer->enable_multibyte_characters = Qnil; - coding.dst_multibyte = 0; + /* When we visit a file by raw-text, we change the buffer to + unibyte. If we have not yet decided how to decode a text, + decide it at first by detecting the file's encoding. */ + if (CODING_REQUIRE_DETECTION (&coding)) + { + coding_system = detect_coding_system (PT_ADDR, inserted, 1, 0, + coding_system); + setup_coding_system (coding_system, &coding); + } + + if (CODING_FOR_UNIBYTE (&coding) + /* Can't do this if part of the buffer might be preserved. */ + && NILP (replace)) + /* Visiting a file with these coding system makes the buffer + unibyte. */ + current_buffer->enable_multibyte_characters = Qnil; } - if (inserted > 0 || coding.type == coding_type_ccl) ++ coding.dst_multibyte = ! NILP (current_buffer->enable_multibyte_characters); + if ((CODING_REQUIRE_DETECTION (&coding) + || CODING_REQUIRE_DECODING (&coding)) + && (inserted > 0 || CODING_REQUIRE_FLUSHING (&coding))) { - if (CODING_MAY_REQUIRE_DECODING (&coding)) - { - code_convert_region (PT, PT_BYTE, PT + inserted, PT_BYTE + inserted, - &coding, 0, 0); - inserted = coding.produced_char; - } - else - adjust_after_insert (PT, PT_BYTE, PT + inserted, PT_BYTE + inserted, - inserted); + move_gap_both (PT, PT_BYTE); + GAP_SIZE += inserted; + ZV_BYTE -= inserted; + Z_BYTE -= inserted; + ZV -= inserted; + Z -= inserted; + decode_coding_gap (&coding, inserted, inserted); + inserted = coding.produced_char; } + else if (inserted > 0) + adjust_after_insert (PT, PT_BYTE, PT + inserted, PT_BYTE + inserted, + inserted); + /* Now INSERTED is measured in characters. */ + #ifdef DOS_NT /* Use the conversion type to determine buffer-file-type (find-buffer-file-type is now used to help determine the conversion). */ - if ((coding.eol_type == CODING_EOL_UNDECIDED - || coding.eol_type == CODING_EOL_LF) - if ((coding.eol_type == eol_type_undecided ++ if ((coding.eol_type == eol_type_undecided + || coding.eol_type == eol_type_lf) && ! CODING_REQUIRE_DECODING (&coding)) current_buffer->buffer_file_type = Qt; else @@@ -4571,19 -4490,6 +4557,19 @@@ Fcons (orig_filename, Qnil))); } + if (set_coding_system) - Vlast_coding_system_used = coding.symbol; ++ Vlast_coding_system_used = coding_system; + + if (! NILP (Ffboundp (Qafter_insert_file_set_coding))) + { + insval = call1 (Qafter_insert_file_set_coding, make_number (inserted)); + if (! NILP (insval)) + { + CHECK_NUMBER (insval); + inserted = XFASTINT (insval); + } + } + /* Decode file format */ if (inserted > 0) { @@@ -4685,24 -4592,9 +4669,17 @@@ choose_write_coding_system (start, end Lisp_Object val; if (auto_saving) - { - /* We use emacs-mule for auto saving... */ - setup_coding_system (Qemacs_mule, coding); - /* ... but with the special flag to indicate not to strip off - leading code of eight-bit-control chars. */ - coding->flags = 1; - goto done_setup_coding; - } - val = Qnil; ++ val = Qutf_8_emacs; else if (!NILP (Vcoding_system_for_write)) - val = Vcoding_system_for_write; + { + val = Vcoding_system_for_write; + if (coding_system_require_warning + && !NILP (Ffboundp (Vselect_safe_coding_system_function))) + /* Confirm that VAL can surely encode the current region. */ + val = call5 (Vselect_safe_coding_system_function, + start, end, Fcons (Qt, Fcons (val, Qnil)), + Qnil, filename); + } else { /* If the variable `buffer-file-coding-system' is set locally, @@@ -4747,44 -4638,44 +4723,45 @@@ val = current_buffer->buffer_file_coding_system; using_default_coding = 1; } - + + if (! NILP (val) && ! force_raw_text) + { + Lisp_Object spec, attrs; + + CHECK_CODING_SYSTEM_GET_SPEC (val, spec); + attrs = AREF (spec, 0); + if (EQ (CODING_ATTR_TYPE (attrs), Qraw_text)) + force_raw_text = 1; + } + if (!force_raw_text && !NILP (Ffboundp (Vselect_safe_coding_system_function))) /* Confirm that VAL can surely encode the current region. */ - val = call3 (Vselect_safe_coding_system_function, start, end, val); + val = call5 (Vselect_safe_coding_system_function, + start, end, val, Qnil, filename); - setup_coding_system (Fcheck_coding_system (val), coding); - if (coding->eol_type == CODING_EOL_UNDECIDED - && !using_default_coding) - { - if (! EQ (default_buffer_file_coding.symbol, - buffer_defaults.buffer_file_coding_system)) - setup_coding_system (buffer_defaults.buffer_file_coding_system, - &default_buffer_file_coding); - if (default_buffer_file_coding.eol_type != CODING_EOL_UNDECIDED) - { - Lisp_Object subsidiaries; - - coding->eol_type = default_buffer_file_coding.eol_type; - subsidiaries = Fget (coding->symbol, Qeol_type); - if (VECTORP (subsidiaries) - && XVECTOR (subsidiaries)->size == 3) - coding->symbol - = XVECTOR (subsidiaries)->contents[coding->eol_type]; - } - } + /* If the decided coding-system doesn't specify end-of-line + format, we use that of + `default-buffer-file-coding-system'. */ + if (! using_default_coding + && ! NILP (buffer_defaults.buffer_file_coding_system)) + val = (coding_inherit_eol_type + (val, buffer_defaults.buffer_file_coding_system)); + /* If we decide not to encode text, use `raw-text' or one of its + subsidiaries. */ if (force_raw_text) - setup_raw_text_coding_system (coding); - goto done_setup_coding; + val = raw_text_coding_system (val); } - setup_coding_system (Fcheck_coding_system (val), coding); + setup_coding_system (val, coding); + if (! NILP (val) + && VECTORP (CODING_ID_EOL_TYPE (coding->id))) + val = AREF (CODING_ID_EOL_TYPE (coding->id), 0); - done_setup_coding: if (!STRINGP (start) && !NILP (current_buffer->selective_display)) coding->mode |= CODING_MODE_SELECTIVE_DISPLAY; + return val; } DEFUN ("write-region", Fwrite_region, Swrite_region, 3, 7, @@@ -4928,22 -4811,17 +4905,10 @@@ This does code conversion according to We used to make this choice before calling build_annotations, but that leads to problems when a write-annotate-function takes care of unsavable chars (as was the case with X-Symbol). */ - choose_write_coding_system (start, end, filename, - append, visit, lockname, &coding); - Vlast_coding_system_used = coding.symbol; - - given_buffer = current_buffer; - if (! STRINGP (start)) - { - annotations = build_annotations_2 (start, end, - coding.pre_write_conversion, annotations); - if (current_buffer != given_buffer) - { - XSETFASTINT (start, BEGV); - XSETFASTINT (end, ZV); - } - } + Vlast_coding_system_used + = choose_write_coding_system (start, end, filename, + append, visit, lockname, &coding); - given_buffer = current_buffer; - if (current_buffer != given_buffer) - { - XSETFASTINT (start, BEGV); - XSETFASTINT (end, ZV); - } - #ifdef CLASH_DETECTION if (!auto_saving) { @@@ -5422,47 -5253,36 +5354,32 @@@ e_write (desc, string, start, end, codi int start, end; struct coding_system *coding; { - register char *addr; - register int nbytes; - char buf[WRITE_BUF_SIZE]; int return_val = 0; - if (start >= end) - coding->composing = COMPOSITION_DISABLED; - if (coding->composing != COMPOSITION_DISABLED) - coding_save_composition (coding, start, end, string); - if (STRINGP (string)) { - addr = SDATA (string); - nbytes = SBYTES (string); - coding->src_multibyte = STRING_MULTIBYTE (string); - } - else if (start < end) - { - /* It is assured that the gap is not in the range START and END-1. */ - addr = CHAR_POS_ADDR (start); - nbytes = CHAR_TO_BYTE (end) - CHAR_TO_BYTE (start); - coding->src_multibyte - = !NILP (current_buffer->enable_multibyte_characters); - } - else - { - addr = ""; - nbytes = 0; - coding->src_multibyte = 1; + start = 0; - end = XSTRING (string)->size; ++ end = SCHARS (string); } - coding->mode |= CODING_MODE_FIXED_DESTINATION; - if (! NILP (current_buffer->selective_display)) - coding->mode |= CODING_MODE_SELECTIVE_DISPLAY; - /* We used to have a code for handling selective display here. But, now it is handled within encode_coding. */ - while (1) + do { - int result; + if (STRINGP (string)) + encode_coding_object (coding, string, + start, string_char_to_byte (string, start), + end, string_char_to_byte (string, end), Qt); + else + encode_coding_object (coding, Fcurrent_buffer (), + start, CHAR_TO_BYTE (start), + end, CHAR_TO_BYTE (end), Qt); - result = encode_coding (coding, addr, buf, nbytes, WRITE_BUF_SIZE); if (coding->produced > 0) { - coding->produced -= emacs_write (desc, buf, coding->produced); - coding->produced -= emacs_write (desc, - XSTRING (coding->dst_object)->data, ++ coding->produced -= emacs_write (desc, SDATA (coding->dst_object), + coding->produced); ++ if (coding->produced) { return_val = -1; diff --cc src/filelock.c index bcad75199cd,0b08730ffcb..f6108942ba3 --- a/src/filelock.c +++ b/src/filelock.c @@@ -55,7 -55,8 +55,7 @@@ extern int errno #include "lisp.h" #include "buffer.h" - #include "charset.h" + #include "character.h" -#include "charset.h" #include "coding.h" #include "systime.h" diff --cc src/fns.c index 9ee15ff4994,2812d11907d..0fdca30084e --- a/src/fns.c +++ b/src/fns.c @@@ -34,11 -31,9 +34,11 @@@ Boston, MA 02111-1307, USA. * #undef vector #define vector ***** +#endif /* ! MAC_OSX */ + #include "lisp.h" #include "commands.h" - #include "charset.h" + #include "character.h" #include "coding.h" #include "buffer.h" #include "keyboard.h" @@@ -139,11 -134,9 +139,9 @@@ To get the number of bytes, use `string retry: if (STRINGP (sequence)) - XSETFASTINT (val, XSTRING (sequence)->size); + XSETFASTINT (val, SCHARS (sequence)); else if (VECTORP (sequence)) XSETFASTINT (val, XVECTOR (sequence)->size); - else if (SUB_CHAR_TABLE_P (sequence)) - XSETFASTINT (val, SUB_CHAR_TABLE_ORDINARY_SLOTS); else if (CHAR_TABLE_P (sequence)) XSETFASTINT (val, MAX_CHAR); else if (BOOL_VECTOR_P (sequence)) @@@ -452,30 -445,9 +450,9 @@@ usage: (vconcat &rest SEQUENCES) */ return concat (nargs, args, Lisp_Vectorlike, 0); } - /* Return a copy of a sub char table ARG. The elements except for a - nested sub char table are not copied. */ - static Lisp_Object - copy_sub_char_table (arg) - Lisp_Object arg; - { - Lisp_Object copy = make_sub_char_table (XCHAR_TABLE (arg)->defalt); - int i; - - /* Copy all the contents. */ - bcopy (XCHAR_TABLE (arg)->contents, XCHAR_TABLE (copy)->contents, - SUB_CHAR_TABLE_ORDINARY_SLOTS * sizeof (Lisp_Object)); - /* Recursively copy any sub char-tables in the ordinary slots. */ - for (i = 32; i < SUB_CHAR_TABLE_ORDINARY_SLOTS; i++) - if (SUB_CHAR_TABLE_P (XCHAR_TABLE (arg)->contents[i])) - XCHAR_TABLE (copy)->contents[i] - = copy_sub_char_table (XCHAR_TABLE (copy)->contents[i]); - - return copy; - } - DEFUN ("copy-sequence", Fcopy_sequence, Scopy_sequence, 1, 1, 0, - doc: /* Return a copy of a list, vector or string. + doc: /* Return a copy of a list, vector, string or char-table. The elements of a list or vector are not copied; they are shared with the original. */) (arg) @@@ -485,26 -457,8 +462,9 @@@ if (CHAR_TABLE_P (arg)) { - int i; - Lisp_Object copy; - - copy = Fmake_char_table (XCHAR_TABLE (arg)->purpose, Qnil); - /* Copy all the slots, including the extra ones. */ - bcopy (XVECTOR (arg)->contents, XVECTOR (copy)->contents, - ((XCHAR_TABLE (arg)->size & PSEUDOVECTOR_SIZE_MASK) - * sizeof (Lisp_Object))); - - /* Recursively copy any sub char tables in the ordinary slots - for multibyte characters. */ - for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; - i < CHAR_TABLE_ORDINARY_SLOTS; i++) - if (SUB_CHAR_TABLE_P (XCHAR_TABLE (arg)->contents[i])) - XCHAR_TABLE (copy)->contents[i] - = copy_sub_char_table (XCHAR_TABLE (copy)->contents[i]); - - return copy; + return copy_char_table (arg); } + if (BOOL_VECTOR_P (arg)) { Lisp_Object val; @@@ -625,11 -581,11 +587,11 @@@ concat (nargs, args, target_type, last_ for (i = 0; i < len; i++) { ch = XVECTOR (this)->contents[i]; - if (! INTEGERP (ch)) - wrong_type_argument (Qintegerp, ch); + if (! CHARACTERP (ch)) + wrong_type_argument (Qcharacterp, ch); this_len_byte = CHAR_BYTES (XINT (ch)); result_len_byte += this_len_byte; - if (!SINGLE_BYTE_CHAR_P (XINT (ch))) - if (!ASCII_CHAR_P (XINT (ch))) ++ if (! ASCII_CHAR_P (XINT (ch))) some_multibyte = 1; } else if (BOOL_VECTOR_P (this) && XBOOL_VECTOR (this)->size > 0) @@@ -638,11 -594,11 +600,11 @@@ for (; CONSP (this); this = XCDR (this)) { ch = XCAR (this); - if (! INTEGERP (ch)) - wrong_type_argument (Qintegerp, ch); + if (! CHARACTERP (ch)) + wrong_type_argument (Qcharacterp, ch); this_len_byte = CHAR_BYTES (XINT (ch)); result_len_byte += this_len_byte; - if (!SINGLE_BYTE_CHAR_P (XINT (ch))) - if (!ASCII_CHAR_P (XINT (ch))) ++ if (! ASCII_CHAR_P (XINT (ch))) some_multibyte = 1; } else if (STRINGP (this)) @@@ -704,21 -660,14 +666,14 @@@ if (STRINGP (this) && STRINGP (val) && STRING_MULTIBYTE (this) == some_multibyte) { - int thislen_byte = STRING_BYTES (XSTRING (this)); + int thislen_byte = SBYTES (this); - int combined; - bcopy (XSTRING (this)->data, XSTRING (val)->data + toindex_byte, - STRING_BYTES (XSTRING (this))); - if (! NULL_INTERVAL_P (XSTRING (this)->intervals)) + bcopy (SDATA (this), SDATA (val) + toindex_byte, + SBYTES (this)); - combined = (some_multibyte && toindex_byte > 0 - ? count_combining (SDATA (val), - toindex_byte + thislen_byte, - toindex_byte) - : 0); + if (! NULL_INTERVAL_P (STRING_INTERVALS (this))) { textprops[num_textprops].argnum = argnum; - /* We ignore text properties on characters being combined. */ - textprops[num_textprops].from = combined; + textprops[num_textprops].from = 0; textprops[num_textprops++].to = toindex; } toindex_byte += thislen_byte; @@@ -764,11 -712,9 +718,9 @@@ } else { - XSETFASTINT (elt, XSTRING (this)->data[thisindex++]); + XSETFASTINT (elt, SREF (this, thisindex++)); if (some_multibyte - && (XINT (elt) >= 0240 - || (XINT (elt) >= 0200 - && ! NILP (Vnonascii_translation_table))) + && XINT (elt) >= 0200 && XINT (elt) < 0400) { c = unibyte_char_to_multibyte (XINT (elt)); @@@ -801,34 -747,13 +753,12 @@@ else { CHECK_NUMBER (elt); - if (SINGLE_BYTE_CHAR_P (XINT (elt))) - { - if (some_multibyte) - toindex_byte - += CHAR_STRING (XINT (elt), - SDATA (val) + toindex_byte); - else - SSET (val, toindex_byte++, XINT (elt)); - if (some_multibyte - && toindex_byte > 0 - && count_combining (SDATA (val), - toindex_byte, toindex_byte - 1)) - STRING_SET_CHARS (val, SCHARS (val) - 1); - else - toindex++; - } + if (some_multibyte) - toindex_byte - += CHAR_STRING (XINT (elt), - XSTRING (val)->data + toindex_byte); ++ toindex_byte += CHAR_STRING (XINT (elt), ++ SDATA (val) + toindex_byte); else - /* If we have any multibyte characters, - we already decided to make a multibyte string. */ - { - int c = XINT (elt); - /* P exists as a variable - to avoid a bug on the Masscomp C compiler. */ - unsigned char *p = SDATA (val) + toindex_byte; - - toindex_byte += CHAR_STRING (c, p); - toindex++; - } - XSTRING (val)->data[toindex_byte++] = XINT (elt); ++ SSET (val, toindex_byte++, XINT (elt)); + toindex++; } } } @@@ -903,36 -828,26 +833,26 @@@ string_char_to_byte (string, char_index if (char_index - best_below < best_above - char_index) { - unsigned char *p = XSTRING (string)->data + best_below_byte; ++ unsigned char *p = SDATA (string) + best_below_byte; + while (best_below < char_index) { - int c; - FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string, - best_below, best_below_byte); + p += BYTES_BY_CHAR_HEAD (*p); + best_below++; } - i = best_below; - i_byte = best_below_byte; - i_byte = p - XSTRING (string)->data; ++ i_byte = p - SDATA (string); } else { - unsigned char *p = XSTRING (string)->data + best_above_byte; ++ unsigned char *p = SDATA (string) + best_above_byte; + while (best_above > char_index) { - unsigned char *pend = SDATA (string) + best_above_byte; - unsigned char *pbeg = pend - best_above_byte; - unsigned char *p = pend - 1; - int bytes; - - while (p > pbeg && !CHAR_HEAD_P (*p)) p--; - PARSE_MULTIBYTE_SEQ (p, pend - p, bytes); - if (bytes == pend - p) - best_above_byte -= bytes; - else if (bytes > pend - p) - best_above_byte -= (pend - p); - else - best_above_byte--; + p--; + while (!CHAR_HEAD_P (*p)) p--; best_above--; } - i = best_above; - i_byte = best_above_byte; - i_byte = p - XSTRING (string)->data; ++ i_byte = p - SDATA (string); } string_char_byte_cache_bytepos = i_byte; @@@ -976,36 -891,30 +896,30 @@@ string_byte_to_char (string, byte_index if (byte_index - best_below_byte < best_above_byte - byte_index) { - while (best_below_byte < byte_index) - unsigned char *p = XSTRING (string)->data + best_below_byte; - unsigned char *pend = XSTRING (string)->data + byte_index; ++ unsigned char *p = SDATA (string) + best_below_byte; ++ unsigned char *pend = SDATA (string) + byte_index; + + while (p < pend) { - int c; - FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string, - best_below, best_below_byte); + p += BYTES_BY_CHAR_HEAD (*p); + best_below++; } i = best_below; - i_byte = best_below_byte; - i_byte = p - XSTRING (string)->data; ++ i_byte = p - SDATA (string); } else { - while (best_above_byte > byte_index) - unsigned char *p = XSTRING (string)->data + best_above_byte; - unsigned char *pbeg = XSTRING (string)->data + byte_index; ++ unsigned char *p = SDATA (string) + best_above_byte; ++ unsigned char *pbeg = SDATA (string) + byte_index; + + while (p > pbeg) { - unsigned char *pend = SDATA (string) + best_above_byte; - unsigned char *pbeg = pend - best_above_byte; - unsigned char *p = pend - 1; - int bytes; - - while (p > pbeg && !CHAR_HEAD_P (*p)) p--; - PARSE_MULTIBYTE_SEQ (p, pend - p, bytes); - if (bytes == pend - p) - best_above_byte -= bytes; - else if (bytes > pend - p) - best_above_byte -= (pend - p); - else - best_above_byte--; + p--; + while (!CHAR_HEAD_P (*p)) p--; best_above--; } i = best_above; - i_byte = best_above_byte; - i_byte = p - XSTRING (string)->data; ++ i_byte = p - SDATA (string); } string_char_byte_cache_bytepos = i_byte; @@@ -1037,42 -944,12 +949,41 @@@ string_make_multibyte (string return string; buf = (unsigned char *) alloca (nbytes); - copy_text (XSTRING (string)->data, buf, STRING_BYTES (XSTRING (string)), + copy_text (SDATA (string), buf, SBYTES (string), 0, 1); - return make_multibyte_string (buf, XSTRING (string)->size, nbytes); + return make_multibyte_string (buf, SCHARS (string), nbytes); } + - /* Convert STRING to a multibyte string without changing each - character codes. Thus, characters 0200 trough 0237 are converted - to eight-bit-control characters, and characters 0240 through 0377 - are converted eight-bit-graphic characters. */ ++/* Convert STRING (if unibyte) to a multibyte string without changing ++ the number of characters. Characters 0200 trough 0237 are ++ converted to eight-bit characters. */ + +Lisp_Object +string_to_multibyte (string) + Lisp_Object string; +{ + unsigned char *buf; + int nbytes; + + if (STRING_MULTIBYTE (string)) + return string; + + nbytes = parse_str_to_multibyte (SDATA (string), SBYTES (string)); - /* If all the chars are ASCII or eight-bit-graphic, they won't need - any more bytes once converted. */ ++ /* If all the chars are ASCII, they won't need any more bytes once ++ converted. */ + if (nbytes == SBYTES (string)) + return make_multibyte_string (SDATA (string), nbytes, nbytes); + + buf = (unsigned char *) alloca (nbytes); + bcopy (SDATA (string), buf, SBYTES (string)); + str_to_multibyte (buf, nbytes, SBYTES (string)); + + return make_multibyte_string (buf, SCHARS (string), nbytes); +} + + /* Convert STRING to a single-byte string. */ Lisp_Object @@@ -2219,7 -1990,8 +2135,8 @@@ internal_equal (o1, o2, depth functions are sensible to compare, so eliminate the others now. */ if (size & PSEUDOVECTOR_FLAG) { - if (!(size & (PVEC_COMPILED | PVEC_CHAR_TABLE))) - if (!(size & (PVEC_COMPILED | PVEC_CHAR_TABLE - | PVEC_SUB_CHAR_TABLE))) ++ if (!(size & (PVEC_COMPILED ++ | PVEC_CHAR_TABLE | PVEC_SUB_CHAR_TABLE))) return 0; size &= PSEUDOVECTOR_SIZE_MASK; } @@@ -2326,471 -2098,6 +2243,19 @@@ ARRAY is a vector, string, char-table, return array; } +DEFUN ("clear-string", Fclear_string, Sclear_string, + 1, 1, 0, + doc: /* Clear the contents of STRING. +This makes STRING unibyte and may change its length. */) + (string) + Lisp_Object string; +{ + int len = SBYTES (string); + bzero (SDATA (string), len); + STRING_SET_CHARS (string, len); + STRING_SET_UNIBYTE (string); + return Qnil; +} - - DEFUN ("char-table-subtype", Fchar_table_subtype, Schar_table_subtype, - 1, 1, 0, - doc: /* Return the subtype of char-table CHAR-TABLE. The value is a symbol. */) - (char_table) - Lisp_Object char_table; - { - CHECK_CHAR_TABLE (char_table); - - return XCHAR_TABLE (char_table)->purpose; - } - - DEFUN ("char-table-parent", Fchar_table_parent, Schar_table_parent, - 1, 1, 0, - doc: /* Return the parent char-table of CHAR-TABLE. - The value is either nil or another char-table. - If CHAR-TABLE holds nil for a given character, - then the actual applicable value is inherited from the parent char-table - \(or from its parents, if necessary). */) - (char_table) - Lisp_Object char_table; - { - CHECK_CHAR_TABLE (char_table); - - return XCHAR_TABLE (char_table)->parent; - } - - DEFUN ("set-char-table-parent", Fset_char_table_parent, Sset_char_table_parent, - 2, 2, 0, - doc: /* Set the parent char-table of CHAR-TABLE to PARENT. - PARENT must be either nil or another char-table. */) - (char_table, parent) - Lisp_Object char_table, parent; - { - Lisp_Object temp; - - CHECK_CHAR_TABLE (char_table); - - if (!NILP (parent)) - { - CHECK_CHAR_TABLE (parent); - - for (temp = parent; !NILP (temp); temp = XCHAR_TABLE (temp)->parent) - if (EQ (temp, char_table)) - error ("Attempt to make a chartable be its own parent"); - } - - XCHAR_TABLE (char_table)->parent = parent; - - return parent; - } - - DEFUN ("char-table-extra-slot", Fchar_table_extra_slot, Schar_table_extra_slot, - 2, 2, 0, - doc: /* Return the value of CHAR-TABLE's extra-slot number N. */) - (char_table, n) - Lisp_Object char_table, n; - { - CHECK_CHAR_TABLE (char_table); - CHECK_NUMBER (n); - if (XINT (n) < 0 - || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table))) - args_out_of_range (char_table, n); - - return XCHAR_TABLE (char_table)->extras[XINT (n)]; - } - - DEFUN ("set-char-table-extra-slot", Fset_char_table_extra_slot, - Sset_char_table_extra_slot, - 3, 3, 0, - doc: /* Set CHAR-TABLE's extra-slot number N to VALUE. */) - (char_table, n, value) - Lisp_Object char_table, n, value; - { - CHECK_CHAR_TABLE (char_table); - CHECK_NUMBER (n); - if (XINT (n) < 0 - || XINT (n) >= CHAR_TABLE_EXTRA_SLOTS (XCHAR_TABLE (char_table))) - args_out_of_range (char_table, n); - - return XCHAR_TABLE (char_table)->extras[XINT (n)] = value; - } - - DEFUN ("char-table-range", Fchar_table_range, Schar_table_range, - 2, 2, 0, - doc: /* Return the value in CHAR-TABLE for a range of characters RANGE. - RANGE should be nil (for the default value) - a vector which identifies a character set or a row of a character set, - a character set name, or a character code. */) - (char_table, range) - Lisp_Object char_table, range; - { - CHECK_CHAR_TABLE (char_table); - - if (EQ (range, Qnil)) - return XCHAR_TABLE (char_table)->defalt; - else if (INTEGERP (range)) - return Faref (char_table, range); - else if (SYMBOLP (range)) - { - Lisp_Object charset_info; - - charset_info = Fget (range, Qcharset); - CHECK_VECTOR (charset_info); - - return Faref (char_table, - make_number (XINT (XVECTOR (charset_info)->contents[0]) - + 128)); - } - else if (VECTORP (range)) - { - if (XVECTOR (range)->size == 1) - return Faref (char_table, - make_number (XINT (XVECTOR (range)->contents[0]) + 128)); - else - { - int size = XVECTOR (range)->size; - Lisp_Object *val = XVECTOR (range)->contents; - Lisp_Object ch = Fmake_char_internal (size <= 0 ? Qnil : val[0], - size <= 1 ? Qnil : val[1], - size <= 2 ? Qnil : val[2]); - return Faref (char_table, ch); - } - } - else - error ("Invalid RANGE argument to `char-table-range'"); - return Qt; - } - - DEFUN ("set-char-table-range", Fset_char_table_range, Sset_char_table_range, - 3, 3, 0, - doc: /* Set the value in CHAR-TABLE for a range of characters RANGE to VALUE. - RANGE should be t (for all characters), nil (for the default value) - a vector which identifies a character set or a row of a character set, - a coding system, or a character code. */) - (char_table, range, value) - Lisp_Object char_table, range, value; - { - int i; - - CHECK_CHAR_TABLE (char_table); - - if (EQ (range, Qt)) - for (i = 0; i < CHAR_TABLE_ORDINARY_SLOTS; i++) - XCHAR_TABLE (char_table)->contents[i] = value; - else if (EQ (range, Qnil)) - XCHAR_TABLE (char_table)->defalt = value; - else if (SYMBOLP (range)) - { - Lisp_Object charset_info; - - charset_info = Fget (range, Qcharset); - CHECK_VECTOR (charset_info); - - return Faset (char_table, - make_number (XINT (XVECTOR (charset_info)->contents[0]) - + 128), - value); - } - else if (INTEGERP (range)) - Faset (char_table, range, value); - else if (VECTORP (range)) - { - if (XVECTOR (range)->size == 1) - return Faset (char_table, - make_number (XINT (XVECTOR (range)->contents[0]) + 128), - value); - else - { - int size = XVECTOR (range)->size; - Lisp_Object *val = XVECTOR (range)->contents; - Lisp_Object ch = Fmake_char_internal (size <= 0 ? Qnil : val[0], - size <= 1 ? Qnil : val[1], - size <= 2 ? Qnil : val[2]); - return Faset (char_table, ch, value); - } - } - else - error ("Invalid RANGE argument to `set-char-table-range'"); - - return value; - } - - DEFUN ("set-char-table-default", Fset_char_table_default, - Sset_char_table_default, 3, 3, 0, - doc: /* Set the default value in CHAR-TABLE for generic character CH to VALUE. - The generic character specifies the group of characters. - See also the documentation of `make-char'. */) - (char_table, ch, value) - Lisp_Object char_table, ch, value; - { - int c, charset, code1, code2; - Lisp_Object temp; - - CHECK_CHAR_TABLE (char_table); - CHECK_NUMBER (ch); - - c = XINT (ch); - SPLIT_CHAR (c, charset, code1, code2); - - /* Since we may want to set the default value for a character set - not yet defined, we check only if the character set is in the - valid range or not, instead of it is already defined or not. */ - if (! CHARSET_VALID_P (charset)) - invalid_character (c); - - if (charset == CHARSET_ASCII) - return (XCHAR_TABLE (char_table)->defalt = value); - - /* Even if C is not a generic char, we had better behave as if a - generic char is specified. */ - if (!CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1) - code1 = 0; - temp = XCHAR_TABLE (char_table)->contents[charset + 128]; - if (!code1) - { - if (SUB_CHAR_TABLE_P (temp)) - XCHAR_TABLE (temp)->defalt = value; - else - XCHAR_TABLE (char_table)->contents[charset + 128] = value; - return value; - } - if (SUB_CHAR_TABLE_P (temp)) - char_table = temp; - else - char_table = (XCHAR_TABLE (char_table)->contents[charset + 128] - = make_sub_char_table (temp)); - temp = XCHAR_TABLE (char_table)->contents[code1]; - if (SUB_CHAR_TABLE_P (temp)) - XCHAR_TABLE (temp)->defalt = value; - else - XCHAR_TABLE (char_table)->contents[code1] = value; - return value; - } - - /* Look up the element in TABLE at index CH, - and return it as an integer. - If the element is nil, return CH itself. - (Actually we do that for any non-integer.) */ - - int - char_table_translate (table, ch) - Lisp_Object table; - int ch; - { - Lisp_Object value; - value = Faref (table, make_number (ch)); - if (! INTEGERP (value)) - return ch; - return XINT (value); - } - - static void - optimize_sub_char_table (table, chars) - Lisp_Object *table; - int chars; - { - Lisp_Object elt; - int from, to; - - if (chars == 94) - from = 33, to = 127; - else - from = 32, to = 128; - - if (!SUB_CHAR_TABLE_P (*table)) - return; - elt = XCHAR_TABLE (*table)->contents[from++]; - for (; from < to; from++) - if (NILP (Fequal (elt, XCHAR_TABLE (*table)->contents[from]))) - return; - *table = elt; - } - - DEFUN ("optimize-char-table", Foptimize_char_table, Soptimize_char_table, - 1, 1, 0, doc: /* Optimize char table TABLE. */) - (table) - Lisp_Object table; - { - Lisp_Object elt; - int dim; - int i, j; - - CHECK_CHAR_TABLE (table); - - for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++) - { - elt = XCHAR_TABLE (table)->contents[i]; - if (!SUB_CHAR_TABLE_P (elt)) - continue; - dim = CHARSET_DIMENSION (i - 128); - if (dim == 2) - for (j = 32; j < SUB_CHAR_TABLE_ORDINARY_SLOTS; j++) - optimize_sub_char_table (XCHAR_TABLE (elt)->contents + j, dim); - optimize_sub_char_table (XCHAR_TABLE (table)->contents + i, dim); - } - return Qnil; - } - - - /* Map C_FUNCTION or FUNCTION over SUBTABLE, calling it for each - character or group of characters that share a value. - DEPTH is the current depth in the originally specified - chartable, and INDICES contains the vector indices - for the levels our callers have descended. - - ARG is passed to C_FUNCTION when that is called. */ - - void - map_char_table (c_function, function, table, subtable, arg, depth, indices) - void (*c_function) P_ ((Lisp_Object, Lisp_Object, Lisp_Object)); - Lisp_Object function, table, subtable, arg, *indices; - int depth; - { - int i, to; - - if (depth == 0) - { - /* At first, handle ASCII and 8-bit European characters. */ - for (i = 0; i < CHAR_TABLE_SINGLE_BYTE_SLOTS; i++) - { - Lisp_Object elt= XCHAR_TABLE (subtable)->contents[i]; - if (NILP (elt)) - elt = XCHAR_TABLE (subtable)->defalt; - if (NILP (elt)) - elt = Faref (subtable, make_number (i)); - if (c_function) - (*c_function) (arg, make_number (i), elt); - else - call2 (function, make_number (i), elt); - } - #if 0 /* If the char table has entries for higher characters, - we should report them. */ - if (NILP (current_buffer->enable_multibyte_characters)) - return; - #endif - to = CHAR_TABLE_ORDINARY_SLOTS; - } - else - { - int charset = XFASTINT (indices[0]) - 128; - - i = 32; - to = SUB_CHAR_TABLE_ORDINARY_SLOTS; - if (CHARSET_CHARS (charset) == 94) - i++, to--; - } - - for (; i < to; i++) - { - Lisp_Object elt; - int charset; - - elt = XCHAR_TABLE (subtable)->contents[i]; - XSETFASTINT (indices[depth], i); - charset = XFASTINT (indices[0]) - 128; - if (depth == 0 - && (!CHARSET_DEFINED_P (charset) - || charset == CHARSET_8_BIT_CONTROL - || charset == CHARSET_8_BIT_GRAPHIC)) - continue; - - if (SUB_CHAR_TABLE_P (elt)) - { - if (depth >= 3) - error ("Too deep char table"); - map_char_table (c_function, function, table, elt, arg, depth + 1, indices); - } - else - { - int c1, c2, c; - - c1 = depth >= 1 ? XFASTINT (indices[1]) : 0; - c2 = depth >= 2 ? XFASTINT (indices[2]) : 0; - c = MAKE_CHAR (charset, c1, c2); - - if (NILP (elt)) - elt = XCHAR_TABLE (subtable)->defalt; - if (NILP (elt)) - elt = Faref (table, make_number (c)); - - if (c_function) - (*c_function) (arg, make_number (c), elt); - else - call2 (function, make_number (c), elt); - } - } - } - - static void void_call2 P_ ((Lisp_Object a, Lisp_Object b, Lisp_Object c)); - static void - void_call2 (a, b, c) - Lisp_Object a, b, c; - { - call2 (a, b, c); - } - - DEFUN ("map-char-table", Fmap_char_table, Smap_char_table, - 2, 2, 0, - doc: /* Call FUNCTION for each (normal and generic) characters in CHAR-TABLE. - FUNCTION is called with two arguments--a key and a value. - The key is always a possible IDX argument to `aref'. */) - (function, char_table) - Lisp_Object function, char_table; - { - /* The depth of char table is at most 3. */ - Lisp_Object indices[3]; - - CHECK_CHAR_TABLE (char_table); - - /* When Lisp_Object is represented as a union, `call2' cannot directly - be passed to map_char_table because it returns a Lisp_Object rather - than returning nothing. - Casting leads to crashes on some architectures. -stef */ - map_char_table (void_call2, Qnil, char_table, char_table, function, 0, indices); - return Qnil; - } - - /* Return a value for character C in char-table TABLE. Store the - actual index for that value in *IDX. Ignore the default value of - TABLE. */ - - Lisp_Object - char_table_ref_and_index (table, c, idx) - Lisp_Object table; - int c, *idx; - { - int charset, c1, c2; - Lisp_Object elt; - - if (SINGLE_BYTE_CHAR_P (c)) - { - *idx = c; - return XCHAR_TABLE (table)->contents[c]; - } - SPLIT_CHAR (c, charset, c1, c2); - elt = XCHAR_TABLE (table)->contents[charset + 128]; - *idx = MAKE_CHAR (charset, 0, 0); - if (!SUB_CHAR_TABLE_P (elt)) - return elt; - if (c1 < 32 || NILP (XCHAR_TABLE (elt)->contents[c1])) - return XCHAR_TABLE (elt)->defalt; - elt = XCHAR_TABLE (elt)->contents[c1]; - *idx = MAKE_CHAR (charset, c1, 0); - if (!SUB_CHAR_TABLE_P (elt)) - return elt; - if (c2 < 32 || NILP (XCHAR_TABLE (elt)->contents[c2])) - return XCHAR_TABLE (elt)->defalt; - *idx = c; - return XCHAR_TABLE (elt)->contents[c2]; - } - /* ARGSUSED */ Lisp_Object @@@ -3830,8 -3095,10 +3297,9 @@@ base64_encode_1 (from, to, length, line if (multibyte) { c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes); - if (c >= 256) - return -1; + if (CHAR_BYTE8_P (c)) + c = CHAR_TO_BYTE8 (c); + else if (c >= 256) - return -1; i += bytes; } else @@@ -5372,11 -4672,11 +4841,11 @@@ guesswork fails. Normally, an error i if (STRING_MULTIBYTE (object)) /* use default, we can't guess correct value */ - coding_system = SYMBOL_VALUE (XCAR (Vcoding_category_list)); + coding_system = preferred_coding_system (); - else + else coding_system = Qraw_text; } - + if (NILP (Fcoding_system_p (coding_system))) { /* Invalid coding system. */ @@@ -5389,10 -4689,10 +4858,10 @@@ } if (STRING_MULTIBYTE (object)) - object = code_convert_string1 (object, coding_system, Qnil, 1); + object = code_convert_string (object, coding_system, Qnil, 1, 0, 1); - size = XSTRING (object)->size; - size_byte = STRING_BYTES (XSTRING (object)); + size = SCHARS (object); + size_byte = SBYTES (object); if (!NILP (start)) { @@@ -5521,11 -4821,11 +4990,11 @@@ object = make_buffer_string (b, e, 0); if (STRING_MULTIBYTE (object)) - object = code_convert_string1 (object, coding_system, Qnil, 1); - object = code_convert_string (object, coding_system, Qnil, 1, 0, 1); ++ object = code_convert_string (object, coding_system, Qnil, 1, 0, 0); } - md5_buffer (XSTRING (object)->data + start_byte, - STRING_BYTES(XSTRING (object)) - (size_byte - end_byte), + md5_buffer (SDATA (object) + start_byte, + SBYTES (object) - (size_byte - end_byte), digest); for (i = 0; i < 16; i++) @@@ -5669,21 -4965,8 +5138,11 @@@ invoked by mouse clicks and mouse menu defsubr (&Sget); defsubr (&Splist_put); defsubr (&Sput); + defsubr (&Slax_plist_get); + defsubr (&Slax_plist_put); defsubr (&Sequal); defsubr (&Sfillarray); + defsubr (&Sclear_string); - defsubr (&Schar_table_subtype); - defsubr (&Schar_table_parent); - defsubr (&Sset_char_table_parent); - defsubr (&Schar_table_extra_slot); - defsubr (&Sset_char_table_extra_slot); - defsubr (&Schar_table_range); - defsubr (&Sset_char_table_range); - defsubr (&Sset_char_table_default); - defsubr (&Soptimize_char_table); - defsubr (&Smap_char_table); defsubr (&Snconc); defsubr (&Smapcar); defsubr (&Smapc); diff --cc src/fontset.c index a23a146c76d,1c880aa246b..e9232f2a25e --- a/src/fontset.c +++ b/src/fontset.c @@@ -1,6 -1,9 +1,9 @@@ /* Fontset handler. Copyright (C) 1995, 1997, 2000 Electrotechnical Laboratory, JAPAN. -- Licensed to the Free Software Foundation. - Copyright (C) 2001, 2002 ++ Licensed to the Free Software Foundation. ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. @@@ -37,63 -42,113 +42,115 @@@ Boston, MA 02111-1307, USA. * #include "fontset.h" #include "window.h" --#ifdef FONTSET_DEBUG #undef xassert ++#ifdef FONTSET_DEBUG #define xassert(X) do {if (!(X)) abort ();} while (0) #undef INLINE #define INLINE --#endif ++#else /* not FONTSET_DEBUG */ ++#define xassert(X) (void) 0 ++#endif /* not FONTSET_DEBUG */ + EXFUN (Fclear_face_cache, 1); /* FONTSET A fontset is a collection of font related information to give - similar appearance (style, size, etc) of characters. There are two - kinds of fontsets; base and realized. A base fontset is created by - new-fontset from Emacs Lisp explicitly. A realized fontset is - created implicitly when a face is realized for ASCII characters. A - face is also realized for multibyte characters based on an ASCII - face. All of the multibyte faces based on the same ASCII face - share the same realized fontset. + similar appearance (style, etc) of characters. A fontset has two + roles. One is to use for the frame parameter `font' as if it is an + ASCII font. In that case, Emacs uses the font specified for + `ascii' script for the frame's default font. + + Another role, the more important one, is to provide information + about which font to use for each non-ASCII character. + + There are two kinds of fontsets; base and realized. A base fontset + is created by `new-fontset' from Emacs Lisp explicitly. A realized + fontset is created implicitly when a face is realized for ASCII + characters. A face is also realized for non-ASCII characters based + on an ASCII face. All of non-ASCII faces based on the same ASCII + face share the same realized fontset. - ++ + A fontset object is implemented by a char-table whose default value + and parent are always nil. + + An element of a base fontset is a vector of FONT-DEFs which itself + is a vector [ FONT-SPEC ENCODING REPERTORY ]. + + FONT-SPEC is: + [ FAMILY WEIGHT SLANT SWIDTH ADSTYLE REGISTRY ] + or + FONT-NAME + where FAMILY, WEIGHT, SLANT, SWIDTH, ADSTYLE, REGISTRY, and + FONT-NAME are strings. + + ENCODING is a charset ID or a char-table that can convert + characters to glyph codes of the corresponding font. + + REPERTORY is a charset ID or nil. If REPERTORY is a charset ID, + the repertory of the charset exactly matches with that of the font. + If REPERTORY is nil, we consult with the font itself to get the + repertory. + + ENCODING and REPERTORY are extracted from the variable + Vfont_encoding_alist by using a font name generated form FONT-SPEC + (if it is a vector) or FONT-NAME as a key. + + + An element of a realized fontset is nil or t, or has this form: - A fontset object is implemented by a char-table. + ( CHARSET-PRIORITY-LIST-TICK . FONT-VECTOR ) - An element of a base fontset is: - (INDEX . FONTNAME) or - (INDEX . (FOUNDRY . REGISTRY )) - FONTNAME is a font name pattern for the corresponding character. - FOUNDRY and REGISTRY are respectively foundry and registry fields of - a font name for the corresponding character. INDEX specifies for - which character (or generic character) the element is defined. It - may be different from an index to access this element. For - instance, if a fontset defines some font for all characters of - charset `japanese-jisx0208', INDEX is the generic character of this - charset. REGISTRY is the + FONT-VECTOR is a vector whose elements have this form: - An element of a realized fontset is FACE-ID which is a face to use - for displaying the corresponding character. + [ FACE-ID FONT-INDEX FONT-DEF ] - All single byte characters (ASCII and 8bit-unibyte) share the same - element in a fontset. The element is stored in the first element - of the fontset. + FONT-VECTOR is automatically reordered by the current charset + priority list. - To access or set each element, use macros FONTSET_REF and - FONTSET_SET respectively for efficiency. + The value nil means that we have not yet generated FONT-VECTOR from + the base of the fontset. - A fontset has 3 extra slots. + The value t means that no font is available for the corresponding + range of characters. - The 1st slot is an ID number of the fontset. - The 2nd slot is a name of the fontset. This is nil for a realized - face. + A fontset has 8 extra slots. - The 3rd slot is a frame that the fontset belongs to. This is nil - for a default face. + The 1st slot: the ID number of the fontset - A parent of a base fontset is nil. A parent of a realized fontset - is a base fontset. + The 2nd slot: + base: the name of the fontset + realized: nil - All fontsets are recorded in Vfontset_table. + The 3rd slot: + base: nil + realized: the base fontset + + The 4th slot: + base: nil + realized: the frame that the fontset belongs to + + The 5th slot: + base: the font name for ASCII characters + realized: nil + + The 6th slot: + base: nil + realized: the ID number of a face to use for characters that + has no font in a realized fontset. + + The 7th slot: + base: nil + realized: Alist of font index vs the corresponding repertory + char-table. + + The 8th slot: + base: nil + realized: If the base is not the default fontset, a fontset + realized from the default fontset, else nil. + + All fontsets are recorded in the vector Vfontset_table. DEFAULT FONTSET @@@ -268,38 -341,299 +343,299 @@@ fontset_ref_and_range (fontset, c, from } - /* Store into the element of FONTSET at index C the value NEWELT. */ - #define FONTSET_SET(fontset, c, newelt) fontset_set(fontset, c, newelt) + /* Set elements of FONTSET for characters in RANGE to the value ELT. + RANGE is a cons (FROM . TO), where FROM and TO are character codes + specifying a range. */ + + #define FONTSET_SET(fontset, range, elt) \ + Fset_char_table_range ((fontset), (range), (elt)) + + + /* Modify the elements of FONTSET for characters in RANGE by replacing + with ELT or adding ETL. RANGE is a cons (FROM . TO), where FROM + and TO are character codes specifying a range. If ADD is nil, + replace with ELT, if ADD is `prepend', prepend ELT, otherwise, + append ELT. */ + + #define FONTSET_ADD(fontset, range, elt, add) \ + (NILP (add) \ + ? Fset_char_table_range ((fontset), (range), \ + Fmake_vector (make_number (1), (elt))) \ + : fontset_add ((fontset), (range), (elt), (add))) + + static Lisp_Object + fontset_add (fontset, range, elt, add) + Lisp_Object fontset, range, elt, add; + { + int from, to, from1, to1; + Lisp_Object elt1; - ++ + from = XINT (XCAR (range)); + to = XINT (XCDR (range)); + do { + elt1 = char_table_ref_and_range (fontset, from, &from1, &to1); + if (to < to1) + to1 = to; + if (NILP (elt1)) + elt1 = Fmake_vector (make_number (1), elt); + else + { + int i, i0 = 1, i1 = ASIZE (elt1) + 1; + Lisp_Object new; + + new = Fmake_vector (make_number (i1), elt); + if (EQ (add, Qappend)) + i0--, i1--; + for (i = 0; i0 < i1; i++, i0++) + ASET (new, i0, AREF (elt1, i)); + elt1 = new; + } + char_table_set_range (fontset, from, to1, elt1); + from = to1 + 1; + } while (from < to); + return Qnil; + } + + + /* Update FONTSET_ELEMENT which has this form: + ( CHARSET-PRIORITY-LIST-TICK . FONT-VECTOR). + Reorder FONT-VECTOR according to the current order of charset + (Vcharset_ordered_list), and update CHARSET-PRIORITY-LIST-TICK to + the latest value. */ static void - fontset_set (fontset, c, newelt) + reorder_font_vector (fontset_element) + Lisp_Object fontset_element; + { + Lisp_Object vec, list, *new_vec; + int size; + int *charset_id_table; + int i, idx; + + XSETCAR (fontset_element, make_number (charset_ordered_list_tick)); + vec = XCDR (fontset_element); + size = ASIZE (vec); + if (size < 2) + /* No need of reordering VEC. */ + return; + charset_id_table = (int *) alloca (sizeof (int) * size); + new_vec = (Lisp_Object *) alloca (sizeof (Lisp_Object) * size); + /* At first, extract ENCODING (a chaset ID) from VEC. VEC has this + form: + [[FACE-ID FONT-INDEX [ FONT-SPEC ENCODING REPERTORY ]] ...] */ + for (i = 0; i < size; i++) + charset_id_table[i] = XINT (AREF (AREF (AREF (vec, i), 2), 1)); + + /* Then, store the elements of VEC in NEW_VEC in the correct + order. */ + idx = 0; + for (list = Vcharset_ordered_list; CONSP (list); list = XCDR (list)) + { + for (i = 0; i < size; i++) + if (charset_id_table[i] == XINT (XCAR (list))) + new_vec[idx++] = AREF (vec, i); + if (idx == size) + break; + } + + /* At last, update VEC. */ + for (i = 0; i < size; i++) + ASET (vec, i, new_vec[i]); + } + + + /* Load a font matching the font related attributes in FACE->lface and + font pattern in FONT_DEF of FONTSET, and return an index of the + font. FONT_DEF has this form: + [ FONT-SPEC ENCODING REPERTORY ] + If REPERTORY is nil, generate a char-table representing the font + repertory by looking into the font itself. */ + + static int + load_font_get_repertory (f, face, font_def, fontset) + FRAME_PTR f; + struct face *face; + Lisp_Object font_def; + Lisp_Object fontset; + { + char *font_name; + struct font_info *font_info; + - font_name = choose_face_font (f, face->lface, AREF (font_def, 0)); ++ font_name = choose_face_font (f, face->lface, AREF (font_def, 0), NULL); + if (! (font_info = fs_load_font (f, font_name, XINT (AREF (font_def, 1))))) + return -1; + + if (NILP (AREF (font_def, 2)) + && NILP (Fassq (make_number (font_info->font_idx), + FONTSET_REPERTORY (fontset)))) + { + /* We must look into the font to get the correct repertory as a + char-table. */ + Lisp_Object repertory; + + repertory = (*get_font_repertory_func) (f, font_info); + FONTSET_REPERTORY (fontset) + = Fcons (Fcons (make_number (font_info->font_idx), repertory), - FONTSET_REPERTORY (fontset)); ++ FONTSET_REPERTORY (fontset)); + } + + return font_info->font_idx; + } + + + /* Return a face ID registerd in the realized fontset FONTSET for the + character C. If FACE is NULL, return -1 if a face is not yet + set. Otherwise, realize a proper face from FACE and return it. */ + + static int + fontset_face (fontset, c, face) Lisp_Object fontset; int c; - Lisp_Object newelt; + struct face *face; { - int charset, code[3]; - Lisp_Object *elt; - int i; + Lisp_Object base_fontset, elt, vec; + int i, from, to; + int font_idx; + FRAME_PTR f = XFRAME (FONTSET_FRAME (fontset)); + + base_fontset = FONTSET_BASE (fontset); + elt = CHAR_TABLE_REF (fontset, c); + + if (EQ (elt, Qt)) + goto try_default; + + if (NILP (elt)) + { + /* We have not yet decided a face for C. */ + Lisp_Object range; + + if (! face) + return -1; + elt = FONTSET_REF_AND_RANGE (base_fontset, c, from, to); + range = Fcons (make_number (from), make_number (to)); + if (NILP (elt)) + { + /* Record that we have no font for characters of this + range. */ + FONTSET_SET (fontset, range, Qt); + goto try_default; + } + elt = Fcopy_sequence (elt); + /* Now ELT is a vector of FONT-DEFs. We at first change it to + FONT-VECTOR, a vector of [ nil nil FONT-DEF ]. */ + for (i = 0; i < ASIZE (elt); i++) + { + Lisp_Object tmp; + + tmp = Fmake_vector (make_number (3), Qnil); + ASET (tmp, 2, AREF (elt, i)); + ASET (elt, i, tmp); + } + /* Then store (-1 . FONT-VECTOR) in the fontset. -1 is to force + reordering of FONT-VECTOR. */ + elt = Fcons (make_number (-1), elt); + FONTSET_SET (fontset, range, elt); + } - if (SINGLE_BYTE_CHAR_P (c)) + if (XINT (XCAR (elt)) != charset_ordered_list_tick) + /* The priority of charsets is changed after we selected a face + for C last time. */ + reorder_font_vector (elt); + + vec = XCDR (elt); + /* Find the first available font in the font vector VEC. */ + for (i = 0; i < ASIZE (vec); i++) { - FONTSET_ASCII (fontset) = newelt; - return; + Lisp_Object font_def; + + elt = AREF (vec, i); + /* ELT == [ FACE-ID FONT-INDEX [ FONT-SPEC ENCODING REPERTORY ] ] */ + font_def = AREF (elt, 2); + if (INTEGERP (AREF (elt, 1)) && XINT (AREF (elt, 1)) < 0) + /* We couldn't open this font last time. */ + continue; + + if (!face && (NILP (AREF (elt, 1)) || NILP (AREF (elt, 0)))) + /* We have not yet opened the font, or we have not yet made a + realized face for the font. */ + return -1; + + if (INTEGERP (AREF (font_def, 2))) + { + /* The repertory is specified by charset ID. */ + struct charset *charset + = CHARSET_FROM_ID (XINT (AREF (font_def, 2))); + + if (! CHAR_CHARSET_P (c, charset)) + /* This font can't display C. */ + continue; + } + else + { + Lisp_Object slot; + + if (! INTEGERP (AREF (elt, 1))) + { + /* We have not yet opened a font matching this spec. + Open the best matching font now and register the + repertory. */ + font_idx = load_font_get_repertory (f, face, font_def, fontset); + ASET (elt, 1, make_number (font_idx)); + if (font_idx < 0) + /* This means that we couldn't find a font matching + FONT_DEF. */ + continue; + } + + slot = Fassq (AREF (elt, 1), FONTSET_REPERTORY (fontset)); + if (! CONSP (slot)) + abort (); + if (NILP (CHAR_TABLE_REF (XCDR (slot), c))) + /* This fond can't display C. */ + continue; + } + + /* Now we have decided to use this font spec to display C. */ + if (INTEGERP (AREF (elt, 1))) + font_idx = XINT (AREF (elt, 1)); + else + { + /* But not yet opened the best matching font. */ + font_idx = load_font_get_repertory (f, face, font_def, fontset); + ASET (elt, 1, make_number (font_idx)); + if (font_idx < 0) + continue; + } + + /* Now we have the opened font. */ + if (NILP (AREF (elt, 0))) + { + /* But not yet made a realized face that uses this font. */ + int face_id = lookup_non_ascii_face (f, font_idx, face); + + ASET (elt, 0, make_number (face_id)); + } + + /* Ok, this face can display C. */ + return XINT (AREF (elt, 0)); } - SPLIT_CHAR (c, charset, code[0], code[1]); - code[2] = 0; /* anchor */ - elt = &XCHAR_TABLE (fontset)->contents[charset + 128]; - for (i = 0; code[i] > 0; i++) + try_default: + if (! EQ (base_fontset, Vdefault_fontset)) + return fontset_face (FONTSET_FALLBACK (fontset), c, face); + + /* We have tried all the fonts for C, but none of them can be opened + nor can display C. */ + if (NILP (FONTSET_NOFONT_FACE (fontset))) { - if (!SUB_CHAR_TABLE_P (*elt)) - *elt = make_sub_char_table (*elt); - elt = &XCHAR_TABLE (*elt)->contents[code[i]]; + int face_id; + + if (! face) + return -1; + face_id = lookup_non_ascii_face (f, -1, face); + FONTSET_NOFONT_FACE (fontset) = make_number (face_id); } - if (SUB_CHAR_TABLE_P (*elt)) - XCHAR_TABLE (*elt)->defalt = newelt; - else - *elt = newelt; + return XINT (FONTSET_NOFONT_FACE (fontset)); } @@@ -854,102 -1055,58 +1057,58 @@@ list_fontsets (f, pattern, size || !BASE_FONTSET_P (fontset) || !EQ (frame, FONTSET_FRAME (fontset))) continue; - name = XSTRING (FONTSET_NAME (fontset))->data; + name = SDATA (FONTSET_NAME (fontset)); - if (!NILP (regexp) + if (STRINGP (regexp) ? (fast_c_string_match_ignore_case (regexp, name) < 0) - : strcmp (XSTRING (pattern)->data, name)) + : strcmp (SDATA (pattern), name)) continue; - if (size) - { - struct font_info *fontp; - fontp = FS_LOAD_FONT (f, 0, NULL, id); - if (!fontp || size != fontp->size) - continue; - } val = Fcons (Fcopy_sequence (FONTSET_NAME (fontset)), val); } return val; } - DEFUN ("new-fontset", Fnew_fontset, Snew_fontset, 2, 2, 0, - doc: /* Create a new fontset NAME that contains font information in FONTLIST. - FONTLIST is an alist of charsets vs corresponding font name patterns. */) - (name, fontlist) - Lisp_Object name, fontlist; - { - Lisp_Object fontset, elements, ascii_font; - Lisp_Object tem, tail, elt; - (*check_window_system_func) (); -/* Free all realized fontsets whose base fontset is BASE. */ ++/* Free all realized fontsets whose base fontset is BASE. */ - CHECK_STRING (name); - CHECK_LIST (fontlist); + static void + free_realized_fontsets (base) + Lisp_Object base; + { + #if 0 + int id; - name = Fdowncase (name); - tem = Fquery_fontset (name, Qnil); - if (!NILP (tem)) - error ("Fontset `%s' matches the existing fontset `%s'", - SDATA (name), SDATA (tem)); - - /* Check the validity of FONTLIST while creating a template for - fontset elements. */ - elements = ascii_font = Qnil; - for (tail = fontlist; CONSP (tail); tail = XCDR (tail)) + /* For the moment, this doesn't work because free_realized_face + doesn't remove FACE from a cache. Until we find a solution, we + suppress this code, and simply use Fclear_face_cache even though + that is not efficient. */ + BLOCK_INPUT; + for (id = 0; id < ASIZE (Vfontset_table); id++) { - int c, charset; - - tem = XCAR (tail); - if (!CONSP (tem) - || (charset = get_charset_id (XCAR (tem))) < 0 - || (!STRINGP (XCDR (tem)) && !CONSP (XCDR (tem)))) - error ("Elements of fontlist must be a cons of charset and font name pattern"); + Lisp_Object this = AREF (Vfontset_table, id); - tem = XCDR (tem); - if (STRINGP (tem)) - tem = Fdowncase (tem); - else - tem = Fcons (Fdowncase (Fcar (tem)), Fdowncase (Fcdr (tem))); - if (charset == CHARSET_ASCII) - ascii_font = tem; - else + if (EQ (FONTSET_BASE (this), base)) { - c = MAKE_CHAR (charset, 0, 0); - elements = Fcons (Fcons (make_number (c), tem), elements); - } - } + Lisp_Object tail; - if (NILP (ascii_font)) - error ("No ASCII font in the fontlist"); + for (tail = FONTSET_FACE_ALIST (this); CONSP (tail); + tail = XCDR (tail)) + { + FRAME_PTR f = XFRAME (FONTSET_FRAME (this)); + int face_id = XINT (XCDR (XCAR (tail))); + struct face *face = FACE_FROM_ID (f, face_id); - + - fontset = make_fontset (Qnil, name, Qnil); - FONTSET_ASCII (fontset) = Fcons (make_number (0), ascii_font); - for (; CONSP (elements); elements = XCDR (elements)) - { - elt = XCAR (elements); - tem = XCDR (elt); - if (STRINGP (tem)) - tem = font_family_registry (tem, 0); - tem = Fcons (XCAR (elt), tem); - FONTSET_SET (fontset, XINT (XCAR (elt)), tem); + /* Face THIS itself is also freed by the following call. */ + free_realized_face (f, face); + } + } } - - return Qnil; - } - - - /* Clear all elements of FONTSET for multibyte characters. */ - - static void - clear_fontset_elements (fontset) - Lisp_Object fontset; - { - int i; - - for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++) - XCHAR_TABLE (fontset)->contents[i] = Qnil; + UNBLOCK_INPUT; + #else /* not 0 */ + Fclear_face_cache (Qt); + #endif /* not 0 */ } @@@ -973,26 -1130,80 +1132,80 @@@ check_fontset_name (name return FONTSET_FROM_ID (id); } - DEFUN ("set-fontset-font", Fset_fontset_font, Sset_fontset_font, 3, 4, 0, - doc: /* Modify fontset NAME to use FONTNAME for CHARACTER. + static void + accumulate_script_ranges (arg, range, val) + Lisp_Object arg, range, val; + { + if (EQ (XCAR (arg), val)) + { + if (CONSP (range)) + XSETCDR (arg, Fcons (Fcons (XCAR (range), XCDR (range)), XCDR (arg))); + else + XSETCDR (arg, Fcons (Fcons (range, range), XCDR (arg))); + } + } + + + /* Return an ASCII font name generated from fontset name NAME and + ASCII font specification ASCII_SPEC. NAME is a string conforming + to XLFD. ASCII_SPEC is a vector: + [FAMILY WEIGHT SLANT SWIDTH ADSTYLE REGISTRY]. */ + + static INLINE Lisp_Object + generate_ascii_font_name (name, ascii_spec) + Lisp_Object name, ascii_spec; + { + Lisp_Object vec; + int i; + + vec = split_font_name_into_vector (name); + for (i = FONT_SPEC_FAMILY_INDEX; i <= FONT_SPEC_ADSTYLE_INDEX; i++) + if (! NILP (AREF (ascii_spec, i))) + ASET (vec, 1 + i, AREF (ascii_spec, i)); + if (! NILP (AREF (ascii_spec, FONT_SPEC_REGISTRY_INDEX))) + ASET (vec, 12, AREF (ascii_spec, FONT_SPEC_REGISTRY_INDEX)); + return build_font_name_from_vector (vec); + } + + + DEFUN ("set-fontset-font", Fset_fontset_font, Sset_fontset_font, 3, 5, 0, - doc: /* ++ doc: /* + Modify fontset NAME to use FONT-SPEC for CHARACTER. - If NAME is nil, modify the default fontset. CHARACTER may be a cons; (FROM . TO), where FROM and TO are - non-generic characters. In that case, use FONTNAME - for all characters in the range FROM and TO (inclusive). - CHARACTER may be a charset. In that case, use FONTNAME - for all character in the charsets. - - FONTNAME may be a cons; (FAMILY . REGISTRY), where FAMILY is a family - name of a font, REGISTRY is a registry name of a font. */) - (name, character, fontname, frame) - Lisp_Object name, character, fontname, frame; + characters. In that case, use FONT-SPEC for all characters in the + range FROM and TO (inclusive). + + CHARACTER may be a script name symbol. In that case, use FONT-SPEC + for all characters that belong to the script. + + CHARACTER may be a charset which has a :code-offset attribute and the + attribute value is greater than the maximum Unicode character + \(#x10FFFF). In that case, use FONT-SPEC for all characters in the + charset. + + FONT-SPEC may be: + * A vector [ FAMILY WEIGHT SLANT WIDTH ADSTYLE REGISTRY ]. + See the documentation of `set-face-attribute' for the detail of + these vector elements; + * A cons (FAMILY . REGISTRY), where FAMILY is a font family name and + REGISTRY is a font registry name; + * A font name string. + + Optional 4th argument FRAME, if non-nil, is a frame. This argument is + kept for backward compatibility and has no meaning. + + Optional 5th argument ADD, if non-nil, specifies how to add FONT-SPEC + to the font specifications for RANGE previously set. If it is + `prepend', FONT-SPEC is prepended. If it is `append', FONT-SPEC is + appended. By default, FONT-SPEC overrides the previous settings. */) + (name, character, font_spec, frame, add) + Lisp_Object name, character, font_spec, frame, add; { - Lisp_Object fontset, elt; - Lisp_Object realized; - int from, to; - int id; - Lisp_Object family, registry; + Lisp_Object fontset; + Lisp_Object font_def, registry; + Lisp_Object encoding, repertory; + Lisp_Object range_list; fontset = check_fontset_name (name); @@@ -1020,74 -1253,195 +1255,195 @@@ } else { - CHECK_NUMBER (character); - from = XINT (character); - to = from; + CHECK_STRING (font_spec); + font_spec = Fdowncase (font_spec); + registry = split_font_name_into_vector (font_spec); + if (NILP (registry)) - error ("No XLFD: %s", XSTRING (font_spec)->data); ++ error ("No XLFD: %s", SDATA (font_spec)); + if (NILP (AREF (registry, 12)) + || NILP (AREF (registry, 13))) + error ("Registry must be specified"); + registry = concat2 (concat2 (AREF (registry, 12), build_string ("-")), + AREF (registry, 13)); } - if (!char_valid_p (from, 1)) - invalid_character (from); - if (SINGLE_BYTE_CHAR_P (from)) - error ("Can't change font for a single byte character"); - if (from < to) + + if (STRINGP (font_spec)) - encoding = find_font_encoding ((char *) XSTRING (font_spec)->data); ++ encoding = find_font_encoding ((char *) SDATA (font_spec)); + else - encoding = find_font_encoding ((char *) XSTRING (registry)->data); ++ encoding = find_font_encoding ((char *) SDATA (registry)); + if (SYMBOLP (encoding)) + encoding = repertory = CHARSET_SYMBOL_ID (encoding); + else { - if (!char_valid_p (to, 1)) - invalid_character (to); - if (SINGLE_BYTE_CHAR_P (to)) - error ("Can't change font for a single byte character"); + repertory = XCDR (encoding); + encoding = CHARSET_SYMBOL_ID (XCAR (encoding)); } + font_def = Fmake_vector (make_number (3), font_spec); + ASET (font_def, 1, encoding); + ASET (font_def, 2, repertory); - if (STRINGP (fontname)) + if (CHARACTERP (character)) + range_list = Fcons (Fcons (character, character), Qnil); + else if (CONSP (character)) { - fontname = Fdowncase (fontname); - elt = Fcons (make_number (from), font_family_registry (fontname, 0)); + Lisp_Object from, to; + + from = Fcar (character); + to = Fcdr (character); + CHECK_CHARACTER (from); + CHECK_CHARACTER (to); + range_list = Fcons (character, Qnil); } else { - CHECK_CONS (fontname); - family = XCAR (fontname); - registry = XCDR (fontname); - if (!NILP (family)) + Lisp_Object script_list; + Lisp_Object val; + + CHECK_SYMBOL (character); + range_list = Qnil; + script_list = XCHAR_TABLE (Vchar_script_table)->extras[0]; + if (! NILP (Fmemq (character, script_list))) { - CHECK_STRING (family); - family = Fdowncase (family); + val = Fcons (character, Qnil); + map_char_table (accumulate_script_ranges, Qnil, Vchar_script_table, - val, 0, NULL); - range_list = XCDR (val); ++ val); ++ range_list = XCDR (val); } - if (!NILP (registry)) + else if (CHARSETP (character)) { - CHECK_STRING (registry); - registry = Fdowncase (registry); + struct charset *charset; + + CHECK_CHARSET_GET_CHARSET (character, charset); + if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET) + range_list + = Fcons (Fcons (make_number (CHARSET_MIN_CHAR (charset)), + make_number (CHARSET_MAX_CHAR (charset))), + range_list); + if (EQ (character, Qascii)) + { + if (VECTORP (font_spec)) + font_spec = generate_ascii_font_name (FONTSET_NAME (fontset), + font_spec); + FONTSET_ASCII (fontset) = font_spec; + } } - elt = Fcons (make_number (from), Fcons (family, registry)); + + if (NILP (range_list)) + error ("Invalid script or charset name: %s", - XSYMBOL (character)->name->data); ++ SDATA (SYMBOL_NAME (character))); } - /* The arg FRAME is kept for backward compatibility. We only check - the validity. */ - if (!NILP (frame)) - CHECK_LIVE_FRAME (frame); + for (; CONSP (range_list); range_list = XCDR (range_list)) + FONTSET_ADD (fontset, XCAR (range_list), font_def, add); - for (; from <= to; from++) - FONTSET_SET (fontset, from, elt); - Foptimize_char_table (fontset); + /* Free all realized fontsets whose base is FONTSET. This way, the + specified character(s) are surely redisplayed by a correct + font. */ + free_realized_fontsets (fontset); - /* If there's a realized fontset REALIZED whose parent is FONTSET, - clear all the elements of REALIZED and free all multibyte faces - whose fontset is REALIZED. This way, the specified character(s) - are surely redisplayed by a correct font. */ - for (id = 0; id < ASIZE (Vfontset_table); id++) + return Qnil; + } + + + DEFUN ("new-fontset", Fnew_fontset, Snew_fontset, 2, 2, 0, + doc: /* Create a new fontset NAME from font information in FONTLIST. + + FONTLIST is an alist of scripts vs the corresponding font specification list. + Each element of FONTLIST has the form (SCRIPT FONT-SPEC ...), where a + character of SCRIPT is displayed by a font that matches one of + FONT-SPEC. + + SCRIPT is a symbol that appears in the first extra slot of the + char-table `char-script-table'. + + FONT-SPEC is a vector, a cons, or a string. See the documentation of + `set-fontset-font' for the meaning. */) + (name, fontlist) + Lisp_Object name, fontlist; + { + Lisp_Object fontset; + Lisp_Object val; + int id; + + CHECK_STRING (name); + CHECK_LIST (fontlist); + + id = fs_query_fontset (name, 0); + if (id < 0) { - realized = AREF (Vfontset_table, id); - if (!NILP (realized) - && !BASE_FONTSET_P (realized) - && EQ (FONTSET_BASE (realized), fontset)) - { - FRAME_PTR f = XFRAME (FONTSET_FRAME (realized)); - clear_fontset_elements (realized); - free_realized_multibyte_face (f, id); - } + name = Fdowncase (name); + val = split_font_name_into_vector (name); + if (NILP (val)) + error ("Fontset name must be in XLFD format"); - if (strcmp (XSTRING (AREF (val, 12))->data, "fontset")) ++ if (strcmp (SDATA (AREF (val, 12)), "fontset")) + error ("Registry field of fontset name must be \"fontset\""); + Vfontset_alias_alist + = Fcons (Fcons (name, + concat2 (concat2 (AREF (val, 12), build_string ("-")), + AREF (val, 13))), + Vfontset_alias_alist); + ASET (val, 12, build_string ("iso8859-1")); + fontset = make_fontset (Qnil, name, Qnil); + FONTSET_ASCII (fontset) = build_font_name_from_vector (val); + } + else + { + fontset = FONTSET_FROM_ID (id);; + free_realized_fontsets (fontset); + Fset_char_table_range (fontset, Qt, Qnil); } - return Qnil; + for (; ! NILP (fontlist); fontlist = Fcdr (fontlist)) + { + Lisp_Object elt, script; + + elt = Fcar (fontlist); + script = Fcar (elt); + for (elt = Fcdr (elt); ! NILP (elt); elt = Fcdr (elt)) + Fset_fontset_font (name, script, Fcar (elt), Qnil, Qappend); + } + return name; + } + + + /* Number of fontsets created from a fontname automatically. */ + static int n_auto_fontsets; + + int + new_fontset_from_font_name (Lisp_Object fontname) + { + Lisp_Object name; + Lisp_Object vec; + + fontname = Fdowncase (fontname); + vec = split_font_name_into_vector (fontname); + if ( NILP (vec)) + vec = Fmake_vector (make_number (14), build_string ("")); + ASET (vec, 12, build_string ("fontset")); + if (n_auto_fontsets == 0) + { + ASET (vec, 13, build_string ("startup")); + name = build_font_name_from_vector (vec); + n_auto_fontsets++; + } + else + { + char temp[20]; + + do { + sprintf (temp, "auto%d", n_auto_fontsets); + ASET (vec, 13, build_string (temp)); + name = build_font_name_from_vector (vec); + n_auto_fontsets++; + } while (fs_query_fontset (name, 0) >= 0); + } + name = Fnew_fontset (name, + Fcons (Fcons (Qascii, Fcons (fontname, Qnil)), Qnil)); + Vfontset_alias_alist = Fcons (Fcons (name, fontname), Vfontset_alias_alist); + return fs_query_fontset (name, 0); } + DEFUN ("font-info", Ffont_info, Sfont_info, 1, 2, 0, doc: /* Return information about a font named NAME on frame FRAME. If FRAME is omitted or nil, use the selected frame. @@@ -1301,59 -1591,79 +1593,79 @@@ fontset. The format is the same as abo realized[n_realized++] = elt; } - if (! EQ (fontset, Vdefault_fontset)) - { - /* Merge FONTSET onto the default fontset. */ - val = Fcopy_sequence (Vdefault_fontset); - map_char_table (override_font_info, Qnil, fontset, fontset, val, 0, indices); - fontset = val; - } - /* Accumulate information of the fontset in VAL. The format is - (LAST FONT-INFO FONT-INFO ...), where FONT-INFO is (CHAR-OR-RANGE - FONT-SPEC). See the comment for accumulate_font_info for the - detail. */ - val = Fcons (Fcons (make_number (0), - Fcons (XCDR (FONTSET_ASCII (fontset)), Qnil)), - Qnil); - val = Fcons (val, val); - map_char_table (accumulate_font_info, Qnil, fontset, fontset, val, 0, indices); - val = XCDR (val); - - /* For each FONT-INFO, if CHAR_OR_RANGE (car part) is a generic - character for a charset, replace it with the charset symbol. If - fonts are opened for FONT-SPEC, append the names of the fonts to - FONT-SPEC. */ - for (tail = val; CONSP (tail); tail = XCDR (tail)) + table = Fmake_char_table (Qfontset_info, Qnil); + XCHAR_TABLE (table)->extras[0] = Fmake_char_table (Qnil, Qnil); + /* Accumulate information of the fontset in TABLE. The format of + each element is ((FONT-SPEC OPENED-FONT ...) ...). */ + for (fallback = 0; fallback <= 1; fallback++) { - int c; - elt = XCAR (tail); - if (INTEGERP (XCAR (elt))) + Lisp_Object this_fontset, this_table; - ++ + if (! fallback) { - int charset, c1, c2; - c = XINT (XCAR (elt)); - SPLIT_CHAR (c, charset, c1, c2); - if (c1 == 0) - XSETCAR (elt, CHARSET_SYMBOL (charset)); + this_fontset = fontset; + this_table = table; } else - c = XINT (XCAR (XCAR (elt))); - for (i = 0; i < n_realized; i++) { - Lisp_Object face_id, font; - struct face *face; + this_fontset = Vdefault_fontset; + this_table = XCHAR_TABLE (table)->extras[0]; + #if 0 + for (i = 0; i < n_realized; i++) + realized[i] = FONTSET_FALLBACK (realized[i]); + #endif + } + for (c = 0; c <= MAX_5_BYTE_CHAR; ) + { + int from, to; - face_id = FONTSET_REF_VIA_BASE (realized[i], c); - if (INTEGERP (face_id)) + val = char_table_ref_and_range (this_fontset, c, &from, &to); + if (VECTORP (val)) { - face = FACE_FROM_ID (f, XINT (face_id)); - if (face && face->font && face->font_name) + Lisp_Object alist; + + /* At first, set ALIST to ((FONT-SPEC) ...). */ + for (alist = Qnil, i = 0; i < ASIZE (val); i++) + alist = Fcons (Fcons (AREF (AREF (val, i), 0), Qnil), alist); + alist = Fnreverse (alist); + + /* Then store opend font names to cdr of each elements. */ + for (i = 0; i < n_realized; i++) { - font = build_string (face->font_name); - if (NILP (Fmember (font, XCDR (XCDR (elt))))) - XSETCDR (XCDR (elt), Fcons (font, XCDR (XCDR (elt)))); + if (NILP (realized[i])) + continue; + val = FONTSET_REF (realized[i], c); + if (NILP (val)) + continue; + val = XCDR (val); + /* Now VAL is [[FACE-ID FONT-INDEX FONT-DEF] ...]. + If a font of an element is already opened, + FONT-INDEX of the element is integer. */ + for (j = 0; j < ASIZE (val); j++) + if (INTEGERP (AREF (AREF (val, j), 0))) + { + Lisp_Object font_idx; + + font_idx = AREF (AREF (val, j), 1); + elt = Fassq (AREF (AREF (AREF (val, j), 2), 0), alist); + if (CONSP (elt) + && NILP (Fmemq (font_idx, XCDR(elt)))) + nconc2 (elt, Fcons (font_idx, Qnil)); + } } + for (val = alist; CONSP (val); val = XCDR (val)) + for (elt = XCDR (XCAR (val)); CONSP (elt); elt = XCDR (elt)) + { + struct font_info *font_info + = (*get_font_info_func) (f, XINT (XCAR (elt))); + XSETCAR (elt, build_string (font_info->full_name)); + } + + /* Store ALIST in TBL for characters C..TO. */ + char_table_set_range (this_table, c, to, alist); } + c = to + 1; } } @@@ -1450,28 -1752,27 +1754,27 @@@ syms_of_fontset ( next_fontset_id = 1; DEFVAR_LISP ("font-encoding-alist", &Vfont_encoding_alist, - doc: /* Alist of fontname patterns vs corresponding encoding info. - Each element looks like (REGEXP . ENCODING-INFO), - where ENCODING-INFO is an alist of CHARSET vs ENCODING. - ENCODING is one of the following integer values: - 0: code points 0x20..0x7F or 0x2020..0x7F7F are used, - 1: code points 0xA0..0xFF or 0xA0A0..0xFFFF are used, - 2: code points 0x20A0..0x7FFF are used, - 3: code points 0xA020..0xFF7F are used. */); + doc: /* + Alist of fontname patterns vs the corresponding encoding and repertory info. + Each element looks like (REGEXP . (ENCODING . REPERTORY)), + where ENCODING is a charset or a char-table, -and REPERTORY is a charset, a char-table, or nil. ++and REPERTORY is a charset, a char-table, or nil. + + ENCODING is for converting a character to a glyph code of the font. + If ENCODING is a charset, encoding a character by the charset gives + the corresponding glyph code. If ENCODING is a char-table, looking up + the table by a character gives the corresponding glyph code. + + REPERTORY specifies a repertory of characters supported by the font. + If REPERTORY is a charset, all characters beloging to the charset are + supported. If REPERTORY is a char-table, all characters who have a + non-nil value in the table are supported. It REPERTORY is nil, Emacs + gets the repertory information by an opened font and ENCODING. */); Vfont_encoding_alist = Qnil; - Vfont_encoding_alist - = Fcons (Fcons (build_string ("JISX0201"), - Fcons (Fcons (intern ("latin-jisx0201"), make_number (0)), - Qnil)), - Vfont_encoding_alist); - Vfont_encoding_alist - = Fcons (Fcons (build_string ("ISO8859-1"), - Fcons (Fcons (intern ("ascii"), make_number (0)), - Qnil)), - Vfont_encoding_alist); DEFVAR_LISP ("use-default-ascent", &Vuse_default_ascent, - doc: /* Char table of characters whose ascent values should be ignored. + doc: /* + Char table of characters whose ascent values should be ignored. If an entry for a character is non-nil, the ascent value of the glyph is assumed to be what specified by _MULE_DEFAULT_ASCENT property of a font. @@@ -1515,3 -1817,26 +1819,26 @@@ at the vertical center of lines. */) defsubr (&Sfontset_font); defsubr (&Sfontset_list); } + + Lisp_Object + dump_fontset (fontset) + Lisp_Object fontset; + { + Lisp_Object val; - ++ + if (NILP (FONTSET_FALLBACK (fontset))) + val = Fcons (Fcons (intern ("fallback-id"), Qnil), Qnil); + else + val = Fcons (Fcons (intern ("fallback-id"), + FONTSET_ID (FONTSET_FALLBACK (fontset))), + Qnil); + if (NILP (FONTSET_BASE (fontset))) + val = Fcons (Fcons (intern ("base"), Qnil), val); + else + val = Fcons (Fcons (intern ("base"), + FONTSET_NAME (FONTSET_BASE (fontset))), + val); + val = Fcons (Fcons (intern ("name"), FONTSET_NAME (fontset)), val); + val = Fcons (Fcons (intern ("id"), FONTSET_ID (fontset)), val); + return val; + } diff --cc src/fontset.h index 634711110bc,1d8e38262c1..5c70eddecdc --- a/src/fontset.h +++ b/src/fontset.h @@@ -1,6 -1,9 +1,9 @@@ /* Header for fontset handler. Copyright (C) 1995, 1997, 2000 Electrotechnical Laboratory, JAPAN. -- Licensed to the Free Software Foundation. - Copyright (C) 2001, 2002 ++ Licensed to the Free Software Foundation. ++ Copyright (C) 2003 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. @@@ -194,8 -201,7 +201,7 @@@ extern int fs_query_fontset P_ ((Lisp_O EXFUN (Fquery_fontset, 2); extern Lisp_Object list_fontsets P_ ((struct frame *, Lisp_Object, int)); - extern Lisp_Object Qfontset; -extern Lisp_Object Vuse_default_ascent; +extern Lisp_Object Vuse_default_ascent; extern Lisp_Object Vignore_relative_composition; extern Lisp_Object Valternate_fontname_alist; extern Lisp_Object Vfontset_alias_alist; diff --cc src/frame.c index 98b3e7067bd,a62e2424980..a92176730b5 --- a/src/frame.c +++ b/src/frame.c @@@ -2489,1648 -2450,160 +2489,1648 @@@ the rightmost or bottommost possible po } +/*********************************************************************** + Frame Parameters + ***********************************************************************/ + +/* Connect the frame-parameter names for X frames + to the ways of passing the parameter values to the window system. + + The name of a parameter, as a Lisp symbol, + has an `x-frame-parameter' property which is an integer in Lisp + that is an index in this table. */ + +struct frame_parm_table { + char *name; + Lisp_Object *variable; +}; + +static struct frame_parm_table frame_parms[] = +{ + {"auto-raise", &Qauto_raise}, + {"auto-lower", &Qauto_lower}, + {"background-color", 0}, + {"border-color", &Qborder_color}, + {"border-width", &Qborder_width}, + {"cursor-color", &Qcursor_color}, + {"cursor-type", &Qcursor_type}, + {"font", 0}, + {"foreground-color", 0}, + {"icon-name", &Qicon_name}, + {"icon-type", &Qicon_type}, + {"internal-border-width", &Qinternal_border_width}, + {"menu-bar-lines", &Qmenu_bar_lines}, + {"mouse-color", &Qmouse_color}, + {"name", &Qname}, + {"scroll-bar-width", &Qscroll_bar_width}, + {"title", &Qtitle}, + {"unsplittable", &Qunsplittable}, + {"vertical-scroll-bars", &Qvertical_scroll_bars}, + {"visibility", &Qvisibility}, + {"tool-bar-lines", &Qtool_bar_lines}, + {"scroll-bar-foreground", &Qscroll_bar_foreground}, + {"scroll-bar-background", &Qscroll_bar_background}, + {"screen-gamma", &Qscreen_gamma}, + {"line-spacing", &Qline_spacing}, + {"left-fringe", &Qleft_fringe}, + {"right-fringe", &Qright_fringe}, + {"wait-for-wm", &Qwait_for_wm}, + {"fullscreen", &Qfullscreen}, +}; + +#ifdef HAVE_WINDOW_SYSTEM + +extern Lisp_Object Qbox; +extern Lisp_Object Qtop; + +/* Calculate fullscreen size. Return in *TOP_POS and *LEFT_POS the + wanted positions of the WM window (not emacs window). + Return in *WIDTH and *HEIGHT the wanted width and height of Emacs + window (FRAME_X_WINDOW). + */ + void -syms_of_frame () +x_fullscreen_adjust (f, width, height, top_pos, left_pos) + struct frame *f; + int *width; + int *height; + int *top_pos; + int *left_pos; { - Qframep = intern ("framep"); - staticpro (&Qframep); - Qframe_live_p = intern ("frame-live-p"); - staticpro (&Qframe_live_p); - Qheight = intern ("height"); - staticpro (&Qheight); - Qicon = intern ("icon"); - staticpro (&Qicon); - Qminibuffer = intern ("minibuffer"); - staticpro (&Qminibuffer); - Qmodeline = intern ("modeline"); - staticpro (&Qmodeline); - Qname = intern ("name"); - staticpro (&Qname); - Qonly = intern ("only"); - staticpro (&Qonly); - Qunsplittable = intern ("unsplittable"); - staticpro (&Qunsplittable); - Qmenu_bar_lines = intern ("menu-bar-lines"); - staticpro (&Qmenu_bar_lines); - Qtool_bar_lines = intern ("tool-bar-lines"); - staticpro (&Qtool_bar_lines); - Qwidth = intern ("width"); - staticpro (&Qwidth); - Qx = intern ("x"); - staticpro (&Qx); - Qw32 = intern ("w32"); - staticpro (&Qw32); - Qpc = intern ("pc"); - staticpro (&Qpc); - Qmac = intern ("mac"); - staticpro (&Qmac); - Qvisible = intern ("visible"); - staticpro (&Qvisible); - Qbuffer_predicate = intern ("buffer-predicate"); - staticpro (&Qbuffer_predicate); - Qbuffer_list = intern ("buffer-list"); - staticpro (&Qbuffer_list); - Qtitle = intern ("title"); - staticpro (&Qtitle); - Qdisplay_type = intern ("display-type"); - staticpro (&Qdisplay_type); - Qbackground_mode = intern ("background-mode"); - staticpro (&Qbackground_mode); - Qleft_fringe = intern ("left-fringe"); - staticpro (&Qleft_fringe); - Qright_fringe = intern ("right-fringe"); - staticpro (&Qright_fringe); - Qtty_color_mode = intern ("tty-color-mode"); - staticpro (&Qtty_color_mode); + int newwidth = FRAME_COLS (f); + int newheight = FRAME_LINES (f); - DEFVAR_LISP ("default-frame-alist", &Vdefault_frame_alist, - doc: /* Alist of default values for frame creation. -These may be set in your init file, like this: - (setq default-frame-alist '((width . 80) (height . 55) (menu-bar-lines . 1)) -These override values given in window system configuration data, - including X Windows' defaults database. -For values specific to the first Emacs frame, see `initial-frame-alist'. -For values specific to the separate minibuffer frame, see - `minibuffer-frame-alist'. -The `menu-bar-lines' element of the list controls whether new frames - have menu bars; `menu-bar-mode' works by altering this element. */); - Vdefault_frame_alist = Qnil; + *top_pos = f->top_pos; + *left_pos = f->left_pos; - Qinhibit_default_face_x_resources - = intern ("inhibit-default-face-x-resources"); - staticpro (&Qinhibit_default_face_x_resources); + if (f->want_fullscreen & FULLSCREEN_HEIGHT) + { + int ph; - DEFVAR_LISP ("terminal-frame", &Vterminal_frame, - doc: /* The initial frame-object, which represents Emacs's stdout. */); + ph = FRAME_X_DISPLAY_INFO (f)->height; + newheight = FRAME_PIXEL_HEIGHT_TO_TEXT_LINES (f, ph); + ph = FRAME_TEXT_LINES_TO_PIXEL_HEIGHT (f, newheight) - f->y_pixels_diff; + newheight = FRAME_PIXEL_HEIGHT_TO_TEXT_LINES (f, ph); + *top_pos = 0; + } - DEFVAR_LISP ("emacs-iconified", &Vemacs_iconified, - doc: /* Non-nil if all of emacs is iconified and frame updates are not needed. */); - Vemacs_iconified = Qnil; + if (f->want_fullscreen & FULLSCREEN_WIDTH) + { + int pw; - DEFVAR_LISP ("mouse-position-function", &Vmouse_position_function, - doc: /* If non-nil, function to transform normal value of `mouse-position'. -`mouse-position' calls this function, passing its usual return value as -argument, and returns whatever this function returns. -This abnormal hook exists for the benefit of packages like `xt-mouse.el' -which need to do mouse handling at the Lisp level. */); - Vmouse_position_function = Qnil; + pw = FRAME_X_DISPLAY_INFO (f)->width; + newwidth = FRAME_PIXEL_WIDTH_TO_TEXT_COLS (f, pw); + pw = FRAME_TEXT_COLS_TO_PIXEL_WIDTH (f, newwidth) - f->x_pixels_diff; + newwidth = FRAME_PIXEL_WIDTH_TO_TEXT_COLS (f, pw); + *left_pos = 0; + } - DEFVAR_LISP ("mouse-highlight", &Vmouse_highlight, - doc: /* If non-nil, clickable text is highlighted when mouse is over it. -If the value is an integer, highlighting is only shown after moving the -mouse, while keyboard input turns off the highlight even when the mouse -is over the clickable text. However, the mouse shape still indicates -when the mouse is over clickable text. */); - Vmouse_highlight = Qt; + *width = newwidth; + *height = newheight; +} - DEFVAR_KBOARD ("default-minibuffer-frame", Vdefault_minibuffer_frame, - doc: /* Minibufferless frames use this frame's minibuffer. -Emacs cannot create minibufferless frames unless this is set to an -appropriate surrogate. +/* Really try to move where we want to be in case of fullscreen. Some WMs + moves the window where we tell them. Some (mwm, twm) moves the outer + window manager window there instead. + Try to compensate for those WM here. */ -Emacs consults this variable only when creating minibufferless -frames; once the frame is created, it sticks with its assigned -minibuffer, no matter what this variable is set to. This means that -this variable doesn't necessarily say anything meaningful about the -current set of frames, or where the minibuffer is currently being -displayed. +static void +x_fullscreen_move (f, new_top, new_left) + struct frame *f; + int new_top; + int new_left; +{ + if (new_top != f->top_pos || new_left != f->left_pos) + { + int move_x = new_left; + int move_y = new_top; -This variable is local to the current terminal and cannot be buffer-local. */); +#ifdef HAVE_X_WINDOWS + move_x += FRAME_X_OUTPUT (f)->x_pixels_outer_diff; + move_y += FRAME_X_OUTPUT (f)->y_pixels_outer_diff; +#endif - staticpro (&Vframe_list); + f->want_fullscreen |= FULLSCREEN_MOVE_WAIT; + x_set_offset (f, move_x, move_y, 1); + } +} - defsubr (&Sactive_minibuffer_window); - defsubr (&Sframep); - defsubr (&Sframe_live_p); - defsubr (&Smake_terminal_frame); - defsubr (&Shandle_switch_frame); - defsubr (&Signore_event); - defsubr (&Sselect_frame); - defsubr (&Sselected_frame); - defsubr (&Swindow_frame); - defsubr (&Sframe_root_window); - defsubr (&Sframe_first_window); - defsubr (&Sframe_selected_window); - defsubr (&Sset_frame_selected_window); - defsubr (&Sframe_list); - defsubr (&Snext_frame); - defsubr (&Sprevious_frame); - defsubr (&Sdelete_frame); - defsubr (&Smouse_position); - defsubr (&Smouse_pixel_position); - defsubr (&Sset_mouse_position); - defsubr (&Sset_mouse_pixel_position); -#if 0 - defsubr (&Sframe_configuration); - defsubr (&Srestore_frame_configuration); +/* Change the parameters of frame F as specified by ALIST. + If a parameter is not specially recognized, do nothing special; + otherwise call the `x_set_...' function for that parameter. + Except for certain geometry properties, always call store_frame_param + to store the new value in the parameter alist. */ + +void +x_set_frame_parameters (f, alist) + FRAME_PTR f; + Lisp_Object alist; +{ + Lisp_Object tail; + + /* If both of these parameters are present, it's more efficient to + set them both at once. So we wait until we've looked at the + entire list before we set them. */ + int width, height; + + /* Same here. */ + Lisp_Object left, top; + + /* Same with these. */ + Lisp_Object icon_left, icon_top; + + /* Record in these vectors all the parms specified. */ + Lisp_Object *parms; + Lisp_Object *values; + int i, p; + int left_no_change = 0, top_no_change = 0; + int icon_left_no_change = 0, icon_top_no_change = 0; + int fullscreen_is_being_set = 0; + + struct gcpro gcpro1, gcpro2; + + i = 0; + for (tail = alist; CONSP (tail); tail = Fcdr (tail)) + i++; + + parms = (Lisp_Object *) alloca (i * sizeof (Lisp_Object)); + values = (Lisp_Object *) alloca (i * sizeof (Lisp_Object)); + + /* Extract parm names and values into those vectors. */ + + i = 0; + for (tail = alist; CONSP (tail); tail = Fcdr (tail)) + { + Lisp_Object elt; + + elt = Fcar (tail); + parms[i] = Fcar (elt); + values[i] = Fcdr (elt); + i++; + } + /* TAIL and ALIST are not used again below here. */ + alist = tail = Qnil; + + GCPRO2 (*parms, *values); + gcpro1.nvars = i; + gcpro2.nvars = i; + + /* There is no need to gcpro LEFT, TOP, ICON_LEFT, or ICON_TOP, + because their values appear in VALUES and strings are not valid. */ + top = left = Qunbound; + icon_left = icon_top = Qunbound; + + /* Provide default values for HEIGHT and WIDTH. */ + width = (f->new_text_cols ? f->new_text_cols : FRAME_COLS (f)); + height = (f->new_text_lines ? f->new_text_lines : FRAME_LINES (f)); + + /* Process foreground_color and background_color before anything else. + They are independent of other properties, but other properties (e.g., + cursor_color) are dependent upon them. */ + /* Process default font as well, since fringe widths depends on it. */ + /* Also, process fullscreen, width and height depend upon that */ + for (p = 0; p < i; p++) + { + Lisp_Object prop, val; + + prop = parms[p]; + val = values[p]; + if (EQ (prop, Qforeground_color) + || EQ (prop, Qbackground_color) + || EQ (prop, Qfont) + || EQ (prop, Qfullscreen)) + { + register Lisp_Object param_index, old_value; + + old_value = get_frame_param (f, prop); + fullscreen_is_being_set |= EQ (prop, Qfullscreen); + + if (NILP (Fequal (val, old_value))) + { + store_frame_param (f, prop, val); + + param_index = Fget (prop, Qx_frame_parameter); + if (NATNUMP (param_index) + && (XFASTINT (param_index) + < sizeof (frame_parms)/sizeof (frame_parms[0])) + && rif->frame_parm_handlers[XINT (param_index)]) + (*(rif->frame_parm_handlers[XINT (param_index)])) (f, val, old_value); + } + } + } + + /* Now process them in reverse of specified order. */ + for (i--; i >= 0; i--) + { + Lisp_Object prop, val; + + prop = parms[i]; + val = values[i]; + + if (EQ (prop, Qwidth) && NUMBERP (val)) + width = XFASTINT (val); + else if (EQ (prop, Qheight) && NUMBERP (val)) + height = XFASTINT (val); + else if (EQ (prop, Qtop)) + top = val; + else if (EQ (prop, Qleft)) + left = val; + else if (EQ (prop, Qicon_top)) + icon_top = val; + else if (EQ (prop, Qicon_left)) + icon_left = val; + else if (EQ (prop, Qforeground_color) + || EQ (prop, Qbackground_color) + || EQ (prop, Qfont) + || EQ (prop, Qfullscreen)) + /* Processed above. */ + continue; + else + { + register Lisp_Object param_index, old_value; + + old_value = get_frame_param (f, prop); + + store_frame_param (f, prop, val); + + param_index = Fget (prop, Qx_frame_parameter); + if (NATNUMP (param_index) + && (XFASTINT (param_index) + < sizeof (frame_parms)/sizeof (frame_parms[0])) + && rif->frame_parm_handlers[XINT (param_index)]) + (*(rif->frame_parm_handlers[XINT (param_index)])) (f, val, old_value); + } + } + + /* Don't die if just one of these was set. */ + if (EQ (left, Qunbound)) + { + left_no_change = 1; + if (f->left_pos < 0) + left = Fcons (Qplus, Fcons (make_number (f->left_pos), Qnil)); + else + XSETINT (left, f->left_pos); + } + if (EQ (top, Qunbound)) + { + top_no_change = 1; + if (f->top_pos < 0) + top = Fcons (Qplus, Fcons (make_number (f->top_pos), Qnil)); + else + XSETINT (top, f->top_pos); + } + + /* If one of the icon positions was not set, preserve or default it. */ + if (EQ (icon_left, Qunbound) || ! INTEGERP (icon_left)) + { + icon_left_no_change = 1; + icon_left = Fcdr (Fassq (Qicon_left, f->param_alist)); + if (NILP (icon_left)) + XSETINT (icon_left, 0); + } + if (EQ (icon_top, Qunbound) || ! INTEGERP (icon_top)) + { + icon_top_no_change = 1; + icon_top = Fcdr (Fassq (Qicon_top, f->param_alist)); + if (NILP (icon_top)) + XSETINT (icon_top, 0); + } + +#ifndef HAVE_CARBON + /* MAC_TODO: fullscreen */ + if (FRAME_VISIBLE_P (f) && fullscreen_is_being_set) + { + /* If the frame is visible already and the fullscreen parameter is + being set, it is too late to set WM manager hints to specify + size and position. + Here we first get the width, height and position that applies to + fullscreen. We then move the frame to the appropriate + position. Resize of the frame is taken care of in the code after + this if-statement. */ + int new_left, new_top; + + x_fullscreen_adjust (f, &width, &height, &new_top, &new_left); + x_fullscreen_move (f, new_top, new_left); + } #endif - defsubr (&Smake_frame_visible); - defsubr (&Smake_frame_invisible); - defsubr (&Siconify_frame); - defsubr (&Sframe_visible_p); - defsubr (&Svisible_frame_list); - defsubr (&Sraise_frame); - defsubr (&Slower_frame); - defsubr (&Sredirect_frame_focus); - defsubr (&Sframe_focus); - defsubr (&Sframe_parameters); - defsubr (&Sframe_parameter); - defsubr (&Smodify_frame_parameters); - defsubr (&Sframe_char_height); - defsubr (&Sframe_char_width); - defsubr (&Sframe_pixel_height); - defsubr (&Sframe_pixel_width); - defsubr (&Sset_frame_height); - defsubr (&Sset_frame_width); - defsubr (&Sset_frame_size); - defsubr (&Sset_frame_position); + + /* Don't set these parameters unless they've been explicitly + specified. The window might be mapped or resized while we're in + this function, and we don't want to override that unless the lisp + code has asked for it. + + Don't set these parameters unless they actually differ from the + window's current parameters; the window may not actually exist + yet. */ + { + Lisp_Object frame; + + check_frame_size (f, &height, &width); + + XSETFRAME (frame, f); + + if (width != FRAME_COLS (f) + || height != FRAME_LINES (f) + || f->new_text_lines || f->new_text_cols) + Fset_frame_size (frame, make_number (width), make_number (height)); + + if ((!NILP (left) || !NILP (top)) + && ! (left_no_change && top_no_change) + && ! (NUMBERP (left) && XINT (left) == f->left_pos + && NUMBERP (top) && XINT (top) == f->top_pos)) + { + int leftpos = 0; + int toppos = 0; + + /* Record the signs. */ + f->size_hint_flags &= ~ (XNegative | YNegative); + if (EQ (left, Qminus)) + f->size_hint_flags |= XNegative; + else if (INTEGERP (left)) + { + leftpos = XINT (left); + if (leftpos < 0) + f->size_hint_flags |= XNegative; + } + else if (CONSP (left) && EQ (XCAR (left), Qminus) + && CONSP (XCDR (left)) + && INTEGERP (XCAR (XCDR (left)))) + { + leftpos = - XINT (XCAR (XCDR (left))); + f->size_hint_flags |= XNegative; + } + else if (CONSP (left) && EQ (XCAR (left), Qplus) + && CONSP (XCDR (left)) + && INTEGERP (XCAR (XCDR (left)))) + { + leftpos = XINT (XCAR (XCDR (left))); + } + + if (EQ (top, Qminus)) + f->size_hint_flags |= YNegative; + else if (INTEGERP (top)) + { + toppos = XINT (top); + if (toppos < 0) + f->size_hint_flags |= YNegative; + } + else if (CONSP (top) && EQ (XCAR (top), Qminus) + && CONSP (XCDR (top)) + && INTEGERP (XCAR (XCDR (top)))) + { + toppos = - XINT (XCAR (XCDR (top))); + f->size_hint_flags |= YNegative; + } + else if (CONSP (top) && EQ (XCAR (top), Qplus) + && CONSP (XCDR (top)) + && INTEGERP (XCAR (XCDR (top)))) + { + toppos = XINT (XCAR (XCDR (top))); + } + + + /* Store the numeric value of the position. */ + f->top_pos = toppos; + f->left_pos = leftpos; + + f->win_gravity = NorthWestGravity; + + /* Actually set that position, and convert to absolute. */ + x_set_offset (f, leftpos, toppos, -1); + } + + if ((!NILP (icon_left) || !NILP (icon_top)) + && ! (icon_left_no_change && icon_top_no_change)) + x_wm_set_icon_position (f, XINT (icon_left), XINT (icon_top)); + } + + UNGCPRO; +} + + +/* Insert a description of internally-recorded parameters of frame X + into the parameter alist *ALISTPTR that is to be given to the user. + Only parameters that are specific to the X window system + and whose values are not correctly recorded in the frame's + param_alist need to be considered here. */ + +void +x_report_frame_params (f, alistptr) + struct frame *f; + Lisp_Object *alistptr; +{ + char buf[16]; + Lisp_Object tem; + + /* Represent negative positions (off the top or left screen edge) + in a way that Fmodify_frame_parameters will understand correctly. */ + XSETINT (tem, f->left_pos); + if (f->left_pos >= 0) + store_in_alist (alistptr, Qleft, tem); + else + store_in_alist (alistptr, Qleft, Fcons (Qplus, Fcons (tem, Qnil))); + + XSETINT (tem, f->top_pos); + if (f->top_pos >= 0) + store_in_alist (alistptr, Qtop, tem); + else + store_in_alist (alistptr, Qtop, Fcons (Qplus, Fcons (tem, Qnil))); + + store_in_alist (alistptr, Qborder_width, + make_number (f->border_width)); + store_in_alist (alistptr, Qinternal_border_width, + make_number (FRAME_INTERNAL_BORDER_WIDTH (f))); + store_in_alist (alistptr, Qleft_fringe, + make_number (FRAME_LEFT_FRINGE_WIDTH (f))); + store_in_alist (alistptr, Qright_fringe, + make_number (FRAME_RIGHT_FRINGE_WIDTH (f))); + store_in_alist (alistptr, Qscroll_bar_width, + (! FRAME_HAS_VERTICAL_SCROLL_BARS (f) + ? make_number (0) + : FRAME_CONFIG_SCROLL_BAR_WIDTH (f) > 0 + ? make_number (FRAME_CONFIG_SCROLL_BAR_WIDTH (f)) + /* nil means "use default width" + for non-toolkit scroll bar. + ruler-mode.el depends on this. */ + : Qnil)); + sprintf (buf, "%ld", (long) FRAME_X_WINDOW (f)); + store_in_alist (alistptr, Qwindow_id, + build_string (buf)); +#ifdef HAVE_X_WINDOWS +#ifdef USE_X_TOOLKIT + /* Tooltip frame may not have this widget. */ + if (FRAME_X_OUTPUT (f)->widget) +#endif + sprintf (buf, "%ld", (long) FRAME_OUTER_WINDOW (f)); + store_in_alist (alistptr, Qouter_window_id, + build_string (buf)); +#endif + store_in_alist (alistptr, Qicon_name, f->icon_name); + FRAME_SAMPLE_VISIBILITY (f); + store_in_alist (alistptr, Qvisibility, + (FRAME_VISIBLE_P (f) ? Qt + : FRAME_ICONIFIED_P (f) ? Qicon : Qnil)); + store_in_alist (alistptr, Qdisplay, + XCAR (FRAME_X_DISPLAY_INFO (f)->name_list_element)); + +#ifndef HAVE_CARBON +/* A Mac Window is identified by a struct, not an integer. */ + if (FRAME_X_OUTPUT (f)->parent_desc == FRAME_X_DISPLAY_INFO (f)->root_window) + tem = Qnil; + else + XSETFASTINT (tem, FRAME_X_OUTPUT (f)->parent_desc); + store_in_alist (alistptr, Qparent_id, tem); +#endif +} + + +/* Change the `fullscreen' frame parameter of frame F. OLD_VALUE is + the previous value of that parameter, NEW_VALUE is the new value. */ + +void +x_set_fullscreen (f, new_value, old_value) + struct frame *f; + Lisp_Object new_value, old_value; +{ +#ifndef HAVE_CARBON + if (NILP (new_value)) + f->want_fullscreen = FULLSCREEN_NONE; + else if (EQ (new_value, Qfullboth)) + f->want_fullscreen = FULLSCREEN_BOTH; + else if (EQ (new_value, Qfullwidth)) + f->want_fullscreen = FULLSCREEN_WIDTH; + else if (EQ (new_value, Qfullheight)) + f->want_fullscreen = FULLSCREEN_HEIGHT; +#endif +} + + +/* Change the `line-spacing' frame parameter of frame F. OLD_VALUE is + the previous value of that parameter, NEW_VALUE is the new value. */ + +void +x_set_line_spacing (f, new_value, old_value) + struct frame *f; + Lisp_Object new_value, old_value; +{ + if (NILP (new_value)) + f->extra_line_spacing = 0; + else if (NATNUMP (new_value)) + f->extra_line_spacing = XFASTINT (new_value); + else + Fsignal (Qerror, Fcons (build_string ("Invalid line-spacing"), + Fcons (new_value, Qnil))); + if (FRAME_VISIBLE_P (f)) + redraw_frame (f); +} + + +/* Change the `screen-gamma' frame parameter of frame F. OLD_VALUE is + the previous value of that parameter, NEW_VALUE is the new value. */ + +void +x_set_screen_gamma (f, new_value, old_value) + struct frame *f; + Lisp_Object new_value, old_value; +{ + if (NILP (new_value)) + f->gamma = 0; + else if (NUMBERP (new_value) && XFLOATINT (new_value) > 0) + /* The value 0.4545 is the normal viewing gamma. */ + f->gamma = 1.0 / (0.4545 * XFLOATINT (new_value)); + else + Fsignal (Qerror, Fcons (build_string ("Invalid screen-gamma"), + Fcons (new_value, Qnil))); + + clear_face_cache (0); +} + + +void +x_set_font (f, arg, oldval) + struct frame *f; + Lisp_Object arg, oldval; +{ + Lisp_Object result; + Lisp_Object fontset_name; + Lisp_Object frame; + int old_fontset = FRAME_FONTSET(f); + + CHECK_STRING (arg); + + fontset_name = Fquery_fontset (arg, Qnil); + + BLOCK_INPUT; + result = (STRINGP (fontset_name) - ? x_new_fontset (f, SDATA (fontset_name)) - : x_new_font (f, SDATA (arg))); ++ ? x_new_fontset (f, fontset_name) ++ : x_new_fontset (f, arg)); + UNBLOCK_INPUT; + + if (EQ (result, Qnil)) + error ("Font `%s' is not defined", SDATA (arg)); + else if (EQ (result, Qt)) - error ("The characters of the given font have varying widths"); ++ error ("The default fontset can't be used for a frame font"); + else if (STRINGP (result)) + { + if (STRINGP (fontset_name)) + { + /* Fontset names are built from ASCII font names, so the + names may be equal despite there was a change. */ + if (old_fontset == FRAME_FONTSET (f)) + return; + } - else if (!NILP (Fequal (result, oldval))) ++ store_frame_param (f, Qfont, result); ++ if (!NILP (Fequal (result, oldval))) + return; + - store_frame_param (f, Qfont, result); + recompute_basic_faces (f); + } + else + abort (); + + do_pending_window_change (0); + + /* Don't call `face-set-after-frame-default' when faces haven't been + initialized yet. This is the case when called from + Fx_create_frame. In that case, the X widget or window doesn't + exist either, and we can end up in x_report_frame_params with a + null widget which gives a segfault. */ + if (FRAME_FACE_CACHE (f)) + { + XSETFRAME (frame, f); + call1 (Qface_set_after_frame_default, frame); + } +} + + +void +x_set_fringe_width (f, new_value, old_value) + struct frame *f; + Lisp_Object new_value, old_value; +{ + compute_fringe_widths (f, 1); +} + +void +x_set_border_width (f, arg, oldval) + struct frame *f; + Lisp_Object arg, oldval; +{ + CHECK_NUMBER (arg); + + if (XINT (arg) == f->border_width) + return; + +#ifndef HAVE_CARBON + if (FRAME_X_WINDOW (f) != 0) + error ("Cannot change the border width of a window"); +#endif /* MAC_TODO */ + + f->border_width = XINT (arg); +} + +void +x_set_internal_border_width (f, arg, oldval) + struct frame *f; + Lisp_Object arg, oldval; +{ + int old = FRAME_INTERNAL_BORDER_WIDTH (f); + + CHECK_NUMBER (arg); + FRAME_INTERNAL_BORDER_WIDTH (f) = XINT (arg); + if (FRAME_INTERNAL_BORDER_WIDTH (f) < 0) + FRAME_INTERNAL_BORDER_WIDTH (f) = 0; + +#ifdef USE_X_TOOLKIT + if (FRAME_X_OUTPUT (f)->edit_widget) + widget_store_internal_border (FRAME_X_OUTPUT (f)->edit_widget); +#endif + + if (FRAME_INTERNAL_BORDER_WIDTH (f) == old) + return; + + if (FRAME_X_WINDOW (f) != 0) + { + x_set_window_size (f, 0, FRAME_COLS (f), FRAME_LINES (f)); + SET_FRAME_GARBAGED (f); + do_pending_window_change (0); + } + else + SET_FRAME_GARBAGED (f); +} + +void +x_set_visibility (f, value, oldval) + struct frame *f; + Lisp_Object value, oldval; +{ + Lisp_Object frame; + XSETFRAME (frame, f); + + if (NILP (value)) + Fmake_frame_invisible (frame, Qt); + else if (EQ (value, Qicon)) + Ficonify_frame (frame); + else + Fmake_frame_visible (frame); +} + +void +x_set_autoraise (f, arg, oldval) + struct frame *f; + Lisp_Object arg, oldval; +{ + f->auto_raise = !EQ (Qnil, arg); +} + +void +x_set_autolower (f, arg, oldval) + struct frame *f; + Lisp_Object arg, oldval; +{ + f->auto_lower = !EQ (Qnil, arg); +} + +void +x_set_unsplittable (f, arg, oldval) + struct frame *f; + Lisp_Object arg, oldval; +{ + f->no_split = !NILP (arg); +} + +void +x_set_vertical_scroll_bars (f, arg, oldval) + struct frame *f; + Lisp_Object arg, oldval; +{ + if ((EQ (arg, Qleft) && FRAME_HAS_VERTICAL_SCROLL_BARS_ON_RIGHT (f)) + || (EQ (arg, Qright) && FRAME_HAS_VERTICAL_SCROLL_BARS_ON_LEFT (f)) + || (NILP (arg) && FRAME_HAS_VERTICAL_SCROLL_BARS (f)) + || (!NILP (arg) && ! FRAME_HAS_VERTICAL_SCROLL_BARS (f))) + { + FRAME_VERTICAL_SCROLL_BAR_TYPE (f) + = (NILP (arg) + ? vertical_scroll_bar_none + : EQ (Qleft, arg) + ? vertical_scroll_bar_left + : EQ (Qright, arg) + ? vertical_scroll_bar_right +#ifdef HAVE_NTGUI + /* MS-Windows has scroll bars on the right by default. */ + : vertical_scroll_bar_right +#else + : vertical_scroll_bar_left +#endif + ); + + /* We set this parameter before creating the X window for the + frame, so we can get the geometry right from the start. + However, if the window hasn't been created yet, we shouldn't + call x_set_window_size. */ + if (FRAME_X_WINDOW (f)) + x_set_window_size (f, 0, FRAME_COLS (f), FRAME_LINES (f)); + do_pending_window_change (0); + } +} + +void +x_set_scroll_bar_width (f, arg, oldval) + struct frame *f; + Lisp_Object arg, oldval; +{ + int wid = FRAME_COLUMN_WIDTH (f); + + if (NILP (arg)) + { + x_set_scroll_bar_default_width (f); + + if (FRAME_X_WINDOW (f)) + x_set_window_size (f, 0, FRAME_COLS (f), FRAME_LINES (f)); + do_pending_window_change (0); + } + else if (INTEGERP (arg) && XINT (arg) > 0 + && XFASTINT (arg) != FRAME_CONFIG_SCROLL_BAR_WIDTH (f)) + { + if (XFASTINT (arg) <= 2 * VERTICAL_SCROLL_BAR_WIDTH_TRIM) + XSETINT (arg, 2 * VERTICAL_SCROLL_BAR_WIDTH_TRIM + 1); + + FRAME_CONFIG_SCROLL_BAR_WIDTH (f) = XFASTINT (arg); + FRAME_CONFIG_SCROLL_BAR_COLS (f) = (XFASTINT (arg) + wid-1) / wid; + if (FRAME_X_WINDOW (f)) + x_set_window_size (f, 0, FRAME_COLS (f), FRAME_LINES (f)); + do_pending_window_change (0); + } + + change_frame_size (f, 0, FRAME_COLS (f), 0, 0, 0); + XWINDOW (FRAME_SELECTED_WINDOW (f))->cursor.hpos = 0; + XWINDOW (FRAME_SELECTED_WINDOW (f))->cursor.x = 0; +} + + + +/* Return non-nil if frame F wants a bitmap icon. */ + +Lisp_Object +x_icon_type (f) + FRAME_PTR f; +{ + Lisp_Object tem; + + tem = assq_no_quit (Qicon_type, f->param_alist); + if (CONSP (tem)) + return XCDR (tem); + else + return Qnil; +} + + +/* Subroutines of creating an X frame. */ + +/* Make sure that Vx_resource_name is set to a reasonable value. + Fix it up, or set it to `emacs' if it is too hopeless. */ + +void +validate_x_resource_name () +{ + int len = 0; + /* Number of valid characters in the resource name. */ + int good_count = 0; + /* Number of invalid characters in the resource name. */ + int bad_count = 0; + Lisp_Object new; + int i; + + if (!STRINGP (Vx_resource_class)) + Vx_resource_class = build_string (EMACS_CLASS); + + if (STRINGP (Vx_resource_name)) + { + unsigned char *p = SDATA (Vx_resource_name); + int i; + + len = SBYTES (Vx_resource_name); + + /* Only letters, digits, - and _ are valid in resource names. + Count the valid characters and count the invalid ones. */ + for (i = 0; i < len; i++) + { + int c = p[i]; + if (! ((c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') + || c == '-' || c == '_')) + bad_count++; + else + good_count++; + } + } + else + /* Not a string => completely invalid. */ + bad_count = 5, good_count = 0; + + /* If name is valid already, return. */ + if (bad_count == 0) + return; + + /* If name is entirely invalid, or nearly so, use `emacs'. */ + if (good_count == 0 + || (good_count == 1 && bad_count > 0)) + { + Vx_resource_name = build_string ("emacs"); + return; + } + + /* Name is partly valid. Copy it and replace the invalid characters + with underscores. */ + + Vx_resource_name = new = Fcopy_sequence (Vx_resource_name); + + for (i = 0; i < len; i++) + { + int c = SREF (new, i); + if (! ((c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') + || c == '-' || c == '_')) + SSET (new, i, '_'); + } +} + + +extern char *x_get_string_resource P_ ((XrmDatabase, char *, char *)); +extern Display_Info *check_x_display_info P_ ((Lisp_Object)); + + +/* Get specified attribute from resource database RDB. + See Fx_get_resource below for other parameters. */ + +static Lisp_Object +xrdb_get_resource (rdb, attribute, class, component, subclass) + XrmDatabase rdb; + Lisp_Object attribute, class, component, subclass; +{ + register char *value; + char *name_key; + char *class_key; + + CHECK_STRING (attribute); + CHECK_STRING (class); + + if (!NILP (component)) + CHECK_STRING (component); + if (!NILP (subclass)) + CHECK_STRING (subclass); + if (NILP (component) != NILP (subclass)) + error ("x-get-resource: must specify both COMPONENT and SUBCLASS or neither"); + + validate_x_resource_name (); + + /* Allocate space for the components, the dots which separate them, + and the final '\0'. Make them big enough for the worst case. */ + name_key = (char *) alloca (SBYTES (Vx_resource_name) + + (STRINGP (component) + ? SBYTES (component) : 0) + + SBYTES (attribute) + + 3); + + class_key = (char *) alloca (SBYTES (Vx_resource_class) + + SBYTES (class) + + (STRINGP (subclass) + ? SBYTES (subclass) : 0) + + 3); + + /* Start with emacs.FRAMENAME for the name (the specific one) + and with `Emacs' for the class key (the general one). */ + strcpy (name_key, SDATA (Vx_resource_name)); + strcpy (class_key, SDATA (Vx_resource_class)); + + strcat (class_key, "."); + strcat (class_key, SDATA (class)); + + if (!NILP (component)) + { + strcat (class_key, "."); + strcat (class_key, SDATA (subclass)); + + strcat (name_key, "."); + strcat (name_key, SDATA (component)); + } + + strcat (name_key, "."); + strcat (name_key, SDATA (attribute)); + + value = x_get_string_resource (rdb, name_key, class_key); + + if (value != (char *) 0) + return build_string (value); + else + return Qnil; +} + + +DEFUN ("x-get-resource", Fx_get_resource, Sx_get_resource, 2, 4, 0, + doc: /* Return the value of ATTRIBUTE, of class CLASS, from the X defaults database. +This uses `INSTANCE.ATTRIBUTE' as the key and `Emacs.CLASS' as the +class, where INSTANCE is the name under which Emacs was invoked, or +the name specified by the `-name' or `-rn' command-line arguments. + +The optional arguments COMPONENT and SUBCLASS add to the key and the +class, respectively. You must specify both of them or neither. +If you specify them, the key is `INSTANCE.COMPONENT.ATTRIBUTE' +and the class is `Emacs.CLASS.SUBCLASS'. */) + (attribute, class, component, subclass) + Lisp_Object attribute, class, component, subclass; +{ +#ifdef HAVE_X_WINDOWS + check_x (); +#endif + + return xrdb_get_resource (check_x_display_info (Qnil)->xrdb, + attribute, class, component, subclass); +} + +/* Get an X resource, like Fx_get_resource, but for display DPYINFO. */ + +Lisp_Object +display_x_get_resource (dpyinfo, attribute, class, component, subclass) + Display_Info *dpyinfo; + Lisp_Object attribute, class, component, subclass; +{ + return xrdb_get_resource (dpyinfo->xrdb, + attribute, class, component, subclass); +} + +/* Used when C code wants a resource value. */ + +char * +x_get_resource_string (attribute, class) + char *attribute, *class; +{ + char *name_key; + char *class_key; + struct frame *sf = SELECTED_FRAME (); + + /* Allocate space for the components, the dots which separate them, + and the final '\0'. */ + name_key = (char *) alloca (SBYTES (Vinvocation_name) + + strlen (attribute) + 2); + class_key = (char *) alloca ((sizeof (EMACS_CLASS) - 1) + + strlen (class) + 2); + + sprintf (name_key, "%s.%s", SDATA (Vinvocation_name), attribute); + sprintf (class_key, "%s.%s", EMACS_CLASS, class); + + return x_get_string_resource (FRAME_X_DISPLAY_INFO (sf)->xrdb, + name_key, class_key); +} + + +/* Return the value of parameter PARAM. + + First search ALIST, then Vdefault_frame_alist, then the X defaults + database, using ATTRIBUTE as the attribute name and CLASS as its class. + + Convert the resource to the type specified by desired_type. + + If no default is specified, return Qunbound. If you call + x_get_arg, make sure you deal with Qunbound in a reasonable way, + and don't let it get stored in any Lisp-visible variables! */ + +Lisp_Object +x_get_arg (dpyinfo, alist, param, attribute, class, type) + Display_Info *dpyinfo; + Lisp_Object alist, param; + char *attribute; + char *class; + enum resource_types type; +{ + register Lisp_Object tem; + + tem = Fassq (param, alist); + if (EQ (tem, Qnil)) + tem = Fassq (param, Vdefault_frame_alist); + if (EQ (tem, Qnil)) + { + if (attribute) + { + tem = display_x_get_resource (dpyinfo, + build_string (attribute), + build_string (class), + Qnil, Qnil); + + if (NILP (tem)) + return Qunbound; + + switch (type) + { + case RES_TYPE_NUMBER: + return make_number (atoi (SDATA (tem))); + + case RES_TYPE_FLOAT: + return make_float (atof (SDATA (tem))); + + case RES_TYPE_BOOLEAN: + tem = Fdowncase (tem); + if (!strcmp (SDATA (tem), "on") + || !strcmp (SDATA (tem), "true")) + return Qt; + else + return Qnil; + + case RES_TYPE_STRING: + return tem; + + case RES_TYPE_SYMBOL: + /* As a special case, we map the values `true' and `on' + to Qt, and `false' and `off' to Qnil. */ + { + Lisp_Object lower; + lower = Fdowncase (tem); + if (!strcmp (SDATA (lower), "on") + || !strcmp (SDATA (lower), "true")) + return Qt; + else if (!strcmp (SDATA (lower), "off") + || !strcmp (SDATA (lower), "false")) + return Qnil; + else + return Fintern (tem, Qnil); + } + + default: + abort (); + } + } + else + return Qunbound; + } + return Fcdr (tem); +} + +Lisp_Object +x_frame_get_arg (f, alist, param, attribute, class, type) + struct frame *f; + Lisp_Object alist, param; + char *attribute; + char *class; + enum resource_types type; +{ + return x_get_arg (FRAME_X_DISPLAY_INFO (f), + alist, param, attribute, class, type); +} + +/* Like x_frame_get_arg, but also record the value in f->param_alist. */ + +Lisp_Object +x_frame_get_and_record_arg (f, alist, param, attribute, class, type) + struct frame *f; + Lisp_Object alist, param; + char *attribute; + char *class; + enum resource_types type; +{ + Lisp_Object value; + + value = x_get_arg (FRAME_X_DISPLAY_INFO (f), alist, param, + attribute, class, type); + if (! NILP (value)) + store_frame_param (f, param, value); + + return value; +} + + +/* Record in frame F the specified or default value according to ALIST + of the parameter named PROP (a Lisp symbol). + If no value is specified for PROP, look for an X default for XPROP + on the frame named NAME. + If that is not found either, use the value DEFLT. */ + +Lisp_Object +x_default_parameter (f, alist, prop, deflt, xprop, xclass, type) + struct frame *f; + Lisp_Object alist; + Lisp_Object prop; + Lisp_Object deflt; + char *xprop; + char *xclass; + enum resource_types type; +{ + Lisp_Object tem; + + tem = x_frame_get_arg (f, alist, prop, xprop, xclass, type); + if (EQ (tem, Qunbound)) + tem = deflt; + x_set_frame_parameters (f, Fcons (Fcons (prop, tem), Qnil)); + return tem; +} + + + + +DEFUN ("x-parse-geometry", Fx_parse_geometry, Sx_parse_geometry, 1, 1, 0, + doc: /* Parse an X-style geometry string STRING. +Returns an alist of the form ((top . TOP), (left . LEFT) ... ). +The properties returned may include `top', `left', `height', and `width'. +The value of `left' or `top' may be an integer, +or a list (+ N) meaning N pixels relative to top/left corner, +or a list (- N) meaning -N pixels relative to bottom/right corner. */) + (string) + Lisp_Object string; +{ + int geometry, x, y; + unsigned int width, height; + Lisp_Object result; + + CHECK_STRING (string); + + geometry = XParseGeometry ((char *) SDATA (string), + &x, &y, &width, &height); + +#if 0 + if (!!(geometry & XValue) != !!(geometry & YValue)) + error ("Must specify both x and y position, or neither"); +#endif + + result = Qnil; + if (geometry & XValue) + { + Lisp_Object element; + + if (x >= 0 && (geometry & XNegative)) + element = Fcons (Qleft, Fcons (Qminus, Fcons (make_number (-x), Qnil))); + else if (x < 0 && ! (geometry & XNegative)) + element = Fcons (Qleft, Fcons (Qplus, Fcons (make_number (x), Qnil))); + else + element = Fcons (Qleft, make_number (x)); + result = Fcons (element, result); + } + + if (geometry & YValue) + { + Lisp_Object element; + + if (y >= 0 && (geometry & YNegative)) + element = Fcons (Qtop, Fcons (Qminus, Fcons (make_number (-y), Qnil))); + else if (y < 0 && ! (geometry & YNegative)) + element = Fcons (Qtop, Fcons (Qplus, Fcons (make_number (y), Qnil))); + else + element = Fcons (Qtop, make_number (y)); + result = Fcons (element, result); + } + + if (geometry & WidthValue) + result = Fcons (Fcons (Qwidth, make_number (width)), result); + if (geometry & HeightValue) + result = Fcons (Fcons (Qheight, make_number (height)), result); + + return result; +} + +/* Calculate the desired size and position of frame F. + Return the flags saying which aspects were specified. + + Also set the win_gravity and size_hint_flags of F. + + Adjust height for toolbar if TOOLBAR_P is 1. + + This function does not make the coordinates positive. */ + +#define DEFAULT_ROWS 40 +#define DEFAULT_COLS 80 + +int +x_figure_window_size (f, parms, toolbar_p) + struct frame *f; + Lisp_Object parms; + int toolbar_p; +{ + register Lisp_Object tem0, tem1, tem2; + long window_prompting = 0; + Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (f); + + /* Default values if we fall through. + Actually, if that happens we should get + window manager prompting. */ + SET_FRAME_COLS (f, DEFAULT_COLS); + FRAME_LINES (f) = DEFAULT_ROWS; + /* Window managers expect that if program-specified + positions are not (0,0), they're intentional, not defaults. */ + f->top_pos = 0; + f->left_pos = 0; + + /* Ensure that old new_text_cols and new_text_lines will not override the + values set here. */ + /* ++KFS: This was specific to W32, but seems ok for all platforms */ + f->new_text_cols = f->new_text_lines = 0; + + tem0 = x_get_arg (dpyinfo, parms, Qheight, 0, 0, RES_TYPE_NUMBER); + tem1 = x_get_arg (dpyinfo, parms, Qwidth, 0, 0, RES_TYPE_NUMBER); + tem2 = x_get_arg (dpyinfo, parms, Quser_size, 0, 0, RES_TYPE_NUMBER); + if (! EQ (tem0, Qunbound) || ! EQ (tem1, Qunbound)) + { + if (!EQ (tem0, Qunbound)) + { + CHECK_NUMBER (tem0); + FRAME_LINES (f) = XINT (tem0); + } + if (!EQ (tem1, Qunbound)) + { + CHECK_NUMBER (tem1); + SET_FRAME_COLS (f, XINT (tem1)); + } + if (!NILP (tem2) && !EQ (tem2, Qunbound)) + window_prompting |= USSize; + else + window_prompting |= PSize; + } + + f->scroll_bar_actual_width + = FRAME_SCROLL_BAR_COLS (f) * FRAME_COLUMN_WIDTH (f); + + /* This used to be done _before_ calling x_figure_window_size, but + since the height is reset here, this was really a no-op. I + assume that moving it here does what Gerd intended (although he + no longer can remember what that was... ++KFS, 2003-03-25. */ + + /* Add the tool-bar height to the initial frame height so that the + user gets a text display area of the size he specified with -g or + via .Xdefaults. Later changes of the tool-bar height don't + change the frame size. This is done so that users can create + tall Emacs frames without having to guess how tall the tool-bar + will get. */ + if (toolbar_p && FRAME_TOOL_BAR_LINES (f)) + { + int margin, relief, bar_height; + + relief = (tool_bar_button_relief >= 0 + ? tool_bar_button_relief + : DEFAULT_TOOL_BAR_BUTTON_RELIEF); + + if (INTEGERP (Vtool_bar_button_margin) + && XINT (Vtool_bar_button_margin) > 0) + margin = XFASTINT (Vtool_bar_button_margin); + else if (CONSP (Vtool_bar_button_margin) + && INTEGERP (XCDR (Vtool_bar_button_margin)) + && XINT (XCDR (Vtool_bar_button_margin)) > 0) + margin = XFASTINT (XCDR (Vtool_bar_button_margin)); + else + margin = 0; + + bar_height = DEFAULT_TOOL_BAR_IMAGE_HEIGHT + 2 * margin + 2 * relief; + FRAME_LINES (f) += (bar_height + FRAME_LINE_HEIGHT (f) - 1) / FRAME_LINE_HEIGHT (f); + } + + compute_fringe_widths (f, 0); + + FRAME_PIXEL_WIDTH (f) = FRAME_TEXT_COLS_TO_PIXEL_WIDTH (f, FRAME_COLS (f)); + FRAME_PIXEL_HEIGHT (f) = FRAME_TEXT_LINES_TO_PIXEL_HEIGHT (f, FRAME_LINES (f)); + + tem0 = x_get_arg (dpyinfo, parms, Qtop, 0, 0, RES_TYPE_NUMBER); + tem1 = x_get_arg (dpyinfo, parms, Qleft, 0, 0, RES_TYPE_NUMBER); + tem2 = x_get_arg (dpyinfo, parms, Quser_position, 0, 0, RES_TYPE_NUMBER); + if (! EQ (tem0, Qunbound) || ! EQ (tem1, Qunbound)) + { + if (EQ (tem0, Qminus)) + { + f->top_pos = 0; + window_prompting |= YNegative; + } + else if (CONSP (tem0) && EQ (XCAR (tem0), Qminus) + && CONSP (XCDR (tem0)) + && INTEGERP (XCAR (XCDR (tem0)))) + { + f->top_pos = - XINT (XCAR (XCDR (tem0))); + window_prompting |= YNegative; + } + else if (CONSP (tem0) && EQ (XCAR (tem0), Qplus) + && CONSP (XCDR (tem0)) + && INTEGERP (XCAR (XCDR (tem0)))) + { + f->top_pos = XINT (XCAR (XCDR (tem0))); + } + else if (EQ (tem0, Qunbound)) + f->top_pos = 0; + else + { + CHECK_NUMBER (tem0); + f->top_pos = XINT (tem0); + if (f->top_pos < 0) + window_prompting |= YNegative; + } + + if (EQ (tem1, Qminus)) + { + f->left_pos = 0; + window_prompting |= XNegative; + } + else if (CONSP (tem1) && EQ (XCAR (tem1), Qminus) + && CONSP (XCDR (tem1)) + && INTEGERP (XCAR (XCDR (tem1)))) + { + f->left_pos = - XINT (XCAR (XCDR (tem1))); + window_prompting |= XNegative; + } + else if (CONSP (tem1) && EQ (XCAR (tem1), Qplus) + && CONSP (XCDR (tem1)) + && INTEGERP (XCAR (XCDR (tem1)))) + { + f->left_pos = XINT (XCAR (XCDR (tem1))); + } + else if (EQ (tem1, Qunbound)) + f->left_pos = 0; + else + { + CHECK_NUMBER (tem1); + f->left_pos = XINT (tem1); + if (f->left_pos < 0) + window_prompting |= XNegative; + } + + if (!NILP (tem2) && ! EQ (tem2, Qunbound)) + window_prompting |= USPosition; + else + window_prompting |= PPosition; + } + + if (f->want_fullscreen != FULLSCREEN_NONE) + { + int left, top; + int width, height; + + /* It takes both for some WM:s to place it where we want */ + window_prompting = USPosition | PPosition; + x_fullscreen_adjust (f, &width, &height, &top, &left); + FRAME_COLS (f) = width; + FRAME_LINES (f) = height; + FRAME_PIXEL_WIDTH (f) = FRAME_TEXT_COLS_TO_PIXEL_WIDTH (f, width); + FRAME_PIXEL_HEIGHT (f) = FRAME_TEXT_LINES_TO_PIXEL_HEIGHT (f, height); + f->left_pos = left; + f->top_pos = top; + } + + if (window_prompting & XNegative) + { + if (window_prompting & YNegative) + f->win_gravity = SouthEastGravity; + else + f->win_gravity = NorthEastGravity; + } + else + { + if (window_prompting & YNegative) + f->win_gravity = SouthWestGravity; + else + f->win_gravity = NorthWestGravity; + } + + f->size_hint_flags = window_prompting; + + return window_prompting; +} + + + +#endif /* HAVE_WINDOW_SYSTEM */ + + + +/*********************************************************************** + Initialization + ***********************************************************************/ + +void +syms_of_frame () +{ + Qframep = intern ("framep"); + staticpro (&Qframep); + Qframe_live_p = intern ("frame-live-p"); + staticpro (&Qframe_live_p); + Qheight = intern ("height"); + staticpro (&Qheight); + Qicon = intern ("icon"); + staticpro (&Qicon); + Qminibuffer = intern ("minibuffer"); + staticpro (&Qminibuffer); + Qmodeline = intern ("modeline"); + staticpro (&Qmodeline); + Qonly = intern ("only"); + staticpro (&Qonly); + Qwidth = intern ("width"); + staticpro (&Qwidth); + Qgeometry = intern ("geometry"); + staticpro (&Qgeometry); + Qicon_left = intern ("icon-left"); + staticpro (&Qicon_left); + Qicon_top = intern ("icon-top"); + staticpro (&Qicon_top); + Qleft = intern ("left"); + staticpro (&Qleft); + Qright = intern ("right"); + staticpro (&Qright); + Quser_position = intern ("user-position"); + staticpro (&Quser_position); + Quser_size = intern ("user-size"); + staticpro (&Quser_size); + Qwindow_id = intern ("window-id"); + staticpro (&Qwindow_id); +#ifdef HAVE_X_WINDOWS + Qouter_window_id = intern ("outer-window-id"); + staticpro (&Qouter_window_id); +#endif + Qparent_id = intern ("parent-id"); + staticpro (&Qparent_id); + Qx = intern ("x"); + staticpro (&Qx); + Qw32 = intern ("w32"); + staticpro (&Qw32); + Qpc = intern ("pc"); + staticpro (&Qpc); + Qmac = intern ("mac"); + staticpro (&Qmac); + Qvisible = intern ("visible"); + staticpro (&Qvisible); + Qbuffer_predicate = intern ("buffer-predicate"); + staticpro (&Qbuffer_predicate); + Qbuffer_list = intern ("buffer-list"); + staticpro (&Qbuffer_list); + Qdisplay_type = intern ("display-type"); + staticpro (&Qdisplay_type); + Qbackground_mode = intern ("background-mode"); + staticpro (&Qbackground_mode); + Qtty_color_mode = intern ("tty-color-mode"); + staticpro (&Qtty_color_mode); + + Qface_set_after_frame_default = intern ("face-set-after-frame-default"); + staticpro (&Qface_set_after_frame_default); + + Qfullwidth = intern ("fullwidth"); + staticpro (&Qfullwidth); + Qfullheight = intern ("fullheight"); + staticpro (&Qfullheight); + Qfullboth = intern ("fullboth"); + staticpro (&Qfullboth); + Qx_resource_name = intern ("x-resource-name"); + staticpro (&Qx_resource_name); + + Qx_frame_parameter = intern ("x-frame-parameter"); + staticpro (&Qx_frame_parameter); + + { + int i; + + for (i = 0; i < sizeof (frame_parms) / sizeof (frame_parms[0]); i++) + { + Lisp_Object v = intern (frame_parms[i].name); + if (frame_parms[i].variable) + { + *frame_parms[i].variable = v; + staticpro (frame_parms[i].variable); + } + Fput (v, Qx_frame_parameter, make_number (i)); + } + } + +#ifdef HAVE_WINDOW_SYSTEM + DEFVAR_LISP ("x-resource-name", &Vx_resource_name, + doc: /* The name Emacs uses to look up X resources. +`x-get-resource' uses this as the first component of the instance name +when requesting resource values. +Emacs initially sets `x-resource-name' to the name under which Emacs +was invoked, or to the value specified with the `-name' or `-rn' +switches, if present. + +It may be useful to bind this variable locally around a call +to `x-get-resource'. See also the variable `x-resource-class'. */); + Vx_resource_name = Qnil; + + DEFVAR_LISP ("x-resource-class", &Vx_resource_class, + doc: /* The class Emacs uses to look up X resources. +`x-get-resource' uses this as the first component of the instance class +when requesting resource values. + +Emacs initially sets `x-resource-class' to "Emacs". + +Setting this variable permanently is not a reasonable thing to do, +but binding this variable locally around a call to `x-get-resource' +is a reasonable practice. See also the variable `x-resource-name'. */); + Vx_resource_class = build_string (EMACS_CLASS); +#endif + + DEFVAR_LISP ("default-frame-alist", &Vdefault_frame_alist, + doc: /* Alist of default values for frame creation. +These may be set in your init file, like this: + (setq default-frame-alist '((width . 80) (height . 55) (menu-bar-lines . 1)) +These override values given in window system configuration data, + including X Windows' defaults database. +For values specific to the first Emacs frame, see `initial-frame-alist'. +For values specific to the separate minibuffer frame, see + `minibuffer-frame-alist'. +The `menu-bar-lines' element of the list controls whether new frames + have menu bars; `menu-bar-mode' works by altering this element. +Setting this variable does not affect existing frames, only new ones. */); + Vdefault_frame_alist = Qnil; + + Qinhibit_default_face_x_resources + = intern ("inhibit-default-face-x-resources"); + staticpro (&Qinhibit_default_face_x_resources); + + DEFVAR_LISP ("terminal-frame", &Vterminal_frame, + doc: /* The initial frame-object, which represents Emacs's stdout. */); + + DEFVAR_LISP ("emacs-iconified", &Vemacs_iconified, + doc: /* Non-nil if all of emacs is iconified and frame updates are not needed. */); + Vemacs_iconified = Qnil; + + DEFVAR_LISP ("mouse-position-function", &Vmouse_position_function, + doc: /* If non-nil, function to transform normal value of `mouse-position'. +`mouse-position' calls this function, passing its usual return value as +argument, and returns whatever this function returns. +This abnormal hook exists for the benefit of packages like `xt-mouse.el' +which need to do mouse handling at the Lisp level. */); + Vmouse_position_function = Qnil; + + DEFVAR_LISP ("mouse-highlight", &Vmouse_highlight, + doc: /* If non-nil, clickable text is highlighted when mouse is over it. +If the value is an integer, highlighting is only shown after moving the +mouse, while keyboard input turns off the highlight even when the mouse +is over the clickable text. However, the mouse shape still indicates +when the mouse is over clickable text. */); + Vmouse_highlight = Qt; + + DEFVAR_LISP ("delete-frame-functions", &Vdelete_frame_functions, + doc: /* Functions to be run before deleting a frame. +The functions are run with one arg, the frame to be deleted. +See `delete-frame'. */); + Vdelete_frame_functions = Qnil; + + DEFVAR_KBOARD ("default-minibuffer-frame", Vdefault_minibuffer_frame, + doc: /* Minibufferless frames use this frame's minibuffer. + +Emacs cannot create minibufferless frames unless this is set to an +appropriate surrogate. + +Emacs consults this variable only when creating minibufferless +frames; once the frame is created, it sticks with its assigned +minibuffer, no matter what this variable is set to. This means that +this variable doesn't necessarily say anything meaningful about the +current set of frames, or where the minibuffer is currently being +displayed. + +This variable is local to the current terminal and cannot be buffer-local. */); + + staticpro (&Vframe_list); + + defsubr (&Sactive_minibuffer_window); + defsubr (&Sframep); + defsubr (&Sframe_live_p); + defsubr (&Smake_terminal_frame); + defsubr (&Shandle_switch_frame); + defsubr (&Signore_event); + defsubr (&Sselect_frame); + defsubr (&Sselected_frame); + defsubr (&Swindow_frame); + defsubr (&Sframe_root_window); + defsubr (&Sframe_first_window); + defsubr (&Sframe_selected_window); + defsubr (&Sset_frame_selected_window); + defsubr (&Sframe_list); + defsubr (&Snext_frame); + defsubr (&Sprevious_frame); + defsubr (&Sdelete_frame); + defsubr (&Smouse_position); + defsubr (&Smouse_pixel_position); + defsubr (&Sset_mouse_position); + defsubr (&Sset_mouse_pixel_position); +#if 0 + defsubr (&Sframe_configuration); + defsubr (&Srestore_frame_configuration); +#endif + defsubr (&Smake_frame_visible); + defsubr (&Smake_frame_invisible); + defsubr (&Siconify_frame); + defsubr (&Sframe_visible_p); + defsubr (&Svisible_frame_list); + defsubr (&Sraise_frame); + defsubr (&Slower_frame); + defsubr (&Sredirect_frame_focus); + defsubr (&Sframe_focus); + defsubr (&Sframe_parameters); + defsubr (&Sframe_parameter); + defsubr (&Smodify_frame_parameters); + defsubr (&Sframe_char_height); + defsubr (&Sframe_char_width); + defsubr (&Sframe_pixel_height); + defsubr (&Sframe_pixel_width); + defsubr (&Sset_frame_height); + defsubr (&Sset_frame_width); + defsubr (&Sset_frame_size); + defsubr (&Sset_frame_position); + +#ifdef HAVE_WINDOW_SYSTEM + defsubr (&Sx_get_resource); + defsubr (&Sx_parse_geometry); +#endif + } diff --cc src/frame.h index a2ca24a014d,bcfb208a9a6..b4ddf5136d5 --- a/src/frame.h +++ b/src/frame.h @@@ -911,170 -785,9 +911,170 @@@ extern Lisp_Object selected_frame Result is a Lisp float if Y is not a multiple of the canon width, otherwise it's a Lisp integer. */ -#define CANON_Y_FROM_PIXEL_Y(F, Y) \ - ((Y) % CANON_Y_UNIT (F) \ - ? make_float ((double) (Y) / CANON_Y_UNIT (F)) \ - : make_number ((Y) / CANON_Y_UNIT (F))) - +#define FRAME_CANON_Y_FROM_PIXEL_Y(F, Y) \ + ((Y) % FRAME_LINE_HEIGHT (F) \ + ? make_float ((double) (Y) / FRAME_LINE_HEIGHT (F)) \ + : make_number ((Y) / FRAME_LINE_HEIGHT (F))) + + + +/* Manipulating pixel sizes and character sizes. + Knowledge of which factors affect the overall size of the window should + be hidden in these macros, if that's possible. + + Return the upper/left pixel position of the character cell on frame F + at ROW/COL. */ + +#define FRAME_LINE_TO_PIXEL_Y(f, row) \ + (FRAME_INTERNAL_BORDER_WIDTH (f) \ + + (row) * FRAME_LINE_HEIGHT (f)) + +#define FRAME_COL_TO_PIXEL_X(f, col) \ + (FRAME_INTERNAL_BORDER_WIDTH (f) \ + + (col) * FRAME_COLUMN_WIDTH (f)) + +/* Return the pixel width/height of frame F if it has + COLS columns/LINES rows. */ + +#define FRAME_TEXT_COLS_TO_PIXEL_WIDTH(f, cols) \ + (FRAME_COL_TO_PIXEL_X (f, cols) \ + + (f)->scroll_bar_actual_width \ + + FRAME_TOTAL_FRINGE_WIDTH (f) \ + + FRAME_INTERNAL_BORDER_WIDTH (f)) + +#define FRAME_TEXT_LINES_TO_PIXEL_HEIGHT(f, lines) \ + (FRAME_LINE_TO_PIXEL_Y (f, lines) \ + + FRAME_INTERNAL_BORDER_WIDTH (f)) + + +/* Return the row/column (zero-based) of the character cell containing + the pixel on FRAME at Y/X. */ + +#define FRAME_PIXEL_Y_TO_LINE(f, y) \ + (((y) - FRAME_INTERNAL_BORDER_WIDTH (f)) \ + / FRAME_LINE_HEIGHT (f)) + +#define FRAME_PIXEL_X_TO_COL(f, x) \ + (((x) - FRAME_INTERNAL_BORDER_WIDTH (f)) \ + / FRAME_COLUMN_WIDTH (f)) + +/* How many columns/rows of text can we fit in WIDTH/HEIGHT pixels on + frame F? */ + +#define FRAME_PIXEL_WIDTH_TO_TEXT_COLS(f, width) \ + (FRAME_PIXEL_X_TO_COL (f, ((width) \ + - FRAME_INTERNAL_BORDER_WIDTH (f) \ + - FRAME_TOTAL_FRINGE_WIDTH (f) \ + - (f)->scroll_bar_actual_width))) + +#define FRAME_PIXEL_HEIGHT_TO_TEXT_LINES(f, height) \ + (FRAME_PIXEL_Y_TO_LINE (f, ((height) \ + - FRAME_INTERNAL_BORDER_WIDTH (f)))) + + +/*********************************************************************** + Frame Parameters + ***********************************************************************/ + +extern Lisp_Object Qauto_raise, Qauto_lower; +extern Lisp_Object Qborder_color, Qborder_width; +extern Lisp_Object Qbuffer_predicate, Qbuffer_list; +extern Lisp_Object Qcursor_color, Qcursor_type; +extern Lisp_Object Qfont; +extern Lisp_Object Qbackground_color, Qforeground_color; +extern Lisp_Object Qicon, Qicon_name, Qicon_type, Qicon_left, Qicon_top; +extern Lisp_Object Qinternal_border_width; +extern Lisp_Object Qmenu_bar_lines, Qtool_bar_lines; +extern Lisp_Object Qmouse_color; +extern Lisp_Object Qname, Qtitle; +extern Lisp_Object Qparent_id; +extern Lisp_Object Qunsplittable, Qvisibility; +extern Lisp_Object Qscroll_bar_width, Qvertical_scroll_bars; +extern Lisp_Object Qscroll_bar_foreground, Qscroll_bar_background; +extern Lisp_Object Qscreen_gamma; +extern Lisp_Object Qline_spacing; +extern Lisp_Object Qwait_for_wm; +extern Lisp_Object Qfullscreen; + +extern Lisp_Object Qleft_fringe, Qright_fringe; +extern Lisp_Object Qheight, Qwidth; +extern Lisp_Object Qminibuffer, Qmodeline; +extern Lisp_Object Qonly; +extern Lisp_Object Qx, Qw32, Qmac, Qpc; +extern Lisp_Object Qvisible; +extern Lisp_Object Qdisplay_type; +extern Lisp_Object Qbackground_mode; + +extern Lisp_Object Qx_resource_name; + +extern Lisp_Object Qleft, Qright, Qtop, Qbox; +extern Lisp_Object Qdisplay; + +#ifdef HAVE_WINDOW_SYSTEM + +/* The class of this X application. */ +#define EMACS_CLASS "Emacs" + +enum +{ + /* Values used as a bit mask, BOTH == WIDTH | HEIGHT. */ + FULLSCREEN_NONE = 0, + FULLSCREEN_WIDTH = 1, + FULLSCREEN_HEIGHT = 2, + FULLSCREEN_BOTH = 3, + FULLSCREEN_WAIT = 4, + FULLSCREEN_MOVE_WAIT = 8, +}; + + +/* These are in xterm.c, w32term.c, etc. */ + +extern void x_set_scroll_bar_default_width P_ ((struct frame *)); +extern void x_set_offset P_ ((struct frame *, int, int, int)); +extern void x_wm_set_icon_position P_ ((struct frame *, int, int)); + +extern Lisp_Object x_new_font P_ ((struct frame *, char *)); - extern Lisp_Object x_new_fontset P_ ((struct frame *, char *)); ++extern Lisp_Object x_new_fontset P_ ((struct frame *, Lisp_Object)); + + +/* These are in frame.c */ + +extern Lisp_Object Vx_resource_name; +extern Lisp_Object Vx_resource_class; + + +extern Lisp_Object Qface_set_after_frame_default; + +extern void x_fullscreen_adjust P_ ((struct frame *f, int *, int *, + int *, int *)); + +extern void x_set_frame_parameters P_ ((struct frame *, Lisp_Object)); +extern void x_report_frame_params P_ ((struct frame *, Lisp_Object *)); + +extern void x_set_fullscreen P_ ((struct frame *, Lisp_Object, Lisp_Object)); +extern void x_set_line_spacing P_ ((struct frame *, Lisp_Object, Lisp_Object)); +extern void x_set_screen_gamma P_ ((struct frame *, Lisp_Object, Lisp_Object)); +extern void x_set_font P_ ((struct frame *, Lisp_Object, Lisp_Object)); +extern void x_set_fringe_width P_ ((struct frame *, Lisp_Object, Lisp_Object)); +extern void x_set_border_width P_ ((struct frame *, Lisp_Object, Lisp_Object)); +extern void x_set_internal_border_width P_ ((struct frame *, Lisp_Object, + Lisp_Object)); +extern void x_set_visibility P_ ((struct frame *, Lisp_Object, Lisp_Object)); +extern void x_set_autoraise P_ ((struct frame *, Lisp_Object, Lisp_Object)); +extern void x_set_autolower P_ ((struct frame *, Lisp_Object, Lisp_Object)); +extern void x_set_unsplittable P_ ((struct frame *, Lisp_Object, Lisp_Object)); +extern void x_set_vertical_scroll_bars P_ ((struct frame *, Lisp_Object, + Lisp_Object)); +extern void x_set_scroll_bar_width P_ ((struct frame *, Lisp_Object, + Lisp_Object)); + +extern Lisp_Object x_icon_type P_ ((struct frame *)); + +extern int x_figure_window_size P_ ((struct frame *, Lisp_Object, int)); + + +extern void validate_x_resource_name P_ ((void)); + +#endif /* HAVE_WINDOW_SYSTEM */ + #endif /* not EMACS_FRAME_H */ diff --cc src/insdel.c index 5becd5d9163,3cd9da7e2c8..2d9befb677d --- a/src/insdel.c +++ b/src/insdel.c @@@ -1164,6 -1199,6 +1153,47 @@@ insert_from_string_1 (string, pos, pos_ current_buffer, inherit); adjust_point (nchars, outgoing_nbytes); ++ ++ CHECK_MARKERS (); ++} ++ ++/* Insert a sequence of NCHARS chars which occupy NBYTES bytes ++ starting at GPT_ADDR. */ ++ ++void ++insert_from_gap (nchars, nbytes) ++ register int nchars, nbytes; ++{ ++ if (NILP (current_buffer->enable_multibyte_characters)) ++ nchars = nbytes; ++ ++ record_insert (GPT, nchars); ++ MODIFF++; ++ ++ GAP_SIZE -= nbytes; ++ GPT += nchars; ++ ZV += nchars; ++ Z += nchars; ++ GPT_BYTE += nbytes; ++ ZV_BYTE += nbytes; ++ Z_BYTE += nbytes; ++ if (GAP_SIZE > 0) *(GPT_ADDR) = 0; /* Put an anchor. */ ++ ++ if (GPT_BYTE < GPT) ++ abort (); ++ ++ adjust_overlays_for_insert (GPT, nchars); ++ adjust_markers_for_insert (GPT, GPT_BYTE, ++ GPT + nchars, GPT_BYTE + nbytes, ++ 0); ++ ++ if (BUF_INTERVALS (current_buffer) != 0) ++ offset_intervals (current_buffer, GPT, nchars); ++ ++ if (GPT - nchars < PT) ++ adjust_point (nchars, nbytes); ++ ++ CHECK_MARKERS (); } /* Insert text from BUF, NCHARS characters starting at CHARPOS, into the diff --cc src/keyboard.c index ba137f67cb6,0c155063a70..449c8a1b453 --- a/src/keyboard.c +++ b/src/keyboard.c @@@ -1614,26 -1545,15 +1614,26 @@@ command_loop_1 ( = window_display_table (XWINDOW (selected_window)); lose = FETCH_CHAR (PT_BYTE); SET_PT (PT + 1); - if ((dp - ? (VECTORP (DISP_CHAR_VECTOR (dp, lose)) - ? XVECTOR (DISP_CHAR_VECTOR (dp, lose))->size == 1 - : (NILP (DISP_CHAR_VECTOR (dp, lose)) - && (lose >= 0x20 && lose < 0x7f))) - : (lose >= 0x20 && lose < 0x7f)) + if (! NILP (Vpost_command_hook)) + /* Put this before calling adjust_point_for_property + so it will only get called once in any case. */ + goto directly_done; + if (current_buffer == prev_buffer + && last_point_position != PT + && NILP (Vdisable_point_adjustment) + && NILP (Vglobal_disable_point_adjustment)) + adjust_point_for_property (last_point_position, 0); + already_adjusted = 1; + if (PT == last_point_position + 1 + && (dp + ? (VECTORP (DISP_CHAR_VECTOR (dp, lose)) + ? XVECTOR (DISP_CHAR_VECTOR (dp, lose))->size == 1 + : (NILP (DISP_CHAR_VECTOR (dp, lose)) + && (lose >= 0x20 && lose < 0x7f))) + : (lose >= 0x20 && lose < 0x7f)) /* To extract the case of continuation on wide-column characters. */ - && (WIDTH_BY_CHAR_HEAD (FETCH_BYTE (PT_BYTE)) == 1) + && ASCII_BYTE_P (lose) && (XFASTINT (XWINDOW (selected_window)->last_modified) >= MODIFF) && (XFASTINT (XWINDOW (selected_window)->last_overlay_modified) @@@ -1684,13 -1595,10 +1684,13 @@@ goto directly_done; } else if (EQ (Vthis_command, Qself_insert_command) - /* Try this optimization only on ascii keystrokes. */ - && INTEGERP (last_command_char)) + /* Try this optimization only on char keystrokes. */ + && NATNUMP (last_command_char) + && CHAR_VALID_P (XFASTINT (last_command_char), 0)) { - unsigned int c = XINT (last_command_char); + unsigned int c + = translate_char (Vtranslation_table_for_input, - XFASTINT (last_command_char), 0, 0, 0); ++ XFASTINT (last_command_char)); int value; if (NILP (Vexecuting_macro) && !EQ (minibuf_window, selected_window)) @@@ -2914,14 -2693,13 +2914,13 @@@ read_char (commandflag, nmaps, maps, pr { /* If kbd_buffer_get_event gave us an EOF, return that. */ if (XINT (c) == -1) - RETURN_UNGCPRO (c); + goto exit; if ((STRINGP (Vkeyboard_translate_table) - && XSTRING (Vkeyboard_translate_table)->size > (unsigned) XFASTINT (c)) + && SCHARS (Vkeyboard_translate_table) > (unsigned) XFASTINT (c)) || (VECTORP (Vkeyboard_translate_table) && XVECTOR (Vkeyboard_translate_table)->size > (unsigned) XFASTINT (c)) - || (CHAR_TABLE_P (Vkeyboard_translate_table) - && CHAR_VALID_P (XINT (c), 0))) + || CHAR_TABLE_P (Vkeyboard_translate_table)) { Lisp_Object d; d = Faref (Vkeyboard_translate_table, c); @@@ -6319,14 -5931,17 +6318,16 @@@ modify_event_symbol (symbol_num, modifi value = Fcdr_safe (Fassq (symbol_int, name_alist_or_stem)); else if (STRINGP (name_alist_or_stem)) { - int len = STRING_BYTES (XSTRING (name_alist_or_stem)); - char *buf = (char *) alloca (len + 50); - + int len = SBYTES (name_alist_or_stem); + char *buf = (char *) alloca (len + 50); if (sizeof (int) == sizeof (EMACS_INT)) - sprintf (buf, "%s-%d", XSTRING (name_alist_or_stem)->data, + sprintf (buf, "%s-%d", SDATA (name_alist_or_stem), XINT (symbol_int) + 1); else if (sizeof (long) == sizeof (EMACS_INT)) - sprintf (buf, "%s-%ld", XSTRING (name_alist_or_stem)->data, + sprintf (buf, "%s-%ld", SDATA (name_alist_or_stem), XINT (symbol_int) + 1); + else + abort (); value = intern (buf); } else if (name_table != 0 && name_table[symbol_num]) @@@ -9373,12 -9003,11 +9374,11 @@@ read_key_sequence (keybuf, bufsize, pro and cannot be part of a function key or translation, and is an upper case letter use the corresponding lower-case letter instead. */ - if (first_binding == nmaps && ! function_key_possible - && ! key_translation_possible + if (first_binding >= nmaps + && fkey.start >= t && keytran.start >= t && INTEGERP (key) - && ((((XINT (key) & 0x3ffff) - < XCHAR_TABLE (current_buffer->downcase_table)->size) - && UPPERCASEP (XINT (key) & 0x3ffff)) + && ((CHARACTERP (make_number (XINT (key) & ~CHAR_MODIFIER_MASK)) + && UPPERCASEP (XINT (key) & ~CHAR_MODIFIER_MASK)) || (XINT (key) & shift_modifier))) { Lisp_Object new_key; @@@ -9389,8 -9018,8 +9389,8 @@@ if (XINT (key) & shift_modifier) XSETINT (new_key, XINT (key) & ~shift_modifier); else - XSETINT (new_key, (DOWNCASE (XINT (key) & 0x3ffff) - | (XINT (key) & ~0x3ffff))); + XSETINT (new_key, (DOWNCASE (XINT (key) & ~CHAR_MODIFIER_MASK) - | (XINT (key) & CHAR_MODIFIER_MASK))); ++ | (XINT (key) & ~CHAR_MODIFIER_MASK))); /* We have to do this unconditionally, regardless of whether the lower-case char is defined in the keymaps, because they diff --cc src/keymap.c index 64f849f7845,c4c5721a194..c274183de49 --- a/src/keymap.c +++ b/src/keymap.c @@@ -412,9 -403,8 +413,8 @@@ PARENT should be nil or another keymap { Lisp_Object indices[3]; - map_char_table (fix_submap_inheritance, Qnil, XCAR (list), - keymap, 0, indices); + map_char_table (fix_submap_inheritance, Qnil, - XCAR (list), XCAR (list), - keymap, 0, indices); ++ XCAR (list), keymap); } } @@@ -540,19 -527,17 +540,17 @@@ access_keymap (map, idx, t_ok, noinheri return Qnil; } + /* t_binding is where we put a default binding that applies, + to use in case we do not find a binding specifically + for this key sequence. */ { Lisp_Object tail; + Lisp_Object t_binding = Qnil; + struct gcpro gcpro1, gcpro2, gcpro3, gcpro4; - /* t_binding is where we put a default binding that applies, - to use in case we do not find a binding specifically - for this key sequence. */ - - Lisp_Object t_binding; - t_binding = Qnil; + GCPRO4 (map, tail, idx, t_binding); - /* If `t_ok' is 2, both `t' and generic-char bindings are accepted. - If it is 1, only generic-char bindings are accepted. - Otherwise, neither are. */ + /* If `t_ok' is 2, both `t' is accepted. */ t_ok = t_ok ? 2 : 0; for (tail = XCDR (map); @@@ -573,27 -558,9 +571,9 @@@ else if (CONSP (binding)) { Lisp_Object key = XCAR (binding); - + if (EQ (key, idx)) val = XCDR (binding); - else if (t_ok - && INTEGERP (idx) - && (XINT (idx) & CHAR_MODIFIER_MASK) == 0 - && INTEGERP (key) - && (XINT (key) & CHAR_MODIFIER_MASK) == 0 - && !SINGLE_BYTE_CHAR_P (XINT (idx)) - && !SINGLE_BYTE_CHAR_P (XINT (key)) - && CHAR_VALID_P (XINT (key), 1) - && !CHAR_VALID_P (XINT (key), 0) - && (CHAR_CHARSET (XINT (key)) - == CHAR_CHARSET (XINT (idx)))) - { - /* KEY is the generic character of the charset of IDX. - Use KEY's binding if there isn't a binding for IDX - itself. */ - t_binding = XCDR (binding); - t_ok = 0; - } else if (t_ok > 1 && EQ (key, Qt)) { t_binding = XCDR (binding); @@@ -641,103 -608,6 +621,102 @@@ } } +static void +map_keymap_item (fun, args, key, val, data) + map_keymap_function_t fun; + Lisp_Object args, key, val; + void *data; +{ + /* We should maybe try to detect bindings shadowed by previous + ones and things like that. */ + if (EQ (val, Qt)) + val = Qnil; + (*fun) (key, val, args, data); +} + +static void +map_keymap_char_table_item (args, key, val) + Lisp_Object args, key, val; +{ + if (!NILP (val)) + { + map_keymap_function_t fun = XSAVE_VALUE (XCAR (args))->pointer; + args = XCDR (args); + map_keymap_item (fun, XCDR (args), key, val, + XSAVE_VALUE (XCAR (args))->pointer); + } +} + +/* Call FUN for every binding in MAP. + FUN is called with 4 arguments: FUN (KEY, BINDING, ARGS, DATA). + AUTOLOAD if non-zero means that we can autoload keymaps if necessary. */ +void +map_keymap (map, fun, args, data, autoload) + map_keymap_function_t fun; + Lisp_Object map, args; + void *data; + int autoload; +{ + struct gcpro gcpro1, gcpro2, gcpro3; + Lisp_Object tail; + + GCPRO3 (map, args, tail); + map = get_keymap (map, 1, autoload); + for (tail = (CONSP (map) && EQ (Qkeymap, XCAR (map))) ? XCDR (map) : map; + CONSP (tail) || (tail = get_keymap (tail, 0, autoload), CONSP (tail)); + tail = XCDR (tail)) + { + Lisp_Object binding = XCAR (tail); - ++ + if (CONSP (binding)) + map_keymap_item (fun, args, XCAR (binding), XCDR (binding), data); + else if (VECTORP (binding)) + { + /* Loop over the char values represented in the vector. */ + int len = ASIZE (binding); + int c; + abort(); + for (c = 0; c < len; c++) + { + Lisp_Object character; + XSETFASTINT (character, c); + map_keymap_item (fun, args, character, AREF (binding, c), data); + } + } + else if (CHAR_TABLE_P (binding)) + { + Lisp_Object indices[3]; - map_char_table (map_keymap_char_table_item, Qnil, binding, binding, ++ map_char_table (map_keymap_char_table_item, Qnil, binding, + Fcons (make_save_value (fun, 0), + Fcons (make_save_value (data, 0), - args)), - 0, indices); ++ args))); + } + } + UNGCPRO; +} + +static void +map_keymap_call (key, val, fun, dummy) + Lisp_Object key, val, fun; + void *dummy; +{ + call2 (fun, key, val); +} + +DEFUN ("map-keymap", Fmap_keymap, Smap_keymap, 2, 2, 0, + doc: /* Call FUNCTION for every binding in KEYMAP. +FUNCTION is called with two arguments: the event and its binding. */) + (function, keymap) + Lisp_Object function, keymap; +{ + if (INTEGERP (function)) + /* We have to stop integers early since map_keymap gives them special + significance. */ + Fsignal (Qinvalid_function, Fcons (function, Qnil)); + map_keymap (keymap, map_keymap_call, function, NULL, 1); + return Qnil; +} + /* Given OBJECT which was found in a slot in a keymap, trace indirect definitions to get the actual definition of that slot. An indirect definition is a list of the form @@@ -1016,7 -814,8 +1000,7 @@@ static voi copy_keymap_1 (chartable, idx, elt) Lisp_Object chartable, idx, elt; { - Faset (chartable, idx, copy_keymap_item (elt)); - if (CONSP (elt) && EQ (XCAR (elt), Qkeymap)) - Faset (chartable, idx, Fcopy_keymap (elt)); ++ Fset_char_table_range (chartable, idx, copy_keymap_item (elt)); } DEFUN ("copy-keymap", Fcopy_keymap, Scopy_keymap, 1, 1, 0, @@@ -1040,8 -843,11 +1024,8 @@@ is not copied. */ if (CHAR_TABLE_P (elt)) { Lisp_Object indices[3]; - elt = Fcopy_sequence (elt); - map_char_table (copy_keymap_1, Qnil, elt, elt, elt, 0, indices); - XSETCAR (tail, elt); - - map_char_table (copy_keymap_1, Qnil, elt, elt, 0, indices); ++ map_char_table (copy_keymap_1, Qnil, elt, elt); } else if (VECTORP (elt)) { @@@ -1122,8 -1005,15 +1106,15 @@@ binding KEY to DEF is added at the fron { c = Faref (key, make_number (idx)); - if (CONSP (c) && lucid_event_type_list_p (c)) - c = Fevent_convert_list (c); + if (CONSP (c)) + { + /* C may be a cons (FROM . TO) specifying a range of + characters. */ + if (CHARACTERP (XCAR (c))) - CHECK_CHARACTER (XCDR (c)); ++ CHECK_CHARACTER_CDR (c); + else if (lucid_event_type_list_p (c)) + c = Fevent_convert_list (c); + } if (SYMBOLP (c)) silly_event_symbol_error (c); @@@ -1761,50 -1653,39 +1755,50 @@@ accessible_keymaps_1 (key, cmd, maps, t if (NILP (cmd)) return; - tem = get_keymap (cmd, 0, 0); - if (CONSP (tem)) - { - cmd = tem; - /* Ignore keymaps that are already added to maps. */ - tem = Frassq (cmd, maps); - if (NILP (tem)) - { - /* If the last key in thisseq is meta-prefix-char, - turn it into a meta-ized keystroke. We know - that the event we're about to append is an - ascii keystroke since we're processing a - keymap table. */ - if (is_metized) - { - int meta_bit = meta_modifier; - Lisp_Object last = make_number (XINT (Flength (thisseq)) - 1); - tem = Fcopy_sequence (thisseq); - - Faset (tem, last, make_number (XINT (key) | meta_bit)); - - /* This new sequence is the same length as - thisseq, so stick it in the list right - after this one. */ - XSETCDR (tail, - Fcons (Fcons (tem, cmd), XCDR (tail))); - } - else - { - tem = append_key (thisseq, key); - nconc2 (tail, Fcons (Fcons (tem, cmd), Qnil)); - } + /* Look for and break cycles. */ + while (!NILP (tem = Frassq (cmd, maps))) + { + Lisp_Object prefix = XCAR (tem); + int lim = XINT (Flength (XCAR (tem))); + if (lim <= XINT (Flength (thisseq))) + { /* This keymap was already seen with a smaller prefix. */ + int i = 0; + while (i < lim && EQ (Faref (prefix, make_number (i)), + Faref (thisseq, make_number (i)))) + i++; + if (i >= lim) + /* `prefix' is a prefix of `thisseq' => there's a cycle. */ + return; } + /* This occurrence of `cmd' in `maps' does not correspond to a cycle, + but maybe `cmd' occurs again further down in `maps', so keep + looking. */ + maps = XCDR (Fmemq (tem, maps)); + } + + /* If the last key in thisseq is meta-prefix-char, + turn it into a meta-ized keystroke. We know + that the event we're about to append is an + ascii keystroke since we're processing a + keymap table. */ + if (is_metized) + { + int meta_bit = meta_modifier; + Lisp_Object last = make_number (XINT (Flength (thisseq)) - 1); + tem = Fcopy_sequence (thisseq); - ++ + Faset (tem, last, make_number (XINT (key) | meta_bit)); - ++ + /* This new sequence is the same length as + thisseq, so stick it in the list right + after this one. */ + XSETCDR (tail, + Fcons (Fcons (tem, cmd), XCDR (tail))); + } + else + { + tem = append_key (thisseq, key); + nconc2 (tail, Fcons (Fcons (tem, cmd), Qnil)); } } @@@ -1915,10 -1796,10 +1909,9 @@@ then the value includes only maps for p { Lisp_Object indices[3]; - map_char_table (accessible_keymaps_char_table, Qnil, elt, + map_char_table (accessible_keymaps_char_table, Qnil, elt, Fcons (Fcons (maps, make_number (is_metized)), -- Fcons (tail, thisseq)), -- 0, indices); ++ Fcons (tail, thisseq))); } else if (VECTORP (elt)) { @@@ -2432,8 -2299,8 +2386,7 @@@ where_is_internal (definition, keymaps Fcons (Fcons (this, last), Fcons (make_number (nomenus), make_number (last_is_meta)))); - map_char_table (where_is_internal_2, Qnil, elt, elt, args, - map_char_table (where_is_internal_2, Qnil, elt, args, -- 0, indices); ++ map_char_table (where_is_internal_2, Qnil, elt, args); sequences = XCDR (XCAR (args)); } else if (CONSP (elt)) @@@ -3363,33 -3186,14 +3272,14 @@@ describe_vector (vector, elt_prefix, ar if (!NILP (tem)) continue; } - /* Set CHARACTER to the character this entry describes, if any. - Also update *INDICES. */ - if (CHAR_TABLE_P (vector)) - { - indices[char_table_depth] = i; - - if (char_table_depth == 0) - { - character = i; - indices[0] = i - 128; - } - else if (complete_char) - { - character = MAKE_CHAR (indices[0], indices[1], indices[2]); - } - else - character = 0; - } - else - character = i; + character = make_number (starting_i); /* If this binding is shadowed by some other map, ignore it. */ - if (!NILP (shadow) && complete_char) + if (!NILP (shadow)) { Lisp_Object tem; - + - ASET (kludge, 0, make_number (character)); + ASET (kludge, 0, character); tem = shadow_lookup (shadow, kludge, Qt); if (!NILP (tem)) continue; @@@ -3509,32 -3252,7 +3338,8 @@@ if (!NILP (elt_prefix)) insert1 (elt_prefix); + - if (CHAR_TABLE_P (vector)) - { - if (char_table_depth == 0) - { - insert1 (Fsingle_key_description (make_number (i), Qnil)); - } - else if (complete_char) - { - indices[char_table_depth] = i; - character = MAKE_CHAR (indices[0], indices[1], indices[2]); - insert_char (character); - } - else - { - /* We need an octal representation for this block of - characters. */ - char work[16]; - sprintf (work, "(row %d)", i); - insert (work, strlen (work)); - } - } - else - { - insert1 (Fsingle_key_description (make_number (i), Qnil)); - } + insert1 (Fsingle_key_description (make_number (i), Qnil)); } /* Print a description of the definition of this character. @@@ -3543,16 -3261,9 +3348,8 @@@ (*elt_describer) (definition, args); } - /* For (sub) char-table, print `defalt' slot at last. */ - if (CHAR_TABLE_P (vector) && !NILP (XCHAR_TABLE (vector)->defalt)) - { - insert (" ", char_table_depth * 2); - insert_string ("<>"); - (*elt_describer) (XCHAR_TABLE (vector)->defalt, args); - } - UNGCPRO; } - /* Apropos - finding all symbols whose names match a regexp. */ static Lisp_Object apropos_predicate; diff --cc src/lisp.h index 9a80cb774fc,b978bfc8c25..717db80e558 --- a/src/lisp.h +++ b/src/lisp.h @@@ -297,8 -297,9 +297,9 @@@ enum pvec_typ PVEC_BOOL_VECTOR = 0x10000, PVEC_BUFFER = 0x20000, PVEC_HASH_TABLE = 0x40000, - PVEC_TYPE_MASK = 0x7fe00 + PVEC_SUB_CHAR_TABLE = 0x80000, + PVEC_TYPE_MASK = 0x0ffe00 - + #if 0 /* This is used to make the value of PSEUDOVECTOR_FLAG available to GDB. It doesn't work on OS Alpha. Moved to a variable in emacs.c. */ @@@ -722,55 -738,80 +703,79 @@@ struct Lisp_Vecto For these characters, do not check validity of CT and do not follow parent. */ - #define CHAR_TABLE_TRANSLATE(CT, IDX) \ - ((IDX) < CHAR_TABLE_SINGLE_BYTE_SLOTS \ - ? (!NILP (XCHAR_TABLE (CT)->contents[IDX]) \ - ? XINT (XCHAR_TABLE (CT)->contents[IDX]) \ - : IDX) \ - : char_table_translate (CT, IDX)) + #define CHAR_TABLE_TRANSLATE(CT, IDX) \ + char_table_translate (CT, IDX) /* Equivalent to Faset (CT, IDX, VAL) with optimization for ASCII and - 8-bit Europeans characters. Do not check validity of CT. */ - #define CHAR_TABLE_SET(CT, IDX, VAL) \ - do { \ - if (XFASTINT (IDX) < CHAR_TABLE_SINGLE_BYTE_SLOTS) \ - XCHAR_TABLE (CT)->contents[XFASTINT (IDX)] = VAL; \ - else \ - Faset (CT, IDX, VAL); \ - } while (0) + 8-bit European characters. Do not check validity of CT. */ + #define CHAR_TABLE_SET(CT, IDX, VAL) \ + (((IDX) >= 0 && ASCII_CHAR_P (IDX) \ + && SUB_CHAR_TABLE_P (XCHAR_TABLE (CT)->ascii)) \ + ? XSUB_CHAR_TABLE (XCHAR_TABLE (CT)->ascii)->contents[IDX] = VAL \ + : char_table_set (CT, IDX, VAL)) + - + #define CHARTAB_SIZE_BITS_0 6 + #define CHARTAB_SIZE_BITS_1 4 + #define CHARTAB_SIZE_BITS_2 5 + #define CHARTAB_SIZE_BITS_3 7 + + extern const int chartab_size[4]; + + struct Lisp_Sub_Char_Table; struct Lisp_Char_Table -{ - /* This is the vector's size field, which also holds the - pseudovector type information. It holds the size, too. The size - counts the defalt, parent, purpose, ascii, contents, and extras - slots. */ - EMACS_INT size; - struct Lisp_Vector *next; + { + /* This is the vector's size field, which also holds the - pseudovector type information. It holds the size, too. - The size counts the top, defalt, purpose, and parent slots. - The last three are not counted if this is a sub char table. */ ++ pseudovector type information. It holds the size, too. The size ++ counts the defalt, parent, purpose, ascii, contents, and extras ++ slots. */ + EMACS_INT size; + struct Lisp_Vector *next; - /* This holds a flag to tell if this is a top level char table (t) - or a sub char table (nil). */ - Lisp_Object top; + - /* This holds a default value, - which is used whenever the value for a specific character is nil. */ - Lisp_Object defalt; + /* This holds a default value, + which is used whenever the value for a specific character is nil. */ + Lisp_Object defalt; - /* This holds an actual value of each element. A sub char table - has only SUB_CHAR_TABLE_ORDINARY_SLOTS number of elements. */ - Lisp_Object contents[CHAR_TABLE_ORDINARY_SLOTS]; - - /* A sub char table doesn't has the following slots. */ - /* This points to another char table, which we inherit from - when the value for a specific character is nil. - The `defalt' slot takes precedence over this. */ - /* This points to another char table, which we inherit from when the - value for a specific character is nil. The `defalt' slot takes - precedence over this. */ - Lisp_Object parent; ++ /* This points to another char table, which we inherit from when the ++ value for a specific character is nil. The `defalt' slot takes ++ precedence over this. */ + Lisp_Object parent; - /* This should be a symbol which says what kind of use - this char-table is meant for. - Typically now the values can be `syntax-table' and `display-table'. */ + - /* This is a symbol which says what kind of use this char-table is - meant for. */ - Lisp_Object purpose; ++ /* This is a symbol which says what kind of use this char-table is ++ meant for. */ + Lisp_Object purpose; - /* These hold additional data. */ + - /* The bottom sub char-table for characters of the range 0..127. It - is nil if none of ASCII character has a specific value. */ - Lisp_Object ascii; ++ /* The bottom sub char-table for characters of the range 0..127. It ++ is nil if none of ASCII character has a specific value. */ ++ Lisp_Object ascii; + - Lisp_Object contents[(1 << CHARTAB_SIZE_BITS_0)]; ++ Lisp_Object contents[(1 << CHARTAB_SIZE_BITS_0)]; + - /* These hold additional data. It is a vector. */ - Lisp_Object extras[1]; -}; ++ /* These hold additional data. It is a vector. */ + Lisp_Object extras[1]; + }; + struct Lisp_Sub_Char_Table -{ - /* This is the vector's size field, which also holds the - pseudovector type information. It holds the size, too. */ - EMACS_INT size; - struct Lisp_Vector *next; ++ { ++ /* This is the vector's size field, which also holds the ++ pseudovector type information. It holds the size, too. */ ++ EMACS_INT size; ++ struct Lisp_Vector *next; + - /* Depth of this sub char-table. It should be 1, 2, or 3. A sub - char-table of depth 1 contains 16 elments, and each element - covers 4096 (128*32) characters. A sub char-table of depth 2 - contains 32 elements, and each element covers 128 characters. A - sub char-table of depth 3 contains 128 elements, and each element - is for one character. */ - Lisp_Object depth; ++ /* Depth of this sub char-table. It should be 1, 2, or 3. A sub ++ char-table of depth 1 contains 16 elments, and each element ++ covers 4096 (128*32) characters. A sub char-table of depth 2 ++ contains 32 elements, and each element covers 128 characters. A ++ sub char-table of depth 3 contains 128 elements, and each element ++ is for one character. */ ++ Lisp_Object depth; + - /* Minimum character covered by the sub char-table. */ - Lisp_Object min_char; ++ /* Minimum character covered by the sub char-table. */ ++ Lisp_Object min_char; + - Lisp_Object contents[1]; -}; ++ Lisp_Object contents[1]; ++ }; + /* A boolvector is a kind of vectorlike, with contents are like a string. */ struct Lisp_Bool_Vector { @@@ -1264,9 -1260,9 +1269,9 @@@ typedef unsigned char UCHAR (CHAR_ALT | CHAR_SUPER | CHAR_HYPER | CHAR_SHIFT | CHAR_CTL | CHAR_META) - /* Actually, the current Emacs uses 19 bits for the character value + /* Actually, the current Emacs uses 22 bits for the character value itself. */ - #define CHARACTERBITS 19 -#define CHARACTERBITS 22 ++#define CHARACTERBITS 2 /* The maximum byte size consumed by push_key_description. All callers should assure that at least this size of memory is @@@ -1322,9 -1318,9 +1327,9 @@@ #define GLYPH int /* Mask bits for face. */ - #define GLYPH_MASK_FACE 0x7FF80000 + #define GLYPH_MASK_FACE 0x7FC00000 /* Mask bits for character code. */ - #define GLYPH_MASK_CHAR 0x0007FFFF /* The lowest 19 bits */ -#define GLYPH_MASK_CHAR 0x003FFFFF /* The lowest 19 bits */ ++#define GLYPH_MASK_CHAR 0x003FFFFF /* The lowest 22 bits */ /* The FAST macros assume that we already know we're in an X window. */ @@@ -1422,7 -1418,9 +1427,9 @@@ #define BUFFERP(x) PSEUDOVECTORP (x, PVEC_BUFFER) #define GC_BUFFERP(x) GC_PSEUDOVECTORP (x, PVEC_BUFFER) #define CHAR_TABLE_P(x) PSEUDOVECTORP (x, PVEC_CHAR_TABLE) -#define GC_CHAR_TABLE_P(x) GC_PSEUDOVECTORP (x, PVEC_CHAR_TABLE) + #define SUB_CHAR_TABLE_P(x) PSEUDOVECTORP (x, PVEC_SUB_CHAR_TABLE) + #define GC_SUB_CHAR_TABLE_P(x) GC_PSEUDOVECTORP (x, PVEC_SUB_CHAR_TABLE) +#define GC_CHAR_TABLE_P(x) GC_PSEUDOVECTORP (x, PVEC_CHAR_TABLE) #define BOOL_VECTOR_P(x) PSEUDOVECTORP (x, PVEC_BOOL_VECTOR) #define GC_BOOL_VECTOR_P(x) GC_PSEUDOVECTORP (x, PVEC_BOOL_VECTOR) #define FRAMEP(x) PSEUDOVECTORP (x, PVEC_FRAME) @@@ -1530,6 -1527,6 +1536,20 @@@ XSETCDR ((x), tmp); \ } while (0) ++#define CHECK_NATNUM_CAR(x) \ ++ do { \ ++ Lisp_Object tmp = XCAR (x); \ ++ CHECK_NATNUM (tmp); \ ++ XSETCAR ((x), tmp); \ ++ } while (0) ++ ++#define CHECK_NATNUM_CDR(x) \ ++ do { \ ++ Lisp_Object tmp = XCDR (x); \ ++ CHECK_NATNUM (tmp); \ ++ XSETCDR ((x), tmp); \ ++ } while (0) ++ /* Cast pointers to this type to compare them. Some machines want int. */ #ifndef PNTR_COMPARISON_TYPE #define PNTR_COMPARISON_TYPE EMACS_UINT @@@ -2132,10 -2102,10 +2152,10 @@@ EXFUN (Fread_coding_system, 2) EXFUN (Fread_non_nil_coding_system, 1); EXFUN (Ffind_operation_coding_system, MANY); EXFUN (Fupdate_coding_systems_internal, 0); - EXFUN (Fencode_coding_string, 3); - EXFUN (Fdecode_coding_string, 3); + EXFUN (Fencode_coding_string, 4); + EXFUN (Fdecode_coding_string, 4); -extern Lisp_Object detect_coding_system P_ ((unsigned char *, int, int, int, - Lisp_Object)); +extern Lisp_Object detect_coding_system P_ ((const unsigned char *, int, int, - int)); ++ int, Lisp_Object)); extern void init_coding P_ ((void)); extern void init_coding_once P_ ((void)); extern void syms_of_coding P_ ((void)); @@@ -2146,15 -2118,16 +2168,18 @@@ extern void syms_of_character P_ ((void EXFUN (Fchar_bytes, 1); EXFUN (Fchar_width, 1); EXFUN (Fstring, MANY); -extern int chars_in_text P_ ((unsigned char *, int)); -extern int multibyte_chars_in_text P_ ((unsigned char *, int)); +extern int chars_in_text P_ ((const unsigned char *, int)); +extern int multibyte_chars_in_text P_ ((const unsigned char *, int)); - extern int unibyte_char_to_multibyte P_ ((int)); extern int multibyte_char_to_unibyte P_ ((int, Lisp_Object)); extern Lisp_Object Qcharset; + extern void init_charset P_ ((void)); extern void init_charset_once P_ ((void)); extern void syms_of_charset P_ ((void)); ++/* Structure forward declarations. */ ++struct charset; + + /* Defined in composite.c */ + extern void syms_of_composite P_ ((void)); /* Defined in syntax.c */ EXFUN (Fforward_word, 1); @@@ -2254,10 -2226,6 +2279,7 @@@ extern Lisp_Object string_make_unibyte EXFUN (Fcopy_alist, 1); EXFUN (Fplist_get, 2); EXFUN (Fplist_put, 3); +EXFUN (Fplist_member, 2); - EXFUN (Fset_char_table_parent, 2); - EXFUN (Fchar_table_extra_slot, 2); - EXFUN (Fset_char_table_extra_slot, 3); EXFUN (Frassoc, 2); EXFUN (Fstring_equal, 2); EXFUN (Fcompare_strings, 7); @@@ -2281,14 -2244,15 +2298,15 @@@ extern Lisp_Object Qinhibit_modificatio extern void move_gap P_ ((int)); extern void move_gap_both P_ ((int, int)); extern void make_gap P_ ((int)); -extern int copy_text P_ ((unsigned char *, unsigned char *, int, int, int)); -extern int count_size_as_multibyte P_ ((unsigned char *, int)); -extern int count_combining_before P_ ((unsigned char *, int, int, int)); -extern int count_combining_after P_ ((unsigned char *, int, int, int)); -extern void insert P_ ((unsigned char *, int)); -extern void insert_and_inherit P_ ((unsigned char *, int)); -extern void insert_1 P_ ((unsigned char *, int, int, int, int)); -extern void insert_1_both P_ ((unsigned char *, int, int, int, int, int)); +extern int copy_text P_ ((const unsigned char *, unsigned char *, int, int, int)); +extern int count_size_as_multibyte P_ ((const unsigned char *, int)); +extern int count_combining_before P_ ((const unsigned char *, int, int, int)); +extern int count_combining_after P_ ((const unsigned char *, int, int, int)); +extern void insert P_ ((const unsigned char *, int)); +extern void insert_and_inherit P_ ((const unsigned char *, int)); +extern void insert_1 P_ ((const unsigned char *, int, int, int, int)); +extern void insert_1_both P_ ((const unsigned char *, int, int, int, int, int)); + extern void insert_from_gap P_ ((int, int)); extern void insert_from_string P_ ((Lisp_Object, int, int, int, int, int)); extern void insert_from_buffer P_ ((struct buffer *, int, int, int)); extern void insert_char P_ ((int)); @@@ -2433,6 -2389,32 +2449,31 @@@ extern void init_alloc P_ ((void)) extern void syms_of_alloc P_ ((void)); extern struct buffer * allocate_buffer P_ ((void)); + /* Defined in chartab.c */ + EXFUN (Fmake_char_table, 2); + EXFUN (Fchar_table_parent, 1); + EXFUN (Fset_char_table_parent, 2); + EXFUN (Fchar_table_extra_slot, 2); + EXFUN (Fset_char_table_extra_slot, 3); + EXFUN (Fchar_table_range, 2); + EXFUN (Fset_char_table_range, 3); + EXFUN (Fset_char_table_default, 3); + EXFUN (Foptimize_char_table, 1); + EXFUN (Fmap_char_table, 2); + extern Lisp_Object copy_char_table P_ ((Lisp_Object)); + extern Lisp_Object sub_char_table_ref P_ ((Lisp_Object, int)); + extern Lisp_Object char_table_ref P_ ((Lisp_Object, int)); + extern Lisp_Object char_table_ref_and_range P_ ((Lisp_Object, int, + int *, int *)); + extern Lisp_Object char_table_set P_ ((Lisp_Object, int, Lisp_Object)); + extern Lisp_Object char_table_set_range P_ ((Lisp_Object, int, int, + Lisp_Object)); + extern int char_table_translate P_ ((Lisp_Object, int)); + extern void map_char_table P_ ((void (*) (Lisp_Object, Lisp_Object, + Lisp_Object), - Lisp_Object, Lisp_Object, Lisp_Object, int, - Lisp_Object *)); ++ Lisp_Object, Lisp_Object, Lisp_Object)); + extern void syms_of_chartab P_ ((void)); + /* Defined in print.c */ extern Lisp_Object Vprin1_to_string_buffer; extern void debug_print P_ ((Lisp_Object)); @@@ -2963,7 -2977,11 +3005,8 @@@ extern Lisp_Object Qinsert_in_front_hoo EXFUN (Fnext_single_property_change, 4); EXFUN (Fnext_single_char_property_change, 4); EXFUN (Fprevious_single_property_change, 4); + EXFUN (Fget_text_property, 3); EXFUN (Fput_text_property, 5); -EXFUN (Fset_text_properties, 4); -EXFUN (Ftext_property_not_all, 5); EXFUN (Fprevious_char_property_change, 2); EXFUN (Fnext_char_property_change, 2); extern void report_interval_modification P_ ((Lisp_Object, Lisp_Object)); diff --cc src/lread.c index 0c9bc140b73,8f990aaeb11..256df2776a5 --- a/src/lread.c +++ b/src/lread.c @@@ -157,13 -159,6 +170,10 @@@ static int read_from_string_index static int read_from_string_index_byte; static int read_from_string_limit; - /* Number of bytes left to read in the buffer character - that `readchar' has already advanced over. */ - static int readchar_backlog; +/* Number of characters read in the current call to Fread or + Fread_from_string. */ +static int readchar_count; + /* This contains the last string skipped with #@. */ static char *saved_doc_string; /* Length of buffer allocated in saved_doc_string. */ @@@ -203,8 -198,10 +213,10 @@@ int load_dangerous_libraries static Lisp_Object Vbytecomp_version_regexp; - static void to_multibyte P_ ((char **, char **, int *)); + static int read_emacs_mule_char P_ ((int, int (*) (int, Lisp_Object), + Lisp_Object)); + -static void readevalloop P_ ((Lisp_Object, FILE*, Lisp_Object, +static void readevalloop P_ ((Lisp_Object, FILE*, Lisp_Object, Lisp_Object (*) (), int, Lisp_Object, Lisp_Object)); static Lisp_Object load_unwind P_ ((Lisp_Object)); @@@ -228,15 -227,24 +242,25 @@@ static int readbyte_from_string P_ ((in #define READCHAR readchar (readcharfun) #define UNREAD(c) unreadchar (readcharfun, c) + /* When READCHARFUN is Qget_file_char, Qget_emacs_mule_file_char, - Qlambda, or a cons, we use this to keep unread character because a - file stream can't handle multibyte-char unreading. The value -1 ++ Qlambda, or a cons, we use this to keep an unread character because ++ a file stream can't handle multibyte-char unreading. The value -1 + means that there's no unread character. */ + static int unread_char; + - static int readchar (readcharfun) Lisp_Object readcharfun; { Lisp_Object tem; register int c; + int (*readbyte) P_ ((int, Lisp_Object)); + unsigned char buf[MAX_MULTIBYTE_LENGTH]; + int i, len; + int emacs_mule_encoding = 0; + readchar_count++; + if (BUFFERP (readcharfun)) { register struct buffer *inbuffer = XBUFFER (readcharfun); @@@ -313,22 -303,15 +319,17 @@@ } if (EQ (readcharfun, Qlambda)) - return read_bytecode_char (0); + { + readbyte = readbyte_for_lambda; + goto read_multibyte; + } + if (EQ (readcharfun, Qget_file_char)) { - c = getc (instream); - #ifdef EINTR - /* Interrupted reads have been observed while reading over the network */ - while (c == EOF && ferror (instream) && errno == EINTR) - { - clearerr (instream); - c = getc (instream); - } - #endif - return c; + readbyte = readbyte_from_file; + goto read_multibyte; } + if (STRINGP (readcharfun)) { if (read_from_string_index >= read_from_string_limit) @@@ -340,14 -323,61 +341,62 @@@ return c; } + + if (CONSP (readcharfun)) + { + /* This is the case that read_vector is reading from a unibyte + string that contains a byte sequence previously skipped + because of #@NUMBER. The car part of readcharfun is that + string, and the cdr part is a value of readcharfun given to + read_vector. */ + readbyte = readbyte_from_string; + if (EQ (XCDR (readcharfun), Qget_emacs_mule_file_char)) + emacs_mule_encoding = 1; + goto read_multibyte; + } ++ + if (EQ (readcharfun, Qget_emacs_mule_file_char)) + { + readbyte = readbyte_from_file; + emacs_mule_encoding = 1; + goto read_multibyte; + } + tem = call0 (readcharfun); if (NILP (tem)) return -1; return XINT (tem); + + read_multibyte: + if (unread_char >= 0) + { + c = unread_char; + unread_char = -1; + return c; + } + c = (*readbyte) (-1, readcharfun); + if (c < 0 || ASCII_BYTE_P (c) || load_each_byte) + return c; + if (emacs_mule_encoding) + return read_emacs_mule_char (c, readbyte, readcharfun); + i = 0; + buf[i++] = c; + len = BYTES_BY_CHAR_HEAD (c); + while (i < len) + { + c = (*readbyte) (-1, readcharfun); + if (c < 0 || ! TRAILING_CODE_P (c)) + { + while (--i > 1) + (*readbyte) (buf[i], readcharfun); + return BYTE8_TO_CHAR (buf[0]); + } + buf[i++] = c; + } + return STRING_CHAR (buf, i); } - /* Unread the character C in the way appropriate for the stream READCHARFUN. If the stream is a user function, call it with the char as argument. */ @@@ -411,20 -442,134 +462,144 @@@ unreadchar (readcharfun, c call1 (readcharfun, make_number (c)); } + static int + readbyte_for_lambda (c, readcharfun) + int c; + Lisp_Object readcharfun; + { + return read_bytecode_char (c >= 0); + } + + + static int + readbyte_from_file (c, readcharfun) + int c; + Lisp_Object readcharfun; + { + if (c >= 0) + { + ungetc (c, instream); + return 0; + } + + c = getc (instream); + #ifdef EINTR + /* Interrupted reads have been observed while reading over the network */ + while (c == EOF && ferror (instream) && errno == EINTR) + { + clearerr (instream); + c = getc (instream); + } + #endif + return (c == EOF ? -1 : c); + } + + static int + readbyte_from_string (c, readcharfun) + int c; + Lisp_Object readcharfun; + { + Lisp_Object string = XCAR (readcharfun); + + if (c >= 0) + { + read_from_string_index--; + read_from_string_index_byte + = string_char_to_byte (string, read_from_string_index); + } - ++ + if (read_from_string_index >= read_from_string_limit) + c = -1; + else + FETCH_STRING_CHAR_ADVANCE (c, string, + read_from_string_index, + read_from_string_index_byte); + return c; + } + + + /* Read one non-ASCII character from INSTREAM. The character is + encoded in `emacs-mule' and the first byte is already read in + C. */ + + extern char emacs_mule_bytes[256]; + + static int + read_emacs_mule_char (c, readbyte, readcharfun) + int c; + int (*readbyte) P_ ((int, Lisp_Object)); + Lisp_Object readcharfun; + { + /* Emacs-mule coding uses at most 4-byte for one character. */ + unsigned char buf[4]; + int len = emacs_mule_bytes[c]; + struct charset *charset; + int i; + unsigned code; + + if (len == 1) + /* C is not a valid leading-code of `emacs-mule'. */ + return BYTE8_TO_CHAR (c); + + i = 0; + buf[i++] = c; + while (i < len) + { + c = (*readbyte) (-1, readcharfun); + if (c < 0xA0) + { + while (--i > 1) + (*readbyte) (buf[i], readcharfun); + return BYTE8_TO_CHAR (buf[0]); + } + buf[i++] = c; + } + + if (len == 2) + { + charset = emacs_mule_charset[buf[0]]; + code = buf[1] & 0x7F; + } + else if (len == 3) + { + if (buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_11 + || buf[0] == EMACS_MULE_LEADING_CODE_PRIVATE_12) + { + charset = emacs_mule_charset[buf[1]]; + code = buf[2] & 0x7F; + } + else + { + charset = emacs_mule_charset[buf[0]]; + code = ((buf[1] << 8) | buf[2]) & 0x7F7F; + } + } + else + { + charset = emacs_mule_charset[buf[1]]; - code = ((buf[2] << 8) | buf[3]) & 0x7F7F; ++ code = ((buf[2] << 8) | buf[3]) & 0x7F7F; + } + c = DECODE_CHAR (charset, code); + if (c < 0) + Fsignal (Qinvalid_read_syntax, + Fcons (build_string ("invalid multibyte form"), Qnil)); + return c; + } + + -static Lisp_Object read0 (), read1 (), read_list (), read_vector (); -static Lisp_Object substitute_object_recurse (); -static void substitute_object_in_subtree (), substitute_in_interval (); +static Lisp_Object read_internal_start P_ ((Lisp_Object, Lisp_Object, + Lisp_Object)); +static Lisp_Object read0 P_ ((Lisp_Object)); +static Lisp_Object read1 P_ ((Lisp_Object, int *, int)); + +static Lisp_Object read_list P_ ((int, Lisp_Object)); +static Lisp_Object read_vector P_ ((Lisp_Object, int)); - static int read_multibyte P_ ((int, Lisp_Object)); + +static Lisp_Object substitute_object_recurse P_ ((Lisp_Object, Lisp_Object, + Lisp_Object)); +static void substitute_object_in_subtree P_ ((Lisp_Object, + Lisp_Object)); +static void substitute_in_interval P_ ((INTERVAL, Lisp_Object)); /* Get a character from the tty. */ @@@ -593,11 -733,11 +768,11 @@@ DEFUN ("get-file-char", Fget_file_char - /* Value is non-zero if the file asswociated with file descriptor FD - is a compiled Lisp file that's safe to load. Only files compiled - with Emacs are safe to load. Files compiled with XEmacs can lead - to a crash in Fbyte_code because of an incompatible change in the - byte compiler. */ + /* Value is a version number of byte compiled code if the file - associated with file descriptor FD is a compiled Lisp file that's ++ asswociated with file descriptor FD is a compiled Lisp file that's + safe to load. Only files compiled with Emacs are safe to load. + Files compiled with XEmacs can lead to a crash in Fbyte_code + because of an incompatible change in the byte compiler. */ static int safe_to_load_p (fd) @@@ -605,7 -745,7 +780,8 @@@ { char buf[512]; int nbytes, i; - int safe_p = 1, version = 0; + int safe_p = 1; ++ int version = 1; /* Read the first few bytes from the file, and look for a line specifying the byte compiler version used. */ @@@ -798,8 -926,10 +979,10 @@@ Return t if file exists. */ Vloads_in_progress = Fcons (found, Vloads_in_progress); } + version = -1; - if (!bcmp (&(XSTRING (found)->data[STRING_BYTES (XSTRING (found)) - 4]), + if (!bcmp (SDATA (found) + SBYTES (found) - 4, - ".elc", 4)) + ".elc", 4) + || (version = safe_to_load_p (fd)) > 0) /* Load .elc files directly, but not when they are remote and have no handler! */ { @@@ -1509,63 -1611,45 +1700,70 @@@ START and END optionally delimit a subs (string, start, end) Lisp_Object string, start, end; { - int startval, endval; - Lisp_Object str; - Lisp_Object tem; + Lisp_Object ret; + CHECK_STRING (string); + /* read_internal_start sets read_from_string_index. */ + ret = read_internal_start (string, start, end); + return Fcons (ret, make_number (read_from_string_index)); +} - if (CONSP (string)) - str = XCAR (string); - else - str = string; - CHECK_STRING (str); +/* Function to set up the global context we need in toplevel read + calls. */ +static Lisp_Object +read_internal_start (stream, start, end) + Lisp_Object stream; + Lisp_Object start; /* Only used when stream is a string. */ + Lisp_Object end; /* Only used when stream is a string. */ +{ + Lisp_Object retval; - readchar_backlog = -1; - if (NILP (end)) - endval = XSTRING (str)->size; - else - { - CHECK_NUMBER (end); - endval = XINT (end); - if (endval < 0 || endval > XSTRING (str)->size) - args_out_of_range (str, end); - } + readchar_count = 0; + new_backquote_flag = 0; + read_objects = Qnil; + if (EQ (Vread_with_symbol_positions, Qt) + || EQ (Vread_with_symbol_positions, stream)) + Vread_symbol_positions_list = Qnil; - if (STRINGP (stream)) - if (NILP (start)) - startval = 0; - else ++ if (STRINGP (stream) ++ || ((CONSP (stream) && STRINGP (XCAR (stream))))) { - CHECK_NUMBER (start); - startval = XINT (start); - if (startval < 0 || startval > endval) - args_out_of_range (str, start); - } + int startval, endval; ++ Lisp_Object string; + - read_from_string_index = startval; - read_from_string_index_byte = string_char_to_byte (str, startval); - read_from_string_limit = endval; ++ if (STRINGP (stream)) ++ string = stream; ++ else ++ string = XCAR (stream); + - new_backquote_flag = 0; - read_objects = Qnil; + if (NILP (end)) - endval = SCHARS (stream); ++ endval = SCHARS (string); + else + { + CHECK_NUMBER (end); + endval = XINT (end); - if (endval < 0 || endval > SCHARS (stream)) - args_out_of_range (stream, end); ++ if (endval < 0 || endval > SCHARS (string)) ++ args_out_of_range (string, end); + } + + if (NILP (start)) + startval = 0; + else + { + CHECK_NUMBER (start); + startval = XINT (start); + if (startval < 0 || startval > endval) - args_out_of_range (stream, start); ++ args_out_of_range (string, start); + } + read_from_string_index = startval; - read_from_string_index_byte = string_char_to_byte (stream, startval); ++ read_from_string_index_byte = string_char_to_byte (string, startval); + read_from_string_limit = endval; + } - tem = read0 (string); - return Fcons (tem, make_number (read_from_string_index)); + retval = read0 (stream); + if (EQ (Vread_with_symbol_positions, Qt) + || EQ (Vread_with_symbol_positions, stream)) + Vread_symbol_positions_list = Fnreverse (Vread_symbol_positions_list); + return retval; } /* Use this for recursive reads, in contexts where internal tokens @@@ -1590,51 -1674,13 +1788,13 @@@ read0 (readcharfun static int read_buffer_size; static char *read_buffer; - /* Read multibyte form and return it as a character. C is a first - byte of multibyte form, and rest of them are read from - READCHARFUN. */ - - static int - read_multibyte (c, readcharfun) - register int c; - Lisp_Object readcharfun; - { - /* We need the actual character code of this multibyte - characters. */ - unsigned char str[MAX_MULTIBYTE_LENGTH]; - int len = 0; - int bytes; - - if (c < 0) - return c; - - str[len++] = c; - while ((c = READCHAR) >= 0xA0 - && len < MAX_MULTIBYTE_LENGTH) - { - str[len++] = c; - readchar_count--; - } - UNREAD (c); - if (UNIBYTE_STR_AS_MULTIBYTE_P (str, len, bytes)) - return STRING_CHAR (str, len); - /* The byte sequence is not valid as multibyte. Unread all bytes - but the first one, and return the first byte. */ - while (--len > 0) - UNREAD (str[len]); - return str[0]; - } - /* Read a \-escape sequence, assuming we already read the `\'. - If the escape sequence forces unibyte, store 1 into *BYTEREP. - If the escape sequence forces multibyte, store 2 into *BYTEREP. - Otherwise store 0 into *BYTEREP. */ - If the escape sequence forces unibyte, return eight-bit-char. */ ++ If the escape sequence forces unibyte, return eight-bit char. */ static int - read_escape (readcharfun, stringp, byterep) + read_escape (readcharfun, stringp) Lisp_Object readcharfun; int stringp; - int *byterep; { register int c = READCHAR; @@@ -1707,16 -1751,12 +1865,16 @@@ return c | alt_modifier; case 's': + if (stringp) + return ' '; c = READCHAR; - if (c != '-') - error ("Invalid escape character syntax"); + if (c != '-') { + UNREAD (c); + return ' '; + } c = READCHAR; if (c == '\\') - c = read_escape (readcharfun, 0, byterep); + c = read_escape (readcharfun, 0); return c | super_modifier; case 'C': @@@ -1765,8 -1805,9 +1923,9 @@@ break; } } - - if (! ASCII_BYTE_P (i)) + - *byterep = 1; ++ if (i >= 0x80 && i < 0x100) + i = BYTE8_TO_CHAR (i); return i; } @@@ -1973,11 -1977,18 +2095,18 @@@ read1 (readcharfun, pch, first_in_list if (c == '[') { Lisp_Object tmp; + int depth, size; - ++ tmp = read_vector (readcharfun, 0); - if (XVECTOR (tmp)->size != SUB_CHAR_TABLE_STANDARD_SLOTS) + if (!INTEGERP (AREF (tmp, 0))) + error ("Invalid depth in char-table"); + depth = XINT (AREF (tmp, 0)); + if (depth < 1 || depth > 3) + error ("Invalid depth in char-table"); + size = XVECTOR (tmp)->size + 2; + if (chartab_size [depth] != size) error ("Invalid size char-table"); - XSETCHAR_TABLE (tmp, XCHAR_TABLE (tmp)); - XCHAR_TABLE (tmp)->top = Qnil; + XSETSUB_CHAR_TABLE (tmp, XSUB_CHAR_TABLE (tmp)); return tmp; } Fsignal (Qinvalid_read_syntax, @@@ -1998,17 -2009,19 +2127,19 @@@ UNREAD (c); tmp = read1 (readcharfun, pch, first_in_list); - if (size_in_chars != SCHARS (tmp) - /* We used to print 1 char too many - when the number of bits was a multiple of 8. - Accept such input in case it came from an old version. */ - && ! (XFASTINT (length) - == (SCHARS (tmp) - 1) * BITS_PER_CHAR)) + if (STRING_MULTIBYTE (tmp) - || (size_in_chars != XSTRING (tmp)->size ++ || (size_in_chars != SCHARS (tmp) + /* We used to print 1 char too many + when the number of bits was a multiple of 8. + Accept such input in case it came from an old + version. */ + && ! (XFASTINT (length) - == (XSTRING (tmp)->size - 1) * BITS_PER_CHAR))) ++ == (SCHARS (tmp) - 1) * BITS_PER_CHAR))) Fsignal (Qinvalid_read_syntax, Fcons (make_string ("#&...", 5), Qnil)); - + val = Fmake_bool_vector (length, Qnil); - bcopy (XSTRING (tmp)->data, XBOOL_VECTOR (val)->data, + bcopy (SDATA (tmp), XBOOL_VECTOR (val)->data, size_in_chars); /* Clear the extraneous bits in the last byte. */ if (XINT (length) != size_in_chars * BITS_PER_CHAR) @@@ -2078,8 -2092,10 +2210,10 @@@ } if (c >= 0) UNREAD (c); - + - if (load_force_doc_strings && EQ (readcharfun, Qget_file_char)) + if (load_force_doc_strings + && (EQ (readcharfun, Qget_file_char) + || EQ (readcharfun, Qget_emacs_mule_file_char))) { /* If we are supposed to force doc strings into core right now, record the last string that we skipped, @@@ -2131,16 -2147,9 +2265,17 @@@ c = READCHAR; } + load_each_byte = 0; goto retry; } + if (c == '!') + { + /* #! appears at the beginning of an executable file. + Skip the first line. */ + while (c != '\n' && c >= 0) + c = READCHAR; + goto retry; + } if (c == '$') return Vload_file_name; if (c == '\'') @@@ -2260,51 -2269,19 +2395,54 @@@ case '?': { - int discard; + int modifiers; + int next_char; + int ok; c = READCHAR; if (c < 0) end_of_file_error (); + + /* Accept `single space' syntax like (list ? x) where the + whitespace character is SPC or TAB. + Other literal whitespace like NL, CR, and FF are not accepted, + as there are well-established escape sequences for these. */ + if (c == ' ' || c == '\t') + return make_number (c); + if (c == '\\') - c = read_escape (readcharfun, 0, &discard); - else if (BASE_LEADING_CODE_P (c)) - c = read_multibyte (c, readcharfun); + c = read_escape (readcharfun, 0); + modifiers = c & CHAR_MODIFIER_MASK; + c &= ~CHAR_MODIFIER_MASK; + if (CHAR_BYTE8_P (c)) + c = CHAR_TO_BYTE8 (c); + c |= modifiers; + next_char = READCHAR; + if (next_char == '.') + { + /* Only a dotted-pair dot is valid after a char constant. */ + int next_next_char = READCHAR; + UNREAD (next_next_char); + + ok = (next_next_char <= 040 + || (next_next_char < 0200 + && (index ("\"';([#?", next_next_char) + || (!first_in_list && next_next_char == '`') + || (new_backquote_flag && next_next_char == ',')))); + } + else + { + ok = (next_char <= 040 + || (next_char < 0200 + && (index ("\"';()[]#?", next_char) + || (!first_in_list && next_char == '`') + || (new_backquote_flag && next_char == ',')))); + } + UNREAD (next_char); + if (!ok) + Fsignal (Qinvalid_read_syntax, Fcons (make_string ("?", 1), Qnil)); + return make_number (c); } @@@ -2350,53 -2325,57 +2486,58 @@@ continue; } - if (byterep == 1) + modifiers = c & CHAR_MODIFIER_MASK; + c = c & ~CHAR_MODIFIER_MASK; + + if (CHAR_BYTE8_P (c)) force_singlebyte = 1; - else if (byterep == 2) + else if (! ASCII_CHAR_P (c)) force_multibyte = 1; - } - - /* A character that must be multibyte forces multibyte. */ - if (! SINGLE_BYTE_CHAR_P (c & ~CHAR_MODIFIER_MASK)) - force_multibyte = 1; + else /* i.e. ASCII_CHAR_P (c) */ + { + /* Allow `\C- ' and `\C-?'. */ + if (modifiers == CHAR_CTL) + { + if (c == ' ') + c = 0, modifiers = 0; + else if (c == '?') + c = 127, modifiers = 0; + } + if (modifiers & CHAR_SHIFT) + { + /* Shift modifier is valid only with [A-Za-z]. */ + if (c >= 'A' && c <= 'Z') + modifiers &= ~CHAR_SHIFT; + else if (c >= 'a' && c <= 'z') + c -= ('a' - 'A'), modifiers &= ~CHAR_SHIFT; + } + + if (modifiers & CHAR_META) + { + /* Move the meta bit to the right place for a + string. */ + modifiers &= ~CHAR_META; + c = BYTE8_TO_CHAR (c | 0x80); + force_singlebyte = 1; + } + } - /* If we just discovered the need to be multibyte, - convert the text accumulated thus far. */ - if (force_multibyte && ! is_multibyte) - { - is_multibyte = 1; - to_multibyte (&p, &end, &nchars); + /* Any modifiers remaining are invalid. */ + if (modifiers) + error ("Invalid modifier in string"); + p += CHAR_STRING (c, (unsigned char *) p); } - - /* Allow `\C- ' and `\C-?'. */ - if (c == (CHAR_CTL | ' ')) - c = 0; - else if (c == (CHAR_CTL | '?')) - c = 127; - - if (c & CHAR_SHIFT) + else { - /* Shift modifier is valid only with [A-Za-z]. */ - if ((c & 0377) >= 'A' && (c & 0377) <= 'Z') - c &= ~CHAR_SHIFT; - else if ((c & 0377) >= 'a' && (c & 0377) <= 'z') - c = (c & ~CHAR_SHIFT) - ('a' - 'A'); + p += CHAR_STRING (c, (unsigned char *) p); + if (CHAR_BYTE8_P (c)) + force_singlebyte = 1; + else if (! ASCII_CHAR_P (c)) + force_multibyte = 1; } - - if (c & CHAR_META) - /* Move the meta bit to the right place for a string. */ - c = (c & ~CHAR_META) | 0x80; - if (c & CHAR_MODIFIER_MASK) - error ("Invalid modifier in string"); - - if (is_multibyte) - p += CHAR_STRING (c, p); - else - *p++ = c; - nchars++; } + if (c < 0) end_of_file_error (); @@@ -2406,49 -2385,24 +2547,30 @@@ if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel) return make_number (0); - if (is_multibyte || force_singlebyte) + if (force_multibyte) + /* READ_BUFFER already contains valid multibyte forms. */ ; - else if (load_convert_to_unibyte) - { - Lisp_Object string; - to_multibyte (&p, &end, &nchars); - if (p - read_buffer != nchars) - { - string = make_multibyte_string (read_buffer, nchars, - p - read_buffer); - return Fstring_make_unibyte (string); - } - /* We can make a unibyte string directly. */ - is_multibyte = 0; - } - else if (EQ (readcharfun, Qget_file_char) - || EQ (readcharfun, Qlambda)) + else if (force_singlebyte) { - /* Nowadays, reading directly from a file is used only for - compiled Emacs Lisp files, and those always use the - Emacs internal encoding. Meanwhile, Qlambda is used - for reading dynamic byte code (compiled with - byte-compile-dynamic = t). So make the string multibyte - if the string contains any multibyte sequences. - (to_multibyte is a no-op if not.) */ - to_multibyte (&p, &end, &nchars); - is_multibyte = (p - read_buffer) != nchars; + nchars = str_as_unibyte (read_buffer, p - read_buffer); + p = read_buffer + nchars; } else - /* In all other cases, if we read these bytes as - separate characters, treat them as separate characters now. */ + /* Otherwise, READ_BUFFER contains only ASCII. */ + ; + /* We want readchar_count to be the number of characters, not + bytes. Hence we adjust for multibyte characters in the + string. ... But it doesn't seem to be necessary, because + READCHAR *does* read multibyte characters from buffers. */ + /* readchar_count -= (p - read_buffer) - nchars; */ if (read_pure) return make_pure_string (read_buffer, nchars, p - read_buffer, - is_multibyte); + (force_multibyte + || (p - read_buffer != nchars))); return make_specified_string (read_buffer, nchars, p - read_buffer, - is_multibyte); + (force_multibyte + || (p - read_buffer != nchars))); } case '.': @@@ -2841,10 -2779,10 +2961,10 @@@ read_vector (readcharfun, bytecodeflag /* Coerce string to unibyte (like string-as-unibyte, but without generating extra garbage and guaranteeing no change in the contents). */ - XSTRING (bytestr)->size = STRING_BYTES (XSTRING (bytestr)); - SET_STRING_BYTES (XSTRING (bytestr), -1); + STRING_SET_CHARS (bytestr, SBYTES (bytestr)); + STRING_SET_UNIBYTE (bytestr); - item = Fread (bytestr); + item = Fread (Fcons (bytestr, readcharfun)); if (!CONSP (item)) error ("invalid byte code"); @@@ -2954,7 -2901,14 +3083,15 @@@ read_list (flag, readcharfun if (doc_reference == 2) { /* Get a doc string from the file we are loading. - If it's in saved_doc_string, get it from there. */ + If it's in saved_doc_string, get it from there. + + Here, we don't know if the string is a + bytecode string or a doc string. As a + bytecode string must be unibyte, we always + return a unibyte string. If it is actually a + doc string, caller must make it + multibyte. */ ++ int pos = XINT (XCDR (val)); /* Position is negative for user variables. */ if (pos < 0) pos = -pos; diff --cc src/minibuf.c index d265e8063f1,88113df57bb..50de309d21b --- a/src/minibuf.c +++ b/src/minibuf.c @@@ -2123,23 -1967,14 +2123,14 @@@ Return nil if there is no valid complet /* Now find first word-break in the stuff found by completion. i gets index in string of where to stop completing. */ - { - int len, c; - int bytes = SBYTES (completion); - completion_string = SDATA (completion); - for (; i_byte < SBYTES (completion); i_byte += len, i++) - { - c = STRING_CHAR_AND_LENGTH (completion_string + i_byte, - bytes - i_byte, - len); - if (SYNTAX (c) != Sword) - { - i_byte += len; - i++; - break; - } - } - } - while (i_byte < STRING_BYTES (XSTRING (completion))) ++ while (i_byte < SBYTES (completion)) + { + int c; + + FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, completion, i, i_byte); + if (SYNTAX (c) != Sword) + break; + } /* If got no characters, print help for user. */ diff --cc src/print.c index e06b8a0052d,bb97843da4a..c8e66095110 --- a/src/print.c +++ b/src/print.c @@@ -463,11 -460,15 +463,15 @@@ print_string (string, printcharfun { int chars; + if (print_escape_nonascii) + string = string_escape_byte8 (string); + if (STRING_MULTIBYTE (string)) - chars = XSTRING (string)->size; + chars = SCHARS (string); - else if (EQ (printcharfun, Qt) - ? ! NILP (buffer_defaults.enable_multibyte_characters) - : ! NILP (current_buffer->enable_multibyte_characters)) + else if (! print_escape_nonascii + && (EQ (printcharfun, Qt) + ? ! NILP (buffer_defaults.enable_multibyte_characters) + : ! NILP (current_buffer->enable_multibyte_characters))) { /* If unibyte string STRING contains 8-bit codes, we must convert STRING to a multibyte string containing the same @@@ -511,13 -512,8 +515,8 @@@ /* Here, we must convert each multi-byte form to the corresponding character code before handing it to PRINTCHAR. */ int len; - int ch = STRING_CHAR_AND_LENGTH (XSTRING (string)->data + i, + int ch = STRING_CHAR_AND_LENGTH (SDATA (string) + i, size_byte - i, len); - if (!CHAR_VALID_P (ch, 0)) - { - ch = SREF (string, i); - len = 1; - } PRINTCHAR (ch); i += len; } @@@ -1455,19 -1425,19 +1451,24 @@@ print_object (obj, printcharfun, escape PRINTCHAR ('\\'); PRINTCHAR ('f'); } - else if (multibyte && ! ASCII_BYTE_P (c) - && (print_escape_multibyte || CHAR_BYTE8_P (c))) + else if (multibyte - && ! ASCII_BYTE_P (c) - && (SINGLE_BYTE_CHAR_P (c) || print_escape_multibyte)) ++ && (CHAR_BYTE8_P (c) || print_escape_multibyte)) { /* When multibyte is disabled, - print multibyte string chars using hex escapes. */ + print multibyte string chars using hex escapes. + For a char code that could be in a unibyte string, + when found in a multibyte string, always use a hex escape + so it reads back as multibyte. */ unsigned char outbuf[50]; - sprintf (outbuf, "\\x%x", c); + + if (CHAR_BYTE8_P (c)) + sprintf (outbuf, "\\%03o", CHAR_TO_BYTE8 (c)); + else - sprintf (outbuf, "\\x%04x", c); ++ { ++ sprintf (outbuf, "\\x%04x", c); ++ need_nonhex = 1; ++ } strout (outbuf, -1, -1, printcharfun, 0); -- need_nonhex = 1; } else if (! multibyte && SINGLE_BYTE_CHAR_P (c) && ! ASCII_BYTE_P (c) diff --cc src/process.c index c4ba96a9e5c,618e7b577b4..3051b81f7cd --- a/src/process.c +++ b/src/process.c @@@ -590,42 -484,6 +590,43 @@@ remove_process (proc deactivate_process (proc); } + +/* Setup coding systems of PROCESS. */ + +void +setup_process_coding_systems (process) + Lisp_Object process; +{ + struct Lisp_Process *p = XPROCESS (process); + int inch = XINT (p->infd); + int outch = XINT (p->outfd); ++ Lisp_Object coding_system; + + if (inch < 0 || outch < 0) + return; + + if (!proc_decode_coding_system[inch]) + proc_decode_coding_system[inch] + = (struct coding_system *) xmalloc (sizeof (struct coding_system)); - setup_coding_system (p->decode_coding_system, - proc_decode_coding_system[inch]); ++ coding_system = p->decode_coding_system; + if (! NILP (p->filter)) + { + if (NILP (p->filter_multibyte)) - setup_raw_text_coding_system (proc_decode_coding_system[inch]); ++ coding_system = raw_text_coding_system (coding_system); + } + else if (BUFFERP (p->buffer)) + { + if (NILP (XBUFFER (p->buffer)->enable_multibyte_characters)) - setup_raw_text_coding_system (proc_decode_coding_system[inch]); ++ coding_system = raw_text_coding_system (coding_system); + } ++ setup_coding_system (coding_system, proc_decode_coding_system[inch]); + + if (!proc_encode_coding_system[outch]) + proc_encode_coding_system[outch] + = (struct coding_system *) xmalloc (sizeof (struct coding_system)); + setup_coding_system (p->encode_coding_system, + proc_encode_coding_system[outch]); +} DEFUN ("processp", Fprocessp, Sprocessp, 1, 1, 0, doc: /* Return t if OBJECT is a process. */) @@@ -4400,7 -2906,7 +4401,7 @@@ read_process_output (proc, channel Lisp_Object proc; register int channel; { -- register int nchars, nbytes; ++ register int nbytes; char *chars; register Lisp_Object outstream; register struct buffer *old = current_buffer; @@@ -4534,13 -3018,17 +4535,13 @@@ save the match data in a special nonrecursive fashion. */ running_asynch_code = 1; - text = decode_coding_string (make_unibyte_string (chars, nbytes), - coding, 0); - Vlast_coding_system_used = coding->symbol; + decode_coding_c_string (coding, chars, nbytes, Qt); + text = coding->dst_object; - if (NILP (buffer_defaults.enable_multibyte_characters)) - /* We had better return unibyte string. */ - text = string_make_unibyte (text); - + Vlast_coding_system_used = CODING_ID_NAME (coding->id); /* A new coding system might be found. */ - if (!EQ (p->decode_coding_system, coding->symbol)) + if (!EQ (p->decode_coding_system, Vlast_coding_system_used)) { - p->decode_coding_system = coding->symbol; + p->decode_coding_system = Vlast_coding_system_used; /* Don't call setup_coding_system for proc_decode_coding_system[channel] here. It is done in @@@ -4562,16 -3050,15 +4563,18 @@@ } } - carryover = nbytes - coding->consumed; - bcopy (chars + coding->consumed, SDATA (p->decoding_buf), - carryover); - XSETINT (p->decoding_carryover, carryover); + if (coding->carryover_bytes > 0) + { - bcopy (coding->carryover, XSTRING (p->decoding_buf)->data, ++ bcopy (coding->carryover, SDATA (p->decoding_buf), + coding->carryover_bytes); + XSETINT (p->decoding_carryover, coding->carryover_bytes); + } - nbytes = STRING_BYTES (XSTRING (text)); - nchars = XSTRING (text)->size; - if (nbytes > 0) + /* Adjust the multibyteness of TEXT to that of the filter. */ + if (NILP (p->filter_multibyte) != ! STRING_MULTIBYTE (text)) + text = (STRING_MULTIBYTE (text) + ? Fstring_as_unibyte (text) + : Fstring_to_multibyte (text)); + if (SBYTES (text) > 0) internal_condition_case_1 (read_process_output_call, Fcons (outstream, Fcons (proc, Fcons (text, Qnil))), @@@ -4666,10 -3153,12 +4669,12 @@@ proc_encode_coding_system[XINT (p->outfd)]); } } - carryover = nbytes - coding->consumed; - bcopy (chars + coding->consumed, SDATA (p->decoding_buf), - carryover); - XSETINT (p->decoding_carryover, carryover); + if (coding->carryover_bytes > 0) + { - bcopy (coding->carryover, XSTRING (p->decoding_buf)->data, ++ bcopy (coding->carryover, SDATA (p->decoding_buf), + coding->carryover_bytes); + XSETINT (p->decoding_carryover, coding->carryover_bytes); + } /* Adjust the multibyteness of TEXT to that of the buffer. */ if (NILP (current_buffer->enable_multibyte_characters) != ! STRING_MULTIBYTE (text)) @@@ -4784,13 -3274,13 +4789,13 @@@ send_process (proc, buf, len, object update_status (XPROCESS (proc)); if (! EQ (XPROCESS (proc)->status, Qrun)) error ("Process %s not running", - XSTRING (XPROCESS (proc)->name)->data); + SDATA (XPROCESS (proc)->name)); if (XINT (XPROCESS (proc)->outfd) < 0) error ("Output file descriptor of %s is closed", - XSTRING (XPROCESS (proc)->name)->data); + SDATA (XPROCESS (proc)->name)); coding = proc_encode_coding_system[XINT (XPROCESS (proc)->outfd)]; - Vlast_coding_system_used = coding->symbol; + Vlast_coding_system_used = CODING_ID_NAME (coding->id); if ((STRINGP (object) && STRING_MULTIBYTE (object)) || (BUFFERP (object) @@@ -4821,9 -3308,12 +4823,12 @@@ /* But, before changing the coding, we must flush out data. */ coding->mode |= CODING_MODE_LAST_BLOCK; send_process (proc, "", 0, Qt); - coding->mode &= ~CODING_MODE_LAST_BLOCK; ++ coding->mode &= CODING_MODE_LAST_BLOCK; } coding->src_multibyte = 0; - setup_raw_text_coding_system (coding); + setup_coding_system (raw_text_coding_system + (Vlast_coding_system_used), + coding); } } coding->dst_multibyte = 0; @@@ -4842,34 -3341,16 +4856,16 @@@ } else if (STRINGP (object)) { - from_byte = buf - SDATA (object); - from = string_byte_to_char (object, from_byte); - to = string_byte_to_char (object, from_byte + len); + encode_coding_string (coding, object, 1); } - - if (coding->composing != COMPOSITION_DISABLED) + else { - if (from_byte >= 0) - coding_save_composition (coding, from, to, object); - else - coding->composing = COMPOSITION_DISABLED; + coding->dst_object = make_unibyte_string (buf, len); + coding->produced = len; } - if (SBYTES (XPROCESS (proc)->encoding_buf) < require) - XPROCESS (proc)->encoding_buf = make_uninit_string (require); - - if (from_byte >= 0) - buf = (BUFFERP (object) - ? BUF_BYTE_ADDRESS (XBUFFER (object), from_byte) - : SDATA (object) + from_byte); - - object = XPROCESS (proc)->encoding_buf; - encode_coding (coding, (char *) buf, SDATA (object), - len, SBYTES (object)); len = coding->produced; - buf = SDATA (object); - if (temp_buf) - xfree (temp_buf); - buf = XSTRING (coding->dst_object)->data; ++ buf = SDATA (coding->dst_object); } #ifdef VMS diff --cc src/puresize.h index 67c8aede134,11c42a28d6c..cc91da7d9f7 --- a/src/puresize.h +++ b/src/puresize.h @@@ -42,7 -42,7 +42,7 @@@ Boston, MA 02111-1307, USA. * #endif #ifndef BASE_PURESIZE - #define BASE_PURESIZE (920000 + SYSTEM_PURESIZE_EXTRA + SITELOAD_PURESIZE_EXTRA) -#define BASE_PURESIZE (900000 + SYSTEM_PURESIZE_EXTRA + SITELOAD_PURESIZE_EXTRA) ++#define BASE_PURESIZE (1100000 + SYSTEM_PURESIZE_EXTRA + SITELOAD_PURESIZE_EXTRA) #endif /* Increase BASE_PURESIZE by a ratio depending on the machine's word size. */ diff --cc src/regex.c index f55cc5aeb61,bc88663897c..453ca3d85d5 --- a/src/regex.c +++ b/src/regex.c @@@ -148,23 -152,39 +149,38 @@@ # define RE_STRING_CHAR_AND_LENGTH(p, s, len) \ (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p))) - /* Set C a (possibly multibyte) character before P. P points into a - string which is the virtual concatenation of STR1 (which ends at - END1) or STR2 (which ends at END2). */ - # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ - do { \ - if (multibyte) \ - { \ - re_char *dtemp = (p) == (str2) ? (end1) : (p); \ - re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \ - re_char *d0 = dtemp; \ - PREV_CHAR_BOUNDARY (d0, dlimit); \ - c = STRING_CHAR (d0, dtemp - d0); \ - } \ - else \ - (c = ((p) == (str2) ? (end1) : (p))[-1]); \ + /* Set C a (possibly converted to multibyte) character before P. P + points into a string which is the virtual concatenation of STR1 + (which ends at END1) or STR2 (which ends at END2). */ + # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ + do { \ + if (multibyte) \ + { \ + re_char *dtemp = (p) == (str2) ? (end1) : (p); \ + re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \ + while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \ + c = STRING_CHAR (dtemp, (p) - dtemp); \ + } \ + else \ + { \ + (c = ((p) == (str2) ? (end1) : (p))[-1]); \ + MAKE_CHAR_MULTIBYTE (c); \ + } \ } while (0) + /* Set C a (possibly converted to multibyte) character at P, and set + LEN to the byte length of that character. */ + # define GET_CHAR_AFTER(c, p, len) \ + do { \ + if (multibyte) \ + c = STRING_CHAR_AND_LENGTH (p, 0, len); \ + else \ + { \ + c = *p; \ + len = 1; \ + MAKE_CHAR_MULTIBYTE (c); \ + } \ - } while (0) - ++ } while (0) #else /* not emacs */ @@@ -244,7 -264,14 +261,15 @@@ enum syntaxcode { Swhitespace = 0, Swor # define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \ (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1))) + # define GET_CHAR_AFTER(c, p, len) \ + (c = *p, len = 1) # define MAKE_CHAR(charset, c1, c2) (c1) + # define BYTE8_TO_CHAR(c) (c) + # define CHAR_BYTE8_P(c) (0) + # define MAKE_CHAR_MULTIBYTE(c) (c) + # define MAKE_CHAR_UNIBYTE(c) (c) + # define CHAR_LEADING_CODE(c) (c) ++ #endif /* not emacs */ #ifndef RE_TRANSLATE @@@ -1864,19 -1891,21 +1889,19 @@@ struct range_table_work_are int bits; /* flag to record character classes */ }; -/* Make sure that WORK_AREA can hold more N multibyte characters. */ -#define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n) \ - do { \ - if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \ - { \ - (work_area).allocated += 16 * sizeof (int); \ - if ((work_area).table) \ - (work_area).table \ - = (int *) realloc ((work_area).table, (work_area).allocated); \ - else \ - (work_area).table \ - = (int *) malloc ((work_area).allocated); \ - if ((work_area).table == 0) \ - FREE_STACK_RETURN (REG_ESPACE); \ - } \ +/* Make sure that WORK_AREA can hold more N multibyte characters. + This is used only in set_image_of_range and set_image_of_range_1. + It expects WORK_AREA to be a pointer. + If it can't get the space, it returns from the surrounding function. */ + +#define EXTEND_RANGE_TABLE(work_area, n) \ + do { \ - if (((work_area)->used + (n)) * sizeof (int) > (work_area)->allocated) \ ++ if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \ + { \ - extend_range_table_work_area (work_area); \ - if ((work_area)->table == 0) \ ++ extend_range_table_work_area (&work_area); \ ++ if ((work_area).table == 0) \ + return (REG_ESPACE); \ + } \ } while (0) #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \ @@@ -1891,15 -1920,12 +1916,12 @@@ #define BIT_UPPER 0x10 #define BIT_MULTIBYTE 0x20 - /* Set a range START..END to WORK_AREA. - The range is passed through TRANSLATE, so START and END - should be untranslated. */ - #define SET_RANGE_TABLE_WORK_AREA(work_area, start, end) \ + /* Set a range (RANGE_START, RANGE_END) to WORK_AREA. */ + #define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \ do { \ - int tem; \ - tem = set_image_of_range (&work_area, start, end, translate); \ - if (tem > 0) \ - FREE_STACK_RETURN (tem); \ - EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2); \ ++ EXTEND_RANGE_TABLE ((work_area), 2); \ + (work_area).table[(work_area).used++] = (range_start); \ + (work_area).table[(work_area).used++] = (range_end); \ } while (0) /* Free allocated memory for WORK_AREA. */ @@@ -1919,6 -1945,35 +1941,38 @@@ #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) + #ifdef emacs + -/* It is better to implement this jumbo macro by a function, but it's - not that easy because macros called within it assumes various - variables being defined. */ ++/* Store characters in the rage range C0 to C1 in WORK_AREA while ++ translating them and paying attention to the continuity of ++ translated characters. ++ ++ Implementation note: It is better to implement this fairly big ++ macro by a function, but it's not that easy because macros called ++ in this macro assume various local variables already declared. */ + + #define SETUP_MULTIBYTE_RANGE(work_area, c0, c1) \ + do { \ + re_wchar_t c, t, t_last; \ + int n; \ + \ + c = (c0); \ + t_last = multibyte ? TRANSLATE (c) : TRANSLATE (MAKE_CHAR_MULTIBYTE (c)); \ + for (c++, n = 1; c <= (c1); c++, n++) \ + { \ + t = multibyte ? TRANSLATE (c) : TRANSLATE (MAKE_CHAR_MULTIBYTE (c)); \ + if (t_last + n == t) \ + continue; \ + SET_RANGE_TABLE_WORK_AREA ((work_area), t_last, t_last + n - 1); \ + t_last = t; \ - n = 1; \ ++ n = 0; \ + } \ + if (n > 0) \ + SET_RANGE_TABLE_WORK_AREA ((work_area), t_last, t_last + n - 1); \ + } while (0) + - + #endif /* emacs */ + /* Get the next unsigned number in the uncompiled pattern. */ #define GET_UNSIGNED_NUMBER(num) \ do { if (p != pend) \ @@@ -2056,256 -2103,17 +2110,258 @@@ re_wctype_to_bit (cc } } #endif + +/* Filling in the work area of a range. */ -/* Explicit quit checking is only used on NTemacs. */ -#if defined WINDOWSNT && defined emacs && defined QUIT -extern int immediate_quit; -# define IMMEDIATE_QUIT_CHECK \ - do { \ - if (immediate_quit) QUIT; \ - } while (0) -#else -# define IMMEDIATE_QUIT_CHECK ((void)0) +/* Actually extend the space in WORK_AREA. */ + +static void +extend_range_table_work_area (work_area) + struct range_table_work_area *work_area; +{ + work_area->allocated += 16 * sizeof (int); + if (work_area->table) + work_area->table + = (int *) realloc (work_area->table, work_area->allocated); + else + work_area->table + = (int *) malloc (work_area->allocated); +} + ++#if 0 +#ifdef emacs + +/* Carefully find the ranges of codes that are equivalent + under case conversion to the range start..end when passed through + TRANSLATE. Handle the case where non-letters can come in between + two upper-case letters (which happens in Latin-1). + Also handle the case of groups of more than 2 case-equivalent chars. + + The basic method is to look at consecutive characters and see + if they can form a run that can be handled as one. + + Returns -1 if successful, REG_ESPACE if ran out of space. */ + +static int +set_image_of_range_1 (work_area, start, end, translate) + RE_TRANSLATE_TYPE translate; + struct range_table_work_area *work_area; + re_wchar_t start, end; +{ + /* `one_case' indicates a character, or a run of characters, + each of which is an isolate (no case-equivalents). + This includes all ASCII non-letters. + + `two_case' indicates a character, or a run of characters, + each of which has two case-equivalent forms. + This includes all ASCII letters. + + `strange' indicates a character that has more than one + case-equivalent. */ + + enum case_type {one_case, two_case, strange}; + + /* Describe the run that is in progress, + which the next character can try to extend. + If run_type is strange, that means there really is no run. + If run_type is one_case, then run_start...run_end is the run. + If run_type is two_case, then the run is run_start...run_end, + and the case-equivalents end at run_eqv_end. */ + + enum case_type run_type = strange; + int run_start, run_end, run_eqv_end; + + Lisp_Object eqv_table; + + if (!RE_TRANSLATE_P (translate)) + { + EXTEND_RANGE_TABLE (work_area, 2); + work_area->table[work_area->used++] = (start); + work_area->table[work_area->used++] = (end); + return -1; + } + + eqv_table = XCHAR_TABLE (translate)->extras[2]; + + for (; start <= end; start++) + { + enum case_type this_type; + int eqv = RE_TRANSLATE (eqv_table, start); + int minchar, maxchar; + + /* Classify this character */ + if (eqv == start) + this_type = one_case; + else if (RE_TRANSLATE (eqv_table, eqv) == start) + this_type = two_case; + else + this_type = strange; + + if (start < eqv) + minchar = start, maxchar = eqv; + else + minchar = eqv, maxchar = start; + + /* Can this character extend the run in progress? */ + if (this_type == strange || this_type != run_type + || !(minchar == run_end + 1 + && (run_type == two_case + ? maxchar == run_eqv_end + 1 : 1))) + { + /* No, end the run. + Record each of its equivalent ranges. */ + if (run_type == one_case) + { + EXTEND_RANGE_TABLE (work_area, 2); + work_area->table[work_area->used++] = run_start; + work_area->table[work_area->used++] = run_end; + } + else if (run_type == two_case) + { + EXTEND_RANGE_TABLE (work_area, 4); + work_area->table[work_area->used++] = run_start; + work_area->table[work_area->used++] = run_end; + work_area->table[work_area->used++] + = RE_TRANSLATE (eqv_table, run_start); + work_area->table[work_area->used++] + = RE_TRANSLATE (eqv_table, run_end); + } + run_type = strange; + } + + if (this_type == strange) + { + /* For a strange character, add each of its equivalents, one + by one. Don't start a range. */ + do + { + EXTEND_RANGE_TABLE (work_area, 2); + work_area->table[work_area->used++] = eqv; + work_area->table[work_area->used++] = eqv; + eqv = RE_TRANSLATE (eqv_table, eqv); + } + while (eqv != start); + } + + /* Add this char to the run, or start a new run. */ + else if (run_type == strange) + { + /* Initialize a new range. */ + run_type = this_type; + run_start = start; + run_end = start; + run_eqv_end = RE_TRANSLATE (eqv_table, run_end); + } + else + { + /* Extend a running range. */ + run_end = minchar; + run_eqv_end = RE_TRANSLATE (eqv_table, run_end); + } + } + + /* If a run is still in progress at the end, finish it now + by recording its equivalent ranges. */ + if (run_type == one_case) + { + EXTEND_RANGE_TABLE (work_area, 2); + work_area->table[work_area->used++] = run_start; + work_area->table[work_area->used++] = run_end; + } + else if (run_type == two_case) + { + EXTEND_RANGE_TABLE (work_area, 4); + work_area->table[work_area->used++] = run_start; + work_area->table[work_area->used++] = run_end; + work_area->table[work_area->used++] + = RE_TRANSLATE (eqv_table, run_start); + work_area->table[work_area->used++] + = RE_TRANSLATE (eqv_table, run_end); + } + + return -1; +} + +#endif /* emacs */ + +/* Record the the image of the range start..end when passed through + TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end) + and is not even necessarily contiguous. + Normally we approximate it with the smallest contiguous range that contains + all the chars we need. However, for Latin-1 we go to extra effort + to do a better job. + + This function is not called for ASCII ranges. + + Returns -1 if successful, REG_ESPACE if ran out of space. */ + +static int +set_image_of_range (work_area, start, end, translate) + RE_TRANSLATE_TYPE translate; + struct range_table_work_area *work_area; + re_wchar_t start, end; +{ + re_wchar_t cmin, cmax; + +#ifdef emacs + /* For Latin-1 ranges, use set_image_of_range_1 + to get proper handling of ranges that include letters and nonletters. + For a range that includes the whole of Latin-1, this is not necessary. + For other character sets, we don't bother to get this right. */ + if (RE_TRANSLATE_P (translate) && start < 04400 + && !(start < 04200 && end >= 04377)) + { + int newend; + int tem; + newend = end; + if (newend > 04377) + newend = 04377; + tem = set_image_of_range_1 (work_area, start, newend, translate); + if (tem > 0) + return tem; + + start = 04400; + if (end < 04400) + return -1; + } #endif + + EXTEND_RANGE_TABLE (work_area, 2); + work_area->table[work_area->used++] = (start); + work_area->table[work_area->used++] = (end); + + cmin = -1, cmax = -1; + + if (RE_TRANSLATE_P (translate)) + { + int ch; + + for (ch = start; ch <= end; ch++) + { + re_wchar_t c = TRANSLATE (ch); + if (! (start <= c && c <= end)) + { + if (cmin == -1) + cmin = c, cmax = c; + else + { + cmin = MIN (cmin, c); + cmax = MAX (cmax, c); + } + } + } + + if (cmin != -1) + { + EXTEND_RANGE_TABLE (work_area, 2); + work_area->table[work_area->used++] = (cmin); + work_area->table[work_area->used++] = (cmax); + } + } + + return -1; +} ++#endif /* 0 */ #ifndef MATCH_MAY_ALLOCATE @@@ -2449,6 -2257,9 +2505,9 @@@ regex_compile (pattern, size, syntax, b /* If the object matched can contain multibyte characters. */ const boolean multibyte = RE_MULTIBYTE_P (bufp); - /* If a target can contain multibyte characters. */ ++ /* If a target of matching can contain multibyte characters. */ + const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); + #ifdef DEBUG debug++; DEBUG_PRINT1 ("\nCompiling pattern: "); @@@ -2808,8 -2610,8 +2863,9 @@@ them). */ if (c == ':' && *p == ']') { - int ch; + re_wchar_t ch; re_wctype_t cc; ++ int limit; cc = re_wctype (str); @@@ -2827,17 -2629,20 +2883,33 @@@ is_digit, is_cntrl, and is_xdigit, since they can only match ASCII characters. We don't need to handle them for multibyte. - They are distinguished by a negative wctype. - We need this only for Emacs. */ -#ifdef emacs - SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work, - re_wctype_to_bit (cc)); -#endif + They are distinguished by a negative wctype. */ - if (multibyte) - SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work, - re_wctype_to_bit (cc)); - for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) ++ for (ch = 0; ch < 128; ++ch) ++ if (re_iswctype (btowc (ch), cc)) ++ { ++ c = TRANSLATE (ch); ++ SET_LIST_BIT (c); ++ } + - for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) ++ if (target_multibyte) + { - MAKE_CHAR_MULTIBYTE (ch); - ch = TRANSLATE (ch); - if (IS_REAL_ASCII (ch) - & re_iswctype (btowc (ch), cc)) - SET_LIST_BIT (ch); ++ SET_RANGE_TABLE_WORK_AREA_BIT ++ (range_table_work, re_wctype_to_bit (cc)); ++ } ++ else + { - int translated = TRANSLATE (ch); - if (re_iswctype (btowc (ch), cc)) - SET_LIST_BIT (translated); ++ for (ch = 0; ch < (1 << BYTEWIDTH); ++ch) ++ { ++ c = ch; ++ MAKE_CHAR_MULTIBYTE (c); ++ if (re_iswctype (btowc (c), cc)) ++ { ++ c = TRANSLATE (c); ++ MAKE_CHAR_UNIBYTE (c); ++ SET_LIST_BIT (c); ++ } ++ } } /* Repeat the loop. */ @@@ -2860,61 -2665,45 +2932,55 @@@ { /* Discard the `-'. */ - PATFETCH_RAW (c1); + PATFETCH (c1); /* Fetch the character which ends the range. */ - PATFETCH_RAW (c1); + PATFETCH (c1); - - if (SINGLE_BYTE_CHAR_P (c)) + if (c > c1) { - if (! SINGLE_BYTE_CHAR_P (c1)) - { - /* Handle a range starting with a - character of less than 256, and ending - with a character of not less than 256. - Split that into two ranges, the low one - ending at 0377, and the high one - starting at the smallest character in - the charset of C1 and ending at C1. */ - int charset = CHAR_CHARSET (c1); - re_wchar_t c2 = MAKE_CHAR (charset, 0, 0); - - SET_RANGE_TABLE_WORK_AREA (range_table_work, - c2, c1); - c1 = 0377; - } + if (syntax & RE_NO_EMPTY_RANGES) + FREE_STACK_RETURN (REG_ERANGE); + /* Else, repeat the loop. */ } - else if (!SAME_CHARSET_P (c, c1)) - FREE_STACK_RETURN (REG_ERANGE); } else + /* Range from C to C. */ c1 = c; + - /* Set the range ... */ - if (SINGLE_BYTE_CHAR_P (c)) - /* ... into bitmap. */ + #ifndef emacs + c = TRANSLATE (c); + c1 = TRANSLATE (c1); ++ /* Set the range into bitmap */ ++ for (; c <= c1; c++) ++ SET_LIST_BIT (TRANSLATE (c)); + #else /* not emacs */ + if (target_multibyte) { - re_wchar_t this_char; - re_wchar_t range_start = c, range_end = c1; - - /* If the start is after the end, the range is empty. */ - if (range_start > range_end) - if (! IS_REAL_ASCII (c1)) ++ if (c1 >= 128) { - if (syntax & RE_NO_EMPTY_RANGES) - FREE_STACK_RETURN (REG_ERANGE); - /* Else, repeat the loop. */ + re_wchar_t c0 = MAX (c, 128); + + SETUP_MULTIBYTE_RANGE (range_table_work, c0, c1); - c1 = MIN (127, c1); ++ c1 = 127; } - else ++ for (; c <= c1; c++) ++ SET_LIST_BIT (TRANSLATE (c)); + } + else + { - if (multibyte) ++ re_wchar_t c0; ++ ++ for (; c <= c1; c++) { - for (this_char = range_start; this_char <= range_end; - this_char++) - SET_LIST_BIT (TRANSLATE (this_char)); - MAKE_CHAR_UNIBYTE (c); - MAKE_CHAR_UNIBYTE (c1); ++ c0 = c; ++ if (! multibyte) ++ MAKE_CHAR_MULTIBYTE (c0); ++ c0 = TRANSLATE (c0); ++ MAKE_CHAR_UNIBYTE (c0); ++ SET_LIST_BIT (c0); } } - else - /* ... into range table. */ - SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1); + #endif /* not emacs */ - /* Set the range into bitmap */ - for (; c <= c1; c++) - SET_LIST_BIT (TRANSLATE (c)); } /* Discard any (non)matching list bytes that are all 0 at the @@@ -4214,19 -4036,25 +4304,22 @@@ re_search_2 (bufp, str1, size1, str2, s buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim, buf_charlen); - buf_ch = RE_TRANSLATE (translate, buf_ch); - if (buf_ch >= 0400 - || fastmap[buf_ch]) + if (fastmap[CHAR_LEADING_CODE (buf_ch)]) break; + range -= buf_charlen; d += buf_charlen; } else - while (range > lim - && !fastmap[RE_TRANSLATE (translate, *d)]) + while (range > lim) { + buf_ch = *d; -#ifdef emacs + MAKE_CHAR_MULTIBYTE (buf_ch); -#endif + buf_ch = RE_TRANSLATE (translate, buf_ch); -#ifdef emacs + MAKE_CHAR_UNIBYTE (buf_ch); -#endif + if (fastmap[buf_ch]) + break; d++; range--; } @@@ -4245,12 -4087,19 +4352,18 @@@ int room = (startpos >= size1 ? size2 + size1 - startpos : size1 - startpos); - buf_ch = RE_STRING_CHAR (d, room); - buf_ch = TRANSLATE (buf_ch); -- - if (! (buf_ch >= 0400 - || fastmap[buf_ch])) - goto advance; + if (multibyte) + { + buf_ch = STRING_CHAR (d, room); + buf_ch = TRANSLATE (buf_ch); + if (! fastmap[CHAR_LEADING_CODE (buf_ch)]) + goto advance; + } + else + { + if (! fastmap[TRANSLATE (*d)]) + goto advance; + } } } @@@ -5146,58 -5015,69 +5280,70 @@@ re_match_2_internal (bufp, string1, siz /* This is written out as an if-else so we don't waste time testing `translate' inside the loop. */ if (RE_TRANSLATE_P (translate)) - { - if (multibyte) - do + do + { + PREFETCH (); + if (RE_TRANSLATE (translate, *d) != *p++) { - int pat_charlen, buf_charlen; - unsigned int pat_ch, buf_ch; - - PREFETCH (); - pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen); - buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen); + d = dfail; + goto fail; + } + d++; + } + while (--mcnt); + else + do + { + PREFETCH (); + if (*d++ != *p++) + { + d = dfail; + goto fail; + } + } + while (--mcnt); + #else /* emacs */ + /* The cost of testing `translate' is comparatively small. */ + if (multibyte) + do + { + int pat_charlen, buf_charlen; + unsigned int pat_ch, buf_ch; - if (RE_TRANSLATE (translate, buf_ch) - != pat_ch) - { - d = dfail; - goto fail; - } + PREFETCH (); + pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen); + buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen); - p += pat_charlen; - d += buf_charlen; - mcnt -= pat_charlen; - } - while (mcnt > 0); - else - do + if (TRANSLATE (buf_ch) != pat_ch) { - PREFETCH (); - if (RE_TRANSLATE (translate, *d) != *p++) - { - d = dfail; - goto fail; - } - d++; + d = dfail; + goto fail; } - while (--mcnt); - } + + p += pat_charlen; + d += buf_charlen; + mcnt -= pat_charlen; + } + while (mcnt > 0); else - { - do - { - PREFETCH (); - if (*d++ != *p++) - { - d = dfail; - goto fail; - } - } - while (--mcnt); - } + do + { + unsigned int buf_ch; + + PREFETCH (); + buf_ch = *d++; + TRANSLATE_VIA_MULTIBYTE (buf_ch); + if (buf_ch != *p++) + { + d = dfail; + goto fail; + } + } + while (--mcnt); + #endif break; + /* Match any character except possibly a newline or a null. */ case anychar: { @@@ -5732,9 -5596,9 +5880,9 @@@ UPDATE_SYNTAX_TABLE (charpos); #endif PREFETCH (); - c2 = RE_STRING_CHAR (d, dend - d); + GET_CHAR_AFTER (c2, d, dummy); s2 = SYNTAX (c2); - + /* Case 2: S2 is not Sword. */ if (s2 != Sword) goto fail; @@@ -5854,8 -5718,8 +6002,7 @@@ int len; re_wchar_t c; - c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len); - + GET_CHAR_AFTER (c, d, len); - if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not) goto fail; d += len; diff --cc src/regex.h index 1cfd4363ea7,eb1051d1949..b23c8855eff --- a/src/regex.h +++ b/src/regex.h @@@ -391,9 -391,13 +391,15 @@@ struct re_pattern_buffe unsigned not_eol : 1; #ifdef emacs - /* If true, multi-byte form in the `buffer' should be recognized as a - multibyte character. */ + /* If true, multi-byte form in the regexp pattern should be - recognized as a multibyte character. */ ++ recognized as a multibyte character. When the pattern is ++ compiled, this is set to the same value as target_multibyte ++ below. */ unsigned multibyte : 1; + + /* If true, multi-byte form in the target of match should be + recognized as a multibyte character. */ + unsigned target_multibyte : 1; #endif /* [[[end pattern_buffer]]] */ diff --cc src/search.c index 89a4a5ad68e,507ad5c8992..96ea41e8f8e --- a/src/search.c +++ b/src/search.c @@@ -104,9 -104,9 +104,8 @@@ matcher_overflow ( subexpression bounds. POSIX is nonzero if we want full backtracking (POSIX style) for this pattern. 0 means backtrack only enough to get a valid match. -- MULTIBYTE is nonzero if we want to handle multibyte characters in - PATTERN. 0 means all multibyte characters are recognized just as - the target. 0 means all multibyte characters are recognized just as -- sequences of binary data. */ ++ MULTIBYTE is nonzero iff a target of match is a multibyte buffer or ++ string. */ static void compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) @@@ -160,8 -128,9 +127,8 @@@ BLOCK_INPUT; old = re_set_syntax (RE_SYNTAX_EMACS | (posix ? 0 : RE_NO_POSIX_BACKTRACKING)); - val = (char *) re_compile_pattern ((char *)raw_pattern, - raw_pattern_size, &cp->buf); - val = (char *) re_compile_pattern ((char *) (XSTRING (pattern)->data), - STRING_BYTES (XSTRING (pattern)), - &cp->buf); ++ val = (char *) re_compile_pattern ((char *) SDATA (pattern), ++ SBYTES (pattern), &cp->buf); re_set_syntax (old); UNBLOCK_INPUT; if (val) @@@ -1139,8 -1105,13 +1106,13 @@@ search_buffer (string, pos, pos_byte, l int raw_pattern_size_byte; unsigned char *patbuf; int multibyte = !NILP (current_buffer->enable_multibyte_characters); - unsigned char *base_pat = XSTRING (string)->data; - /* High bits of char, calculated by (CHAR & 0x3F). Characters - of the same high bits have the same sequence of bytes but - last. To do the BM search, all characters in STRING must - have the same high bits (including their case - translations). */ + unsigned char *base_pat = SDATA (string); - int charset_base = -1; ++ /* High bits of char; 0 for ASCII characters, (CHAR & ~0x3F) ++ otherwise. Characters of the same high bits have the same ++ sequence of bytes but last. To do the BM search, all ++ characters in STRING must have the same high bits (including ++ their case translations). */ + int char_high_bits = -1; int boyer_moore_ok = 1; /* MULTIBYTE says whether the text to be searched is multibyte. @@@ -1231,14 -1184,15 +1185,15 @@@ { /* Keep track of which character set row contains the characters that need translation. */ - int charset_base_code = c & ~CHAR_FIELD3_MASK; - int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK; - int this_high_bit = c & ~0x3F; - int trt_high_bit = ((inverse != c ? inverse : translated) - & ~0x3F); - ++ int this_high_bit = ASCII_CHAR_P (c) ? 0 : (c & ~0x3F); ++ int c1 = inverse != c ? inverse : translated; ++ int trt_high_bit = ASCII_CHAR_P (c1) ? 0 : (c1 & ~0x3F); + - if (charset_base_code != inverse_charset_base) + if (this_high_bit != trt_high_bit) boyer_moore_ok = 0; - else if (charset_base == -1) - charset_base = charset_base_code; - else if (charset_base != charset_base_code) + else if (char_high_bits == -1) + char_high_bits = this_high_bit; + else if (char_high_bits != this_high_bit) /* If two different rows appear, needing translation, then we cannot use boyer_moore search. */ boyer_moore_ok = 0; @@@ -1622,7 -1575,7 +1576,8 @@@ boyer_moore (n, base_pat, len, len_byte while (! CHAR_HEAD_P (*charstart)) charstart--; untranslated = STRING_CHAR (charstart, ptr - charstart + 1); - if (charset_base == (untranslated & ~CHAR_FIELD3_MASK)) - if (char_high_bits == (untranslated & ~0x3F)) ++ if (char_high_bits ++ == (ASCII_CHAR_P (untranslated) ? 0 : untranslated & ~0x3F)) { TRANSLATE (ch, trt, untranslated); if (! CHAR_HEAD_P (*ptr)) @@@ -1957,8 -1911,8 +1913,8 @@@ wordify (string for (i = 0, i_byte = 0; i < len; ) { int c; - + - FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); + FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte); if (SYNTAX (c) != Sword) { @@@ -1992,8 -1946,8 +1948,8 @@@ { int c; int i_byte_orig = i_byte; - + - FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); + FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte); if (SYNTAX (c) == Sword) { @@@ -2445,10 -2396,7 +2401,7 @@@ since only regular expressions have dis Lisp_Object rev_tbl; int really_changed = 0; - rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table) - ? Fchar_table_extra_slot (Vnonascii_translation_table, - make_number (0)) - : Qnil); - rev_tbl= Qnil; ++ rev_tbl = Qnil; substed_alloc_size = length * 2 + 100; substed = (unsigned char *) xmalloc (substed_alloc_size + 1); diff --cc src/syntax.c index 706706a53a1,32e5802beac..5b25371fcbc --- a/src/syntax.c +++ b/src/syntax.c @@@ -378,21 -373,24 +379,21 @@@ find_defun_start (pos, pos_byte syntax-tables. */ gl_state.current_syntax_table = current_buffer->syntax_table; gl_state.use_global = 0; - if (open_paren_in_column_0_is_defun_start) + while (PT > BEGV) { - while (PT > BEGV) + /* Open-paren at start of line means we may have found our + defun-start. */ - if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen) ++ if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen) { - /* Open-paren at start of line means we may have found our - defun-start. */ + SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */ - if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen) + if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen) - { - SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */ - if (SYNTAX (FETCH_CHAR_AS_MULTIBYTE (PT_BYTE)) == Sopen) - break; - /* Now fallback to the default value. */ - gl_state.current_syntax_table = current_buffer->syntax_table; - gl_state.use_global = 0; - } - /* Move to beg of previous line. */ - scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1); + break; + /* Now fallback to the default value. */ + gl_state.current_syntax_table = current_buffer->syntax_table; + gl_state.use_global = 0; } + /* Move to beg of previous line. */ + scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1); } /* Record what we found, for the next try. */ @@@ -1015,7 -992,13 +995,13 @@@ usage: (modify-syntax-entry CHAR NEWENT (c, newentry, syntax_table) Lisp_Object c, newentry, syntax_table; { - CHECK_NUMBER (c); + if (CONSP (c)) + { - CHECK_CHARACTER (XCAR (c)); - CHECK_CHARACTER (XCDR (c)); ++ CHECK_CHARACTER_CAR (c); ++ CHECK_CHARACTER_CDR (c); + } + else + CHECK_CHARACTER (c); if (NILP (syntax_table)) syntax_table = current_buffer->syntax_table; @@@ -1176,6 -1163,8 +1166,10 @@@ DEFUN ("internal-describe-syntax-value" int parse_sexp_ignore_comments; -Lisp_Object Vnext_word_boundary_function_table; ++/* Char-table of functions that find the next or previous word ++ boundary. */ ++Lisp_Object Vfind_word_boundary_function_table; + /* Return the position across COUNT words from FROM. If that many words cannot be found before the end of the buffer, return 0. COUNT negative means scan backward and stop at word beginning. */ @@@ -1189,6 -1178,6 +1183,7 @@@ scan_words (from, count register int from_byte = CHAR_TO_BYTE (from); register enum syntaxcode code; int ch0, ch1; ++ Lisp_Object func, script, pos; immediate_quit = 1; QUIT; @@@ -1216,19 -1207,36 +1211,34 @@@ } /* Now CH0 is a character which begins a word and FROM is the position of the next character. */ - while (1) - func = CHAR_TABLE_REF (Vnext_word_boundary_function_table, ch0); ++ func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0); + if (! NILP (Ffboundp (func))) { - if (from == end) break; - UPDATE_SYNTAX_TABLE_FORWARD (from); - ch1 = FETCH_CHAR (from_byte); - code = SYNTAX (ch1); - if (!(words_include_escapes - && (code == Sescape || code == Scharquote))) - if (code != Sword || WORD_BOUNDARY_P (ch0, ch1)) - break; - INC_BOTH (from, from_byte); - ch0 = ch1; - Lisp_Object pos; - + pos = call2 (func, make_number (from - 1), make_number (end)); - from = XINT (pos); - from_byte = CHAR_TO_BYTE (from); ++ if (INTEGERP (pos) && XINT (pos) > from) ++ { ++ from = XINT (pos); ++ from_byte = CHAR_TO_BYTE (from); ++ } + } + else + { - Lisp_Object script; - + script = CHAR_TABLE_REF (Vchar_script_table, ch0); + while (1) + { + if (from == end) break; + UPDATE_SYNTAX_TABLE_FORWARD (from); + ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte); + code = SYNTAX (ch1); + if ((code != Sword + && (! words_include_escapes + || (code != Sescape && code != Scharquote))) + || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch1), script)) + break; + INC_BOTH (from, from_byte); + ch0 = ch1; + } } - count--; } while (count < 0) @@@ -1252,22 -1262,38 +1262,37 @@@ } /* Now CH1 is a character which ends a word and FROM is the position of it. */ - while (1) - func = CHAR_TABLE_REF (Vnext_word_boundary_function_table, ch1); ++ func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1); + if (! NILP (Ffboundp (func))) - { - Lisp_Object pos; - ++ { + pos = call2 (func, make_number (from), make_number (beg)); - from = XINT (pos); - from_byte = CHAR_TO_BYTE (from); ++ if (INTEGERP (pos) && XINT (pos) < from) ++ { ++ from = XINT (pos); ++ from_byte = CHAR_TO_BYTE (from); ++ } + } + else { - int temp_byte; - Lisp_Object script; - + script = CHAR_TABLE_REF (Vchar_script_table, ch1); + while (1) + { + int temp_byte; - if (from == beg) - break; - temp_byte = dec_bytepos (from_byte); - UPDATE_SYNTAX_TABLE_BACKWARD (from); - ch0 = FETCH_CHAR (temp_byte); - code = SYNTAX (ch0); - if (!(words_include_escapes - && (code == Sescape || code == Scharquote))) - if (code != Sword || WORD_BOUNDARY_P (ch0, ch1)) - break; - DEC_BOTH (from, from_byte); - ch1 = ch0; + if (from == beg) + break; + temp_byte = dec_bytepos (from_byte); + UPDATE_SYNTAX_TABLE_BACKWARD (from); + ch0 = FETCH_CHAR_AS_MULTIBYTE (temp_byte); + code = SYNTAX (ch0); + if ((code != Sword + && (! words_include_escapes + || (code != Sescape && code != Scharquote))) + || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch0), script)) + break; + DEC_BOTH (from, from_byte); + ch1 = ch0; + } } count++; } @@@ -1367,10 -1389,14 +1390,14 @@@ skip_chars (forwardp, string, lim int n_char_ranges = 0; int negate = 0; register int i, i_byte; - int multibyte = !NILP (current_buffer->enable_multibyte_characters); + /* Set to 1 if the current buffer is multibyte and the region + contains non-ASCII chars. */ + int multibyte; + /* Set to 1 if STRING is multibyte and it contains non-ASCII + chars. */ int string_multibyte; int size_byte; - unsigned char *str; + const unsigned char *str; int len; CHECK_STRING (string); @@@ -1410,29 -1412,120 +1413,123 @@@ if (XINT (lim) < BEGV) XSETFASTINT (lim, BEGV); + multibyte = (!NILP (current_buffer->enable_multibyte_characters) + && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE)); - string_multibyte = STRING_BYTES (XSTRING (string)) > XSTRING (string)->size; ++ string_multibyte = SBYTES (string) > SCHARS (string); + bzero (fastmap, sizeof fastmap); + if (multibyte) - char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2); ++ char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2); - i_byte = 0; - str = XSTRING (string)->data; - size_byte = STRING_BYTES (XSTRING (string)); ++ str = SDATA (string); ++ size_byte = SBYTES (string); + i_byte = 0; if (i_byte < size_byte - && XSTRING (string)->data[0] == '^') + && SREF (string, 0) == '^') { negate = 1; i_byte++; } /* Find the characters specified and set their elements of fastmap. - If syntaxp, each character counts as itself. - Otherwise, handle backslashes and ranges specially. */ + Handle backslashes and ranges specially. - while (i_byte < size_byte) + If STRING contains non-ASCII characters, setup char_ranges for + them and use fastmap only for their leading codes. */ + + if (! string_multibyte) { - c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len); - i_byte += len; + int string_has_eight_bit = 0; - if (syntaxp) - fastmap[syntax_spec_code[c & 0377]] = 1; - else + /* At first setup fastmap. */ + while (i_byte < size_byte) + { + c = str[i_byte++]; + + if (c == '\\') + { + if (i_byte == size_byte) + break; + + c = str[i_byte++]; + } + if (i_byte < size_byte + && str[i_byte] == '-') + { + unsigned int c2; + + /* Skip over the dash. */ + i_byte++; + + if (i_byte == size_byte) + break; + + /* Get the end of the range. */ + c2 = str[i_byte++]; + if (c2 == '\\' + && i_byte < size_byte) + c2 = str[i_byte++]; + - while (c <= c2) - fastmap[c++] = 1; - if (! ASCII_CHAR_P (c2)) - string_has_eight_bit = 1; ++ if (c <= c2) ++ { ++ while (c <= c2) ++ fastmap[c++] = 1; ++ if (! ASCII_CHAR_P (c2)) ++ string_has_eight_bit = 1; ++ } + } + else + { + fastmap[c] = 1; + if (! ASCII_CHAR_P (c)) + string_has_eight_bit = 1; + } + } + + /* If the current range is multibyte and STRING contains + eight-bit chars, arrange fastmap and setup char_ranges for + the corresponding multibyte chars. */ + if (multibyte && string_has_eight_bit) + { + unsigned char fastmap2[0400]; + int range_start_byte, range_start_char; + + bcopy (fastmap2 + 0200, fastmap + 0200, 0200); + bzero (fastmap + 0200, 0200); + /* We are sure that this loop stops. */ + for (i = 0200; ! fastmap2[i]; i++); + c = unibyte_char_to_multibyte (i); + fastmap[CHAR_LEADING_CODE (c)] = 1; + range_start_byte = i; + range_start_char = c; + for (i = 129; i < 0400; i++) + { + c = unibyte_char_to_multibyte (i); + fastmap[CHAR_LEADING_CODE (c)] = 1; + if (i - range_start_byte != c - range_start_char) + { + char_ranges[n_char_ranges++] = range_start_char; + char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte) + + range_start_char); + range_start_byte = i; + range_start_char = c; - } ++ } + } + char_ranges[n_char_ranges++] = range_start_char; + char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte) + + range_start_char); + } + } + else + { + while (i_byte < size_byte) { + unsigned char leading_code; + + leading_code = str[i_byte]; + c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len); + i_byte += len; + if (c == '\\') { if (i_byte == size_byte) @@@ -1507,228 -1628,218 +1632,287 @@@ int start_point = PT; int pos = PT; int pos_byte = PT_BYTE; + unsigned char *p = PT_ADDR, *endp, *stop; + + if (forwardp) + { - endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim)); - stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp; ++ endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim)); ++ stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp; + } + else + { - endp = CHAR_POS_ADDR (XINT (lim)); - stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp; ++ endp = CHAR_POS_ADDR (XINT (lim)); ++ stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp; + } immediate_quit = 1; - if (syntaxp) + if (forwardp) { - SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1); - if (forwardp) - { - if (multibyte) - while (1) - { - int nbytes; + if (multibyte) - while (pos < XINT (lim)) ++ while (1) + { - c = FETCH_BYTE (pos_byte); - if (! fastmap[c]) ++ int nbytes; + - if (p >= stop) - { - if (p >= endp) - break; - p = GAP_END_ADDR; - stop = endp; - } - c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes); - if (! fastmap[(int) SYNTAX (c)]) - break; - p += nbytes, pos++, pos_byte += nbytes; - UPDATE_SYNTAX_TABLE_FORWARD (pos); - } - else - while (1) ++ if (p >= stop) + { - if (p >= stop) - { - if (p >= endp) - break; - p = GAP_END_ADDR; - stop = endp; - } - if (! fastmap[(int) SYNTAX (*p)]) ++ if (p >= endp) + break; - p++, pos++; - UPDATE_SYNTAX_TABLE_FORWARD (pos); ++ p = GAP_END_ADDR; ++ stop = endp; + } - } - else - { - if (multibyte) - while (1) ++ if (! fastmap[*p]) + break; ++ c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes); + if (! ASCII_CHAR_P (c)) { - unsigned char *prev_p; - int nbytes; - - if (p <= stop) - { - if (p <= endp) - break; - p = GPT_ADDR; - stop = endp; - } - prev_p = p; - while (--p >= stop && ! CHAR_HEAD_P (*p)); - PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes); - if (prev_p - p > nbytes) - p = prev_p - 1, c = *p, nbytes = 1; - else - c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH); - pos--, pos_byte -= nbytes; - UPDATE_SYNTAX_TABLE_BACKWARD (pos); - if (! fastmap[(int) SYNTAX (c)]) - { - pos++; - pos_byte += nbytes; - c = FETCH_MULTIBYTE_CHAR (pos_byte); + /* As we are looking at a multibyte character, we + must look up the character in the table + CHAR_RANGES. If there's no data in the table, + that character is not what we want to skip. */ + + /* The following code do the right thing even if + n_char_ranges is zero (i.e. no data in + CHAR_RANGES). */ + for (i = 0; i < n_char_ranges; i += 2) + if (c >= char_ranges[i] && c <= char_ranges[i + 1]) break; - } + if (!(negate ^ (i < n_char_ranges))) + break; } - else - while (1) - /* Since we already checked for multibyteness, avoid - using INC_BOTH which checks again. */ - INC_POS (pos_byte); - pos++; ++ p += nbytes, pos++, pos_byte += nbytes; + } + else - { - while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos_byte)]) - pos++, pos_byte++; - } ++ while (1) ++ { ++ if (p >= stop) + { - if (p <= stop) - { - if (p <= endp) - break; - p = GPT_ADDR; - stop = endp; - } - if (! fastmap[(int) SYNTAX (p[-1])]) ++ if (p >= endp) + break; - p--, pos--; - UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1); ++ p = GAP_END_ADDR; ++ stop = endp; + } - } ++ if (!fastmap[*p]) ++ break; ++ p++, pos++, pos_byte++; ++ } } else { - if (forwardp) - { - if (multibyte) - while (1) - { - int nbytes; + if (multibyte) - while (pos > XINT (lim)) ++ while (1) + { - int prev_pos_byte = pos_byte; ++ unsigned char *prev_p; - if (p >= stop) - { - if (p >= endp) - break; - p = GAP_END_ADDR; - stop = endp; - } - c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes); - if (SINGLE_BYTE_CHAR_P (c)) - { - if (!fastmap[c]) - break; - } - else - { - /* If we are looking at a multibyte character, - we must look up the character in the table - CHAR_RANGES. If there's no data in the - table, that character is not what we want to - skip. */ - - /* The following code do the right thing even if - n_char_ranges is zero (i.e. no data in - CHAR_RANGES). */ - for (i = 0; i < n_char_ranges; i += 2) - if (c >= char_ranges[i] && c <= char_ranges[i + 1]) - break; - if (!(negate ^ (i < n_char_ranges))) - break; - } - p += nbytes, pos++, pos_byte += nbytes; - } - else - while (1) - DEC_POS (prev_pos_byte); - c = FETCH_BYTE (prev_pos_byte); - if (! fastmap[c]) ++ if (p <= stop) + { - if (p >= stop) - { - if (p >= endp) - break; - p = GAP_END_ADDR; - stop = endp; - } - if (!fastmap[*p]) ++ if (p <= endp) + break; - p++, pos++; ++ p = GPT_ADDR; ++ stop = endp; + } - } - else - { - if (multibyte) - while (1) ++ prev_p = p; ++ while (--p >= stop && ! CHAR_HEAD_P (*p)); ++ if (! fastmap[*p]) + break; ++ c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH); + if (! ASCII_CHAR_P (c)) { - unsigned char *prev_p; - int nbytes; - - if (p <= stop) - { - if (p <= endp) - break; - p = GPT_ADDR; - stop = endp; - } - prev_p = p; - while (--p >= stop && ! CHAR_HEAD_P (*p)); - PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes); - if (prev_p - p > nbytes) - p = prev_p - 1, c = *p, nbytes = 1; - else - c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH); - if (SINGLE_BYTE_CHAR_P (c)) - { - if (!fastmap[c]) - break; - } - else - { - /* See the comment in the previous similar code. */ - for (i = 0; i < n_char_ranges; i += 2) - if (c >= char_ranges[i] && c <= char_ranges[i + 1]) - break; - if (!(negate ^ (i < n_char_ranges))) - break; - } - pos--, pos_byte -= nbytes; - c = FETCH_MULTIBYTE_CHAR (prev_pos_byte); + /* See the comment in the previous similar code. */ + for (i = 0; i < n_char_ranges; i += 2) + if (c >= char_ranges[i] && c <= char_ranges[i + 1]) + break; + if (!(negate ^ (i < n_char_ranges))) + break; } - else - while (1) - pos--; - pos_byte = prev_pos_byte; ++ pos--, pos_byte -= prev_p - p; + } + else - { - while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos_byte - 1)]) - pos--, pos_byte--; - } ++ while (1) ++ { ++ if (p <= stop) + { - if (p <= stop) - { - if (p <= endp) - break; - p = GPT_ADDR; - stop = endp; - } - if (!fastmap[p[-1]]) ++ if (p <= endp) + break; - p--, pos--; ++ p = GPT_ADDR; ++ stop = endp; + } - } ++ if (!fastmap[p[-1]]) ++ break; ++ p--, pos--, pos_byte--; ++ } } - #if 0 /* Not needed now that a position in mid-character - cannot be specified in Lisp. */ - if (multibyte - /* INC_POS or DEC_POS might have moved POS over LIM. */ - && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim)))) - pos = XINT (lim); - #endif + SET_PT_BOTH (pos, pos_byte); + immediate_quit = 0; + + return make_number (PT - start_point); + } + } - if (! multibyte) - pos_byte = pos; + + static Lisp_Object + skip_syntaxes (forwardp, string, lim) + int forwardp; + Lisp_Object string, lim; + { + register unsigned int c; + unsigned char fastmap[0400]; + int negate = 0; + register int i, i_byte; + int multibyte; + int size_byte; + unsigned char *str; + + CHECK_STRING (string); + + if (NILP (lim)) + XSETINT (lim, forwardp ? ZV : BEGV); + else + CHECK_NUMBER_COERCE_MARKER (lim); + + /* In any case, don't allow scan outside bounds of buffer. */ + if (XINT (lim) > ZV) + XSETFASTINT (lim, ZV); + if (XINT (lim) < BEGV) + XSETFASTINT (lim, BEGV); + + if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim))) + return Qnil; + + multibyte = (!NILP (current_buffer->enable_multibyte_characters) + && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE)); + + bzero (fastmap, sizeof fastmap); + - if (STRING_BYTES (XSTRING (string)) > XSTRING (string)->size) - /* As this is very rare case, don't consider efficiency. */ ++ if (SBYTES (string) > SCHARS (string)) ++ /* As this is very rare case (syntax spec is ASCII only), don't ++ consider efficiency. */ + string = string_make_unibyte (string); + - str = XSTRING (string)->data; - size_byte = STRING_BYTES (XSTRING (string)); ++ str = SDATA (string); ++ size_byte = SBYTES (string); + + i_byte = 0; + if (i_byte < size_byte - && XSTRING (string)->data[0] == '^') ++ && SREF (string, 0) == '^') + { + negate = 1; i_byte++; + } + + /* Find the syntaxes specified and set their elements of fastmap. */ + + while (i_byte < size_byte) + { + c = str[i_byte++]; - fastmap[syntax_spec_code[c]] = 1; ++ fastmap[syntax_spec_code[c]] = 1; + } + + /* If ^ was the first character, complement the fastmap. */ + if (negate) + for (i = 0; i < sizeof fastmap; i++) + fastmap[i] ^= 1; + + { + int start_point = PT; + int pos = PT; + int pos_byte = PT_BYTE; ++ unsigned char *p = PT_ADDR, *endp, *stop; ++ ++ if (forwardp) ++ { ++ endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim)); ++ stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp; ++ } ++ else ++ { ++ endp = CHAR_POS_ADDR (XINT (lim)); ++ stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp; ++ } + + immediate_quit = 1; + SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1); + if (forwardp) + { + if (multibyte) + { - while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))]) ++ while (1) + { - /* Since we already checked for multibyteness, - avoid using INC_BOTH which checks again. */ - INC_POS (pos_byte); - pos++; - if (pos >= XINT (lim)) ++ int nbytes; ++ ++ if (p >= stop) ++ { ++ if (p >= endp) ++ break; ++ p = GAP_END_ADDR; ++ stop = endp; ++ } ++ c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes); ++ if (! fastmap[(int) SYNTAX (c)]) + break; ++ p += nbytes, pos++, pos_byte += nbytes; + UPDATE_SYNTAX_TABLE_FORWARD (pos); + } + } + else + { + while (1) + { - c = FETCH_BYTE (pos_byte); - MAKE_CHAR_MULTIBYTE (c); - if (! fastmap[(int) SYNTAX (c)]) - break; - pos++, pos_byte++; - if (pos >= XINT (lim)) ++ if (p >= stop) ++ { ++ if (p >= endp) ++ break; ++ p = GAP_END_ADDR; ++ stop = endp; ++ } ++ if (! fastmap[(int) SYNTAX (*p)]) + break; ++ p++, pos++, pos_byte++; + UPDATE_SYNTAX_TABLE_FORWARD (pos); + } + } + } + else + { + if (multibyte) + { + while (1) + { - int savepos = pos_byte; - /* Since we already checked for multibyteness, - avoid using DEC_BOTH which checks again. */ - pos--; - DEC_POS (pos_byte); - if (pos <= XINT (lim)) - break; - UPDATE_SYNTAX_TABLE_BACKWARD (pos); - if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))]) ++ unsigned char *prev_p; ++ ++ if (p <= stop) + { - pos++; - pos_byte = savepos; - break; ++ if (p <= endp) ++ break; ++ p = GPT_ADDR; ++ stop = endp; + } ++ prev_p = p; ++ while (--p >= stop && ! CHAR_HEAD_P (*p)); ++ c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH); ++ if (! fastmap[(int) SYNTAX (c)]) ++ break; ++ pos--, pos_byte -= prev_p - p; ++ UPDATE_SYNTAX_TABLE_BACKWARD (pos); + } + } + else + { + while (1) + { - c = FETCH_BYTE (pos_byte - 1); - MAKE_CHAR_MULTIBYTE (c); - if (! fastmap[(int) SYNTAX (c)]) - break; - pos--, pos_byte--; - if (pos <= XINT (lim)) ++ if (p <= stop) ++ { ++ if (p <= endp) ++ break; ++ p = GPT_ADDR; ++ stop = endp; ++ } ++ if (! fastmap[(int) SYNTAX (p[-1])]) + break; ++ p--, pos--, pos_byte--; + UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1); + } + } + } SET_PT_BOTH (pos, pos_byte); immediate_quit = 0; @@@ -1788,7 -1899,7 +1972,7 @@@ forw_comment (from, from_byte, stop, ne *bytepos_ptr = from_byte; return 0; } -- c = FETCH_CHAR (from_byte); ++ c = FETCH_CHAR_AS_MULTIBYTE (from_byte); syntax = SYNTAX_WITH_FLAGS (c); code = syntax & 0xff; if (code == Sendcomment @@@ -1818,7 -1929,7 +2002,7 @@@ forw_incomment: if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax) && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style -- && (c1 = FETCH_CHAR (from_byte), ++ && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte), SYNTAX_COMEND_SECOND (c1)) && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) || SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0)) @@@ -1837,7 -1948,7 +2021,7 @@@ if (nesting > 0 && from < stop && SYNTAX_FLAGS_COMSTART_FIRST (syntax) -- && (c1 = FETCH_CHAR (from_byte), ++ && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte), SYNTAX_COMMENT_STYLE (c1) == style && SYNTAX_COMSTART_SECOND (c1)) && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) || @@@ -1909,10 -2019,10 +2093,10 @@@ between them, return t; otherwise retur INC_BOTH (from, from_byte); UPDATE_SYNTAX_TABLE_FORWARD (from); if (from < stop && comstart_first - && (c1 = FETCH_CHAR (from_byte), + && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte), SYNTAX_COMSTART_SECOND (c1))) { - /* We have encountered a comment start sequence and we + /* We have encountered a comment start sequence and we are ignoring all text inside comments. We must record the comment style this sequence begins so that later, only a comment end of the same style actually ends @@@ -2000,11 -2110,11 +2184,11 @@@ if (from == stop) break; UPDATE_SYNTAX_TABLE_BACKWARD (from); - c = FETCH_CHAR (from_byte); + c = FETCH_CHAR_AS_MULTIBYTE (from_byte); if (SYNTAX (c) == Scomment_fence - && !char_quoted (from, from_byte)) + && !char_quoted (from, from_byte)) { - found = 1; + found = 1; break; } } @@@ -2119,10 -2229,10 +2303,10 @@@ scan_lists (from, count, depth, sexpfla INC_BOTH (from, from_byte); UPDATE_SYNTAX_TABLE_FORWARD (from); if (from < stop && comstart_first - && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte)) + && SYNTAX_COMSTART_SECOND (FETCH_CHAR_AS_MULTIBYTE (from_byte)) && parse_sexp_ignore_comments) { - /* we have encountered a comment start sequence and we + /* we have encountered a comment start sequence and we are ignoring all text inside comments. We must record the comment style this sequence begins so that later, only a comment end of the same style actually ends @@@ -2396,16 -2500,16 +2580,16 @@@ DEC_BOTH (from, from_byte); if (from == stop) goto lose; UPDATE_SYNTAX_TABLE_BACKWARD (from); - if (!char_quoted (from, from_byte) + if (!char_quoted (from, from_byte) - && (c = FETCH_CHAR (from_byte), + && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte), SYNTAX_WITH_MULTIBYTE_CHECK (c) == code)) break; } if (code == Sstring_fence && !depth && sexpflag) goto done2; break; - + case Sstring: - stringterm = FETCH_CHAR (from_byte); + stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte); while (1) { if (from == stop) goto lose; @@@ -2590,10 -2692,9 +2775,10 @@@ scan_sexps_forward (stateptr, from, fro do { prev_from = from; \ prev_from_byte = from_byte; \ prev_from_syntax \ -- = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \ ++ = SYNTAX_WITH_FLAGS (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte)); \ INC_BOTH (from, from_byte); \ - UPDATE_SYNTAX_TABLE_FORWARD (from); \ + if (from < end) \ + UPDATE_SYNTAX_TABLE_FORWARD (from); \ } while (0) immediate_quit = 1; @@@ -2706,7 -2813,7 +2891,7 @@@ } else if (from < end) if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)) -- if (c1 = FETCH_CHAR (from_byte), ++ if (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte), SYNTAX_COMSTART_SECOND (c1)) /* Duplicate code to avoid a complex if-expression which causes trouble for the SGI compiler. */ @@@ -2816,8 -2922,8 +3001,8 @@@ state.comstr_start = from - 1; if (stopbefore) goto stop; /* this arg means stop at sexp start */ curlevel->last = prev_from; - state.instring = (code == Sstring + state.instring = (code == Sstring - ? (FETCH_CHAR (prev_from_byte)) + ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte)) : ST_STRING_STYLE); if (boundary_stop) goto done; startinstring: @@@ -3079,9 -3180,27 +3263,28 @@@ See the info node `(elisp)Syntax Proper DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start", &open_paren_in_column_0_is_defun_start, - doc: /* Non-nil means an open paren in column 0 denotes the start of a defun. */); + doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */); open_paren_in_column_0_is_defun_start = 1; - DEFVAR_LISP ("next-word-boundary-function-table", - &Vnext_word_boundary_function_table, ++ ++ DEFVAR_LISP ("find-word-boundary-function-table", ++ &Vfind_word_boundary_function_table, + doc: /* -Char table of functions to search for the next word boundary. ++Char table of functions to search for the word boundary. + Each function is called with two arguments; POS and LIMIT. + POS and LIMIT are character positions in the current buffer. + + If POS is less than LIMIT, POS is at the first character of a word, + and the return value of a function is a position after the last + character of that word. + + If POS is not less than LIMIT, POS is at the last character of a word, + and the return value of a function is a position at the first + character of that word. + + In both cases, LIMIT bounds the search. */); - Vnext_word_boundary_function_table = Fmake_char_table (Qnil, Qnil); ++ Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil); + defsubr (&Ssyntax_table_p); defsubr (&Ssyntax_table); defsubr (&Sstandard_syntax_table); diff --cc src/syntax.h index 6d8f201baeb,1e702bb65c3..bdf7ebb31bd --- a/src/syntax.h +++ b/src/syntax.h @@@ -57,40 -57,17 +57,17 @@@ enum syntaxcod /* Set the syntax entry VAL for char C in table TABLE. */ - #define SET_RAW_SYNTAX_ENTRY(table, c, val) \ - ((c) < CHAR_TABLE_SINGLE_BYTE_SLOTS \ - ? (XCHAR_TABLE (table)->contents[(unsigned char) (c)] = (val)) \ - : Faset ((table), make_number (c), (val))) + #define SET_RAW_SYNTAX_ENTRY(table, c, val) \ + CHAR_TABLE_SET ((table), c, (val)) - /* Fetch the syntax entry for char C in syntax table TABLE. - This macro is called only when C is less than CHAR_TABLE_ORDINARY_SLOTS. - Do inheritance. */ + /* Set the syntax entry VAL for char-range RANGE in table TABLE. + RANGE is a cons (FROM . TO) specifying the range of characters. */ - #ifdef __GNUC__ - #define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \ - ({ Lisp_Object tbl = table; \ - Lisp_Object temp = XCHAR_TABLE (tbl)->contents[(c)]; \ - while (NILP (temp)) \ - { \ - tbl = XCHAR_TABLE (tbl)->parent; \ - if (NILP (tbl)) \ - break; \ - temp = XCHAR_TABLE (tbl)->contents[(c)]; \ - } \ - temp; }) - #else - extern Lisp_Object syntax_temp; - extern Lisp_Object syntax_parent_lookup P_ ((Lisp_Object, int)); - - #define SYNTAX_ENTRY_FOLLOW_PARENT(table, c) \ - (syntax_temp = XCHAR_TABLE (table)->contents[(c)], \ - (NILP (syntax_temp) \ - ? syntax_parent_lookup (table, (c)) \ - : syntax_temp)) - #endif + #define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val) \ + Fset_char_table_range ((table), (range), (val)) /* SYNTAX_ENTRY fetches the information from the entry for character C - in syntax table TABLE, or from globally kept data (gl_state). + in syntax table TABLE, or from globally kept data (gl_state). Does inheritance. */ /* CURRENT_SYNTAX_TABLE gives the syntax table valid for current position, it is either the buffer's syntax table, or syntax table diff --cc src/term.c index 829f2d88e6b,2ae413178a8..4bc460d0f78 --- a/src/term.c +++ b/src/term.c @@@ -867,13 -872,14 +868,14 @@@ encode_terminal_code (src, dst, src_len coding->src_multibyte = STRING_MULTIBYTE (tbase[g]); } } - + - result = encode_coding (coding, buf, dst, len, dst_end - dst); + coding->source = buf; + coding->destination = dst; + coding->dst_bytes = dst_end - dst; + encode_coding_object (coding, Qnil, 0, 0, 1, len, Qnil); len -= coding->consumed; dst += coding->produced; - if (result == CODING_FINISH_INSUFFICIENT_DST - || (result == CODING_FINISH_INSUFFICIENT_SRC - && len > dst_end - dst)) + if (coding->result == CODING_RESULT_INSUFFICIENT_DST) /* The remaining output buffer is too short. We must break the loop here without increasing SRC so that the next call of this function starts from the same glyph. */ @@@ -1721,15 -1717,9 +1725,9 @@@ produce_glyphs (it } else { - /* A multi-byte character. The display width is fixed for all - characters of the set. Some of the glyphs may have to be - ignored because they are already displayed in a continued - line. */ - int charset = CHAR_CHARSET (it->c); - - it->pixel_width = CHARSET_WIDTH (charset); + it->pixel_width = CHAR_WIDTH (it->c); it->nglyphs = it->pixel_width; - + if (it->glyph_row) append_glyph (it); } diff --cc src/w16select.c index e655936e48b,6365a2b2fc8..199f4363a60 --- a/src/w16select.c +++ b/src/w16select.c @@@ -38,9 -38,8 +38,9 @@@ Boston, MA 02111-1307, USA. * #include "frame.h" /* Need this to get the X window of selected_frame */ #include "blockinput.h" #include "buffer.h" - #include "charset.h" + #include "character.h" #include "coding.h" +#include "composite.h" /* If ever some function outside this file will need to call any clipboard-related function, the following prototypes and constants diff --cc src/w32term.c index 539df459402,8241be623d8..35952e03347 --- a/src/w32term.c +++ b/src/w32term.c @@@ -996,10 -1402,9 +996,10 @@@ w32_encode_char (c, char2b, font_info, struct font_info *font_info; int * two_byte_p; { -- int charset = CHAR_CHARSET (c); ++ struct charset *charset = CHAR_CHARSET (c); int codepage; int unicode_p = 0; + int internal_two_byte_p = 0; XFontStruct *font = font_info->font; @@@ -1015,18 -1422,17 +1015,18 @@@ if (CHARSET_DIMENSION (charset) == 1) { -- ccl->reg[0] = charset; - ccl->reg[1] = BYTE2 (*char2b); ++ ccl->reg[0] = CHARSET_ID (charset); + ccl->reg[1] = XCHAR2B_BYTE2 (char2b); + ccl->reg[2] = -1; } else { -- ccl->reg[0] = charset; - ccl->reg[1] = BYTE1 (*char2b); - ccl->reg[2] = BYTE2 (*char2b); ++ ccl->reg[0] = CHARSET_ID (charset); + ccl->reg[1] = XCHAR2B_BYTE1 (char2b); + ccl->reg[2] = XCHAR2B_BYTE2 (char2b); } - ccl_driver (ccl, NULL, NULL, 0, 0, NULL); - ccl_driver (ccl, NULL, NULL, 0, 0, NULL, Qnil); ++ ccl_driver (ccl, NULL, NULL, 0, 0, Qnil); /* We assume that MSBs are appropriately set/reset by CCL program. */ @@@ -1040,21 -1446,21 +1040,20 @@@ /* Fixed encoding scheme. See fontset.h for the meaning of the encoding numbers. */ int enc = font_info->encoding[charset]; - + if ((enc == 1 || enc == 2) && CHARSET_DIMENSION (charset) == 2) - *char2b = BUILD_WCHAR_T (BYTE1 (*char2b) | 0x80, BYTE2 (*char2b)); - + STORE_XCHAR2B (char2b, XCHAR2B_BYTE1 (char2b) | 0x80, XCHAR2B_BYTE2 (char2b)); + if (enc == 1 || enc == 3 || (enc == 4 && CHARSET_DIMENSION (charset) == 1)) - *char2b = BUILD_WCHAR_T (BYTE1 (*char2b), BYTE2 (*char2b) | 0x80); + STORE_XCHAR2B (char2b, XCHAR2B_BYTE1 (char2b), XCHAR2B_BYTE2 (char2b) | 0x80); else if (enc == 4) { -- int sjis1, sjis2; ++ int code = (int) char2b; - ENCODE_SJIS (XCHAR2B_BYTE1 (char2b), XCHAR2B_BYTE2 (char2b), - ENCODE_SJIS (BYTE1 (*char2b), BYTE2 (*char2b), -- sjis1, sjis2); - STORE_XCHAR2B (char2b, sjis1, sjis2); - *char2b = BUILD_WCHAR_T (sjis1, sjis2); ++ JIS_TO_SJIS (code); ++ STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF)); } } codepage = font_info->codepage; @@@ -1062,12 -1468,12 +1061,11 @@@ /* If charset is not ASCII or Latin-1, may need to move it into Unicode space. */ if ( font && !font->bdf && w32_use_unicode_for_codepage (codepage) -- && charset != CHARSET_ASCII && charset != charset_latin_iso8859_1 -- && charset != CHARSET_8_BIT_CONTROL && charset != CHARSET_8_BIT_GRAPHIC) ++ && c >= 0x100) { char temp[3]; - temp[0] = BYTE1 (*char2b); - temp[1] = BYTE2 (*char2b); + temp[0] = XCHAR2B_BYTE1 (char2b); + temp[1] = XCHAR2B_BYTE2 (char2b); temp[2] = '\0'; if (codepage != CP_UNICODE) { diff --cc src/xdisp.c index a61ead4bef2,50e85a185dc..762ed45b8a2 --- a/src/xdisp.c +++ b/src/xdisp.c @@@ -3765,6 -3287,44 +3768,44 @@@ string_buffer_position (w, string, arou `composition' property ***********************************************************************/ + static enum prop_handled + handle_auto_composed_prop (it) + struct it *it; + { + enum prop_handled handled = HANDLED_NORMALLY; + + if (! NILP (Vauto_composition_function)) + { + Lisp_Object val; + int pos; + + if (STRINGP (it->string)) + pos = IT_STRING_CHARPOS (*it); + else + pos = IT_CHARPOS (*it); + + val =Fget_char_property (make_number (pos), Qauto_composed, it->string); + if (NILP (val)) + { - int count = BINDING_STACK_SIZE (); ++ int count = SPECPDL_INDEX (); + Lisp_Object args[3]; + + args[0] = Vauto_composition_function; + specbind (Qauto_composition_function, Qnil); + args[1] = make_number (pos); + args[2] = it->string; + safe_call (3, args); + unbind_to (count, Qnil); + + val = Fget_char_property (args[1], Qauto_composed, it->string); + if (! NILP (val)) + handled = HANDLED_RECOMPUTE_PROPS; + } + } + + return handled; + } + /* Set up iterator IT from `composition' property at its current position. Called from handle_stop. */ @@@ -6202,13 -5719,13 +6244,13 @@@ message_dolog (m, nbytes, nlflag, multi { int i, c, char_bytes; unsigned char work[1]; - + /* Convert a multibyte string to single-byte for the *Message* buffer. */ - for (i = 0; i < nbytes; i += nbytes) + for (i = 0; i < nbytes; i += char_bytes) { c = string_char_and_length (m + i, nbytes - i, &char_bytes); - work[0] = (SINGLE_BYTE_CHAR_P (c) + work[0] = (ASCII_CHAR_P (c) ? c : multibyte_char_to_unibyte (c, Qnil)); insert_1_both (work, 1, 1, 1, 0, 0); @@@ -10376,36 -9280,25 +10418,25 @@@ disp_char_vector (dp, c struct Lisp_Char_Table *dp; int c; { - int code[4], i; Lisp_Object val; - if (SINGLE_BYTE_CHAR_P (c)) - return (dp->contents[c]); - - SPLIT_CHAR (c, code[0], code[1], code[2]); - if (code[1] < 32) - code[1] = -1; - else if (code[2] < 32) - code[2] = -1; - - /* Here, the possible range of code[0] (== charset ID) is - 128..max_charset. Since the top level char table contains data - for multibyte characters after 256th element, we must increment - code[0] by 128 to get a correct index. */ - code[0] += 128; - code[3] = -1; /* anchor */ - - for (i = 0; code[i] >= 0; i++, dp = XCHAR_TABLE (val)) + if (ASCII_CHAR_P (c)) { - val = dp->contents[code[i]]; - if (!SUB_CHAR_TABLE_P (val)) - return (NILP (val) ? dp->defalt : val); + val = dp->ascii; + if (SUB_CHAR_TABLE_P (val)) + val = XSUB_CHAR_TABLE (val)->contents[c]; } + else + { + Lisp_Object table; - /* Here, val is a sub char table. We return the default value of - it. */ - return (dp->defalt); + XSETCHAR_TABLE (table, dp); + val = char_table_ref (table, c); + } + if (NILP (val)) + val = dp->defalt; + return val; -} +} @@@ -14191,8 -12935,8 +14222,8 @@@ highlight_trailing_whitespace (f, row && glyph->u.ch == ' ')) && trailing_whitespace_p (glyph->charpos)) { - int face_id = lookup_named_face (f, Qtrailing_whitespace, 0); + int face_id = lookup_named_face (f, Qtrailing_whitespace); - + while (glyph >= start && BUFFERP (glyph->object) && (glyph->type == STRETCH_GLYPH @@@ -15643,15 -14106,13 +15676,14 @@@ decode_mode_spec_coding (coding_system /* Mention the EOL conversion if it is not the usual one. */ if (STRINGP (eoltype)) { - eol_str = XSTRING (eoltype)->data; - eol_str_len = XSTRING (eoltype)->size; + eol_str = SDATA (eoltype); + eol_str_len = SBYTES (eoltype); } - else if (INTEGERP (eoltype) - && CHAR_VALID_P (XINT (eoltype), 0)) + else if (CHARACTERP (eoltype)) { - eol_str = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH); - eol_str_len = CHAR_STRING (XINT (eoltype), eol_str); + unsigned char *tmp = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH); + eol_str_len = CHAR_STRING (XINT (eoltype), tmp); + eol_str = tmp; } else { @@@ -16402,4209 -14863,41 +16436,4191 @@@ invisible_p (propval, list /*********************************************************************** - Initialization + Glyph Display ***********************************************************************/ +#ifdef HAVE_WINDOW_SYSTEM + +#if GLYPH_DEBUG + void -syms_of_xdisp () +dump_glyph_string (s) + struct glyph_string *s; { - Vwith_echo_area_save_vector = Qnil; - staticpro (&Vwith_echo_area_save_vector); + fprintf (stderr, "glyph string\n"); + fprintf (stderr, " x, y, w, h = %d, %d, %d, %d\n", + s->x, s->y, s->width, s->height); + fprintf (stderr, " ybase = %d\n", s->ybase); + fprintf (stderr, " hl = %d\n", s->hl); + fprintf (stderr, " left overhang = %d, right = %d\n", + s->left_overhang, s->right_overhang); + fprintf (stderr, " nchars = %d\n", s->nchars); + fprintf (stderr, " extends to end of line = %d\n", + s->extends_to_end_of_line_p); + fprintf (stderr, " font height = %d\n", FONT_HEIGHT (s->font)); + fprintf (stderr, " bg width = %d\n", s->background_width); +} - Vmessage_stack = Qnil; - staticpro (&Vmessage_stack); - - Qinhibit_redisplay = intern ("inhibit-redisplay"); - staticpro (&Qinhibit_redisplay); +#endif /* GLYPH_DEBUG */ - message_dolog_marker1 = Fmake_marker (); - staticpro (&message_dolog_marker1); - message_dolog_marker2 = Fmake_marker (); - staticpro (&message_dolog_marker2); - message_dolog_marker3 = Fmake_marker (); - staticpro (&message_dolog_marker3); +/* Initialize glyph string S. CHAR2B is a suitably allocated vector + of XChar2b structures for S; it can't be allocated in + init_glyph_string because it must be allocated via `alloca'. W + is the window on which S is drawn. ROW and AREA are the glyph row + and area within the row from which S is constructed. START is the + index of the first glyph structure covered by S. HL is a + face-override for drawing S. */ -#if GLYPH_DEBUG - defsubr (&Sdump_glyph_matrix); - defsubr (&Sdump_glyph_row); - defsubr (&Sdump_tool_bar_row); - defsubr (&Strace_redisplay); - defsubr (&Strace_to_stderr); -#endif -#ifdef HAVE_WINDOW_SYSTEM - defsubr (&Stool_bar_lines_needed); +#ifdef HAVE_NTGUI +#define OPTIONAL_HDC(hdc) hdc, +#define DECLARE_HDC(hdc) HDC hdc; +#define ALLOCATE_HDC(hdc, f) hdc = get_frame_dc ((f)) +#define RELEASE_HDC(hdc, f) release_frame_dc ((f), (hdc)) #endif - staticpro (&Qmenu_bar_update_hook); - Qmenu_bar_update_hook = intern ("menu-bar-update-hook"); +#ifndef OPTIONAL_HDC +#define OPTIONAL_HDC(hdc) +#define DECLARE_HDC(hdc) +#define ALLOCATE_HDC(hdc, f) +#define RELEASE_HDC(hdc, f) +#endif + +static void +init_glyph_string (s, OPTIONAL_HDC (hdc) char2b, w, row, area, start, hl) + struct glyph_string *s; + DECLARE_HDC (hdc) + XChar2b *char2b; + struct window *w; + struct glyph_row *row; + enum glyph_row_area area; + int start; + enum draw_glyphs_face hl; +{ + bzero (s, sizeof *s); + s->w = w; + s->f = XFRAME (w->frame); +#ifdef HAVE_NTGUI + s->hdc = hdc; +#endif + s->display = FRAME_X_DISPLAY (s->f); + s->window = FRAME_X_WINDOW (s->f); + s->char2b = char2b; + s->hl = hl; + s->row = row; + s->area = area; + s->first_glyph = row->glyphs[area] + start; + s->height = row->height; + s->y = WINDOW_TO_FRAME_PIXEL_Y (w, row->y); + + /* Display the internal border below the tool-bar window. */ + if (s->w == XWINDOW (s->f->tool_bar_window)) + s->y -= FRAME_INTERNAL_BORDER_WIDTH (s->f); + + s->ybase = s->y + row->ascent; +} + + +/* Append the list of glyph strings with head H and tail T to the list + with head *HEAD and tail *TAIL. Set *HEAD and *TAIL to the result. */ + +static INLINE void +append_glyph_string_lists (head, tail, h, t) + struct glyph_string **head, **tail; + struct glyph_string *h, *t; +{ + if (h) + { + if (*head) + (*tail)->next = h; + else + *head = h; + h->prev = *tail; + *tail = t; + } +} + + +/* Prepend the list of glyph strings with head H and tail T to the + list with head *HEAD and tail *TAIL. Set *HEAD and *TAIL to the + result. */ + +static INLINE void +prepend_glyph_string_lists (head, tail, h, t) + struct glyph_string **head, **tail; + struct glyph_string *h, *t; +{ + if (h) + { + if (*head) + (*head)->prev = t; + else + *tail = t; + t->next = *head; + *head = h; + } +} + + +/* Append glyph string S to the list with head *HEAD and tail *TAIL. + Set *HEAD and *TAIL to the resulting list. */ + +static INLINE void +append_glyph_string (head, tail, s) + struct glyph_string **head, **tail; + struct glyph_string *s; +{ + s->next = s->prev = NULL; + append_glyph_string_lists (head, tail, s, s); +} + + +/* Get face and two-byte form of character glyph GLYPH on frame F. + The encoding of GLYPH->u.ch is returned in *CHAR2B. Value is + a pointer to a realized face that is ready for display. */ + +static INLINE struct face * +get_glyph_face_and_encoding (f, glyph, char2b, two_byte_p) + struct frame *f; + struct glyph *glyph; + XChar2b *char2b; + int *two_byte_p; +{ + struct face *face; + + xassert (glyph->type == CHAR_GLYPH); + face = FACE_FROM_ID (f, glyph->face_id); + + if (two_byte_p) + *two_byte_p = 0; + + if (!glyph->multibyte_p) + { + /* Unibyte case. We don't have to encode, but we have to make + sure to use a face suitable for unibyte. */ + STORE_XCHAR2B (char2b, 0, glyph->u.ch); + } + else if (glyph->u.ch < 128 + && glyph->face_id < BASIC_FACE_ID_SENTINEL) + { + /* Case of ASCII in a face known to fit ASCII. */ + STORE_XCHAR2B (char2b, 0, glyph->u.ch); + } + else + { - int c1, c2, charset; ++ struct font_info *font_info ++ = FONT_INFO_FROM_ID (f, face->font_info_id); ++ if (font_info) ++ { ++ struct charset *charset = CHARSET_FROM_ID (font_info->charset); ++ unsigned code = ENCODE_CHAR (charset, glyph->u.ch); + - /* Split characters into bytes. If c2 is -1 afterwards, C is - really a one-byte character so that byte1 is zero. */ - SPLIT_CHAR (glyph->u.ch, charset, c1, c2); - if (c2 > 0) - STORE_XCHAR2B (char2b, c1, c2); - else - STORE_XCHAR2B (char2b, 0, c1); ++ if (CHARSET_DIMENSION (charset) == 1) ++ STORE_XCHAR2B (char2b, 0, code); ++ else ++ STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF)); + - /* Maybe encode the character in *CHAR2B. */ - if (charset != CHARSET_ASCII) - { - struct font_info *font_info - = FONT_INFO_FROM_ID (f, face->font_info_id); - if (font_info) - glyph->font_type - = rif->encode_char (glyph->u.ch, char2b, font_info, two_byte_p); ++ /* Maybe encode the character in *CHAR2B. */ ++ if (CHARSET_ID (charset) != charset_ascii) ++ { ++ glyph->font_type ++ = rif->encode_char (glyph->u.ch, char2b, font_info, charset, ++ two_byte_p); ++ } + } + } + + /* Make sure X resources of the face are allocated. */ + xassert (face != NULL); + PREPARE_FACE_FOR_DISPLAY (f, face); + return face; +} + + +/* Fill glyph string S with composition components specified by S->cmp. + + FACES is an array of faces for all components of this composition. + S->gidx is the index of the first component for S. + OVERLAPS_P non-zero means S should draw the foreground only, and + use its physical height for clipping. + + Value is the index of a component not in S. */ + +static int +fill_composite_glyph_string (s, faces, overlaps_p) + struct glyph_string *s; + struct face **faces; + int overlaps_p; +{ + int i; + + xassert (s); + + s->for_overlaps_p = overlaps_p; + + s->face = faces[s->gidx]; + s->font = s->face->font; + s->font_info = FONT_INFO_FROM_ID (s->f, s->face->font_info_id); + + /* For all glyphs of this composition, starting at the offset + S->gidx, until we reach the end of the definition or encounter a + glyph that requires the different face, add it to S. */ + ++s->nchars; + for (i = s->gidx + 1; i < s->cmp->glyph_len && faces[i] == s->face; ++i) + ++s->nchars; + + /* All glyph strings for the same composition has the same width, + i.e. the width set for the first component of the composition. */ + + s->width = s->first_glyph->pixel_width; + + /* If the specified font could not be loaded, use the frame's + default font, but record the fact that we couldn't load it in + the glyph string so that we can draw rectangles for the + characters of the glyph string. */ + if (s->font == NULL) + { + s->font_not_found_p = 1; + s->font = FRAME_FONT (s->f); + } + + /* Adjust base line for subscript/superscript text. */ + s->ybase += s->first_glyph->voffset; + + xassert (s->face && s->face->gc); + + /* This glyph string must always be drawn with 16-bit functions. */ + s->two_byte_p = 1; + + return s->gidx + s->nchars; +} + + +/* Fill glyph string S from a sequence of character glyphs. + + FACE_ID is the face id of the string. START is the index of the + first glyph to consider, END is the index of the last + 1. + OVERLAPS_P non-zero means S should draw the foreground only, and + use its physical height for clipping. + + Value is the index of the first glyph not in S. */ + +static int +fill_glyph_string (s, face_id, start, end, overlaps_p) + struct glyph_string *s; + int face_id; + int start, end, overlaps_p; +{ + struct glyph *glyph, *last; + int voffset; + int glyph_not_available_p; + + xassert (s->f == XFRAME (s->w->frame)); + xassert (s->nchars == 0); + xassert (start >= 0 && end > start); + + s->for_overlaps_p = overlaps_p, + glyph = s->row->glyphs[s->area] + start; + last = s->row->glyphs[s->area] + end; + voffset = glyph->voffset; + + glyph_not_available_p = glyph->glyph_not_available_p; + + while (glyph < last + && glyph->type == CHAR_GLYPH + && glyph->voffset == voffset + /* Same face id implies same font, nowadays. */ + && glyph->face_id == face_id + && glyph->glyph_not_available_p == glyph_not_available_p) + { + int two_byte_p; + + s->face = get_glyph_face_and_encoding (s->f, glyph, + s->char2b + s->nchars, + &two_byte_p); + s->two_byte_p = two_byte_p; + ++s->nchars; + xassert (s->nchars <= end - start); + s->width += glyph->pixel_width; + ++glyph; + } + + s->font = s->face->font; + s->font_info = FONT_INFO_FROM_ID (s->f, s->face->font_info_id); + + /* If the specified font could not be loaded, use the frame's font, + but record the fact that we couldn't load it in + S->font_not_found_p so that we can draw rectangles for the + characters of the glyph string. */ + if (s->font == NULL || glyph_not_available_p) + { + s->font_not_found_p = 1; + s->font = FRAME_FONT (s->f); + } + + /* Adjust base line for subscript/superscript text. */ + s->ybase += voffset; + + xassert (s->face && s->face->gc); + return glyph - s->row->glyphs[s->area]; +} + + +/* Fill glyph string S from image glyph S->first_glyph. */ + +static void +fill_image_glyph_string (s) + struct glyph_string *s; +{ + xassert (s->first_glyph->type == IMAGE_GLYPH); + s->img = IMAGE_FROM_ID (s->f, s->first_glyph->u.img_id); + xassert (s->img); + s->face = FACE_FROM_ID (s->f, s->first_glyph->face_id); + s->font = s->face->font; + s->width = s->first_glyph->pixel_width; + + /* Adjust base line for subscript/superscript text. */ + s->ybase += s->first_glyph->voffset; +} + + +/* Fill glyph string S from a sequence of stretch glyphs. + + ROW is the glyph row in which the glyphs are found, AREA is the + area within the row. START is the index of the first glyph to + consider, END is the index of the last + 1. + + Value is the index of the first glyph not in S. */ + +static int +fill_stretch_glyph_string (s, row, area, start, end) + struct glyph_string *s; + struct glyph_row *row; + enum glyph_row_area area; + int start, end; +{ + struct glyph *glyph, *last; + int voffset, face_id; + + xassert (s->first_glyph->type == STRETCH_GLYPH); + + glyph = s->row->glyphs[s->area] + start; + last = s->row->glyphs[s->area] + end; + face_id = glyph->face_id; + s->face = FACE_FROM_ID (s->f, face_id); + s->font = s->face->font; + s->font_info = FONT_INFO_FROM_ID (s->f, s->face->font_info_id); + s->width = glyph->pixel_width; + voffset = glyph->voffset; + + for (++glyph; + (glyph < last + && glyph->type == STRETCH_GLYPH + && glyph->voffset == voffset + && glyph->face_id == face_id); + ++glyph) + s->width += glyph->pixel_width; + + /* Adjust base line for subscript/superscript text. */ + s->ybase += voffset; + + /* The case that face->gc == 0 is handled when drawing the glyph + string by calling PREPARE_FACE_FOR_DISPLAY. */ + xassert (s->face); + return glyph - s->row->glyphs[s->area]; +} + + +/* EXPORT for RIF: + Set *LEFT and *RIGHT to the left and right overhang of GLYPH on + frame F. Overhangs of glyphs other than type CHAR_GLYPH are + assumed to be zero. */ + +void +x_get_glyph_overhangs (glyph, f, left, right) + struct glyph *glyph; + struct frame *f; + int *left, *right; +{ + *left = *right = 0; + + if (glyph->type == CHAR_GLYPH) + { + XFontStruct *font; + struct face *face; + struct font_info *font_info; + XChar2b char2b; + XCharStruct *pcm; + + face = get_glyph_face_and_encoding (f, glyph, &char2b, NULL); + font = face->font; + font_info = FONT_INFO_FROM_ID (f, face->font_info_id); + if (font /* ++KFS: Should this be font_info ? */ + && (pcm = rif->per_char_metric (font, &char2b, glyph->font_type))) + { + if (pcm->rbearing > pcm->width) + *right = pcm->rbearing - pcm->width; + if (pcm->lbearing < 0) + *left = -pcm->lbearing; + } + } +} + + +/* Return the index of the first glyph preceding glyph string S that + is overwritten by S because of S's left overhang. Value is -1 + if no glyphs are overwritten. */ + +static int +left_overwritten (s) + struct glyph_string *s; +{ + int k; + + if (s->left_overhang) + { + int x = 0, i; + struct glyph *glyphs = s->row->glyphs[s->area]; + int first = s->first_glyph - glyphs; + + for (i = first - 1; i >= 0 && x > -s->left_overhang; --i) + x -= glyphs[i].pixel_width; + + k = i + 1; + } + else + k = -1; + + return k; +} + + +/* Return the index of the first glyph preceding glyph string S that + is overwriting S because of its right overhang. Value is -1 if no + glyph in front of S overwrites S. */ + +static int +left_overwriting (s) + struct glyph_string *s; +{ + int i, k, x; + struct glyph *glyphs = s->row->glyphs[s->area]; + int first = s->first_glyph - glyphs; + + k = -1; + x = 0; + for (i = first - 1; i >= 0; --i) + { + int left, right; + x_get_glyph_overhangs (glyphs + i, s->f, &left, &right); + if (x + right > 0) + k = i; + x -= glyphs[i].pixel_width; + } + + return k; +} + + +/* Return the index of the last glyph following glyph string S that is + not overwritten by S because of S's right overhang. Value is -1 if + no such glyph is found. */ + +static int +right_overwritten (s) + struct glyph_string *s; +{ + int k = -1; + + if (s->right_overhang) + { + int x = 0, i; + struct glyph *glyphs = s->row->glyphs[s->area]; + int first = (s->first_glyph - glyphs) + (s->cmp ? 1 : s->nchars); + int end = s->row->used[s->area]; + + for (i = first; i < end && s->right_overhang > x; ++i) + x += glyphs[i].pixel_width; + + k = i; + } + + return k; +} + + +/* Return the index of the last glyph following glyph string S that + overwrites S because of its left overhang. Value is negative + if no such glyph is found. */ + +static int +right_overwriting (s) + struct glyph_string *s; +{ + int i, k, x; + int end = s->row->used[s->area]; + struct glyph *glyphs = s->row->glyphs[s->area]; + int first = (s->first_glyph - glyphs) + (s->cmp ? 1 : s->nchars); + + k = -1; + x = 0; + for (i = first; i < end; ++i) + { + int left, right; + x_get_glyph_overhangs (glyphs + i, s->f, &left, &right); + if (x - left < 0) + k = i; + x += glyphs[i].pixel_width; + } + + return k; +} + + +/* Get face and two-byte form of character C in face FACE_ID on frame + F. The encoding of C is returned in *CHAR2B. MULTIBYTE_P non-zero + means we want to display multibyte text. DISPLAY_P non-zero means + make sure that X resources for the face returned are allocated. + Value is a pointer to a realized face that is ready for display if + DISPLAY_P is non-zero. */ + +static INLINE struct face * +get_char_face_and_encoding (f, c, face_id, char2b, multibyte_p, display_p) + struct frame *f; + int c, face_id; + XChar2b *char2b; + int multibyte_p, display_p; +{ + struct face *face = FACE_FROM_ID (f, face_id); + + if (!multibyte_p) + { + /* Unibyte case. We don't have to encode, but we have to make + sure to use a face suitable for unibyte. */ + STORE_XCHAR2B (char2b, 0, c); + face_id = FACE_FOR_CHAR (f, face, c); + face = FACE_FROM_ID (f, face_id); + } + else if (c < 128 && face_id < BASIC_FACE_ID_SENTINEL) + { + /* Case of ASCII in a face known to fit ASCII. */ + STORE_XCHAR2B (char2b, 0, c); + } - else ++ else if (face->font != NULL) + { - int c1, c2, charset; ++ struct font_info *font_info ++ = FONT_INFO_FROM_ID (f, face->font_info_id); ++ struct charset *charset = CHARSET_FROM_ID (font_info->charset); ++ unsigned code = ENCODE_CHAR (charset, c); + - /* Split characters into bytes. If c2 is -1 afterwards, C is - really a one-byte character so that byte1 is zero. */ - SPLIT_CHAR (c, charset, c1, c2); - if (c2 > 0) - STORE_XCHAR2B (char2b, c1, c2); ++ if (CHARSET_DIMENSION (charset) == 1) ++ STORE_XCHAR2B (char2b, 0, code); + else - STORE_XCHAR2B (char2b, 0, c1); - - /* Maybe encode the character in *CHAR2B. */ - if (face->font != NULL) - { - struct font_info *font_info - = FONT_INFO_FROM_ID (f, face->font_info_id); - if (font_info) - rif->encode_char (c, char2b, font_info, 0); - } ++ STORE_XCHAR2B (char2b, (code >> 8), (code & 0xFF)); ++ /* Maybe encode the character in *CHAR2B. */ ++ rif->encode_char (c, char2b, font_info, charset, NULL); + } + + /* Make sure X resources of the face are allocated. */ +#ifdef HAVE_X_WINDOWS + if (display_p) +#endif + { + xassert (face != NULL); + PREPARE_FACE_FOR_DISPLAY (f, face); + } + + return face; +} + + +/* Set background width of glyph string S. START is the index of the + first glyph following S. LAST_X is the right-most x-position + 1 + in the drawing area. */ + +static INLINE void +set_glyph_string_background_width (s, start, last_x) + struct glyph_string *s; + int start; + int last_x; +{ + /* If the face of this glyph string has to be drawn to the end of + the drawing area, set S->extends_to_end_of_line_p. */ + struct face *default_face = FACE_FROM_ID (s->f, DEFAULT_FACE_ID); + + if (start == s->row->used[s->area] + && s->area == TEXT_AREA + && ((s->hl == DRAW_NORMAL_TEXT + && (s->row->fill_line_p + || s->face->background != default_face->background + || s->face->stipple != default_face->stipple + || s->row->mouse_face_p)) + || s->hl == DRAW_MOUSE_FACE + || ((s->hl == DRAW_IMAGE_RAISED || s->hl == DRAW_IMAGE_SUNKEN) + && s->row->fill_line_p))) + s->extends_to_end_of_line_p = 1; + + /* If S extends its face to the end of the line, set its + background_width to the distance to the right edge of the drawing + area. */ + if (s->extends_to_end_of_line_p) + s->background_width = last_x - s->x + 1; + else + s->background_width = s->width; +} + + +/* Compute overhangs and x-positions for glyph string S and its + predecessors, or successors. X is the starting x-position for S. + BACKWARD_P non-zero means process predecessors. */ + +static void +compute_overhangs_and_x (s, x, backward_p) + struct glyph_string *s; + int x; + int backward_p; +{ + if (backward_p) + { + while (s) + { + if (rif->compute_glyph_string_overhangs) + rif->compute_glyph_string_overhangs (s); + x -= s->width; + s->x = x; + s = s->prev; + } + } + else + { + while (s) + { + if (rif->compute_glyph_string_overhangs) + rif->compute_glyph_string_overhangs (s); + s->x = x; + x += s->width; + s = s->next; + } + } +} + + + +/* The following macros are only called from draw_glyphs below. + They reference the following parameters of that function directly: + `w', `row', `area', and `overlap_p' + as well as the following local variables: + `s', `f', and `hdc' (in W32) */ + +#ifdef HAVE_NTGUI +/* On W32, silently add local `hdc' variable to argument list of + init_glyph_string. */ +#define INIT_GLYPH_STRING(s, char2b, w, row, area, start, hl) \ + init_glyph_string (s, hdc, char2b, w, row, area, start, hl) +#else +#define INIT_GLYPH_STRING(s, char2b, w, row, area, start, hl) \ + init_glyph_string (s, char2b, w, row, area, start, hl) +#endif + +/* Add a glyph string for a stretch glyph to the list of strings + between HEAD and TAIL. START is the index of the stretch glyph in + row area AREA of glyph row ROW. END is the index of the last glyph + in that glyph row area. X is the current output position assigned + to the new glyph string constructed. HL overrides that face of the + glyph; e.g. it is DRAW_CURSOR if a cursor has to be drawn. LAST_X + is the right-most x-position of the drawing area. */ + +/* SunOS 4 bundled cc, barfed on continuations in the arg lists here + and below -- keep them on one line. */ +#define BUILD_STRETCH_GLYPH_STRING(START, END, HEAD, TAIL, HL, X, LAST_X) \ + do \ + { \ + s = (struct glyph_string *) alloca (sizeof *s); \ + INIT_GLYPH_STRING (s, NULL, w, row, area, START, HL); \ + START = fill_stretch_glyph_string (s, row, area, START, END); \ + append_glyph_string (&HEAD, &TAIL, s); \ + s->x = (X); \ + } \ + while (0) + + +/* Add a glyph string for an image glyph to the list of strings + between HEAD and TAIL. START is the index of the image glyph in + row area AREA of glyph row ROW. END is the index of the last glyph + in that glyph row area. X is the current output position assigned + to the new glyph string constructed. HL overrides that face of the + glyph; e.g. it is DRAW_CURSOR if a cursor has to be drawn. LAST_X + is the right-most x-position of the drawing area. */ + +#define BUILD_IMAGE_GLYPH_STRING(START, END, HEAD, TAIL, HL, X, LAST_X) \ + do \ + { \ + s = (struct glyph_string *) alloca (sizeof *s); \ + INIT_GLYPH_STRING (s, NULL, w, row, area, START, HL); \ + fill_image_glyph_string (s); \ + append_glyph_string (&HEAD, &TAIL, s); \ + ++START; \ + s->x = (X); \ + } \ + while (0) + + +/* Add a glyph string for a sequence of character glyphs to the list + of strings between HEAD and TAIL. START is the index of the first + glyph in row area AREA of glyph row ROW that is part of the new + glyph string. END is the index of the last glyph in that glyph row + area. X is the current output position assigned to the new glyph + string constructed. HL overrides that face of the glyph; e.g. it + is DRAW_CURSOR if a cursor has to be drawn. LAST_X is the + right-most x-position of the drawing area. */ + +#define BUILD_CHAR_GLYPH_STRINGS(START, END, HEAD, TAIL, HL, X, LAST_X) \ + do \ + { \ + int c, face_id; \ + XChar2b *char2b; \ + \ + c = (row)->glyphs[area][START].u.ch; \ + face_id = (row)->glyphs[area][START].face_id; \ + \ + s = (struct glyph_string *) alloca (sizeof *s); \ + char2b = (XChar2b *) alloca ((END - START) * sizeof *char2b); \ + INIT_GLYPH_STRING (s, char2b, w, row, area, START, HL); \ + append_glyph_string (&HEAD, &TAIL, s); \ + s->x = (X); \ + START = fill_glyph_string (s, face_id, START, END, overlaps_p); \ + } \ + while (0) + + +/* Add a glyph string for a composite sequence to the list of strings + between HEAD and TAIL. START is the index of the first glyph in + row area AREA of glyph row ROW that is part of the new glyph + string. END is the index of the last glyph in that glyph row area. + X is the current output position assigned to the new glyph string + constructed. HL overrides that face of the glyph; e.g. it is + DRAW_CURSOR if a cursor has to be drawn. LAST_X is the right-most + x-position of the drawing area. */ + +#define BUILD_COMPOSITE_GLYPH_STRING(START, END, HEAD, TAIL, HL, X, LAST_X) \ + do { \ + int cmp_id = (row)->glyphs[area][START].u.cmp_id; \ + int face_id = (row)->glyphs[area][START].face_id; \ + struct face *base_face = FACE_FROM_ID (f, face_id); \ + struct composition *cmp = composition_table[cmp_id]; \ + int glyph_len = cmp->glyph_len; \ + XChar2b *char2b; \ + struct face **faces; \ + struct glyph_string *first_s = NULL; \ + int n; \ + \ + base_face = base_face->ascii_face; \ + char2b = (XChar2b *) alloca ((sizeof *char2b) * glyph_len); \ + faces = (struct face **) alloca ((sizeof *faces) * glyph_len); \ + /* At first, fill in `char2b' and `faces'. */ \ + for (n = 0; n < glyph_len; n++) \ + { \ + int c = COMPOSITION_GLYPH (cmp, n); \ + int this_face_id = FACE_FOR_CHAR (f, base_face, c); \ + faces[n] = FACE_FROM_ID (f, this_face_id); \ + get_char_face_and_encoding (f, c, this_face_id, \ + char2b + n, 1, 1); \ + } \ + \ + /* Make glyph_strings for each glyph sequence that is drawable by \ + the same face, and append them to HEAD/TAIL. */ \ + for (n = 0; n < cmp->glyph_len;) \ + { \ + s = (struct glyph_string *) alloca (sizeof *s); \ + INIT_GLYPH_STRING (s, char2b + n, w, row, area, START, HL); \ + append_glyph_string (&(HEAD), &(TAIL), s); \ + s->cmp = cmp; \ + s->gidx = n; \ + s->x = (X); \ + \ + if (n == 0) \ + first_s = s; \ + \ + n = fill_composite_glyph_string (s, faces, overlaps_p); \ + } \ + \ + ++START; \ + s = first_s; \ + } while (0) + + +/* Build a list of glyph strings between HEAD and TAIL for the glyphs + of AREA of glyph row ROW on window W between indices START and END. + HL overrides the face for drawing glyph strings, e.g. it is + DRAW_CURSOR to draw a cursor. X and LAST_X are start and end + x-positions of the drawing area. + + This is an ugly monster macro construct because we must use alloca + to allocate glyph strings (because draw_glyphs can be called + asynchronously). */ + +#define BUILD_GLYPH_STRINGS(START, END, HEAD, TAIL, HL, X, LAST_X) \ + do \ + { \ + HEAD = TAIL = NULL; \ + while (START < END) \ + { \ + struct glyph *first_glyph = (row)->glyphs[area] + START; \ + switch (first_glyph->type) \ + { \ + case CHAR_GLYPH: \ + BUILD_CHAR_GLYPH_STRINGS (START, END, HEAD, TAIL, \ + HL, X, LAST_X); \ + break; \ + \ + case COMPOSITE_GLYPH: \ + BUILD_COMPOSITE_GLYPH_STRING (START, END, HEAD, TAIL, \ + HL, X, LAST_X); \ + break; \ + \ + case STRETCH_GLYPH: \ + BUILD_STRETCH_GLYPH_STRING (START, END, HEAD, TAIL, \ + HL, X, LAST_X); \ + break; \ + \ + case IMAGE_GLYPH: \ + BUILD_IMAGE_GLYPH_STRING (START, END, HEAD, TAIL, \ + HL, X, LAST_X); \ + break; \ + \ + default: \ + abort (); \ + } \ + \ + set_glyph_string_background_width (s, START, LAST_X); \ + (X) += s->width; \ + } \ + } \ + while (0) + + +/* Draw glyphs between START and END in AREA of ROW on window W, + starting at x-position X. X is relative to AREA in W. HL is a + face-override with the following meaning: + + DRAW_NORMAL_TEXT draw normally + DRAW_CURSOR draw in cursor face + DRAW_MOUSE_FACE draw in mouse face. + DRAW_INVERSE_VIDEO draw in mode line face + DRAW_IMAGE_SUNKEN draw an image with a sunken relief around it + DRAW_IMAGE_RAISED draw an image with a raised relief around it + + If OVERLAPS_P is non-zero, draw only the foreground of characters + and clip to the physical height of ROW. + + Value is the x-position reached, relative to AREA of W. */ + +static int +draw_glyphs (w, x, row, area, start, end, hl, overlaps_p) + struct window *w; + int x; + struct glyph_row *row; + enum glyph_row_area area; + int start, end; + enum draw_glyphs_face hl; + int overlaps_p; +{ + struct glyph_string *head, *tail; + struct glyph_string *s; + int last_x, area_width; + int x_reached; + int i, j; + struct frame *f = XFRAME (WINDOW_FRAME (w)); + DECLARE_HDC (hdc); + + ALLOCATE_HDC (hdc, f); + + /* Let's rather be paranoid than getting a SEGV. */ + end = min (end, row->used[area]); + start = max (0, start); + start = min (end, start); + + /* Translate X to frame coordinates. Set last_x to the right + end of the drawing area. */ + if (row->full_width_p) + { + /* X is relative to the left edge of W, without scroll bars + or fringes. */ + x += WINDOW_LEFT_EDGE_X (w); + last_x = WINDOW_LEFT_EDGE_X (w) + WINDOW_TOTAL_WIDTH (w); + } + else + { + int area_left = window_box_left (w, area); + x += area_left; + area_width = window_box_width (w, area); + last_x = area_left + area_width; + } + + /* Build a doubly-linked list of glyph_string structures between + head and tail from what we have to draw. Note that the macro + BUILD_GLYPH_STRINGS will modify its start parameter. That's + the reason we use a separate variable `i'. */ + i = start; + BUILD_GLYPH_STRINGS (i, end, head, tail, hl, x, last_x); + if (tail) + x_reached = tail->x + tail->background_width; + else + x_reached = x; + + /* If there are any glyphs with lbearing < 0 or rbearing > width in + the row, redraw some glyphs in front or following the glyph + strings built above. */ + if (head && !overlaps_p && row->contains_overlapping_glyphs_p) + { + int dummy_x = 0; + struct glyph_string *h, *t; + + /* Compute overhangs for all glyph strings. */ + if (rif->compute_glyph_string_overhangs) + for (s = head; s; s = s->next) + rif->compute_glyph_string_overhangs (s); + + /* Prepend glyph strings for glyphs in front of the first glyph + string that are overwritten because of the first glyph + string's left overhang. The background of all strings + prepended must be drawn because the first glyph string + draws over it. */ + i = left_overwritten (head); + if (i >= 0) + { + j = i; + BUILD_GLYPH_STRINGS (j, start, h, t, + DRAW_NORMAL_TEXT, dummy_x, last_x); + start = i; + compute_overhangs_and_x (t, head->x, 1); + prepend_glyph_string_lists (&head, &tail, h, t); + } + + /* Prepend glyph strings for glyphs in front of the first glyph + string that overwrite that glyph string because of their + right overhang. For these strings, only the foreground must + be drawn, because it draws over the glyph string at `head'. + The background must not be drawn because this would overwrite + right overhangs of preceding glyphs for which no glyph + strings exist. */ + i = left_overwriting (head); + if (i >= 0) + { + BUILD_GLYPH_STRINGS (i, start, h, t, + DRAW_NORMAL_TEXT, dummy_x, last_x); + for (s = h; s; s = s->next) + s->background_filled_p = 1; + compute_overhangs_and_x (t, head->x, 1); + prepend_glyph_string_lists (&head, &tail, h, t); + } + + /* Append glyphs strings for glyphs following the last glyph + string tail that are overwritten by tail. The background of + these strings has to be drawn because tail's foreground draws + over it. */ + i = right_overwritten (tail); + if (i >= 0) + { + BUILD_GLYPH_STRINGS (end, i, h, t, + DRAW_NORMAL_TEXT, x, last_x); + compute_overhangs_and_x (h, tail->x + tail->width, 0); + append_glyph_string_lists (&head, &tail, h, t); + } + + /* Append glyph strings for glyphs following the last glyph + string tail that overwrite tail. The foreground of such + glyphs has to be drawn because it writes into the background + of tail. The background must not be drawn because it could + paint over the foreground of following glyphs. */ + i = right_overwriting (tail); + if (i >= 0) + { + BUILD_GLYPH_STRINGS (end, i, h, t, + DRAW_NORMAL_TEXT, x, last_x); + for (s = h; s; s = s->next) + s->background_filled_p = 1; + compute_overhangs_and_x (h, tail->x + tail->width, 0); + append_glyph_string_lists (&head, &tail, h, t); + } + } + + /* Draw all strings. */ + for (s = head; s; s = s->next) + rif->draw_glyph_string (s); + + if (area == TEXT_AREA + && !row->full_width_p + /* When drawing overlapping rows, only the glyph strings' + foreground is drawn, which doesn't erase a cursor + completely. */ + && !overlaps_p) + { + int x0 = head ? head->x : x; + int x1 = tail ? tail->x + tail->background_width : x; + + int text_left = window_box_left (w, TEXT_AREA); + x0 -= text_left; + x1 -= text_left; + + notice_overwritten_cursor (w, TEXT_AREA, x0, x1, + row->y, MATRIX_ROW_BOTTOM_Y (row)); + } + + /* Value is the x-position up to which drawn, relative to AREA of W. + This doesn't include parts drawn because of overhangs. */ + if (row->full_width_p) + x_reached = FRAME_TO_WINDOW_PIXEL_X (w, x_reached); + else + x_reached -= window_box_left (w, area); + + RELEASE_HDC (hdc, f); + + return x_reached; +} + + +/* Store one glyph for IT->char_to_display in IT->glyph_row. + Called from x_produce_glyphs when IT->glyph_row is non-null. */ + +static INLINE void +append_glyph (it) + struct it *it; +{ + struct glyph *glyph; + enum glyph_row_area area = it->area; + + xassert (it->glyph_row); + xassert (it->char_to_display != '\n' && it->char_to_display != '\t'); + + glyph = it->glyph_row->glyphs[area] + it->glyph_row->used[area]; + if (glyph < it->glyph_row->glyphs[area + 1]) + { + glyph->charpos = CHARPOS (it->position); + glyph->object = it->object; + glyph->pixel_width = it->pixel_width; + glyph->voffset = it->voffset; + glyph->type = CHAR_GLYPH; + glyph->multibyte_p = it->multibyte_p; + glyph->left_box_line_p = it->start_of_box_run_p; + glyph->right_box_line_p = it->end_of_box_run_p; + glyph->overlaps_vertically_p = (it->phys_ascent > it->ascent + || it->phys_descent > it->descent); + glyph->padding_p = 0; + glyph->glyph_not_available_p = it->glyph_not_available_p; + glyph->face_id = it->face_id; + glyph->u.ch = it->char_to_display; + glyph->font_type = FONT_TYPE_UNKNOWN; + ++it->glyph_row->used[area]; + } +} + +/* Store one glyph for the composition IT->cmp_id in IT->glyph_row. + Called from x_produce_glyphs when IT->glyph_row is non-null. */ + +static INLINE void +append_composite_glyph (it) + struct it *it; +{ + struct glyph *glyph; + enum glyph_row_area area = it->area; + + xassert (it->glyph_row); + + glyph = it->glyph_row->glyphs[area] + it->glyph_row->used[area]; + if (glyph < it->glyph_row->glyphs[area + 1]) + { + glyph->charpos = CHARPOS (it->position); + glyph->object = it->object; + glyph->pixel_width = it->pixel_width; + glyph->voffset = it->voffset; + glyph->type = COMPOSITE_GLYPH; + glyph->multibyte_p = it->multibyte_p; + glyph->left_box_line_p = it->start_of_box_run_p; + glyph->right_box_line_p = it->end_of_box_run_p; + glyph->overlaps_vertically_p = (it->phys_ascent > it->ascent + || it->phys_descent > it->descent); + glyph->padding_p = 0; + glyph->glyph_not_available_p = 0; + glyph->face_id = it->face_id; + glyph->u.cmp_id = it->cmp_id; + glyph->font_type = FONT_TYPE_UNKNOWN; + ++it->glyph_row->used[area]; + } +} + + +/* Change IT->ascent and IT->height according to the setting of + IT->voffset. */ + +static INLINE void +take_vertical_position_into_account (it) + struct it *it; +{ + if (it->voffset) + { + if (it->voffset < 0) + /* Increase the ascent so that we can display the text higher + in the line. */ + it->ascent += abs (it->voffset); + else + /* Increase the descent so that we can display the text lower + in the line. */ + it->descent += it->voffset; + } +} + + +/* Produce glyphs/get display metrics for the image IT is loaded with. + See the description of struct display_iterator in dispextern.h for + an overview of struct display_iterator. */ + +static void +produce_image_glyph (it) + struct it *it; +{ + struct image *img; + struct face *face; + + xassert (it->what == IT_IMAGE); + + face = FACE_FROM_ID (it->f, it->face_id); + img = IMAGE_FROM_ID (it->f, it->image_id); + xassert (img); + + /* Make sure X resources of the face and image are loaded. */ + PREPARE_FACE_FOR_DISPLAY (it->f, face); + prepare_image_for_display (it->f, img); + + it->ascent = it->phys_ascent = image_ascent (img, face); + it->descent = it->phys_descent = img->height + 2 * img->vmargin - it->ascent; + it->pixel_width = img->width + 2 * img->hmargin; + + it->nglyphs = 1; + + if (face->box != FACE_NO_BOX) + { + if (face->box_line_width > 0) + { + it->ascent += face->box_line_width; + it->descent += face->box_line_width; + } + + if (it->start_of_box_run_p) + it->pixel_width += abs (face->box_line_width); + if (it->end_of_box_run_p) + it->pixel_width += abs (face->box_line_width); + } + + take_vertical_position_into_account (it); + + if (it->glyph_row) + { + struct glyph *glyph; + enum glyph_row_area area = it->area; + + glyph = it->glyph_row->glyphs[area] + it->glyph_row->used[area]; + if (glyph < it->glyph_row->glyphs[area + 1]) + { + glyph->charpos = CHARPOS (it->position); + glyph->object = it->object; + glyph->pixel_width = it->pixel_width; + glyph->voffset = it->voffset; + glyph->type = IMAGE_GLYPH; + glyph->multibyte_p = it->multibyte_p; + glyph->left_box_line_p = it->start_of_box_run_p; + glyph->right_box_line_p = it->end_of_box_run_p; + glyph->overlaps_vertically_p = 0; + glyph->padding_p = 0; + glyph->glyph_not_available_p = 0; + glyph->face_id = it->face_id; + glyph->u.img_id = img->id; + glyph->font_type = FONT_TYPE_UNKNOWN; + ++it->glyph_row->used[area]; + } + } +} + + +/* Append a stretch glyph to IT->glyph_row. OBJECT is the source + of the glyph, WIDTH and HEIGHT are the width and height of the + stretch. ASCENT is the percentage/100 of HEIGHT to use for the + ascent of the glyph (0 <= ASCENT <= 1). */ + +static void +append_stretch_glyph (it, object, width, height, ascent) + struct it *it; + Lisp_Object object; + int width, height; + double ascent; +{ + struct glyph *glyph; + enum glyph_row_area area = it->area; + + xassert (ascent >= 0 && ascent <= 1); + + glyph = it->glyph_row->glyphs[area] + it->glyph_row->used[area]; + if (glyph < it->glyph_row->glyphs[area + 1]) + { + glyph->charpos = CHARPOS (it->position); + glyph->object = object; + glyph->pixel_width = width; + glyph->voffset = it->voffset; + glyph->type = STRETCH_GLYPH; + glyph->multibyte_p = it->multibyte_p; + glyph->left_box_line_p = it->start_of_box_run_p; + glyph->right_box_line_p = it->end_of_box_run_p; + glyph->overlaps_vertically_p = 0; + glyph->padding_p = 0; + glyph->glyph_not_available_p = 0; + glyph->face_id = it->face_id; + glyph->u.stretch.ascent = height * ascent; + glyph->u.stretch.height = height; + glyph->font_type = FONT_TYPE_UNKNOWN; + ++it->glyph_row->used[area]; + } +} + + +/* Produce a stretch glyph for iterator IT. IT->object is the value + of the glyph property displayed. The value must be a list + `(space KEYWORD VALUE ...)' with the following KEYWORD/VALUE pairs + being recognized: + + 1. `:width WIDTH' specifies that the space should be WIDTH * + canonical char width wide. WIDTH may be an integer or floating + point number. + + 2. `:relative-width FACTOR' specifies that the width of the stretch + should be computed from the width of the first character having the + `glyph' property, and should be FACTOR times that width. + + 3. `:align-to HPOS' specifies that the space should be wide enough + to reach HPOS, a value in canonical character units. + + Exactly one of the above pairs must be present. + + 4. `:height HEIGHT' specifies that the height of the stretch produced + should be HEIGHT, measured in canonical character units. + + 5. `:relative-height FACTOR' specifies that the height of the + stretch should be FACTOR times the height of the characters having + the glyph property. + + Either none or exactly one of 4 or 5 must be present. + + 6. `:ascent ASCENT' specifies that ASCENT percent of the height + of the stretch should be used for the ascent of the stretch. + ASCENT must be in the range 0 <= ASCENT <= 100. */ + +#define NUMVAL(X) \ + ((INTEGERP (X) || FLOATP (X)) \ + ? XFLOATINT (X) \ + : - 1) + + +static void +produce_stretch_glyph (it) + struct it *it; +{ + /* (space :width WIDTH :height HEIGHT. */ + Lisp_Object prop, plist; + int width = 0, height = 0; + double ascent = 0; + struct face *face = FACE_FROM_ID (it->f, it->face_id); + XFontStruct *font = face->font ? face->font : FRAME_FONT (it->f); + + PREPARE_FACE_FOR_DISPLAY (it->f, face); + + /* List should start with `space'. */ + xassert (CONSP (it->object) && EQ (XCAR (it->object), Qspace)); + plist = XCDR (it->object); + + /* Compute the width of the stretch. */ + if (prop = Fplist_get (plist, QCwidth), + NUMVAL (prop) > 0) + /* Absolute width `:width WIDTH' specified and valid. */ + width = NUMVAL (prop) * FRAME_COLUMN_WIDTH (it->f); + else if (prop = Fplist_get (plist, QCrelative_width), + NUMVAL (prop) > 0) + { + /* Relative width `:relative-width FACTOR' specified and valid. + Compute the width of the characters having the `glyph' + property. */ + struct it it2; + unsigned char *p = BYTE_POS_ADDR (IT_BYTEPOS (*it)); + + it2 = *it; + if (it->multibyte_p) + { + int maxlen = ((IT_BYTEPOS (*it) >= GPT ? ZV : GPT) + - IT_BYTEPOS (*it)); + it2.c = STRING_CHAR_AND_LENGTH (p, maxlen, it2.len); + } + else + it2.c = *p, it2.len = 1; + + it2.glyph_row = NULL; + it2.what = IT_CHARACTER; + x_produce_glyphs (&it2); + width = NUMVAL (prop) * it2.pixel_width; + } + else if (prop = Fplist_get (plist, QCalign_to), + NUMVAL (prop) > 0) + width = NUMVAL (prop) * FRAME_COLUMN_WIDTH (it->f) - it->current_x; + else + /* Nothing specified -> width defaults to canonical char width. */ + width = FRAME_COLUMN_WIDTH (it->f); + + /* Compute height. */ + if (prop = Fplist_get (plist, QCheight), + NUMVAL (prop) > 0) + height = NUMVAL (prop) * FRAME_LINE_HEIGHT (it->f); + else if (prop = Fplist_get (plist, QCrelative_height), + NUMVAL (prop) > 0) + height = FONT_HEIGHT (font) * NUMVAL (prop); + else + height = FONT_HEIGHT (font); + + /* Compute percentage of height used for ascent. If + `:ascent ASCENT' is present and valid, use that. Otherwise, + derive the ascent from the font in use. */ + if (prop = Fplist_get (plist, QCascent), + NUMVAL (prop) > 0 && NUMVAL (prop) <= 100) + ascent = NUMVAL (prop) / 100.0; + else + ascent = (double) FONT_BASE (font) / FONT_HEIGHT (font); + + if (width <= 0) + width = 1; + if (height <= 0) + height = 1; + + if (it->glyph_row) + { + Lisp_Object object = it->stack[it->sp - 1].string; + if (!STRINGP (object)) + object = it->w->buffer; + append_stretch_glyph (it, object, width, height, ascent); + } + + it->pixel_width = width; + it->ascent = it->phys_ascent = height * ascent; + it->descent = it->phys_descent = height - it->ascent; + it->nglyphs = 1; + + if (face->box != FACE_NO_BOX) + { + if (face->box_line_width > 0) + { + it->ascent += face->box_line_width; + it->descent += face->box_line_width; + } + + if (it->start_of_box_run_p) + it->pixel_width += abs (face->box_line_width); + if (it->end_of_box_run_p) + it->pixel_width += abs (face->box_line_width); + } + + take_vertical_position_into_account (it); +} + +/* RIF: + Produce glyphs/get display metrics for the display element IT is + loaded with. See the description of struct display_iterator in + dispextern.h for an overview of struct display_iterator. */ + +void +x_produce_glyphs (it) + struct it *it; +{ + it->glyph_not_available_p = 0; + + if (it->what == IT_CHARACTER) + { + XChar2b char2b; + XFontStruct *font; + struct face *face = FACE_FROM_ID (it->f, it->face_id); + XCharStruct *pcm; + int font_not_found_p; + struct font_info *font_info; + int boff; /* baseline offset */ + /* We may change it->multibyte_p upon unibyte<->multibyte + conversion. So, save the current value now and restore it + later. + + Note: It seems that we don't have to record multibyte_p in + struct glyph because the character code itself tells if or + not the character is multibyte. Thus, in the future, we must + consider eliminating the field `multibyte_p' in the struct + glyph. */ + int saved_multibyte_p = it->multibyte_p; + + /* Maybe translate single-byte characters to multibyte, or the + other way. */ + it->char_to_display = it->c; - if (!ASCII_BYTE_P (it->c)) ++ if (!ASCII_BYTE_P (it->c) ++ && ! it->multibyte_p) + { - if (unibyte_display_via_language_environment - && SINGLE_BYTE_CHAR_P (it->c) - && (it->c >= 0240 - || !NILP (Vnonascii_translation_table))) - { - it->char_to_display = unibyte_char_to_multibyte (it->c); - it->multibyte_p = 1; - it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display); - face = FACE_FROM_ID (it->f, it->face_id); - } - else if (!SINGLE_BYTE_CHAR_P (it->c) - && !it->multibyte_p) ++ if (SINGLE_BYTE_CHAR_P (it->c) ++ && unibyte_display_via_language_environment) ++ it->char_to_display = unibyte_char_to_multibyte (it->c); ++ if (! SINGLE_BYTE_CHAR_P (it->c)) + { + it->multibyte_p = 1; + it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display); + face = FACE_FROM_ID (it->f, it->face_id); + } + } + + /* Get font to use. Encode IT->char_to_display. */ + get_char_face_and_encoding (it->f, it->char_to_display, it->face_id, + &char2b, it->multibyte_p, 0); + font = face->font; + + /* When no suitable font found, use the default font. */ + font_not_found_p = font == NULL; + if (font_not_found_p) + { + font = FRAME_FONT (it->f); + boff = FRAME_BASELINE_OFFSET (it->f); + font_info = NULL; + } + else + { + font_info = FONT_INFO_FROM_ID (it->f, face->font_info_id); + boff = font_info->baseline_offset; + if (font_info->vertical_centering) + boff = VCENTER_BASELINE_OFFSET (font, it->f) - boff; + } + + if (it->char_to_display >= ' ' + && (!it->multibyte_p || it->char_to_display < 128)) + { + /* Either unibyte or ASCII. */ + int stretched_p; + + it->nglyphs = 1; + + pcm = rif->per_char_metric (font, &char2b, + FONT_TYPE_FOR_UNIBYTE (font, it->char_to_display)); + it->ascent = FONT_BASE (font) + boff; + it->descent = FONT_DESCENT (font) - boff; + + if (pcm) + { + it->phys_ascent = pcm->ascent + boff; + it->phys_descent = pcm->descent - boff; + it->pixel_width = pcm->width; + } + else + { + it->glyph_not_available_p = 1; + it->phys_ascent = FONT_BASE (font) + boff; + it->phys_descent = FONT_DESCENT (font) - boff; + it->pixel_width = FONT_WIDTH (font); + } + + /* If this is a space inside a region of text with + `space-width' property, change its width. */ + stretched_p = it->char_to_display == ' ' && !NILP (it->space_width); + if (stretched_p) + it->pixel_width *= XFLOATINT (it->space_width); + + /* If face has a box, add the box thickness to the character + height. If character has a box line to the left and/or + right, add the box line width to the character's width. */ + if (face->box != FACE_NO_BOX) + { + int thick = face->box_line_width; + + if (thick > 0) + { + it->ascent += thick; + it->descent += thick; + } + else + thick = -thick; + + if (it->start_of_box_run_p) + it->pixel_width += thick; + if (it->end_of_box_run_p) + it->pixel_width += thick; + } + + /* If face has an overline, add the height of the overline + (1 pixel) and a 1 pixel margin to the character height. */ + if (face->overline_p) + it->ascent += 2; + + take_vertical_position_into_account (it); + + /* If we have to actually produce glyphs, do it. */ + if (it->glyph_row) + { + if (stretched_p) + { + /* Translate a space with a `space-width' property + into a stretch glyph. */ + double ascent = (double) FONT_BASE (font) + / FONT_HEIGHT (font); + append_stretch_glyph (it, it->object, it->pixel_width, + it->ascent + it->descent, ascent); + } + else + append_glyph (it); + + /* If characters with lbearing or rbearing are displayed + in this line, record that fact in a flag of the + glyph row. This is used to optimize X output code. */ + if (pcm && (pcm->lbearing < 0 || pcm->rbearing > pcm->width)) + it->glyph_row->contains_overlapping_glyphs_p = 1; + } + } + else if (it->char_to_display == '\n') + { + /* A newline has no width but we need the height of the line. */ + it->pixel_width = 0; + it->nglyphs = 0; + it->ascent = it->phys_ascent = FONT_BASE (font) + boff; + it->descent = it->phys_descent = FONT_DESCENT (font) - boff; + + if (face->box != FACE_NO_BOX + && face->box_line_width > 0) + { + it->ascent += face->box_line_width; + it->descent += face->box_line_width; + } + } + else if (it->char_to_display == '\t') + { + int tab_width = it->tab_width * FRAME_COLUMN_WIDTH (it->f); + int x = it->current_x + it->continuation_lines_width; + int next_tab_x = ((1 + x + tab_width - 1) / tab_width) * tab_width; + + /* If the distance from the current position to the next tab + stop is less than a canonical character width, use the + tab stop after that. */ + if (next_tab_x - x < FRAME_COLUMN_WIDTH (it->f)) + next_tab_x += tab_width; + + it->pixel_width = next_tab_x - x; + it->nglyphs = 1; + it->ascent = it->phys_ascent = FONT_BASE (font) + boff; + it->descent = it->phys_descent = FONT_DESCENT (font) - boff; + + if (it->glyph_row) + { + double ascent = (double) it->ascent / (it->ascent + it->descent); + append_stretch_glyph (it, it->object, it->pixel_width, + it->ascent + it->descent, ascent); + } + } + else + { + /* A multi-byte character. Assume that the display width of the + character is the width of the character multiplied by the + width of the font. */ + + /* If we found a font, this font should give us the right + metrics. If we didn't find a font, use the frame's - default font and calculate the width of the character - from the charset width; this is what old redisplay code - did. */ ++ default font and calculate the width of the character by ++ multiplying the width of font by the width of the ++ character. */ + + pcm = rif->per_char_metric (font, &char2b, + FONT_TYPE_FOR_MULTIBYTE (font, it->c)); + + if (font_not_found_p || !pcm) + { - int charset = CHAR_CHARSET (it->char_to_display); - + it->glyph_not_available_p = 1; + it->pixel_width = (FRAME_COLUMN_WIDTH (it->f) - * CHARSET_WIDTH (charset)); ++ * CHAR_WIDTH (it->char_to_display)); + it->phys_ascent = FONT_BASE (font) + boff; + it->phys_descent = FONT_DESCENT (font) - boff; + } + else + { + it->pixel_width = pcm->width; + it->phys_ascent = pcm->ascent + boff; + it->phys_descent = pcm->descent - boff; + if (it->glyph_row + && (pcm->lbearing < 0 + || pcm->rbearing > pcm->width)) + it->glyph_row->contains_overlapping_glyphs_p = 1; + } + it->nglyphs = 1; + it->ascent = FONT_BASE (font) + boff; + it->descent = FONT_DESCENT (font) - boff; + if (face->box != FACE_NO_BOX) + { + int thick = face->box_line_width; + + if (thick > 0) + { + it->ascent += thick; + it->descent += thick; + } + else + thick = - thick; + + if (it->start_of_box_run_p) + it->pixel_width += thick; + if (it->end_of_box_run_p) + it->pixel_width += thick; + } + + /* If face has an overline, add the height of the overline + (1 pixel) and a 1 pixel margin to the character height. */ + if (face->overline_p) + it->ascent += 2; + + take_vertical_position_into_account (it); + + if (it->glyph_row) + append_glyph (it); + } + it->multibyte_p = saved_multibyte_p; + } + else if (it->what == IT_COMPOSITION) + { + /* Note: A composition is represented as one glyph in the + glyph matrix. There are no padding glyphs. */ + XChar2b char2b; + XFontStruct *font; + struct face *face = FACE_FROM_ID (it->f, it->face_id); + XCharStruct *pcm; + int font_not_found_p; + struct font_info *font_info; + int boff; /* baseline offset */ + struct composition *cmp = composition_table[it->cmp_id]; + + /* Maybe translate single-byte characters to multibyte. */ + it->char_to_display = it->c; + if (unibyte_display_via_language_environment - && SINGLE_BYTE_CHAR_P (it->c) - && (it->c >= 0240 - || (it->c >= 0200 - && !NILP (Vnonascii_translation_table)))) ++ && it->c >= 0200) + { + it->char_to_display = unibyte_char_to_multibyte (it->c); + } + + /* Get face and font to use. Encode IT->char_to_display. */ + it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display); + face = FACE_FROM_ID (it->f, it->face_id); + get_char_face_and_encoding (it->f, it->char_to_display, it->face_id, + &char2b, it->multibyte_p, 0); + font = face->font; + + /* When no suitable font found, use the default font. */ + font_not_found_p = font == NULL; + if (font_not_found_p) + { + font = FRAME_FONT (it->f); + boff = FRAME_BASELINE_OFFSET (it->f); + font_info = NULL; + } + else + { + font_info = FONT_INFO_FROM_ID (it->f, face->font_info_id); + boff = font_info->baseline_offset; + if (font_info->vertical_centering) + boff = VCENTER_BASELINE_OFFSET (font, it->f) - boff; + } + + /* There are no padding glyphs, so there is only one glyph to + produce for the composition. Important is that pixel_width, + ascent and descent are the values of what is drawn by + draw_glyphs (i.e. the values of the overall glyphs composed). */ + it->nglyphs = 1; + + /* If we have not yet calculated pixel size data of glyphs of + the composition for the current face font, calculate them + now. Theoretically, we have to check all fonts for the + glyphs, but that requires much time and memory space. So, + here we check only the font of the first glyph. This leads + to incorrect display very rarely, and C-l (recenter) can + correct the display anyway. */ + if (cmp->font != (void *) font) + { + /* Ascent and descent of the font of the first character of + this composition (adjusted by baseline offset). Ascent + and descent of overall glyphs should not be less than + them respectively. */ + int font_ascent = FONT_BASE (font) + boff; + int font_descent = FONT_DESCENT (font) - boff; + /* Bounding box of the overall glyphs. */ + int leftmost, rightmost, lowest, highest; + int i, width, ascent, descent; + + cmp->font = (void *) font; + + /* Initialize the bounding box. */ + if (font_info + && (pcm = rif->per_char_metric (font, &char2b, + FONT_TYPE_FOR_MULTIBYTE (font, it->c)))) + { + width = pcm->width; + ascent = pcm->ascent; + descent = pcm->descent; + } + else + { + width = FONT_WIDTH (font); + ascent = FONT_BASE (font); + descent = FONT_DESCENT (font); + } + + rightmost = width; + lowest = - descent + boff; + highest = ascent + boff; + leftmost = 0; + + if (font_info + && font_info->default_ascent + && CHAR_TABLE_P (Vuse_default_ascent) + && !NILP (Faref (Vuse_default_ascent, + make_number (it->char_to_display)))) + highest = font_info->default_ascent + boff; + + /* Draw the first glyph at the normal position. It may be + shifted to right later if some other glyphs are drawn at + the left. */ + cmp->offsets[0] = 0; + cmp->offsets[1] = boff; + + /* Set cmp->offsets for the remaining glyphs. */ + for (i = 1; i < cmp->glyph_len; i++) + { + int left, right, btm, top; + int ch = COMPOSITION_GLYPH (cmp, i); + int face_id = FACE_FOR_CHAR (it->f, face, ch); + + face = FACE_FROM_ID (it->f, face_id); + get_char_face_and_encoding (it->f, ch, face->id, + &char2b, it->multibyte_p, 0); + font = face->font; + if (font == NULL) + { + font = FRAME_FONT (it->f); + boff = FRAME_BASELINE_OFFSET (it->f); + font_info = NULL; + } + else + { + font_info + = FONT_INFO_FROM_ID (it->f, face->font_info_id); + boff = font_info->baseline_offset; + if (font_info->vertical_centering) + boff = VCENTER_BASELINE_OFFSET (font, it->f) - boff; + } + + if (font_info + && (pcm = rif->per_char_metric (font, &char2b, + FONT_TYPE_FOR_MULTIBYTE (font, ch)))) + { + width = pcm->width; + ascent = pcm->ascent; + descent = pcm->descent; + } + else + { + width = FONT_WIDTH (font); + ascent = 1; + descent = 0; + } + + if (cmp->method != COMPOSITION_WITH_RULE_ALTCHARS) + { + /* Relative composition with or without + alternate chars. */ + left = (leftmost + rightmost - width) / 2; + btm = - descent + boff; + if (font_info && font_info->relative_compose + && (! CHAR_TABLE_P (Vignore_relative_composition) + || NILP (Faref (Vignore_relative_composition, + make_number (ch))))) + { + + if (- descent >= font_info->relative_compose) + /* One extra pixel between two glyphs. */ + btm = highest + 1; + else if (ascent <= 0) + /* One extra pixel between two glyphs. */ + btm = lowest - 1 - ascent - descent; + } + } + else + { + /* A composition rule is specified by an integer + value that encodes global and new reference + points (GREF and NREF). GREF and NREF are + specified by numbers as below: + + 0---1---2 -- ascent + | | + | | + | | + 9--10--11 -- center + | | + ---3---4---5--- baseline + | | + 6---7---8 -- descent + */ + int rule = COMPOSITION_RULE (cmp, i); + int gref, nref, grefx, grefy, nrefx, nrefy; + + COMPOSITION_DECODE_RULE (rule, gref, nref); + grefx = gref % 3, nrefx = nref % 3; + grefy = gref / 3, nrefy = nref / 3; + + left = (leftmost + + grefx * (rightmost - leftmost) / 2 + - nrefx * width / 2); + btm = ((grefy == 0 ? highest + : grefy == 1 ? 0 + : grefy == 2 ? lowest + : (highest + lowest) / 2) + - (nrefy == 0 ? ascent + descent + : nrefy == 1 ? descent - boff + : nrefy == 2 ? 0 + : (ascent + descent) / 2)); + } + + cmp->offsets[i * 2] = left; + cmp->offsets[i * 2 + 1] = btm + descent; + + /* Update the bounding box of the overall glyphs. */ + right = left + width; + top = btm + descent + ascent; + if (left < leftmost) + leftmost = left; + if (right > rightmost) + rightmost = right; + if (top > highest) + highest = top; + if (btm < lowest) + lowest = btm; + } + + /* If there are glyphs whose x-offsets are negative, + shift all glyphs to the right and make all x-offsets + non-negative. */ + if (leftmost < 0) + { + for (i = 0; i < cmp->glyph_len; i++) + cmp->offsets[i * 2] -= leftmost; + rightmost -= leftmost; + } + + cmp->pixel_width = rightmost; + cmp->ascent = highest; + cmp->descent = - lowest; + if (cmp->ascent < font_ascent) + cmp->ascent = font_ascent; + if (cmp->descent < font_descent) + cmp->descent = font_descent; + } + + it->pixel_width = cmp->pixel_width; + it->ascent = it->phys_ascent = cmp->ascent; + it->descent = it->phys_descent = cmp->descent; + + if (face->box != FACE_NO_BOX) + { + int thick = face->box_line_width; + + if (thick > 0) + { + it->ascent += thick; + it->descent += thick; + } + else + thick = - thick; + + if (it->start_of_box_run_p) + it->pixel_width += thick; + if (it->end_of_box_run_p) + it->pixel_width += thick; + } + + /* If face has an overline, add the height of the overline + (1 pixel) and a 1 pixel margin to the character height. */ + if (face->overline_p) + it->ascent += 2; + + take_vertical_position_into_account (it); + + if (it->glyph_row) + append_composite_glyph (it); + } + else if (it->what == IT_IMAGE) + produce_image_glyph (it); + else if (it->what == IT_STRETCH) + produce_stretch_glyph (it); + + /* Accumulate dimensions. Note: can't assume that it->descent > 0 + because this isn't true for images with `:ascent 100'. */ + xassert (it->ascent >= 0 && it->descent >= 0); + if (it->area == TEXT_AREA) + it->current_x += it->pixel_width; + + it->descent += it->extra_line_spacing; + + it->max_ascent = max (it->max_ascent, it->ascent); + it->max_descent = max (it->max_descent, it->descent); + it->max_phys_ascent = max (it->max_phys_ascent, it->phys_ascent); + it->max_phys_descent = max (it->max_phys_descent, it->phys_descent); +} + +/* EXPORT for RIF: + Output LEN glyphs starting at START at the nominal cursor position. + Advance the nominal cursor over the text. The global variable + updated_window contains the window being updated, updated_row is + the glyph row being updated, and updated_area is the area of that + row being updated. */ + +void +x_write_glyphs (start, len) + struct glyph *start; + int len; +{ + int x, hpos; + + xassert (updated_window && updated_row); + BLOCK_INPUT; + + /* Write glyphs. */ + + hpos = start - updated_row->glyphs[updated_area]; + x = draw_glyphs (updated_window, output_cursor.x, + updated_row, updated_area, + hpos, hpos + len, + DRAW_NORMAL_TEXT, 0); + + /* Invalidate old phys cursor if the glyph at its hpos is redrawn. */ + if (updated_area == TEXT_AREA + && updated_window->phys_cursor_on_p + && updated_window->phys_cursor.vpos == output_cursor.vpos + && updated_window->phys_cursor.hpos >= hpos + && updated_window->phys_cursor.hpos < hpos + len) + updated_window->phys_cursor_on_p = 0; + + UNBLOCK_INPUT; + + /* Advance the output cursor. */ + output_cursor.hpos += len; + output_cursor.x = x; +} + + +/* EXPORT for RIF: + Insert LEN glyphs from START at the nominal cursor position. */ + +void +x_insert_glyphs (start, len) + struct glyph *start; + int len; +{ + struct frame *f; + struct window *w; + int line_height, shift_by_width, shifted_region_width; + struct glyph_row *row; + struct glyph *glyph; + int frame_x, frame_y, hpos; + + xassert (updated_window && updated_row); + BLOCK_INPUT; + w = updated_window; + f = XFRAME (WINDOW_FRAME (w)); + + /* Get the height of the line we are in. */ + row = updated_row; + line_height = row->height; + + /* Get the width of the glyphs to insert. */ + shift_by_width = 0; + for (glyph = start; glyph < start + len; ++glyph) + shift_by_width += glyph->pixel_width; + + /* Get the width of the region to shift right. */ + shifted_region_width = (window_box_width (w, updated_area) + - output_cursor.x + - shift_by_width); + + /* Shift right. */ + frame_x = window_box_left (w, updated_area) + output_cursor.x; + frame_y = WINDOW_TO_FRAME_PIXEL_Y (w, output_cursor.y); + + rif->shift_glyphs_for_insert (f, frame_x, frame_y, shifted_region_width, + line_height, shift_by_width); + + /* Write the glyphs. */ + hpos = start - row->glyphs[updated_area]; + draw_glyphs (w, output_cursor.x, row, updated_area, + hpos, hpos + len, + DRAW_NORMAL_TEXT, 0); + + /* Advance the output cursor. */ + output_cursor.hpos += len; + output_cursor.x += shift_by_width; + UNBLOCK_INPUT; +} + + +/* EXPORT for RIF: + Erase the current text line from the nominal cursor position + (inclusive) to pixel column TO_X (exclusive). The idea is that + everything from TO_X onward is already erased. + + TO_X is a pixel position relative to updated_area of + updated_window. TO_X == -1 means clear to the end of this area. */ + +void +x_clear_end_of_line (to_x) + int to_x; +{ + struct frame *f; + struct window *w = updated_window; + int max_x, min_y, max_y; + int from_x, from_y, to_y; + + xassert (updated_window && updated_row); + f = XFRAME (w->frame); + + if (updated_row->full_width_p) + max_x = WINDOW_TOTAL_WIDTH (w); + else + max_x = window_box_width (w, updated_area); + max_y = window_text_bottom_y (w); + + /* TO_X == 0 means don't do anything. TO_X < 0 means clear to end + of window. For TO_X > 0, truncate to end of drawing area. */ + if (to_x == 0) + return; + else if (to_x < 0) + to_x = max_x; + else + to_x = min (to_x, max_x); + + to_y = min (max_y, output_cursor.y + updated_row->height); + + /* Notice if the cursor will be cleared by this operation. */ + if (!updated_row->full_width_p) + notice_overwritten_cursor (w, updated_area, + output_cursor.x, -1, + updated_row->y, + MATRIX_ROW_BOTTOM_Y (updated_row)); + + from_x = output_cursor.x; + + /* Translate to frame coordinates. */ + if (updated_row->full_width_p) + { + from_x = WINDOW_TO_FRAME_PIXEL_X (w, from_x); + to_x = WINDOW_TO_FRAME_PIXEL_X (w, to_x); + } + else + { + int area_left = window_box_left (w, updated_area); + from_x += area_left; + to_x += area_left; + } + + min_y = WINDOW_HEADER_LINE_HEIGHT (w); + from_y = WINDOW_TO_FRAME_PIXEL_Y (w, max (min_y, output_cursor.y)); + to_y = WINDOW_TO_FRAME_PIXEL_Y (w, to_y); + + /* Prevent inadvertently clearing to end of the X window. */ + if (to_x > from_x && to_y > from_y) + { + BLOCK_INPUT; + rif->clear_frame_area (f, from_x, from_y, + to_x - from_x, to_y - from_y); + UNBLOCK_INPUT; + } +} + +#endif /* HAVE_WINDOW_SYSTEM */ + + + +/*********************************************************************** + Cursor types + ***********************************************************************/ + +/* Value is the internal representation of the specified cursor type + ARG. If type is BAR_CURSOR, return in *WIDTH the specified width + of the bar cursor. */ + +enum text_cursor_kinds +get_specified_cursor_type (arg, width) + Lisp_Object arg; + int *width; +{ + enum text_cursor_kinds type; + + if (NILP (arg)) + return NO_CURSOR; + + if (EQ (arg, Qbox)) + return FILLED_BOX_CURSOR; + + if (EQ (arg, Qhollow)) + return HOLLOW_BOX_CURSOR; + + if (EQ (arg, Qbar)) + { + *width = 2; + return BAR_CURSOR; + } + + if (CONSP (arg) + && EQ (XCAR (arg), Qbar) + && INTEGERP (XCDR (arg)) + && XINT (XCDR (arg)) >= 0) + { + *width = XINT (XCDR (arg)); + return BAR_CURSOR; + } + + if (EQ (arg, Qhbar)) + { + *width = 2; + return HBAR_CURSOR; + } + + if (CONSP (arg) + && EQ (XCAR (arg), Qhbar) + && INTEGERP (XCDR (arg)) + && XINT (XCDR (arg)) >= 0) + { + *width = XINT (XCDR (arg)); + return HBAR_CURSOR; + } + + /* Treat anything unknown as "hollow box cursor". + It was bad to signal an error; people have trouble fixing + .Xdefaults with Emacs, when it has something bad in it. */ + type = HOLLOW_BOX_CURSOR; + + return type; +} + +/* Set the default cursor types for specified frame. */ +void +set_frame_cursor_types (f, arg) + struct frame *f; + Lisp_Object arg; +{ + int width; + Lisp_Object tem; + + FRAME_DESIRED_CURSOR (f) = get_specified_cursor_type (arg, &width); + FRAME_CURSOR_WIDTH (f) = width; + + /* By default, set up the blink-off state depending on the on-state. */ + + tem = Fassoc (arg, Vblink_cursor_alist); + if (!NILP (tem)) + { + FRAME_BLINK_OFF_CURSOR (f) + = get_specified_cursor_type (XCDR (tem), &width); + FRAME_BLINK_OFF_CURSOR_WIDTH (f) = width; + } + else + FRAME_BLINK_OFF_CURSOR (f) = DEFAULT_CURSOR; +} + + +/* Return the cursor we want to be displayed in window W. Return + width of bar/hbar cursor through WIDTH arg. Return with + ACTIVE_CURSOR arg set to 1 if cursor in window W is `active' + (i.e. if the `system caret' should track this cursor). + + In a mini-buffer window, we want the cursor only to appear if we + are reading input from this window. For the selected window, we + want the cursor type given by the frame parameter or buffer local + setting of cursor-type. If explicitly marked off, draw no cursor. + In all other cases, we want a hollow box cursor. */ + +enum text_cursor_kinds +get_window_cursor_type (w, width, active_cursor) + struct window *w; + int *width; + int *active_cursor; +{ + struct frame *f = XFRAME (w->frame); + struct buffer *b = XBUFFER (w->buffer); + int cursor_type = DEFAULT_CURSOR; + Lisp_Object alt_cursor; + int non_selected = 0; + + *active_cursor = 1; + + /* Echo area */ + if (cursor_in_echo_area + && FRAME_HAS_MINIBUF_P (f) + && EQ (FRAME_MINIBUF_WINDOW (f), echo_area_window)) + { + if (w == XWINDOW (echo_area_window)) + { + *width = FRAME_CURSOR_WIDTH (f); + return FRAME_DESIRED_CURSOR (f); + } + + *active_cursor = 0; + non_selected = 1; + } + + /* Nonselected window or nonselected frame. */ + else if (w != XWINDOW (f->selected_window) +#ifdef HAVE_WINDOW_SYSTEM + || f != FRAME_X_DISPLAY_INFO (f)->x_highlight_frame +#endif + ) + { + *active_cursor = 0; + + if (MINI_WINDOW_P (w) && minibuf_level == 0) + return NO_CURSOR; + + non_selected = 1; + } + + /* Never display a cursor in a window in which cursor-type is nil. */ + if (NILP (b->cursor_type)) + return NO_CURSOR; + + /* Use cursor-in-non-selected-windows for non-selected window or frame. */ + if (non_selected) + { + alt_cursor = Fbuffer_local_value (Qcursor_in_non_selected_windows, w->buffer); + return get_specified_cursor_type (alt_cursor, width); + } + + /* Get the normal cursor type for this window. */ + if (EQ (b->cursor_type, Qt)) + { + cursor_type = FRAME_DESIRED_CURSOR (f); + *width = FRAME_CURSOR_WIDTH (f); + } + else + cursor_type = get_specified_cursor_type (b->cursor_type, width); + + /* Use normal cursor if not blinked off. */ + if (!w->cursor_off_p) + return cursor_type; + + /* Cursor is blinked off, so determine how to "toggle" it. */ + + /* First look for an entry matching the buffer's cursor-type in blink-cursor-alist. */ + if ((alt_cursor = Fassoc (b->cursor_type, Vblink_cursor_alist), !NILP (alt_cursor))) + return get_specified_cursor_type (XCDR (alt_cursor), width); + + /* Then see if frame has specified a specific blink off cursor type. */ + if (FRAME_BLINK_OFF_CURSOR (f) != DEFAULT_CURSOR) + { + *width = FRAME_BLINK_OFF_CURSOR_WIDTH (f); + return FRAME_BLINK_OFF_CURSOR (f); + } + + /* Finally perform built-in cursor blinking: + filled box <-> hollow box + wide [h]bar <-> narrow [h]bar + narrow [h]bar <-> no cursor + other type <-> no cursor */ + + if (cursor_type == FILLED_BOX_CURSOR) + return HOLLOW_BOX_CURSOR; + + if ((cursor_type == BAR_CURSOR || cursor_type == HBAR_CURSOR) && *width > 1) + { + *width = 1; + return cursor_type; + } + + return NO_CURSOR; +} + + +#ifdef HAVE_WINDOW_SYSTEM + +/* Notice when the text cursor of window W has been completely + overwritten by a drawing operation that outputs glyphs in AREA + starting at X0 and ending at X1 in the line starting at Y0 and + ending at Y1. X coordinates are area-relative. X1 < 0 means all + the rest of the line after X0 has been written. Y coordinates + are window-relative. */ + +static void +notice_overwritten_cursor (w, area, x0, x1, y0, y1) + struct window *w; + enum glyph_row_area area; + int x0, y0, x1, y1; +{ + if (area == TEXT_AREA && w->phys_cursor_on_p) + { + int cx0 = w->phys_cursor.x; + int cx1 = cx0 + w->phys_cursor_width; + int cy0 = w->phys_cursor.y; + int cy1 = cy0 + w->phys_cursor_height; + + if (x0 <= cx0 && (x1 < 0 || x1 >= cx1)) + { + /* The cursor image will be completely removed from the + screen if the output area intersects the cursor area in + y-direction. When we draw in [y0 y1[, and some part of + the cursor is at y < y0, that part must have been drawn + before. When scrolling, the cursor is erased before + actually scrolling, so we don't come here. When not + scrolling, the rows above the old cursor row must have + changed, and in this case these rows must have written + over the cursor image. + + Likewise if part of the cursor is below y1, with the + exception of the cursor being in the first blank row at + the buffer and window end because update_text_area + doesn't draw that row. (Except when it does, but + that's handled in update_text_area.) */ + + if (((y0 >= cy0 && y0 < cy1) || (y1 > cy0 && y1 < cy1)) + && w->current_matrix->rows[w->phys_cursor.vpos].displays_text_p) + w->phys_cursor_on_p = 0; + } + } +} + +#endif /* HAVE_WINDOW_SYSTEM */ + + +/************************************************************************ + Mouse Face + ************************************************************************/ + +#ifdef HAVE_WINDOW_SYSTEM + +/* EXPORT for RIF: + Fix the display of area AREA of overlapping row ROW in window W. */ + +void +x_fix_overlapping_area (w, row, area) + struct window *w; + struct glyph_row *row; + enum glyph_row_area area; +{ + int i, x; + + BLOCK_INPUT; + + x = 0; + for (i = 0; i < row->used[area];) + { + if (row->glyphs[area][i].overlaps_vertically_p) + { + int start = i, start_x = x; + + do + { + x += row->glyphs[area][i].pixel_width; + ++i; + } + while (i < row->used[area] + && row->glyphs[area][i].overlaps_vertically_p); + + draw_glyphs (w, start_x, row, area, + start, i, + DRAW_NORMAL_TEXT, 1); + } + else + { + x += row->glyphs[area][i].pixel_width; + ++i; + } + } + + UNBLOCK_INPUT; +} + + +/* EXPORT: + Draw the cursor glyph of window W in glyph row ROW. See the + comment of draw_glyphs for the meaning of HL. */ + +void +draw_phys_cursor_glyph (w, row, hl) + struct window *w; + struct glyph_row *row; + enum draw_glyphs_face hl; +{ + /* If cursor hpos is out of bounds, don't draw garbage. This can + happen in mini-buffer windows when switching between echo area + glyphs and mini-buffer. */ + if (w->phys_cursor.hpos < row->used[TEXT_AREA]) + { + int on_p = w->phys_cursor_on_p; + int x1; + x1 = draw_glyphs (w, w->phys_cursor.x, row, TEXT_AREA, + w->phys_cursor.hpos, w->phys_cursor.hpos + 1, + hl, 0); + w->phys_cursor_on_p = on_p; + + if (hl == DRAW_CURSOR) + w->phys_cursor_width = x1 - w->phys_cursor.x; + /* When we erase the cursor, and ROW is overlapped by other + rows, make sure that these overlapping parts of other rows + are redrawn. */ + else if (hl == DRAW_NORMAL_TEXT && row->overlapped_p) + { + if (row > w->current_matrix->rows + && MATRIX_ROW_OVERLAPS_SUCC_P (row - 1)) + x_fix_overlapping_area (w, row - 1, TEXT_AREA); + + if (MATRIX_ROW_BOTTOM_Y (row) < window_text_bottom_y (w) + && MATRIX_ROW_OVERLAPS_PRED_P (row + 1)) + x_fix_overlapping_area (w, row + 1, TEXT_AREA); + } + } +} + + +/* EXPORT: + Erase the image of a cursor of window W from the screen. */ + +void +erase_phys_cursor (w) + struct window *w; +{ + struct frame *f = XFRAME (w->frame); + Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (f); + int hpos = w->phys_cursor.hpos; + int vpos = w->phys_cursor.vpos; + int mouse_face_here_p = 0; + struct glyph_matrix *active_glyphs = w->current_matrix; + struct glyph_row *cursor_row; + struct glyph *cursor_glyph; + enum draw_glyphs_face hl; + + /* No cursor displayed or row invalidated => nothing to do on the + screen. */ + if (w->phys_cursor_type == NO_CURSOR) + goto mark_cursor_off; + + /* VPOS >= active_glyphs->nrows means that window has been resized. + Don't bother to erase the cursor. */ + if (vpos >= active_glyphs->nrows) + goto mark_cursor_off; + + /* If row containing cursor is marked invalid, there is nothing we + can do. */ + cursor_row = MATRIX_ROW (active_glyphs, vpos); + if (!cursor_row->enabled_p) + goto mark_cursor_off; + + /* If row is completely invisible, don't attempt to delete a cursor which + isn't there. This can happen if cursor is at top of a window, and + we switch to a buffer with a header line in that window. */ + if (cursor_row->visible_height <= 0) + goto mark_cursor_off; + + /* This can happen when the new row is shorter than the old one. + In this case, either draw_glyphs or clear_end_of_line + should have cleared the cursor. Note that we wouldn't be + able to erase the cursor in this case because we don't have a + cursor glyph at hand. */ + if (w->phys_cursor.hpos >= cursor_row->used[TEXT_AREA]) + goto mark_cursor_off; + + /* If the cursor is in the mouse face area, redisplay that when + we clear the cursor. */ + if (! NILP (dpyinfo->mouse_face_window) + && w == XWINDOW (dpyinfo->mouse_face_window) + && (vpos > dpyinfo->mouse_face_beg_row + || (vpos == dpyinfo->mouse_face_beg_row + && hpos >= dpyinfo->mouse_face_beg_col)) + && (vpos < dpyinfo->mouse_face_end_row + || (vpos == dpyinfo->mouse_face_end_row + && hpos < dpyinfo->mouse_face_end_col)) + /* Don't redraw the cursor's spot in mouse face if it is at the + end of a line (on a newline). The cursor appears there, but + mouse highlighting does not. */ + && cursor_row->used[TEXT_AREA] > hpos) + mouse_face_here_p = 1; + + /* Maybe clear the display under the cursor. */ + if (w->phys_cursor_type == HOLLOW_BOX_CURSOR) + { + int x, y; + int header_line_height = WINDOW_HEADER_LINE_HEIGHT (w); + + cursor_glyph = get_phys_cursor_glyph (w); + if (cursor_glyph == NULL) + goto mark_cursor_off; + + x = WINDOW_TEXT_TO_FRAME_PIXEL_X (w, w->phys_cursor.x); + y = WINDOW_TO_FRAME_PIXEL_Y (w, max (header_line_height, cursor_row->y)); + + rif->clear_frame_area (f, x, y, + cursor_glyph->pixel_width, cursor_row->visible_height); + } + + /* Erase the cursor by redrawing the character underneath it. */ + if (mouse_face_here_p) + hl = DRAW_MOUSE_FACE; + else + hl = DRAW_NORMAL_TEXT; + draw_phys_cursor_glyph (w, cursor_row, hl); + + mark_cursor_off: + w->phys_cursor_on_p = 0; + w->phys_cursor_type = NO_CURSOR; +} + + +/* EXPORT: + Display or clear cursor of window W. If ON is zero, clear the + cursor. If it is non-zero, display the cursor. If ON is nonzero, + where to put the cursor is specified by HPOS, VPOS, X and Y. */ + +void +display_and_set_cursor (w, on, hpos, vpos, x, y) + struct window *w; + int on, hpos, vpos, x, y; +{ + struct frame *f = XFRAME (w->frame); + int new_cursor_type; + int new_cursor_width; + int active_cursor; + struct glyph_matrix *current_glyphs; + struct glyph_row *glyph_row; + struct glyph *glyph; + + /* This is pointless on invisible frames, and dangerous on garbaged + windows and frames; in the latter case, the frame or window may + be in the midst of changing its size, and x and y may be off the + window. */ + if (! FRAME_VISIBLE_P (f) + || FRAME_GARBAGED_P (f) + || vpos >= w->current_matrix->nrows + || hpos >= w->current_matrix->matrix_w) + return; + + /* If cursor is off and we want it off, return quickly. */ + if (!on && !w->phys_cursor_on_p) + return; + + current_glyphs = w->current_matrix; + glyph_row = MATRIX_ROW (current_glyphs, vpos); + glyph = glyph_row->glyphs[TEXT_AREA] + hpos; + + /* If cursor row is not enabled, we don't really know where to + display the cursor. */ + if (!glyph_row->enabled_p) + { + w->phys_cursor_on_p = 0; + return; + } + + xassert (interrupt_input_blocked); + + /* Set new_cursor_type to the cursor we want to be displayed. */ + new_cursor_type = get_window_cursor_type (w, &new_cursor_width, &active_cursor); + + /* If cursor is currently being shown and we don't want it to be or + it is in the wrong place, or the cursor type is not what we want, + erase it. */ + if (w->phys_cursor_on_p + && (!on + || w->phys_cursor.x != x + || w->phys_cursor.y != y + || new_cursor_type != w->phys_cursor_type + || ((new_cursor_type == BAR_CURSOR || new_cursor_type == HBAR_CURSOR) + && new_cursor_width != w->phys_cursor_width))) + erase_phys_cursor (w); + + /* Don't check phys_cursor_on_p here because that flag is only set + to zero in some cases where we know that the cursor has been + completely erased, to avoid the extra work of erasing the cursor + twice. In other words, phys_cursor_on_p can be 1 and the cursor + still not be visible, or it has only been partly erased. */ + if (on) + { + w->phys_cursor_ascent = glyph_row->ascent; + w->phys_cursor_height = glyph_row->height; + + /* Set phys_cursor_.* before x_draw_.* is called because some + of them may need the information. */ + w->phys_cursor.x = x; + w->phys_cursor.y = glyph_row->y; + w->phys_cursor.hpos = hpos; + w->phys_cursor.vpos = vpos; + } + + rif->draw_window_cursor (w, glyph_row, x, y, + new_cursor_type, new_cursor_width, + on, active_cursor); +} + + +/* Switch the display of W's cursor on or off, according to the value + of ON. */ + +static void +update_window_cursor (w, on) + struct window *w; + int on; +{ + /* Don't update cursor in windows whose frame is in the process + of being deleted. */ + if (w->current_matrix) + { + BLOCK_INPUT; + display_and_set_cursor (w, on, w->phys_cursor.hpos, w->phys_cursor.vpos, + w->phys_cursor.x, w->phys_cursor.y); + UNBLOCK_INPUT; + } +} + + +/* Call update_window_cursor with parameter ON_P on all leaf windows + in the window tree rooted at W. */ + +static void +update_cursor_in_window_tree (w, on_p) + struct window *w; + int on_p; +{ + while (w) + { + if (!NILP (w->hchild)) + update_cursor_in_window_tree (XWINDOW (w->hchild), on_p); + else if (!NILP (w->vchild)) + update_cursor_in_window_tree (XWINDOW (w->vchild), on_p); + else + update_window_cursor (w, on_p); + + w = NILP (w->next) ? 0 : XWINDOW (w->next); + } +} + + +/* EXPORT: + Display the cursor on window W, or clear it, according to ON_P. + Don't change the cursor's position. */ + +void +x_update_cursor (f, on_p) + struct frame *f; + int on_p; +{ + update_cursor_in_window_tree (XWINDOW (f->root_window), on_p); +} + + +/* EXPORT: + Clear the cursor of window W to background color, and mark the + cursor as not shown. This is used when the text where the cursor + is is about to be rewritten. */ + +void +x_clear_cursor (w) + struct window *w; +{ + if (FRAME_VISIBLE_P (XFRAME (w->frame)) && w->phys_cursor_on_p) + update_window_cursor (w, 0); +} + + +/* EXPORT: + Display the active region described by mouse_face_* according to DRAW. */ + +void +show_mouse_face (dpyinfo, draw) + Display_Info *dpyinfo; + enum draw_glyphs_face draw; +{ + struct window *w = XWINDOW (dpyinfo->mouse_face_window); + struct frame *f = XFRAME (WINDOW_FRAME (w)); + + if (/* If window is in the process of being destroyed, don't bother + to do anything. */ + w->current_matrix != NULL + /* Don't update mouse highlight if hidden */ + && (draw != DRAW_MOUSE_FACE || !dpyinfo->mouse_face_hidden) + /* Recognize when we are called to operate on rows that don't exist + anymore. This can happen when a window is split. */ + && dpyinfo->mouse_face_end_row < w->current_matrix->nrows) + { + int phys_cursor_on_p = w->phys_cursor_on_p; + struct glyph_row *row, *first, *last; + + first = MATRIX_ROW (w->current_matrix, dpyinfo->mouse_face_beg_row); + last = MATRIX_ROW (w->current_matrix, dpyinfo->mouse_face_end_row); + + for (row = first; row <= last && row->enabled_p; ++row) + { + int start_hpos, end_hpos, start_x; + + /* For all but the first row, the highlight starts at column 0. */ + if (row == first) + { + start_hpos = dpyinfo->mouse_face_beg_col; + start_x = dpyinfo->mouse_face_beg_x; + } + else + { + start_hpos = 0; + start_x = 0; + } + + if (row == last) + end_hpos = dpyinfo->mouse_face_end_col; + else + end_hpos = row->used[TEXT_AREA]; + + if (end_hpos > start_hpos) + { + draw_glyphs (w, start_x, row, TEXT_AREA, + start_hpos, end_hpos, + draw, 0); + + row->mouse_face_p + = draw == DRAW_MOUSE_FACE || draw == DRAW_IMAGE_RAISED; + } + } + + /* When we've written over the cursor, arrange for it to + be displayed again. */ + if (phys_cursor_on_p && !w->phys_cursor_on_p) + { + BLOCK_INPUT; + display_and_set_cursor (w, 1, + w->phys_cursor.hpos, w->phys_cursor.vpos, + w->phys_cursor.x, w->phys_cursor.y); + UNBLOCK_INPUT; + } + } + + /* Change the mouse cursor. */ + if (draw == DRAW_NORMAL_TEXT) + rif->define_frame_cursor (f, FRAME_X_OUTPUT (f)->text_cursor); + else if (draw == DRAW_MOUSE_FACE) + rif->define_frame_cursor (f, FRAME_X_OUTPUT (f)->hand_cursor); + else + rif->define_frame_cursor (f, FRAME_X_OUTPUT (f)->nontext_cursor); +} + +/* EXPORT: + Clear out the mouse-highlighted active region. + Redraw it un-highlighted first. Value is non-zero if mouse + face was actually drawn unhighlighted. */ + +int +clear_mouse_face (dpyinfo) + Display_Info *dpyinfo; +{ + int cleared = 0; + + if (!NILP (dpyinfo->mouse_face_window)) + { + show_mouse_face (dpyinfo, DRAW_NORMAL_TEXT); + cleared = 1; + } + + dpyinfo->mouse_face_beg_row = dpyinfo->mouse_face_beg_col = -1; + dpyinfo->mouse_face_end_row = dpyinfo->mouse_face_end_col = -1; + dpyinfo->mouse_face_window = Qnil; + dpyinfo->mouse_face_overlay = Qnil; + return cleared; +} + + +/* EXPORT: + Non-zero if physical cursor of window W is within mouse face. */ + +int +cursor_in_mouse_face_p (w) + struct window *w; +{ + Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (XFRAME (w->frame)); + int in_mouse_face = 0; + + if (WINDOWP (dpyinfo->mouse_face_window) + && XWINDOW (dpyinfo->mouse_face_window) == w) + { + int hpos = w->phys_cursor.hpos; + int vpos = w->phys_cursor.vpos; + + if (vpos >= dpyinfo->mouse_face_beg_row + && vpos <= dpyinfo->mouse_face_end_row + && (vpos > dpyinfo->mouse_face_beg_row + || hpos >= dpyinfo->mouse_face_beg_col) + && (vpos < dpyinfo->mouse_face_end_row + || hpos < dpyinfo->mouse_face_end_col + || dpyinfo->mouse_face_past_end)) + in_mouse_face = 1; + } + + return in_mouse_face; +} + + + + +/* Find the glyph matrix position of buffer position CHARPOS in window + *W. HPOS, *VPOS, *X, and *Y are set to the positions found. W's + current glyphs must be up to date. If CHARPOS is above window + start return (0, 0, 0, 0). If CHARPOS is after end of W, return end + of last line in W. In the row containing CHARPOS, stop before glyphs + having STOP as object. */ + +#if 1 /* This is a version of fast_find_position that's more correct + in the presence of hscrolling, for example. I didn't install + it right away because the problem fixed is minor, it failed + in 20.x as well, and I think it's too risky to install + so near the release of 21.1. 2001-09-25 gerd. */ + +static int +fast_find_position (w, charpos, hpos, vpos, x, y, stop) + struct window *w; + int charpos; + int *hpos, *vpos, *x, *y; + Lisp_Object stop; +{ + struct glyph_row *row, *first; + struct glyph *glyph, *end; + int past_end = 0; + + first = MATRIX_FIRST_TEXT_ROW (w->current_matrix); + row = row_containing_pos (w, charpos, first, NULL, 0); + if (row == NULL) + { + if (charpos < MATRIX_ROW_START_CHARPOS (first)) + { + *x = *y = *hpos = *vpos = 0; + return 0; + } + else + { + row = MATRIX_ROW (w->current_matrix, XFASTINT (w->window_end_vpos)); + past_end = 1; + } + } + + *x = row->x; + *y = row->y; + *vpos = MATRIX_ROW_VPOS (row, w->current_matrix); + + glyph = row->glyphs[TEXT_AREA]; + end = glyph + row->used[TEXT_AREA]; + + /* Skip over glyphs not having an object at the start of the row. + These are special glyphs like truncation marks on terminal + frames. */ + if (row->displays_text_p) + while (glyph < end + && INTEGERP (glyph->object) + && !EQ (stop, glyph->object) + && glyph->charpos < 0) + { + *x += glyph->pixel_width; + ++glyph; + } + + while (glyph < end + && !INTEGERP (glyph->object) + && !EQ (stop, glyph->object) + && (!BUFFERP (glyph->object) + || glyph->charpos < charpos)) + { + *x += glyph->pixel_width; + ++glyph; + } + + *hpos = glyph - row->glyphs[TEXT_AREA]; + return past_end; +} + +#else /* not 1 */ + +static int +fast_find_position (w, pos, hpos, vpos, x, y, stop) + struct window *w; + int pos; + int *hpos, *vpos, *x, *y; + Lisp_Object stop; +{ + int i; + int lastcol; + int maybe_next_line_p = 0; + int line_start_position; + int yb = window_text_bottom_y (w); + struct glyph_row *row, *best_row; + int row_vpos, best_row_vpos; + int current_x; + + row = best_row = MATRIX_FIRST_TEXT_ROW (w->current_matrix); + row_vpos = best_row_vpos = MATRIX_ROW_VPOS (row, w->current_matrix); + + while (row->y < yb) + { + if (row->used[TEXT_AREA]) + line_start_position = row->glyphs[TEXT_AREA]->charpos; + else + line_start_position = 0; + + if (line_start_position > pos) + break; + /* If the position sought is the end of the buffer, + don't include the blank lines at the bottom of the window. */ + else if (line_start_position == pos + && pos == BUF_ZV (XBUFFER (w->buffer))) + { + maybe_next_line_p = 1; + break; + } + else if (line_start_position > 0) + { + best_row = row; + best_row_vpos = row_vpos; + } + + if (row->y + row->height >= yb) + break; + + ++row; + ++row_vpos; + } + + /* Find the right column within BEST_ROW. */ + lastcol = 0; + current_x = best_row->x; + for (i = 0; i < best_row->used[TEXT_AREA]; i++) + { + struct glyph *glyph = best_row->glyphs[TEXT_AREA] + i; + int charpos = glyph->charpos; + + if (BUFFERP (glyph->object)) + { + if (charpos == pos) + { + *hpos = i; + *vpos = best_row_vpos; + *x = current_x; + *y = best_row->y; + return 1; + } + else if (charpos > pos) + break; + } + else if (EQ (glyph->object, stop)) + break; + + if (charpos > 0) + lastcol = i; + current_x += glyph->pixel_width; + } + + /* If we're looking for the end of the buffer, + and we didn't find it in the line we scanned, + use the start of the following line. */ + if (maybe_next_line_p) + { + ++best_row; + ++best_row_vpos; + lastcol = 0; + current_x = best_row->x; + } + + *vpos = best_row_vpos; + *hpos = lastcol + 1; + *x = current_x; + *y = best_row->y; + return 0; +} + +#endif /* not 1 */ + + +/* Find the position of the glyph for position POS in OBJECT in + window W's current matrix, and return in *X, *Y the pixel + coordinates, and return in *HPOS, *VPOS the column/row of the glyph. + + RIGHT_P non-zero means return the position of the right edge of the + glyph, RIGHT_P zero means return the left edge position. + + If no glyph for POS exists in the matrix, return the position of + the glyph with the next smaller position that is in the matrix, if + RIGHT_P is zero. If RIGHT_P is non-zero, and no glyph for POS + exists in the matrix, return the position of the glyph with the + next larger position in OBJECT. + + Value is non-zero if a glyph was found. */ + +static int +fast_find_string_pos (w, pos, object, hpos, vpos, x, y, right_p) + struct window *w; + int pos; + Lisp_Object object; + int *hpos, *vpos, *x, *y; + int right_p; +{ + int yb = window_text_bottom_y (w); + struct glyph_row *r; + struct glyph *best_glyph = NULL; + struct glyph_row *best_row = NULL; + int best_x = 0; + + for (r = MATRIX_FIRST_TEXT_ROW (w->current_matrix); + r->enabled_p && r->y < yb; + ++r) + { + struct glyph *g = r->glyphs[TEXT_AREA]; + struct glyph *e = g + r->used[TEXT_AREA]; + int gx; + + for (gx = r->x; g < e; gx += g->pixel_width, ++g) + if (EQ (g->object, object)) + { + if (g->charpos == pos) + { + best_glyph = g; + best_x = gx; + best_row = r; + goto found; + } + else if (best_glyph == NULL + || ((abs (g->charpos - pos) + < abs (best_glyph->charpos - pos)) + && (right_p + ? g->charpos < pos + : g->charpos > pos))) + { + best_glyph = g; + best_x = gx; + best_row = r; + } + } + } + + found: + + if (best_glyph) + { + *x = best_x; + *hpos = best_glyph - best_row->glyphs[TEXT_AREA]; + + if (right_p) + { + *x += best_glyph->pixel_width; + ++*hpos; + } + + *y = best_row->y; + *vpos = best_row - w->current_matrix->rows; + } + + return best_glyph != NULL; +} + + +/* Take proper action when mouse has moved to the mode or header line + or marginal area AREA of window W, x-position X and y-position Y. + X is relative to the start of the text display area of W, so the + width of bitmap areas and scroll bars must be subtracted to get a + position relative to the start of the mode line. */ + +static void +note_mode_line_or_margin_highlight (w, x, y, area) + struct window *w; + int x, y; + enum window_part area; +{ + struct frame *f = XFRAME (w->frame); + Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (f); + Cursor cursor = dpyinfo->vertical_scroll_bar_cursor; + int charpos; + Lisp_Object string, help, map, pos; + + if (area == ON_MODE_LINE || area == ON_HEADER_LINE) + string = mode_line_string (w, x, y, area, &charpos); + else + string = marginal_area_string (w, x, y, area, &charpos); + + if (STRINGP (string)) + { + pos = make_number (charpos); + + /* If we're on a string with `help-echo' text property, arrange + for the help to be displayed. This is done by setting the + global variable help_echo_string to the help string. */ + help = Fget_text_property (pos, Qhelp_echo, string); + if (!NILP (help)) + { + help_echo_string = help; + XSETWINDOW (help_echo_window, w); + help_echo_object = string; + help_echo_pos = charpos; + } + + /* Change the mouse pointer according to what is under X/Y. */ + map = Fget_text_property (pos, Qlocal_map, string); + if (!KEYMAPP (map)) + map = Fget_text_property (pos, Qkeymap, string); + if (KEYMAPP (map)) + cursor = FRAME_X_OUTPUT (f)->nontext_cursor; + } + + rif->define_frame_cursor (f, cursor); +} + + +/* EXPORT: + Take proper action when the mouse has moved to position X, Y on + frame F as regards highlighting characters that have mouse-face + properties. Also de-highlighting chars where the mouse was before. + X and Y can be negative or out of range. */ + +void +note_mouse_highlight (f, x, y) + struct frame *f; + int x, y; +{ + Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (f); + enum window_part part; + Lisp_Object window; + struct window *w; + Cursor cursor = No_Cursor; + struct buffer *b; + + /* When a menu is active, don't highlight because this looks odd. */ +#if defined (USE_X_TOOLKIT) || defined (USE_GTK) || defined (HAVE_NTGUI) + if (popup_activated ()) + return; +#endif + + if (NILP (Vmouse_highlight) + || !f->glyphs_initialized_p) + return; + + dpyinfo->mouse_face_mouse_x = x; + dpyinfo->mouse_face_mouse_y = y; + dpyinfo->mouse_face_mouse_frame = f; + + if (dpyinfo->mouse_face_defer) + return; + + if (gc_in_progress) + { + dpyinfo->mouse_face_deferred_gc = 1; + return; + } + + /* Which window is that in? */ + window = window_from_coordinates (f, x, y, &part, 0, 0, 1); + + /* If we were displaying active text in another window, clear that. */ + if (! EQ (window, dpyinfo->mouse_face_window)) + clear_mouse_face (dpyinfo); + + /* Not on a window -> return. */ + if (!WINDOWP (window)) + return; + + /* Reset help_echo_string. It will get recomputed below. */ + /* ++KFS: X version didn't do this, but it looks harmless. */ + help_echo_string = Qnil; + + /* Convert to window-relative pixel coordinates. */ + w = XWINDOW (window); + frame_to_window_pixel_xy (w, &x, &y); + + /* Handle tool-bar window differently since it doesn't display a + buffer. */ + if (EQ (window, f->tool_bar_window)) + { + note_tool_bar_highlight (f, x, y); + return; + } + + /* Mouse is on the mode, header line or margin? */ + if (part == ON_MODE_LINE || part == ON_HEADER_LINE + || part == ON_LEFT_MARGIN || part == ON_RIGHT_MARGIN) + { + note_mode_line_or_margin_highlight (w, x, y, part); + return; + } + + if (part == ON_VERTICAL_BORDER) + cursor = FRAME_X_OUTPUT (f)->horizontal_drag_cursor; + else + cursor = FRAME_X_OUTPUT (f)->text_cursor; + + /* Are we in a window whose display is up to date? + And verify the buffer's text has not changed. */ + b = XBUFFER (w->buffer); + if (part == ON_TEXT + && EQ (w->window_end_valid, w->buffer) + && XFASTINT (w->last_modified) == BUF_MODIFF (b) + && XFASTINT (w->last_overlay_modified) == BUF_OVERLAY_MODIFF (b)) + { + int hpos, vpos, pos, i, area; + struct glyph *glyph; + Lisp_Object object; + Lisp_Object mouse_face = Qnil, overlay = Qnil, position; + Lisp_Object *overlay_vec = NULL; + int len, noverlays; + struct buffer *obuf; + int obegv, ozv, same_region; + + /* Find the glyph under X/Y. */ + glyph = x_y_to_hpos_vpos (w, x, y, &hpos, &vpos, &area, 0); + + /* Clear mouse face if X/Y not over text. */ + if (glyph == NULL + || area != TEXT_AREA + || !MATRIX_ROW (w->current_matrix, vpos)->displays_text_p) + { +#if defined (HAVE_NTGUI) + /* ++KFS: Why is this necessary on W32 ? */ + clear_mouse_face (dpyinfo); + cursor = FRAME_X_OUTPUT (f)->nontext_cursor; +#else + if (clear_mouse_face (dpyinfo)) + cursor = No_Cursor; +#endif + goto set_cursor; + } + + pos = glyph->charpos; + object = glyph->object; + if (!STRINGP (object) && !BUFFERP (object)) + goto set_cursor; + + /* If we get an out-of-range value, return now; avoid an error. */ + if (BUFFERP (object) && pos > BUF_Z (b)) + goto set_cursor; + + /* Make the window's buffer temporarily current for + overlays_at and compute_char_face. */ + obuf = current_buffer; + current_buffer = b; + obegv = BEGV; + ozv = ZV; + BEGV = BEG; + ZV = Z; + + /* Is this char mouse-active or does it have help-echo? */ + position = make_number (pos); + + if (BUFFERP (object)) + { + /* Put all the overlays we want in a vector in overlay_vec. + Store the length in len. If there are more than 10, make + enough space for all, and try again. */ + len = 10; + overlay_vec = (Lisp_Object *) alloca (len * sizeof (Lisp_Object)); + noverlays = overlays_at (pos, 0, &overlay_vec, &len, NULL, NULL, 0); + if (noverlays > len) + { + len = noverlays; + overlay_vec = (Lisp_Object *) alloca (len * sizeof (Lisp_Object)); + noverlays = overlays_at (pos, 0, &overlay_vec, &len, NULL, NULL,0); + } + + /* Sort overlays into increasing priority order. */ + noverlays = sort_overlays (overlay_vec, noverlays, w); + } + else + noverlays = 0; + + same_region = (EQ (window, dpyinfo->mouse_face_window) + && vpos >= dpyinfo->mouse_face_beg_row + && vpos <= dpyinfo->mouse_face_end_row + && (vpos > dpyinfo->mouse_face_beg_row + || hpos >= dpyinfo->mouse_face_beg_col) + && (vpos < dpyinfo->mouse_face_end_row + || hpos < dpyinfo->mouse_face_end_col + || dpyinfo->mouse_face_past_end)); + + if (same_region) + cursor = No_Cursor; + + /* Check mouse-face highlighting. */ + if (! same_region + /* If there exists an overlay with mouse-face overlapping + the one we are currently highlighting, we have to + check if we enter the overlapping overlay, and then + highlight only that. */ + || (OVERLAYP (dpyinfo->mouse_face_overlay) + && mouse_face_overlay_overlaps (dpyinfo->mouse_face_overlay))) + { + /* Find the highest priority overlay that has a mouse-face + property. */ + overlay = Qnil; + for (i = noverlays - 1; i >= 0 && NILP (overlay); --i) + { + mouse_face = Foverlay_get (overlay_vec[i], Qmouse_face); + if (!NILP (mouse_face)) + overlay = overlay_vec[i]; + } + + /* If we're actually highlighting the same overlay as + before, there's no need to do that again. */ + if (!NILP (overlay) + && EQ (overlay, dpyinfo->mouse_face_overlay)) + goto check_help_echo; + + dpyinfo->mouse_face_overlay = overlay; + + /* Clear the display of the old active region, if any. */ + if (clear_mouse_face (dpyinfo)) + cursor = No_Cursor; + + /* If no overlay applies, get a text property. */ + if (NILP (overlay)) + mouse_face = Fget_text_property (position, Qmouse_face, object); + + /* Handle the overlay case. */ + if (!NILP (overlay)) + { + /* Find the range of text around this char that + should be active. */ + Lisp_Object before, after; + int ignore; + + before = Foverlay_start (overlay); + after = Foverlay_end (overlay); + /* Record this as the current active region. */ + fast_find_position (w, XFASTINT (before), + &dpyinfo->mouse_face_beg_col, + &dpyinfo->mouse_face_beg_row, + &dpyinfo->mouse_face_beg_x, + &dpyinfo->mouse_face_beg_y, Qnil); + + dpyinfo->mouse_face_past_end + = !fast_find_position (w, XFASTINT (after), + &dpyinfo->mouse_face_end_col, + &dpyinfo->mouse_face_end_row, + &dpyinfo->mouse_face_end_x, + &dpyinfo->mouse_face_end_y, Qnil); + dpyinfo->mouse_face_window = window; + + dpyinfo->mouse_face_face_id + = face_at_buffer_position (w, pos, 0, 0, + &ignore, pos + 1, + !dpyinfo->mouse_face_hidden); + + /* Display it as active. */ + show_mouse_face (dpyinfo, DRAW_MOUSE_FACE); + cursor = No_Cursor; + } + /* Handle the text property case. */ + else if (!NILP (mouse_face) && BUFFERP (object)) + { + /* Find the range of text around this char that + should be active. */ + Lisp_Object before, after, beginning, end; + int ignore; + + beginning = Fmarker_position (w->start); + end = make_number (BUF_Z (XBUFFER (object)) + - XFASTINT (w->window_end_pos)); + before + = Fprevious_single_property_change (make_number (pos + 1), + Qmouse_face, + object, beginning); + after + = Fnext_single_property_change (position, Qmouse_face, + object, end); + + /* Record this as the current active region. */ + fast_find_position (w, XFASTINT (before), + &dpyinfo->mouse_face_beg_col, + &dpyinfo->mouse_face_beg_row, + &dpyinfo->mouse_face_beg_x, + &dpyinfo->mouse_face_beg_y, Qnil); + dpyinfo->mouse_face_past_end + = !fast_find_position (w, XFASTINT (after), + &dpyinfo->mouse_face_end_col, + &dpyinfo->mouse_face_end_row, + &dpyinfo->mouse_face_end_x, + &dpyinfo->mouse_face_end_y, Qnil); + dpyinfo->mouse_face_window = window; + + if (BUFFERP (object)) + dpyinfo->mouse_face_face_id + = face_at_buffer_position (w, pos, 0, 0, + &ignore, pos + 1, + !dpyinfo->mouse_face_hidden); + + /* Display it as active. */ + show_mouse_face (dpyinfo, DRAW_MOUSE_FACE); + cursor = No_Cursor; + } + else if (!NILP (mouse_face) && STRINGP (object)) + { + Lisp_Object b, e; + int ignore; + + b = Fprevious_single_property_change (make_number (pos + 1), + Qmouse_face, + object, Qnil); + e = Fnext_single_property_change (position, Qmouse_face, + object, Qnil); + if (NILP (b)) + b = make_number (0); + if (NILP (e)) + e = make_number (SCHARS (object) - 1); + fast_find_string_pos (w, XINT (b), object, + &dpyinfo->mouse_face_beg_col, + &dpyinfo->mouse_face_beg_row, + &dpyinfo->mouse_face_beg_x, + &dpyinfo->mouse_face_beg_y, 0); + fast_find_string_pos (w, XINT (e), object, + &dpyinfo->mouse_face_end_col, + &dpyinfo->mouse_face_end_row, + &dpyinfo->mouse_face_end_x, + &dpyinfo->mouse_face_end_y, 1); + dpyinfo->mouse_face_past_end = 0; + dpyinfo->mouse_face_window = window; + dpyinfo->mouse_face_face_id + = face_at_string_position (w, object, pos, 0, 0, 0, &ignore, + glyph->face_id, 1); + show_mouse_face (dpyinfo, DRAW_MOUSE_FACE); + cursor = No_Cursor; + } + else if (STRINGP (object) && NILP (mouse_face)) + { + /* A string which doesn't have mouse-face, but + the text ``under'' it might have. */ + struct glyph_row *r = MATRIX_ROW (w->current_matrix, vpos); + int start = MATRIX_ROW_START_CHARPOS (r); + + pos = string_buffer_position (w, object, start); + if (pos > 0) + mouse_face = get_char_property_and_overlay (make_number (pos), + Qmouse_face, + w->buffer, + &overlay); + if (!NILP (mouse_face) && !NILP (overlay)) + { + Lisp_Object before = Foverlay_start (overlay); + Lisp_Object after = Foverlay_end (overlay); + int ignore; + + /* Note that we might not be able to find position + BEFORE in the glyph matrix if the overlay is + entirely covered by a `display' property. In + this case, we overshoot. So let's stop in + the glyph matrix before glyphs for OBJECT. */ + fast_find_position (w, XFASTINT (before), + &dpyinfo->mouse_face_beg_col, + &dpyinfo->mouse_face_beg_row, + &dpyinfo->mouse_face_beg_x, + &dpyinfo->mouse_face_beg_y, + object); + + dpyinfo->mouse_face_past_end + = !fast_find_position (w, XFASTINT (after), + &dpyinfo->mouse_face_end_col, + &dpyinfo->mouse_face_end_row, + &dpyinfo->mouse_face_end_x, + &dpyinfo->mouse_face_end_y, + Qnil); + dpyinfo->mouse_face_window = window; + dpyinfo->mouse_face_face_id + = face_at_buffer_position (w, pos, 0, 0, + &ignore, pos + 1, + !dpyinfo->mouse_face_hidden); + + /* Display it as active. */ + show_mouse_face (dpyinfo, DRAW_MOUSE_FACE); + cursor = No_Cursor; + } + } + } + + check_help_echo: + + /* Look for a `help-echo' property. */ + { + Lisp_Object help, overlay; + + /* Check overlays first. */ + help = overlay = Qnil; + for (i = noverlays - 1; i >= 0 && NILP (help); --i) + { + overlay = overlay_vec[i]; + help = Foverlay_get (overlay, Qhelp_echo); + } + + if (!NILP (help)) + { + help_echo_string = help; + help_echo_window = window; + help_echo_object = overlay; + help_echo_pos = pos; + } + else + { + Lisp_Object object = glyph->object; + int charpos = glyph->charpos; + + /* Try text properties. */ + if (STRINGP (object) + && charpos >= 0 + && charpos < SCHARS (object)) + { + help = Fget_text_property (make_number (charpos), + Qhelp_echo, object); + if (NILP (help)) + { + /* If the string itself doesn't specify a help-echo, + see if the buffer text ``under'' it does. */ + struct glyph_row *r + = MATRIX_ROW (w->current_matrix, vpos); + int start = MATRIX_ROW_START_CHARPOS (r); + int pos = string_buffer_position (w, object, start); + if (pos > 0) + { + help = Fget_char_property (make_number (pos), + Qhelp_echo, w->buffer); + if (!NILP (help)) + { + charpos = pos; + object = w->buffer; + } + } + } + } + else if (BUFFERP (object) + && charpos >= BEGV + && charpos < ZV) + help = Fget_text_property (make_number (charpos), Qhelp_echo, + object); + + if (!NILP (help)) + { + help_echo_string = help; + help_echo_window = window; + help_echo_object = object; + help_echo_pos = charpos; + } + } + } + + BEGV = obegv; + ZV = ozv; + current_buffer = obuf; + } + + set_cursor: + +#ifndef HAVE_CARBON + if (cursor != No_Cursor) +#else + if (bcmp (&cursor, &No_Cursor, sizeof (Cursor))) +#endif + rif->define_frame_cursor (f, cursor); +} + + +/* EXPORT for RIF: + Clear any mouse-face on window W. This function is part of the + redisplay interface, and is called from try_window_id and similar + functions to ensure the mouse-highlight is off. */ + +void +x_clear_window_mouse_face (w) + struct window *w; +{ + Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (XFRAME (w->frame)); + Lisp_Object window; + + BLOCK_INPUT; + XSETWINDOW (window, w); + if (EQ (window, dpyinfo->mouse_face_window)) + clear_mouse_face (dpyinfo); + UNBLOCK_INPUT; +} + + +/* EXPORT: + Just discard the mouse face information for frame F, if any. + This is used when the size of F is changed. */ + +void +cancel_mouse_face (f) + struct frame *f; +{ + Lisp_Object window; + Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (f); + + window = dpyinfo->mouse_face_window; + if (! NILP (window) && XFRAME (XWINDOW (window)->frame) == f) + { + dpyinfo->mouse_face_beg_row = dpyinfo->mouse_face_beg_col = -1; + dpyinfo->mouse_face_end_row = dpyinfo->mouse_face_end_col = -1; + dpyinfo->mouse_face_window = Qnil; + } +} + + +#endif /* HAVE_WINDOW_SYSTEM */ + + +/*********************************************************************** + Exposure Events + ***********************************************************************/ + +#ifdef HAVE_WINDOW_SYSTEM + +/* Redraw the part of glyph row area AREA of glyph row ROW on window W + which intersects rectangle R. R is in window-relative coordinates. */ + +static void +expose_area (w, row, r, area) + struct window *w; + struct glyph_row *row; + XRectangle *r; + enum glyph_row_area area; +{ + struct glyph *first = row->glyphs[area]; + struct glyph *end = row->glyphs[area] + row->used[area]; + struct glyph *last; + int first_x, start_x, x; + + if (area == TEXT_AREA && row->fill_line_p) + /* If row extends face to end of line write the whole line. */ + draw_glyphs (w, 0, row, area, + 0, row->used[area], + DRAW_NORMAL_TEXT, 0); + else + { + /* Set START_X to the window-relative start position for drawing glyphs of + AREA. The first glyph of the text area can be partially visible. + The first glyphs of other areas cannot. */ + start_x = window_box_left_offset (w, area); + if (area == TEXT_AREA) + start_x += row->x; + x = start_x; + + /* Find the first glyph that must be redrawn. */ + while (first < end + && x + first->pixel_width < r->x) + { + x += first->pixel_width; + ++first; + } + + /* Find the last one. */ + last = first; + first_x = x; + while (last < end + && x < r->x + r->width) + { + x += last->pixel_width; + ++last; + } + + /* Repaint. */ + if (last > first) + draw_glyphs (w, first_x - start_x, row, area, + first - row->glyphs[area], last - row->glyphs[area], + DRAW_NORMAL_TEXT, 0); + } +} + + +/* Redraw the parts of the glyph row ROW on window W intersecting + rectangle R. R is in window-relative coordinates. Value is + non-zero if mouse-face was overwritten. */ + +static int +expose_line (w, row, r) + struct window *w; + struct glyph_row *row; + XRectangle *r; +{ + xassert (row->enabled_p); + + if (row->mode_line_p || w->pseudo_window_p) + draw_glyphs (w, 0, row, TEXT_AREA, + 0, row->used[TEXT_AREA], + DRAW_NORMAL_TEXT, 0); + else + { + if (row->used[LEFT_MARGIN_AREA]) + expose_area (w, row, r, LEFT_MARGIN_AREA); + if (row->used[TEXT_AREA]) + expose_area (w, row, r, TEXT_AREA); + if (row->used[RIGHT_MARGIN_AREA]) + expose_area (w, row, r, RIGHT_MARGIN_AREA); + draw_row_fringe_bitmaps (w, row); + } + + return row->mouse_face_p; +} + + +/* Redraw those parts of glyphs rows during expose event handling that + overlap other rows. Redrawing of an exposed line writes over parts + of lines overlapping that exposed line; this function fixes that. + + W is the window being exposed. FIRST_OVERLAPPING_ROW is the first + row in W's current matrix that is exposed and overlaps other rows. + LAST_OVERLAPPING_ROW is the last such row. */ + +static void +expose_overlaps (w, first_overlapping_row, last_overlapping_row) + struct window *w; + struct glyph_row *first_overlapping_row; + struct glyph_row *last_overlapping_row; +{ + struct glyph_row *row; + + for (row = first_overlapping_row; row <= last_overlapping_row; ++row) + if (row->overlapping_p) + { + xassert (row->enabled_p && !row->mode_line_p); + + if (row->used[LEFT_MARGIN_AREA]) + x_fix_overlapping_area (w, row, LEFT_MARGIN_AREA); + + if (row->used[TEXT_AREA]) + x_fix_overlapping_area (w, row, TEXT_AREA); + + if (row->used[RIGHT_MARGIN_AREA]) + x_fix_overlapping_area (w, row, RIGHT_MARGIN_AREA); + } +} + + +/* Return non-zero if W's cursor intersects rectangle R. */ + +static int +phys_cursor_in_rect_p (w, r) + struct window *w; + XRectangle *r; +{ + XRectangle cr, result; + struct glyph *cursor_glyph; + + cursor_glyph = get_phys_cursor_glyph (w); + if (cursor_glyph) + { + /* r is relative to W's box, but w->phys_cursor.x is relative + to left edge of W's TEXT area. Adjust it. */ + cr.x = window_box_left_offset (w, TEXT_AREA) + w->phys_cursor.x; + cr.y = w->phys_cursor.y; + cr.width = cursor_glyph->pixel_width; + cr.height = w->phys_cursor_height; + /* ++KFS: W32 version used W32-specific IntersectRect here, but + I assume the effect is the same -- and this is portable. */ + return x_intersect_rectangles (&cr, r, &result); + } + else + return 0; +} + + +/* EXPORT: + Draw a vertical window border to the right of window W if W doesn't + have vertical scroll bars. */ + +void +x_draw_vertical_border (w) + struct window *w; +{ + /* We could do better, if we knew what type of scroll-bar the adjacent + windows (on either side) have... But we don't :-( + However, I think this works ok. ++KFS 2003-04-25 */ + + /* Redraw borders between horizontally adjacent windows. Don't + do it for frames with vertical scroll bars because either the + right scroll bar of a window, or the left scroll bar of its + neighbor will suffice as a border. */ + if (!WINDOW_RIGHTMOST_P (w) + && !WINDOW_HAS_VERTICAL_SCROLL_BAR_ON_RIGHT (w)) + { + int x0, x1, y0, y1; + + window_box_edges (w, -1, &x0, &y0, &x1, &y1); + y1 -= 1; + + rif->draw_vertical_window_border (w, x1, y0, y1); + } + else if (!WINDOW_LEFTMOST_P (w) + && !WINDOW_HAS_VERTICAL_SCROLL_BAR_ON_LEFT (w)) + { + int x0, x1, y0, y1; + + window_box_edges (w, -1, &x0, &y0, &x1, &y1); + y1 -= 1; + + rif->draw_vertical_window_border (w, x0, y0, y1); + } +} + + +/* Redraw the part of window W intersection rectangle FR. Pixel + coordinates in FR are frame-relative. Call this function with + input blocked. Value is non-zero if the exposure overwrites + mouse-face. */ + +static int +expose_window (w, fr) + struct window *w; + XRectangle *fr; +{ + struct frame *f = XFRAME (w->frame); + XRectangle wr, r; + int mouse_face_overwritten_p = 0; + + /* If window is not yet fully initialized, do nothing. This can + happen when toolkit scroll bars are used and a window is split. + Reconfiguring the scroll bar will generate an expose for a newly + created window. */ + if (w->current_matrix == NULL) + return 0; + + /* When we're currently updating the window, display and current + matrix usually don't agree. Arrange for a thorough display + later. */ + if (w == updated_window) + { + SET_FRAME_GARBAGED (f); + return 0; + } + + /* Frame-relative pixel rectangle of W. */ + wr.x = WINDOW_LEFT_EDGE_X (w); + wr.y = WINDOW_TOP_EDGE_Y (w); + wr.width = WINDOW_TOTAL_WIDTH (w); + wr.height = WINDOW_TOTAL_HEIGHT (w); + + if (x_intersect_rectangles (fr, &wr, &r)) + { + int yb = window_text_bottom_y (w); + struct glyph_row *row; + int cursor_cleared_p; + struct glyph_row *first_overlapping_row, *last_overlapping_row; + + TRACE ((stderr, "expose_window (%d, %d, %d, %d)\n", + r.x, r.y, r.width, r.height)); + + /* Convert to window coordinates. */ + r.x -= WINDOW_LEFT_EDGE_X (w); + r.y -= WINDOW_TOP_EDGE_Y (w); + + /* Turn off the cursor. */ + if (!w->pseudo_window_p + && phys_cursor_in_rect_p (w, &r)) + { + x_clear_cursor (w); + cursor_cleared_p = 1; + } + else + cursor_cleared_p = 0; + + /* Update lines intersecting rectangle R. */ + first_overlapping_row = last_overlapping_row = NULL; + for (row = w->current_matrix->rows; + row->enabled_p; + ++row) + { + int y0 = row->y; + int y1 = MATRIX_ROW_BOTTOM_Y (row); + + if ((y0 >= r.y && y0 < r.y + r.height) + || (y1 > r.y && y1 < r.y + r.height) + || (r.y >= y0 && r.y < y1) + || (r.y + r.height > y0 && r.y + r.height < y1)) + { + if (row->overlapping_p) + { + if (first_overlapping_row == NULL) + first_overlapping_row = row; + last_overlapping_row = row; + } + + if (expose_line (w, row, &r)) + mouse_face_overwritten_p = 1; + } + + if (y1 >= yb) + break; + } + + /* Display the mode line if there is one. */ + if (WINDOW_WANTS_MODELINE_P (w) + && (row = MATRIX_MODE_LINE_ROW (w->current_matrix), + row->enabled_p) + && row->y < r.y + r.height) + { + if (expose_line (w, row, &r)) + mouse_face_overwritten_p = 1; + } + + if (!w->pseudo_window_p) + { + /* Fix the display of overlapping rows. */ + if (first_overlapping_row) + expose_overlaps (w, first_overlapping_row, last_overlapping_row); + + /* Draw border between windows. */ + x_draw_vertical_border (w); + + /* Turn the cursor on again. */ + if (cursor_cleared_p) + update_window_cursor (w, 1); + } + } + +#ifdef HAVE_CARBON + /* Display scroll bar for this window. */ + if (!NILP (w->vertical_scroll_bar)) + { + /* ++KFS: + If this doesn't work here (maybe some header files are missing), + make a function in macterm.c and call it to do the job! */ + ControlHandle ch + = SCROLL_BAR_CONTROL_HANDLE (XSCROLL_BAR (w->vertical_scroll_bar)); + + Draw1Control (ch); + } +#endif + + return mouse_face_overwritten_p; +} + + + +/* Redraw (parts) of all windows in the window tree rooted at W that + intersect R. R contains frame pixel coordinates. Value is + non-zero if the exposure overwrites mouse-face. */ + +static int +expose_window_tree (w, r) + struct window *w; + XRectangle *r; +{ + struct frame *f = XFRAME (w->frame); + int mouse_face_overwritten_p = 0; + + while (w && !FRAME_GARBAGED_P (f)) + { + if (!NILP (w->hchild)) + mouse_face_overwritten_p + |= expose_window_tree (XWINDOW (w->hchild), r); + else if (!NILP (w->vchild)) + mouse_face_overwritten_p + |= expose_window_tree (XWINDOW (w->vchild), r); + else + mouse_face_overwritten_p |= expose_window (w, r); + + w = NILP (w->next) ? NULL : XWINDOW (w->next); + } + + return mouse_face_overwritten_p; +} + + +/* EXPORT: + Redisplay an exposed area of frame F. X and Y are the upper-left + corner of the exposed rectangle. W and H are width and height of + the exposed area. All are pixel values. W or H zero means redraw + the entire frame. */ + +void +expose_frame (f, x, y, w, h) + struct frame *f; + int x, y, w, h; +{ + XRectangle r; + int mouse_face_overwritten_p = 0; + + TRACE ((stderr, "expose_frame ")); + + /* No need to redraw if frame will be redrawn soon. */ + if (FRAME_GARBAGED_P (f)) + { + TRACE ((stderr, " garbaged\n")); + return; + } + +#ifdef HAVE_CARBON + /* MAC_TODO: this is a kludge, but if scroll bars are not activated + or deactivated here, for unknown reasons, activated scroll bars + are shown in deactivated frames in some instances. */ + if (f == FRAME_MAC_DISPLAY_INFO (f)->x_focus_frame) + activate_scroll_bars (f); + else + deactivate_scroll_bars (f); +#endif + + /* If basic faces haven't been realized yet, there is no point in + trying to redraw anything. This can happen when we get an expose + event while Emacs is starting, e.g. by moving another window. */ + if (FRAME_FACE_CACHE (f) == NULL + || FRAME_FACE_CACHE (f)->used < BASIC_FACE_ID_SENTINEL) + { + TRACE ((stderr, " no faces\n")); + return; + } + + if (w == 0 || h == 0) + { + r.x = r.y = 0; + r.width = FRAME_COLUMN_WIDTH (f) * FRAME_COLS (f); + r.height = FRAME_LINE_HEIGHT (f) * FRAME_LINES (f); + } + else + { + r.x = x; + r.y = y; + r.width = w; + r.height = h; + } + + TRACE ((stderr, "(%d, %d, %d, %d)\n", r.x, r.y, r.width, r.height)); + mouse_face_overwritten_p = expose_window_tree (XWINDOW (f->root_window), &r); + + if (WINDOWP (f->tool_bar_window)) + mouse_face_overwritten_p + |= expose_window (XWINDOW (f->tool_bar_window), &r); + +#ifdef HAVE_X_WINDOWS +#ifndef MSDOS +#ifndef USE_X_TOOLKIT + if (WINDOWP (f->menu_bar_window)) + mouse_face_overwritten_p + |= expose_window (XWINDOW (f->menu_bar_window), &r); +#endif /* not USE_X_TOOLKIT */ +#endif +#endif + + /* Some window managers support a focus-follows-mouse style with + delayed raising of frames. Imagine a partially obscured frame, + and moving the mouse into partially obscured mouse-face on that + frame. The visible part of the mouse-face will be highlighted, + then the WM raises the obscured frame. With at least one WM, KDE + 2.1, Emacs is not getting any event for the raising of the frame + (even tried with SubstructureRedirectMask), only Expose events. + These expose events will draw text normally, i.e. not + highlighted. Which means we must redo the highlight here. + Subsume it under ``we love X''. --gerd 2001-08-15 */ + /* Included in Windows version because Windows most likely does not + do the right thing if any third party tool offers + focus-follows-mouse with delayed raise. --jason 2001-10-12 */ + if (mouse_face_overwritten_p && !FRAME_GARBAGED_P (f)) + { + Display_Info *dpyinfo = FRAME_X_DISPLAY_INFO (f); + if (f == dpyinfo->mouse_face_mouse_frame) + { + int x = dpyinfo->mouse_face_mouse_x; + int y = dpyinfo->mouse_face_mouse_y; + clear_mouse_face (dpyinfo); + note_mouse_highlight (f, x, y); + } + } +} + + +/* EXPORT: + Determine the intersection of two rectangles R1 and R2. Return + the intersection in *RESULT. Value is non-zero if RESULT is not + empty. */ + +int +x_intersect_rectangles (r1, r2, result) + XRectangle *r1, *r2, *result; +{ + XRectangle *left, *right; + XRectangle *upper, *lower; + int intersection_p = 0; + + /* Rearrange so that R1 is the left-most rectangle. */ + if (r1->x < r2->x) + left = r1, right = r2; + else + left = r2, right = r1; + + /* X0 of the intersection is right.x0, if this is inside R1, + otherwise there is no intersection. */ + if (right->x <= left->x + left->width) + { + result->x = right->x; + + /* The right end of the intersection is the minimum of the + the right ends of left and right. */ + result->width = (min (left->x + left->width, right->x + right->width) + - result->x); + + /* Same game for Y. */ + if (r1->y < r2->y) + upper = r1, lower = r2; + else + upper = r2, lower = r1; + + /* The upper end of the intersection is lower.y0, if this is inside + of upper. Otherwise, there is no intersection. */ + if (lower->y <= upper->y + upper->height) + { + result->y = lower->y; + + /* The lower end of the intersection is the minimum of the lower + ends of upper and lower. */ + result->height = (min (lower->y + lower->height, + upper->y + upper->height) + - result->y); + intersection_p = 1; + } + } + + return intersection_p; +} + +#endif /* HAVE_WINDOW_SYSTEM */ + + +/*********************************************************************** + Initialization + ***********************************************************************/ + +void +syms_of_xdisp () +{ + Vwith_echo_area_save_vector = Qnil; + staticpro (&Vwith_echo_area_save_vector); + + Vmessage_stack = Qnil; + staticpro (&Vmessage_stack); + + Qinhibit_redisplay = intern ("inhibit-redisplay"); + staticpro (&Qinhibit_redisplay); + + message_dolog_marker1 = Fmake_marker (); + staticpro (&message_dolog_marker1); + message_dolog_marker2 = Fmake_marker (); + staticpro (&message_dolog_marker2); + message_dolog_marker3 = Fmake_marker (); + staticpro (&message_dolog_marker3); + +#if GLYPH_DEBUG + defsubr (&Sdump_frame_glyph_matrix); + defsubr (&Sdump_glyph_matrix); + defsubr (&Sdump_glyph_row); + defsubr (&Sdump_tool_bar_row); + defsubr (&Strace_redisplay); + defsubr (&Strace_to_stderr); +#endif +#ifdef HAVE_WINDOW_SYSTEM + defsubr (&Stool_bar_lines_needed); +#endif + defsubr (&Sformat_mode_line); + + staticpro (&Qmenu_bar_update_hook); + Qmenu_bar_update_hook = intern ("menu-bar-update-hook"); staticpro (&Qoverriding_terminal_local_map); Qoverriding_terminal_local_map = intern ("overriding-terminal-local-map"); diff --cc src/xfaces.c index e296c52a2b8,cb976c33153..91214977d8e --- a/src/xfaces.c +++ b/src/xfaces.c @@@ -495,12 -543,13 +498,12 @@@ static int try_font_list P_ ((struct fr static int try_alternative_families P_ ((struct frame *f, Lisp_Object, Lisp_Object, struct font_name **)); static int cmp_font_names P_ ((const void *, const void *)); - static struct face *realize_face P_ ((struct face_cache *, Lisp_Object *, int, - struct face *, int)); - static struct face *realize_x_face P_ ((struct face_cache *, - Lisp_Object *, int, struct face *)); - static struct face *realize_tty_face P_ ((struct face_cache *, - Lisp_Object *, int)); + static struct face *realize_face P_ ((struct face_cache *, Lisp_Object *, + int)); + static struct face *realize_non_ascii_face P_ ((struct frame *, int, + struct face *)); - + static struct face *realize_x_face P_ ((struct face_cache *, Lisp_Object *)); + static struct face *realize_tty_face P_ ((struct face_cache *, Lisp_Object *)); static int realize_basic_faces P_ ((struct frame *)); static int realize_default_face P_ ((struct frame *)); static void realize_named_face P_ ((struct frame *, Lisp_Object, int)); @@@ -527,9 -574,8 +528,8 @@@ static int set_lface_from_font_name P_ Lisp_Object, int, int)); static Lisp_Object lface_from_face_name P_ ((struct frame *, Lisp_Object, int)); static struct face *make_realized_face P_ ((Lisp_Object *)); - static void free_realized_faces P_ ((struct face_cache *)); static char *best_matching_font P_ ((struct frame *, Lisp_Object *, - struct font_name *, int, int)); + struct font_name *, int, int, int *)); static void cache_face P_ ((struct face_cache *, struct face *, unsigned)); static void uncache_face P_ ((struct face_cache *, struct face *)); static int xlfd_numeric_slant P_ ((struct font_name *)); @@@ -1232,9 -1271,10 +1230,9 @@@ load_face_font (f, face face->font_info_id = -1; face->font = NULL; + face->font_name = NULL; - font_name = choose_face_font (f, face->lface, face->fontset, c, - &needs_overstrike); - font_name = choose_face_font (f, face->lface, Qnil); - ++ font_name = choose_face_font (f, face->lface, Qnil, &needs_overstrike); if (!font_name) return; @@@ -2151,6 -2122,63 +2149,62 @@@ face_numeric_swidth (width return face_numeric_value (swidth_table, DIM (swidth_table), width); } - + Lisp_Object + split_font_name_into_vector (fontname) + Lisp_Object fontname; + { + struct font_name font; + Lisp_Object vec; + int i; + + font.name = LSTRDUPA (fontname); + if (! split_font_name (NULL, &font, 0)) + return Qnil; + vec = Fmake_vector (make_number (XLFD_LAST), Qnil); + for (i = 0; i < XLFD_LAST; i++) + if (font.fields[i][0] != '*') + ASET (vec, i, build_string (font.fields[i])); + return vec; + } + + Lisp_Object + build_font_name_from_vector (vec) + Lisp_Object vec; + { + struct font_name font; + Lisp_Object fontname; + char *p; + int i; + + for (i = 0; i < XLFD_LAST; i++) + { + font.fields[i] = (NILP (AREF (vec, i)) - ? "*" : (char *) XSTRING (AREF (vec, i))->data); ++ ? "*" : (char *) SDATA (AREF (vec, i))); + if ((i == XLFD_FAMILY || i == XLFD_REGISTRY) + && (p = strchr (font.fields[i], '-'))) + { + char *p1 = STRDUPA (font.fields[i]); + + p1[p - font.fields[i]] = '\0'; + if (i == XLFD_FAMILY) + { + font.fields[XLFD_FOUNDRY] = p1; + font.fields[XLFD_FAMILY] = p + 1; + } + else + { + font.fields[XLFD_REGISTRY] = p1; + font.fields[XLFD_ENCODING] = p + 1; + break; + } + } + } + + p = build_font_name (&font); + fontname = build_string (p); + xfree (p); + return fontname; + } #ifdef HAVE_WINDOW_SYSTEM @@@ -2249,10 -2277,9 +2303,10 @@@ pixel_point_size (f, pixel /* Return a rescaling ratio of a font of NAME. */ static double -font_rescale_ratio (char *name) +font_rescale_ratio (name) + char *name; { -- Lisp_Object tail, elt; ++ Lisp_Object tail, elt; for (tail = Vface_font_rescale_alist; CONSP (tail); tail = XCDR (tail)) { @@@ -2463,9 -2487,6 +2517,9 @@@ x_face_list_fonts (f, pattern, pfonts, lfonts = x_list_fonts (f, lpattern, -1, nfonts); #endif + if (nfonts < 0 && CONSP (lfonts)) + num_fonts = XFASTINT (Flength (lfonts)); - ++ /* Make a copy of the font names we got from X, and split them into fields. */ n = nignored = 0; @@@ -2638,12 -2647,11 +2692,12 @@@ cmp_font_names (a, b } - /* Get a sorted list of fonts of family FAMILY on frame F. If PATTERN - is non-nil list fonts matching that pattern. Otherwise, if - REGISTRY is non-nil return only fonts with that registry, otherwise - return fonts of any registry. Set *FONTS to a vector of font_name - structures allocated from the heap containing the fonts found. - Value is the number of fonts found. */ -/* Get a sorted list of fonts matching PATTERN. If PATTERN is nil, - list fonts matching FAMILY and REGISTRY. FAMILY is a family name - string or nil. REGISTRY is a registry name string. Set *FONTS to - a vector of font_name structures allocated from the heap containing - the fonts found. Value is the number of fonts found. */ ++/* Get a sorted list of fonts matching PATTERN on frame F. If PATTERN ++ is nil, list fonts matching FAMILY and REGISTRY. FAMILY is a ++ family name string or nil. REGISTRY is a registry name string. ++ Set *FONTS to a vector of font_name structures allocated from the ++ heap containing the fonts found. Value is the number of fonts ++ found. */ static int font_list_1 (f, pattern, family, registry, fonts) @@@ -2704,11 -2712,12 +2758,12 @@@ concat_font_list (fonts1, nfonts1, font /* Get a sorted list of fonts of family FAMILY on frame F. - If PATTERN is non-nil list fonts matching that pattern. + If PATTERN is non-nil, list fonts matching that pattern. - If REGISTRY is non-nil, return fonts with that registry and the - alternative registries from Vface_alternative_font_registry_alist. + If REGISTRY is non-nil, it is a list of registry (and encoding) + names. Return fonts with those registries and the alternative + registries from Vface_alternative_font_registry_alist. - + If REGISTRY is nil return fonts of any registry. Set *FONTS to a vector of font_name structures allocated from the @@@ -3179,8 -3199,8 +3239,8 @@@ lface_fully_specified_p (attrs for (i = 1; i < LFACE_VECTOR_SIZE; ++i) if (i != LFACE_FONT_INDEX && i != LFACE_INHERIT_INDEX -- && i != LFACE_AVGWIDTH_INDEX) - if (UNSPECIFIEDP (attrs[i])) ++ && i != LFACE_AVGWIDTH_INDEX && i != LFACE_FONTSET_INDEX) + if (UNSPECIFIEDP (attrs[i])) break; return i == LFACE_VECTOR_SIZE; @@@ -3217,8 -3240,14 +3277,14 @@@ set_lface_from_font_name (f, lface, fon /* If FONTNAME is actually a fontset name, get ASCII font name of it. */ fontset = fs_query_fontset (fontname, 0); - if (fontset >= 0) + if (fontset > 0) - font_name = XSTRING (fontset_ascii (fontset))->data; + font_name = SDATA (fontset_ascii (fontset)); + else if (fontset == 0) + { + if (may_fail_p) + return 0; + abort (); + } /* Check if FONT_NAME is surely available on the system. Usually FONT_NAME is already cached for the frame F and FS_LOAD_FONT @@@ -4126,10 -4160,10 +4197,10 @@@ FRAME 0 means change the face on all fr LFACE_SWIDTH (lface) = value; font_related_attr_p = 1; } - else if (EQ (attr, QCfont)) + else if (EQ (attr, QCfont) || EQ (attr, QCfontset)) { #ifdef HAVE_WINDOW_SYSTEM - if (FRAME_WINDOW_P (XFRAME (frame))) + if (EQ (frame, Qt) || FRAME_WINDOW_P (XFRAME (frame))) { /* Set font-related attributes of the Lisp face from an XLFD font name. */ @@@ -4141,19 -4176,21 +4212,26 @@@ else f = check_x_frame (frame); - /* VALUE may be a fontset name or an alias of fontset. In - such a case, use the base fontset name. */ - tmp = Fquery_fontset (value, Qnil); - if (!NILP (tmp)) - value = tmp; - else if (EQ (attr, QCfontset)) - signal_error ("Invalid fontset name", value); - - if (EQ (attr, QCfont)) + if (!UNSPECIFIEDP (value)) { - if (!set_lface_from_font_name (f, lface, value, 1, 1)) - signal_error ("Invalid font or fontset name", value); + CHECK_STRING (value); + + /* VALUE may be a fontset name or an alias of fontset. In + such a case, use the base fontset name. */ + tmp = Fquery_fontset (value, Qnil); + if (!NILP (tmp)) + value = tmp; ++ else if (EQ (attr, QCfontset)) ++ signal_error ("Invalid fontset name", value); + - if (!set_lface_from_font_name (f, lface, value, 1, 1)) - signal_error ("Invalid font or fontset name", value); ++ if (EQ (attr, QCfont)) ++ { ++ if (!set_lface_from_font_name (f, lface, value, 1, 1)) ++ signal_error ("Invalid font or fontset name", value); ++ } ++ else ++ LFACE_FONTSET (lface) = value; } - else - LFACE_FONTSET (lface) = value; font_attr_p = 1; } @@@ -4333,7 -4371,7 +4412,7 @@@ set_font_frame_param (frame, lface /* Choose a font name that reflects LFACE's attributes and has the registry and encoding pattern specified in the default fontset (3rd arg: -1) for ASCII characters (4th arg: 0). */ - font = choose_face_font (f, XVECTOR (lface)->contents, -1, 0, 0); - font = choose_face_font (f, XVECTOR (lface)->contents, Qnil); ++ font = choose_face_font (f, XVECTOR (lface)->contents, Qnil, NULL); if (!font) error ("No font matches the specified attribute"); font_name = build_string (font); @@@ -4988,8 -5031,14 +5069,14 @@@ lface_same_font_attributes_p (lface1, l && (EQ (lface1[LFACE_FONT_INDEX], lface2[LFACE_FONT_INDEX]) || (STRINGP (lface1[LFACE_FONT_INDEX]) && STRINGP (lface2[LFACE_FONT_INDEX]) - && xstricmp (SDATA (lface1[LFACE_FONT_INDEX]), - SDATA (lface2[LFACE_FONT_INDEX]))))); - && ! xstricmp (XSTRING (lface1[LFACE_FONT_INDEX])->data, - XSTRING (lface2[LFACE_FONT_INDEX])->data))) ++ && ! xstricmp (SDATA (lface1[LFACE_FONT_INDEX]), ++ SDATA (lface2[LFACE_FONT_INDEX])))) + && (EQ (lface1[LFACE_FONTSET_INDEX], lface2[LFACE_FONTSET_INDEX]) + || (STRINGP (lface1[LFACE_FONTSET_INDEX]) + && STRINGP (lface2[LFACE_FONTSET_INDEX]) - && ! xstricmp (XSTRING (lface1[LFACE_FONTSET_INDEX])->data, - XSTRING (lface2[LFACE_FONTSET_INDEX])->data))) ++ && ! xstricmp (SDATA (lface1[LFACE_FONTSET_INDEX]), ++ SDATA (lface2[LFACE_FONTSET_INDEX])))) + ); } @@@ -5671,16 -5469,59 +5750,58 @@@ lookup_face (f, attr } + /* Look up a realized face that has the same attributes as BASE_FACE + except for the font in the face cache of frame F. If FONT_ID is + not negative, it is an ID number of an already opened font that is + used by the face. If FONT_ID is negative, the face has no font. + Value is the ID of the face found. If no suitable face is found, + realize a new one. */ + -INLINE int ++int + lookup_non_ascii_face (f, font_id, base_face) + struct frame *f; + int font_id; + struct face *base_face; + { + struct face_cache *cache = FRAME_FACE_CACHE (f); + unsigned hash; + int i; + struct face *face; + + xassert (cache != NULL); + base_face = base_face->ascii_face; + hash = lface_hash (base_face->lface); + i = hash % FACE_CACHE_BUCKETS_SIZE; + + for (face = cache->buckets[i]; face; face = face->next) + { + if (face->ascii_face == face) + continue; + if (face->ascii_face == base_face + && face->font_info_id == font_id) + break; + } + + /* If not found, realize a new face. */ + if (face == NULL) + face = realize_non_ascii_face (f, font_id, base_face); + + #if GLYPH_DEBUG + xassert (face == FACE_FROM_ID (f, face->id)); + #endif /* GLYPH_DEBUG */ + + return face->id; + } + - /* Return the face id of the realized face for named face SYMBOL on - frame F suitable for displaying character C. Value is -1 if the - face couldn't be determined, which might happen if the default face - isn't realized and cannot be realized. */ + frame F suitable for displaying ASCII characters. Value is -1 if + the face couldn't be determined, which might happen if the default + face isn't realized and cannot be realized. */ int - lookup_named_face (f, symbol, c) + lookup_named_face (f, symbol) struct frame *f; Lisp_Object symbol; - int c; { Lisp_Object attrs[LFACE_VECTOR_SIZE]; Lisp_Object symbol_attrs[LFACE_VECTOR_SIZE]; @@@ -5859,7 -5699,7 +5979,7 @@@ DEFUN ("face-attributes-as-vector", Ffa Font selection ***********************************************************************/ --DEFUN ("internal-set-font-selection-order", ++ DEFUN ("internal-set-font-selection-order", Finternal_set_font_selection_order, Sinternal_set_font_selection_order, 1, 1, 0, doc: /* Set font selection order for face font selection to ORDER. @@@ -6363,69 -6176,53 +6483,53 @@@ try_alternative_families (f, family, re /* Get a list of matching fonts on frame F. - FAMILY, if a string, specifies a font family derived from the fontset. - It is only used if the face does not specify any family in ATTRS or - if we cannot find any font of the face's family. + PATTERN, if a string, specifies a font name pattern to match while + ignoring FAMILY and REGISTRY. - REGISTRY, if a string, specifies a font registry and encoding to - match. A value of nil means include fonts of any registry and - encoding. + FAMILY, if a list, specifies a list of font families to try. - If PREFER_FACE_FAMILY is nonzero, perfer face's family to FAMILY. - Otherwise, prefer FAMILY. + REGISTRY, if a list, specifies a list of font registries and + encodinging to try. - + Return in *FONTS a pointer to a vector of font_name structures for the fonts matched. Value is the number of fonts found. */ static int - try_font_list (f, attrs, family, registry, fonts, prefer_face_family) + try_font_list (f, pattern, family, registry, fonts) struct frame *f; - Lisp_Object *attrs; - Lisp_Object family, registry; + Lisp_Object pattern, family, registry; struct font_name **fonts; - int prefer_face_family; { int nfonts = 0; - Lisp_Object face_family = attrs[LFACE_FAMILY_INDEX]; - Lisp_Object try_family; - - try_family = (prefer_face_family || NILP (family)) ? face_family : family; - - if (STRINGP (try_family)) - nfonts = try_alternative_families (f, try_family, registry, fonts); - #ifdef MAC_OS - /* When realizing the default face and a font spec does not matched - exactly, Emacs looks for ones with the same registry as the - default font. On the Mac, this is mac-roman, which does not work - if the family is -etl-fixed, e.g. The following widens the - choices and fixes that problem. */ - if (nfonts == 0 && STRINGP (try_family) && STRINGP (registry) - && xstricmp (SDATA (registry), "mac-roman") == 0) - nfonts = try_alternative_families (f, try_family, Qnil, fonts); - #endif + if (STRINGP (pattern)) + nfonts = font_list (f, pattern, Qnil, Qnil, fonts); + else + { + Lisp_Object tail; - if (EQ (try_family, family)) - family = face_family; + if (NILP (family)) + nfonts = font_list (f, Qnil, Qnil, registry, fonts); + else + for (tail = family; ! nfonts && CONSP (tail); tail = XCDR (tail)) + nfonts = try_alternative_families (f, XCAR (tail), registry, fonts); - if (nfonts == 0 && STRINGP (family)) - nfonts = try_alternative_families (f, family, registry, fonts); + /* Try font family of the default face or "fixed". */ + if (nfonts == 0 && !NILP (family)) + { + struct face *default_face = FACE_FROM_ID (f, DEFAULT_FACE_ID); + if (default_face) + family = default_face->lface[LFACE_FAMILY_INDEX]; + else + family = build_string ("fixed"); - nfonts = font_list (f, Qnil, family, registry, fonts); ++ nfonts = try_alternative_families (f, family, registry, fonts); + } - + - /* Try font family of the default face or "fixed". */ - if (nfonts == 0) - { - struct face *default_face = FACE_FROM_ID (f, DEFAULT_FACE_ID); - if (default_face) - family = default_face->lface[LFACE_FAMILY_INDEX]; - else - family = build_string ("fixed"); - nfonts = font_list (f, Qnil, family, registry, fonts); + /* Try any family with the given registry. */ + if (nfonts == 0 && !NILP (family)) - nfonts = font_list (f, Qnil, Qnil, registry, fonts); ++ nfonts = try_alternative_families (f, Qnil, registry, fonts); } - /* Try any family with the given registry. */ - if (nfonts == 0) - nfonts = font_list (f, Qnil, Qnil, registry, fonts); - return nfonts; } @@@ -6439,65 -6236,102 +6543,110 @@@ face_fontset (attrs Lisp_Object *attrs; { Lisp_Object name; - int fontset; - name = attrs[LFACE_FONT_INDEX]; + name = attrs[LFACE_FONTSET_INDEX]; if (!STRINGP (name)) return -1; return fs_query_fontset (name, 0); } - /* Choose a name of font to use on frame F to display character C with + /* Choose a name of font to use on frame F to display characters with Lisp face attributes specified by ATTRS. The font name is - determined by the font-related attributes in ATTRS and the name - pattern for C in FONTSET. Value is the font name which is - allocated from the heap and must be freed by the caller, or NULL if - we can get no information about the font name of C. It is assured - that we always get some information for a single byte - character. + determined by the font-related attributes in ATTRS and FONT-SPEC + (if specified). - If NEEDS_OVERSTRIKE is non-zero, a boolean is returned in it to - indicate whether the resulting font should be drawn using overstrike - to simulate bold-face. */ + When we are choosing a font for ASCII characters, FONT-SPEC is + always nil. Otherwise FONT-SPEC is a list + [ FAMILY WEIGHT SLANT SWIDTH ADSTYLE REGISTRY ] + or a string specifying a font name pattern. - static char * - choose_face_font (f, attrs, fontset, c, needs_overstrike) ++ If NEEDS_OVERSTRIKE is not NULL, a boolean is returned in it to ++ indicate whether the resulting font should be drawn using ++ overstrike to simulate bold-face. ++ + Value is the font name which is allocated from the heap and must be + freed by the caller. */ + + char * -choose_face_font (f, attrs, font_spec) ++choose_face_font (f, attrs, font_spec, needs_overstrike) struct frame *f; Lisp_Object *attrs; - int fontset, c; + Lisp_Object font_spec; + int *needs_overstrike; { - Lisp_Object pattern; + Lisp_Object pattern, family, adstyle, registry; char *font_name = NULL; struct font_name *fonts; - int nfonts, width_ratio; + int nfonts; + if (needs_overstrike) + *needs_overstrike = 0; + - /* Get (foundry and) family name and registry (and encoding) name of - a font for C. */ - pattern = fontset_font_pattern (f, fontset, c); - if (NILP (pattern)) + /* If we are choosing an ASCII font and a font name is explicitly + specified in ATTRS, return it. */ + if (NILP (font_spec) && STRINGP (attrs[LFACE_FONT_INDEX])) - return xstrdup (XSTRING (attrs[LFACE_FONT_INDEX])->data); ++ return xstrdup (SDATA (attrs[LFACE_FONT_INDEX])); + + if (NILP (attrs[LFACE_FAMILY_INDEX])) + family = Qnil; + else + family = Fcons (attrs[LFACE_FAMILY_INDEX], Qnil); + + /* Decide FAMILY, ADSTYLE, and REGISTRY from FONT_SPEC. But, + ADSTYLE is not used in the font selector for the moment. */ + if (VECTORP (font_spec)) { - xassert (!SINGLE_BYTE_CHAR_P (c)); - return NULL; + pattern = Qnil; + if (STRINGP (AREF (font_spec, FONT_SPEC_FAMILY_INDEX))) + family = Fcons (AREF (font_spec, FONT_SPEC_FAMILY_INDEX), family); + adstyle = AREF (font_spec, FONT_SPEC_ADSTYLE_INDEX); + registry = Fcons (AREF (font_spec, FONT_SPEC_REGISTRY_INDEX), Qnil); + } + else if (STRINGP (font_spec)) + { + pattern = font_spec; + family = Qnil; + adstyle = Qnil; + registry = Qnil; + } + else + { + /* We are choosing an ASCII font. By default, use the registry + name "iso8859-1". But, if the registry name of the ASCII + font specified in the fontset of ATTRS is not "iso8859-1" + (e.g "iso10646-1"), use also that name with higher + priority. */ + int fontset = face_fontset (attrs); + Lisp_Object ascii; + int len; + struct font_name font; + + pattern = Qnil; + adstyle = Qnil; + registry = Fcons (build_string ("iso8859-1"), Qnil); + + ascii = fontset_ascii (fontset); - len = STRING_BYTES (XSTRING (ascii)); ++ len = SBYTES (ascii); + if (len < 9 - || strcmp (XSTRING (ascii)->data + len - 9, "iso8859-1")) ++ || strcmp (SDATA (ascii) + len - 9, "iso8859-1")) + { + font.name = LSTRDUPA (ascii); + /* Check if the name is in XLFD. */ + if (split_font_name (f, &font, 0)) + { + font.fields[XLFD_ENCODING][-1] = '-'; + registry = Fcons (build_string (font.fields[XLFD_REGISTRY]), + registry); + } + } } - - /* If what we got is a name pattern, return it. */ - if (STRINGP (pattern)) - return xstrdup (SDATA (pattern)); /* Get a list of fonts matching that pattern and choose the best match for the specified face attributes from it. */ - nfonts = try_font_list (f, attrs, XCAR (pattern), XCDR (pattern), &fonts, - (SINGLE_BYTE_CHAR_P (c) - || CHAR_CHARSET (c) == charset_latin_iso8859_1)); - width_ratio = (SINGLE_BYTE_CHAR_P (c) - ? 1 - : CHARSET_WIDTH (CHAR_CHARSET (c))); - font_name = best_matching_font (f, attrs, fonts, nfonts, width_ratio, + nfonts = try_font_list (f, pattern, family, registry, &fonts); - font_name = best_matching_font (f, attrs, fonts, nfonts, NILP (font_spec)); ++ font_name = best_matching_font (f, attrs, fonts, nfonts, NILP (font_spec), + needs_overstrike); return font_name; } @@@ -6739,10 -6566,50 +6884,50 @@@ realize_face (cache, attrs, former_face /* Insert the new face. */ cache_face (cache, face, lface_hash (attrs)); - #ifdef HAVE_WINDOW_SYSTEM - if (FRAME_WINDOW_P (cache->f) && face->font == NULL) - load_face_font (cache->f, face, c); - #endif /* HAVE_WINDOW_SYSTEM */ + return face; + } + + + /* Realize the fully-specified face that has the same attributes as + BASE_FACE except for the font on frame F. If FONT_ID is not + negative, it is an ID number of an already opened font that should + be used by the face. If FONT_ID is negative, the face has no font, + i.e., characters are displayed by empty boxes. */ + + static struct face * + realize_non_ascii_face (f, font_id, base_face) + struct frame *f; + int font_id; + struct face *base_face; + { + struct face_cache *cache = FRAME_FACE_CACHE (f); - struct face *face; ++ struct face *face; + struct font_info *font_info; + + face = (struct face *) xmalloc (sizeof *face); + *face = *base_face; + face->gc = 0; + + /* Don't try to free the colors copied bitwise from BASE_FACE. */ + face->colors_copied_bitwise_p = 1; + + face->font_info_id = font_id; + if (font_id >= 0) + { + font_info = FONT_INFO_FROM_ID (f, font_id); + face->font = font_info->font; + face->font_name = font_info->full_name; + } + else + { + face->font = NULL; + face->font_name = NULL; + } + + face->gc = 0; + + cache_face (cache, face, face->hash); + return face; } @@@ -6824,10 -6661,30 +6979,16 @@@ realize_x_face (cache, attrs are constructed from ATTRS. */ int fontset = face_fontset (attrs); - if ((fontset == -1) && default_face) + /* If we are realizing the default face, ATTRS should specify a + fontset. In other words, if FONTSET is -1, we are not + realizing the default face, thus the default face should have + already been realized. */ + if (fontset == -1) fontset = default_face->fontset; - face->fontset = make_fontset_for_ascii_face (f, fontset); - face->font = NULL; /* to force realize_face to load font */ + if (fontset == -1) + abort (); - face->font = NULL; /* to force realize_face to load font */ - -#ifdef macintosh - /* Load the font if it is specified in ATTRS. This fixes - changing frame font on the Mac. */ - if (STRINGP (attrs[LFACE_FONT_INDEX])) - { - struct font_info *font_info = - FS_LOAD_FONT (f, XSTRING (attrs[LFACE_FONT_INDEX])->data); - if (font_info) - face->font = font_info->font; - } -#endif - if (! face->font) - load_face_font (f, face); ++ load_face_font (f, face); + face->fontset = make_fontset_for_ascii_face (f, fontset, face); } /* Load colors, and set remaining attributes. */ @@@ -6959,7 -6816,7 +7120,6 @@@ if (!NILP (stipple)) face->stipple = load_pixmap (f, stipple, &face->pixmap_w, &face->pixmap_h); -- xassert (FACE_SUITABLE_FOR_CHAR_P (face, c)); return face; #endif /* HAVE_WINDOW_SYSTEM */ } @@@ -7430,9 -7291,8 +7594,8 @@@ dump_realized_face (face fprintf (stderr, "fontset: %d\n", face->fontset); fprintf (stderr, "underline: %d (%s)\n", face->underline_p, - XSTRING (Fsymbol_name (face->lface[LFACE_UNDERLINE_INDEX]))->data); + SDATA (Fsymbol_name (face->lface[LFACE_UNDERLINE_INDEX]))); fprintf (stderr, "hash: %d\n", face->hash); - fprintf (stderr, "charset: %d\n", face->charset); } diff --cc src/xfns.c index af7ed7473e8,014b82fc1ca..758a0fc8345 --- a/src/xfns.c +++ b/src/xfns.c @@@ -1838,49 -2363,29 +1839,29 @@@ x_encode_text (string, coding_system, s int *text_bytes, *stringp; int selectionp; { - unsigned char *str = SDATA (string); - int chars = SCHARS (string); - int bytes = SBYTES (string); - int charset_info; - int bufsize; - unsigned char *buf; + int result = string_xstring_p (string); struct coding_system coding; + extern Lisp_Object Qcompound_text_with_extensions; - charset_info = find_charset_in_text (str, chars, bytes, NULL, Qnil); - if (charset_info == 0) + if (result == 0) { /* No multibyte character in OBJ. We need not encode it. */ - *text_bytes = bytes; - *text_bytes = STRING_BYTES (XSTRING (string)); ++ *text_bytes = SBYTES (string); *stringp = 1; - return str; - return XSTRING (string)->data; ++ return SDATA (string); } setup_coding_system (coding_system, &coding); - if (selectionp - && SYMBOLP (coding.pre_write_conversion) - && !NILP (Ffboundp (coding.pre_write_conversion))) - { - string = run_pre_post_conversion_on_str (string, &coding, 1); - str = SDATA (string); - chars = SCHARS (string); - bytes = SBYTES (string); - } - coding.src_multibyte = 1; - coding.dst_multibyte = 0; - coding.mode |= CODING_MODE_LAST_BLOCK; - if (coding.type == coding_type_iso2022) - coding.flags |= CODING_FLAG_ISO_SAFE; + coding.mode |= (CODING_MODE_SAFE_ENCODING | CODING_MODE_LAST_BLOCK); /* We suppress producing escape sequences for composition. */ - coding.composing = COMPOSITION_DISABLED; - bufsize = encoding_buffer_size (&coding, bytes); - buf = (unsigned char *) xmalloc (bufsize); - encode_coding (&coding, str, buf, bytes, bufsize); + coding.common_flags &= ~CODING_ANNOTATION_MASK; - coding.dst_bytes = XSTRING (string)->size * 2; ++ coding.dst_bytes = SCHARS (string) * 2; + coding.destination = (unsigned char *) xmalloc (coding.dst_bytes); + encode_coding_object (&coding, string, 0, 0, - XSTRING (string)->size, - STRING_BYTES (XSTRING (string)), Qnil); ++ SCHARS (string), SBYTES (string), Qnil); *text_bytes = coding.produced; - *stringp = (charset_info == 1 - || (!EQ (coding_system, Qcompound_text) - && !EQ (coding_system, Qcompound_text_with_extensions))); - return buf; + *stringp = (result == 1 || !EQ (coding_system, Qcompound_text)); + return coding.destination; } @@@ -3320,37 -4424,42 +3301,41 @@@ This function is an internal primitive- { Lisp_Object font; - font = x_get_arg (dpyinfo, parms, Qfont, - "font", "Font", RES_TYPE_STRING); + font = x_get_arg (dpyinfo, parms, Qfont, "font", "Font", RES_TYPE_STRING); - BLOCK_INPUT; - /* First, try whatever font the caller has specified. */ - if (STRINGP (font)) - { - tem = Fquery_fontset (font, Qnil); - if (STRINGP (tem)) - font = x_new_fontset (f, SDATA (tem)); - else - font = x_new_font (f, SDATA (font)); - } - - /* Try out a font which we hope has bold and italic variations. */ - if (!STRINGP (font)) - font = x_new_font (f, "-adobe-courier-medium-r-*-*-*-120-*-*-*-*-iso8859-1"); + /* If the caller has specified no font, try out fonts which we + hope have bold and italic variations. */ if (!STRINGP (font)) - font = x_new_font (f, "-misc-fixed-medium-r-normal-*-*-140-*-*-c-*-iso8859-1"); - if (! STRINGP (font)) - font = x_new_font (f, "-*-*-medium-r-normal-*-*-140-*-*-c-*-iso8859-1"); - if (! STRINGP (font)) - /* This was formerly the first thing tried, but it finds too many fonts - and takes too long. */ - font = x_new_font (f, "-*-*-medium-r-*-*-*-*-*-*-c-*-iso8859-1"); - /* If those didn't work, look for something which will at least work. */ - if (! STRINGP (font)) - font = x_new_font (f, "-*-fixed-*-*-*-*-*-140-*-*-c-*-iso8859-1"); - UNBLOCK_INPUT; - if (! STRINGP (font)) - font = build_string ("fixed"); + { + char *names[] + = { "-adobe-courier-medium-r-*-*-*-120-*-*-*-*-iso8859-1", + "-misc-fixed-medium-r-normal-*-*-140-*-*-c-*-iso8859-1", + "-*-*-medium-r-normal-*-*-140-*-*-c-*-iso8859-1", + /* This was formerly the first thing tried, but it finds + too many fonts and takes too long. */ + "-*-*-medium-r-*-*-*-*-*-*-c-*-iso8859-1", + /* If those didn't work, look for something which will + at least work. */ + "-*-fixed-*-*-*-*-*-140-*-*-c-*-iso8859-1", + NULL }; + int i; + + BLOCK_INPUT; + for (i = 0; names[i]; i++) + { + Lisp_Object list; + list = x_list_fonts (f, build_string (names[i]), 0, 1); + if (CONSP (list)) + { + font = XCAR (list); + break; + } + } + UNBLOCK_INPUT; + if (! STRINGP (font)) + font = build_string ("fixed"); + } x_default_parameter (f, parms, Qfont, font, "font", "Font", RES_TYPE_STRING); } @@@ -9885,11 -11030,11 +9870,11 @@@ x_create_tip_frame (dpyinfo, parms, tex { tem = Fquery_fontset (font, Qnil); if (STRINGP (tem)) - font = x_new_fontset (f, SDATA (tem)); + font = x_new_fontset (f, tem); else - font = x_new_fontset (f, font); + font = x_new_font (f, SDATA (font)); } - + /* Try out a font which we hope has bold and italic variations. */ if (!STRINGP (font)) font = x_new_font (f, "-adobe-courier-medium-r-*-*-*-120-*-*-*-*-iso8859-1"); diff --cc src/xterm.c index 5463ce8e192,b6c63aacfa1..e22e06a107e --- a/src/xterm.c +++ b/src/xterm.c @@@ -765,7 -1139,21 +766,8 @@@ XTreset_terminal_modes ( /* Function prototypes of this page. */ - static int x_encode_char P_ ((int, XChar2b *, struct font_info *, int *)); -static struct face *x_get_glyph_face_and_encoding P_ ((struct frame *, - struct glyph *, - XChar2b *, - int *)); -static struct face *x_get_char_face_and_encoding P_ ((struct frame *, int, - int, XChar2b *, int)); -static XCharStruct *x_per_char_metric P_ ((XFontStruct *, XChar2b *)); -static void x_encode_char P_ ((int, XChar2b *, struct font_info *, - struct charset *)); -static void x_append_glyph P_ ((struct it *)); -static void x_append_composite_glyph P_ ((struct it *)); -static void x_append_stretch_glyph P_ ((struct it *it, Lisp_Object, - int, int, double)); -static void x_produce_glyphs P_ ((struct it *)); -static void x_produce_image_glyph P_ ((struct it *it)); ++static int x_encode_char P_ ((int, XChar2b *, struct font_info *, ++ struct charset *, int *)); /* Get metrics of character CHAR2B in FONT. Value is null if CHAR2B @@@ -843,14 -1230,13 +845,14 @@@ x_per_char_metric (font, char2b, font_t /* Encode CHAR2B using encoding information from FONT_INFO. CHAR2B is the two-byte form of C. Encoding is returned in *CHAR2B. */ -static INLINE void -x_encode_char (c, char2b, font_info, charset) +static int - x_encode_char (c, char2b, font_info, two_byte_p) ++x_encode_char (c, char2b, font_info, charset, two_byte_p) int c; XChar2b *char2b; struct font_info *font_info; + struct charset *charset; + int *two_byte_p; { - int charset = CHAR_CHARSET (c); XFontStruct *font = font_info->font; /* FONT_INFO may define a scheme by which to encode byte1 and byte2. @@@ -863,9 -1249,8 +865,9 @@@ if (CHARSET_DIMENSION (charset) == 1) { - ccl->reg[0] = charset; + ccl->reg[0] = CHARSET_ID (charset); ccl->reg[1] = char2b->byte2; + ccl->reg[2] = -1; } else { @@@ -873,22 -1258,22 +875,22 @@@ ccl->reg[1] = char2b->byte1; ccl->reg[2] = char2b->byte2; } - + - ccl_driver (ccl, NULL, NULL, 0, 0, NULL); + ccl_driver (ccl, NULL, NULL, 0, 0, Qnil); - + /* We assume that MSBs are appropriately set/reset by CCL program. */ if (font->max_byte1 == 0) /* 1-byte font */ -- char2b->byte1 = 0, char2b->byte2 = ccl->reg[1]; ++ STORE_XCHAR2B (char2b, 0, ccl->reg[1]); else -- char2b->byte1 = ccl->reg[1], char2b->byte2 = ccl->reg[2]; ++ STORE_XCHAR2B (char2b, ccl->reg[1], ccl->reg[2]); } - else if (font_info->encoding[charset]) + else if (font_info->encoding_type) { /* Fixed encoding scheme. See fontset.h for the meaning of the encoding numbers. */ - int enc = font_info->encoding[charset]; + unsigned char enc = font_info->encoding_type; - + if ((enc == 1 || enc == 2) && CHARSET_DIMENSION (charset) == 2) char2b->byte1 |= 0x80; @@@ -6030,1129 -10330,880 +6032,1129 @@@ handle_one_xevent (dpyinfo, eventp, buf #endif /* USE_LUCID */ #ifdef USE_TOOLKIT_SCROLL_BARS - /* Dispatch event to the widget. */ - goto OTHER; + /* Dispatch event to the widget. */ + goto OTHER; #else /* not USE_TOOLKIT_SCROLL_BARS */ - bar = x_window_to_scroll_bar (event.xexpose.window); + bar = x_window_to_scroll_bar (event.xexpose.window); - if (bar) - x_scroll_bar_expose (bar, &event); + if (bar) + x_scroll_bar_expose (bar, &event); #ifdef USE_X_TOOLKIT - else - goto OTHER; + else + goto OTHER; #endif /* USE_X_TOOLKIT */ #endif /* not USE_TOOLKIT_SCROLL_BARS */ - } - break; + } + break; - case GraphicsExpose: /* This occurs when an XCopyArea's - source area was obscured or not - available. */ - f = x_window_to_frame (dpyinfo, event.xgraphicsexpose.drawable); - if (f) - { - expose_frame (f, - event.xgraphicsexpose.x, event.xgraphicsexpose.y, - event.xgraphicsexpose.width, - event.xgraphicsexpose.height); - } + case GraphicsExpose: /* This occurs when an XCopyArea's + source area was obscured or not + available. */ + f = x_window_to_frame (dpyinfo, event.xgraphicsexpose.drawable); + if (f) + { + expose_frame (f, + event.xgraphicsexpose.x, event.xgraphicsexpose.y, + event.xgraphicsexpose.width, + event.xgraphicsexpose.height); + } #ifdef USE_X_TOOLKIT - else - goto OTHER; + else + goto OTHER; #endif /* USE_X_TOOLKIT */ - break; + break; - case NoExpose: /* This occurs when an XCopyArea's - source area was completely - available. */ - break; + case NoExpose: /* This occurs when an XCopyArea's + source area was completely + available. */ + break; - case UnmapNotify: - /* Redo the mouse-highlight after the tooltip has gone. */ - if (event.xmap.window == tip_window) - { - tip_window = 0; - redo_mouse_highlight (); - } - - f = x_top_window_to_frame (dpyinfo, event.xunmap.window); - if (f) /* F may no longer exist if - the frame was deleted. */ - { - /* While a frame is unmapped, display generation is - disabled; you don't want to spend time updating a - display that won't ever be seen. */ - f->async_visible = 0; - /* We can't distinguish, from the event, whether the window - has become iconified or invisible. So assume, if it - was previously visible, than now it is iconified. - But x_make_frame_invisible clears both - the visible flag and the iconified flag; - and that way, we know the window is not iconified now. */ - if (FRAME_VISIBLE_P (f) || FRAME_ICONIFIED_P (f)) - { - f->async_iconified = 1; - - bufp->kind = iconify_event; - XSETFRAME (bufp->frame_or_window, f); - bufp->arg = Qnil; - bufp++; - count++; - numchars--; - } - } - goto OTHER; - - case MapNotify: - if (event.xmap.window == tip_window) - /* The tooltip has been drawn already. Avoid - the SET_FRAME_GARBAGED below. */ - goto OTHER; - - /* We use x_top_window_to_frame because map events can - come for sub-windows and they don't mean that the - frame is visible. */ - f = x_top_window_to_frame (dpyinfo, event.xmap.window); - if (f) - { - f->async_visible = 1; - f->async_iconified = 0; - f->output_data.x->has_been_visible = 1; + case UnmapNotify: + /* Redo the mouse-highlight after the tooltip has gone. */ + if (event.xmap.window == tip_window) + { + tip_window = 0; + redo_mouse_highlight (); + } - /* wait_reading_process_input will notice this and update - the frame's display structures. */ - SET_FRAME_GARBAGED (f); + f = x_top_window_to_frame (dpyinfo, event.xunmap.window); + if (f) /* F may no longer exist if + the frame was deleted. */ + { + /* While a frame is unmapped, display generation is + disabled; you don't want to spend time updating a + display that won't ever be seen. */ + f->async_visible = 0; + /* We can't distinguish, from the event, whether the window + has become iconified or invisible. So assume, if it + was previously visible, than now it is iconified. + But x_make_frame_invisible clears both + the visible flag and the iconified flag; + and that way, we know the window is not iconified now. */ + if (FRAME_VISIBLE_P (f) || FRAME_ICONIFIED_P (f)) + { + f->async_iconified = 1; + + bufp->kind = ICONIFY_EVENT; + XSETFRAME (bufp->frame_or_window, f); + bufp->arg = Qnil; + bufp++; + count++; + numchars--; + } + } + goto OTHER; + + case MapNotify: + if (event.xmap.window == tip_window) + /* The tooltip has been drawn already. Avoid + the SET_FRAME_GARBAGED below. */ + goto OTHER; + + /* We use x_top_window_to_frame because map events can + come for sub-windows and they don't mean that the + frame is visible. */ + f = x_top_window_to_frame (dpyinfo, event.xmap.window); + if (f) + { + /* wait_reading_process_input will notice this and update + the frame's display structures. + If we where iconified, we should not set garbaged, + because that stops redrawing on Expose events. This looks + bad if we are called from a recursive event loop + (x_dispatch_event), for example when a dialog is up. */ + if (! f->async_iconified) + SET_FRAME_GARBAGED (f); + + f->async_visible = 1; + f->async_iconified = 0; + f->output_data.x->has_been_visible = 1; + + if (f->iconified) + { + bufp->kind = DEICONIFY_EVENT; + XSETFRAME (bufp->frame_or_window, f); + bufp->arg = Qnil; + bufp++; + count++; + numchars--; + } + else if (! NILP (Vframe_list) + && ! NILP (XCDR (Vframe_list))) + /* Force a redisplay sooner or later + to update the frame titles + in case this is the second frame. */ + record_asynch_buffer_change (); + } + goto OTHER; - if (f->iconified) - { - bufp->kind = deiconify_event; - XSETFRAME (bufp->frame_or_window, f); - bufp->arg = Qnil; - bufp++; - count++; - numchars--; - } - else if (! NILP (Vframe_list) - && ! NILP (XCDR (Vframe_list))) - /* Force a redisplay sooner or later - to update the frame titles - in case this is the second frame. */ - record_asynch_buffer_change (); - } - goto OTHER; + case KeyPress: + +#if defined (USE_X_TOOLKIT) || defined (USE_GTK) + /* Dispatch KeyPress events when in menu. */ + if (popup_activated ()) + goto OTHER; +#endif - case KeyPress: - f = x_any_window_to_frame (dpyinfo, event.xkey.window); + f = x_any_window_to_frame (dpyinfo, event.xkey.window); - if (!dpyinfo->mouse_face_hidden && INTEGERP (Vmouse_highlight)) - { - dpyinfo->mouse_face_hidden = 1; - clear_mouse_face (dpyinfo); - } + if (!dpyinfo->mouse_face_hidden && INTEGERP (Vmouse_highlight)) + { + dpyinfo->mouse_face_hidden = 1; + clear_mouse_face (dpyinfo); + } #if defined USE_MOTIF && defined USE_TOOLKIT_SCROLL_BARS - if (f == 0) - { - /* Scroll bars consume key events, but we want - the keys to go to the scroll bar's frame. */ - Widget widget = XtWindowToWidget (dpyinfo->display, - event.xkey.window); - if (widget && XmIsScrollBar (widget)) - { - widget = XtParent (widget); - f = x_any_window_to_frame (dpyinfo, XtWindow (widget)); - } - } + if (f == 0) + { + /* Scroll bars consume key events, but we want + the keys to go to the scroll bar's frame. */ + Widget widget = XtWindowToWidget (dpyinfo->display, + event.xkey.window); + if (widget && XmIsScrollBar (widget)) + { + widget = XtParent (widget); + f = x_any_window_to_frame (dpyinfo, XtWindow (widget)); + } + } #endif /* USE_MOTIF and USE_TOOLKIT_SCROLL_BARS */ - if (f != 0) - { - KeySym keysym, orig_keysym; - /* al%imercury@uunet.uu.net says that making this 81 - instead of 80 fixed a bug whereby meta chars made - his Emacs hang. - - It seems that some version of XmbLookupString has - a bug of not returning XBufferOverflow in - status_return even if the input is too long to - fit in 81 bytes. So, we must prepare sufficient - bytes for copy_buffer. 513 bytes (256 chars for - two-byte character set) seems to be a fairly good - approximation. -- 2000.8.10 handa@etl.go.jp */ - unsigned char copy_buffer[513]; - unsigned char *copy_bufptr = copy_buffer; - int copy_bufsiz = sizeof (copy_buffer); - int modifiers; - - event.xkey.state - |= x_emacs_to_x_modifiers (FRAME_X_DISPLAY_INFO (f), - extra_keyboard_modifiers); - modifiers = event.xkey.state; - - /* This will have to go some day... */ - - /* make_lispy_event turns chars into control chars. - Don't do it here because XLookupString is too eager. */ - event.xkey.state &= ~ControlMask; - event.xkey.state &= ~(dpyinfo->meta_mod_mask - | dpyinfo->super_mod_mask - | dpyinfo->hyper_mod_mask - | dpyinfo->alt_mod_mask); - - /* In case Meta is ComposeCharacter, - clear its status. According to Markus Ehrnsperger - Markus.Ehrnsperger@lehrstuhl-bross.physik.uni-muenchen.de - this enables ComposeCharacter to work whether or - not it is combined with Meta. */ - if (modifiers & dpyinfo->meta_mod_mask) - bzero (&compose_status, sizeof (compose_status)); + if (f != 0) + { + KeySym keysym, orig_keysym; + /* al%imercury@uunet.uu.net says that making this 81 + instead of 80 fixed a bug whereby meta chars made + his Emacs hang. + + It seems that some version of XmbLookupString has + a bug of not returning XBufferOverflow in + status_return even if the input is too long to + fit in 81 bytes. So, we must prepare sufficient + bytes for copy_buffer. 513 bytes (256 chars for + two-byte character set) seems to be a fairly good + approximation. -- 2000.8.10 handa@etl.go.jp */ + unsigned char copy_buffer[513]; + unsigned char *copy_bufptr = copy_buffer; + int copy_bufsiz = sizeof (copy_buffer); + int modifiers; + Lisp_Object coding_system = Qlatin_1; + + event.xkey.state + |= x_emacs_to_x_modifiers (FRAME_X_DISPLAY_INFO (f), + extra_keyboard_modifiers); + modifiers = event.xkey.state; + + /* This will have to go some day... */ + + /* make_lispy_event turns chars into control chars. + Don't do it here because XLookupString is too eager. */ + event.xkey.state &= ~ControlMask; + event.xkey.state &= ~(dpyinfo->meta_mod_mask + | dpyinfo->super_mod_mask + | dpyinfo->hyper_mod_mask + | dpyinfo->alt_mod_mask); + + /* In case Meta is ComposeCharacter, + clear its status. According to Markus Ehrnsperger + Markus.Ehrnsperger@lehrstuhl-bross.physik.uni-muenchen.de + this enables ComposeCharacter to work whether or + not it is combined with Meta. */ + if (modifiers & dpyinfo->meta_mod_mask) + bzero (&compose_status, sizeof (compose_status)); #ifdef HAVE_X_I18N - if (FRAME_XIC (f)) - { - Status status_return; - - nbytes = XmbLookupString (FRAME_XIC (f), - &event.xkey, copy_bufptr, - copy_bufsiz, &keysym, - &status_return); - if (status_return == XBufferOverflow) - { - copy_bufsiz = nbytes + 1; - copy_bufptr = (char *) alloca (copy_bufsiz); - nbytes = XmbLookupString (FRAME_XIC (f), - &event.xkey, copy_bufptr, - copy_bufsiz, &keysym, - &status_return); - } + if (FRAME_XIC (f)) + { + Status status_return; + + coding_system = Vlocale_coding_system; + nbytes = XmbLookupString (FRAME_XIC (f), + &event.xkey, copy_bufptr, + copy_bufsiz, &keysym, + &status_return); + if (status_return == XBufferOverflow) + { + copy_bufsiz = nbytes + 1; + copy_bufptr = (char *) alloca (copy_bufsiz); + nbytes = XmbLookupString (FRAME_XIC (f), + &event.xkey, copy_bufptr, + copy_bufsiz, &keysym, + &status_return); + } + /* Xutf8LookupString is a new but already deprecated interface. -stef */ +#if 0 && defined X_HAVE_UTF8_STRING + else if (status_return == XLookupKeySym) + { /* Try again but with utf-8. */ + coding_system = Qutf_8; + nbytes = Xutf8LookupString (FRAME_XIC (f), + &event.xkey, copy_bufptr, + copy_bufsiz, &keysym, + &status_return); + if (status_return == XBufferOverflow) + { + copy_bufsiz = nbytes + 1; + copy_bufptr = (char *) alloca (copy_bufsiz); + nbytes = Xutf8LookupString (FRAME_XIC (f), + &event.xkey, + copy_bufptr, + copy_bufsiz, &keysym, + &status_return); + } + } +#endif - if (status_return == XLookupNone) - break; - else if (status_return == XLookupChars) - { - keysym = NoSymbol; - modifiers = 0; - } - else if (status_return != XLookupKeySym - && status_return != XLookupBoth) - abort (); - } - else - nbytes = XLookupString (&event.xkey, copy_bufptr, - copy_bufsiz, &keysym, - &compose_status); + if (status_return == XLookupNone) + break; + else if (status_return == XLookupChars) + { + keysym = NoSymbol; + modifiers = 0; + } + else if (status_return != XLookupKeySym + && status_return != XLookupBoth) + abort (); + } + else + nbytes = XLookupString (&event.xkey, copy_bufptr, + copy_bufsiz, &keysym, + &compose_status); #else - nbytes = XLookupString (&event.xkey, copy_bufptr, - copy_bufsiz, &keysym, - &compose_status); + nbytes = XLookupString (&event.xkey, copy_bufptr, + copy_bufsiz, &keysym, + &compose_status); #endif - orig_keysym = keysym; - - if (numchars > 1) - { - Lisp_Object c; - - /* First deal with keysyms which have defined - translations to characters. */ - if (keysym >= 32 && keysym < 128) - /* Avoid explicitly decoding each ASCII character. */ - { - bufp->kind = ascii_keystroke; - bufp->code = keysym; - XSETFRAME (bufp->frame_or_window, f); - bufp->arg = Qnil; - bufp->modifiers - = x_x_to_emacs_modifiers (FRAME_X_DISPLAY_INFO (f), - modifiers); - bufp->timestamp = event.xkey.time; - bufp++; - count++; - numchars--; - } - /* Now non-ASCII. */ - else if (HASH_TABLE_P (Vx_keysym_table) - && (CHARACTERP (c = Fgethash (make_number (keysym), - Vx_keysym_table, - Qnil)))) - { - bufp->kind = (ASCII_CHAR_P (XFASTINT (c)) - ? ascii_keystroke - : multibyte_char_keystroke); - bufp->code = XFASTINT (c); - XSETFRAME (bufp->frame_or_window, f); - bufp->arg = Qnil; - bufp->modifiers - = x_x_to_emacs_modifiers (FRAME_X_DISPLAY_INFO (f), - modifiers); - bufp->timestamp = event.xkey.time; - bufp++; - count++; - numchars--; - } - else if (((keysym >= XK_BackSpace && keysym <= XK_Escape) - || keysym == XK_Delete + orig_keysym = keysym; + + if (numchars > 1) + { + Lisp_Object c; + + /* First deal with keysyms which have defined + translations to characters. */ + if (keysym >= 32 && keysym < 128) + /* Avoid explicitly decoding each ASCII character. */ + { + bufp->kind = ASCII_KEYSTROKE_EVENT; + bufp->code = keysym; + XSETFRAME (bufp->frame_or_window, f); + bufp->arg = Qnil; + bufp->modifiers + = x_x_to_emacs_modifiers (FRAME_X_DISPLAY_INFO (f), + modifiers); + bufp->timestamp = event.xkey.time; + bufp++; + count++; + numchars--; + } + /* Now non-ASCII. */ + else if (HASH_TABLE_P (Vx_keysym_table) + && (NATNUMP (c = Fgethash (make_number (keysym), + Vx_keysym_table, + Qnil)))) + { + bufp->kind = (SINGLE_BYTE_CHAR_P (XFASTINT (c)) + ? ASCII_KEYSTROKE_EVENT + : MULTIBYTE_CHAR_KEYSTROKE_EVENT); + bufp->code = XFASTINT (c); + XSETFRAME (bufp->frame_or_window, f); + bufp->arg = Qnil; + bufp->modifiers + = x_x_to_emacs_modifiers (FRAME_X_DISPLAY_INFO (f), + modifiers); + bufp->timestamp = event.xkey.time; + bufp++; + count++; + numchars--; + } + /* Random non-modifier sorts of keysyms. */ + else if (((keysym >= XK_BackSpace && keysym <= XK_Escape) + || keysym == XK_Delete #ifdef XK_ISO_Left_Tab - || (keysym >= XK_ISO_Left_Tab && keysym <= XK_ISO_Enter) + || (keysym >= XK_ISO_Left_Tab + && keysym <= XK_ISO_Enter) #endif - || (keysym >= XK_Kanji && keysym <= XK_Eisu_toggle) - || IsCursorKey (keysym) /* 0xff50 <= x < 0xff60 */ - || IsMiscFunctionKey (keysym) /* 0xff60 <= x < VARIES */ + || IsCursorKey (keysym) /* 0xff50 <= x < 0xff60 */ + || IsMiscFunctionKey (keysym) /* 0xff60 <= x < VARIES */ #ifdef HPUX - /* This recognizes the "extended function - keys". It seems there's no cleaner way. - Test IsModifierKey to avoid handling - mode_switch incorrectly. */ - || ((unsigned) (keysym) >= XK_Select - && (unsigned)(keysym) < XK_KP_Space) + /* This recognizes the "extended function + keys". It seems there's no cleaner way. + Test IsModifierKey to avoid handling + mode_switch incorrectly. */ + || ((unsigned) (keysym) >= XK_Select + && (unsigned)(keysym) < XK_KP_Space) #endif #ifdef XK_dead_circumflex - || orig_keysym == XK_dead_circumflex + || orig_keysym == XK_dead_circumflex #endif #ifdef XK_dead_grave - || orig_keysym == XK_dead_grave + || orig_keysym == XK_dead_grave #endif #ifdef XK_dead_tilde - || orig_keysym == XK_dead_tilde + || orig_keysym == XK_dead_tilde #endif #ifdef XK_dead_diaeresis - || orig_keysym == XK_dead_diaeresis + || orig_keysym == XK_dead_diaeresis #endif #ifdef XK_dead_macron - || orig_keysym == XK_dead_macron + || orig_keysym == XK_dead_macron #endif #ifdef XK_dead_degree - || orig_keysym == XK_dead_degree + || orig_keysym == XK_dead_degree #endif #ifdef XK_dead_acute - || orig_keysym == XK_dead_acute + || orig_keysym == XK_dead_acute #endif #ifdef XK_dead_cedilla - || orig_keysym == XK_dead_cedilla + || orig_keysym == XK_dead_cedilla #endif #ifdef XK_dead_breve - || orig_keysym == XK_dead_breve + || orig_keysym == XK_dead_breve #endif #ifdef XK_dead_ogonek - || orig_keysym == XK_dead_ogonek + || orig_keysym == XK_dead_ogonek #endif #ifdef XK_dead_caron - || orig_keysym == XK_dead_caron + || orig_keysym == XK_dead_caron #endif #ifdef XK_dead_doubleacute - || orig_keysym == XK_dead_doubleacute + || orig_keysym == XK_dead_doubleacute #endif #ifdef XK_dead_abovedot - || orig_keysym == XK_dead_abovedot -#endif -#ifdef XK_dead_abovering - || orig_keysym == XK_dead_abovering -#endif -#ifdef XK_dead_iota - || orig_keysym == XK_dead_iota -#endif -#ifdef XK_dead_belowdot - || orig_keysym == XK_dead_belowdot -#endif -#ifdef XK_dead_voiced_sound - || orig_keysym == XK_dead_voiced_sound + || orig_keysym == XK_dead_abovedot #endif -#ifdef XK_dead_semivoiced_sound - || orig_keysym == XK_dead_semivoiced_sound -#endif -#ifdef XK_dead_hook - || orig_keysym == XK_dead_hook -#endif -#ifdef XK_dead_horn - || orig_keysym == XK_dead_horn -#endif - || IsKeypadKey (keysym) /* 0xff80 <= x < 0xffbe */ - || IsFunctionKey (keysym) /* 0xffbe <= x < 0xffe1 */ - /* Any "vendor-specific" key is ok. */ - || (orig_keysym & (1 << 28)) - || (keysym != NoSymbol && nbytes == 0)) - && ! (IsModifierKey (orig_keysym) + || IsKeypadKey (keysym) /* 0xff80 <= x < 0xffbe */ + || IsFunctionKey (keysym) /* 0xffbe <= x < 0xffe1 */ + /* Any "vendor-specific" key is ok. */ + || (orig_keysym & (1 << 28)) + || (keysym != NoSymbol && nbytes == 0)) + && ! (IsModifierKey (orig_keysym) #ifndef HAVE_X11R5 #ifdef XK_Mode_switch - || ((unsigned)(orig_keysym) == XK_Mode_switch) + || ((unsigned)(orig_keysym) == XK_Mode_switch) #endif #ifdef XK_Num_Lock - || ((unsigned)(orig_keysym) == XK_Num_Lock) + || ((unsigned)(orig_keysym) == XK_Num_Lock) #endif #endif /* not HAVE_X11R5 */ - /* The symbols from XK_ISO_Lock to - XK_ISO_Last_Group_Lock doesn't have - real modifiers but should be treated - similarly to Mode_switch by Emacs. */ + /* The symbols from XK_ISO_Lock + to XK_ISO_Last_Group_Lock + don't have real modifiers but + should be treated similarly to + Mode_switch by Emacs. */ #if defined XK_ISO_Lock && defined XK_ISO_Last_Group_Lock - || ((unsigned)(orig_keysym) >= XK_ISO_Lock - && (unsigned)(orig_keysym) <= XK_ISO_Last_Group_Lock) + || ((unsigned)(orig_keysym) + >= XK_ISO_Lock + && (unsigned)(orig_keysym) + <= XK_ISO_Last_Group_Lock) #endif - )) - { - if (temp_index == sizeof temp_buffer / sizeof (short)) - temp_index = 0; - temp_buffer[temp_index++] = keysym; - /* make_lispy_event will convert this to a symbolic - key. */ - bufp->kind = non_ascii_keystroke; - bufp->code = keysym; - XSETFRAME (bufp->frame_or_window, f); - bufp->arg = Qnil; - bufp->modifiers - = x_x_to_emacs_modifiers (FRAME_X_DISPLAY_INFO (f), - modifiers); - bufp->timestamp = event.xkey.time; - bufp++; - count++; - numchars--; - } - else if (numchars > nbytes) - { /* Raw characters, not keysym. */ - register int i; - register int c; - int nchars, len; - - for (i = 0; i < nbytes; i++) - { - if (temp_index == (sizeof temp_buffer - / sizeof (short))) - temp_index = 0; - temp_buffer[temp_index++] = copy_bufptr[i]; - } - - { - /* Decode the input data. */ - coding.destination - = (unsigned char *) malloc (nbytes); - if (! coding.destination) - break; - coding.dst_bytes = nbytes; - coding.mode |= CODING_MODE_LAST_BLOCK; - decode_coding_c_string (&coding, copy_bufptr, - nbytes, Qnil); - nbytes = coding.produced; - nchars = coding.produced_char; - if (copy_bufsiz < nbytes) - { - copy_bufsiz = nbytes; - copy_bufptr = (char *) alloca (nbytes); - } - bcopy (coding.destination, copy_bufptr, nbytes); - free (coding.destination); - } - - /* Convert the input data to a sequence of - character events. */ - for (i = 0; i < nbytes; i += len) - { - if (nchars == nbytes) - c = copy_bufptr[i], len = 1; - else - c = STRING_CHAR_AND_LENGTH (copy_bufptr + i, - nbytes - i, len); - - bufp->kind = (ASCII_CHAR_P (c) - ? ascii_keystroke - : multibyte_char_keystroke); - bufp->code = c; - XSETFRAME (bufp->frame_or_window, f); - bufp->arg = Qnil; - bufp->modifiers - = x_x_to_emacs_modifiers (FRAME_X_DISPLAY_INFO (f), - modifiers); - bufp->timestamp = event.xkey.time; - bufp++; - } - - count += nchars; - numchars -= nchars; - - if (keysym == NoSymbol) - break; - } - else - abort (); - } - else - abort (); - } + )) + { + if (temp_index == sizeof temp_buffer / sizeof (short)) + temp_index = 0; + temp_buffer[temp_index++] = keysym; + /* make_lispy_event will convert this to a symbolic + key. */ + bufp->kind = NON_ASCII_KEYSTROKE_EVENT; + bufp->code = keysym; + XSETFRAME (bufp->frame_or_window, f); + bufp->arg = Qnil; + bufp->modifiers + = x_x_to_emacs_modifiers (FRAME_X_DISPLAY_INFO (f), + modifiers); + bufp->timestamp = event.xkey.time; + bufp++; + count++; + numchars--; + } + else if (numchars > nbytes) + { /* Raw bytes, not keysym. */ + register int i; + register int c; + int nchars, len; + + /* The input should be decoded with `coding_system' + which depends on which X*LookupString function + we used just above and the locale. */ + setup_coding_system (coding_system, &coding); + coding.src_multibyte = 0; + coding.dst_multibyte = 1; + /* The input is converted to events, thus we can't + handle composition. Anyway, there's no XIM that + gives us composition information. */ - coding.composing = COMPOSITION_DISABLED; ++ coding.common_flags &= ~CODING_ANNOTATION_MASK; + + for (i = 0; i < nbytes; i++) + { + if (temp_index == (sizeof temp_buffer + / sizeof (short))) + temp_index = 0; + temp_buffer[temp_index++] = copy_bufptr[i]; + } + + { + /* Decode the input data. */ - int require; - unsigned char *p; - - require = decoding_buffer_size (&coding, nbytes); - p = (unsigned char *) alloca (require); - coding.mode |= CODING_MODE_LAST_BLOCK; - /* We explicitly disable composition - handling because key data should - not contain any composition - sequence. */ - coding.composing = COMPOSITION_DISABLED; - decode_coding (&coding, copy_bufptr, p, - nbytes, require); - nbytes = coding.produced; - nchars = coding.produced_char; - copy_bufptr = p; ++ coding.destination = (unsigned char *) malloc (nbytes); ++ if (! coding.destination) ++ break; ++ coding.dst_bytes = nbytes; ++ coding.mode |= CODING_MODE_LAST_BLOCK; ++ decode_coding_c_string (&coding, copy_bufptr, ++ nbytes, Qnil); ++ nbytes = coding.produced; ++ nchars = coding.produced_char; ++ if (copy_bufsiz < nbytes) ++ { ++ copy_bufsiz = nbytes; ++ copy_bufptr = (char *) alloca (nbytes); ++ } ++ bcopy (coding.destination, copy_bufptr, nbytes); ++ free (coding.destination); + } + + /* Convert the input data to a sequence of + character events. */ + for (i = 0; i < nbytes; i += len) + { + if (nchars == nbytes) + c = copy_bufptr[i], len = 1; + else + c = STRING_CHAR_AND_LENGTH (copy_bufptr + i, + nbytes - i, len); + - bufp->kind = (SINGLE_BYTE_CHAR_P (c) ++ bufp->kind = (ASCII_CHAR_P (c) + ? ASCII_KEYSTROKE_EVENT + : MULTIBYTE_CHAR_KEYSTROKE_EVENT); + bufp->code = c; + XSETFRAME (bufp->frame_or_window, f); + bufp->arg = Qnil; + bufp->modifiers + = x_x_to_emacs_modifiers (FRAME_X_DISPLAY_INFO (f), + modifiers); + bufp->timestamp = event.xkey.time; + bufp++; + } + + count += nchars; + numchars -= nchars; + + if (keysym == NoSymbol) + break; + } + else + abort (); + } + else + abort (); + } #ifdef HAVE_X_I18N - /* Don't dispatch this event since XtDispatchEvent calls - XFilterEvent, and two calls in a row may freeze the - client. */ - break; + /* Don't dispatch this event since XtDispatchEvent calls + XFilterEvent, and two calls in a row may freeze the + client. */ + break; #else - goto OTHER; + goto OTHER; #endif - case KeyRelease: + case KeyRelease: #ifdef HAVE_X_I18N - /* Don't dispatch this event since XtDispatchEvent calls - XFilterEvent, and two calls in a row may freeze the - client. */ - break; + /* Don't dispatch this event since XtDispatchEvent calls + XFilterEvent, and two calls in a row may freeze the + client. */ + break; #else - goto OTHER; + goto OTHER; #endif - /* Here's a possible interpretation of the whole - FocusIn-EnterNotify FocusOut-LeaveNotify mess. If - you get a FocusIn event, you have to get a FocusOut - event before you relinquish the focus. If you - haven't received a FocusIn event, then a mere - LeaveNotify is enough to free you. */ + case EnterNotify: + { + int n; - case EnterNotify: - { - f = x_any_window_to_frame (dpyinfo, event.xcrossing.window); + n = x_detect_focus_change (dpyinfo, &event, bufp, numchars); + if (n > 0) + { + bufp += n, count += n, numchars -= n; + } + + f = x_any_window_to_frame (dpyinfo, event.xcrossing.window); #if 0 - if (event.xcrossing.focus) - { - /* Avoid nasty pop/raise loops. */ - if (f && (!(f->auto_raise) - || !(f->auto_lower) - || (event.xcrossing.time - enter_timestamp) > 500)) - { - x_new_focus_frame (dpyinfo, f); - enter_timestamp = event.xcrossing.time; - } - } - else if (f == dpyinfo->x_focus_frame) - x_new_focus_frame (dpyinfo, 0); + if (event.xcrossing.focus) + { + /* Avoid nasty pop/raise loops. */ + if (f && (!(f->auto_raise) + || !(f->auto_lower) + || (event.xcrossing.time - enter_timestamp) > 500)) + { + x_new_focus_frame (dpyinfo, f); + enter_timestamp = event.xcrossing.time; + } + } + else if (f == dpyinfo->x_focus_frame) + x_new_focus_frame (dpyinfo, 0); #endif - /* EnterNotify counts as mouse movement, - so update things that depend on mouse position. */ - if (f && !f->output_data.x->hourglass_p) - note_mouse_movement (f, &event.xmotion); - goto OTHER; - } + /* EnterNotify counts as mouse movement, + so update things that depend on mouse position. */ + if (f && !f->output_data.x->hourglass_p) + note_mouse_movement (f, &event.xmotion); + goto OTHER; + } - case FocusIn: - f = x_any_window_to_frame (dpyinfo, event.xfocus.window); - if (event.xfocus.detail != NotifyPointer) - dpyinfo->x_focus_event_frame = f; - if (f) - { - x_new_focus_frame (dpyinfo, f); + case FocusIn: + { + int n; - /* Don't stop displaying the initial startup message - for a switch-frame event we don't need. */ - if (GC_NILP (Vterminal_frame) - && GC_CONSP (Vframe_list) - && !GC_NILP (XCDR (Vframe_list))) - { - bufp->kind = FOCUS_IN_EVENT; - XSETFRAME (bufp->frame_or_window, f); - bufp->arg = Qnil; - ++bufp, ++count, --numchars; - } - } + n = x_detect_focus_change (dpyinfo, &event, bufp, numchars); + if (n > 0) + { + bufp += n, count += n, numchars -= n; + } + } -#ifdef HAVE_X_I18N - if (f && FRAME_XIC (f)) - XSetICFocus (FRAME_XIC (f)); -#endif + goto OTHER; - goto OTHER; + case LeaveNotify: + { + int n; - case LeaveNotify: - f = x_top_window_to_frame (dpyinfo, event.xcrossing.window); - if (f) - { - if (f == dpyinfo->mouse_face_mouse_frame) - { - /* If we move outside the frame, then we're - certainly no longer on any text in the frame. */ - clear_mouse_face (dpyinfo); - dpyinfo->mouse_face_mouse_frame = 0; - } + n = x_detect_focus_change (dpyinfo, &event, bufp, numchars); + if (n > 0) + { + bufp += n, count += n, numchars -= n; + } + } - /* Generate a nil HELP_EVENT to cancel a help-echo. - Do it only if there's something to cancel. - Otherwise, the startup message is cleared when - the mouse leaves the frame. */ - if (any_help_event_p) - { - Lisp_Object frame; - int n; - - XSETFRAME (frame, f); - help_echo = Qnil; - n = gen_help_event (bufp, numchars, - Qnil, frame, Qnil, Qnil, 0); - bufp += n, count += n, numchars -= n; - } + f = x_top_window_to_frame (dpyinfo, event.xcrossing.window); + if (f) + { + if (f == dpyinfo->mouse_face_mouse_frame) + { + /* If we move outside the frame, then we're + certainly no longer on any text in the frame. */ + clear_mouse_face (dpyinfo); + dpyinfo->mouse_face_mouse_frame = 0; + } + + /* Generate a nil HELP_EVENT to cancel a help-echo. + Do it only if there's something to cancel. + Otherwise, the startup message is cleared when + the mouse leaves the frame. */ + if (any_help_event_p) + { + Lisp_Object frame; + int n; + + XSETFRAME (frame, f); + help_echo_string = Qnil; + n = gen_help_event (bufp, numchars, + Qnil, frame, Qnil, Qnil, 0); + bufp += n, count += n, numchars -= n; + } -#if 0 - if (event.xcrossing.focus) - x_mouse_leave (dpyinfo); - else - { - if (f == dpyinfo->x_focus_event_frame) - dpyinfo->x_focus_event_frame = 0; - if (f == dpyinfo->x_focus_frame) - x_new_focus_frame (dpyinfo, 0); - } + } + goto OTHER; + + case FocusOut: + { + int n; + + n = x_detect_focus_change (dpyinfo, &event, bufp, numchars); + if (n > 0) + { + bufp += n, count += n, numchars -= n; + } + } + + goto OTHER; + + case MotionNotify: + { + previous_help_echo_string = help_echo_string; + help_echo_string = help_echo_object = help_echo_window = Qnil; + help_echo_pos = -1; + + if (dpyinfo->grabbed && last_mouse_frame + && FRAME_LIVE_P (last_mouse_frame)) + f = last_mouse_frame; + else + f = x_window_to_frame (dpyinfo, event.xmotion.window); + + if (dpyinfo->mouse_face_hidden) + { + dpyinfo->mouse_face_hidden = 0; + clear_mouse_face (dpyinfo); + } + + if (f) + { + + /* Generate SELECT_WINDOW_EVENTs when needed. */ + if (mouse_autoselect_window) + { + Lisp_Object window; + + window = window_from_coordinates (f, + event.xmotion.x, event.xmotion.y, + 0, 0, 0, 0); + + /* Window will be selected only when it is not selected now and + last mouse movement event was not in it. Minibuffer window + will be selected iff it is active. */ + if (WINDOWP(window) + && !EQ (window, last_window) + && !EQ (window, selected_window) + && numchars > 0) + { + bufp->kind = SELECT_WINDOW_EVENT; + bufp->frame_or_window = window; + bufp->arg = Qnil; + ++bufp, ++count, --numchars; + } + + last_window=window; + } + note_mouse_movement (f, &event.xmotion); + } + else + { +#ifndef USE_TOOLKIT_SCROLL_BARS + struct scroll_bar *bar + = x_window_to_scroll_bar (event.xmotion.window); + + if (bar) + x_scroll_bar_note_movement (bar, &event); +#endif /* USE_TOOLKIT_SCROLL_BARS */ + + /* If we move outside the frame, then we're + certainly no longer on any text in the frame. */ + clear_mouse_face (dpyinfo); + } + + /* If the contents of the global variable help_echo_string + has changed, generate a HELP_EVENT. */ + if (!NILP (help_echo_string) + || !NILP (previous_help_echo_string)) + { + Lisp_Object frame; + int n; + + if (f) + XSETFRAME (frame, f); + else + frame = Qnil; + + any_help_event_p = 1; + n = gen_help_event (bufp, numchars, help_echo_string, frame, + help_echo_window, help_echo_object, + help_echo_pos); + bufp += n, count += n, numchars -= n; + } + + goto OTHER; + } + + case ConfigureNotify: + f = x_top_window_to_frame (dpyinfo, event.xconfigure.window); + if (f) + { +#ifndef USE_X_TOOLKIT +#ifdef USE_GTK + xg_resize_widgets (f, event.xconfigure.width, + event.xconfigure.height); +#else /* not USE_GTK */ + /* If there is a pending resize for fullscreen, don't + do this one, the right one will come later. + The toolkit version doesn't seem to need this, but we + need to reset it below. */ + int dont_resize + = ((f->want_fullscreen & FULLSCREEN_WAIT) + && f->new_text_cols != 0); + int rows = FRAME_PIXEL_HEIGHT_TO_TEXT_LINES (f, event.xconfigure.height); + int columns = FRAME_PIXEL_WIDTH_TO_TEXT_COLS (f, event.xconfigure.width); + + if (dont_resize) + goto OTHER; + + /* In the toolkit version, change_frame_size + is called by the code that handles resizing + of the EmacsFrame widget. */ + + /* Even if the number of character rows and columns has + not changed, the font size may have changed, so we need + to check the pixel dimensions as well. */ + if (columns != FRAME_COLS (f) + || rows != FRAME_LINES (f) + || event.xconfigure.width != FRAME_PIXEL_WIDTH (f) + || event.xconfigure.height != FRAME_PIXEL_HEIGHT (f)) + { + change_frame_size (f, rows, columns, 0, 1, 0); + SET_FRAME_GARBAGED (f); + cancel_mouse_face (f); + } +#endif /* not USE_GTK */ #endif - } - goto OTHER; - case FocusOut: - f = x_any_window_to_frame (dpyinfo, event.xfocus.window); - if (event.xfocus.detail != NotifyPointer - && f == dpyinfo->x_focus_event_frame) - dpyinfo->x_focus_event_frame = 0; - if (f && f == dpyinfo->x_focus_frame) - x_new_focus_frame (dpyinfo, 0); + FRAME_PIXEL_WIDTH (f) = event.xconfigure.width; + FRAME_PIXEL_HEIGHT (f) = event.xconfigure.height; + +#ifdef USE_GTK + /* GTK creates windows but doesn't map them. + Only get real positions and check fullscreen when mapped. */ + if (FRAME_GTK_OUTER_WIDGET (f) + && GTK_WIDGET_MAPPED (FRAME_GTK_OUTER_WIDGET (f))) +#endif + { + /* What we have now is the position of Emacs's own window. + Convert that to the position of the window manager window. */ + x_real_positions (f, &f->left_pos, &f->top_pos); + + x_check_fullscreen_move (f); + if (f->want_fullscreen & FULLSCREEN_WAIT) + f->want_fullscreen &= ~(FULLSCREEN_WAIT|FULLSCREEN_BOTH); + } #ifdef HAVE_X_I18N - if (f && FRAME_XIC (f)) - XUnsetICFocus (FRAME_XIC (f)); + if (FRAME_XIC (f) && (FRAME_XIC_STYLE (f) & XIMStatusArea)) + xic_set_statusarea (f); #endif - goto OTHER; + if (f->output_data.x->parent_desc != FRAME_X_DISPLAY_INFO (f)->root_window) + { + /* Since the WM decorations come below top_pos now, + we must put them below top_pos in the future. */ + f->win_gravity = NorthWestGravity; + x_wm_set_size_hint (f, (long) 0, 0); + } + } + goto OTHER; + + case ButtonRelease: + case ButtonPress: + { + /* If we decide we want to generate an event to be seen + by the rest of Emacs, we put it here. */ + struct input_event emacs_event; + int tool_bar_p = 0; + + emacs_event.kind = NO_EVENT; + bzero (&compose_status, sizeof (compose_status)); + + if (dpyinfo->grabbed + && last_mouse_frame + && FRAME_LIVE_P (last_mouse_frame)) + f = last_mouse_frame; + else + f = x_window_to_frame (dpyinfo, event.xbutton.window); + + if (f) + { + /* Is this in the tool-bar? */ + if (WINDOWP (f->tool_bar_window) + && WINDOW_TOTAL_LINES (XWINDOW (f->tool_bar_window))) + { + Lisp_Object window; + int x = event.xbutton.x; + int y = event.xbutton.y; + + window = window_from_coordinates (f, x, y, 0, 0, 0, 1); + if (EQ (window, f->tool_bar_window)) + { + if (event.xbutton.type == ButtonPress) + handle_tool_bar_click (f, x, y, 1, 0); + else + handle_tool_bar_click (f, x, y, 0, + x_x_to_emacs_modifiers (dpyinfo, + event.xbutton.state)); + tool_bar_p = 1; + } + } + + if (!tool_bar_p) + if (!dpyinfo->x_focus_frame + || f == dpyinfo->x_focus_frame) + { +#if defined (USE_X_TOOLKIT) || defined (USE_GTK) + if (! popup_activated ()) +#endif + construct_mouse_click (&emacs_event, &event, f); + } + } + else + { + struct scroll_bar *bar + = x_window_to_scroll_bar (event.xbutton.window); + +#ifdef USE_TOOLKIT_SCROLL_BARS + /* Make the "Ctrl-Mouse-2 splits window" work for toolkit + scroll bars. */ + if (bar && event.xbutton.state & ControlMask) + { + x_scroll_bar_handle_click (bar, &event, &emacs_event); + *finish = X_EVENT_DROP; + } +#else /* not USE_TOOLKIT_SCROLL_BARS */ + if (bar) + x_scroll_bar_handle_click (bar, &event, &emacs_event); +#endif /* not USE_TOOLKIT_SCROLL_BARS */ + } + + if (event.type == ButtonPress) + { + dpyinfo->grabbed |= (1 << event.xbutton.button); + last_mouse_frame = f; + /* Ignore any mouse motion that happened + before this event; any subsequent mouse-movement + Emacs events should reflect only motion after + the ButtonPress. */ + if (f != 0) + f->mouse_moved = 0; + + if (!tool_bar_p) + last_tool_bar_item = -1; + } + else + dpyinfo->grabbed &= ~(1 << event.xbutton.button); + + if (numchars >= 1 && emacs_event.kind != NO_EVENT) + { + bcopy (&emacs_event, bufp, sizeof (struct input_event)); + bufp++; + count++; + numchars--; + } + +#if defined (USE_X_TOOLKIT) || defined (USE_GTK) + f = x_menubar_window_to_frame (dpyinfo, event.xbutton.window); + /* For a down-event in the menu bar, + don't pass it to Xt right now. + Instead, save it away + and we will pass it to Xt from kbd_buffer_get_event. + That way, we can run some Lisp code first. */ + if ( +#ifdef USE_GTK + ! popup_activated () + && +#endif + f && event.type == ButtonPress + /* Verify the event is really within the menu bar + and not just sent to it due to grabbing. */ + && event.xbutton.x >= 0 + && event.xbutton.x < FRAME_PIXEL_WIDTH (f) + && event.xbutton.y >= 0 + && event.xbutton.y < f->output_data.x->menubar_height + && event.xbutton.same_screen) + { + SET_SAVED_BUTTON_EVENT; + XSETFRAME (last_mouse_press_frame, f); +#ifdef USE_GTK + *finish = X_EVENT_DROP; +#endif + } + else if (event.type == ButtonPress) + { + last_mouse_press_frame = Qnil; + goto OTHER; + } + +#ifdef USE_MOTIF /* This should do not harm for Lucid, + but I am trying to be cautious. */ + else if (event.type == ButtonRelease) + { + if (!NILP (last_mouse_press_frame)) + { + f = XFRAME (last_mouse_press_frame); + if (f->output_data.x) + SET_SAVED_BUTTON_EVENT; + } + else + goto OTHER; + } +#endif /* USE_MOTIF */ + else + goto OTHER; +#endif /* USE_X_TOOLKIT || USE_GTK */ + } + break; + + case CirculateNotify: + goto OTHER; + + case CirculateRequest: + goto OTHER; + + case VisibilityNotify: + goto OTHER; + + case MappingNotify: + /* Someone has changed the keyboard mapping - update the + local cache. */ + switch (event.xmapping.request) + { + case MappingModifier: + x_find_modifier_meanings (dpyinfo); + /* This is meant to fall through. */ + case MappingKeyboard: + XRefreshKeyboardMapping (&event.xmapping); + } + goto OTHER; + + default: + OTHER: +#ifdef USE_X_TOOLKIT + BLOCK_INPUT; + if (*finish != X_EVENT_DROP) + XtDispatchEvent (&event); + UNBLOCK_INPUT; +#endif /* USE_X_TOOLKIT */ + break; + } + + goto ret; + + out: + *finish = X_EVENT_GOTO_OUT; + + ret: + *bufp_r = bufp; + *numcharsp = numchars; + *eventp = event; + + return count; +} + + +/* Handles the XEvent EVENT on display DISPLAY. + This is used for event loops outside the normal event handling, + i.e. looping while a popup menu or a dialog is posted. + + Returns the value handle_one_xevent sets in the finish argument. */ +int +x_dispatch_event (event, display) + XEvent *event; + Display *display; +{ + struct x_display_info *dpyinfo; + struct input_event bufp[10]; + struct input_event *bufpp; + int numchars = 10; + int finish = X_EVENT_NORMAL; + + for (bufpp = bufp; bufpp != bufp + 10; bufpp++) + EVENT_INIT (*bufpp); + bufpp = bufp; + + for (dpyinfo = x_display_list; dpyinfo; dpyinfo = dpyinfo->next) + if (dpyinfo->display == display) + break; + + if (dpyinfo) + { + int i, events; + events = handle_one_xevent (dpyinfo, + event, + &bufpp, + &numchars, + &finish); + for (i = 0; i < events; ++i) + kbd_buffer_store_event (&bufp[i]); + } + + return finish; +} + + +/* Read events coming from the X server. + This routine is called by the SIGIO handler. + We return as soon as there are no more events to be read. + + Events representing keys are stored in buffer BUFP, + which can hold up to NUMCHARS characters. + We return the number of characters stored into the buffer, + thus pretending to be `read'. - case MotionNotify: - { - previous_help_echo = help_echo; - help_echo = help_echo_object = help_echo_window = Qnil; - help_echo_pos = -1; - - if (dpyinfo->grabbed && last_mouse_frame - && FRAME_LIVE_P (last_mouse_frame)) - f = last_mouse_frame; - else - f = x_window_to_frame (dpyinfo, event.xmotion.window); - - if (dpyinfo->mouse_face_hidden) - { - dpyinfo->mouse_face_hidden = 0; - clear_mouse_face (dpyinfo); - } + EXPECTED is nonzero if the caller knows input is available. */ - if (f) - note_mouse_movement (f, &event.xmotion); - else - { -#ifndef USE_TOOLKIT_SCROLL_BARS - struct scroll_bar *bar - = x_window_to_scroll_bar (event.xmotion.window); +static int +XTread_socket (sd, bufp, numchars, expected) + register int sd; + /* register */ struct input_event *bufp; + /* register */ int numchars; + int expected; +{ + int count = 0; + XEvent event; + int event_found = 0; + struct x_display_info *dpyinfo; - if (bar) - x_scroll_bar_note_movement (bar, &event); -#endif /* USE_TOOLKIT_SCROLL_BARS */ + if (interrupt_input_blocked) + { + interrupt_input_pending = 1; + return -1; + } - /* If we move outside the frame, then we're - certainly no longer on any text in the frame. */ - clear_mouse_face (dpyinfo); - } + interrupt_input_pending = 0; + BLOCK_INPUT; - /* If the contents of the global variable help_echo - has changed, generate a HELP_EVENT. */ - if (!NILP (help_echo) - || !NILP (previous_help_echo)) - { - Lisp_Object frame; - int n; + /* So people can tell when we have read the available input. */ + input_signal_count++; - if (f) - XSETFRAME (frame, f); - else - frame = Qnil; + if (numchars <= 0) + abort (); /* Don't think this happens. */ - any_help_event_p = 1; - n = gen_help_event (bufp, numchars, help_echo, frame, - help_echo_window, help_echo_object, - help_echo_pos); - bufp += n, count += n, numchars -= n; - } - - goto OTHER; - } + ++handling_signal; - case ConfigureNotify: - f = x_top_window_to_frame (dpyinfo, event.xconfigure.window); - if (f) - { -#ifndef USE_X_TOOLKIT - /* If there is a pending resize for fullscreen, don't - do this one, the right one will come later. - The toolkit version doesn't seem to need this, but we - need to reset it below. */ - int dont_resize = - ((f->output_data.x->want_fullscreen & FULLSCREEN_WAIT) - && FRAME_NEW_WIDTH (f) != 0); - int rows = PIXEL_TO_CHAR_HEIGHT (f, event.xconfigure.height); - int columns = PIXEL_TO_CHAR_WIDTH (f, event.xconfigure.width); - if (dont_resize) - goto OTHER; - - /* In the toolkit version, change_frame_size - is called by the code that handles resizing - of the EmacsFrame widget. */ - - /* Even if the number of character rows and columns has - not changed, the font size may have changed, so we need - to check the pixel dimensions as well. */ - if (columns != f->width - || rows != f->height - || event.xconfigure.width != f->output_data.x->pixel_width - || event.xconfigure.height != f->output_data.x->pixel_height) - { - change_frame_size (f, rows, columns, 0, 1, 0); - SET_FRAME_GARBAGED (f); - cancel_mouse_face (f); - } + /* Find the display we are supposed to read input for. + It's the one communicating on descriptor SD. */ + for (dpyinfo = x_display_list; dpyinfo; dpyinfo = dpyinfo->next) + { +#if 0 /* This ought to be unnecessary; let's verify it. */ +#ifdef FIOSNBIO + /* If available, Xlib uses FIOSNBIO to make the socket + non-blocking, and then looks for EWOULDBLOCK. If O_NDELAY is set, + FIOSNBIO is ignored, and instead of signaling EWOULDBLOCK, + a read returns 0, which Xlib interprets as equivalent to EPIPE. */ + fcntl (dpyinfo->connection, F_SETFL, 0); +#endif /* ! defined (FIOSNBIO) */ #endif - f->output_data.x->pixel_width = event.xconfigure.width; - f->output_data.x->pixel_height = event.xconfigure.height; - - /* What we have now is the position of Emacs's own window. - Convert that to the position of the window manager window. */ - x_real_positions (f, &f->output_data.x->left_pos, - &f->output_data.x->top_pos); - - x_check_fullscreen_move(f); - if (f->output_data.x->want_fullscreen & FULLSCREEN_WAIT) - f->output_data.x->want_fullscreen &= - ~(FULLSCREEN_WAIT|FULLSCREEN_BOTH); -#ifdef HAVE_X_I18N - if (FRAME_XIC (f) && (FRAME_XIC_STYLE (f) & XIMStatusArea)) - xic_set_statusarea (f); +#if 0 /* This code can't be made to work, with multiple displays, + and appears not to be used on any system any more. + Also keyboard.c doesn't turn O_NDELAY on and off + for X connections. */ +#ifndef SIGIO +#ifndef HAVE_SELECT + if (! (fcntl (dpyinfo->connection, F_GETFL, 0) & O_NDELAY)) + { + extern int read_alarm_should_throw; + read_alarm_should_throw = 1; + XPeekEvent (dpyinfo->display, &event); + read_alarm_should_throw = 0; + } +#endif /* HAVE_SELECT */ +#endif /* SIGIO */ #endif - if (f->output_data.x->parent_desc != FRAME_X_DISPLAY_INFO (f)->root_window) - { - /* Since the WM decorations come below top_pos now, - we must put them below top_pos in the future. */ - f->output_data.x->win_gravity = NorthWestGravity; - x_wm_set_size_hint (f, (long) 0, 0); - } -#ifdef USE_MOTIF - /* Some window managers pass (0,0) as the location of - the window, and the Motif event handler stores it - in the emacs widget, which messes up Motif menus. */ - if (event.xconfigure.x == 0 && event.xconfigure.y == 0) - { - event.xconfigure.x = f->output_data.x->widget->core.x; - event.xconfigure.y = f->output_data.x->widget->core.y; - } -#endif /* USE_MOTIF */ - } - goto OTHER; + /* For debugging, this gives a way to fake an I/O error. */ + if (dpyinfo == XTread_socket_fake_io_error) + { + XTread_socket_fake_io_error = 0; + x_io_error_quitter (dpyinfo->display); + } - case ButtonPress: - case ButtonRelease: - { - /* If we decide we want to generate an event to be seen - by the rest of Emacs, we put it here. */ - struct input_event emacs_event; - int tool_bar_p = 0; - - emacs_event.kind = no_event; - bzero (&compose_status, sizeof (compose_status)); - - if (dpyinfo->grabbed - && last_mouse_frame - && FRAME_LIVE_P (last_mouse_frame)) - f = last_mouse_frame; - else - f = x_window_to_frame (dpyinfo, event.xbutton.window); - - if (f) - { - /* Is this in the tool-bar? */ - if (WINDOWP (f->tool_bar_window) - && XFASTINT (XWINDOW (f->tool_bar_window)->height)) - { - Lisp_Object window; - int p, x, y; - - x = event.xbutton.x; - y = event.xbutton.y; - - /* Set x and y. */ - window = window_from_coordinates (f, x, y, &p, 1); - if (EQ (window, f->tool_bar_window)) - { - x_handle_tool_bar_click (f, &event.xbutton); - tool_bar_p = 1; - } - } +#ifdef HAVE_X_SM + BLOCK_INPUT; + count += x_session_check_input (bufp, &numchars); + UNBLOCK_INPUT; +#endif - if (!tool_bar_p) - if (!dpyinfo->x_focus_frame - || f == dpyinfo->x_focus_frame) - construct_mouse_click (&emacs_event, &event, f); - } - else - { -#ifndef USE_TOOLKIT_SCROLL_BARS - struct scroll_bar *bar - = x_window_to_scroll_bar (event.xbutton.window); +#ifdef USE_GTK + /* For GTK we must use the GTK event loop. But XEvents gets passed + to our filter function above, and then to the big event switch. + We use a bunch of globals to communicate with our filter function, + that is kind of ugly, but it works. */ + current_dpyinfo = dpyinfo; - if (bar) - x_scroll_bar_handle_click (bar, &event, &emacs_event); -#endif /* not USE_TOOLKIT_SCROLL_BARS */ - } + while (gtk_events_pending ()) + { + current_count = count; + current_numcharsp = &numchars; + current_bufp = &bufp; - if (event.type == ButtonPress) - { - dpyinfo->grabbed |= (1 << event.xbutton.button); - last_mouse_frame = f; - /* Ignore any mouse motion that happened - before this event; any subsequent mouse-movement - Emacs events should reflect only motion after - the ButtonPress. */ - if (f != 0) - f->mouse_moved = 0; - - if (!tool_bar_p) - last_tool_bar_item = -1; - } - else - { - dpyinfo->grabbed &= ~(1 << event.xbutton.button); - } + gtk_main_iteration (); - if (numchars >= 1 && emacs_event.kind != no_event) - { - bcopy (&emacs_event, bufp, sizeof (struct input_event)); - bufp++; - count++; - numchars--; - } + count = current_count; + current_bufp = 0; + current_numcharsp = 0; -#ifdef USE_X_TOOLKIT - f = x_menubar_window_to_frame (dpyinfo, event.xbutton.window); - /* For a down-event in the menu bar, - don't pass it to Xt right now. - Instead, save it away - and we will pass it to Xt from kbd_buffer_get_event. - That way, we can run some Lisp code first. */ - if (f && event.type == ButtonPress - /* Verify the event is really within the menu bar - and not just sent to it due to grabbing. */ - && event.xbutton.x >= 0 - && event.xbutton.x < f->output_data.x->pixel_width - && event.xbutton.y >= 0 - && event.xbutton.y < f->output_data.x->menubar_height - && event.xbutton.same_screen) - { - SET_SAVED_BUTTON_EVENT; - XSETFRAME (last_mouse_press_frame, f); - } - else if (event.type == ButtonPress) - { - last_mouse_press_frame = Qnil; - goto OTHER; - } + if (current_finish == X_EVENT_GOTO_OUT) + goto out; + } -#ifdef USE_MOTIF /* This should do not harm for Lucid, - but I am trying to be cautious. */ - else if (event.type == ButtonRelease) - { - if (!NILP (last_mouse_press_frame)) - { - f = XFRAME (last_mouse_press_frame); - if (f->output_data.x) - SET_SAVED_BUTTON_EVENT; - } - else - goto OTHER; - } -#endif /* USE_MOTIF */ - else - goto OTHER; -#endif /* USE_X_TOOLKIT */ - } - break; +#else /* not USE_GTK */ + while (XPending (dpyinfo->display)) + { + int finish; - case CirculateNotify: - goto OTHER; - - case CirculateRequest: - goto OTHER; + XNextEvent (dpyinfo->display, &event); - case VisibilityNotify: - goto OTHER; +#ifdef HAVE_X_I18N + /* Filter events for the current X input method. */ + if (x_filter_event (dpyinfo, &event)) + break; +#endif + event_found = 1; - case MappingNotify: - /* Someone has changed the keyboard mapping - update the - local cache. */ - switch (event.xmapping.request) - { - case MappingModifier: - x_find_modifier_meanings (dpyinfo); - /* This is meant to fall through. */ - case MappingKeyboard: - XRefreshKeyboardMapping (&event.xmapping); - } - goto OTHER; + count += handle_one_xevent (dpyinfo, + &event, + &bufp, + &numchars, + &finish); - default: - OTHER: -#ifdef USE_X_TOOLKIT - BLOCK_INPUT; - XtDispatchEvent (&event); - UNBLOCK_INPUT; -#endif /* USE_X_TOOLKIT */ - break; - } - } + if (finish == X_EVENT_GOTO_OUT) + goto out; + } +#endif /* USE_GTK */ } out:; @@@ -7881,21 -12259,22 +7883,26 @@@ x_new_font (f, fontname if (!fontp) return Qnil; - if (f->output_data.x->font == (XFontStruct *) (fontp->font)) ++ if (FRAME_FONT (f) == (XFontStruct *) (fontp->font)) + /* This font is already set in frame F. There's nothing more to + do. */ + return build_string (fontp->full_name); + - f->output_data.x->font = (XFontStruct *) (fontp->font); - f->output_data.x->baseline_offset = fontp->baseline_offset; - f->output_data.x->fontset = -1; + FRAME_FONT (f) = (XFontStruct *) (fontp->font); + FRAME_BASELINE_OFFSET (f) = fontp->baseline_offset; + FRAME_FONTSET (f) = -1; - x_compute_fringe_widths (f, 1); + FRAME_COLUMN_WIDTH (f) = FONT_WIDTH (FRAME_FONT (f)); + FRAME_LINE_HEIGHT (f) = FONT_HEIGHT (FRAME_FONT (f)); + + compute_fringe_widths (f, 1); /* Compute the scroll bar width in character columns. */ - if (f->scroll_bar_pixel_width > 0) + if (FRAME_CONFIG_SCROLL_BAR_WIDTH (f) > 0) { - int wid = FONT_WIDTH (f->output_data.x->font); - f->scroll_bar_cols = (f->scroll_bar_pixel_width + wid-1) / wid; + int wid = FRAME_COLUMN_WIDTH (f); + FRAME_CONFIG_SCROLL_BAR_COLS (f) + = (FRAME_CONFIG_SCROLL_BAR_WIDTH (f) + wid-1) / wid; } else { @@@ -7923,10 -12308,16 +7930,16 @@@ return build_string (fontp->full_name); } - /* Give frame F the fontset named FONTSETNAME as its default font, and - return the full name of that fontset. FONTSETNAME may be a wildcard - pattern; in that case, we choose some fontset that fits the pattern. - The return value shows which fontset we chose. */ + /* Give frame F the fontset named FONTSETNAME as its default fontset, + and return the full name of that fontset. FONTSETNAME may be a + wildcard pattern; in that case, we choose some fontset that fits + the pattern. FONTSETNAME may be a font name for ASCII characters; + in that case, we create a fontset from that font name. + - The return value shows which fontset we chose. ++ The return value shows which fontset we chose. + If FONTSETNAME specifies the default fontset, return Qt. + If an ASCII font in the specified fontset can't be loaded, return + Qnil. */ Lisp_Object x_new_fontset (f, fontsetname) @@@ -7943,25 -12331,118 +7953,34 @@@ /* This fontset is already set in frame F. There's nothing more to do. */ return fontset_name (fontset); + else if (fontset == 0) + /* The default fontset can't be the default font. */ + return Qt; - result = x_new_font (f, (SDATA (fontset_ascii (fontset)))); - if (fontset >= 0) - result = x_new_font (f, (XSTRING (fontset_ascii (fontset))->data)); ++ if (fontset > 0) ++ result = x_new_font (f, (SDATA (fontset_ascii (fontset)))); + else - result = x_new_font (f, XSTRING (fontsetname)->data); ++ result = x_new_font (f, SDATA (fontsetname)); if (!STRINGP (result)) /* Can't load ASCII font. */ return Qnil; + if (fontset < 0) + fontset = new_fontset_from_font_name (result); + /* Since x_new_font doesn't update any fontset information, do it now. */ - f->output_data.x->fontset = fontset; + FRAME_FONTSET (f) = fontset; #ifdef HAVE_X_I18N if (FRAME_XIC (f) && (FRAME_XIC_STYLE (f) & (XIMPreeditPosition | XIMStatusArea))) - xic_set_xfontset (f, XSTRING (fontset_ascii (fontset))->data); + xic_set_xfontset (f, SDATA (fontset_ascii (fontset))); #endif - + - return build_string (fontsetname); + return fontset_name (fontset); } -/* Compute actual fringe widths */ - -void -x_compute_fringe_widths (f, redraw) - struct frame *f; - int redraw; -{ - int o_left = f->output_data.x->left_fringe_width; - int o_right = f->output_data.x->right_fringe_width; - int o_cols = f->output_data.x->fringe_cols; - - Lisp_Object left_fringe = Fassq (Qleft_fringe, f->param_alist); - Lisp_Object right_fringe = Fassq (Qright_fringe, f->param_alist); - int left_fringe_width, right_fringe_width; - - if (!NILP (left_fringe)) - left_fringe = Fcdr (left_fringe); - if (!NILP (right_fringe)) - right_fringe = Fcdr (right_fringe); - - left_fringe_width = ((NILP (left_fringe) || !INTEGERP (left_fringe)) ? 8 : - XINT (left_fringe)); - right_fringe_width = ((NILP (right_fringe) || !INTEGERP (right_fringe)) ? 8 : - XINT (right_fringe)); - - if (left_fringe_width || right_fringe_width) - { - int left_wid = left_fringe_width >= 0 ? left_fringe_width : -left_fringe_width; - int right_wid = right_fringe_width >= 0 ? right_fringe_width : -right_fringe_width; - int conf_wid = left_wid + right_wid; - int font_wid = FONT_WIDTH (f->output_data.x->font); - int cols = (left_wid + right_wid + font_wid-1) / font_wid; - int real_wid = cols * font_wid; - if (left_wid && right_wid) - { - if (left_fringe_width < 0) - { - /* Left fringe width is fixed, adjust right fringe if necessary */ - f->output_data.x->left_fringe_width = left_wid; - f->output_data.x->right_fringe_width = real_wid - left_wid; - } - else if (right_fringe_width < 0) - { - /* Right fringe width is fixed, adjust left fringe if necessary */ - f->output_data.x->left_fringe_width = real_wid - right_wid; - f->output_data.x->right_fringe_width = right_wid; - } - else - { - /* Adjust both fringes with an equal amount. - Note that we are doing integer arithmetic here, so don't - lose a pixel if the total width is an odd number. */ - int fill = real_wid - conf_wid; - f->output_data.x->left_fringe_width = left_wid + fill/2; - f->output_data.x->right_fringe_width = right_wid + fill - fill/2; - } - } - else if (left_fringe_width) - { - f->output_data.x->left_fringe_width = real_wid; - f->output_data.x->right_fringe_width = 0; - } - else - { - f->output_data.x->left_fringe_width = 0; - f->output_data.x->right_fringe_width = real_wid; - } - f->output_data.x->fringe_cols = cols; - f->output_data.x->fringes_extra = real_wid; - } - else - { - f->output_data.x->left_fringe_width = 0; - f->output_data.x->right_fringe_width = 0; - f->output_data.x->fringe_cols = 0; - f->output_data.x->fringes_extra = 0; - } - - if (redraw && FRAME_VISIBLE_P (f)) - if (o_left != f->output_data.x->left_fringe_width || - o_right != f->output_data.x->right_fringe_width || - o_cols != f->output_data.x->fringe_cols) - redraw_frame (f); -} /*********************************************************************** X Input Methods @@@ -9942,10 -14414,10 +9962,10 @@@ x_load_font (f, fontname, size the font code-points (0:0x20..0x7F, 1:0xA0..0xFF), or (0:0x2020..0x7F7F, 1:0xA0A0..0xFFFF, 3:0x20A0..0x7FFF, 2:0xA020..0xFF7F). For the moment, we don't know which charset -- uses this font. So, we set information in fontp->encoding[1] ++ uses this font. So, we set information in fontp->encoding_type which is never used by any charset. If mapping can't be decided, set FONT_ENCODING_NOT_DECIDED. */ - fontp->encoding[1] + fontp->encoding_type = (font->max_byte1 == 0 /* 1-byte font */ ? (font->min_char_or_byte2 < 0x80 @@@ -10045,6 -14517,100 +10065,98 @@@ x_find_ccl_program (fontp } + /* Return a char-table whose elements are t if the font FONT_INFO + contains a glyph for the corresponding character, and nil if not. + + Fixme: For the moment, this function works only for fonts whose + glyph encoding is the same as Unicode (e.g. ISO10646-1 fonts). */ + + Lisp_Object + x_get_font_repertory (f, font_info) + FRAME_PTR f; + struct font_info *font_info; + { + XFontStruct *font = (XFontStruct *) font_info->font; + Lisp_Object table; + int min_byte1, max_byte1, min_byte2, max_byte2; + + table = Fmake_char_table (Qnil, Qnil); + + min_byte1 = font->min_byte1; + max_byte1 = font->max_byte1; + min_byte2 = font->min_char_or_byte2; + max_byte2 = font->max_char_or_byte2; + if (min_byte1 == 0 && max_byte1 == 0) + { + if (! font->per_char || font->all_chars_exist == True) + char_table_set_range (table, min_byte2, max_byte2, Qt); + else + { + XCharStruct *pcm = font->per_char; + int from = -1; + int i; + + for (i = min_byte2; i <= max_byte2; i++, pcm++) + { + if (pcm->width == 0 && pcm->rbearing == pcm->lbearing) + { + if (from >= 0) + { + char_table_set_range (table, from, i - 1, Qt); + from = -1; + } + } + else if (from < 0) + from = i; + } + if (from >= 0) + char_table_set_range (table, from, i - 1, Qt); + } + } + else + { + if (! font->per_char || font->all_chars_exist == True) + { + int i; + + for (i = min_byte1; i <= max_byte1; i++) + char_table_set_range (table, + (i << 8) | min_byte2, (i << 8) | max_byte2, + Qt); + } + else + { + XCharStruct *pcm = font->per_char; + int i; + + for (i = min_byte1; i <= max_byte1; i++) + { + int from = -1; + int j; + + for (j = min_byte2; j <= max_byte2; j++, pcm++) + { + if (pcm->width == 0 && pcm->rbearing == pcm->lbearing) + { + if (from >= 0) + { + char_table_set_range (table, (i << 8) | from, + (i << 8) | (j - 1), Qt); + from = -1; + } + } + else if (from < 0) + from = j; + } + if (from >= 0) + char_table_set_range (table, (i << 8) | from, + (i << 8) | (j - 1), Qt); + } + } + } + + return table; + } - - /*********************************************************************** Initialization @@@ -10801,11 -15245,6 +10913,9 @@@ syms_of_xterm ( staticpro (&Qvendor_specific_keysyms); Qvendor_specific_keysyms = intern ("vendor-specific-keysyms"); - staticpro (&Qutf_8); - Qutf_8 = intern ("utf-8"); + staticpro (&Qlatin_1); + Qlatin_1 = intern ("latin-1"); + staticpro (&last_mouse_press_frame); last_mouse_press_frame = Qnil;