From 3fd3136672d50515decf72ea797b5abe81ffe70a Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Tue, 3 Sep 2002 04:11:28 +0000 Subject: [PATCH] *** empty log message *** --- lisp/ChangeLog | 5 +++ src/ChangeLog | 108 +++++++++++++++++++++++++++++++++++++++++++++++++ src/charset.h | 3 +- 3 files changed, 115 insertions(+), 1 deletion(-) diff --git a/lisp/ChangeLog b/lisp/ChangeLog index fe5521e8c6c..c5efdf280f6 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,8 @@ +2002-09-03 Kenichi Handa + + * international/mule-conf.el: Don't define the charset iso-8859-1 + here, just setup its properties. + 2002-08-21 Kenichi Handa * international/mule-conf.el (utf-8): Give :mime-charset property. diff --git a/src/ChangeLog b/src/ChangeLog index b2479bb0713..b18851b32f0 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,111 @@ +2002-09-03 Kenichi Handa + + The following changes (and some of 2002-08-20 changes of mine) are + for handling syntax, category, and case conversion for unibyte + characters by converting them to multibyte on the fly. With these + changes, we don't have to setup syntax and case tables for unibyte + characters in each language environment. + + * abbrev.c (Fexpand_abbrev): Convert a unibyte character to + multibyte if necessary. + + * bytecode.c (Fbyte_code): Likewise. + + * character.h (LEADING_CODE_LATIN_1_MIN) + (LEADING_CODE_LATIN_1_MAX): New macros. + (unibyte_to_multibyte_table): Extern it. + (unibyte_char_to_multibyte): New macro. + (MAKE_CHAR_MULTIBYTE): Use unibyte_to_multibyte_table. + (CHAR_LEADING_CODE): New macro. + (FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE): New macro. + + * character.c (unibyte_to_multibyte_table): New variable. + (unibyte_char_to_multibyte): Move to character.h and defined as + macro. + (multibyte_char_to_unibyte): If C is an eight-bit character, + convert it to the corresponding byte value. + + * charset.c (Fset_unibyte_charset): If the dimension of CHARSET is + not 1, signal an error. 
Update the elements of + unibyte_to_multibyte_table. + (init_charset_once): Initialize unibyte_to_multibyte_table. + (syms_of_charset): Define the charset `iso-8859-1'. + + * casefiddle.c (casify_object): Fix previous change. + + * cmds.c (internal_self_insert): In a multibyte buffer, insert C + as is without converting it to unibyte. In a unibyte buffer, + convert C to multibyte before checking the syntax. + + * lisp.h (unibyte_char_to_multibyte): Extern deleted. + + * minibuf.c (Fminibuffer_complete_word): Use the macro + FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE. + + * regex.h (struct re_pattern_buffer): New member target_multibyte. + + * regex.c (RE_TARGET_MULTIBYTE_P): New macro. + (GET_CHAR_BEFORE_2): Check target_multibyte, not multibyte. If + that is zero, convert an eight-bit char to multibyte. + (MAKE_CHAR_MULTIBYTE, CHAR_LEADING_CODE): New dummy macros for + non-emacs case. + (PATFETCH): Convert an eight-bit char to multibyte. + (HANDLE_UNIBYTE_RANGE): New macro. + (regex_compile): Setup the compiled pattern for multibyte chars + even if the given regex string is unibyte. Use PATFETCH_RAW + instead of PATFETCH in many places. To handle `charset' + specification of unibyte, call HANDLE_UNIBYTE_RANGE. Use bitmap + only for ASCII chars. + (analyse_first) : Simplified because the compiled pattern + is multibyte. + : Setup fastmap from bitmap only for ASCII chars. + : Use CHAR_LEADING_CODE to get leading codes. + : If multibyte, setup fastmap only for ASCII chars + here. + (re_compile_fastmap) [emacs]: Call analyse_first with the arg + multibyte always 1. + (re_search_2): In emacs, set the local variable multibyte to 1, + otherwise to 0. New local variable target_multibyte. Check it + to decide the multibyteness of STR1 and STR2. If + target_multibyte is zero, convert unibyte chars to multibyte + before translating and checking fastmap. + (TARGET_CHAR_AND_LENGTH): New macro. 
+ (re_match_2_internal): In emacs, set the local variable multibyte + to 1, otherwise to 0. New local variable target_multibyte. Check + it to decide the multibyteness of STR1 and STR2. Use + TARGET_CHAR_AND_LENGTH to fetch a character from D. + : If multibyte is nonzero, check fastmap + only for ASCII chars. Call bcmp_translate with + target_multibyte, not with multibyte. + : Declare the local variable C as `unsigned'. + (bcmp_translate): Change the last arg name to target_multibyte. + + * search.c (compile_pattern_1): Don't adjust the multibyteness of + the regexp pattern and the matching target. Set cp->buf.multibyte + to the multibyteness of the regexp pattern. Set + cp->buf.target_multibyte to the multibyteness of the matching + target. + (wordify): Use FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE instead of + FETCH_STRING_CHAR_ADVANCE. + (Freplace_match): Convert unibyte chars to multibyte. + + * syntax.c (char_quoted): Use FETCH_CHAR_AS_MULTIBYTE to convert + unibyte chars to multibyte. + (back_comment): Likewise. + (scan_words): Likewise. + (skip_chars): The arg syntaxp is deleted, and the code for + handling syntaxes is moved to skip_syntaxes. Callers changed. + Fix the case that the multibyteness of STRING and the current + buffer doesn't match. + (skip_syntaxes): New function. + (SYNTAX_WITH_MULTIBYTE_CHECK): Check C by ASCII_CHAR_P, not by + SINGLE_BYTE_CHAR_P. + (Fforward_comment): Use FETCH_CHAR_AS_MULTIBYTE to convert unibyte + chars to multibyte. + (scan_lists): Likewise. + (Fbackward_prefix_chars): Likewise. + (scan_sexps_forward): Likewise. + 2002-08-23 Kenichi Handa * xfaces.c (QCfontset): New variable. 
diff --git a/src/charset.h b/src/charset.h index a8b85b14366..5b16dd12271 100644 --- a/src/charset.h +++ b/src/charset.h @@ -511,11 +511,12 @@ extern Lisp_Object Qcharsetp; extern Lisp_Object Qascii, Qunicode; extern int charset_ascii, charset_eight_bit; extern int charset_iso_8859_1; -extern int charset_unibyte; extern int charset_jisx0201_roman; extern int charset_jisx0208_1978; extern int charset_jisx0208; +extern int charset_unibyte; + extern struct charset *char_charset P_ ((int, Lisp_Object, unsigned *)); extern Lisp_Object charset_attributes P_ ((int)); -- 2.39.5