From: Stefan Monnier Date: Thu, 12 Jun 2008 20:25:47 +0000 (+0000) Subject: * character.h (CHAR_TO_BYTE_SAFE): New macro. X-Git-Tag: emacs-pretest-23.0.90~4848 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=2afc21f5f6d8963301913c5aed515e567b8e3986;p=emacs.git * character.h (CHAR_TO_BYTE_SAFE): New macro. * character.c (Fmultibyte_char_to_unibyte): Obey the docstring. * regex.c (RE_CHAR_TO_UNIBYTE): Use the new macro. (WEAK_ALIAS): Simplify. * syntax.c (skip_chars): Don't mark non-byte chars in the fastmap when searching a unibyte buffer. --- diff --git a/src/ChangeLog b/src/ChangeLog index b5951beb09a..6361f522fe7 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,12 @@ +2008-06-12 Stefan Monnier + + * character.h (CHAR_TO_BYTE_SAFE): New macro. + * character.c (Fmultibyte_char_to_unibyte): Obey the docstring. + * regex.c (RE_CHAR_TO_UNIBYTE): Use the new macro. + (WEAK_ALIAS): Simplify. + * syntax.c (skip_chars): Don't mark non-byte chars in the fastmap + when searching a unibyte buffer. + 2008-06-12 Chong Yidong * xfns.c (Fx_select_font): Rename from x-font-dialog. @@ -8,10 +17,10 @@ 2008-06-11 Jason Rumney - * w32font.c (w32font_encode_char): Detect missing glyphs that are - misreported as space. - (add_font_entity_to_list): Support unicode-bmp and unicode-sip - as aliases for registry iso10646-1. + * w32font.c (w32font_encode_char): Detect missing glyphs that are + misreported as space. + (add_font_entity_to_list): Support unicode-bmp and unicode-sip + as aliases for registry iso10646-1. 2008-06-11 Stefan Monnier diff --git a/src/character.c b/src/character.c index 7f09ecd4857..5e2a3590563 100644 --- a/src/character.c +++ b/src/character.c @@ -359,7 +359,7 @@ If the multibyte character does not represent a byte, return -1. 
*/) return ch; else { - int cu = CHAR_TO_BYTE8 (cm); + int cu = CHAR_TO_BYTE_SAFE (cm); return make_number (cu); } } diff --git a/src/character.h b/src/character.h index ae87b3885d9..cf73083dd04 100644 --- a/src/character.h +++ b/src/character.h @@ -68,6 +68,13 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ ? (c) - 0x3FFF00 \ : multibyte_char_to_unibyte (c, Qnil)) +/* Return the raw 8-bit byte for character C, + or -1 if C doesn't correspond to a byte. */ +#define CHAR_TO_BYTE_SAFE(c) \ + (CHAR_BYTE8_P (c) \ + ? (c) - 0x3FFF00 \ + : multibyte_char_to_unibyte_safe (c, Qnil)) + /* Nonzero iff BYTE is the 1st byte of a multibyte form of a character that corresponds to a raw 8-bit byte. */ #define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1) diff --git a/src/regex.c b/src/regex.c index 8ffd2be6ded..ff43fc85008 100644 --- a/src/regex.c +++ b/src/regex.c @@ -153,10 +153,7 @@ # define RE_CHAR_TO_MULTIBYTE(c) unibyte_to_multibyte_table[(c)] -# define RE_CHAR_TO_UNIBYTE(c) \ - (ASCII_CHAR_P (c) ? (c) \ - : CHAR_BYTE8_P (c) ? CHAR_TO_BYTE8 (c) \ - : multibyte_char_to_unibyte_safe (c)) +# define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c) /* Set C a (possibly converted to multibyte) character before P. P points into a string which is the virtual concatenation of STR1 @@ -5574,10 +5571,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) if (multibyte) { pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen); - if (CHAR_BYTE8_P (pat_ch)) - pat_ch = CHAR_TO_BYTE8 (pat_ch); - else - pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch); + pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch); } else { diff --git a/src/syntax.c b/src/syntax.c index 8c5ebb96f28..6dc63c25537 100644 --- a/src/syntax.c +++ b/src/syntax.c @@ -1711,7 +1711,11 @@ skip_chars (forwardp, string, lim, handle_iso_classes) int c2 = char_ranges[i + 1]; for (; c1 <= c2; c1++) - fastmap[CHAR_TO_BYTE8 (c1)] = 1; + { + int b = CHAR_TO_BYTE_SAFE (c1); + if (b >= 0) + fastmap[b] = 1; + } } } }