From: Kenichi Handa Date: Wed, 14 Jan 2009 12:19:44 +0000 (+0000) Subject: (TWO_MORE_BYTES): New macro. X-Git-Tag: emacs-pretest-23.0.90~466 X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=f56a4450912fa06401b13e6631313fe17bed006f;p=emacs.git (TWO_MORE_BYTES): New macro. (detect_coding_utf_16): Use TWO_MORE_BYTES instead of ONE_MORE_BYTE. --- diff --git a/src/ChangeLog b/src/ChangeLog index 5d048f7413b..4f11a1269cc 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,9 @@ +2009-01-14 Kenichi Handa + + * coding.c (TWO_MORE_BYTES): New macro. + (detect_coding_utf_16): Use TWO_MORE_BYTES instead of + ONE_MORE_BYTE. + 2009-01-13 Chong Yidong * font.c (font_clear_prop): If clearing the family, clear the font @@ -90,7 +96,7 @@ 2009-01-07 Kenichi Handa * fileio.c (Finsert_file_contents): In the case of replace, - remeber the coding system used for decoding in + remember the coding system used for decoding in coding_system (Bug#1039). * coding.c (decode_coding_utf_8): Check byte_after_cr before diff --git a/src/coding.c b/src/coding.c index 01878a37b5c..9a94bc6fb2a 100644 --- a/src/coding.c +++ b/src/coding.c @@ -743,6 +743,47 @@ static struct coding_system coding_categories[coding_category_max]; consumed_chars++; \ } while (0) +/* Safely get two bytes from the source text pointed to by SRC, which + ends at SRC_END, and set C1 and C2 to those bytes. If there are not + enough bytes in the source for C1, jump to `no_more_source'. If + there are not enough bytes in the source for C2, set C2 to -1. If + multibytep is nonzero and a byte has its high bit set, a lead byte + of 0xC0 or 0xC1 is combined with the following byte; any other + lead byte yields -1 (in C1's case, for both C1 and C2). The caller + should declare and set these variables appropriately in advance: + src, src_end, multibytep + This macro is intended for use in detect_coding_utf_16. 
*/ + +#define TWO_MORE_BYTES(c1, c2) \ + do { \ + if (src == src_end) \ + goto no_more_source; \ + c1 = *src++; \ + if (multibytep && (c1 & 0x80)) \ + { \ + if ((c1 & 0xFE) == 0xC0) \ + c1 = ((c1 & 1) << 6) | *src++; \ + else \ + { \ + c1 = c2 = -1; \ + break; \ + } \ + } \ + if (src == src_end) \ + c2 = -1; \ + else \ + { \ + c2 = *src++; \ + if (multibytep && (c2 & 0x80)) \ + { \ + if ((c2 & 0xFE) == 0xC0) \ + c2 = ((c2 & 1) << 6) | *src++; \ + else \ + c2 = -1; \ + } \ + } \ + } while (0) + #define ONE_MORE_BYTE_NO_CHECK(c) \ do { \ @@ -1575,8 +1616,7 @@ detect_coding_utf_16 (coding, detect_info) return 0; } - ONE_MORE_BYTE (c1); - ONE_MORE_BYTE (c2); + TWO_MORE_BYTES (c1, c2); if ((c1 == 0xFF) && (c2 == 0xFE)) { detect_info->found |= (CATEGORY_MASK_UTF_16_LE @@ -1593,6 +1633,11 @@ detect_coding_utf_16 (coding, detect_info) | CATEGORY_MASK_UTF_16_BE_NOSIG | CATEGORY_MASK_UTF_16_LE_NOSIG); } + else if (c1 < 0 || c2 < 0) + { + detect_info->rejected |= CATEGORY_MASK_UTF_16; + return 0; + } else { /* We check the dispersion of Eth and Oth bytes where E is even and @@ -1610,8 +1655,9 @@ detect_coding_utf_16 (coding, detect_info) while (1) { - ONE_MORE_BYTE (c1); - ONE_MORE_BYTE (c2); + TWO_MORE_BYTES (c1, c2); + if (c1 < 0 || c2 < 0) + break; if (! e[c1]) { e[c1] = 1;