From b0edb2c59cccb8fa81b35ce03a39367f6ac9475f Mon Sep 17 00:00:00 2001 From: Dave Love Date: Mon, 27 May 2002 22:19:17 +0000 Subject: [PATCH] (decode_coding_utf_8): Reject overlong sequences. --- src/ChangeLog | 6 +++++- src/coding.c | 24 ++++++++++++++++++++---- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 5eac57c3543..9d7f54bddee 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,4 +1,8 @@ -2002-05-26 Dave Love +2002-05-27 Dave Love + + * coding.c (decode_coding_utf_8): Reject overlong sequences. + +2002-05-26 Dave Love * coding.c: Doc fixes. (Fcoding_system_aliases): Fix return value. diff --git a/src/coding.c b/src/coding.c index a4ad9c6a542..1fc59a02bb1 100644 --- a/src/coding.c +++ b/src/coding.c @@ -1078,6 +1078,7 @@ detect_coding_utf_8 (coding, mask) } +/* Fixme: deal with surrogates? */ static void decode_coding_utf_8 (coding) struct coding_system *coding; @@ -1126,23 +1127,38 @@ decode_coding_utf_8 (coding) if (! UTF_8_EXTRA_OCTET_P (c2)) goto invalid_code; if (UTF_8_2_OCTET_LEADING_P (c1)) - c = ((c1 & 0x1F) << 6) | (c2 & 0x3F); + { + c = ((c1 & 0x1F) << 6) | (c2 & 0x3F); + /* Reject overlong sequences here and below. Encoders + producing them are incorrect, they can be misleading, + and they mess up read/write invariance. */ + if (c < 128) + goto invalid_code; + } else { ONE_MORE_BYTE (c3); if (! UTF_8_EXTRA_OCTET_P (c3)) goto invalid_code; if (UTF_8_3_OCTET_LEADING_P (c1)) - c = (((c1 & 0xF) << 12) - | ((c2 & 0x3F) << 6) | (c3 & 0x3F)); + { + c = (((c1 & 0xF) << 12) + | ((c2 & 0x3F) << 6) | (c3 & 0x3F)); + if (c < 0x800) + goto invalid_code; + } else { ONE_MORE_BYTE (c4); if (! UTF_8_EXTRA_OCTET_P (c4)) goto invalid_code; if (UTF_8_4_OCTET_LEADING_P (c1)) + { c = (((c1 & 0x7) << 18) | ((c2 & 0x3F) << 12) | ((c3 & 0x3F) << 6) | (c4 & 0x3F)); + if (c < 0x10000) + goto invalid_code; + } else { ONE_MORE_BYTE (c5); @@ -1153,7 +1169,7 @@ decode_coding_utf_8 (coding) c = (((c1 & 0x3) << 24) | ((c2 & 0x3F) << 18) | ((c3 & 0x3F) << 12) | ((c4 & 0x3F) << 6) | (c5 & 0x3F)); - if (c > MAX_CHAR) + if ((c > MAX_CHAR) || (c < 0x200000)) goto invalid_code; } else -- 2.39.5