From 22aa665c9b536775a28ff2e4907afc31b69ccb21 Mon Sep 17 00:00:00 2001
From: Eli Zaretskii
Date: Fri, 15 Jun 2018 17:39:34 +0300
Subject: [PATCH] Reject invalid 5-byte sequences when detecting UTF-8 encoding

* src/coding.c (detect_coding_utf_8): Reject multibyte sequences
whose leading byte is greater than MAX_MULTIBYTE_LEADING_CODE.
(Bug#31829)
* src/character.h (MAX_MULTIBYTE_LEADING_CODE): Add commentary
about the connection between the value of this macro and MAX_CHAR.
---
 src/character.h | 3 ++-
 src/coding.c    | 5 ++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/character.h b/src/character.h
index 1f21b2ad330..bc65759aa2a 100644
--- a/src/character.h
+++ b/src/character.h
@@ -57,7 +57,8 @@ INLINE_HEADER_BEGIN
 
 /* Minimum leading code of multibyte characters. */
 #define MIN_MULTIBYTE_LEADING_CODE 0xC0
-/* Maximum leading code of multibyte characters. */
+/* Maximum leading code of multibyte characters. Note: this must be
+   updated if we ever increase MAX_CHAR above. */
 #define MAX_MULTIBYTE_LEADING_CODE 0xF8
 
 /* Unicode character values. */
diff --git a/src/coding.c b/src/coding.c
index e756ba169dd..b1eb2edb497 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -1225,7 +1225,10 @@ detect_coding_utf_8 (struct coding_system *coding,
           ONE_MORE_BYTE (c4);
           if (c4 < 0 || ! UTF_8_EXTRA_OCTET_P (c4))
             break;
-          if (UTF_8_5_OCTET_LEADING_P (c))
+          if (UTF_8_5_OCTET_LEADING_P (c)
+              /* If we ever need to increase MAX_CHAR, the below may need
+                 to be reviewed. */
+              && c < MAX_MULTIBYTE_LEADING_CODE)
             {
               nchars++;
               continue;
-- 
2.39.5