From: Paul Eggert
Date: Tue, 31 May 2022 08:19:32 +0000 (-0700)
Subject: Avoid undefined behavior in detect_coding routines
X-Git-Tag: emacs-29.0.90~1910^2~319^2~8
X-Git-Url: http://git.eshelyaron.com/gitweb/?a=commitdiff_plain;h=82c05c034e1ecec49e4e8916b2cb6163d7a5bb74;p=emacs.git

Avoid undefined behavior in detect_coding routines

* src/coding.c (detect_coding): Always initialize all components
of detect_info, so that detect_coding_utf_8 etc. do not have
undefined behavior when they read detect_info.checked.
This bug is not likely to cause problems on real systems.
Problem found by GCC 12 -fanalyzer.
(detect_coding_system): Use consistent style with
detect_coding initialization.
---

diff --git a/src/coding.c b/src/coding.c
index 2bed293d571..aa32efc3f61 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -6528,7 +6528,7 @@ detect_coding (struct coding_system *coding)
   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
     {
       int c, i;
-      struct coding_detection_info detect_info;
+      struct coding_detection_info detect_info = {0};
       bool null_byte_found = 0, eight_bit_found = 0;
       bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd,
                                        inhibit_null_byte_detection);
@@ -6537,7 +6537,6 @@ detect_coding (struct coding_system *coding)
       bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8;
 
       coding->head_ascii = 0;
-      detect_info.checked = detect_info.found = detect_info.rejected = 0;
       for (src = coding->source; src < src_end; src++)
         {
           c = *src;
@@ -6712,12 +6711,8 @@ detect_coding (struct coding_system *coding)
   else if (XFIXNUM (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
            == coding_category_utf_8_auto)
     {
-      Lisp_Object coding_systems;
-      struct coding_detection_info detect_info;
-
-      coding_systems
+      Lisp_Object coding_systems
         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
-      detect_info.found = detect_info.rejected = 0;
       if (check_ascii (coding) == coding->src_bytes)
         {
           if (CONSP (coding_systems))
@@ -6725,6 +6720,7 @@ detect_coding (struct coding_system *coding)
         }
       else
         {
+          struct coding_detection_info detect_info = {0};
           if (CONSP (coding_systems)
               && detect_coding_utf_8 (coding, &detect_info))
             {
@@ -6738,20 +6734,19 @@ detect_coding (struct coding_system *coding)
   else if (XFIXNUM (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
            == coding_category_utf_16_auto)
     {
-      Lisp_Object coding_systems;
-      struct coding_detection_info detect_info;
-
-      coding_systems
+      Lisp_Object coding_systems
         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
-      detect_info.found = detect_info.rejected = 0;
       coding->head_ascii = 0;
-      if (CONSP (coding_systems)
-          && detect_coding_utf_16 (coding, &detect_info))
+      if (CONSP (coding_systems))
         {
-          if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
-            found = XCAR (coding_systems);
-          else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
-            found = XCDR (coding_systems);
+          struct coding_detection_info detect_info = {0};
+          if (detect_coding_utf_16 (coding, &detect_info))
+            {
+              if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+                found = XCAR (coding_systems);
+              else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
+                found = XCDR (coding_systems);
+            }
         }
     }
 
@@ -8639,7 +8634,7 @@ detect_coding_system (const unsigned char *src,
   Lisp_Object val = Qnil;
   struct coding_system coding;
   ptrdiff_t id;
-  struct coding_detection_info detect_info;
+  struct coding_detection_info detect_info = {0};
   enum coding_category base_category;
   bool null_byte_found = 0, eight_bit_found = 0;
 
@@ -8658,8 +8653,6 @@ detect_coding_system (const unsigned char *src,
   coding.mode |= CODING_MODE_LAST_BLOCK;
   coding.head_ascii = 0;
 
-  detect_info.checked = detect_info.found = detect_info.rejected = 0;
-
   /* At first, detect text-format if necessary. */
   base_category = XFIXNUM (CODING_ATTR_CATEGORY (attrs));
   if (base_category == coding_category_undecided)