git.eshelyaron.com Git - emacs.git/commitdiff
Avoid undefined behavior in detect_coding routines
author Paul Eggert <eggert@cs.ucla.edu>
Tue, 31 May 2022 08:19:32 +0000 (01:19 -0700)
committer Paul Eggert <eggert@cs.ucla.edu>
Tue, 31 May 2022 08:26:47 +0000 (01:26 -0700)
* src/coding.c (detect_coding): Always initialize all
components of detect_info, so that detect_coding_utf_8 etc.
do not have undefined behavior when they read detect_info.checked.
This bug is not likely to cause problems on real systems.
Problem found by GCC 12 -fanalyzer.
(detect_coding_system): Use consistent style with detect_coding
initialization.

src/coding.c

index 2bed293d571b3974a144cc2ac8b3daf00bad1807..aa32efc3f615f35f0e8ba06a647f104b61becbbf 100644 (file)
@@ -6528,7 +6528,7 @@ detect_coding (struct coding_system *coding)
   if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
     {
       int c, i;
-      struct coding_detection_info detect_info;
+      struct coding_detection_info detect_info = {0};
       bool null_byte_found = 0, eight_bit_found = 0;
       bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd,
                                       inhibit_null_byte_detection);
@@ -6537,7 +6537,6 @@ detect_coding (struct coding_system *coding)
       bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8;
 
       coding->head_ascii = 0;
-      detect_info.checked = detect_info.found = detect_info.rejected = 0;
       for (src = coding->source; src < src_end; src++)
        {
          c = *src;
@@ -6712,12 +6711,8 @@ detect_coding (struct coding_system *coding)
   else if (XFIXNUM (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
           == coding_category_utf_8_auto)
     {
-      Lisp_Object coding_systems;
-      struct coding_detection_info detect_info;
-
-      coding_systems
+      Lisp_Object coding_systems
        = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
-      detect_info.found = detect_info.rejected = 0;
       if (check_ascii (coding) == coding->src_bytes)
        {
          if (CONSP (coding_systems))
@@ -6725,6 +6720,7 @@ detect_coding (struct coding_system *coding)
        }
       else
        {
+         struct coding_detection_info detect_info = {0};
          if (CONSP (coding_systems)
              && detect_coding_utf_8 (coding, &detect_info))
            {
@@ -6738,20 +6734,19 @@ detect_coding (struct coding_system *coding)
   else if (XFIXNUM (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
           == coding_category_utf_16_auto)
     {
-      Lisp_Object coding_systems;
-      struct coding_detection_info detect_info;
-
-      coding_systems
+      Lisp_Object coding_systems
        = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
-      detect_info.found = detect_info.rejected = 0;
       coding->head_ascii = 0;
-      if (CONSP (coding_systems)
-         && detect_coding_utf_16 (coding, &detect_info))
+      if (CONSP (coding_systems))
        {
-         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
-           found = XCAR (coding_systems);
-         else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
-           found = XCDR (coding_systems);
+         struct coding_detection_info detect_info = {0};
+         if (detect_coding_utf_16 (coding, &detect_info))
+           {
+             if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+               found = XCAR (coding_systems);
+             else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
+               found = XCDR (coding_systems);
+           }
        }
     }
 
@@ -8639,7 +8634,7 @@ detect_coding_system (const unsigned char *src,
   Lisp_Object val = Qnil;
   struct coding_system coding;
   ptrdiff_t id;
-  struct coding_detection_info detect_info;
+  struct coding_detection_info detect_info = {0};
   enum coding_category base_category;
   bool null_byte_found = 0, eight_bit_found = 0;
 
@@ -8658,8 +8653,6 @@ detect_coding_system (const unsigned char *src,
   coding.mode |= CODING_MODE_LAST_BLOCK;
   coding.head_ascii = 0;
 
-  detect_info.checked = detect_info.found = detect_info.rejected = 0;
-
   /* At first, detect text-format if necessary.  */
   base_category = XFIXNUM (CODING_ATTR_CATEGORY (attrs));
   if (base_category == coding_category_undecided)