diffblue · tautschnig · Jan 2, 2021 · tautschnig · Jan 12, 2021
@@ -40,7 +40,6 @@ Author: Peter Schrammel
 #include <goto-programs/remove_unused_functions.h>
 #include <goto-programs/remove_vector.h>
 #include <goto-programs/remove_virtual_functions.h>
-#include <goto-programs/rewrite_union.h>
 #include <goto-programs/set_properties.h>
 #include <goto-programs/show_properties.h>
 #include <goto-programs/string_abstraction.h>
@@ -296,7 +295,6 @@ bool jdiff_parse_optionst::process_goto_program(
     remove_returns(goto_model);
     remove_vector(goto_model);
     remove_complex(goto_model);
-    rewrite_union(goto_model);
 
     // add generic checks
     log.status() << "Generic Property Instrumentation" << messaget::eom;

diff --git a/regression/ansi-c/arch_flags_mcpu_bad/object.intel b/regression/ansi-c/arch_flags_mcpu_bad/object.intel
diff --git a/regression/ansi-c/arch_flags_mcpu_bad/test.desc b/regression/ansi-c/arch_flags_mcpu_bad/test.desc
@@ -11,7 +11,7 @@ This tests the -mcpu=cortex=a15 flag that should activate ARM-32 mode.
 The object file 'object.intel' was compiled from 'source.c' with goto-cc
 on a 64-bit platform:
 
-  goto-cc -c source.c
+  goto-cc -c source.c -o object.intel
 
 preproc.i is already pre-processed so that it can be linked in without
 needing to invoke a pre-processor from a cross-compile toolchain on your

diff --git a/regression/ansi-c/arch_flags_mcpu_good/object.arm b/regression/ansi-c/arch_flags_mcpu_good/object.arm
diff --git a/regression/ansi-c/arch_flags_mthumb_bad/object.intel b/regression/ansi-c/arch_flags_mthumb_bad/object.intel
diff --git a/regression/ansi-c/arch_flags_mthumb_bad/test.desc b/regression/ansi-c/arch_flags_mthumb_bad/test.desc
@@ -11,7 +11,7 @@ This tests the -mthumb flag that should activate ARM-32 mode. The object
 file 'object.intel' was compiled from 'source.c' with goto-cc on a
 64-bit platform:
 
-  goto-cc -c source.c
+  goto-cc -c source.c -o object.intel
 
 preproc.i is already pre-processed so that it can be linked in without
 needing to invoke a pre-processor from a cross-compile toolchain on your

diff --git a/regression/ansi-c/arch_flags_mthumb_good/object.arm b/regression/ansi-c/arch_flags_mthumb_good/object.arm
diff --git a/regression/cbmc-library/float-nan-check/test.desc b/regression/cbmc-library/float-nan-check/test.desc
@@ -1,16 +1,16 @@
 CORE
 main.c
 --nan-check
-\[main.NaN.1\] line \d+ NaN on \+ in byte_extract_little_endian\(c, (0|0l|0ll), float\) \+ myzero: SUCCESS
+\[main.NaN.1\] line \d+ NaN on \+ in c\.f \+ myzero: SUCCESS
 \[main.NaN.2\] line \d+ NaN on / in myzero / myzero: FAILURE
 \[main.NaN.3\] line \d+ NaN on / in \(float\)n / myinf: FAILURE
 \[main.NaN.4\] line \d+ NaN on \* in myinf \* myzero: FAILURE
 \[main.NaN.5\] line \d+ NaN on \+ in -myinf \+ myinf: FAILURE
 \[main.NaN.6\] line \d+ NaN on \+ in myinf \+ -myinf: FAILURE
 \[main.NaN.7\] line \d+ NaN on - in myinf - myinf: FAILURE
 \[main.NaN.8\] line \d+ NaN on - in -myinf - -myinf: FAILURE
-\[main.NaN.9\] line \d+ NaN on \+ in byte_extract_little_endian\(c, (0|0l|0ll), float\) \+ byte_extract_little_endian\(c, (0|0l|0ll), float\): SUCCESS
-\[main.NaN.10\] line \d+ NaN on / in byte_extract_little_endian\(c, (0|0l|0ll), float\) / myzero: SUCCESS
+\[main.NaN.9\] line \d+ NaN on \+ in c\.f \+ c\.f: SUCCESS
+\[main.NaN.10\] line \d+ NaN on / in c\.f / myzero: SUCCESS
 \[main.NaN.11\] line \d+ NaN on \* in mynan \* mynan: SUCCESS
 \[main.NaN.12\] line \d+ NaN on \+ in mynan \+ mynan: SUCCESS
 \[main.NaN.13\] line \d+ NaN on - in mynan - mynan: SUCCESS

diff --git a/regression/cbmc/union7/no_simplify.desc b/regression/cbmc/union7/no_simplify.desc
@@ -0,0 +1,11 @@
+CORE broken-smt-backend
+main.c
+--big-endian --no-simplify
+^EXIT=0$
+^SIGNAL=0$
+^VERIFICATION SUCCESSFUL$
+--
+^warning: ignoring
+--
+Although this test can now be fully solved via constant propagation and
+simplification, it should also succeed without simplification.
diff --git a/regression/cbmc/union7/test.desc b/regression/cbmc/union7/test.desc
@@ -1,11 +1,11 @@
-CORE broken-smt-backend
+CORE
 main.c
 --big-endian
+Generated 3 VCC\(s\), 0 remaining after simplification
 ^EXIT=0$
 ^SIGNAL=0$
 ^VERIFICATION SUCCESSFUL$
 --
 ^warning: ignoring
 --
-This test reports a VERIFICATION ERROR when running with the SMT2 solver on
-Windows only.
+This test can now be fully solved via constant propagation and simplification.
diff --git a/regression/cbmc/xml-trace/test.desc b/regression/cbmc/xml-trace/test.desc
@@ -7,7 +7,7 @@ VERIFICATION FAILED
 <assignment assignment_type="state" base_name="u" display_name="test::u" hidden="false" identifier="test::u" mode="C" step_nr="\d+" thread="0">
 <location file=".*" function="test" line="\d+" working-directory=".*"/>
 <type>union myunion</type>
-<full_lhs>byte_extract_little_endian\(u, 0ll?, .*int.*\)</full_lhs>
+<full_lhs>u\.i</full_lhs>
 <full_lhs_value binary="[01]+">\d+ll?</full_lhs_value>
 <value>\{ \.i=\d+ll? \}</value>
 <value_expression>

@@ -79,13 +79,13 @@
 /home/runner/work/cbmc/cbmc/src/analyses/variable-sensitivity/variable_sensitivity_object_factory.cpp:188: warning: The following parameter of initialize_abstract_object(const typet type, bool top, bool bottom, const exprt &e, const abstract_environmentt &environment, const namespacet &ns, const vsd_configt &configuration) is not documented:
   parameter 'configuration'
 
-warning: Include graph for 'cbmc_parse_options.cpp' not generated, too many nodes (61), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
+warning: Include graph for 'cbmc_parse_options.cpp' not generated, too many nodes (60), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
 warning: Include graph for 'goto_instrument_parse_options.cpp' not generated, too many nodes (97), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
 warning: Included by graph for 'goto_functions.h' not generated, too many nodes (66), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
 warning: Included by graph for 'goto_model.h' not generated, too many nodes (109), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
 warning: Included by graph for 'arith_tools.h' not generated, too many nodes (183), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
-warning: Included by graph for 'c_types.h' not generated, too many nodes (111), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
-warning: Included by graph for 'config.h' not generated, too many nodes (85), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
+warning: Included by graph for 'c_types.h' not generated, too many nodes (112), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
+warning: Included by graph for 'config.h' not generated, too many nodes (86), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
 warning: Included by graph for 'exception_utils.h' not generated, too many nodes (61), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
 warning: Included by graph for 'expr.h' not generated, too many nodes (87), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
 warning: Included by graph for 'expr_util.h' not generated, too many nodes (61), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
@@ -94,7 +94,7 @@ warning: Included by graph for 'irep.h' not generated, too many nodes (62), thre
 warning: Included by graph for 'message.h' not generated, too many nodes (116), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
 warning: Included by graph for 'namespace.h' not generated, too many nodes (110), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
 warning: Included by graph for 'pointer_expr.h' not generated, too many nodes (101), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
-warning: Included by graph for 'prefix.h' not generated, too many nodes (86), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
+warning: Included by graph for 'prefix.h' not generated, too many nodes (87), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
 warning: Included by graph for 'simplify_expr.h' not generated, too many nodes (77), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
 warning: Included by graph for 'std_code.h' not generated, too many nodes (78), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
 warning: Included by graph for 'std_expr.h' not generated, too many nodes (245), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.

@@ -17,6 +17,7 @@ Author: Daniel Kroening, [email protected]
 #include <util/c_types.h>
 #include <util/config.h>
 #include <util/cprover_prefix.h>
+#include <util/expr_initializer.h>
 #include <util/expr_util.h>
 #include <util/floatbv_expr.h>
 #include <util/ieee_float.h>
@@ -1105,6 +1106,36 @@ void c_typecheck_baset::typecheck_expr_typecast(exprt &expr)
         expr.set(ID_C_lvalue, true);
         return;
       }
+      else if(c.get_anonymous() && c.type().id() == ID_struct_tag)
+      {
+        // we also look into anonymous members as we might have wrapped a
+        // non-maximal member in such a struct
+        const struct_typet &struct_type =
+          follow_tag(to_struct_tag_type(c.type()));
+        if(
+          !struct_type.components().empty() &&
+          struct_type.components().front().type() == op.type())
+        {
+          auto nondet =
+            nondet_initializer(c.type(), expr.source_location(), *this);
+          if(!nondet.has_value())
+          {
+            error().source_location = expr.source_location();
+            error() << "cannot nondet-initialize union component of type '"
+                    << to_string(c.type()) << "'" << eom;
+            throw 0;
+          }
+          CHECK_RETURN(nondet->id() == ID_struct);
+          CHECK_RETURN(!nondet->operands().empty());
+
+          nondet->operands().front() = op;
+          union_exprt union_expr(c.get_name(), *nondet, expr.type());
+          union_expr.add_source_location() = expr.source_location();
+          expr = union_expr;
+          expr.set(ID_C_lvalue, true);
+          return;
+        }
+      }
     }
 
     // not found, complain

@@ -530,27 +530,36 @@ exprt::operandst::const_iterator c_typecheck_baset::do_designated_initializer(
         // initializer; other bytes have an unspecified value (C Standard
         // 6.2.6.1(7)). In practice, objects of static lifetime are fully zero
         // initialized.
-        const auto zero =
-          zero_initializer(component.type(), value.source_location(), *this);
-        if(!zero.has_value())
-        {
-          error().source_location = value.source_location();
-          error() << "cannot zero-initialize union component of type '"
-                  << to_string(component.type()) << "'" << eom;
-          throw 0;
-        }
-
         if(current_symbol.is_static_lifetime)
         {
-          byte_update_exprt byte_update{
-            byte_update_id(), *dest, from_integer(0, index_type()), *zero};
-          byte_update.add_source_location() = value.source_location();
-          *dest = std::move(byte_update);
-          dest = &(to_byte_update_expr(*dest).op2());
+          const auto zero =
+            zero_initializer(component.type(), value.source_location(), *this);
+          if(!zero.has_value())
+          {
+            error().source_location = value.source_location();
+            error() << "cannot zero-initialize union component of type '"
+                    << to_string(component.type()) << "'" << eom;
+            throw 0;
+          }
+
+          union_exprt union_expr(component.get_name(), *zero, type);
+          union_expr.add_source_location() = value.source_location();
+          *dest = std::move(union_expr);
+          dest = &(to_union_expr(*dest).op());
         }
         else
         {
-          union_exprt union_expr(component.get_name(), *zero, type);
+          const auto nondet = nondet_initializer(
+            component.type(), value.source_location(), *this);
+          if(!nondet.has_value())
+          {
+            error().source_location = value.source_location();
+            error() << "cannot nondet-initialize union component of type '"
+                    << to_string(component.type()) << "'" << eom;
+            throw 0;
+          }
+
+          union_exprt union_expr(component.get_name(), *nondet, type);
           union_expr.add_source_location() = value.source_location();
           *dest = std::move(union_expr);
           dest = &(to_union_expr(*dest).op());

@@ -20,7 +20,9 @@ Author: Daniel Kroening, [email protected]
 #include <util/mathematical_types.h>
 #include <util/pointer_expr.h>
 #include <util/pointer_offset_size.h>
+#include <util/prefix.h>
 #include <util/simplify_expr.h>
+#include <util/string2int.h>
 
 #include "ansi_c_convert_type.h"
 #include "c_qualifiers.h"
@@ -1073,6 +1075,148 @@ void c_typecheck_baset::typecheck_compound_body(
     else
       it++;
   }
+
+  if(type.id() == ID_union && !components.empty())
+  {
+    const auto widest_member =
+      to_union_type(type).find_widest_union_component(*this);
+    optionalt<exprt> size_expr_opt;
+
+    if(!widest_member.has_value())
+    {
+      // GCC extension: arrays of non-constant size
+      size_expr_opt = size_of_expr(type, *this);
+
+      if(!size_expr_opt.has_value())
+      {
+        error().source_location = type.source_location();
+        error() << "cannot determine size of union" << eom;
+        throw 0;
+      }
+    }
+
+    // ensure non-conflicting member names
+    for(auto &component : components)
+    {
+      if(
+        id2string(component.get_name()).size() > 5 &&
+        has_prefix(id2string(component.get_name()), "$anon"))
+      {
+        const std::string suffix{id2string(component.get_name()), 5};
+        auto int_suffix = string2optional_unsigned(suffix);
+        if(int_suffix.has_value())
+          anon_member_counter = std::max(anon_member_counter, *int_suffix + 1);
+      }
+    }
+
+    for(auto &component : components)
+    {
+      std::vector<typet> padding_types;
+
+      const auto &component_bits = pointer_offset_bits(component.type(), *this);
+      if(
+        component_bits.has_value() &&
+        (!widest_member.has_value() ||
+         *component_bits < widest_member->second) &&
+        *component_bits % 8 != 0)
+      {
+        std::size_t bit_field_padding_width =
+          8 - numeric_cast_v<std::size_t>(*component_bits % 8);
+        padding_types.push_back(c_bit_field_typet{
+          unsignedbv_typet{bit_field_padding_width}, bit_field_padding_width});
+      }
+
+      if(size_expr_opt.has_value())
+      {
+        const auto component_size_expr_opt =
+          size_of_expr(component.type(), *this);
+        CHECK_RETURN(component_size_expr_opt.has_value());
+        // size_of_expr will round up to full bytes, and also works for bit
+        // fields (which C sizeof does not support). Thus, we don't need to
+        // special-case bit_field_padding_opt being set.
+        if_exprt padding_bytes{
+          binary_relation_exprt{
+            *component_size_expr_opt, ID_lt, *size_expr_opt},
+          minus_exprt{*size_expr_opt, *component_size_expr_opt},
+          from_integer(0, size_expr_opt->type())};
+        simplify(padding_bytes, *this);
+
+        if(!padding_bytes.is_zero())
+        {
+          make_index_type(padding_bytes);
+
+          padding_types.push_back(
+            array_typet{unsigned_char_type(), padding_bytes});
+        }
+      }
+      else
+      {
+        CHECK_RETURN(component_bits.has_value());
+        // Truncating division ensures that we don't need to special-case
+        // bit_field_padding_opt being set.
+        std::size_t padding_bytes =
+          numeric_cast_v<std::size_t>(widest_member->second - *component_bits) /
+          8;
+
+        if(padding_bytes != 0)
+        {
+          padding_types.push_back(array_typet{
+            unsigned_char_type(), from_integer(padding_bytes, index_type())});
+        }
+      }
+
+      if(padding_types.empty())
+        continue;
+
+      struct_typet::componentst component_with_padding_type;
+      component_with_padding_type.push_back(component);
+      for(const auto &t : padding_types)
+      {
+        // TODO: perhaps use the same naming as padding.cpp uses
+        component_with_padding_type.push_back(struct_union_typet::componentt{
+          "$anon" + std::to_string(anon_member_counter++), t});
+        // TODO: not sure whether this should be anonymous
+        component_with_padding_type.back().set_anonymous(true);
+        component_with_padding_type.back().set_is_padding(true);
+        component_with_padding_type.back().add_source_location() =
+          component.source_location();
+      }
+
+      struct_union_typet::componentt component_with_padding{
+        "$anon" + std::to_string(anon_member_counter++),
+        struct_typet{component_with_padding_type}};
+      component_with_padding.set_anonymous(true);
+      component_with_padding.add_source_location() =
+        component.source_location();
+
+      // based on typecheck_compound_type, branch
+      // "if(type.find(ID_tag).is_nil())"
+      // produce type symbol
+      type_symbolt compound_symbol{component_with_padding.type()};
+      compound_symbol.location = component_with_padding.source_location();
+
+      std::string typestr = type2name(compound_symbol.type, *this);
+      compound_symbol.base_name = "#anon#" + typestr;
+      compound_symbol.name = "tag-#anon#" + typestr;
+
+      // We might already have the same anonymous struct, and this is simply
+      // ok as those are exactly the same types, just introduced at a
+      // different source location.
+      symbolt *new_symbol = &compound_symbol;
+      if(
+        symbol_table.symbols.find(compound_symbol.name) ==
+        symbol_table.symbols.end())
+      {
+        move_symbol(compound_symbol, new_symbol);
+      }
+
+      struct_tag_typet tag_type{new_symbol->name};
+      tag_type.add_source_location() = component_with_padding.source_location();
+      component_with_padding.type().swap(tag_type);
+
+      component.swap(component_with_padding);
+    }
+  }
 }
 
 typet c_typecheck_baset::enum_constant_type(

@@ -62,7 +62,6 @@ Author: Daniel Kroening, [email protected]
 #include <goto-programs/remove_skip.h>
 #include <goto-programs/remove_unused_functions.h>
 #include <goto-programs/remove_vector.h>
-#include <goto-programs/rewrite_union.h>
 #include <goto-programs/set_properties.h>
 #include <goto-programs/show_goto_functions.h>
 #include <goto-programs/show_properties.h>
@@ -946,7 +945,6 @@ bool cbmc_parse_optionst::process_goto_program(
   remove_returns(goto_model);
   remove_vector(goto_model);
   remove_complex(goto_model);
-  rewrite_union(goto_model);
 
   // add generic checks
   log.status() << "Generic Property Instrumentation" << messaget::eom;