Byte-extract lowering: do not blindly use offsets

tautschnig · tautschnig · commit d4e61f87dddf · 2023-04-18T12:38:41.000Z
When unpacking an array we must not create 0 bytes up until an offset when the source array can never be as large. Doing so when within a struct would spuriously produce a larger member. The test case highlighted a problem in the SMT2 back-end: the first operand of a struct was not correctly flattened and instead used default expression conversion (which is only ok when the array theory is never being used). To avoid this problem creeping in again, factor out the operand conversion into a lambda and invoke that lambda each time. Fixes: #7654
diff --git a/regression/cbmc/byte_extract2/main.c b/regression/cbmc/byte_extract2/main.c
@@ -0,0 +1,22 @@
+#include <assert.h>
+
+#define SIZE 1
+
+struct s
+{
+  char a1[1];
+  unsigned int a2_len;
+  char a2[SIZE];
+};
+
+void main()
+{
+  struct s s1;
+  char buf[SIZE];
+
+  __CPROVER_assume(s1.a2_len <= SIZE);
+  __CPROVER_assume(s1.a2_len >= SIZE);
+  char src_n[s1.a2_len];
+  __CPROVER_array_copy(src_n, s1.a2);
+  assert(src_n[0] == s1.a2[0]);
+}
diff --git a/regression/cbmc/byte_extract2/test.desc b/regression/cbmc/byte_extract2/test.desc
@@ -0,0 +1,11 @@
+CORE broken-cprover-smt-backend
+main.c
+
+^VERIFICATION SUCCESSFUL$
+^EXIT=0$
+^SIGNAL=0$
+--
+^warning: ignoring
+--
+The CPROVER SMT2 back-end does not support quantifiers, which are brought into
+this example via an array comprehension.
diff --git a/src/solvers/smt2/smt2_conv.cpp b/src/solvers/smt2/smt2_conv.cpp
@@ -3280,49 +3280,37 @@ void smt2_convt::convert_struct(const struct_exprt &expr)
   }
   else
   {
-    if(components.size()==1)
-    {
-      const exprt &op = expr.op0();
+    auto convert_operand = [this](const exprt &op) {
       // may need to flatten array-theory arrays in there
       if(op.type().id() == ID_array && use_array_theory(op))
         flatten_array(op);
       else if(op.type().id() == ID_bool)
         flatten2bv(op);
       else
         convert_expr(op);
-    }
-    else
+    };
+
+    // SMT-LIB 2 concat is binary only
+    std::size_t n_concat = 0;
+    for(std::size_t i = components.size(); i > 1; i--)
     {
-      // SMT-LIB 2 concat is binary only
-      std::size_t n_concat = 0;
-      for(std::size_t i=components.size(); i>1; i--)
+      if(is_zero_width(components[i - 1].type(), ns))
+        continue;
+      else if(i > 2 || !is_zero_width(components[0].type(), ns))
       {
-        if(is_zero_width(components[i - 1].type(), ns))
-          continue;
-        else if(i > 2 || !is_zero_width(components[0].type(), ns))
-        {
-          ++n_concat;
-          out << "(concat ";
-        }
-
-        exprt op=expr.operands()[i-1];
-
-        // may need to flatten array-theory arrays in there
-        if(op.type().id() == ID_array && use_array_theory(op))
-          flatten_array(op);
-        else if(op.type().id() == ID_bool)
-          flatten2bv(op);
-        else
-          convert_expr(op);
-
-        out << " ";
+        ++n_concat;
+        out << "(concat ";
       }
 
-      if(!is_zero_width(components[0].type(), ns))
-        convert_expr(expr.op0());
+      convert_operand(expr.operands()[i - 1]);
 
-      out << std::string(n_concat, ')');
+      out << " ";
     }
+
+    if(!is_zero_width(components[0].type(), ns))
+      convert_operand(expr.op0());
+
+    out << std::string(n_concat, ')');
   }
 }
 
diff --git a/src/solvers/smt2/smt2_parser.cpp b/src/solvers/smt2/smt2_parser.cpp
@@ -1207,7 +1207,7 @@ void smt2_parsert::setup_expressions()
 
     // add the widths
     auto op_width = make_range(op).map(
-      [](const exprt &o) { return to_unsignedbv_type(o.type()).get_width(); });
+      [](const exprt &o) { return to_bitvector_type(o.type()).get_width(); });
 
     const std::size_t total_width =
       std::accumulate(op_width.begin(), op_width.end(), 0);
diff --git a/src/util/lower_byte_operators.cpp b/src/util/lower_byte_operators.cpp
@@ -522,9 +522,10 @@ static exprt unpack_array_vector_no_known_bounds(
 
 /// Rewrite an array or vector into its individual bytes.
 /// \param src: array/vector to unpack
-/// \param src_size: array/vector size; if not a constant and thus not set,
-///   \p max_bytes must be a known constant value to build an array expression,
-///   otherwise we fall back to building and array comprehension
+/// \param src_size: number of elements in \p src, if the array/vector size is
+///   constant; if the array/vector size is not constant (and this argument thus
+///   not set), \p max_bytes needs to be a known constant value to build an
+///   array expression, else we fall back to building an array comprehension
 /// \param element_bits: bit width of array/vector elements
 /// \param little_endian: true, iff assumed endianness is little-endian
 /// \param offset_bytes: if set, bytes prior to this offset will be filled
@@ -576,7 +577,9 @@ static exprt unpack_array_vector(
       first_element = *offset_bytes / el_bytes;
       // insert offset_bytes-many nil bytes into the output array
       byte_operands.resize(
-        numeric_cast_v<std::size_t>(*offset_bytes - (*offset_bytes % el_bytes)),
+        numeric_cast_v<std::size_t>(std::min(
+          *offset_bytes - (*offset_bytes % el_bytes),
+          *num_elements * el_bytes)),
         from_integer(0, bv_typet{bits_per_byte}));
     }
   }
@@ -777,24 +780,39 @@ static array_exprt unpack_struct(
       !bit_fields.has_value(),
       "all preceding members should have been processed");
 
-    exprt sub = unpack_rec(
-      member,
-      little_endian,
-      offset_in_member,
-      max_bytes_left,
-      bits_per_byte,
-      ns,
-      true);
+    if(
+      component_bits.has_value() && offset_in_member.has_value() &&
+      *offset_in_member * bits_per_byte >= *component_bits)
+    {
+      // we won't actually need this component, fill in zeros instead of
+      // computing an unpacking
+      byte_operands.resize(
+        byte_operands.size() +
+          numeric_cast_v<std::size_t>(*component_bits / bits_per_byte),
+        from_integer(0, bv_typet{bits_per_byte}));
+    }
+    else
+    {
+      exprt sub = unpack_rec(
+        member,
+        little_endian,
+        offset_in_member,
+        max_bytes_left,
+        bits_per_byte,
+        ns,
+        true);
 
-    byte_operands.insert(
-      byte_operands.end(),
-      std::make_move_iterator(sub.operands().begin()),
-      std::make_move_iterator(sub.operands().end()));
+      byte_operands.insert(
+        byte_operands.end(),
+        std::make_move_iterator(sub.operands().begin()),
+        std::make_move_iterator(sub.operands().end()));
+    }
 
     if(component_bits.has_value())
       member_offset_bits += *component_bits;
   }
 
+  // any remaining bit fields?
   if(bit_fields.has_value())
   {
     process_bit_fields(