byte_extract lowering of vectors and array cleanup

tautschnig · tautschnig · commit 54cce5aa1758 · 2019-02-14T09:33:32.000Z
Refactor the code used for arrays to make it re-usable for vectors and arrays.
diff --git a/src/solvers/lowering/byte_operators.cpp b/src/solvers/lowering/byte_operators.cpp
@@ -18,13 +18,95 @@ Author: Daniel Kroening, kroening@kroening.com
 
 #include "flatten_byte_extract_exceptions.h"
 
-// clang-format off
+static exprt unpack_rec(
+  const exprt &src,
+  bool little_endian,
+  const exprt &max_bytes,
+  const namespacet &ns,
+  bool unpack_byte_array = false);
+
+/// append the individual bytes of an array or vector to \p dest_array
+/// \param src: array/vector to unpack
+/// \param src_size: number of elements of src; need not be a constant
+/// \param element_width: bit width of array/vector elements
+/// \param little_endian: true, iff assumed endianness is little-endian
+/// \param max_bytes: number of bytes to unpack at most; determines the element
+///                   count only if src_size is not constant
+/// \param ns: namespace for type lookups
+/// \param [out] dest_array: target array_exprt to hold unpacked elements
+static void unpack_array_vector(
+  const exprt &src,
+  const exprt &src_size,
+  const mp_integer &element_width,
+  bool little_endian,
+  const exprt &max_bytes,
+  const namespacet &ns,
+  array_exprt &dest_array)
+{
+  auto max_bytes_int = numeric_cast<mp_integer>(max_bytes);
+  auto num_elements = numeric_cast<mp_integer>(src_size);
+
+  if(!max_bytes_int && !num_elements)
+  {
+    throw non_const_array_sizet(src.type(), max_bytes);
+  }
+
+  mp_integer first_element = 0;
+
+  // when the size is unknown and elements span whole bytes, convert max_bytes
+  // into an element count; otherwise max_bytes is used as that count as-is
+  if(element_width > 0 && element_width % 8 == 0)
+  {
+    mp_integer el_bytes = element_width / 8;
+
+    // turn bytes into elements, rounding up to cover a partial last element
+    if(!num_elements)
+    {
+      if(*max_bytes_int % el_bytes != 0)
+        *max_bytes_int += el_bytes;
+
+      *max_bytes_int /= el_bytes;
+    }
+  }
+
+  // the maximum number of bytes is an upper bound in case the size of the
+  // array/vector is unknown; if the element_width was usable above this will
+  // have been turned into a number of elements already
+  if(!num_elements)
+    num_elements = max_bytes_int;
+
+  exprt src_simp = simplify_expr(src, ns);
+
+  for(mp_integer i = first_element; i < *num_elements; ++i)
+  {
+    exprt element;
+
+    if(src_simp.id() != ID_array || i >= src_simp.operands().size())
+    {
+      element = index_exprt(src_simp, from_integer(i, index_type()));
+    }
+    else
+    {
+      const std::size_t index_int = numeric_cast_v<std::size_t>(i);
+      element = src_simp.operands()[index_int];
+    }
+
+    exprt sub = unpack_rec(element, little_endian, max_bytes, ns, true);
+    dest_array.operands().insert(
+      dest_array.operands().end(),
+      sub.operands().begin(),
+      sub.operands().end());
+  }
+}
 
 /// rewrite an object into its individual bytes
-/// \par parameters: src  object to unpack
-/// little_endian  true, iff assumed endianness is little-endian
-/// max_bytes  if not nil, use as upper bound of the number of bytes to unpack
-/// ns  namespace for type lookups
+/// \param src: object to unpack
+/// \param little_endian: true, iff assumed endianness is little-endian
+/// \param max_bytes: if not nil, use as upper bound of the number of bytes to
+///                   unpack
+/// \param ns: namespace for type lookups
+/// \param unpack_byte_array: if false, return src unmodified when it is an
+///                           array or vector of bytes; if true, unpack it
 /// \return array of bytes in the sequence found in memory
 /// \throws flatten_byte_extract_exceptiont Raised is unable to unpack the
 /// object because of either non constant size, byte misalignment or
@@ -34,7 +116,7 @@ static exprt unpack_rec(
   bool little_endian,
   const exprt &max_bytes,
   const namespacet &ns,
-  bool unpack_byte_array=false)
+  bool unpack_byte_array)
 {
   array_exprt array({},
     array_typet(unsignedbv_typet(8), from_integer(0, size_type())));
@@ -51,48 +133,37 @@ static exprt unpack_rec(
     auto element_width = pointer_offset_bits(subtype, ns);
     CHECK_RETURN(element_width.has_value());
 
-    // this probably doesn't really matter
-    #if 0
-    INVARIANT(
-      element_width > 0,
-      "element width of array should be constant",
-      irep_pretty_diagnosticst(src.type()));
-
-    INVARIANT(
-      element_width % 8 == 0,
-      "elements in array should be byte-aligned",
-      irep_pretty_diagnosticst(src.type()));
-    #endif
-
     if(!unpack_byte_array && *element_width == 8)
       return src;
 
-    auto num_elements = numeric_cast<mp_integer>(max_bytes);
-    if(!num_elements.has_value())
-      num_elements = numeric_cast<mp_integer>(array_type.size());
-    if(!num_elements.has_value())
-      throw non_const_array_sizet(array_type, max_bytes);
+    unpack_array_vector(
+      src,
+      array_type.size(),
+      *element_width,
+      little_endian,
+      max_bytes,
+      ns,
+      array);
+  }
+  else if(src.type().id() == ID_vector)
+  {
+    const vector_typet &vector_type = to_vector_type(src.type());
+    const typet &subtype = vector_type.subtype();
 
-    // all array members will have the same structure; do this just
-    // once and then replace the dummy symbol by a suitable index
-    // expression in the loop below
-    symbol_exprt dummy(ID_C_incomplete, subtype);
-    exprt sub=unpack_rec(dummy, little_endian, max_bytes, ns, true);
+    auto element_width = pointer_offset_bits(subtype, ns);
+    CHECK_RETURN(element_width.has_value());
 
-    for(mp_integer i=0; i<*num_elements; ++i)
-    {
-      index_exprt index(src, from_integer(i, index_type()));
-      replace_symbolt replace;
-      replace.insert(dummy, index);
+    if(!unpack_byte_array && *element_width == 8)
+      return src;
 
-      for(const auto &op : sub.operands())
-      {
-        exprt new_op=op;
-        replace(new_op);
-        simplify(new_op, ns);
-        array.copy_to_operands(new_op);
-      }
-    }
+    unpack_array_vector(
+      src,
+      vector_type.size(),
+      *element_width,
+      little_endian,
+      max_bytes,
+      ns,
+      array);
   }
   else if(ns.follow(src.type()).id()==ID_struct)
   {
@@ -233,12 +304,16 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
     const typet &subtype=array_type.subtype();
 
     auto element_width = pointer_offset_bits(subtype, ns);
-    const auto num_elements = numeric_cast<mp_integer>(array_type.size());
-    // TODO: consider ways of dealing with arrays of unknown subtype
-    // size or with a subtype size that does not fit byte boundaries
+    auto num_elements = numeric_cast<mp_integer>(array_type.size());
+    if(!num_elements.has_value())
+      num_elements = mp_integer(unpacked.op().operands().size());
+
+    // consider ways of dealing with arrays of unknown subtype size or with a
+    // subtype size that does not fit byte boundaries; currently we fall back to
+    // stitching together consecutive elements down below
     if(
       element_width.has_value() && *element_width >= 1 &&
-      *element_width % 8 == 0 && num_elements.has_value())
+      *element_width % 8 == 0)
     {
       array_exprt array({}, array_type);
 
@@ -258,6 +333,41 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
       return simplify_expr(array, ns);
     }
   }
+  else if(src.type().id() == ID_vector)
+  {
+    const vector_typet &vector_type = to_vector_type(src.type());
+    const typet &subtype = vector_type.subtype();
+
+    mp_integer num_elements = numeric_cast_v<mp_integer>(vector_type.size());
+
+    auto element_width = pointer_offset_bits(subtype, ns);
+    CHECK_RETURN(element_width.has_value());
+
+    // consider ways of dealing with vectors of unknown subtype size or with a
+    // subtype size that does not fit byte boundaries; currently we fall back to
+    // stitching together consecutive elements down below
+    if(
+      element_width.has_value() && *element_width >= 1 &&
+      *element_width % 8 == 0)
+    {
+      vector_exprt vector(vector_type);
+
+      for(mp_integer i = 0; i < num_elements; ++i)
+      {
+        plus_exprt new_offset(
+          unpacked.offset(),
+          from_integer(i * (*element_width) / 8, unpacked.offset().type()));
+
+        byte_extract_exprt tmp(unpacked);
+        tmp.type() = subtype;
+        tmp.offset() = simplify_expr(new_offset, ns);
+
+        vector.copy_to_operands(lower_byte_extract(tmp, ns));
+      }
+
+      return simplify_expr(vector, ns);
+    }
+  }
   else if(ns.follow(src.type()).id()==ID_struct)
   {
     const struct_typet &struct_type=to_struct_type(ns.follow(src.type()));
@@ -299,10 +409,13 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
   const exprt &root=unpacked.op();
   const exprt &offset=unpacked.offset();
 
-  const array_typet &array_type=to_array_type(root.type());
-  const typet &subtype=array_type.subtype();
+  optionalt<typet> subtype;
+  if(root.type().id() == ID_vector)
+    subtype = to_vector_type(root.type()).subtype();
+  else
+    subtype = to_array_type(root.type()).subtype();
 
-  auto subtype_bits = pointer_offset_bits(subtype, ns);
+  auto subtype_bits = pointer_offset_bits(*subtype, ns);
 
   DATA_INVARIANT(
     subtype_bits.has_value() && *subtype_bits == 8,
@@ -344,7 +457,7 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
   else // width_bytes>=2
   {
     concatenation_exprt concatenation(
-      std::move(op), bitvector_typet(subtype.id(), width_bytes * 8));
+      std::move(op), bitvector_typet(subtype->id(), width_bytes * 8));
     return simplify_expr(
       typecast_exprt(std::move(concatenation), src.type()), ns);
   }
@@ -649,4 +762,3 @@ exprt lower_byte_operators(const exprt &src, const namespacet &ns)
   else
     return tmp;
 }
-// clang-format on
diff --git a/src/solvers/lowering/flatten_byte_extract_exceptions.h b/src/solvers/lowering/flatten_byte_extract_exceptions.h
@@ -28,7 +28,7 @@ class flatten_byte_extract_exceptiont : public std::runtime_error
 class non_const_array_sizet : public flatten_byte_extract_exceptiont
 {
 public:
-  non_const_array_sizet(const array_typet &array_type, const exprt &max_bytes)
+  non_const_array_sizet(const typet &array_type, const exprt &max_bytes)
     : flatten_byte_extract_exceptiont("cannot unpack array of non-const size"),
       max_bytes(max_bytes),
       array_type(array_type)
@@ -47,7 +47,7 @@ class non_const_array_sizet : public flatten_byte_extract_exceptiont
 
 private:
   exprt max_bytes;
-  array_typet array_type;
+  typet array_type;
 
   std::string computed_error_message;
 };
diff --git a/unit/solvers/lowering/byte_operators.cpp b/unit/solvers/lowering/byte_operators.cpp
@@ -125,8 +125,8 @@ SCENARIO("byte_extract_lowering", "[core][solvers][lowering][byte_extract]")
       signedbv_typet(128),
       // ieee_float_spect::single_precision().to_type(),
       // pointer_typet(u64, 64),
-      // vector_typet(u8, size),
-      // vector_typet(u64, size),
+      vector_typet(u8, size),
+      vector_typet(u64, size),
       // complex_typet(s16),
       // complex_typet(u64)
     };