From 5c3860636ea2168a3204ad704aae08c18d8fe82c Mon Sep 17 00:00:00 2001
From: Michael Tautschnig <tautschn@amazon.com>
Date: Sun, 3 Feb 2019 19:28:55 +0000
Subject: [PATCH 1/5] byte_extract lowering of vectors and array cleanup

Refactor the code used for arrays to make it re-usable for vectors and arrays.
---
 src/solvers/lowering/byte_operators.cpp       | 246 +++++++++++++-----
 .../flatten_byte_extract_exceptions.h         |   4 +-
 unit/solvers/lowering/byte_operators.cpp      |   4 +-
 3 files changed, 183 insertions(+), 71 deletions(-)

diff --git a/src/solvers/lowering/byte_operators.cpp b/src/solvers/lowering/byte_operators.cpp
index 33f680a4dfb..c2c5f976d0f 100644
--- a/src/solvers/lowering/byte_operators.cpp
+++ b/src/solvers/lowering/byte_operators.cpp
@@ -18,13 +18,101 @@ Author: Daniel Kroening, kroening@kroening.com
 
 #include "flatten_byte_extract_exceptions.h"
 
-// clang-format off
+static exprt unpack_rec(
+  const exprt &src,
+  bool little_endian,
+  const exprt &max_bytes,
+  const namespacet &ns,
+  bool unpack_byte_array = false);
+
+/// Rewrite an array or vector into its individual bytes.
+/// \param src: array/vector to unpack
+/// \param src_size: array/vector size; if not a constant, \p max_bytes must be
+///   a constant value, otherwise we fail with an exception
+/// \param element_width: bit width of array/vector elements
+/// \param little_endian: true, iff assumed endianness is little-endian
+/// \param max_bytes: if not nil, use as upper bound of the number of bytes to
+///   unpack
+/// \param ns: namespace for type lookups
+/// \return array_exprt holding unpacked elements
+static array_exprt unpack_array_vector(
+  const exprt &src,
+  const exprt &src_size,
+  const mp_integer &element_width,
+  bool little_endian,
+  const exprt &max_bytes,
+  const namespacet &ns)
+{
+  auto max_bytes_int = numeric_cast<mp_integer>(max_bytes);
+  auto num_elements = numeric_cast<mp_integer>(src_size);
+
+  if(!max_bytes_int && !num_elements)
+  {
+    throw non_const_array_sizet(src.type(), max_bytes);
+  }
+
+  mp_integer first_element = 0;
+
+  // refine the number of elements to extract in case the element width is known
+  // and a multiple of bytes; otherwise we will expand the entire array/vector
+  optionalt<mp_integer> max_elements;
+  if(element_width > 0 && element_width % 8 == 0)
+  {
+    mp_integer el_bytes = element_width / 8;
+
+    if(!num_elements)
+    {
+      // turn bytes into elements, rounding up
+      max_elements = (*max_bytes_int + el_bytes - 1) / el_bytes;
+    }
+  }
 
-/// rewrite an object into its individual bytes
-/// \par parameters: src  object to unpack
-/// little_endian  true, iff assumed endianness is little-endian
-/// max_bytes  if not nil, use as upper bound of the number of bytes to unpack
-/// ns  namespace for type lookups
+  // the maximum number of bytes is an upper bound in case the size of the
+  // array/vector is unknown; if the element_width was usable above this will
+  // have been turned into a number of elements already
+  if(!num_elements)
+    num_elements = *max_elements;
+
+  const exprt src_simp = simplify_expr(src, ns);
+
+  exprt::operandst byte_operands;
+  for(mp_integer i = first_element; i < *num_elements; ++i)
+  {
+    exprt element;
+
+    if(
+      (src_simp.id() == ID_array || src_simp.id() == ID_vector) &&
+      i < src_simp.operands().size())
+    {
+      const std::size_t index_int = numeric_cast_v<std::size_t>(i);
+      element = src_simp.operands()[index_int];
+    }
+    else
+    {
+      element = index_exprt(src_simp, from_integer(i, index_type()));
+    }
+
+    // recursively unpack each element until so that we eventually just have an
+    // array of bytes left
+    exprt sub = unpack_rec(element, little_endian, max_bytes, ns, true);
+    byte_operands.insert(
+      byte_operands.end(), sub.operands().begin(), sub.operands().end());
+  }
+
+  const std::size_t size = byte_operands.size();
+  return array_exprt(
+    std::move(byte_operands),
+    array_typet(unsignedbv_typet(8), from_integer(size, size_type())));
+}
+
+/// Rewrite an object into its individual bytes.
+/// \param src: object to unpack
+/// \param little_endian: true, iff assumed endianness is little-endian
+/// \param max_bytes: if not nil, use as upper bound of the number of bytes to
+///   unpack
+/// \param ns: namespace for type lookups
+/// \param unpack_byte_array: if true, return unmodified \p src iff it is an
+//    array of bytes
 /// \return array of bytes in the sequence found in memory
 /// \throws flatten_byte_extract_exceptiont Raised is unable to unpack the
 /// object because of either non constant size, byte misalignment or
@@ -34,15 +122,8 @@ static exprt unpack_rec(
   bool little_endian,
   const exprt &max_bytes,
   const namespacet &ns,
-  bool unpack_byte_array=false)
+  bool unpack_byte_array)
 {
-  array_exprt array({},
-    array_typet(unsignedbv_typet(8), from_integer(0, size_type())));
-
-  // endianness_mapt should be the point of reference for mapping out
-  // endianness, but we need to work on elements here instead of
-  // individual bits
-
   if(src.type().id()==ID_array)
   {
     const array_typet &array_type=to_array_type(src.type());
@@ -51,54 +132,32 @@ static exprt unpack_rec(
     auto element_width = pointer_offset_bits(subtype, ns);
     CHECK_RETURN(element_width.has_value());
 
-    // this probably doesn't really matter
-    #if 0
-    INVARIANT(
-      element_width > 0,
-      "element width of array should be constant",
-      irep_pretty_diagnosticst(src.type()));
-
-    INVARIANT(
-      element_width % 8 == 0,
-      "elements in array should be byte-aligned",
-      irep_pretty_diagnosticst(src.type()));
-    #endif
-
     if(!unpack_byte_array && *element_width == 8)
       return src;
 
-    auto num_elements = numeric_cast<mp_integer>(max_bytes);
-    if(!num_elements.has_value())
-      num_elements = numeric_cast<mp_integer>(array_type.size());
-    if(!num_elements.has_value())
-      throw non_const_array_sizet(array_type, max_bytes);
+    return unpack_array_vector(
+      src, array_type.size(), *element_width, little_endian, max_bytes, ns);
+  }
+  else if(src.type().id() == ID_vector)
+  {
+    const vector_typet &vector_type = to_vector_type(src.type());
+    const typet &subtype = vector_type.subtype();
 
-    // all array members will have the same structure; do this just
-    // once and then replace the dummy symbol by a suitable index
-    // expression in the loop below
-    symbol_exprt dummy(ID_C_incomplete, subtype);
-    exprt sub=unpack_rec(dummy, little_endian, max_bytes, ns, true);
+    auto element_width = pointer_offset_bits(subtype, ns);
+    CHECK_RETURN(element_width.has_value());
 
-    for(mp_integer i=0; i<*num_elements; ++i)
-    {
-      index_exprt index(src, from_integer(i, index_type()));
-      replace_symbolt replace;
-      replace.insert(dummy, index);
+    if(!unpack_byte_array && *element_width == 8)
+      return src;
 
-      for(const auto &op : sub.operands())
-      {
-        exprt new_op=op;
-        replace(new_op);
-        simplify(new_op, ns);
-        array.copy_to_operands(new_op);
-      }
-    }
+    return unpack_array_vector(
+      src, vector_type.size(), *element_width, little_endian, max_bytes, ns);
   }
   else if(ns.follow(src.type()).id()==ID_struct)
   {
     const struct_typet &struct_type=to_struct_type(ns.follow(src.type()));
     const struct_typet::componentst &components=struct_type.components();
 
+    exprt::operandst byte_operands;
     for(const auto &comp : components)
     {
       auto element_width = pointer_offset_bits(comp.type(), ns);
@@ -114,9 +173,14 @@ static exprt unpack_rec(
       member_exprt member(src, comp.get_name(), comp.type());
       exprt sub=unpack_rec(member, little_endian, max_bytes, ns, true);
 
-      for(const auto& op : sub.operands())
-        array.copy_to_operands(op);
+      byte_operands.insert(
+        byte_operands.end(), sub.operands().begin(), sub.operands().end());
     }
+
+    const std::size_t size = byte_operands.size();
+    return array_exprt(
+      std::move(byte_operands),
+      array_typet(unsignedbv_typet(8), from_integer(size, size_type())));
   }
   else if(src.type().id()!=ID_empty)
   {
@@ -138,6 +202,7 @@ static exprt unpack_rec(
         bits = *bits_opt * 8;
     }
 
+    exprt::operandst byte_operands;
     for(mp_integer i=0; i<bits; i+=8)
     {
       extractbits_exprt extractbits(
@@ -146,17 +211,23 @@ static exprt unpack_rec(
         from_integer(i, index_type()),
         unsignedbv_typet(8));
 
+      // endianness_mapt should be the point of reference for mapping out
+      // endianness, but we need to work on elements here instead of
+      // individual bits
       if(little_endian)
-        array.copy_to_operands(extractbits);
+        byte_operands.push_back(extractbits);
       else
-        array.operands().insert(array.operands().begin(), extractbits);
+        byte_operands.insert(byte_operands.begin(), extractbits);
     }
-  }
 
-  to_array_type(array.type()).size()=
-    from_integer(array.operands().size(), size_type());
+    const std::size_t size = byte_operands.size();
+    return array_exprt(
+      std::move(byte_operands),
+      array_typet(unsignedbv_typet(8), from_integer(size, size_type())));
+  }
 
-  return std::move(array);
+  return array_exprt(
+    {}, array_typet(unsignedbv_typet(8), from_integer(0, size_type())));
 }
 
 /// rewrite byte extraction from an array to byte extraction from a
@@ -233,12 +304,16 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
     const typet &subtype=array_type.subtype();
 
     auto element_width = pointer_offset_bits(subtype, ns);
-    const auto num_elements = numeric_cast<mp_integer>(array_type.size());
-    // TODO: consider ways of dealing with arrays of unknown subtype
-    // size or with a subtype size that does not fit byte boundaries
+    auto num_elements = numeric_cast<mp_integer>(array_type.size());
+    if(!num_elements.has_value())
+      num_elements = mp_integer(unpacked.op().operands().size());
+
+    // consider ways of dealing with arrays of unknown subtype size or with a
+    // subtype size that does not fit byte boundaries; currently we fall back to
+    // stitching together consecutive elements down below
     if(
       element_width.has_value() && *element_width >= 1 &&
-      *element_width % 8 == 0 && num_elements.has_value())
+      *element_width % 8 == 0)
     {
       array_exprt array({}, array_type);
 
@@ -258,6 +333,41 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
       return simplify_expr(array, ns);
     }
   }
+  else if(src.type().id() == ID_vector)
+  {
+    const vector_typet &vector_type = to_vector_type(src.type());
+    const typet &subtype = vector_type.subtype();
+
+    mp_integer num_elements = numeric_cast_v<mp_integer>(vector_type.size());
+
+    auto element_width = pointer_offset_bits(subtype, ns);
+    CHECK_RETURN(element_width.has_value());
+
+    // consider ways of dealing with vectors of unknown subtype size or with a
+    // subtype size that does not fit byte boundaries; currently we fall back to
+    // stitching together consecutive elements down below
+    if(
+      element_width.has_value() && *element_width >= 1 &&
+      *element_width % 8 == 0)
+    {
+      vector_exprt vector(vector_type);
+
+      for(mp_integer i = 0; i < num_elements; ++i)
+      {
+        plus_exprt new_offset(
+          unpacked.offset(),
+          from_integer(i * (*element_width) / 8, unpacked.offset().type()));
+
+        byte_extract_exprt tmp(unpacked);
+        tmp.type() = subtype;
+        tmp.offset() = simplify_expr(new_offset, ns);
+
+        vector.copy_to_operands(lower_byte_extract(tmp, ns));
+      }
+
+      return simplify_expr(vector, ns);
+    }
+  }
   else if(ns.follow(src.type()).id()==ID_struct)
   {
     const struct_typet &struct_type=to_struct_type(ns.follow(src.type()));
@@ -299,10 +409,13 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
   const exprt &root=unpacked.op();
   const exprt &offset=unpacked.offset();
 
-  const array_typet &array_type=to_array_type(root.type());
-  const typet &subtype=array_type.subtype();
+  optionalt<typet> subtype;
+  if(root.type().id() == ID_vector)
+    subtype = to_vector_type(root.type()).subtype();
+  else
+    subtype = to_array_type(root.type()).subtype();
 
-  auto subtype_bits = pointer_offset_bits(subtype, ns);
+  auto subtype_bits = pointer_offset_bits(*subtype, ns);
 
   DATA_INVARIANT(
     subtype_bits.has_value() && *subtype_bits == 8,
@@ -344,7 +457,7 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
   else // width_bytes>=2
   {
     concatenation_exprt concatenation(
-      std::move(op), bitvector_typet(subtype.id(), width_bytes * 8));
+      std::move(op), bitvector_typet(subtype->id(), width_bytes * 8));
     return simplify_expr(
       typecast_exprt(std::move(concatenation), src.type()), ns);
   }
@@ -649,4 +762,3 @@ exprt lower_byte_operators(const exprt &src, const namespacet &ns)
   else
     return tmp;
 }
-// clang-format on
diff --git a/src/solvers/lowering/flatten_byte_extract_exceptions.h b/src/solvers/lowering/flatten_byte_extract_exceptions.h
index a5ae22791b3..b1964518c36 100644
--- a/src/solvers/lowering/flatten_byte_extract_exceptions.h
+++ b/src/solvers/lowering/flatten_byte_extract_exceptions.h
@@ -28,7 +28,7 @@ class flatten_byte_extract_exceptiont : public std::runtime_error
 class non_const_array_sizet : public flatten_byte_extract_exceptiont
 {
 public:
-  non_const_array_sizet(const array_typet &array_type, const exprt &max_bytes)
+  non_const_array_sizet(const typet &array_type, const exprt &max_bytes)
     : flatten_byte_extract_exceptiont("cannot unpack array of non-const size"),
       max_bytes(max_bytes),
       array_type(array_type)
@@ -47,7 +47,7 @@ class non_const_array_sizet : public flatten_byte_extract_exceptiont
 
 private:
   exprt max_bytes;
-  array_typet array_type;
+  typet array_type;
 
   std::string computed_error_message;
 };
diff --git a/unit/solvers/lowering/byte_operators.cpp b/unit/solvers/lowering/byte_operators.cpp
index da1a770dea0..775fd275966 100644
--- a/unit/solvers/lowering/byte_operators.cpp
+++ b/unit/solvers/lowering/byte_operators.cpp
@@ -125,8 +125,8 @@ SCENARIO("byte_extract_lowering", "[core][solvers][lowering][byte_extract]")
       signedbv_typet(128),
       // ieee_float_spect::single_precision().to_type(),
       // pointer_typet(u64, 64),
-      // vector_typet(u8, size),
-      // vector_typet(u64, size),
+      vector_typet(u8, size),
+      vector_typet(u64, size),
       // complex_typet(s16),
       // complex_typet(u64)
     };

From e2265db60c5dd08ab0c9e7b882679c91a534543a Mon Sep 17 00:00:00 2001
From: Michael Tautschnig <tautschn@amazon.com>
Date: Thu, 14 Feb 2019 11:58:59 +0000
Subject: [PATCH 2/5] byte_extract lowering: also use a lower bound

We already limited extraction at an uppper bound, but when we know the offset we
can also start as late as possible. For structs, for example, we now do not
expand components that will be irrelevant to the final result.
---
 src/solvers/lowering/byte_operators.cpp | 82 ++++++++++++++++++++++---
 1 file changed, 75 insertions(+), 7 deletions(-)

diff --git a/src/solvers/lowering/byte_operators.cpp b/src/solvers/lowering/byte_operators.cpp
index c2c5f976d0f..c525b8b2a5d 100644
--- a/src/solvers/lowering/byte_operators.cpp
+++ b/src/solvers/lowering/byte_operators.cpp
@@ -21,6 +21,7 @@ Author: Daniel Kroening, kroening@kroening.com
 static exprt unpack_rec(
   const exprt &src,
   bool little_endian,
+  const exprt &offset_bytes,
   const exprt &max_bytes,
   const namespacet &ns,
   bool unpack_byte_array = false);
@@ -31,6 +32,8 @@ static exprt unpack_rec(
 ///   a constant value, otherwise we fail with an exception
 /// \param element_width: bit width of array/vector elements
 /// \param little_endian: true, iff assumed endianness is little-endian
+/// \param offset_bytes: if not nil, bytes prior to this offset will be filled
+///   with nil values
 /// \param max_bytes: if not nil, use as upper bound of the number of bytes to
 ///   unpack
 /// \param ns: namespace for type lookups
@@ -40,6 +43,7 @@ static array_exprt unpack_array_vector(
   const exprt &src_size,
   const mp_integer &element_width,
   bool little_endian,
+  const exprt &offset_bytes,
   const exprt &max_bytes,
   const namespacet &ns)
 {
@@ -51,6 +55,7 @@ static array_exprt unpack_array_vector(
     throw non_const_array_sizet(src.type(), max_bytes);
   }
 
+  exprt::operandst byte_operands;
   mp_integer first_element = 0;
 
   // refine the number of elements to extract in case the element width is known
@@ -65,6 +70,17 @@ static array_exprt unpack_array_vector(
       // turn bytes into elements, rounding up
       max_elements = (*max_bytes_int + el_bytes - 1) / el_bytes;
     }
+
+    if(auto offset_bytes_int = numeric_cast<mp_integer>(offset_bytes))
+    {
+      // compute offset as number of elements
+      first_element = *offset_bytes_int / el_bytes;
+      // insert offset_bytes-many nil bytes into the output array
+      *offset_bytes_int -= *offset_bytes_int % el_bytes;
+      byte_operands.resize(
+        numeric_cast_v<std::size_t>(*offset_bytes_int),
+        from_integer(0, unsignedbv_typet(8)));
+    }
   }
 
   // the maximum number of bytes is an upper bound in case the size of the
@@ -75,7 +91,6 @@ static array_exprt unpack_array_vector(
 
   const exprt src_simp = simplify_expr(src, ns);
 
-  exprt::operandst byte_operands;
   for(mp_integer i = first_element; i < *num_elements; ++i)
   {
     exprt element;
@@ -94,7 +109,8 @@ static array_exprt unpack_array_vector(
 
     // recursively unpack each element until so that we eventually just have an
     // array of bytes left
-    exprt sub = unpack_rec(element, little_endian, max_bytes, ns, true);
+    exprt sub =
+      unpack_rec(element, little_endian, nil_exprt(), max_bytes, ns, true);
     byte_operands.insert(
       byte_operands.end(), sub.operands().begin(), sub.operands().end());
   }
@@ -108,6 +124,8 @@ static array_exprt unpack_array_vector(
 /// Rewrite an object into its individual bytes.
 /// \param src: object to unpack
 /// \param little_endian: true, iff assumed endianness is little-endian
+/// \param offset_bytes: if not nil, bytes prior to this offset will be filled
+///   with nil values
 /// \param max_bytes: if not nil, use as upper bound of the number of bytes to
 ///   unpack
 /// \param ns: namespace for type lookups
@@ -120,6 +138,7 @@ static array_exprt unpack_array_vector(
 static exprt unpack_rec(
   const exprt &src,
   bool little_endian,
+  const exprt &offset_bytes,
   const exprt &max_bytes,
   const namespacet &ns,
   bool unpack_byte_array)
@@ -136,7 +155,13 @@ static exprt unpack_rec(
       return src;
 
     return unpack_array_vector(
-      src, array_type.size(), *element_width, little_endian, max_bytes, ns);
+      src,
+      array_type.size(),
+      *element_width,
+      little_endian,
+      offset_bytes,
+      max_bytes,
+      ns);
   }
   else if(src.type().id() == ID_vector)
   {
@@ -150,13 +175,21 @@ static exprt unpack_rec(
       return src;
 
     return unpack_array_vector(
-      src, vector_type.size(), *element_width, little_endian, max_bytes, ns);
+      src,
+      vector_type.size(),
+      *element_width,
+      little_endian,
+      offset_bytes,
+      max_bytes,
+      ns);
   }
   else if(ns.follow(src.type()).id()==ID_struct)
   {
     const struct_typet &struct_type=to_struct_type(ns.follow(src.type()));
     const struct_typet::componentst &components=struct_type.components();
 
+    mp_integer member_offset_bits = 0;
+
     exprt::operandst byte_operands;
     for(const auto &comp : components)
     {
@@ -170,11 +203,43 @@ static exprt unpack_rec(
         throw non_byte_alignedt(struct_type, comp, *element_width);
       }
 
+      exprt offset_in_member = nil_exprt();
+      auto offset_in_member_int = numeric_cast<mp_integer>(offset_bytes);
+      exprt max_bytes_left = nil_exprt();
+      auto max_bytes_left_int = numeric_cast<mp_integer>(max_bytes);
+
+      if(offset_in_member_int.has_value())
+      {
+        *offset_in_member_int -= member_offset_bits / 8;
+        // if the offset is negative, offset_in_member remains nil, which has
+        // the same effect as setting it to zero
+        if(*offset_in_member_int >= 0)
+        {
+          offset_in_member =
+            from_integer(*offset_in_member_int, offset_bytes.type());
+        }
+      }
+
+      if(max_bytes_left_int.has_value())
+      {
+        *max_bytes_left_int -= member_offset_bits / 8;
+        if(*max_bytes_left_int >= 0)
+          max_bytes_left = from_integer(*max_bytes_left_int, max_bytes.type());
+        else
+          break;
+      }
+
       member_exprt member(src, comp.get_name(), comp.type());
-      exprt sub=unpack_rec(member, little_endian, max_bytes, ns, true);
+      if(src.id() == ID_struct)
+        simplify(member, ns);
+
+      exprt sub = unpack_rec(
+        member, little_endian, offset_in_member, max_bytes_left, ns, true);
 
       byte_operands.insert(
         byte_operands.end(), sub.operands().begin(), sub.operands().end());
+
+      member_offset_bits += *element_width;
     }
 
     const std::size_t size = byte_operands.size();
@@ -294,9 +359,12 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
           typecast_exprt(src.offset(), upper_bound.type())),
         ns);
 
+  exprt lb = src.offset();
+  if(!lb.is_constant())
+    lb.make_nil();
+
   byte_extract_exprt unpacked(src);
-  unpacked.op()=
-    unpack_rec(src.op(), little_endian, upper_bound, ns);
+  unpacked.op() = unpack_rec(src.op(), little_endian, lb, upper_bound, ns);
 
   if(src.type().id()==ID_array)
   {

From 2bfaafc6e9191c9c71fc4124e5c227d8d738500c Mon Sep 17 00:00:00 2001
From: Michael Tautschnig <tautschn@amazon.com>
Date: Sun, 3 Feb 2019 15:45:42 +0000
Subject: [PATCH 3/5] byte_extract lowering of arrays: optimise for constant
 index

No need to construct index_exprt when we can just pick the operand out of a
vector.
---
 src/solvers/lowering/byte_operators.cpp | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/solvers/lowering/byte_operators.cpp b/src/solvers/lowering/byte_operators.cpp
index c525b8b2a5d..4b5f1ddff68 100644
--- a/src/solvers/lowering/byte_operators.cpp
+++ b/src/solvers/lowering/byte_operators.cpp
@@ -516,8 +516,22 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
       little_endian?(width_bytes-i-1):i;
 
     plus_exprt offset_i(from_integer(offset_int, offset.type()), offset);
-    index_exprt index_expr(root, offset_i);
-    op.push_back(index_expr);
+    simplify(offset_i, ns);
+
+    mp_integer index = 0;
+    if(
+      offset_i.is_constant() &&
+      (root.id() == ID_array || root.id() == ID_vector) &&
+      !to_integer(to_constant_expr(offset_i), index) &&
+      index < root.operands().size() && index >= 0)
+    {
+      // offset is constant and in range
+      op.push_back(root.operands()[numeric_cast_v<std::size_t>(index)]);
+    }
+    else
+    {
+      op.push_back(index_exprt(root, offset_i));
+    }
   }
 
   if(width_bytes==1)

From 445f38fc01d94e49081fc0018c7d4c9f56129d7a Mon Sep 17 00:00:00 2001
From: Michael Tautschnig <tautschn@amazon.com>
Date: Thu, 14 Feb 2019 12:26:13 +0000
Subject: [PATCH 4/5] byte-operator lowering: rename element_width to
 {element,component}_bits

This makes the unit of measurement explicit and thus should help avoid
bits/bytes confusion.
---
 src/solvers/lowering/byte_operators.cpp | 60 ++++++++++++-------------
 1 file changed, 28 insertions(+), 32 deletions(-)

diff --git a/src/solvers/lowering/byte_operators.cpp b/src/solvers/lowering/byte_operators.cpp
index 4b5f1ddff68..dc92b49f4ef 100644
--- a/src/solvers/lowering/byte_operators.cpp
+++ b/src/solvers/lowering/byte_operators.cpp
@@ -30,7 +30,7 @@ static exprt unpack_rec(
 /// \param src: array/vector to unpack
 /// \param src_size: array/vector size; if not a constant, \p max_bytes must be
 ///   a constant value, otherwise we fail with an exception
-/// \param element_width: bit width of array/vector elements
+/// \param element_bits: bit width of array/vector elements
 /// \param little_endian: true, iff assumed endianness is little-endian
 /// \param offset_bytes: if not nil, bytes prior to this offset will be filled
 ///   with nil values
@@ -41,7 +41,7 @@ static exprt unpack_rec(
 static array_exprt unpack_array_vector(
   const exprt &src,
   const exprt &src_size,
-  const mp_integer &element_width,
+  const mp_integer &element_bits,
   bool little_endian,
   const exprt &offset_bytes,
   const exprt &max_bytes,
@@ -61,9 +61,9 @@ static array_exprt unpack_array_vector(
   // refine the number of elements to extract in case the element width is known
   // and a multiple of bytes; otherwise we will expand the entire array/vector
   optionalt<mp_integer> max_elements;
-  if(element_width > 0 && element_width % 8 == 0)
+  if(element_bits > 0 && element_bits % 8 == 0)
   {
-    mp_integer el_bytes = element_width / 8;
+    mp_integer el_bytes = element_bits / 8;
 
     if(!num_elements)
     {
@@ -84,7 +84,7 @@ static array_exprt unpack_array_vector(
   }
 
   // the maximum number of bytes is an upper bound in case the size of the
-  // array/vector is unknown; if the element_width was usable above this will
+  // array/vector is unknown; if element_bits was usable above this will
   // have been turned into a number of elements already
   if(!num_elements)
     num_elements = *max_elements;
@@ -148,16 +148,16 @@ static exprt unpack_rec(
     const array_typet &array_type=to_array_type(src.type());
     const typet &subtype=array_type.subtype();
 
-    auto element_width = pointer_offset_bits(subtype, ns);
-    CHECK_RETURN(element_width.has_value());
+    auto element_bits = pointer_offset_bits(subtype, ns);
+    CHECK_RETURN(element_bits.has_value());
 
-    if(!unpack_byte_array && *element_width == 8)
+    if(!unpack_byte_array && *element_bits == 8)
       return src;
 
     return unpack_array_vector(
       src,
       array_type.size(),
-      *element_width,
+      *element_bits,
       little_endian,
       offset_bytes,
       max_bytes,
@@ -168,16 +168,16 @@ static exprt unpack_rec(
     const vector_typet &vector_type = to_vector_type(src.type());
     const typet &subtype = vector_type.subtype();
 
-    auto element_width = pointer_offset_bits(subtype, ns);
-    CHECK_RETURN(element_width.has_value());
+    auto element_bits = pointer_offset_bits(subtype, ns);
+    CHECK_RETURN(element_bits.has_value());
 
-    if(!unpack_byte_array && *element_width == 8)
+    if(!unpack_byte_array && *element_bits == 8)
       return src;
 
     return unpack_array_vector(
       src,
       vector_type.size(),
-      *element_width,
+      *element_bits,
       little_endian,
       offset_bytes,
       max_bytes,
@@ -193,14 +193,14 @@ static exprt unpack_rec(
     exprt::operandst byte_operands;
     for(const auto &comp : components)
     {
-      auto element_width = pointer_offset_bits(comp.type(), ns);
+      auto component_bits = pointer_offset_bits(comp.type(), ns);
 
       // the next member would be misaligned, abort
       if(
-        !element_width.has_value() || *element_width == 0 ||
-        *element_width % 8 != 0)
+        !component_bits.has_value() || *component_bits == 0 ||
+        *component_bits % 8 != 0)
       {
-        throw non_byte_alignedt(struct_type, comp, *element_width);
+        throw non_byte_alignedt(struct_type, comp, *component_bits);
       }
 
       exprt offset_in_member = nil_exprt();
@@ -239,7 +239,7 @@ static exprt unpack_rec(
       byte_operands.insert(
         byte_operands.end(), sub.operands().begin(), sub.operands().end());
 
-      member_offset_bits += *element_width;
+      member_offset_bits += *component_bits;
     }
 
     const std::size_t size = byte_operands.size();
@@ -371,7 +371,7 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
     const array_typet &array_type=to_array_type(src.type());
     const typet &subtype=array_type.subtype();
 
-    auto element_width = pointer_offset_bits(subtype, ns);
+    auto element_bits = pointer_offset_bits(subtype, ns);
     auto num_elements = numeric_cast<mp_integer>(array_type.size());
     if(!num_elements.has_value())
       num_elements = mp_integer(unpacked.op().operands().size());
@@ -379,9 +379,7 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
     // consider ways of dealing with arrays of unknown subtype size or with a
     // subtype size that does not fit byte boundaries; currently we fall back to
     // stitching together consecutive elements down below
-    if(
-      element_width.has_value() && *element_width >= 1 &&
-      *element_width % 8 == 0)
+    if(element_bits.has_value() && *element_bits >= 1 && *element_bits % 8 == 0)
     {
       array_exprt array({}, array_type);
 
@@ -389,7 +387,7 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
       {
         plus_exprt new_offset(
           unpacked.offset(),
-          from_integer(i * (*element_width) / 8, unpacked.offset().type()));
+          from_integer(i * (*element_bits) / 8, unpacked.offset().type()));
 
         byte_extract_exprt tmp(unpacked);
         tmp.type()=subtype;
@@ -408,15 +406,13 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
 
     mp_integer num_elements = numeric_cast_v<mp_integer>(vector_type.size());
 
-    auto element_width = pointer_offset_bits(subtype, ns);
-    CHECK_RETURN(element_width.has_value());
+    auto element_bits = pointer_offset_bits(subtype, ns);
+    CHECK_RETURN(element_bits.has_value());
 
     // consider ways of dealing with vectors of unknown subtype size or with a
     // subtype size that does not fit byte boundaries; currently we fall back to
     // stitching together consecutive elements down below
-    if(
-      element_width.has_value() && *element_width >= 1 &&
-      *element_width % 8 == 0)
+    if(element_bits.has_value() && *element_bits >= 1 && *element_bits % 8 == 0)
     {
       vector_exprt vector(vector_type);
 
@@ -424,7 +420,7 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
       {
         plus_exprt new_offset(
           unpacked.offset(),
-          from_integer(i * (*element_width) / 8, unpacked.offset().type()));
+          from_integer(i * (*element_bits) / 8, unpacked.offset().type()));
 
         byte_extract_exprt tmp(unpacked);
         tmp.type() = subtype;
@@ -446,12 +442,12 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
 
     for(const auto &comp : components)
     {
-      auto element_width = pointer_offset_bits(comp.type(), ns);
+      auto component_bits = pointer_offset_bits(comp.type(), ns);
 
       // the next member would be misaligned, abort
       if(
-        !element_width.has_value() || *element_width == 0 ||
-        *element_width % 8 != 0)
+        !component_bits.has_value() || *component_bits == 0 ||
+        *component_bits % 8 != 0)
       {
         failed=true;
         break;

From bd307906f68c1da07ec138999e871b187092c7ec Mon Sep 17 00:00:00 2001
From: Michael Tautschnig <tautschn@amazon.com>
Date: Thu, 14 Feb 2019 13:04:31 +0000
Subject: [PATCH 5/5] byte_extract lowering: Use optionalt<mp_integer> instead
 of nil_exprt

This makes the interface much more explicit about what is or isn't supported at
moment.
---
 src/solvers/lowering/byte_operators.cpp | 126 +++++++++++-------------
 1 file changed, 56 insertions(+), 70 deletions(-)

diff --git a/src/solvers/lowering/byte_operators.cpp b/src/solvers/lowering/byte_operators.cpp
index dc92b49f4ef..03b7cd44d5a 100644
--- a/src/solvers/lowering/byte_operators.cpp
+++ b/src/solvers/lowering/byte_operators.cpp
@@ -21,64 +21,59 @@ Author: Daniel Kroening, kroening@kroening.com
 static exprt unpack_rec(
   const exprt &src,
   bool little_endian,
-  const exprt &offset_bytes,
-  const exprt &max_bytes,
+  const optionalt<mp_integer> &offset_bytes,
+  const optionalt<mp_integer> &max_bytes,
   const namespacet &ns,
   bool unpack_byte_array = false);
 
 /// Rewrite an array or vector into its individual bytes.
 /// \param src: array/vector to unpack
-/// \param src_size: array/vector size; if not a constant, \p max_bytes must be
-///   a constant value, otherwise we fail with an exception
+/// \param src_size: array/vector size; if not a constant and thus not set,
+///   \p max_bytes must be a known constant value, otherwise we fail with an
+///   exception
 /// \param element_bits: bit width of array/vector elements
 /// \param little_endian: true, iff assumed endianness is little-endian
-/// \param offset_bytes: if not nil, bytes prior to this offset will be filled
+/// \param offset_bytes: if set, bytes prior to this offset will be filled
 ///   with nil values
-/// \param max_bytes: if not nil, use as upper bound of the number of bytes to
+/// \param max_bytes: if set, use as upper bound of the number of bytes to
 ///   unpack
 /// \param ns: namespace for type lookups
 /// \return array_exprt holding unpacked elements
 static array_exprt unpack_array_vector(
   const exprt &src,
-  const exprt &src_size,
+  const optionalt<mp_integer> &src_size,
   const mp_integer &element_bits,
   bool little_endian,
-  const exprt &offset_bytes,
-  const exprt &max_bytes,
+  const optionalt<mp_integer> &offset_bytes,
+  const optionalt<mp_integer> &max_bytes,
   const namespacet &ns)
 {
-  auto max_bytes_int = numeric_cast<mp_integer>(max_bytes);
-  auto num_elements = numeric_cast<mp_integer>(src_size);
-
-  if(!max_bytes_int && !num_elements)
-  {
-    throw non_const_array_sizet(src.type(), max_bytes);
-  }
+  if(!src_size.has_value() && !max_bytes.has_value())
+    throw non_const_array_sizet(src.type(), nil_exprt());
 
   exprt::operandst byte_operands;
   mp_integer first_element = 0;
 
   // refine the number of elements to extract in case the element width is known
   // and a multiple of bytes; otherwise we will expand the entire array/vector
-  optionalt<mp_integer> max_elements;
+  optionalt<mp_integer> num_elements = src_size;
   if(element_bits > 0 && element_bits % 8 == 0)
   {
     mp_integer el_bytes = element_bits / 8;
 
-    if(!num_elements)
+    if(!num_elements.has_value())
     {
       // turn bytes into elements, rounding up
-      max_elements = (*max_bytes_int + el_bytes - 1) / el_bytes;
+      num_elements = (*max_bytes + el_bytes - 1) / el_bytes;
     }
 
-    if(auto offset_bytes_int = numeric_cast<mp_integer>(offset_bytes))
+    if(offset_bytes.has_value())
     {
       // compute offset as number of elements
-      first_element = *offset_bytes_int / el_bytes;
+      first_element = *offset_bytes / el_bytes;
       // insert offset_bytes-many nil bytes into the output array
-      *offset_bytes_int -= *offset_bytes_int % el_bytes;
       byte_operands.resize(
-        numeric_cast_v<std::size_t>(*offset_bytes_int),
+        numeric_cast_v<std::size_t>(*offset_bytes - (*offset_bytes % el_bytes)),
         from_integer(0, unsignedbv_typet(8)));
     }
   }
@@ -87,7 +82,7 @@ static array_exprt unpack_array_vector(
   // array/vector is unknown; if element_bits was usable above this will
   // have been turned into a number of elements already
   if(!num_elements)
-    num_elements = *max_elements;
+    num_elements = *max_bytes;
 
   const exprt src_simp = simplify_expr(src, ns);
 
@@ -109,8 +104,7 @@ static array_exprt unpack_array_vector(
 
     // recursively unpack each element until so that we eventually just have an
     // array of bytes left
-    exprt sub =
-      unpack_rec(element, little_endian, nil_exprt(), max_bytes, ns, true);
+    exprt sub = unpack_rec(element, little_endian, {}, max_bytes, ns, true);
     byte_operands.insert(
       byte_operands.end(), sub.operands().begin(), sub.operands().end());
   }
@@ -124,22 +118,22 @@ static array_exprt unpack_array_vector(
 /// Rewrite an object into its individual bytes.
 /// \param src: object to unpack
 /// \param little_endian: true, iff assumed endianness is little-endian
-/// \param offset_bytes: if not nil, bytes prior to this offset will be filled
-///   with nil values
-/// \param max_bytes: if not nil, use as upper bound of the number of bytes to
+/// \param offset_bytes: if set, bytes prior to this offset will be filled with
+///   nil values
+/// \param max_bytes: if set, use as upper bound of the number of bytes to
 ///   unpack
 /// \param ns: namespace for type lookups
 /// \param unpack_byte_array: if true, return unmodified \p src iff it is an
 //    array of bytes
 /// \return array of bytes in the sequence found in memory
-/// \throws flatten_byte_extract_exceptiont Raised is unable to unpack the
+/// \throws flatten_byte_extract_exceptiont Raised if unable to unpack the
 /// object because of either non constant size, byte misalignment or
 /// non-constant component width.
 static exprt unpack_rec(
   const exprt &src,
   bool little_endian,
-  const exprt &offset_bytes,
-  const exprt &max_bytes,
+  const optionalt<mp_integer> &offset_bytes,
+  const optionalt<mp_integer> &max_bytes,
   const namespacet &ns,
   bool unpack_byte_array)
 {
@@ -154,9 +148,11 @@ static exprt unpack_rec(
     if(!unpack_byte_array && *element_bits == 8)
       return src;
 
+    const auto constant_size_or_nullopt =
+      numeric_cast<mp_integer>(array_type.size());
     return unpack_array_vector(
       src,
-      array_type.size(),
+      constant_size_or_nullopt,
       *element_bits,
       little_endian,
       offset_bytes,
@@ -176,7 +172,7 @@ static exprt unpack_rec(
 
     return unpack_array_vector(
       src,
-      vector_type.size(),
+      numeric_cast_v<mp_integer>(vector_type.size()),
       *element_bits,
       little_endian,
       offset_bytes,
@@ -203,29 +199,22 @@ static exprt unpack_rec(
         throw non_byte_alignedt(struct_type, comp, *component_bits);
       }
 
-      exprt offset_in_member = nil_exprt();
-      auto offset_in_member_int = numeric_cast<mp_integer>(offset_bytes);
-      exprt max_bytes_left = nil_exprt();
-      auto max_bytes_left_int = numeric_cast<mp_integer>(max_bytes);
+      optionalt<mp_integer> offset_in_member;
+      optionalt<mp_integer> max_bytes_left;
 
-      if(offset_in_member_int.has_value())
+      if(offset_bytes.has_value())
       {
-        *offset_in_member_int -= member_offset_bits / 8;
-        // if the offset is negative, offset_in_member remains nil, which has
+        offset_in_member = *offset_bytes - member_offset_bits / 8;
+        // if the offset is negative, offset_in_member remains unset, which has
         // the same effect as setting it to zero
-        if(*offset_in_member_int >= 0)
-        {
-          offset_in_member =
-            from_integer(*offset_in_member_int, offset_bytes.type());
-        }
+        if(*offset_in_member < 0)
+          offset_in_member.reset();
       }
 
-      if(max_bytes_left_int.has_value())
+      if(max_bytes.has_value())
       {
-        *max_bytes_left_int -= member_offset_bits / 8;
-        if(*max_bytes_left_int >= 0)
-          max_bytes_left = from_integer(*max_bytes_left_int, max_bytes.type());
-        else
+        max_bytes_left = *max_bytes - member_offset_bits / 8;
+        if(*max_bytes_left < 0)
           break;
       }
 
@@ -256,16 +245,10 @@ static exprt unpack_rec(
 
     if(bits_opt.has_value())
       bits = *bits_opt;
+    else if(max_bytes.has_value())
+      bits = *max_bytes * 8;
     else
-    {
-      bits_opt = numeric_cast<mp_integer>(max_bytes);
-      if(!bits_opt.has_value())
-      {
-        throw non_constant_widtht(src, max_bytes);
-      }
-      else
-        bits = *bits_opt * 8;
-    }
+      throw non_constant_widtht(src, nil_exprt());
 
     exprt::operandst byte_operands;
     for(mp_integer i=0; i<bits; i+=8)
@@ -352,19 +335,22 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
   // determine an upper bound of the number of bytes we might need
   exprt upper_bound=size_of_expr(src.type(), ns);
   if(upper_bound.is_not_nil())
-    upper_bound=
-      simplify_expr(
-        plus_exprt(
-          upper_bound,
-          typecast_exprt(src.offset(), upper_bound.type())),
-        ns);
+    upper_bound = simplify_expr(
+      plus_exprt(
+        upper_bound,
+        typecast_exprt::conditional_cast(src.offset(), upper_bound.type())),
+      ns);
 
-  exprt lb = src.offset();
-  if(!lb.is_constant())
-    lb.make_nil();
+  const auto lower_bound_or_nullopt = numeric_cast<mp_integer>(src.offset());
+  const auto upper_bound_or_nullopt = numeric_cast<mp_integer>(upper_bound);
 
   byte_extract_exprt unpacked(src);
-  unpacked.op() = unpack_rec(src.op(), little_endian, lb, upper_bound, ns);
+  unpacked.op() = unpack_rec(
+    src.op(),
+    little_endian,
+    lower_bound_or_nullopt,
+    upper_bound_or_nullopt,
+    ns);
 
   if(src.type().id()==ID_array)
   {