diff --git a/jbmc/regression/jbmc/nondet_propagation1/andNot_Bug.class b/jbmc/regression/jbmc/nondet_propagation1/andNot_Bug.class new file mode 100644 index 00000000000..1b77b951288 Binary files /dev/null and b/jbmc/regression/jbmc/nondet_propagation1/andNot_Bug.class differ diff --git a/jbmc/regression/jbmc/nondet_propagation1/andNot_Bug.java b/jbmc/regression/jbmc/nondet_propagation1/andNot_Bug.java new file mode 100644 index 00000000000..356f615643b --- /dev/null +++ b/jbmc/regression/jbmc/nondet_propagation1/andNot_Bug.java @@ -0,0 +1,13 @@ +public class andNot_Bug { + public static void main(int capacity) { + if(capacity < 1) + return; + boolean b1 = true; + boolean b2[] = new boolean[capacity]; + b2[0] = false; + + for (int i = 0; i < b2.length; i++) { + b1 = b1 && !b2[i]; + } + } +} diff --git a/jbmc/regression/jbmc/nondet_propagation1/test.desc b/jbmc/regression/jbmc/nondet_propagation1/test.desc new file mode 100644 index 00000000000..4f1d31e9028 --- /dev/null +++ b/jbmc/regression/jbmc/nondet_propagation1/test.desc @@ -0,0 +1,7 @@ +CORE +andNot_Bug.class +--depth 1600 --java-unwind-enum-static --java-max-vla-length 48 --unwind 4 --max-nondet-string-length 100 --function andNot_Bug.main +^EXIT=0$ +^SIGNAL=0$ +-- +-- diff --git a/regression/cbmc-library/memcpy-03/main.c b/regression/cbmc-library/memcpy-03/main.c new file mode 100644 index 00000000000..9f5d9f6b85d --- /dev/null +++ b/regression/cbmc-library/memcpy-03/main.c @@ -0,0 +1,16 @@ +#include +#include + +int main() +{ + char foo[1]; + char result = 0; + int size; + __CPROVER_assume(size == 1); + + memset(&result, 42, size); + __CPROVER_assert(result == 42, "should succeed"); + result = 42; + memcpy(&result, foo, size); + __CPROVER_assert(result == 42, "should fail"); +} diff --git a/regression/cbmc-library/memcpy-03/test.desc b/regression/cbmc-library/memcpy-03/test.desc new file mode 100644 index 00000000000..7dbda2b83e4 --- /dev/null +++ b/regression/cbmc-library/memcpy-03/test.desc @@ -0,0 +1,10 @@ +CORE +main.c + +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +^\[main.assertion.1\] line 12 should succeed: SUCCESS$ +^\[main.assertion.2\] line 15 should fail: FAILURE$ +-- +^warning: ignoring diff --git a/regression/cbmc/Endianness4/test.desc b/regression/cbmc/Endianness4/test.desc index 39d9265e8a7..9efefbc7362 100644 --- a/regression/cbmc/Endianness4/test.desc +++ b/regression/cbmc/Endianness4/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c ^EXIT=0$ diff --git a/regression/cbmc/Endianness6/test.desc b/regression/cbmc/Endianness6/test.desc index 1b8f7c29219..81ceb4c6dc0 100644 --- a/regression/cbmc/Endianness6/test.desc +++ b/regression/cbmc/Endianness6/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c --big-endian ^EXIT=0$ diff --git a/regression/cbmc/Pointer_byte_extract5/no-simplify.desc b/regression/cbmc/Pointer_byte_extract5/no-simplify.desc index 59ce8fd1c87..36c5519b60c 100644 --- a/regression/cbmc/Pointer_byte_extract5/no-simplify.desc +++ b/regression/cbmc/Pointer_byte_extract5/no-simplify.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.i --bounds-check --32 --no-simplify ^EXIT=10$ diff --git a/regression/cbmc/Pointer_byte_extract5/test.desc b/regression/cbmc/Pointer_byte_extract5/test.desc index 3d50935641c..48d0d5d9c53 100644 --- a/regression/cbmc/Pointer_byte_extract5/test.desc +++ b/regression/cbmc/Pointer_byte_extract5/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.i --bounds-check --32 ^EXIT=10$ diff --git a/regression/cbmc/Promotion3/test.desc b/regression/cbmc/Promotion3/test.desc index 9efefbc7362..39d9265e8a7 100644 --- a/regression/cbmc/Promotion3/test.desc +++ b/regression/cbmc/Promotion3/test.desc @@ -1,4 +1,4 @@ -CORE +CORE broken-smt-backend main.c ^EXIT=0$ diff --git a/regression/cbmc/array_constraints1/test.desc b/regression/cbmc/array_constraints1/test.desc index 064eaa5b589..8040e5651bf 100644 --- a/regression/cbmc/array_constraints1/test.desc +++ b/regression/cbmc/array_constraints1/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c --unwind 2 --pointer-check ^EXIT=10$ diff --git a/regression/cbmc/byte_update2/test.desc b/regression/cbmc/byte_update2/test.desc index 9d95b7efe83..9845e70d84b 100644 --- a/regression/cbmc/byte_update2/test.desc +++ b/regression/cbmc/byte_update2/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c --little-endian ^EXIT=0$ diff --git a/regression/cbmc/byte_update4/test.desc b/regression/cbmc/byte_update4/test.desc index 9d95b7efe83..9845e70d84b 100644 --- a/regression/cbmc/byte_update4/test.desc +++ b/regression/cbmc/byte_update4/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c --little-endian ^EXIT=0$ diff --git a/regression/cbmc/byte_update6/test.desc b/regression/cbmc/byte_update6/test.desc index 9d95b7efe83..9845e70d84b 100644 --- a/regression/cbmc/byte_update6/test.desc +++ b/regression/cbmc/byte_update6/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c --little-endian ^EXIT=0$ diff --git a/regression/cbmc/byte_update7/test.desc b/regression/cbmc/byte_update7/test.desc index 9d95b7efe83..9845e70d84b 100644 --- a/regression/cbmc/byte_update7/test.desc +++ b/regression/cbmc/byte_update7/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c --little-endian ^EXIT=0$ diff --git a/regression/cbmc/byte_update8/test.desc b/regression/cbmc/byte_update8/test.desc index 9d95b7efe83..9845e70d84b 100644 --- a/regression/cbmc/byte_update8/test.desc +++ b/regression/cbmc/byte_update8/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c --little-endian ^EXIT=0$ diff --git a/regression/cbmc/byte_update9/test.desc b/regression/cbmc/byte_update9/test.desc index 1b8f7c29219..81ceb4c6dc0 100644 --- a/regression/cbmc/byte_update9/test.desc +++ b/regression/cbmc/byte_update9/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c --big-endian ^EXIT=0$ diff --git a/regression/cbmc/gcc_c99-bool-1/test.desc b/regression/cbmc/gcc_c99-bool-1/test.desc index d82c02459fc..82001904141 100644 --- a/regression/cbmc/gcc_c99-bool-1/test.desc +++ b/regression/cbmc/gcc_c99-bool-1/test.desc @@ -1,4 +1,4 @@ -CORE +CORE broken-smt-backend c99-bool-1.c ^EXIT=0$ diff --git a/regression/cbmc/memory_allocation1/test.desc b/regression/cbmc/memory_allocation1/test.desc index 12560af6f29..77c4e0af53a 100644 --- a/regression/cbmc/memory_allocation1/test.desc +++ b/regression/cbmc/memory_allocation1/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c --pointer-check ^EXIT=10$ diff --git a/regression/cbmc/scanf1/test.desc b/regression/cbmc/scanf1/test.desc index 3dba1ea8007..346519020ae 100644 --- a/regression/cbmc/scanf1/test.desc +++ b/regression/cbmc/scanf1/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.i --little-endian ^EXIT=10$ diff --git a/regression/cbmc/struct6/test.desc b/regression/cbmc/struct6/test.desc index 148fe24db7c..da239c1965b 100644 --- a/regression/cbmc/struct6/test.desc +++ b/regression/cbmc/struct6/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c --bounds-check --pointer-check ^EXIT=0$ diff --git a/regression/cbmc/union12/test.desc b/regression/cbmc/union12/test.desc index d568171b1a6..82b3a34754a 100644 --- a/regression/cbmc/union12/test.desc +++ b/regression/cbmc/union12/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c --pointer-check ^EXIT=10$ diff --git a/regression/cbmc/union6/test.desc b/regression/cbmc/union6/test.desc index 9845e70d84b..7c7e749235e 100644 --- a/regression/cbmc/union6/test.desc +++ b/regression/cbmc/union6/test.desc @@ -1,4 +1,4 @@ -CORE +CORE broken-smt-backend main.c --little-endian ^EXIT=0$ @@ -6,3 +6,6 @@ main.c ^VERIFICATION SUCCESSFUL$ -- ^warning: ignoring +-- +This test reports a VERIFICATION ERROR when running with the SMT2 solver on +Windows only. diff --git a/regression/cbmc/union7/test.desc b/regression/cbmc/union7/test.desc index 1b8f7c29219..58448e0655b 100644 --- a/regression/cbmc/union7/test.desc +++ b/regression/cbmc/union7/test.desc @@ -6,3 +6,6 @@ main.c ^VERIFICATION SUCCESSFUL$ -- ^warning: ignoring +-- +This test reports a VERIFICATION ERROR when running with the SMT2 solver on +Windows only. diff --git a/regression/cbmc/union9/test.desc b/regression/cbmc/union9/test.desc index 39d9265e8a7..9efefbc7362 100644 --- a/regression/cbmc/union9/test.desc +++ b/regression/cbmc/union9/test.desc @@ -1,4 +1,4 @@ -CORE broken-smt-backend +CORE main.c ^EXIT=0$ diff --git a/src/solvers/flattening/boolbv_byte_extract.cpp b/src/solvers/flattening/boolbv_byte_extract.cpp index 05a49f4ba62..f62dc552642 100644 --- a/src/solvers/flattening/boolbv_byte_extract.cpp +++ b/src/solvers/flattening/boolbv_byte_extract.cpp @@ -12,6 +12,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include @@ -40,7 +41,8 @@ bvt map_bv(const bv_endianness_mapt &map, const bvt &src) bvt boolbvt::convert_byte_extract(const byte_extract_exprt &expr) { - // if we extract from an unbounded array, call the flattening code + // array logic does not handle byte operators, thus lower when operating on + // unbounded arrays if(is_unbounded_array(expr.op().type())) { try diff --git a/src/solvers/flattening/boolbv_byte_update.cpp b/src/solvers/flattening/boolbv_byte_update.cpp index ec73f5c34b0..843e5223d54 100644 --- a/src/solvers/flattening/boolbv_byte_update.cpp +++ b/src/solvers/flattening/boolbv_byte_update.cpp @@ -8,16 +8,37 @@ Author: Daniel Kroening, kroening@kroening.com #include "boolbv.h" -#include - #include #include +#include #include +#include + +#include +#include +#include "bv_conversion_exceptions.h" #include "bv_endianness_map.h" bvt boolbvt::convert_byte_update(const byte_update_exprt &expr) { + // if we update (from) an unbounded array, lower the expression as the array + // logic does not handle byte operators + if( + is_unbounded_array(expr.op().type()) || + is_unbounded_array(expr.value().type())) + { + try + { + return convert_bv(lower_byte_update(expr, ns)); + } + catch(const flatten_byte_extract_exceptiont &byte_extract_flatten_exception) + { + util_throw_with_nested( + bitvector_conversion_exceptiont("Can't convert byte_update", expr)); + } + } + const exprt &op = expr.op(); const exprt &offset_expr=expr.offset(); const exprt &value=expr.value(); diff --git a/src/solvers/flattening/boolbv_index.cpp b/src/solvers/flattening/boolbv_index.cpp index 89916842e50..0118e680cd5 100644 --- a/src/solvers/flattening/boolbv_index.cpp +++ b/src/solvers/flattening/boolbv_index.cpp @@ -16,6 +16,8 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include + bvt boolbvt::convert_index(const index_exprt &expr) { const exprt &array=expr.array(); @@ -47,7 +49,10 @@ bvt boolbvt::convert_index(const index_exprt &expr) for(std::size_t i=0; i -#include -#include -#include bvt boolbvt::convert_member(const member_exprt &expr) { @@ -20,59 +17,40 @@ bvt boolbvt::convert_member(const member_exprt &expr) const bvt &struct_bv=convert_bv(struct_op); - if(struct_op_type.id()==ID_union) - { - return convert_bv( - byte_extract_exprt(byte_extract_id(), - struct_op, - from_integer(0, index_type()), - expr.type())); - } - else - { - INVARIANT( - struct_op_type.id() == ID_struct, - "member_exprt should denote access to a union or struct"); + const irep_idt &component_name = expr.get_component_name(); + const struct_union_typet::componentst &components = + to_struct_union_type(struct_op_type).components(); - const irep_idt &component_name=expr.get_component_name(); - const struct_typet::componentst &components= - to_struct_type(struct_op_type).components(); + std::size_t offset = 0; - std::size_t offset=0; + for(const auto &c : components) + { + const typet &subtype = c.type(); + const std::size_t sub_width = boolbv_width(subtype); - for(const auto &c : components) + if(c.get_name() == component_name) { - const typet &subtype = c.type(); - std::size_t sub_width=boolbv_width(subtype); - - if(c.get_name() == component_name) - { - DATA_INVARIANT_WITH_DIAGNOSTICS( - base_type_eq(subtype, expr.type(), ns), - "struct component type shall match the member expression type", - subtype.pretty(), - expr.type().pretty()); - - bvt bv; - bv.resize(sub_width); - INVARIANT( - offset + sub_width <= struct_bv.size(), - "bitvector part corresponding to struct element shall be contained " - "within the full struct bitvector"); - - for(std::size_t i=0; i + #include #include #include +#include +#include #include #include #include @@ -22,13 +26,47 @@ Author: Daniel Kroening, kroening@kroening.com static exprt bv_to_expr( const exprt &bitvector_expr, const typet &target_type, + const endianness_mapt &endianness_map, const namespacet &ns); +struct boundst +{ + std::size_t lb; + std::size_t ub; +}; + +/// Map bit boundaries according to endianness. +static boundst map_bounds( + const endianness_mapt &endianness_map, + std::size_t lower_bound, + std::size_t upper_bound) +{ + boundst result; + result.lb = lower_bound; + result.ub = upper_bound; + + if(result.ub < endianness_map.number_of_bits()) + { + result.lb = endianness_map.map_bit(result.lb); + result.ub = endianness_map.map_bit(result.ub); + + // big-endian bounds need swapping + if(result.ub < result.lb) + { + result.lb = endianness_map.number_of_bits() - result.lb - 1; + result.ub = endianness_map.number_of_bits() - result.ub - 1; + } + } + + return result; +} + /// Convert a bitvector-typed expression \p bitvector_expr to a struct-typed /// expression. See \ref bv_to_expr for an overview. static struct_exprt bv_to_struct_expr( const exprt &bitvector_expr, const struct_typet &struct_type, + const endianness_mapt &endianness_map, const namespacet &ns) { const struct_typet::componentst &components = struct_type.components(); @@ -52,13 +90,15 @@ static struct_exprt bv_to_struct_expr( continue; } + const auto bounds = map_bounds( + endianness_map, + member_offset_bits, + member_offset_bits + component_bits - 1); bitvector_typet type{bitvector_expr.type().id(), component_bits}; operands.push_back(bv_to_expr( - extractbits_exprt{bitvector_expr, - member_offset_bits + component_bits - 1, - member_offset_bits, - std::move(type)}, + extractbits_exprt{bitvector_expr, bounds.ub, bounds.lb, std::move(type)}, comp.type(), + endianness_map, ns)); if(component_bits_opt.has_value()) @@ -73,6 +113,7 @@ static struct_exprt bv_to_struct_expr( static array_exprt bv_to_array_expr( const exprt &bitvector_expr, const array_typet &array_type, + const endianness_mapt &endianness_map, const namespacet &ns) { auto num_elements = numeric_cast(array_type.size()); @@ -100,18 +141,20 @@ static array_exprt bv_to_array_expr( { const std::size_t subtype_bits_int = numeric_cast_v(*subtype_bits); + const auto bounds = map_bounds( + endianness_map, i * subtype_bits_int, ((i + 1) * subtype_bits_int) - 1); bitvector_typet type{bitvector_expr.type().id(), subtype_bits_int}; operands.push_back(bv_to_expr( - extractbits_exprt{bitvector_expr, - ((i + 1) * subtype_bits_int) - 1, - i * subtype_bits_int, - std::move(type)}, + extractbits_exprt{ + bitvector_expr, bounds.ub, bounds.lb, std::move(type)}, array_type.subtype(), + endianness_map, ns)); } else { - operands.push_back(bv_to_expr(bitvector_expr, array_type.subtype(), ns)); + operands.push_back( + bv_to_expr(bitvector_expr, array_type.subtype(), endianness_map, ns)); } } @@ -123,6 +166,7 @@ static array_exprt bv_to_array_expr( static vector_exprt bv_to_vector_expr( const exprt &bitvector_expr, const vector_typet &vector_type, + const endianness_mapt &endianness_map, const namespacet &ns) { const std::size_t num_elements = @@ -142,18 +186,20 @@ static vector_exprt bv_to_vector_expr( { const std::size_t subtype_bits_int = numeric_cast_v(*subtype_bits); + const auto bounds = map_bounds( + endianness_map, i * subtype_bits_int, ((i + 1) * subtype_bits_int) - 1); bitvector_typet type{bitvector_expr.type().id(), subtype_bits_int}; operands.push_back(bv_to_expr( - extractbits_exprt{bitvector_expr, - ((i + 1) * subtype_bits_int) - 1, - i * subtype_bits_int, - std::move(type)}, + extractbits_exprt{ + bitvector_expr, bounds.ub, bounds.lb, std::move(type)}, vector_type.subtype(), + endianness_map, ns)); } else { - operands.push_back(bv_to_expr(bitvector_expr, vector_type.subtype(), ns)); + operands.push_back( + bv_to_expr(bitvector_expr, vector_type.subtype(), endianness_map, ns)); } } @@ -165,6 +211,7 @@ static vector_exprt bv_to_vector_expr( static complex_exprt bv_to_complex_expr( const exprt &bitvector_expr, const complex_typet &complex_type, + const endianness_mapt &endianness_map, const namespacet &ns) { const std::size_t total_bits = @@ -181,17 +228,22 @@ static complex_exprt bv_to_complex_expr( else subtype_bits = total_bits / 2; + const auto bounds_real = map_bounds(endianness_map, 0, subtype_bits - 1); + const auto bounds_imag = + map_bounds(endianness_map, subtype_bits, subtype_bits * 2 - 1); + const bitvector_typet type{bitvector_expr.type().id(), subtype_bits}; return complex_exprt{ bv_to_expr( - extractbits_exprt{bitvector_expr, subtype_bits - 1, 0, type}, + extractbits_exprt{bitvector_expr, bounds_real.ub, bounds_real.lb, type}, complex_type.subtype(), + endianness_map, ns), bv_to_expr( - extractbits_exprt{ - bitvector_expr, subtype_bits * 2 - 1, subtype_bits, type}, + extractbits_exprt{bitvector_expr, bounds_imag.ub, bounds_imag.lb, type}, complex_type.subtype(), + endianness_map, ns), complex_type}; } @@ -206,11 +258,13 @@ static complex_exprt bv_to_complex_expr( /// \param bitvector_expr: Bitvector-typed expression to extract from. /// \param target_type: Type of the expression to build. /// \param ns: Namespace to resolve tag types. +/// \param endianness_map: Endianness map. /// \return Expression of type \p target_type constructed from sequences of bits /// from \p bitvector_expr. static exprt bv_to_expr( const exprt &bitvector_expr, const typet &target_type, + const endianness_mapt &endianness_map, const namespacet &ns) { PRECONDITION(can_cast_type(bitvector_expr.type())); @@ -226,26 +280,33 @@ static exprt bv_to_expr( if(target_type.id() == ID_struct) { - return bv_to_struct_expr(bitvector_expr, to_struct_type(target_type), ns); + return bv_to_struct_expr( + bitvector_expr, to_struct_type(target_type), endianness_map, ns); } else if(target_type.id() == ID_struct_tag) { struct_exprt result = bv_to_struct_expr( - bitvector_expr, ns.follow_tag(to_struct_tag_type(target_type)), ns); + bitvector_expr, + ns.follow_tag(to_struct_tag_type(target_type)), + endianness_map, + ns); result.type() = target_type; return std::move(result); } else if(target_type.id() == ID_array) { - return bv_to_array_expr(bitvector_expr, to_array_type(target_type), ns); + return bv_to_array_expr( + bitvector_expr, to_array_type(target_type), endianness_map, ns); } else if(target_type.id() == ID_vector) { - return bv_to_vector_expr(bitvector_expr, to_vector_type(target_type), ns); + return bv_to_vector_expr( + bitvector_expr, to_vector_type(target_type), endianness_map, ns); } else if(target_type.id() == ID_complex) { - return bv_to_complex_expr(bitvector_expr, to_complex_type(target_type), ns); + return bv_to_complex_expr( + bitvector_expr, to_complex_type(target_type), endianness_map, ns); } else { @@ -351,6 +412,164 @@ static array_exprt unpack_array_vector( array_typet(unsignedbv_typet(8), from_integer(size, size_type()))); } +/// Extract bytes from a sequence of bitvector-typed elements. +/// \param bit_fields: operands to concatenate +/// \param total_bits: total bit width of operands +/// \param [out] dest: target to append unpacked bytes to +/// \param little_endian: true, iff assumed endianness is little-endian +/// \param offset_bytes: if set, bytes prior to this offset will be filled +/// with nil values +/// \param max_bytes: if set, use as upper bound of the number of bytes to +/// unpack +/// \param ns: namespace for type lookups +static void process_bit_fields( + exprt::operandst &&bit_fields, + std::size_t total_bits, + exprt::operandst &dest, + bool little_endian, + const optionalt &offset_bytes, + const optionalt &max_bytes, + const namespacet &ns) +{ + const concatenation_exprt concatenation{std::move(bit_fields), + unsignedbv_typet{total_bits}}; + + exprt sub = + unpack_rec(concatenation, little_endian, offset_bytes, max_bytes, ns, true); + + dest.insert( + dest.end(), + std::make_move_iterator(sub.operands().begin()), + std::make_move_iterator(sub.operands().end())); +} + +/// Rewrite a struct-typed expression into its individual bytes. +/// \param src: struct-typed expression to unpack +/// \param little_endian: true, iff assumed endianness is little-endian +/// \param offset_bytes: if set, bytes prior to this offset will be filled +/// with nil values +/// \param max_bytes: if set, use as upper bound of the number of bytes to +/// unpack +/// \param ns: namespace for type lookups +/// \return array_exprt holding unpacked elements +static array_exprt unpack_struct( + const exprt &src, + bool little_endian, + const optionalt &offset_bytes, + const optionalt &max_bytes, + const namespacet &ns) +{ + const struct_typet &struct_type = to_struct_type(ns.follow(src.type())); + const struct_typet::componentst &components = struct_type.components(); + + optionalt offset_in_member; + optionalt max_bytes_left; + optionalt> bit_fields; + + mp_integer member_offset_bits = 0; + exprt::operandst byte_operands; + for(auto it = components.begin(); it != components.end(); ++it) + { + const auto &comp = *it; + auto component_bits = pointer_offset_bits(comp.type(), ns); + + // We can only handle a member of unknown width when it is the last member + // and is byte-aligned. Members of unknown width in the middle would leave + // us with unknown alignment of subsequent members, and queuing them up as + // bit fields is not possible either as the total width of the concatenation + // could not be determined. + if( + !component_bits.has_value() && + (std::next(it) != components.end() || bit_fields.has_value())) + throw non_constant_widtht(src, nil_exprt()); + + member_exprt member(src, comp.get_name(), comp.type()); + if(src.id() == ID_struct) + simplify(member, ns); + + // Is it a byte-aligned member? + if(member_offset_bits % 8 == 0) + { + if(bit_fields.has_value()) + { + process_bit_fields( + std::move(bit_fields->first), + bit_fields->second, + byte_operands, + little_endian, + offset_in_member, + max_bytes_left, + ns); + bit_fields.reset(); + } + + if(offset_bytes.has_value()) + { + offset_in_member = *offset_bytes - member_offset_bits / 8; + // if the offset is negative, offset_in_member remains unset, which has + // the same effect as setting it to zero + if(*offset_in_member < 0) + offset_in_member.reset(); + } + + if(max_bytes.has_value()) + { + max_bytes_left = *max_bytes - member_offset_bits / 8; + if(*max_bytes_left < 0) + break; + } + } + + if( + member_offset_bits % 8 != 0 || + (component_bits.has_value() && *component_bits % 8 != 0)) + { + if(!bit_fields.has_value()) + bit_fields = std::make_pair(exprt::operandst{}, std::size_t{0}); + + const std::size_t bits_int = numeric_cast_v(*component_bits); + bit_fields->first.insert( + little_endian ? bit_fields->first.begin() : bit_fields->first.end(), + typecast_exprt::conditional_cast(member, unsignedbv_typet{bits_int})); + bit_fields->second += bits_int; + + member_offset_bits += *component_bits; + + continue; + } + + INVARIANT( + !bit_fields.has_value(), + "all preceding members should have been processed"); + + exprt sub = unpack_rec( + member, little_endian, offset_in_member, max_bytes_left, ns, true); + + byte_operands.insert( + byte_operands.end(), + std::make_move_iterator(sub.operands().begin()), + std::make_move_iterator(sub.operands().end())); + + if(component_bits.has_value()) + member_offset_bits += *component_bits; + } + + if(bit_fields.has_value()) + process_bit_fields( + std::move(bit_fields->first), + bit_fields->second, + byte_operands, + little_endian, + offset_in_member, + max_bytes_left, + ns); + + const std::size_t size = byte_operands.size(); + return array_exprt{ + std::move(byte_operands), + array_typet{unsignedbv_typet{8}, from_integer(size, size_type())}}; +} + /// Rewrite a complex_exprt into its individual bytes. /// \param src: complex-typed expression to unpack /// \param little_endian: true, iff assumed endianness is little-endian @@ -405,8 +624,7 @@ unpack_complex(const exprt &src, bool little_endian, const namespacet &ns) // array of bytes /// \return array of bytes in the sequence found in memory /// \throws flatten_byte_extract_exceptiont Raised if unable to unpack the -/// object because of either non constant size, byte misalignment or -/// non-constant component width. +/// object because of either non-constant size or non-constant component width. static exprt unpack_rec( const exprt &src, bool little_endian, @@ -463,60 +681,7 @@ static exprt unpack_rec( } else if(src.type().id() == ID_struct || src.type().id() == ID_struct_tag) { - const struct_typet &struct_type=to_struct_type(ns.follow(src.type())); - const struct_typet::componentst &components=struct_type.components(); - - mp_integer member_offset_bits = 0; - - exprt::operandst byte_operands; - for(const auto &comp : components) - { - auto component_bits = pointer_offset_bits(comp.type(), ns); - - // the next member would be misaligned, abort - if( - !component_bits.has_value() || *component_bits == 0 || - *component_bits % 8 != 0) - { - throw non_byte_alignedt(struct_type, comp, *component_bits); - } - - optionalt offset_in_member; - optionalt max_bytes_left; - - if(offset_bytes.has_value()) - { - offset_in_member = *offset_bytes - member_offset_bits / 8; - // if the offset is negative, offset_in_member remains unset, which has - // the same effect as setting it to zero - if(*offset_in_member < 0) - offset_in_member.reset(); - } - - if(max_bytes.has_value()) - { - max_bytes_left = *max_bytes - member_offset_bits / 8; - if(*max_bytes_left < 0) - break; - } - - member_exprt member(src, comp.get_name(), comp.type()); - if(src.id() == ID_struct) - simplify(member, ns); - - exprt sub = unpack_rec( - member, little_endian, offset_in_member, max_bytes_left, ns, true); - - byte_operands.insert( - byte_operands.end(), sub.operands().begin(), sub.operands().end()); - - member_offset_bits += *component_bits; - } - - const std::size_t size = byte_operands.size(); - return array_exprt( - std::move(byte_operands), - array_typet(unsignedbv_typet(8), from_integer(size, size_type()))); + return unpack_struct(src, little_endian, offset_bytes, max_bytes, ns); } else if(src.type().id() == ID_union || src.type().id() == ID_union_tag) { @@ -713,11 +878,15 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns) // determine an upper bound of the number of bytes we might need exprt upper_bound=size_of_expr(src.type(), ns); if(upper_bound.is_not_nil()) + { upper_bound = simplify_expr( plus_exprt( upper_bound, typecast_exprt::conditional_cast(src.offset(), upper_bound.type())), ns); + } + else if(src.type().id() == ID_empty) + upper_bound = from_integer(0, size_type()); const auto lower_bound_or_nullopt = numeric_cast(src.offset()); const auto upper_bound_or_nullopt = numeric_cast(upper_bound); @@ -941,273 +1110,735 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns) concatenation_exprt concatenation( std::move(op), bitvector_typet(subtype->id(), width_bytes * 8)); - return bv_to_expr(concatenation, src.type(), ns); + endianness_mapt map(src.type(), little_endian, ns); + return bv_to_expr(concatenation, src.type(), map, ns); } } static exprt lower_byte_update( const byte_update_exprt &src, - const namespacet &ns, - bool negative_offset) + const array_exprt &value_as_byte_array, + const optionalt &non_const_update_bound, + const namespacet &ns); + +/// Apply a byte update \p src to an array/vector of bytes using the byte +/// array \p value_as_byte_array as update value. +/// \param src: Original byte-update expression +/// \param subtype: Array/vector element type +/// \param value_as_byte_array: Update value as an array of bytes +/// \param non_const_update_bound: If set, (symbolically) constrain updates such +/// as to at most update \p non_const_update_bound elements +/// \param ns: Namespace +/// \return Array/vector expression reflecting all updates. +static exprt lower_byte_update_byte_array_vector( + const byte_update_exprt &src, + const typet &subtype, + const array_exprt &value_as_byte_array, + const optionalt &non_const_update_bound, + const namespacet &ns) +{ + // apply 'array-update-with' num_elements times + exprt result = src.op(); + + for(std::size_t i = 0; i < value_as_byte_array.operands().size(); ++i) + { + const exprt &element = value_as_byte_array.operands()[i]; + + const exprt where = simplify_expr( + plus_exprt{src.offset(), from_integer(i, src.offset().type())}, ns); + + // skip elements that wouldn't change (skip over typecasts as we might have + // some signed/unsigned char conversions) + const exprt &e = skip_typecast(element); + if(e.id() == ID_index) + { + const index_exprt &index_expr = to_index_expr(e); + if(index_expr.array() == src.op() && index_expr.index() == where) + continue; + } + + exprt update_value; + if(non_const_update_bound.has_value()) + { + update_value = typecast_exprt::conditional_cast( + if_exprt{binary_predicate_exprt{ + from_integer(i, non_const_update_bound->type()), + ID_lt, + *non_const_update_bound}, + element, + index_exprt{src.op(), where}}, + subtype); + } + else + update_value = typecast_exprt::conditional_cast(element, subtype); + + if(result.id() != ID_with) + result = with_exprt{result, where, update_value}; + else + result.add_to_operands(where, update_value); + } + + return simplify_expr(result, ns); +} + +/// Apply a byte update \p src to an array/vector typed operand, using the byte +/// array \p value_as_byte_array as update value, when either the size of each +/// element or the overall array/vector size is not constant. +/// \param src: Original byte-update expression +/// \param subtype: Array/vector element type +/// \param value_as_byte_array: Update value as an array of bytes +/// \param non_const_update_bound: If set, (symbolically) constrain updates such +/// as to at most update \p non_const_update_bound elements +/// \param ns: Namespace +/// \return Array/vector expression reflecting all updates. +static exprt lower_byte_update_array_vector_non_const( + const byte_update_exprt &src, + const typet &subtype, + const array_exprt &value_as_byte_array, + const optionalt &non_const_update_bound, + const namespacet &ns) { - const auto element_size_opt = pointer_offset_size(src.value().type(), ns); + const irep_idt extract_opcode = src.id() == ID_byte_update_little_endian + ? ID_byte_extract_little_endian + : ID_byte_extract_big_endian; + + // do all arithmetic below using index/offset types - the array theory + // back-end is really picky about indices having the same type + const exprt subtype_size = simplify_expr( + typecast_exprt::conditional_cast( + size_of_expr(subtype, ns), src.offset().type()), + ns); - INVARIANT_WITH_DIAGNOSTICS( - element_size_opt.has_value(), - "size of type in bytes must be known", - irep_pretty_diagnosticst(src)); + // compute the index of the first element of the array/vector that may be + // updated + exprt first_index = div_exprt{src.offset(), subtype_size}; + simplify(first_index, ns); - const mp_integer &element_size = *element_size_opt; + // compute the offset within that first element + const exprt update_offset = mod_exprt{src.offset(), subtype_size}; - if(src.op().type().id()==ID_array) + // compute the number of bytes (from the update value) that are going to be + // consumed for updating the first element + exprt initial_bytes = minus_exprt{subtype_size, update_offset}; + exprt update_bound; + if(non_const_update_bound.has_value()) { - const array_typet &array_type=to_array_type(src.op().type()); - const typet &subtype=array_type.subtype(); + update_bound = typecast_exprt::conditional_cast( + *non_const_update_bound, subtype_size.type()); + } + else + { + update_bound = + from_integer(value_as_byte_array.operands().size(), initial_bytes.type()); + } + initial_bytes = + if_exprt{binary_predicate_exprt{initial_bytes, ID_lt, update_bound}, + initial_bytes, + update_bound}; + simplify(initial_bytes, ns); + + // encode the first update: update the original element at first_index with + // initial_bytes-many bytes extracted from value_as_byte_array + with_exprt result{ + src.op(), + first_index, + lower_byte_operators( + byte_update_exprt{ + src.id(), + index_exprt{src.op(), first_index}, + update_offset, + byte_extract_exprt{extract_opcode, + value_as_byte_array, + from_integer(0, src.offset().type()), + array_typet{unsignedbv_typet{8}, initial_bytes}}}, + ns)}; + + // We will update one array/vector element at a time - compute the number of + // update values that will be consumed in each step. If we cannot determine a + // constant value at this time we assume it's at least one byte. The + // byte_extract_exprt within the loop uses the symbolic value (subtype_size), + // we just need to make sure we make progress for the loop to terminate. + std::size_t step_size = 1; + if(subtype_size.is_constant()) + step_size = numeric_cast_v(to_constant_expr(subtype_size)); + // Given the first update already encoded, keep track of the offset into the + // update value. Again, the expressions within the loop use the symbolic + // value, this is just an optimization in case we can determine a constant + // offset. + std::size_t offset = 0; + if(initial_bytes.is_constant()) + offset = numeric_cast_v(to_constant_expr(initial_bytes)); + + std::size_t i = 1; + for(; offset + step_size <= value_as_byte_array.operands().size(); + offset += step_size, ++i) + { + exprt where = simplify_expr( + plus_exprt{first_index, from_integer(i, first_index.type())}, ns); + + exprt offset_expr = simplify_expr( + plus_exprt{ + initial_bytes, + mult_exprt{subtype_size, from_integer(i - 1, subtype_size.type())}}, + ns); + + exprt element = lower_byte_operators( + byte_update_exprt{ + src.id(), + index_exprt{src.op(), where}, + from_integer(0, src.offset().type()), + byte_extract_exprt{extract_opcode, + value_as_byte_array, + std::move(offset_expr), + array_typet{unsignedbv_typet{8}, subtype_size}}}, + ns); + + result.add_to_operands(std::move(where), std::move(element)); + } + + // do the tail + if(offset < value_as_byte_array.operands().size()) + { + const std::size_t tail_size = + value_as_byte_array.operands().size() - offset; + + exprt where = simplify_expr( + plus_exprt{first_index, from_integer(i, first_index.type())}, ns); + + exprt element = lower_byte_operators( + byte_update_exprt{ + src.id(), + index_exprt{src.op(), where}, + from_integer(0, src.offset().type()), + byte_extract_exprt{extract_opcode, + value_as_byte_array, + from_integer(offset, src.offset().type()), + array_typet{unsignedbv_typet{8}, + from_integer(tail_size, size_type())}}}, + ns); + + result.add_to_operands(std::move(where), std::move(element)); + } + + return simplify_expr(result, ns); +} + +/// Apply a byte update \p src to an array/vector typed operand using the byte +/// array \p value_as_byte_array as update value. +/// \param src: Original byte-update expression +/// \param subtype: Array/vector element type +/// \param value_as_byte_array: Update value as an array of bytes +/// \param non_const_update_bound: If set, (symbolically) constrain updates such +/// as to at most update \p non_const_update_bound elements +/// \param ns: Namespace +/// \return Array/vector expression reflecting all updates. +static exprt lower_byte_update_array_vector( + const byte_update_exprt &src, + const typet &subtype, + const array_exprt &value_as_byte_array, + const optionalt &non_const_update_bound, + const namespacet &ns) +{ + const bool is_array = src.type().id() == ID_array; + exprt size; + if(is_array) + size = to_array_type(src.type()).size(); + else + size = to_vector_type(src.type()).size(); + + auto subtype_bits = pointer_offset_bits(subtype, ns); + + // fall back to bytewise updates in all non-constant or dubious cases + if( + !size.is_constant() || !src.offset().is_constant() || + !subtype_bits.has_value() || *subtype_bits == 0 || *subtype_bits % 8 != 0 || + non_const_update_bound.has_value()) + { + return lower_byte_update_array_vector_non_const( + src, subtype, value_as_byte_array, non_const_update_bound, ns); + } + + std::size_t num_elements = + numeric_cast_v(to_constant_expr(size)); + mp_integer offset_bytes = + numeric_cast_v(to_constant_expr(src.offset())); + + exprt::operandst elements; + elements.reserve(num_elements); - // array of bitvectors? - if(subtype.id()==ID_unsignedbv || - subtype.id()==ID_signedbv || - subtype.id()==ID_floatbv || - subtype.id()==ID_c_bool || - subtype.id()==ID_pointer) + std::size_t i = 0; + // copy the prefix not affected by the update + for(; i < num_elements && (i + 1) * *subtype_bits <= offset_bytes * 8; ++i) + elements.push_back(index_exprt{src.op(), from_integer(i, index_type())}); + + // the modified elements + for(; i < num_elements && + i * *subtype_bits < + (offset_bytes + value_as_byte_array.operands().size()) * 8; + ++i) + { + mp_integer update_offset = offset_bytes - i * (*subtype_bits / 8); + mp_integer update_elements = *subtype_bits / 8; + exprt::operandst::const_iterator first = + value_as_byte_array.operands().begin(); + exprt::operandst::const_iterator end = value_as_byte_array.operands().end(); + if(update_offset < 0) + { + INVARIANT( + value_as_byte_array.operands().size() > -update_offset, + "indices past the update should be handled above"); + first += numeric_cast_v(-update_offset); + } + else { - auto sub_size_opt = pointer_offset_size(subtype, ns); + update_elements -= update_offset; + INVARIANT( + update_elements > 0, + "indices before the update should be handled above"); + } + + if(std::distance(first, end) > update_elements) + end = first + numeric_cast_v(update_elements); + exprt::operandst update_values{first, end}; + const std::size_t update_size = update_values.size(); + + const byte_update_exprt bu{ + src.id(), + index_exprt{src.op(), from_integer(i, index_type())}, + from_integer(update_offset < 0 ? 0 : update_offset, src.offset().type()), + array_exprt{std::move(update_values), + array_typet{unsignedbv_typet{8}, + from_integer(update_size, size_type())}}}; + elements.push_back(lower_byte_operators(bu, ns)); + } + + // copy the tail not affected by the update + for(; i < num_elements; ++i) + elements.push_back(index_exprt{src.op(), from_integer(i, index_type())}); + if(is_array) + return simplify_expr( + array_exprt{std::move(elements), to_array_type(src.type())}, ns); + else + return simplify_expr( + vector_exprt{std::move(elements), to_vector_type(src.type())}, ns); +} + +/// Apply a byte update \p src to a struct typed operand using the byte array +/// \p value_as_byte_array as update value. +/// \param src: Original byte-update expression +/// \param struct_type: Type of the operand +/// \param value_as_byte_array: Update value as an array of bytes +/// \param non_const_update_bound: If set, (symbolically) constrain updates such +/// as to at most update \p non_const_update_bound elements +/// \param ns: Namespace +/// \return Struct expression reflecting all updates. +static exprt lower_byte_update_struct( + const byte_update_exprt &src, + const struct_typet &struct_type, + const array_exprt &value_as_byte_array, + const optionalt &non_const_update_bound, + const namespacet &ns) +{ + exprt::operandst elements; + elements.reserve(struct_type.components().size()); + + mp_integer member_offset_bits = 0; + for(const auto &comp : struct_type.components()) + { + auto element_width = pointer_offset_bits(comp.type(), ns); + + exprt member = member_exprt{src.op(), comp.get_name(), comp.type()}; + + // compute the update offset relative to this struct member - will be + // negative if we are already in the middle of the update or beyond it + exprt offset = simplify_expr( + minus_exprt{src.offset(), + from_integer(member_offset_bits / 8, src.offset().type())}, + ns); + auto offset_bytes = numeric_cast(offset); + // we don't need to update anything when + // 1) the update offset is greater than the end of this member (thus the + // relative offset is greater than this element) or + // 2) the update offset plus the size of the update is less than the member + // offset + if( + offset_bytes.has_value() && + (*offset_bytes * 8 >= *element_width || + (*offset_bytes < 0 && + -*offset_bytes >= value_as_byte_array.operands().size()))) + { + elements.push_back(std::move(member)); + member_offset_bits += *element_width; + continue; + } + else if(!offset_bytes.has_value()) + { + const irep_idt extract_opcode = src.id() == ID_byte_update_little_endian + ? ID_byte_extract_little_endian + : ID_byte_extract_big_endian; + + const byte_extract_exprt byte_extract_expr{ + extract_opcode, + src.op(), + from_integer(0, src.offset().type()), + array_typet{unsignedbv_typet{8}, size_of_expr(src.type(), ns)}}; + + byte_update_exprt bu = src; + bu.op() = lower_byte_extract(byte_extract_expr, ns); + + return lower_byte_extract( + byte_extract_exprt{extract_opcode, + lower_byte_update_byte_array_vector( + bu, + unsignedbv_typet{8}, + value_as_byte_array, + non_const_update_bound, + ns), + from_integer(0, src.offset().type()), + src.type()}, + ns); + } + + mp_integer update_elements = (*element_width + 7) / 8; + exprt::operandst::const_iterator first = + value_as_byte_array.operands().begin(); + exprt::operandst::const_iterator end = value_as_byte_array.operands().end(); + if(*offset_bytes < 0) + { + offset = from_integer(0, src.offset().type()); + INVARIANT( + value_as_byte_array.operands().size() > -*offset_bytes, + "members past the update should be handled above"); + first += numeric_cast_v(-*offset_bytes); + } + else + { + update_elements -= *offset_bytes; INVARIANT( - sub_size_opt.has_value(), - "bit width (rounded to full bytes) of subtype must be known"); + update_elements > 0, + "members before the update should be handled above"); + } - const mp_integer &sub_size = *sub_size_opt; + if(std::distance(first, end) > update_elements) + end = first + numeric_cast_v(update_elements); + exprt::operandst update_values(first, end); + const std::size_t update_size = update_values.size(); - // byte array? - if(sub_size==1) - { - // apply 'array-update-with' element_size times - exprt result = src.op(); - - for(mp_integer i=0; i(member_offset_bits % 8); + const std::size_t element_bits_int = + numeric_cast_v(*element_width); + + const bool little_endian = src.id() == ID_byte_update_little_endian; + if(shift != 0) + { + member = + concatenation_exprt{typecast_exprt::conditional_cast( + member, unsignedbv_typet{element_bits_int}), + from_integer(0, unsignedbv_typet{shift}), + unsignedbv_typet{shift + element_bits_int}}; + if(!little_endian) + member.op0().swap(member.op1()); } + + byte_update_exprt bu{ + src.id(), + std::move(member), + offset, + array_exprt{std::move(update_values), + array_typet{unsignedbv_typet{8}, + from_integer(update_size, size_type())}}}; + + if(shift == 0) + elements.push_back(lower_byte_operators(bu, ns)); else { - PRECONDITION_WITH_DIAGNOSTICS( - false, - "flatten_byte_update can only do arrays of scalars right now", - subtype.id_string()); + elements.push_back(typecast_exprt::conditional_cast( + extractbits_exprt{lower_byte_operators(bu, ns), + element_bits_int - 1 + (little_endian ? shift : 0), + (little_endian ? shift : 0), + unsignedbv_typet{element_bits_int}}, + comp.type())); } + + member_offset_bits += *element_width; } - else if(src.op().type().id()==ID_signedbv || - src.op().type().id()==ID_unsignedbv || - src.op().type().id()==ID_floatbv || - src.op().type().id()==ID_c_bool || - src.op().type().id()==ID_pointer) + + return simplify_expr(struct_exprt{std::move(elements), struct_type}, ns); +} + +/// Apply a byte update \p src to a union typed operand using the byte array +/// \p value_as_byte_array as update value. +/// \param src: Original byte-update expression +/// \param union_type: Type of the operand +/// \param value_as_byte_array: Update value as an array of bytes +/// \param non_const_update_bound: If set, (symbolically) constrain updates such +/// as to at most update \p non_const_update_bound elements +/// \param ns: Namespace +/// \return Union expression reflecting all updates. +static exprt lower_byte_update_union( + const byte_update_exprt &src, + const union_typet &union_type, + const array_exprt &value_as_byte_array, + const optionalt &non_const_update_bound, + const namespacet &ns) +{ + const union_typet::componentst &components = union_type.components(); + + mp_integer max_width = 0; + typet max_comp_type; + irep_idt max_comp_name; + + for(const auto &comp : components) { - // do a shift, mask and OR - const auto type_width = pointer_offset_bits(src.op().type(), ns); + auto element_width = pointer_offset_bits(comp.type(), ns); + + if(!element_width.has_value() || *element_width <= max_width) + continue; + + max_width = *element_width; + max_comp_type = comp.type(); + max_comp_name = comp.get_name(); + } + + PRECONDITION_WITH_DIAGNOSTICS( + max_width > 0, + "lower_byte_update of union of unknown size is not supported"); + + byte_update_exprt bu = src; + bu.op() = member_exprt{src.op(), max_comp_name, max_comp_type}; + bu.type() = max_comp_type; + + return union_exprt{ + max_comp_name, + lower_byte_update(bu, value_as_byte_array, non_const_update_bound, ns), + src.type()}; +} + +/// Apply a byte update \p src using the byte array \p value_as_byte_array as +/// update value. +/// \param src: Original byte-update expression +/// \param value_as_byte_array: Update value as an array of bytes +/// \param non_const_update_bound: If set, (symbolically) constrain updates such +/// as to at most update \p non_const_update_bound elements +/// \param ns: Namespace +/// \return Expression reflecting all updates. +static exprt lower_byte_update( + const byte_update_exprt &src, + const array_exprt &value_as_byte_array, + const optionalt &non_const_update_bound, + const namespacet &ns) +{ + if(src.type().id() == ID_array || src.type().id() == ID_vector) + { + optionalt subtype; + if(src.type().id() == ID_vector) + subtype = to_vector_type(src.type()).subtype(); + else + subtype = to_array_type(src.type()).subtype(); + + auto element_width = pointer_offset_bits(*subtype, ns); + CHECK_RETURN(element_width.has_value()); + CHECK_RETURN(*element_width > 0); + CHECK_RETURN(*element_width % 8 == 0); + + if(*element_width == 8) + return lower_byte_update_byte_array_vector( + src, *subtype, value_as_byte_array, non_const_update_bound, ns); + else + return lower_byte_update_array_vector( + src, *subtype, value_as_byte_array, non_const_update_bound, ns); + } + else if(src.type().id() == ID_struct || src.type().id() == ID_struct_tag) + { + exprt result = lower_byte_update_struct( + src, + to_struct_type(ns.follow(src.type())), + value_as_byte_array, + non_const_update_bound, + ns); + result.type() = src.type(); + return result; + } + else if(src.type().id() == ID_union || src.type().id() == ID_union_tag) + { + exprt result = lower_byte_update_union( + src, + to_union_type(ns.follow(src.type())), + value_as_byte_array, + non_const_update_bound, + ns); + result.type() = src.type(); + return result; + } + else if( + can_cast_type(src.type()) || + src.type().id() == ID_c_enum || src.type().id() == ID_c_enum_tag) + { + // mask out the bits to be updated, shift the value according to the + // offset and bit-or + const auto type_width = pointer_offset_bits(src.type(), ns); CHECK_RETURN(type_width.has_value() && *type_width > 0); - const std::size_t width = numeric_cast_v(*type_width); + const std::size_t type_bits = numeric_cast_v(*type_width); - INVARIANT( - element_size * 8 <= width, - "element bit width must not be larger than width indicated by type"); + const std::size_t update_size = value_as_byte_array.operands().size(); + const std::size_t width = std::max(type_bits, update_size * 8); + + const bool is_little_endian = src.id() == ID_byte_update_little_endian; // build mask - exprt mask= - from_integer(power(2, element_size*8)-1, unsignedbv_typet(width)); - - // zero-extend the value, but only if needed - exprt value_extended; - - if(width > element_size * 8) - value_extended = concatenation_exprt( - from_integer( - 0, - unsignedbv_typet( - width - numeric_cast_v(element_size) * 8)), - src.value(), - src.op().type()); + exprt mask; + if(is_little_endian) + mask = + from_integer(power(2, update_size * 8) - 1, unsignedbv_typet{width}); else - value_extended = src.value(); + mask = from_integer( + power(2, width) - power(2, width - update_size * 8), + unsignedbv_typet{width}); const typet &offset_type = src.offset().type(); - mult_exprt offset_times_eight(src.offset(), from_integer(8, offset_type)); - - binary_predicate_exprt offset_ge_zero( - offset_times_eight, - ID_ge, - from_integer(0, offset_type)); - - // Shift the mask and value. - // Note either shift might discard some of the new value's bits. - exprt mask_shifted; - exprt value_shifted; - if(negative_offset) + mult_exprt offset_times_eight{src.offset(), from_integer(8, offset_type)}; + + const binary_predicate_exprt offset_ge_zero{ + offset_times_eight, ID_ge, from_integer(0, offset_type)}; + + if_exprt mask_shifted{offset_ge_zero, + shl_exprt{mask, offset_times_eight}, + lshr_exprt{mask, offset_times_eight}}; + if(!is_little_endian) + mask_shifted.true_case().swap(mask_shifted.false_case()); + + // original_bits &= ~mask + exprt val_before = + typecast_exprt::conditional_cast(src.op(), unsignedbv_typet{type_bits}); + if(width > type_bits) { - // In this case we shift right, to mask out higher addresses - // rather than left to mask out lower ones as usual. - mask_shifted=lshr_exprt(mask, offset_times_eight); - value_shifted=lshr_exprt(value_extended, offset_times_eight); + val_before = concatenation_exprt{ + from_integer(0, unsignedbv_typet{width - type_bits}), + val_before, + unsignedbv_typet{width}}; + + if(!is_little_endian) + val_before.op0().swap(val_before.op1()); } - else + bitand_exprt bitand_expr{val_before, bitnot_exprt{mask_shifted}}; + + // concatenate and zero-extend the value + concatenation_exprt value{exprt::operandst{value_as_byte_array.operands()}, + unsignedbv_typet{update_size * 8}}; + if(is_little_endian) + std::reverse(value.operands().begin(), value.operands().end()); + + exprt zero_extended; + if(width > update_size * 8) { - mask_shifted=shl_exprt(mask, offset_times_eight); - value_shifted=shl_exprt(value_extended, offset_times_eight); + zero_extended = concatenation_exprt{ + from_integer(0, unsignedbv_typet{width - update_size * 8}), + value, + unsignedbv_typet{width}}; + + if(!is_little_endian) + zero_extended.op0().swap(zero_extended.op1()); } + else + zero_extended = value; - // original_bits &= ~mask - bitand_exprt bitand_expr(src.op(), bitnot_exprt(mask_shifted)); + // shift the value + if_exprt value_shifted{offset_ge_zero, + shl_exprt{zero_extended, offset_times_eight}, + lshr_exprt{zero_extended, offset_times_eight}}; + if(!is_little_endian) + value_shifted.true_case().swap(value_shifted.false_case()); // original_bits |= newvalue - bitor_exprt bitor_expr(bitand_expr, value_shifted); + bitor_exprt bitor_expr{bitand_expr, value_shifted}; - return simplify_expr(bitor_expr, ns); + if(!is_little_endian && width > type_bits) + { + return simplify_expr( + typecast_exprt::conditional_cast( + extractbits_exprt{bitor_expr, + width - 1, + width - type_bits, + unsignedbv_typet{type_bits}}, + src.type()), + ns); + } + + return simplify_expr( + typecast_exprt::conditional_cast(bitor_expr, src.type()), ns); } else { PRECONDITION_WITH_DIAGNOSTICS( - false, - "flatten_byte_update can only do arrays and scalars right now", - src.op().type().id_string()); + false, "lower_byte_update does not yet support ", src.type().id_string()); } } -static exprt lower_byte_update( - const byte_update_exprt &src, - const namespacet &ns) +exprt lower_byte_update(const byte_update_exprt &src, const namespacet &ns) { - return lower_byte_update(src, ns, false); + DATA_INVARIANT( + src.id() == ID_byte_update_little_endian || + src.id() == ID_byte_update_big_endian, + "byte update expression should either be little or big endian"); + + // An update of a void-typed object or update by a void-typed value is the + // source operand (this is questionable, but may arise when dereferencing + // invalid pointers). + if(src.type().id() == ID_empty || src.value().type().id() == ID_empty) + return src.op(); + + // byte_update lowering proceeds as follows: + // 1) Determine the size of the update, with the size of the object to be + // updated as an upper bound. We fail if neither can be determined. + // 2) Turn the update value into a byte array of the size determined above. + // 3) If the offset is not constant, turn the object into a byte array, and + // use a "with" expression to encode the update; else update the values in + // place. + // 4) Construct a new object. + std::size_t max_update_bytes = 0; + optionalt non_const_update_bound; + exprt update_size_expr = size_of_expr(src.value().type(), ns); + CHECK_RETURN(update_size_expr.is_not_nil()); + + simplify(update_size_expr, ns); + if(update_size_expr.is_constant()) + max_update_bytes = + numeric_cast_v(to_constant_expr(update_size_expr)); + else + { + exprt object_size_expr = size_of_expr(src.type(), ns); + CHECK_RETURN(object_size_expr.is_not_nil()); + simplify(object_size_expr, ns); + if(!object_size_expr.is_constant()) + { + throw non_constant_widtht(src, update_size_expr); + } + + max_update_bytes = + numeric_cast_v(to_constant_expr(object_size_expr)); + non_const_update_bound = std::move(update_size_expr); + } + + const irep_idt extract_opcode = src.id() == ID_byte_update_little_endian + ? ID_byte_extract_little_endian + : ID_byte_extract_big_endian; + + const byte_extract_exprt byte_extract_expr{ + extract_opcode, + src.value(), + from_integer(0, src.offset().type()), + array_typet{unsignedbv_typet{8}, + from_integer(max_update_bytes, size_type())}}; + + const array_exprt value_as_byte_array = + to_array_expr(lower_byte_extract(byte_extract_expr, ns)); + + CHECK_RETURN(value_as_byte_array.operands().size() == max_update_bytes); + + return lower_byte_update( + src, value_as_byte_array, non_const_update_bound, ns); } bool has_byte_operator(const exprt &src) diff --git a/src/solvers/lowering/expr_lowering.h b/src/solvers/lowering/expr_lowering.h index 2f033a45d13..d48049b87b0 100644 --- a/src/solvers/lowering/expr_lowering.h +++ b/src/solvers/lowering/expr_lowering.h @@ -12,11 +12,30 @@ Author: Michael Tautschnig #include class byte_extract_exprt; +class byte_update_exprt; class namespacet; class popcount_exprt; +/// Rewrite a byte extract expression to more fundamental operations. +/// \param src: Byte extract expression +/// \param ns: Namespace +/// \return Semantically equivalent expression that does not contain any \ref +/// byte_extract_exprt or \ref byte_update_exprt. exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns); +/// Rewrite a byte update expression to more fundamental operations. +/// \param src: Byte update expression +/// \param ns: Namespace +/// \return Semantically equivalent expression that does not contain any \ref +/// byte_extract_exprt or \ref byte_update_exprt. +exprt lower_byte_update(const byte_update_exprt &src, const namespacet &ns); + +/// Rewrite an expression possibly containing byte-extract or -update +/// expressions to more fundamental operations. +/// \param src: Input expression +/// \param ns: Namespace +/// \return Semantically equivalent expression that does not contain any \ref +/// byte_extract_exprt or \ref byte_update_exprt. exprt lower_byte_operators(const exprt &src, const namespacet &ns); bool has_byte_operator(const exprt &src); diff --git a/src/solvers/smt2/smt2_conv.cpp b/src/solvers/smt2/smt2_conv.cpp index 9063595d499..5aeb5d43c4e 100644 --- a/src/solvers/smt2/smt2_conv.cpp +++ b/src/solvers/smt2/smt2_conv.cpp @@ -622,102 +622,11 @@ void smt2_convt::convert_byte_extract(const byte_extract_exprt &expr) void smt2_convt::convert_byte_update(const byte_update_exprt &expr) { - #if 0 - // The situation: expr.op0 needs to be split in 3 parts - // |<--- L --->|<--- M --->|<--- R --->| - // where M is the expr.op1'th byte - // We need to output L expr.op2 R - - mp_integer i = numeric_cast_v(expr.op()); - - std::size_t total_width=boolbv_width(expr.op().type()); - CHECK_RETURN_WITH_DIAGNOSTICS( - total_width != 0, - "failed to get width of byte_update"); - - std::size_t value_width=boolbv_width(expr.value().type()); - - mp_integer upper, lower; // of the byte - mp_integer max=total_width-1; - - if(expr.id()==ID_byte_update_little_endian) - { - lower = i*8; - upper = lower+value_width-1; - } - else if(expr.id()==ID_byte_update_big_endian) - { - upper = max-(i*8); - lower = max-((i+1)*8-1); - } - else - UNEXPECTEDCASE("byte update neither big nor little endian"); - - unflatten(BEGIN, expr.type()); - - if(upper==max) - { - if(lower==0) // the update expression is expr.value() - { - flatten2bv(expr.value()); - } - else // uppermost byte selected, only R needed - { - out << "(concat "; - flatten2bv(expr.value()); - out << " ((_ extract " << lower-1 << " 0) "; - flatten2bv(expr.op()); - out << "))"; - } - } - else - { - if(lower==0) // lowermost byte selected, only L needed - { - out << "(concat "; - out << "((_ extract " << max << " " << (upper+1) << ") "; - flatten2bv(expr.op()); - out << ") "; - flatten2bv(expr.value()); - out << ")"; - } - else // byte in the middle selected, L & R needed - { - out << "(concat (concat "; - out << "((_ extract " << max << " " << (upper+1) << ") "; - flatten2bv(expr.op()); - out << ") "; - flatten2bv(expr.value()); - out << ") ((_ extract " << (lower-1) << " 0) "; - flatten2bv(expr.op()); - out << "))"; - } - } - - unflatten(END, expr.type()); - - #else - - // We'll do an AND-mask for op(), and then OR-in - // the value() shifted by the offset * 8. - - std::size_t total_width=boolbv_width(expr.op().type()); - std::size_t value_width=boolbv_width(expr.value().type()); - - mp_integer mask=power(2, value_width)-1; - exprt one_mask=from_integer(mask, unsignedbv_typet(total_width)); - - const mult_exprt distance( - expr.offset(), from_integer(8, expr.offset().type())); - - const bitand_exprt and_expr(expr.op(), bitnot_exprt(one_mask)); - const typecast_exprt ext_value(expr.value(), one_mask.type()); - const bitor_exprt or_expr(and_expr, shl_exprt(ext_value, distance)); - + // we just run the flattener + exprt flattened_expr = lower_byte_update(expr, ns); unflatten(wheret::BEGIN, expr.type()); - flatten2bv(or_expr); + convert_expr(flattened_expr); unflatten(wheret::END, expr.type()); - #endif } literalt smt2_convt::convert(const exprt &expr) diff --git a/unit/solvers/lowering/byte_operators.cpp b/unit/solvers/lowering/byte_operators.cpp index 44ef7341f08..6290c725bed 100644 --- a/unit/solvers/lowering/byte_operators.cpp +++ b/unit/solvers/lowering/byte_operators.cpp @@ -136,13 +136,10 @@ SCENARIO("byte_extract_lowering", "[core][solvers][lowering][byte_extract]") std::vector types = { struct_typet({{"comp1", u16}, {"comp2", u16}}), struct_typet({{"comp1", u32}, {"comp2", u64}}), -#if 0 - // not currently handled: components not byte aligned struct_typet({{"comp1", u32}, {"compX", c_bit_field_typet(u8, 4)}, {"pad", c_bit_field_typet(u8, 4)}, {"comp2", u8}}), -#endif union_typet({{"compA", u32}, {"compB", u64}}), c_enum_typet(u16), c_enum_typet(unsignedbv_typet(128)), @@ -224,8 +221,8 @@ SCENARIO("byte_extract_lowering", "[core][solvers][lowering][byte_extract]") REQUIRE(!has_subexpr(lower_be, ID_byte_extract_little_endian)); REQUIRE(!has_subexpr(lower_be, ID_byte_extract_big_endian)); REQUIRE(lower_be.type() == be.type()); - REQUIRE(lower_be == r); - REQUIRE(lower_be_s == r); + REQUIRE(lower_be == *r); + REQUIRE(lower_be_s == *r); } } } @@ -262,8 +259,6 @@ SCENARIO("byte_update_lowering", "[core][solvers][lowering][byte_update]") REQUIRE(lower_bu1.type() == bu1.type()); REQUIRE(lower_bu1_s == from_integer(0xdead42ef, unsignedbv_typet(32))); -#if 0 - // this is currently broken, #2058 will fix it byte_update_exprt bu2 = bu1; bu2.id(ID_byte_update_big_endian); const exprt lower_bu2 = lower_byte_operators(bu2, ns); @@ -273,7 +268,6 @@ SCENARIO("byte_update_lowering", "[core][solvers][lowering][byte_update]") REQUIRE(!has_subexpr(lower_bu2, ID_byte_update_big_endian)); REQUIRE(lower_bu2.type() == bu2.type()); REQUIRE(lower_bu2_s == from_integer(0xde42beef, unsignedbv_typet(32))); -#endif } } @@ -291,32 +285,27 @@ SCENARIO("byte_update_lowering", "[core][solvers][lowering][byte_update]") constant_exprt size = from_integer(8, size_type()); std::vector types = { - // TODO: only arrays and scalars - // struct_typet({{"comp1", u16}, {"comp2", u16}}), - // struct_typet({{"comp1", u32}, {"comp2", u64}}), -#if 0 - // not currently handled: components not byte aligned + struct_typet({{"comp1", u16}, {"comp2", u16}}), + struct_typet({{"comp1", u32}, {"comp2", u64}}), struct_typet({{"comp1", u32}, {"compX", c_bit_field_typet(u8, 4)}, {"pad", c_bit_field_typet(u8, 4)}, {"comp2", u8}}), -#endif - // TODO: only arrays and scalars - // union_typet({{"compA", u32}, {"compB", u64}}), - // c_enum_typet(u16), - // c_enum_typet(unsignedbv_typet(128)), - // array_typet(u8, size), - // array_typet(s32, size), - // array_typet(u64, size), + union_typet({{"compA", u32}, {"compB", u64}}), + c_enum_typet(u16), + c_enum_typet(unsignedbv_typet(128)), + array_typet(u8, size), + array_typet(s32, size), + array_typet(u64, size), unsignedbv_typet(24), unsignedbv_typet(128), signedbv_typet(24), signedbv_typet(128), // ieee_float_spect::single_precision().to_type(), + // generates the correct value, but remains wrapped in a typecast // pointer_typet(u64, 64), - // TODO: only arrays and scalars - // vector_typet(u8, size), - // vector_typet(u64, size), + vector_typet(u8, size), + vector_typet(u64, size), // complex_typet(s16), // complex_typet(u64) }; @@ -388,10 +377,8 @@ SCENARIO("byte_update_lowering", "[core][solvers][lowering][byte_update]") REQUIRE(!has_subexpr(lower_bu, ID_byte_extract_big_endian)); REQUIRE(!has_subexpr(lower_bu, ID_byte_extract_little_endian)); REQUIRE(lower_bu.type() == bu.type()); - // TODO: does not currently hold - // REQUIRE(lower_bu == r); - // TODO: does not currently hold - // REQUIRE(lower_bu_s == r); + REQUIRE(lower_bu == *r); + REQUIRE(lower_bu_s == *r); } } }