diff --git a/regression/cbmc/Array_operations4/main.c b/regression/cbmc/Array_operations4/main.c new file mode 100644 index 00000000000..18cb51e1ebf --- /dev/null +++ b/regression/cbmc/Array_operations4/main.c @@ -0,0 +1,14 @@ +int main() +{ + char source[8]; + int int_source[2]; + int target[4]; + int n; + if(n >= 0 && n < 3) + { + __CPROVER_array_replace(&target[n], source); + __CPROVER_array_replace(&target[n], int_source); + __CPROVER_assert(target[n] == int_source[0], ""); + __CPROVER_assert(target[n + 1] == int_source[1], ""); + } +} diff --git a/regression/cbmc/Array_operations4/program-only.desc b/regression/cbmc/Array_operations4/program-only.desc new file mode 100644 index 00000000000..89aef7588d2 --- /dev/null +++ b/regression/cbmc/Array_operations4/program-only.desc @@ -0,0 +1,13 @@ +CORE paths-lifo-expected-failure +main.c +--program-only +target!0@1#2 == +target!0@1#3 == +^EXIT=0$ +^SIGNAL=0$ +-- +byte_update_ +-- +This test demonstrates that we can simplify byte_update expressions to, e.g., +WITH expressions. +Disabled for paths-lifo mode, which does not support --program-only. diff --git a/regression/cbmc/Array_operations4/test.desc b/regression/cbmc/Array_operations4/test.desc new file mode 100644 index 00000000000..766e78589d2 --- /dev/null +++ b/regression/cbmc/Array_operations4/test.desc @@ -0,0 +1,9 @@ +CORE +main.c + +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +-- diff --git a/regression/cbmc/havoc_slice/functional.c b/regression/cbmc/havoc_slice/functional.c new file mode 100644 index 00000000000..01ab6686910 --- /dev/null +++ b/regression/cbmc/havoc_slice/functional.c @@ -0,0 +1,109 @@ +#include +#include + +/* + +When translating to SMT, structs are represented by algebraic datatypes (ADTs) +and arrays of structs by arrays of ADTs. + +When forming a pointer ranging over an array of struct using a nondeterministic +index, the pointer offset appears completely non-deterministic to CBMC, and +in-place updates made using assignments or __CPROVER_array_replace then expand +into large sequences of byte_updates ranging over the whole array. + +If we could somehow track the fact that a pointer formed using arr[i] is still +aligned on array cell boundaries, i.e. is of the form i*sizeof(T) where T is the +type of the array, across intermediate variables and assignments then we would +be able to encode these cases in SMT more optimally: + +T arr[N]; +size_t i = nondetd_size_t(); +if (i < N) { + T *ai = &arr[i]; + T v = nondet_T(); + *ai = v; + // here we have ai of the form &a[0] + i*sizeof(T) assigned with a value of +size sizeof(T) + // can be modeled as a functional array update at index i. +} + +size_t k = nondet_size_t(); +if (k < N) { + size_t S = N-k; + T nondet[S]; + T *ak = &a[k]; + __CPROVER_array_replace(ak, nondet); + // here we have ai of the form &a[0] + k*sizeof(T) updated in place with a +value of size k*sizeof(T) + // can be modeled as a functional slice update at index k with k elements. +} + +At the moment these constructs blow up with --z3 and --bitwuzla +*/ + +// #define N 4 // use 8, 16, ... to see the blowup +#define K 4 + +typedef struct +{ + int32_t coeffs[N]; +} vec_N; + +typedef struct +{ + vec_N vec[K]; +} vec_K_N; + +unsigned int __VERIFIER_nondet_unsigned_int(); +vec_N __VERIFIER_nondet_vec_N(); + +int main(void) +{ + vec_K_N *v = malloc(sizeof(vec_K_N)); + __CPROVER_assume(v); + + unsigned int i = __VERIFIER_nondet_unsigned_int(); + + // models a nondet loop step from an arbitrary state + if(i < K) + { +#ifdef ASSIGN_DIRECT + // nondet assignment without indirection through a + // simplifies to a functional update + v->vec[i] = __VERIFIER_nondet_vec_N(); +#endif + + // simulates how symex models argument passing for a function call + vec_N *__arg = &v->vec[i]; + vec_N *a = __arg; + +#ifdef ASSIGN + // nondet assignment with indirection through a + // currently does NOT simplifies to a functional update but ultimately + // should + *a = __VERIFIER_nondet_vec_N(); +#endif + +#ifdef SLICE_BYTES + // Modeled as a byte slice operation without indirection + // does NOT simplify to a functional array update due to lack of pattern + // matching on the pointer offset expression. + // We could realize the pointer offset is of the form i*16 and that the + // new value is of size 16 too but we currently don't. + char nondet[sizeof(vec_N)]; + __CPROVER_array_replace((char *)a, nondet); +#endif + +#ifdef SLICE_TYPED + // Modeled as a typed slice operation without indirection. + // Does NOT simplify to a functional array update due to lack of pattern + // matching on the pointer offset expression and types. + // We could realize the pointer offset is of the form i*16 and that the + // new value is of size 16 too but we currently don't. + vec_N nondet[1]; + __CPROVER_array_replace(a, nondet); +#endif + __CPROVER_assert(a->coeffs[0] > 0, "expected to fail"); + } + return 0; +} diff --git a/regression/cbmc/havoc_slice/functional.desc b/regression/cbmc/havoc_slice/functional.desc new file mode 100644 index 00000000000..f5fee1bf191 --- /dev/null +++ b/regression/cbmc/havoc_slice/functional.desc @@ -0,0 +1,8 @@ +CORE +functional.c +-DN=4 -DASSIGN_DIRECT +^VERIFICATION FAILED$ +^EXIT=10$ +^SIGNAL=0$ +-- +^warning: ignoring diff --git a/regression/cbmc/havoc_slice/functional_assign.desc b/regression/cbmc/havoc_slice/functional_assign.desc new file mode 100644 index 00000000000..026a195c27d --- /dev/null +++ b/regression/cbmc/havoc_slice/functional_assign.desc @@ -0,0 +1,10 @@ +CORE +functional.c +-DN=4 -DASSIGN --program-only +^EXIT=0$ +^SIGNAL=0$ +-- +byte_update +-- +We want these tests not to produce any byte_update expressions, but instead want +updates at specific array indices. diff --git a/regression/cbmc/havoc_slice/functional_assign_direct.desc b/regression/cbmc/havoc_slice/functional_assign_direct.desc new file mode 100644 index 00000000000..c94c3187aba --- /dev/null +++ b/regression/cbmc/havoc_slice/functional_assign_direct.desc @@ -0,0 +1,10 @@ +CORE +functional.c +-DN=4 -DASSIGN_DIRECT --program-only +^EXIT=0$ +^SIGNAL=0$ +-- +byte_update +-- +We want these tests not to produce any byte_update expressions, but instead want +updates at specific array indices. diff --git a/regression/cbmc/havoc_slice/functional_slice_bytes.desc b/regression/cbmc/havoc_slice/functional_slice_bytes.desc new file mode 100644 index 00000000000..03778bda8ca --- /dev/null +++ b/regression/cbmc/havoc_slice/functional_slice_bytes.desc @@ -0,0 +1,10 @@ +CORE +functional.c +-DN=4 -DSLICE_BYTES --program-only +^EXIT=0$ +^SIGNAL=0$ +-- +byte_update +-- +We want these tests not to produce any byte_update expressions, but instead want +updates at specific array indices. diff --git a/regression/cbmc/havoc_slice/functional_slice_typed.desc b/regression/cbmc/havoc_slice/functional_slice_typed.desc new file mode 100644 index 00000000000..26a54ba9b74 --- /dev/null +++ b/regression/cbmc/havoc_slice/functional_slice_typed.desc @@ -0,0 +1,10 @@ +CORE +functional.c +-DN=4 -DSLICE_TYPED --program-only +^EXIT=0$ +^SIGNAL=0$ +-- +byte_update +-- +We want these tests not to produce any byte_update expressions, but instead want +updates at specific array indices. diff --git a/regression/cbmc/trace-values/unbounded_array.c b/regression/cbmc/trace-values/unbounded_array.c index 76d23252541..654b04707cf 100644 --- a/regression/cbmc/trace-values/unbounded_array.c +++ b/regression/cbmc/trace-values/unbounded_array.c @@ -6,6 +6,7 @@ int main(int argc, char *argv[]) unsigned long size; __CPROVER_assume(size < 100 && size > 10); int *array = malloc(size * sizeof(int)); + __CPROVER_assume(array); array[size - 1] = 42; int fixed_array[] = {1, 2}; __CPROVER_array_replace(array, fixed_array); diff --git a/regression/validate-trace-xml-schema/check.py b/regression/validate-trace-xml-schema/check.py index 9a5ad7426f6..d544778fc37 100644 --- a/regression/validate-trace-xml-schema/check.py +++ b/regression/validate-trace-xml-schema/check.py @@ -35,9 +35,14 @@ ['show_properties1', 'test.desc'], # program-only instead of trace ['vla1', 'program-only.desc'], + ['Array_operations4', 'program-only.desc'], ['Pointer_Arithmetic19', 'test.desc'], ['Quantifiers-simplify', 'simplify_not_forall.desc'], ['array-cell-sensitivity15', 'test.desc'], + ['havoc_slice', 'functional_assign.desc'], + ['havoc_slice', 'functional_assign_direct.desc'], + ['havoc_slice', 'functional_slice_bytes.desc'], + ['havoc_slice', 'functional_slice_typed.desc'], ['saturating_arithmetric', 'output-formula.desc'], # these test for invalid command line handling ['bad_option', 'test_multiple.desc'], diff --git a/src/util/pointer_offset_size.cpp b/src/util/pointer_offset_size.cpp index e39b355fd86..53ba12b3308 100644 --- a/src/util/pointer_offset_size.cpp +++ b/src/util/pointer_offset_size.cpp @@ -687,7 +687,143 @@ std::optional get_subexpression_at_offset( const auto offset_bytes = numeric_cast(offset); if(!offset_bytes.has_value()) - return {}; + { + // offset is not a constant; try to see whether it is a multiple of a + // constant, or a sum that involves a multiple of a constant + if(auto array_type = type_try_dynamic_cast(expr.type())) + { + const auto target_size_bits = pointer_offset_bits(target_type, ns); + const auto elem_size_bits = + pointer_offset_bits(array_type->element_type(), ns); + + // no arrays of zero-, or unknown-sized elements, or ones where elements + // have a bit-width that isn't a multiple of bytes + if( + !target_size_bits.has_value() || !elem_size_bits.has_value() || + *elem_size_bits <= 0 || + *elem_size_bits % config.ansi_c.char_width != 0 || + *target_size_bits != *elem_size_bits) + { + return {}; + } + + // if we have an offset C + x (where C is a constant) we can try to + // recurse by first looking at the member at offset C + if( + offset.id() == ID_plus && offset.operands().size() == 2 && + (to_multi_ary_expr(offset).op0().is_constant() || + to_multi_ary_expr(offset).op1().is_constant())) + { + const plus_exprt &offset_plus = to_plus_expr(offset); + const auto &const_factor = numeric_cast_v(to_constant_expr( + offset_plus.op0().is_constant() ? offset_plus.op0() + : offset_plus.op1())); + const exprt &other_factor = offset_plus.op0().is_constant() + ? offset_plus.op1() + : offset_plus.op0(); + + auto expr_at_offset_C = + get_subexpression_at_offset(expr, const_factor, target_type, ns); + + if( + expr_at_offset_C.has_value() && expr_at_offset_C->id() == ID_index && + to_index_expr(*expr_at_offset_C).index().is_zero()) + { + return get_subexpression_at_offset( + to_index_expr(*expr_at_offset_C).array(), + other_factor, + target_type, + ns); + } + } + + // give up if the offset expression isn't of the form K * i or i * K + // (where K is a constant) + if( + offset.id() != ID_mult || offset.operands().size() != 2 || + (!to_multi_ary_expr(offset).op0().is_constant() && + !to_multi_ary_expr(offset).op1().is_constant())) + { + return {}; + } + + const mult_exprt &offset_mult = to_mult_expr(offset); + const auto &const_factor = numeric_cast_v(to_constant_expr( + offset_mult.op0().is_constant() ? offset_mult.op0() + : offset_mult.op1())); + const exprt &other_factor = + offset_mult.op0().is_constant() ? offset_mult.op1() : offset_mult.op0(); + + if(const_factor % (*elem_size_bits / config.ansi_c.char_width) != 0) + return {}; + + exprt index = mult_exprt{ + other_factor, + from_integer( + const_factor / (*elem_size_bits / config.ansi_c.char_width), + other_factor.type())}; + + return get_subexpression_at_offset( + index_exprt{ + expr, + typecast_exprt::conditional_cast(index, array_type->index_type())}, + 0, + target_type, + ns); + } + else if( + auto struct_tag_type = + type_try_dynamic_cast(expr.type())) + { + // If the offset expression is of the form K * i or i * K (where K is a + // constant) and the first component of the struct is an array we will + // recurse on that member. + const auto &components = ns.follow_tag(*struct_tag_type).components(); + if( + !components.empty() && + can_cast_type(components.front().type()) && + offset.id() == ID_mult && offset.operands().size() == 2 && + (to_multi_ary_expr(offset).op0().is_constant() || + to_multi_ary_expr(offset).op1().is_constant())) + { + return get_subexpression_at_offset( + member_exprt{expr, components.front()}, offset, target_type, ns); + } + // if we have an offset C + x (where C is a constant) we can try to + // recurse by first looking at the member at offset C + else if( + offset.id() == ID_plus && offset.operands().size() == 2 && + (to_multi_ary_expr(offset).op0().is_constant() || + to_multi_ary_expr(offset).op1().is_constant())) + { + const plus_exprt &offset_plus = to_plus_expr(offset); + const auto &const_factor = numeric_cast_v(to_constant_expr( + offset_plus.op0().is_constant() ? offset_plus.op0() + : offset_plus.op1())); + const exprt &other_factor = offset_plus.op0().is_constant() + ? offset_plus.op1() + : offset_plus.op0(); + + auto expr_at_offset_C = + get_subexpression_at_offset(expr, const_factor, target_type, ns); + + if( + expr_at_offset_C.has_value() && expr_at_offset_C->id() == ID_index && + to_index_expr(*expr_at_offset_C).index().is_zero()) + { + return get_subexpression_at_offset( + to_index_expr(*expr_at_offset_C).array(), + other_factor, + target_type, + ns); + } + } + + return {}; + } + else + return {}; + } else return get_subexpression_at_offset(expr, *offset_bytes, target_type, ns); } diff --git a/src/util/simplify_expr.cpp b/src/util/simplify_expr.cpp index 8b14c8cb55e..b9b73c8bbc9 100644 --- a/src/util/simplify_expr.cpp +++ b/src/util/simplify_expr.cpp @@ -1717,9 +1717,8 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) } } - // the following require a constant offset auto offset = numeric_cast(expr.offset()); - if(!offset.has_value() || *offset < 0) + if(offset.has_value() && *offset < 0) return unchanged(expr); // try to simplify byte_extract(byte_update(...)) @@ -1727,7 +1726,9 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) std::optional update_offset; if(bu) update_offset = numeric_cast(bu->offset()); - if(bu && el_size.has_value() && update_offset.has_value()) + if( + offset.has_value() && bu && el_size.has_value() && + update_offset.has_value()) { // byte_extract(byte_update(root, offset_u, value), offset_e) so that the // update does not affect what is being extracted simplifies to @@ -1775,12 +1776,13 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) // don't do any of the following if endianness doesn't match, as // bytes need to be swapped if( - *offset == 0 && ((expr.id() == ID_byte_extract_little_endian && - config.ansi_c.endianness == - configt::ansi_ct::endiannesst::IS_LITTLE_ENDIAN) || - (expr.id() == ID_byte_extract_big_endian && - config.ansi_c.endianness == - configt::ansi_ct::endiannesst::IS_BIG_ENDIAN))) + offset.has_value() && *offset == 0 && + ((expr.id() == ID_byte_extract_little_endian && + config.ansi_c.endianness == + configt::ansi_ct::endiannesst::IS_LITTLE_ENDIAN) || + (expr.id() == ID_byte_extract_big_endian && + config.ansi_c.endianness == + configt::ansi_ct::endiannesst::IS_BIG_ENDIAN))) { // byte extract of full object is object if(expr.type() == expr.op().type()) @@ -1817,7 +1819,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) return unchanged(expr); if( - expr.op().id() == ID_array_of && + offset.has_value() && expr.op().id() == ID_array_of && to_array_of_expr(expr.op()).op().is_constant()) { const auto const_bits_opt = expr2bits( @@ -1854,7 +1856,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) // in some cases we even handle non-const array_of if( - expr.op().id() == ID_array_of && + offset.has_value() && expr.op().id() == ID_array_of && (*offset * expr.get_bits_per_byte()) % (*el_size) == 0 && *el_size <= pointer_offset_bits(to_array_of_expr(expr.op()).what().type(), ns)) @@ -1870,7 +1872,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) expr2bits(expr.op(), expr.id() == ID_byte_extract_little_endian, ns); if( - bits.has_value() && + offset.has_value() && bits.has_value() && mp_integer(bits->size()) >= *el_size + *offset * expr.get_bits_per_byte()) { // make sure we don't lose bits with structs containing flexible array @@ -1986,7 +1988,9 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) const array_typet &array_type = to_array_type(expr.op().type()); const auto &element_bit_width = pointer_offset_bits(array_type.element_type(), ns); - if(element_bit_width.has_value() && *element_bit_width > 0) + if( + offset.has_value() && element_bit_width.has_value() && + *element_bit_width > 0) { if( *offset > 0 && @@ -2026,7 +2030,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) // try to refine it down to extracting from a member or an index in an array auto subexpr = - get_subexpression_at_offset(expr.op(), *offset, expr.type(), ns); + get_subexpression_at_offset(expr.op(), expr.offset(), expr.type(), ns); if(subexpr.has_value() && subexpr.value() != expr) return changed(simplify_rec(subexpr.value())); // recursive call @@ -2227,14 +2231,134 @@ simplify_exprt::simplify_byte_update(const byte_update_exprt &expr) } } - // the following require a constant offset - if(!offset_int.has_value() || *offset_int < 0) - return unchanged(expr); - // size must be known if(!val_size.has_value() || *val_size == 0) return unchanged(expr); + // byte_update(root, offset, value) is with(root, index, value) when root is + // array-typed, the size of value matches the array-element width, and offset + // is guaranteed to be a multiple of the array-element width + if(auto array_type = type_try_dynamic_cast(root.type())) + { + auto el_size = pointer_offset_bits(array_type->element_type(), ns); + + if(el_size.has_value() && *el_size > 0 && *val_size % *el_size == 0) + { + if( + offset_int.has_value() && + (*offset_int * expr.get_bits_per_byte()) % *el_size == 0) + { + mp_integer base_offset = + (*offset_int * expr.get_bits_per_byte()) / *el_size; + with_exprt result_expr{ + root, + from_integer(base_offset, array_type->index_type()), + byte_extract_exprt{ + matching_byte_extract_id, + value, + from_integer(0, offset.type()), + expr.get_bits_per_byte(), + array_type->element_type()}}; + mp_integer n_elements = *val_size / *el_size; + + for(mp_integer i = 1; i < n_elements; ++i) + { + result_expr.add_to_operands( + from_integer(base_offset + i, array_type->index_type()), + byte_extract_exprt{ + matching_byte_extract_id, + value, + from_integer( + i * (*el_size / expr.get_bits_per_byte()), offset.type()), + expr.get_bits_per_byte(), + array_type->element_type()}); + } + + return changed(simplify_rec(result_expr)); + } + // if we have an offset C + x (where C is a constant) we can try to + // recurse by first looking at the member at offset C + else if( + offset.id() == ID_plus && offset.operands().size() == 2 && + (to_multi_ary_expr(offset).op0().is_constant() || + to_multi_ary_expr(offset).op1().is_constant())) + { + const plus_exprt &offset_plus = to_plus_expr(offset); + const auto &const_factor = offset_plus.op0().is_constant() + ? offset_plus.op0() + : offset_plus.op1(); + const exprt &other_factor = offset_plus.op0().is_constant() + ? offset_plus.op1() + : offset_plus.op0(); + + auto tmp = expr; + tmp.set_offset(const_factor); + exprt expr_at_offset_C = simplify_byte_update(tmp); + + if( + expr_at_offset_C.id() == ID_with && + to_with_expr(expr_at_offset_C).where().is_zero()) + { + tmp.set_op(to_with_expr(expr_at_offset_C).old()); + tmp.set_offset(other_factor); + return changed(simplify_byte_update(tmp)); + } + } + else if( + offset.id() == ID_mult && offset.operands().size() == 2 && + (to_multi_ary_expr(offset).op0().is_constant() || + to_multi_ary_expr(offset).op1().is_constant())) + { + const mult_exprt &offset_mult = to_mult_expr(offset); + const auto &const_factor = numeric_cast_v(to_constant_expr( + offset_mult.op0().is_constant() ? offset_mult.op0() + : offset_mult.op1())); + const exprt &other_factor = offset_mult.op0().is_constant() + ? offset_mult.op1() + : offset_mult.op0(); + + if((const_factor * expr.get_bits_per_byte()) % *el_size == 0) + { + exprt base_offset = mult_exprt{ + other_factor, + from_integer( + (const_factor * expr.get_bits_per_byte()) / *el_size, + other_factor.type())}; + with_exprt result_expr{ + root, + typecast_exprt::conditional_cast( + base_offset, array_type->index_type()), + byte_extract_exprt{ + matching_byte_extract_id, + value, + from_integer(0, offset.type()), + expr.get_bits_per_byte(), + array_type->element_type()}}; + mp_integer n_elements = *val_size / *el_size; + for(mp_integer i = 1; i < n_elements; ++i) + { + result_expr.add_to_operands( + typecast_exprt::conditional_cast( + plus_exprt{base_offset, from_integer(i, base_offset.type())}, + array_type->index_type()), + byte_extract_exprt{ + matching_byte_extract_id, + value, + from_integer( + i * (*el_size / expr.get_bits_per_byte()), offset.type()), + expr.get_bits_per_byte(), + array_type->element_type()}); + } + return changed(simplify_rec(result_expr)); + } + } + } + } + + // the following require a constant offset + if(!offset_int.has_value() || *offset_int < 0) + return unchanged(expr); + // Are we updating (parts of) a struct? Do individual member updates // instead, unless there are non-byte-sized bit fields if(root.type().id() == ID_struct || root.type().id() == ID_struct_tag)