Skip to content

Commit 3ffb445

Browse files
committed
Simplify multiple-of-element size access to arrays
Array operations may fall back to byte_extract or byte_update expressions in parts of the code base. Simplify this to index or WITH expressions, respectively, when the offset is known to be a multiple of the array-element size, or a constant offset plus a multiple of the array-element size. Fixes: #8617
1 parent 1bf65fa commit 3ffb445

File tree

9 files changed

+352
-22
lines changed

9 files changed

+352
-22
lines changed
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
int main()
2+
{
3+
char source[8];
4+
int int_source[2];
5+
int target[4];
6+
int n;
7+
if(n >= 0 && n < 3)
8+
{
9+
__CPROVER_array_replace(&target[n], source);
10+
__CPROVER_array_replace(&target[n], int_source);
11+
__CPROVER_assert(target[n] == int_source[0], "");
12+
__CPROVER_assert(target[n + 1] == int_source[1], "");
13+
}
14+
}
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
CORE paths-lifo-expected-failure
2+
main.c
3+
--program-only
4+
target!0@1#2 ==
5+
target!0@1#3 ==
6+
^EXIT=0$
7+
^SIGNAL=0$
8+
--
9+
byte_update_
10+
--
11+
This test demonstrates that we can simplify byte_update expressions to, e.g.,
12+
WITH expressions.
13+
Disabled for paths-lifo mode, which does not support --program-only.
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
CORE
2+
main.c
3+
4+
^VERIFICATION SUCCESSFUL$
5+
^EXIT=0$
6+
^SIGNAL=0$
7+
--
8+
^warning: ignoring
9+
--

regression/cbmc/havoc_slice/functional_assign.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
KNOWNBUG
1+
CORE
22
functional.c
33
-DN=4 -DASSIGN --program-only
44
^EXIT=0$

regression/cbmc/havoc_slice/functional_slice_bytes.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
KNOWNBUG
1+
CORE
22
functional.c
33
-DN=4 -DSLICE_BYTES --program-only
44
^EXIT=0$

regression/cbmc/havoc_slice/functional_slice_typed.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
KNOWNBUG
1+
CORE
22
functional.c
33
-DN=4 -DSLICE_TYPED --program-only
44
^EXIT=0$

regression/validate-trace-xml-schema/check.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
['show_properties1', 'test.desc'],
3636
# program-only instead of trace
3737
['vla1', 'program-only.desc'],
38+
['Array_operations4', 'program-only.desc'],
3839
['Pointer_Arithmetic19', 'test.desc'],
3940
['Quantifiers-simplify', 'simplify_not_forall.desc'],
4041
['array-cell-sensitivity15', 'test.desc'],

src/util/pointer_offset_size.cpp

Lines changed: 170 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,16 @@ std::optional<exprt> get_subexpression_at_offset(
678678
expr, from_integer(offset_bytes, c_index_type()), target_type_raw);
679679
}
680680

681+
static bool is_multiplication_by_constant(const exprt &expr)
682+
{
683+
if(expr.id() != ID_mult)
684+
return false;
685+
if(expr.operands().size() != 2)
686+
return false;
687+
return to_multi_ary_expr(expr).op0().is_constant() ||
688+
to_multi_ary_expr(expr).op1().is_constant();
689+
}
690+
681691
std::optional<exprt> get_subexpression_at_offset(
682692
const exprt &expr,
683693
const exprt &offset,
@@ -687,7 +697,166 @@ std::optional<exprt> get_subexpression_at_offset(
687697
const auto offset_bytes = numeric_cast<mp_integer>(offset);
688698

689699
if(!offset_bytes.has_value())
690-
return {};
700+
{
701+
// offset is not a constant; try to see whether it is a multiple of a
702+
// constant, or a sum that involves a multiple of a constant
703+
if(auto array_type = type_try_dynamic_cast<array_typet>(expr.type()))
704+
{
705+
const auto target_size_bits = pointer_offset_bits(target_type, ns);
706+
const auto elem_size_bits =
707+
pointer_offset_bits(array_type->element_type(), ns);
708+
709+
// no arrays of zero-, or unknown-sized elements, or ones where elements
710+
// have a bit-width that isn't a multiple of bytes
711+
if(
712+
!target_size_bits.has_value() || !elem_size_bits.has_value() ||
713+
*elem_size_bits <= 0 ||
714+
*elem_size_bits % config.ansi_c.char_width != 0 ||
715+
*target_size_bits > *elem_size_bits)
716+
{
717+
return {};
718+
}
719+
720+
// If we have an offset C + x (where C is a constant) we can try to
721+
// recurse by first looking at the member at offset C.
722+
if(
723+
offset.id() == ID_plus && offset.operands().size() == 2 &&
724+
(to_multi_ary_expr(offset).op0().is_constant() ||
725+
to_multi_ary_expr(offset).op1().is_constant()))
726+
{
727+
const plus_exprt &offset_plus = to_plus_expr(offset);
728+
const auto &const_factor = numeric_cast_v<mp_integer>(to_constant_expr(
729+
offset_plus.op0().is_constant() ? offset_plus.op0()
730+
: offset_plus.op1()));
731+
const exprt &other_factor = offset_plus.op0().is_constant()
732+
? offset_plus.op1()
733+
: offset_plus.op0();
734+
735+
auto expr_at_offset_C =
736+
get_subexpression_at_offset(expr, const_factor, target_type, ns);
737+
738+
if(
739+
expr_at_offset_C.has_value() && expr_at_offset_C->id() == ID_index &&
740+
to_index_expr(*expr_at_offset_C).index().is_zero())
741+
{
742+
return get_subexpression_at_offset(
743+
to_index_expr(*expr_at_offset_C).array(),
744+
other_factor,
745+
target_type,
746+
ns);
747+
}
748+
}
749+
// If we have an offset that is a sum of multiplications of the form K * i
750+
// or i * K (where K is a constant) then try to recurse while removing one
751+
// element from this sum.
752+
else if(
753+
offset.id() == ID_plus && offset.operands().size() == 2 &&
754+
is_multiplication_by_constant(to_multi_ary_expr(offset).op0()))
755+
{
756+
const plus_exprt &offset_plus = to_plus_expr(offset);
757+
const mult_exprt &mul = to_mult_expr(offset_plus.op0());
758+
const exprt &remaining_offset = offset_plus.op1();
759+
const auto &const_factor = numeric_cast_v<mp_integer>(
760+
to_constant_expr(mul.op0().is_constant() ? mul.op0() : mul.op1()));
761+
const exprt &other_factor =
762+
mul.op0().is_constant() ? mul.op1() : mul.op0();
763+
764+
if(const_factor % (*elem_size_bits / config.ansi_c.char_width) != 0)
765+
return {};
766+
767+
exprt index = mult_exprt{
768+
other_factor,
769+
from_integer(
770+
const_factor / (*elem_size_bits / config.ansi_c.char_width),
771+
other_factor.type())};
772+
773+
return get_subexpression_at_offset(
774+
index_exprt{
775+
expr,
776+
typecast_exprt::conditional_cast(index, array_type->index_type())},
777+
remaining_offset,
778+
target_type,
779+
ns);
780+
}
781+
782+
// give up if the offset expression isn't of the form K * i or i * K
783+
// (where K is a constant)
784+
if(!is_multiplication_by_constant(offset))
785+
{
786+
return {};
787+
}
788+
789+
const mult_exprt &offset_mult = to_mult_expr(offset);
790+
const auto &const_factor = numeric_cast_v<mp_integer>(to_constant_expr(
791+
offset_mult.op0().is_constant() ? offset_mult.op0()
792+
: offset_mult.op1()));
793+
const exprt &other_factor =
794+
offset_mult.op0().is_constant() ? offset_mult.op1() : offset_mult.op0();
795+
796+
if(const_factor % (*elem_size_bits / config.ansi_c.char_width) != 0)
797+
return {};
798+
799+
exprt index = mult_exprt{
800+
other_factor,
801+
from_integer(
802+
const_factor / (*elem_size_bits / config.ansi_c.char_width),
803+
other_factor.type())};
804+
805+
return get_subexpression_at_offset(
806+
index_exprt{
807+
expr,
808+
typecast_exprt::conditional_cast(index, array_type->index_type())},
809+
0,
810+
target_type,
811+
ns);
812+
}
813+
else if(
814+
auto struct_tag_type =
815+
type_try_dynamic_cast<struct_tag_typet>(expr.type()))
816+
{
817+
// If the struct only has a single member then we recurse into that
818+
// member.
819+
const auto &components = ns.follow_tag(*struct_tag_type).components();
820+
if(components.size() == 1)
821+
{
822+
return get_subexpression_at_offset(
823+
member_exprt{expr, components.front()}, offset, target_type, ns);
824+
}
825+
// If we have an offset C + x (where C is a constant) we can try to
826+
// recurse by first looking at the member at offset C.
827+
else if(
828+
offset.id() == ID_plus && offset.operands().size() == 2 &&
829+
(to_multi_ary_expr(offset).op0().is_constant() ||
830+
to_multi_ary_expr(offset).op1().is_constant()))
831+
{
832+
const plus_exprt &offset_plus = to_plus_expr(offset);
833+
const auto &const_factor = numeric_cast_v<mp_integer>(to_constant_expr(
834+
offset_plus.op0().is_constant() ? offset_plus.op0()
835+
: offset_plus.op1()));
836+
const exprt &other_factor = offset_plus.op0().is_constant()
837+
? offset_plus.op1()
838+
: offset_plus.op0();
839+
840+
auto expr_at_offset_C =
841+
get_subexpression_at_offset(expr, const_factor, target_type, ns);
842+
843+
if(
844+
expr_at_offset_C.has_value() && expr_at_offset_C->id() == ID_index &&
845+
to_index_expr(*expr_at_offset_C).index().is_zero())
846+
{
847+
return get_subexpression_at_offset(
848+
to_index_expr(*expr_at_offset_C).array(),
849+
other_factor,
850+
target_type,
851+
ns);
852+
}
853+
}
854+
855+
return {};
856+
}
857+
else
858+
return {};
859+
}
691860
else
692861
return get_subexpression_at_offset(expr, *offset_bytes, target_type, ns);
693862
}

0 commit comments

Comments
 (0)