Skip to content

Commit d6de37e

Browse files
committed
Value-set based dereferencing must consider possible index expressions
Value-set based dereferencing must not take an access path through an object that precludes a subsequent index expression from accessing a different part of the object. Such a situation can arise when the value set has a known (constant) offset for the pointer that would identify one particular element in an array (within that object). The code using value-set based dereferencing, however, may be trying to resolve a subexpression of an index expression, where said index expression would lead to a different element that may itself be part of a different array within the same overall object. Fixes: diffblue#8570
1 parent c66ffbe commit d6de37e

File tree

13 files changed

+81
-34
lines changed

13 files changed

+81
-34
lines changed

regression/cbmc/Pointer_array7/main.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ int main()
1818
uint16_t x[2];
1919
x[0] = 1;
2020
x[1] = 2;
21-
uint8_t *y = (uint8_t *)x;
22-
uint16_t z = *((uint16_t *)(y + 1));
21+
uint8_t *y = (uint8_t *)x + 1;
22+
uint16_t z = *((uint16_t *)(y));
2323

2424
#ifdef USE_BIG_ENDIAN
2525
assert(z == 256u);

regression/cbmc/Pointer_array8/main.c

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#pragma pack(push)
2+
#pragma pack(1)
3+
typedef struct
4+
{
5+
int data[2];
6+
} arr;
7+
8+
typedef struct
9+
{
10+
arr vec[2];
11+
} arrvec;
12+
#pragma pack(pop)
13+
14+
int main()
15+
{
16+
arrvec A;
17+
arrvec *x = &A;
18+
__CPROVER_assume(x->vec[1].data[0] < 42);
19+
unsigned k;
20+
__CPROVER_assert(k != 2 || ((int *)x)[k] < 42, "");
21+
}
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CORE no-new-smt
2+
main.c
3+
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
^VERIFICATION SUCCESSFUL$
7+
--
8+
^warning: ignoring

src/goto-programs/rewrite_union.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ static bool restore_union_rec(exprt &expr, const namespacet &ns)
159159
member_exprt{be.op(), comp.get_name(), comp.type()},
160160
be.offset(),
161161
be.type(),
162+
true,
162163
ns);
163164
if(result.has_value())
164165
{

src/goto-symex/field_sensitivity.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ exprt field_sensitivityt::apply(
110110
tmp.remove_level_2();
111111
const member_exprt member{tmp.get_original_expr(), comp};
112112
auto recursive_member =
113-
get_subexpression_at_offset(member, be.offset(), be.type(), ns);
113+
get_subexpression_at_offset(member, be.offset(), be.type(), true, ns);
114114
if(
115115
recursive_member.has_value() &&
116116
(recursive_member->id() == ID_member ||

src/memory-analyzer/analyze_symbol.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ exprt gdb_value_extractort::get_pointer_to_member_value(
327327
struct_symbol_expr,
328328
member_offset,
329329
to_pointer_type(expr.type()).base_type(),
330+
true,
330331
ns);
331332
DATA_INVARIANT(
332333
maybe_member_expr.has_value(), "structure doesn't have member");
@@ -554,7 +555,11 @@ exprt gdb_value_extractort::get_pointer_value(
554555
if(target_expr.type().id() == ID_array)
555556
{
556557
const auto result_indexed_expr = get_subexpression_at_offset(
557-
target_expr, 0, to_pointer_type(zero_expr.type()).base_type(), ns);
558+
target_expr,
559+
0,
560+
to_pointer_type(zero_expr.type()).base_type(),
561+
true,
562+
ns);
558563
CHECK_RETURN(result_indexed_expr.has_value());
559564
if(result_indexed_expr->type() == zero_expr.type())
560565
return *result_indexed_expr;

src/pointer-analysis/value_set_dereference.cpp

+5-4
Original file line numberDiff line numberDiff line change
@@ -643,11 +643,12 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to(
643643
}
644644
}
645645

646-
// try to build a member/index expression - do not use byte_extract
646+
// Try to build a member/index expression instead of using byte_extract;
647+
// don't recurse into aggregate types as the caller's expression may be
648+
// within an index expression, i.e., a further offset beyond o.offset() may
649+
// get added on to it.
647650
auto subexpr = get_subexpression_at_offset(
648-
root_object, o.offset(), dereference_type, ns);
649-
if(subexpr.has_value())
650-
simplify(subexpr.value(), ns);
651+
root_object, o.offset(), dereference_type, false, ns);
651652
if(
652653
subexpr.has_value() &&
653654
subexpr.value().id() != ID_byte_extract_little_endian &&

src/solvers/flattening/pointer_logic.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ exprt pointer_logict::pointer_expr(
115115
}
116116
}
117117
auto deep_object_opt =
118-
get_subexpression_at_offset(object_expr, pointer.offset, subtype, ns);
118+
get_subexpression_at_offset(object_expr, pointer.offset, subtype, true, ns);
119119
CHECK_RETURN(deep_object_opt.has_value());
120120
exprt deep_object = deep_object_opt.value();
121121
simplify(deep_object, ns);

src/util/pointer_offset_size.cpp

+21-12
Original file line numberDiff line numberDiff line change
@@ -561,17 +561,11 @@ std::optional<exprt> get_subexpression_at_offset(
561561
const exprt &expr,
562562
const mp_integer &offset_bytes,
563563
const typet &target_type_raw,
564+
bool recurse_into_arrays,
564565
const namespacet &ns)
565566
{
566567
if(offset_bytes == 0 && expr.type() == target_type_raw)
567-
{
568-
exprt result = expr;
569-
570-
if(expr.type() != target_type_raw)
571-
result.type() = target_type_raw;
572-
573-
return result;
574-
}
568+
return expr;
575569

576570
if(
577571
offset_bytes == 0 && expr.type().id() == ID_pointer &&
@@ -611,6 +605,7 @@ std::optional<exprt> get_subexpression_at_offset(
611605
member,
612606
offset_bytes - m_offset_bits / config.ansi_c.char_width,
613607
target_type_raw,
608+
recurse_into_arrays,
614609
ns);
615610
}
616611

@@ -638,9 +633,18 @@ std::optional<exprt> get_subexpression_at_offset(
638633
const auto offset_inside_elem = offset_bytes % elem_size_bytes;
639634
const auto target_size_bytes =
640635
*target_size_bits / config.ansi_c.char_width;
641-
// only recurse if the cell completely contains the target
642636
if(
643-
index < array_size &&
637+
index < array_size && array_type.element_type() == target_type_raw &&
638+
offset_inside_elem == 0)
639+
{
640+
return index_exprt{
641+
expr,
642+
from_integer(
643+
offset_bytes / elem_size_bytes, array_type.index_type())};
644+
}
645+
// only recurse if the cell completely contains the target
646+
else if(
647+
recurse_into_arrays && index < array_size &&
644648
offset_inside_elem + target_size_bytes <= elem_size_bytes)
645649
{
646650
return get_subexpression_at_offset(
@@ -650,6 +654,7 @@ std::optional<exprt> get_subexpression_at_offset(
650654
offset_bytes / elem_size_bytes, array_type.index_type())),
651655
offset_inside_elem,
652656
target_type_raw,
657+
recurse_into_arrays,
653658
ns);
654659
}
655660
}
@@ -676,7 +681,7 @@ std::optional<exprt> get_subexpression_at_offset(
676681
{
677682
const member_exprt member(expr, component.get_name(), component.type());
678683
return get_subexpression_at_offset(
679-
member, offset_bytes, target_type_raw, ns);
684+
member, offset_bytes, target_type_raw, recurse_into_arrays, ns);
680685
}
681686
}
682687
}
@@ -689,12 +694,16 @@ std::optional<exprt> get_subexpression_at_offset(
689694
const exprt &expr,
690695
const exprt &offset,
691696
const typet &target_type,
697+
bool recurse_into_arrays,
692698
const namespacet &ns)
693699
{
694700
const auto offset_bytes = numeric_cast<mp_integer>(offset);
695701

696702
if(!offset_bytes.has_value())
697703
return {};
698704
else
699-
return get_subexpression_at_offset(expr, *offset_bytes, target_type, ns);
705+
{
706+
return get_subexpression_at_offset(
707+
expr, *offset_bytes, target_type, recurse_into_arrays, ns);
708+
}
700709
}

src/util/pointer_offset_size.h

+2
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,14 @@ std::optional<exprt> get_subexpression_at_offset(
5656
const exprt &expr,
5757
const mp_integer &offset,
5858
const typet &target_type,
59+
bool recurse_into_arrays,
5960
const namespacet &ns);
6061

6162
std::optional<exprt> get_subexpression_at_offset(
6263
const exprt &expr,
6364
const exprt &offset,
6465
const typet &target_type,
66+
bool recurse_into_arrays,
6567
const namespacet &ns);
6668

6769
#endif // CPROVER_UTIL_POINTER_OFFSET_SIZE_H

src/util/simplify_expr.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -2023,7 +2023,7 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
20232023

20242024
// try to refine it down to extracting from a member or an index in an array
20252025
auto subexpr =
2026-
get_subexpression_at_offset(expr.op(), *offset, expr.type(), ns);
2026+
get_subexpression_at_offset(expr.op(), *offset, expr.type(), true, ns);
20272027
if(subexpr.has_value() && subexpr.value() != expr)
20282028
return changed(simplify_rec(subexpr.value())); // recursive call
20292029

src/util/simplify_expr_struct.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ simplify_exprt::simplify_member(const member_exprt &expr)
242242
if(requested_offset.has_value())
243243
{
244244
auto equivalent_member = get_subexpression_at_offset(
245-
typecast_expr.op(), *requested_offset, expr.type(), ns);
245+
typecast_expr.op(), *requested_offset, expr.type(), true, ns);
246246

247247
// Guess: turning this into a byte-extract operation is not really an
248248
// optimisation.

unit/util/pointer_offset_size.cpp

+11-11
Original file line numberDiff line numberDiff line change
@@ -34,30 +34,30 @@ TEST_CASE("Build subexpression to access element at offset into array")
3434
symbol_exprt a("array", array_type);
3535

3636
{
37-
const auto result = get_subexpression_at_offset(a, 0, t, ns);
37+
const auto result = get_subexpression_at_offset(a, 0, t, true, ns);
3838
REQUIRE(result.value() == index_exprt(a, from_integer(0, c_index_type())));
3939
}
4040

4141
{
42-
const auto result = get_subexpression_at_offset(a, 32 / 8, t, ns);
42+
const auto result = get_subexpression_at_offset(a, 32 / 8, t, true, ns);
4343
REQUIRE(result.value() == index_exprt(a, from_integer(1, c_index_type())));
4444
}
4545

4646
{
4747
const auto result =
48-
get_subexpression_at_offset(a, from_integer(0, size_type()), t, ns);
48+
get_subexpression_at_offset(a, from_integer(0, size_type()), t, true, ns);
4949
REQUIRE(result.value() == index_exprt(a, from_integer(0, c_index_type())));
5050
}
5151

5252
{
5353
const auto result =
54-
get_subexpression_at_offset(a, size_of_expr(t, ns).value(), t, ns);
54+
get_subexpression_at_offset(a, size_of_expr(t, ns).value(), t, true, ns);
5555
REQUIRE(result.value() == index_exprt(a, from_integer(1, c_index_type())));
5656
}
5757

5858
{
5959
const signedbv_typet small_t(8);
60-
const auto result = get_subexpression_at_offset(a, 1, small_t, ns);
60+
const auto result = get_subexpression_at_offset(a, 1, small_t, true, ns);
6161
REQUIRE(
6262
result.value() == make_byte_extract(
6363
index_exprt(a, from_integer(0, c_index_type())),
@@ -67,7 +67,7 @@ TEST_CASE("Build subexpression to access element at offset into array")
6767

6868
{
6969
const signedbv_typet int16_t(16);
70-
const auto result = get_subexpression_at_offset(a, 3, int16_t, ns);
70+
const auto result = get_subexpression_at_offset(a, 3, int16_t, true, ns);
7171
// At offset 3 there are only 8 bits remaining in an element of type t so
7272
// not enough to fill a 16 bit int, so this cannot be transformed in an
7373
// index_exprt.
@@ -94,30 +94,30 @@ TEST_CASE("Build subexpression to access element at offset into struct")
9494
symbol_exprt s("struct", st);
9595

9696
{
97-
const auto result = get_subexpression_at_offset(s, 0, t, ns);
97+
const auto result = get_subexpression_at_offset(s, 0, t, true, ns);
9898
REQUIRE(result.value() == member_exprt(s, "foo", t));
9999
}
100100

101101
{
102-
const auto result = get_subexpression_at_offset(s, 32 / 8, t, ns);
102+
const auto result = get_subexpression_at_offset(s, 32 / 8, t, true, ns);
103103
REQUIRE(result.value() == member_exprt(s, "bar", t));
104104
}
105105

106106
{
107107
const auto result =
108-
get_subexpression_at_offset(s, from_integer(0, size_type()), t, ns);
108+
get_subexpression_at_offset(s, from_integer(0, size_type()), t, true, ns);
109109
REQUIRE(result.value() == member_exprt(s, "foo", t));
110110
}
111111

112112
{
113113
const auto result =
114-
get_subexpression_at_offset(s, size_of_expr(t, ns).value(), t, ns);
114+
get_subexpression_at_offset(s, size_of_expr(t, ns).value(), t, true, ns);
115115
REQUIRE(result.value() == member_exprt(s, "bar", t));
116116
}
117117

118118
{
119119
const signedbv_typet small_t(8);
120-
const auto result = get_subexpression_at_offset(s, 1, small_t, ns);
120+
const auto result = get_subexpression_at_offset(s, 1, small_t, true, ns);
121121
REQUIRE(
122122
result.value() ==
123123
make_byte_extract(

0 commit comments

Comments
 (0)