Skip to content

Commit 0ab3688

Browse files
committed
Generalise get_subexpression_at_offset to extract parts of members
This enables re-use in the simplifier and other places. Adding a unit test of get_subexpression_at_offset.
1 parent 8ca4419 commit 0ab3688

File tree

5 files changed

+194
-41
lines changed

5 files changed

+194
-41
lines changed

src/pointer-analysis/value_set_dereference.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,8 +468,9 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to(
468468
}
469469
else
470470
{
471+
// try to build a member/index expression - do not use byte_extract
471472
exprt subexpr = get_subexpression_at_offset(
472-
root_object_subexpression, o.offset(), dereference_type, ns);
473+
root_object_subexpression, o.offset(), dereference_type, "", ns);
473474
if(subexpr.is_not_nil())
474475
{
475476
// Successfully found a member, array index, or combination thereof

src/util/pointer_offset_size.cpp

Lines changed: 66 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ Author: Daniel Kroening, [email protected]
1212
#include "pointer_offset_size.h"
1313

1414
#include "arith_tools.h"
15+
#include "base_type.h"
16+
#include "byte_operators.h"
1517
#include "c_types.h"
1618
#include "invariant.h"
1719
#include "namespace.h"
@@ -632,64 +634,87 @@ exprt build_sizeof_expr(
632634

633635
exprt get_subexpression_at_offset(
634636
const exprt &expr,
635-
mp_integer offset,
637+
const mp_integer &offset,
636638
const typet &target_type_raw,
639+
const irep_idt &byte_extract_op,
637640
const namespacet &ns)
638641
{
639-
exprt result = expr;
640-
const typet &source_type=ns.follow(result.type());
641-
const typet &target_type=ns.follow(target_type_raw);
642+
if(offset == 0 && base_type_eq(expr.type(), target_type_raw, ns))
643+
{
644+
exprt result = expr;
645+
646+
if(expr.type() != target_type_raw)
647+
result.type() = target_type_raw;
642648

643-
if(offset==0 && source_type==target_type)
644649
return result;
650+
}
651+
652+
const typet &source_type = ns.follow(expr.type());
653+
const auto target_size = pointer_offset_bits(target_type_raw, ns);
654+
if(!target_size.has_value())
655+
return nil_exprt();
645656

646657
if(source_type.id()==ID_struct)
647658
{
648-
const auto &st=to_struct_type(source_type);
649-
const struct_typet::componentst &components=st.components();
650-
member_offset_iterator offsets(st, ns);
651-
while(offsets->first<components.size() &&
652-
offsets->second!=-1 &&
653-
offsets->second<=offset)
659+
const struct_typet &struct_type = to_struct_type(source_type);
660+
661+
mp_integer m_offset_bits = 0;
662+
for(const auto &component : struct_type.components())
654663
{
655-
auto nextit=offsets;
656-
++nextit;
657-
if((offsets->first+1)==components.size() || offset<nextit->second)
664+
const auto m_size = pointer_offset_bits(component.type(), ns);
665+
if(!m_size.has_value())
666+
return nil_exprt();
667+
668+
if(
669+
offset * 8 >= m_offset_bits && m_offset_bits % 8 == 0 &&
670+
offset * 8 + *target_size <= m_offset_bits + *m_size)
658671
{
659-
// This field might be, or might contain, the answer.
660-
result=
661-
member_exprt(
662-
result,
663-
components[offsets->first].get_name(),
664-
components[offsets->first].type());
665-
return
666-
get_subexpression_at_offset(
667-
result, offset-offsets->second, target_type, ns);
672+
const member_exprt member(expr, component.get_name(), component.type());
673+
return get_subexpression_at_offset(
674+
member,
675+
offset - m_offset_bits / 8,
676+
target_type_raw,
677+
byte_extract_op,
678+
ns);
668679
}
669-
++offsets;
680+
681+
m_offset_bits += *m_size;
670682
}
671-
return nil_exprt();
672683
}
673684
else if(source_type.id()==ID_array)
674685
{
675-
// A cell of the array might be, or contain, the subexpression
676-
// we're looking for.
677-
const auto &at=to_array_type(source_type);
678-
679-
auto elem_size = pointer_offset_size(at.subtype(), ns);
680-
681-
if(!elem_size.has_value())
682-
return nil_exprt();
686+
const array_typet &array_type = to_array_type(source_type);
683687

684-
mp_integer cellidx = offset / (*elem_size);
688+
const auto elem_size = pointer_offset_bits(array_type.subtype(), ns);
685689

686-
if(cellidx < 0 || !cellidx.is_long())
690+
// no arrays of non-byte, zero-, or unknown-sized objects
691+
if(!elem_size.has_value() || *elem_size == 0 || *elem_size % 8 != 0)
687692
return nil_exprt();
688693

689-
offset = offset % (*elem_size);
694+
if(*target_size <= *elem_size)
695+
return get_subexpression_at_offset(
696+
index_exprt(
697+
expr, from_integer(offset / (*elem_size / 8), index_type())),
698+
offset % (*elem_size / 8),
699+
target_type_raw,
700+
byte_extract_op,
701+
ns);
702+
}
690703

691-
result=index_exprt(result, from_integer(cellidx, unsignedbv_typet(64)));
692-
return get_subexpression_at_offset(result, offset, target_type, ns);
704+
const auto source_size = pointer_offset_bits(source_type, ns);
705+
if(
706+
offset == 0 && source_size.has_value() && *source_size == *target_size &&
707+
source_type.id() == ID_pointer && target_type_raw.id() == ID_pointer)
708+
{
709+
return typecast_exprt(expr, target_type_raw);
710+
}
711+
else if(!byte_extract_op.empty())
712+
{
713+
return byte_extract_exprt(
714+
byte_extract_op,
715+
expr,
716+
from_integer(offset, index_type()),
717+
target_type_raw);
693718
}
694719
else
695720
return nil_exprt();
@@ -699,12 +724,14 @@ exprt get_subexpression_at_offset(
699724
const exprt &expr,
700725
const exprt &offset,
701726
const typet &target_type,
727+
const irep_idt &byte_extract_op,
702728
const namespacet &ns)
703729
{
704730
mp_integer offset_const;
705731

706732
if(to_integer(offset, offset_const))
707733
return nil_exprt();
708734
else
709-
return get_subexpression_at_offset(expr, offset_const, target_type, ns);
735+
return get_subexpression_at_offset(
736+
expr, offset_const, target_type, byte_extract_op, ns);
710737
}

src/util/pointer_offset_size.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,16 @@ exprt build_sizeof_expr(
8181

8282
exprt get_subexpression_at_offset(
8383
const exprt &expr,
84-
mp_integer offset,
84+
const mp_integer &offset,
8585
const typet &target_type,
86+
const irep_idt &byte_extract_op,
8687
const namespacet &ns);
8788

8889
exprt get_subexpression_at_offset(
8990
const exprt &expr,
9091
const exprt &offset,
9192
const typet &target_type,
93+
const irep_idt &byte_extract_op,
9294
const namespacet &ns);
9395

9496
#endif // CPROVER_UTIL_POINTER_OFFSET_SIZE_H

unit/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ SRC += analyses/ai/ai.cpp \
4040
util/irep_sharing.cpp \
4141
util/message.cpp \
4242
util/optional.cpp \
43+
util/pointer_offset_size.cpp \
4344
util/replace_symbol.cpp \
4445
util/sharing_map.cpp \
4546
util/sharing_node.cpp \

unit/util/pointer_offset_size.cpp

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*******************************************************************\
2+
3+
Module: Unit tests of expression size/offset computation
4+
5+
Author: Michael Tautschnig
6+
7+
\*******************************************************************/
8+
9+
#include <testing-utils/catch.hpp>
10+
11+
#include <util/arith_tools.h>
12+
#include <util/byte_operators.h>
13+
#include <util/c_types.h>
14+
#include <util/cmdline.h>
15+
#include <util/config.h>
16+
#include <util/namespace.h>
17+
#include <util/pointer_offset_size.h>
18+
#include <util/std_expr.h>
19+
#include <util/symbol_table.h>
20+
21+
TEST_CASE("Build subexpression to access element at offset into array")
22+
{
23+
// this test does require a proper architecture to be set so that byte extract
24+
// uses adequate endianness
25+
cmdlinet cmdline;
26+
config.set(cmdline);
27+
28+
symbol_tablet symbol_table;
29+
namespacet ns(symbol_table);
30+
31+
const signedbv_typet t(32);
32+
33+
array_typet array_type(t, from_integer(2, size_type()));
34+
symbol_exprt a("array", array_type);
35+
36+
{
37+
const exprt result = get_subexpression_at_offset(a, 0, t, "", ns);
38+
REQUIRE(result == index_exprt(a, from_integer(0, index_type())));
39+
}
40+
41+
{
42+
const exprt result = get_subexpression_at_offset(a, 32 / 8, t, "", ns);
43+
REQUIRE(result == index_exprt(a, from_integer(1, index_type())));
44+
}
45+
46+
{
47+
const exprt result =
48+
get_subexpression_at_offset(a, from_integer(0, size_type()), t, "", ns);
49+
REQUIRE(result == index_exprt(a, from_integer(0, index_type())));
50+
}
51+
52+
{
53+
const exprt result =
54+
get_subexpression_at_offset(a, size_of_expr(t, ns), t, "", ns);
55+
REQUIRE(result == index_exprt(a, from_integer(1, index_type())));
56+
}
57+
58+
{
59+
const signedbv_typet small_t(8);
60+
const exprt result =
61+
get_subexpression_at_offset(a, 1, small_t, byte_extract_id(), ns);
62+
REQUIRE(
63+
result == byte_extract_exprt(
64+
byte_extract_id(),
65+
index_exprt(a, from_integer(0, index_type())),
66+
from_integer(1, index_type()),
67+
small_t));
68+
}
69+
}
70+
71+
TEST_CASE("Build subexpression to access element at offset into struct")
72+
{
73+
// this test does require a proper architecture to be set so that byte extract
74+
// uses adequate endianness
75+
cmdlinet cmdline;
76+
config.set(cmdline);
77+
78+
symbol_tablet symbol_table;
79+
namespacet ns(symbol_table);
80+
81+
const signedbv_typet t(32);
82+
83+
struct_typet st;
84+
st.components().emplace_back("foo", t);
85+
st.components().emplace_back("bar", t);
86+
87+
symbol_exprt s("struct", st);
88+
89+
{
90+
const exprt result = get_subexpression_at_offset(s, 0, t, "", ns);
91+
REQUIRE(result == member_exprt(s, "foo", t));
92+
}
93+
94+
{
95+
const exprt result = get_subexpression_at_offset(s, 32 / 8, t, "", ns);
96+
REQUIRE(result == member_exprt(s, "bar", t));
97+
}
98+
99+
{
100+
const exprt result =
101+
get_subexpression_at_offset(s, from_integer(0, size_type()), t, "", ns);
102+
REQUIRE(result == member_exprt(s, "foo", t));
103+
}
104+
105+
{
106+
const exprt result =
107+
get_subexpression_at_offset(s, size_of_expr(t, ns), t, "", ns);
108+
REQUIRE(result == member_exprt(s, "bar", t));
109+
}
110+
111+
{
112+
const signedbv_typet small_t(8);
113+
const exprt result =
114+
get_subexpression_at_offset(s, 1, small_t, byte_extract_id(), ns);
115+
REQUIRE(
116+
result == byte_extract_exprt(
117+
byte_extract_id(),
118+
member_exprt(s, "foo", t),
119+
from_integer(1, index_type()),
120+
small_t));
121+
}
122+
}

0 commit comments

Comments
 (0)