Skip to content

Commit 8d7fbad

Browse files
committed
Generalise get_subexpression_at_offset to extract parts of members
This enables re-use in the simplifier and other places. Adding a unit test of get_subexpression_at_offset.
1 parent 16492df commit 8d7fbad

File tree

5 files changed

+194
-41
lines changed

5 files changed

+194
-41
lines changed

src/pointer-analysis/value_set_dereference.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,8 +468,9 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to(
468468
}
469469
else
470470
{
471+
// try to build a member/index expression - do not use byte_extract
471472
exprt subexpr = get_subexpression_at_offset(
472-
root_object_subexpression, o.offset(), dereference_type, ns);
473+
root_object_subexpression, o.offset(), dereference_type, "", ns);
473474
if(subexpr.is_not_nil())
474475
{
475476
// Successfully found a member, array index, or combination thereof

src/util/pointer_offset_size.cpp

Lines changed: 66 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ Author: Daniel Kroening, [email protected]
1212
#include "pointer_offset_size.h"
1313

1414
#include "arith_tools.h"
15+
#include "base_type.h"
16+
#include "byte_operators.h"
1517
#include "c_types.h"
1618
#include "invariant.h"
1719
#include "namespace.h"
@@ -676,64 +678,87 @@ exprt build_sizeof_expr(
676678

677679
exprt get_subexpression_at_offset(
678680
const exprt &expr,
679-
mp_integer offset,
681+
const mp_integer &offset,
680682
const typet &target_type_raw,
683+
const irep_idt &byte_extract_op,
681684
const namespacet &ns)
682685
{
683-
exprt result = expr;
684-
const typet &source_type=ns.follow(result.type());
685-
const typet &target_type=ns.follow(target_type_raw);
686+
if(offset == 0 && base_type_eq(expr.type(), target_type_raw, ns))
687+
{
688+
exprt result = expr;
689+
690+
if(expr.type() != target_type_raw)
691+
result.type() = target_type_raw;
686692

687-
if(offset==0 && source_type==target_type)
688693
return result;
694+
}
695+
696+
const typet &source_type = ns.follow(expr.type());
697+
const auto target_size = pointer_offset_bits(target_type_raw, ns);
698+
if(!target_size.has_value())
699+
return nil_exprt();
689700

690701
if(source_type.id()==ID_struct)
691702
{
692-
const auto &st=to_struct_type(source_type);
693-
const struct_typet::componentst &components=st.components();
694-
member_offset_iterator offsets(st, ns);
695-
while(offsets->first<components.size() &&
696-
offsets->second!=-1 &&
697-
offsets->second<=offset)
703+
const struct_typet &struct_type = to_struct_type(source_type);
704+
705+
mp_integer m_offset_bits = 0;
706+
for(const auto &component : struct_type.components())
698707
{
699-
auto nextit=offsets;
700-
++nextit;
701-
if((offsets->first+1)==components.size() || offset<nextit->second)
708+
const auto m_size = pointer_offset_bits(component.type(), ns);
709+
if(!m_size.has_value())
710+
return nil_exprt();
711+
712+
if(
713+
offset * 8 >= m_offset_bits && m_offset_bits % 8 == 0 &&
714+
offset * 8 + *target_size <= m_offset_bits + *m_size)
702715
{
703-
// This field might be, or might contain, the answer.
704-
result=
705-
member_exprt(
706-
result,
707-
components[offsets->first].get_name(),
708-
components[offsets->first].type());
709-
return
710-
get_subexpression_at_offset(
711-
result, offset-offsets->second, target_type, ns);
716+
const member_exprt member(expr, component.get_name(), component.type());
717+
return get_subexpression_at_offset(
718+
member,
719+
offset - m_offset_bits / 8,
720+
target_type_raw,
721+
byte_extract_op,
722+
ns);
712723
}
713-
++offsets;
724+
725+
m_offset_bits += *m_size;
714726
}
715-
return nil_exprt();
716727
}
717728
else if(source_type.id()==ID_array)
718729
{
719-
// A cell of the array might be, or contain, the subexpression
720-
// we're looking for.
721-
const auto &at=to_array_type(source_type);
722-
723-
auto elem_size = pointer_offset_size(at.subtype(), ns);
724-
725-
if(!elem_size.has_value())
726-
return nil_exprt();
730+
const array_typet &array_type = to_array_type(source_type);
727731

728-
mp_integer cellidx = offset / (*elem_size);
732+
const auto elem_size = pointer_offset_bits(array_type.subtype(), ns);
729733

730-
if(cellidx < 0 || !cellidx.is_long())
734+
// no arrays of non-byte, zero-, or unknown-sized objects
735+
if(!elem_size.has_value() || *elem_size == 0 || *elem_size % 8 != 0)
731736
return nil_exprt();
732737

733-
offset = offset % (*elem_size);
738+
if(*target_size <= *elem_size)
739+
return get_subexpression_at_offset(
740+
index_exprt(
741+
expr, from_integer(offset / (*elem_size / 8), index_type())),
742+
offset % (*elem_size / 8),
743+
target_type_raw,
744+
byte_extract_op,
745+
ns);
746+
}
734747

735-
result=index_exprt(result, from_integer(cellidx, unsignedbv_typet(64)));
736-
return get_subexpression_at_offset(result, offset, target_type, ns);
748+
const auto source_size = pointer_offset_bits(source_type, ns);
749+
if(
750+
offset == 0 && source_size.has_value() && *source_size == *target_size &&
751+
source_type.id() == ID_pointer && target_type_raw.id() == ID_pointer)
752+
{
753+
return typecast_exprt(expr, target_type_raw);
754+
}
755+
else if(!byte_extract_op.empty())
756+
{
757+
return byte_extract_exprt(
758+
byte_extract_op,
759+
expr,
760+
from_integer(offset, index_type()),
761+
target_type_raw);
737762
}
738763
else
739764
return nil_exprt();
@@ -743,12 +768,14 @@ exprt get_subexpression_at_offset(
743768
const exprt &expr,
744769
const exprt &offset,
745770
const typet &target_type,
771+
const irep_idt &byte_extract_op,
746772
const namespacet &ns)
747773
{
748774
mp_integer offset_const;
749775

750776
if(to_integer(offset, offset_const))
751777
return nil_exprt();
752778
else
753-
return get_subexpression_at_offset(expr, offset_const, target_type, ns);
779+
return get_subexpression_at_offset(
780+
expr, offset_const, target_type, byte_extract_op, ns);
754781
}

src/util/pointer_offset_size.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,16 @@ exprt build_sizeof_expr(
8181

8282
exprt get_subexpression_at_offset(
8383
const exprt &expr,
84-
mp_integer offset,
84+
const mp_integer &offset,
8585
const typet &target_type,
86+
const irep_idt &byte_extract_op,
8687
const namespacet &ns);
8788

8889
exprt get_subexpression_at_offset(
8990
const exprt &expr,
9091
const exprt &offset,
9192
const typet &target_type,
93+
const irep_idt &byte_extract_op,
9294
const namespacet &ns);
9395

9496
#endif // CPROVER_UTIL_POINTER_OFFSET_SIZE_H

unit/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ SRC += analyses/ai/ai.cpp \
4040
util/irep_sharing.cpp \
4141
util/message.cpp \
4242
util/optional.cpp \
43+
util/pointer_offset_size.cpp \
4344
util/replace_symbol.cpp \
4445
util/sharing_map.cpp \
4546
util/sharing_node.cpp \

unit/util/pointer_offset_size.cpp

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*******************************************************************\
2+
3+
Module: Unit tests of expression size/offset computation
4+
5+
Author: Michael Tautschnig
6+
7+
\*******************************************************************/
8+
9+
#include <testing-utils/catch.hpp>
10+
11+
#include <util/arith_tools.h>
12+
#include <util/byte_operators.h>
13+
#include <util/c_types.h>
14+
#include <util/cmdline.h>
15+
#include <util/config.h>
16+
#include <util/namespace.h>
17+
#include <util/pointer_offset_size.h>
18+
#include <util/std_expr.h>
19+
#include <util/symbol_table.h>
20+
21+
TEST_CASE("Build subexpression to access element at offset into array")
22+
{
23+
// this test does require a proper architecture to be set so that byte extract
24+
// uses adequate endianness
25+
cmdlinet cmdline;
26+
config.set(cmdline);
27+
28+
symbol_tablet symbol_table;
29+
namespacet ns(symbol_table);
30+
31+
const signedbv_typet t(32);
32+
33+
array_typet array_type(t, from_integer(2, size_type()));
34+
symbol_exprt a("array", array_type);
35+
36+
{
37+
const exprt result = get_subexpression_at_offset(a, 0, t, "", ns);
38+
REQUIRE(result == index_exprt(a, from_integer(0, index_type())));
39+
}
40+
41+
{
42+
const exprt result = get_subexpression_at_offset(a, 32 / 8, t, "", ns);
43+
REQUIRE(result == index_exprt(a, from_integer(1, index_type())));
44+
}
45+
46+
{
47+
const exprt result =
48+
get_subexpression_at_offset(a, from_integer(0, size_type()), t, "", ns);
49+
REQUIRE(result == index_exprt(a, from_integer(0, index_type())));
50+
}
51+
52+
{
53+
const exprt result =
54+
get_subexpression_at_offset(a, size_of_expr(t, ns), t, "", ns);
55+
REQUIRE(result == index_exprt(a, from_integer(1, index_type())));
56+
}
57+
58+
{
59+
const signedbv_typet small_t(8);
60+
const exprt result =
61+
get_subexpression_at_offset(a, 1, small_t, byte_extract_id(), ns);
62+
REQUIRE(
63+
result == byte_extract_exprt(
64+
byte_extract_id(),
65+
index_exprt(a, from_integer(0, index_type())),
66+
from_integer(1, index_type()),
67+
small_t));
68+
}
69+
}
70+
71+
TEST_CASE("Build subexpression to access element at offset into struct")
72+
{
73+
// this test does require a proper architecture to be set so that byte extract
74+
// uses adequate endianness
75+
cmdlinet cmdline;
76+
config.set(cmdline);
77+
78+
symbol_tablet symbol_table;
79+
namespacet ns(symbol_table);
80+
81+
const signedbv_typet t(32);
82+
83+
struct_typet st;
84+
st.components().emplace_back("foo", t);
85+
st.components().emplace_back("bar", t);
86+
87+
symbol_exprt s("struct", st);
88+
89+
{
90+
const exprt result = get_subexpression_at_offset(s, 0, t, "", ns);
91+
REQUIRE(result == member_exprt(s, "foo", t));
92+
}
93+
94+
{
95+
const exprt result = get_subexpression_at_offset(s, 32 / 8, t, "", ns);
96+
REQUIRE(result == member_exprt(s, "bar", t));
97+
}
98+
99+
{
100+
const exprt result =
101+
get_subexpression_at_offset(s, from_integer(0, size_type()), t, "", ns);
102+
REQUIRE(result == member_exprt(s, "foo", t));
103+
}
104+
105+
{
106+
const exprt result =
107+
get_subexpression_at_offset(s, size_of_expr(t, ns), t, "", ns);
108+
REQUIRE(result == member_exprt(s, "bar", t));
109+
}
110+
111+
{
112+
const signedbv_typet small_t(8);
113+
const exprt result =
114+
get_subexpression_at_offset(s, 1, small_t, byte_extract_id(), ns);
115+
REQUIRE(
116+
result == byte_extract_exprt(
117+
byte_extract_id(),
118+
member_exprt(s, "foo", t),
119+
from_integer(1, index_type()),
120+
small_t));
121+
}
122+
}

0 commit comments

Comments
 (0)