Skip to content

Commit 341a79f

Browse files
committed
Generalise get_subexpression_at_offset to extract parts of members
This enables re-use in the simplifier and other places. Adding a unit test of get_subexpression_at_offset.
1 parent d5fede5 commit 341a79f

File tree

5 files changed

+194
-41
lines changed

5 files changed

+194
-41
lines changed

src/pointer-analysis/value_set_dereference.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,8 +468,9 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to(
468468
}
469469
else
470470
{
471+
// try to build a member/index expression - do not use byte_extract
471472
exprt subexpr = get_subexpression_at_offset(
472-
root_object_subexpression, o.offset(), dereference_type, ns);
473+
root_object_subexpression, o.offset(), dereference_type, "", ns);
473474
if(subexpr.is_not_nil())
474475
{
475476
// Successfully found a member, array index, or combination thereof

src/util/pointer_offset_size.cpp

Lines changed: 66 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ Author: Daniel Kroening, [email protected]
1212
#include "pointer_offset_size.h"
1313

1414
#include "arith_tools.h"
15+
#include "base_type.h"
16+
#include "byte_operators.h"
1517
#include "c_types.h"
1618
#include "invariant.h"
1719
#include "namespace.h"
@@ -672,64 +674,87 @@ exprt build_sizeof_expr(
672674

673675
exprt get_subexpression_at_offset(
674676
const exprt &expr,
675-
mp_integer offset,
677+
const mp_integer &offset,
676678
const typet &target_type_raw,
679+
const irep_idt &byte_extract_op,
677680
const namespacet &ns)
678681
{
679-
exprt result = expr;
680-
const typet &source_type=ns.follow(result.type());
681-
const typet &target_type=ns.follow(target_type_raw);
682+
if(offset == 0 && base_type_eq(expr.type(), target_type_raw, ns))
683+
{
684+
exprt result = expr;
685+
686+
if(expr.type() != target_type_raw)
687+
result.type() = target_type_raw;
682688

683-
if(offset==0 && source_type==target_type)
684689
return result;
690+
}
691+
692+
const typet &source_type = ns.follow(expr.type());
693+
const auto target_size = pointer_offset_bits(target_type_raw, ns);
694+
if(!target_size.has_value())
695+
return nil_exprt();
685696

686697
if(source_type.id()==ID_struct)
687698
{
688-
const auto &st=to_struct_type(source_type);
689-
const struct_typet::componentst &components=st.components();
690-
member_offset_iterator offsets(st, ns);
691-
while(offsets->first<components.size() &&
692-
offsets->second!=-1 &&
693-
offsets->second<=offset)
699+
const struct_typet &struct_type = to_struct_type(source_type);
700+
701+
mp_integer m_offset_bits = 0;
702+
for(const auto &component : struct_type.components())
694703
{
695-
auto nextit=offsets;
696-
++nextit;
697-
if((offsets->first+1)==components.size() || offset<nextit->second)
704+
const auto m_size = pointer_offset_bits(component.type(), ns);
705+
if(!m_size.has_value())
706+
return nil_exprt();
707+
708+
if(
709+
offset * 8 >= m_offset_bits && m_offset_bits % 8 == 0 &&
710+
offset * 8 + *target_size <= m_offset_bits + *m_size)
698711
{
699-
// This field might be, or might contain, the answer.
700-
result=
701-
member_exprt(
702-
result,
703-
components[offsets->first].get_name(),
704-
components[offsets->first].type());
705-
return
706-
get_subexpression_at_offset(
707-
result, offset-offsets->second, target_type, ns);
712+
const member_exprt member(expr, component.get_name(), component.type());
713+
return get_subexpression_at_offset(
714+
member,
715+
offset - m_offset_bits / 8,
716+
target_type_raw,
717+
byte_extract_op,
718+
ns);
708719
}
709-
++offsets;
720+
721+
m_offset_bits += *m_size;
710722
}
711-
return nil_exprt();
712723
}
713724
else if(source_type.id()==ID_array)
714725
{
715-
// A cell of the array might be, or contain, the subexpression
716-
// we're looking for.
717-
const auto &at=to_array_type(source_type);
718-
719-
auto elem_size = pointer_offset_size(at.subtype(), ns);
720-
721-
if(!elem_size.has_value())
722-
return nil_exprt();
726+
const array_typet &array_type = to_array_type(source_type);
723727

724-
mp_integer cellidx = offset / (*elem_size);
728+
const auto elem_size = pointer_offset_bits(array_type.subtype(), ns);
725729

726-
if(cellidx < 0 || !cellidx.is_long())
730+
// no arrays of non-byte, zero-, or unknown-sized objects
731+
if(!elem_size.has_value() || *elem_size == 0 || *elem_size % 8 != 0)
727732
return nil_exprt();
728733

729-
offset = offset % (*elem_size);
734+
if(*target_size <= *elem_size)
735+
return get_subexpression_at_offset(
736+
index_exprt(
737+
expr, from_integer(offset / (*elem_size / 8), index_type())),
738+
offset % (*elem_size / 8),
739+
target_type_raw,
740+
byte_extract_op,
741+
ns);
742+
}
730743

731-
result=index_exprt(result, from_integer(cellidx, unsignedbv_typet(64)));
732-
return get_subexpression_at_offset(result, offset, target_type, ns);
744+
const auto source_size = pointer_offset_bits(source_type, ns);
745+
if(
746+
offset == 0 && source_size.has_value() && *source_size == *target_size &&
747+
source_type.id() == ID_pointer && target_type_raw.id() == ID_pointer)
748+
{
749+
return typecast_exprt(expr, target_type_raw);
750+
}
751+
else if(!byte_extract_op.empty())
752+
{
753+
return byte_extract_exprt(
754+
byte_extract_op,
755+
expr,
756+
from_integer(offset, index_type()),
757+
target_type_raw);
733758
}
734759
else
735760
return nil_exprt();
@@ -739,12 +764,14 @@ exprt get_subexpression_at_offset(
739764
const exprt &expr,
740765
const exprt &offset,
741766
const typet &target_type,
767+
const irep_idt &byte_extract_op,
742768
const namespacet &ns)
743769
{
744770
mp_integer offset_const;
745771

746772
if(to_integer(offset, offset_const))
747773
return nil_exprt();
748774
else
749-
return get_subexpression_at_offset(expr, offset_const, target_type, ns);
775+
return get_subexpression_at_offset(
776+
expr, offset_const, target_type, byte_extract_op, ns);
750777
}

src/util/pointer_offset_size.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,14 +81,16 @@ exprt build_sizeof_expr(
8181

8282
exprt get_subexpression_at_offset(
8383
const exprt &expr,
84-
mp_integer offset,
84+
const mp_integer &offset,
8585
const typet &target_type,
86+
const irep_idt &byte_extract_op,
8687
const namespacet &ns);
8788

8889
exprt get_subexpression_at_offset(
8990
const exprt &expr,
9091
const exprt &offset,
9192
const typet &target_type,
93+
const irep_idt &byte_extract_op,
9294
const namespacet &ns);
9395

9496
#endif // CPROVER_UTIL_POINTER_OFFSET_SIZE_H

unit/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ SRC += analyses/ai/ai.cpp \
4040
util/irep_sharing.cpp \
4141
util/message.cpp \
4242
util/optional.cpp \
43+
util/pointer_offset_size.cpp \
4344
util/replace_symbol.cpp \
4445
util/sharing_map.cpp \
4546
util/sharing_node.cpp \

unit/util/pointer_offset_size.cpp

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/*******************************************************************\
2+
3+
Module: Unit tests of expression size/offset computation
4+
5+
Author: Michael Tautschnig
6+
7+
\*******************************************************************/
8+
9+
#include <testing-utils/catch.hpp>
10+
11+
#include <util/arith_tools.h>
12+
#include <util/byte_operators.h>
13+
#include <util/c_types.h>
14+
#include <util/cmdline.h>
15+
#include <util/config.h>
16+
#include <util/namespace.h>
17+
#include <util/pointer_offset_size.h>
18+
#include <util/std_expr.h>
19+
#include <util/symbol_table.h>
20+
21+
TEST_CASE("Build subexpression to access element at offset into array")
22+
{
23+
// this test does require a proper architecture to be set so that byte extract
24+
// uses adequate endianness
25+
cmdlinet cmdline;
26+
config.set(cmdline);
27+
28+
symbol_tablet symbol_table;
29+
namespacet ns(symbol_table);
30+
31+
const signedbv_typet t(32);
32+
33+
array_typet array_type(t, from_integer(2, size_type()));
34+
symbol_exprt a("array", array_type);
35+
36+
{
37+
const exprt result = get_subexpression_at_offset(a, 0, t, "", ns);
38+
REQUIRE(result == index_exprt(a, from_integer(0, index_type())));
39+
}
40+
41+
{
42+
const exprt result = get_subexpression_at_offset(a, 32 / 8, t, "", ns);
43+
REQUIRE(result == index_exprt(a, from_integer(1, index_type())));
44+
}
45+
46+
{
47+
const exprt result =
48+
get_subexpression_at_offset(a, from_integer(0, size_type()), t, "", ns);
49+
REQUIRE(result == index_exprt(a, from_integer(0, index_type())));
50+
}
51+
52+
{
53+
const exprt result =
54+
get_subexpression_at_offset(a, size_of_expr(t, ns), t, "", ns);
55+
REQUIRE(result == index_exprt(a, from_integer(1, index_type())));
56+
}
57+
58+
{
59+
const signedbv_typet small_t(8);
60+
const exprt result =
61+
get_subexpression_at_offset(a, 1, small_t, byte_extract_id(), ns);
62+
REQUIRE(
63+
result == byte_extract_exprt(
64+
byte_extract_id(),
65+
index_exprt(a, from_integer(0, index_type())),
66+
from_integer(1, index_type()),
67+
small_t));
68+
}
69+
}
70+
71+
TEST_CASE("Build subexpression to access element at offset into struct")
72+
{
73+
// this test does require a proper architecture to be set so that byte extract
74+
// uses adequate endianness
75+
cmdlinet cmdline;
76+
config.set(cmdline);
77+
78+
symbol_tablet symbol_table;
79+
namespacet ns(symbol_table);
80+
81+
const signedbv_typet t(32);
82+
83+
struct_typet st;
84+
st.components().emplace_back("foo", t);
85+
st.components().emplace_back("bar", t);
86+
87+
symbol_exprt s("struct", st);
88+
89+
{
90+
const exprt result = get_subexpression_at_offset(s, 0, t, "", ns);
91+
REQUIRE(result == member_exprt(s, "foo", t));
92+
}
93+
94+
{
95+
const exprt result = get_subexpression_at_offset(s, 32 / 8, t, "", ns);
96+
REQUIRE(result == member_exprt(s, "bar", t));
97+
}
98+
99+
{
100+
const exprt result =
101+
get_subexpression_at_offset(s, from_integer(0, size_type()), t, "", ns);
102+
REQUIRE(result == member_exprt(s, "foo", t));
103+
}
104+
105+
{
106+
const exprt result =
107+
get_subexpression_at_offset(s, size_of_expr(t, ns), t, "", ns);
108+
REQUIRE(result == member_exprt(s, "bar", t));
109+
}
110+
111+
{
112+
const signedbv_typet small_t(8);
113+
const exprt result =
114+
get_subexpression_at_offset(s, 1, small_t, byte_extract_id(), ns);
115+
REQUIRE(
116+
result == byte_extract_exprt(
117+
byte_extract_id(),
118+
member_exprt(s, "foo", t),
119+
from_integer(1, index_type()),
120+
small_t));
121+
}
122+
}

0 commit comments

Comments
 (0)