Skip to content

Commit b16d247

Browse files
authored
Merge pull request #3167 from tautschnig/get_subexpr-parts-of-members
Generalise get_subexpression_at_offset to extract parts of members
2 parents 26df513 + 2ff4d2f commit b16d247

File tree

6 files changed

+198
-50
lines changed

6 files changed

+198
-50
lines changed

src/pointer-analysis/value_set_dereference.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,9 +468,10 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to(
468468
}
469469
else
470470
{
471+
// try to build a member/index expression - do not use byte_extract
471472
exprt subexpr = get_subexpression_at_offset(
472473
root_object_subexpression, o.offset(), dereference_type, ns);
473-
if(subexpr.is_not_nil())
474+
if(subexpr.is_not_nil() && subexpr.id() != byte_extract_id())
474475
{
475476
// Successfully found a member, array index, or combination thereof
476477
// that matches the desired type and offset:

src/util/pointer_offset_size.cpp

Lines changed: 58 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ Author: Daniel Kroening, [email protected]
1212
#include "pointer_offset_size.h"
1313

1414
#include "arith_tools.h"
15+
#include "base_type.h"
16+
#include "byte_operators.h"
1517
#include "c_types.h"
1618
#include "invariant.h"
1719
#include "namespace.h"
@@ -676,67 +678,82 @@ exprt build_sizeof_expr(
676678

677679
exprt get_subexpression_at_offset(
678680
const exprt &expr,
679-
mp_integer offset,
681+
const mp_integer &offset_bytes,
680682
const typet &target_type_raw,
681683
const namespacet &ns)
682684
{
683-
exprt result = expr;
684-
const typet &source_type=ns.follow(result.type());
685-
const typet &target_type=ns.follow(target_type_raw);
685+
if(offset_bytes == 0 && base_type_eq(expr.type(), target_type_raw, ns))
686+
{
687+
exprt result = expr;
688+
689+
if(expr.type() != target_type_raw)
690+
result.type() = target_type_raw;
686691

687-
if(offset==0 && source_type==target_type)
688692
return result;
693+
}
694+
695+
const typet &source_type = ns.follow(expr.type());
696+
const auto target_size_bits = pointer_offset_bits(target_type_raw, ns);
697+
if(!target_size_bits.has_value())
698+
return nil_exprt();
689699

690700
if(source_type.id()==ID_struct)
691701
{
692-
const auto &st=to_struct_type(source_type);
693-
const struct_typet::componentst &components=st.components();
694-
member_offset_iterator offsets(st, ns);
695-
while(offsets->first<components.size() &&
696-
offsets->second!=-1 &&
697-
offsets->second<=offset)
702+
const struct_typet &struct_type = to_struct_type(source_type);
703+
704+
mp_integer m_offset_bits = 0;
705+
for(const auto &component : struct_type.components())
698706
{
699-
auto nextit=offsets;
700-
++nextit;
701-
if((offsets->first+1)==components.size() || offset<nextit->second)
707+
const auto m_size_bits = pointer_offset_bits(component.type(), ns);
708+
if(!m_size_bits.has_value())
709+
return nil_exprt();
710+
711+
// if this member completely contains the target, and this member is
712+
// byte-aligned, recurse into it
713+
if(
714+
offset_bytes * 8 >= m_offset_bits && m_offset_bits % 8 == 0 &&
715+
offset_bytes * 8 + *target_size_bits <= m_offset_bits + *m_size_bits)
702716
{
703-
// This field might be, or might contain, the answer.
704-
result=
705-
member_exprt(
706-
result,
707-
components[offsets->first].get_name(),
708-
components[offsets->first].type());
709-
return
710-
get_subexpression_at_offset(
711-
result, offset-offsets->second, target_type, ns);
717+
const member_exprt member(expr, component.get_name(), component.type());
718+
return get_subexpression_at_offset(
719+
member, offset_bytes - m_offset_bits / 8, target_type_raw, ns);
712720
}
713-
++offsets;
721+
722+
m_offset_bits += *m_size_bits;
714723
}
715-
return nil_exprt();
716724
}
717725
else if(source_type.id()==ID_array)
718726
{
719-
// A cell of the array might be, or contain, the subexpression
720-
// we're looking for.
721-
const auto &at=to_array_type(source_type);
727+
const array_typet &array_type = to_array_type(source_type);
722728

723-
auto elem_size = pointer_offset_size(at.subtype(), ns);
729+
const auto elem_size_bits = pointer_offset_bits(array_type.subtype(), ns);
724730

725-
if(!elem_size.has_value())
726-
return nil_exprt();
727-
728-
mp_integer cellidx = offset / (*elem_size);
729-
730-
if(cellidx < 0 || !cellidx.is_long())
731+
// no arrays of non-byte-aligned, zero-, or unknown-sized objects
732+
if(
733+
!elem_size_bits.has_value() || *elem_size_bits == 0 ||
734+
*elem_size_bits % 8 != 0)
735+
{
731736
return nil_exprt();
737+
}
732738

733-
offset = offset % (*elem_size);
734-
735-
result=index_exprt(result, from_integer(cellidx, unsignedbv_typet(64)));
736-
return get_subexpression_at_offset(result, offset, target_type, ns);
739+
if(*target_size_bits <= *elem_size_bits)
740+
{
741+
const mp_integer elem_size_bytes = *elem_size_bits / 8;
742+
return get_subexpression_at_offset(
743+
index_exprt(
744+
expr, from_integer(offset_bytes / elem_size_bytes, index_type())),
745+
offset_bytes % elem_size_bytes,
746+
target_type_raw,
747+
ns);
748+
}
737749
}
738-
else
739-
return nil_exprt();
750+
751+
const byte_extract_exprt be(
752+
byte_extract_id(),
753+
expr,
754+
from_integer(offset_bytes, index_type()),
755+
target_type_raw);
756+
return simplify_expr(be, ns);
740757
}
741758

742759
exprt get_subexpression_at_offset(

src/util/pointer_offset_size.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ exprt build_sizeof_expr(
8181

8282
exprt get_subexpression_at_offset(
8383
const exprt &expr,
84-
mp_integer offset,
84+
const mp_integer &offset,
8585
const typet &target_type,
8686
const namespacet &ns);
8787

src/util/simplify_expr.cpp

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1679,17 +1679,26 @@ bool simplify_exprt::simplify_byte_extract(byte_extract_exprt &expr)
16791679
if(to_integer(expr.offset(), offset) || offset<0)
16801680
return true;
16811681

1682-
// byte extract of full object is object
16831682
// don't do any of the following if endianness doesn't match, as
16841683
// bytes need to be swapped
1685-
if(
1686-
offset == 0 && base_type_eq(expr.type(), expr.op().type(), ns) &&
1687-
byte_extract_id() == expr.id())
1684+
if(offset == 0 && byte_extract_id() == expr.id())
16881685
{
1689-
exprt tmp=expr.op();
1690-
expr.swap(tmp);
1686+
// byte extract of full object is object
1687+
if(base_type_eq(expr.type(), expr.op().type(), ns))
1688+
{
1689+
exprt tmp = expr.op();
1690+
expr.swap(tmp);
16911691

1692-
return false;
1692+
return false;
1693+
}
1694+
else if(
1695+
expr.type().id() == ID_pointer && expr.op().type().id() == ID_pointer)
1696+
{
1697+
typecast_exprt tc(expr.op(), expr.type());
1698+
expr.swap(tc);
1699+
1700+
return false;
1701+
}
16931702
}
16941703

16951704
// no proper simplification for expr.type()==void

unit/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ SRC += analyses/ai/ai.cpp \
4040
util/irep_sharing.cpp \
4141
util/message.cpp \
4242
util/optional.cpp \
43+
util/pointer_offset_size.cpp \
4344
util/replace_symbol.cpp \
4445
util/sharing_map.cpp \
4546
util/sharing_node.cpp \

unit/util/pointer_offset_size.cpp

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/*******************************************************************\
2+
3+
Module: Unit tests of expression size/offset computation
4+
5+
Author: Michael Tautschnig
6+
7+
\*******************************************************************/
8+
9+
#include <testing-utils/catch.hpp>
10+
11+
#include <util/arith_tools.h>
12+
#include <util/byte_operators.h>
13+
#include <util/c_types.h>
14+
#include <util/cmdline.h>
15+
#include <util/config.h>
16+
#include <util/namespace.h>
17+
#include <util/pointer_offset_size.h>
18+
#include <util/std_expr.h>
19+
#include <util/symbol_table.h>
20+
21+
TEST_CASE("Build subexpression to access element at offset into array")
22+
{
23+
// this test does require a proper architecture to be set so that byte extract
24+
// uses adequate endianness
25+
cmdlinet cmdline;
26+
config.set(cmdline);
27+
28+
symbol_tablet symbol_table;
29+
namespacet ns(symbol_table);
30+
31+
const signedbv_typet t(32);
32+
33+
array_typet array_type(t, from_integer(2, size_type()));
34+
symbol_exprt a("array", array_type);
35+
36+
{
37+
const exprt result = get_subexpression_at_offset(a, 0, t, ns);
38+
REQUIRE(result == index_exprt(a, from_integer(0, index_type())));
39+
}
40+
41+
{
42+
const exprt result = get_subexpression_at_offset(a, 32 / 8, t, ns);
43+
REQUIRE(result == index_exprt(a, from_integer(1, index_type())));
44+
}
45+
46+
{
47+
const exprt result =
48+
get_subexpression_at_offset(a, from_integer(0, size_type()), t, ns);
49+
REQUIRE(result == index_exprt(a, from_integer(0, index_type())));
50+
}
51+
52+
{
53+
const exprt result =
54+
get_subexpression_at_offset(a, size_of_expr(t, ns), t, ns);
55+
REQUIRE(result == index_exprt(a, from_integer(1, index_type())));
56+
}
57+
58+
{
59+
const signedbv_typet small_t(8);
60+
const exprt result = get_subexpression_at_offset(a, 1, small_t, ns);
61+
REQUIRE(
62+
result == byte_extract_exprt(
63+
byte_extract_id(),
64+
index_exprt(a, from_integer(0, index_type())),
65+
from_integer(1, index_type()),
66+
small_t));
67+
}
68+
}
69+
70+
TEST_CASE("Build subexpression to access element at offset into struct")
71+
{
72+
// this test does require a proper architecture to be set so that byte extract
73+
// uses adequate endianness
74+
cmdlinet cmdline;
75+
config.set(cmdline);
76+
77+
symbol_tablet symbol_table;
78+
namespacet ns(symbol_table);
79+
80+
const signedbv_typet t(32);
81+
82+
struct_typet st;
83+
st.components().emplace_back("foo", t);
84+
st.components().emplace_back("bar", t);
85+
86+
symbol_exprt s("struct", st);
87+
88+
{
89+
const exprt result = get_subexpression_at_offset(s, 0, t, ns);
90+
REQUIRE(result == member_exprt(s, "foo", t));
91+
}
92+
93+
{
94+
const exprt result = get_subexpression_at_offset(s, 32 / 8, t, ns);
95+
REQUIRE(result == member_exprt(s, "bar", t));
96+
}
97+
98+
{
99+
const exprt result =
100+
get_subexpression_at_offset(s, from_integer(0, size_type()), t, ns);
101+
REQUIRE(result == member_exprt(s, "foo", t));
102+
}
103+
104+
{
105+
const exprt result =
106+
get_subexpression_at_offset(s, size_of_expr(t, ns), t, ns);
107+
REQUIRE(result == member_exprt(s, "bar", t));
108+
}
109+
110+
{
111+
const signedbv_typet small_t(8);
112+
const exprt result = get_subexpression_at_offset(s, 1, small_t, ns);
113+
REQUIRE(
114+
result == byte_extract_exprt(
115+
byte_extract_id(),
116+
member_exprt(s, "foo", t),
117+
from_integer(1, index_type()),
118+
small_t));
119+
}
120+
}

0 commit comments

Comments
 (0)