Skip to content

Generalise get_subexpression_at_offset to extract parts of members #3167

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/pointer-analysis/value_set_dereference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,9 +468,10 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to(
}
else
{
// try to build a member/index expression - do not use byte_extract
exprt subexpr = get_subexpression_at_offset(
root_object_subexpression, o.offset(), dereference_type, ns);
if(subexpr.is_not_nil())
if(subexpr.is_not_nil() && subexpr.id() != byte_extract_id())
{
// Successfully found a member, array index, or combination thereof
// that matches the desired type and offset:
Expand Down
99 changes: 58 additions & 41 deletions src/util/pointer_offset_size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ Author: Daniel Kroening, [email protected]
#include "pointer_offset_size.h"

#include "arith_tools.h"
#include "base_type.h"
#include "byte_operators.h"
#include "c_types.h"
#include "invariant.h"
#include "namespace.h"
Expand Down Expand Up @@ -676,67 +678,82 @@ exprt build_sizeof_expr(

exprt get_subexpression_at_offset(
const exprt &expr,
mp_integer offset,
const mp_integer &offset_bytes,
const typet &target_type_raw,
const namespacet &ns)
{
exprt result = expr;
const typet &source_type=ns.follow(result.type());
const typet &target_type=ns.follow(target_type_raw);
if(offset_bytes == 0 && base_type_eq(expr.type(), target_type_raw, ns))
{
exprt result = expr;

if(expr.type() != target_type_raw)
result.type() = target_type_raw;

if(offset==0 && source_type==target_type)
return result;
}

const typet &source_type = ns.follow(expr.type());
const auto target_size_bits = pointer_offset_bits(target_type_raw, ns);
if(!target_size_bits.has_value())
return nil_exprt();

if(source_type.id()==ID_struct)
{
const auto &st=to_struct_type(source_type);
const struct_typet::componentst &components=st.components();
member_offset_iterator offsets(st, ns);
while(offsets->first<components.size() &&
offsets->second!=-1 &&
offsets->second<=offset)
const struct_typet &struct_type = to_struct_type(source_type);

mp_integer m_offset_bits = 0;
for(const auto &component : struct_type.components())
{
auto nextit=offsets;
++nextit;
if((offsets->first+1)==components.size() || offset<nextit->second)
const auto m_size_bits = pointer_offset_bits(component.type(), ns);
if(!m_size_bits.has_value())
return nil_exprt();

// if this member completely contains the target, and this member is
// byte-aligned, recurse into it
if(
offset_bytes * 8 >= m_offset_bits && m_offset_bits % 8 == 0 &&
offset_bytes * 8 + *target_size_bits <= m_offset_bits + *m_size_bits)
{
// This field might be, or might contain, the answer.
result=
member_exprt(
result,
components[offsets->first].get_name(),
components[offsets->first].type());
return
get_subexpression_at_offset(
result, offset-offsets->second, target_type, ns);
const member_exprt member(expr, component.get_name(), component.type());
return get_subexpression_at_offset(
member, offset_bytes - m_offset_bits / 8, target_type_raw, ns);
}
++offsets;

m_offset_bits += *m_size_bits;
}
return nil_exprt();
}
else if(source_type.id()==ID_array)
{
// A cell of the array might be, or contain, the subexpression
// we're looking for.
const auto &at=to_array_type(source_type);
const array_typet &array_type = to_array_type(source_type);

auto elem_size = pointer_offset_size(at.subtype(), ns);
const auto elem_size_bits = pointer_offset_bits(array_type.subtype(), ns);

if(!elem_size.has_value())
return nil_exprt();

mp_integer cellidx = offset / (*elem_size);

if(cellidx < 0 || !cellidx.is_long())
// no arrays of non-byte-aligned, zero-, or unknown-sized objects
if(
!elem_size_bits.has_value() || *elem_size_bits == 0 ||
*elem_size_bits % 8 != 0)
{
return nil_exprt();
}

offset = offset % (*elem_size);

result=index_exprt(result, from_integer(cellidx, unsignedbv_typet(64)));
return get_subexpression_at_offset(result, offset, target_type, ns);
if(*target_size_bits <= *elem_size_bits)
{
const mp_integer elem_size_bytes = *elem_size_bits / 8;
return get_subexpression_at_offset(
index_exprt(
expr, from_integer(offset_bytes / elem_size_bytes, index_type())),
offset_bytes % elem_size_bytes,
target_type_raw,
ns);
}
}
else
return nil_exprt();

const byte_extract_exprt be(
byte_extract_id(),
expr,
from_integer(offset_bytes, index_type()),
target_type_raw);
return simplify_expr(be, ns);
}

exprt get_subexpression_at_offset(
Expand Down
2 changes: 1 addition & 1 deletion src/util/pointer_offset_size.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ exprt build_sizeof_expr(

exprt get_subexpression_at_offset(
const exprt &expr,
mp_integer offset,
const mp_integer &offset,
const typet &target_type,
const namespacet &ns);

Expand Down
23 changes: 16 additions & 7 deletions src/util/simplify_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1679,17 +1679,26 @@ bool simplify_exprt::simplify_byte_extract(byte_extract_exprt &expr)
if(to_integer(expr.offset(), offset) || offset<0)
return true;

// byte extract of full object is object
// don't do any of the following if endianness doesn't match, as
// bytes need to be swapped
if(
offset == 0 && base_type_eq(expr.type(), expr.op().type(), ns) &&
byte_extract_id() == expr.id())
if(offset == 0 && byte_extract_id() == expr.id())
{
exprt tmp=expr.op();
expr.swap(tmp);
// byte extract of full object is object
if(base_type_eq(expr.type(), expr.op().type(), ns))
{
exprt tmp = expr.op();
expr.swap(tmp);

return false;
return false;
}
else if(
expr.type().id() == ID_pointer && expr.op().type().id() == ID_pointer)
{
typecast_exprt tc(expr.op(), expr.type());
expr.swap(tc);

return false;
}
}

// no proper simplification for expr.type()==void
Expand Down
1 change: 1 addition & 0 deletions unit/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ SRC += analyses/ai/ai.cpp \
util/irep_sharing.cpp \
util/message.cpp \
util/optional.cpp \
util/pointer_offset_size.cpp \
util/replace_symbol.cpp \
util/sharing_map.cpp \
util/sharing_node.cpp \
Expand Down
120 changes: 120 additions & 0 deletions unit/util/pointer_offset_size.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
/*******************************************************************\

Module: Unit tests of expression size/offset computation

Author: Michael Tautschnig

\*******************************************************************/

#include <testing-utils/catch.hpp>

#include <util/arith_tools.h>
#include <util/byte_operators.h>
#include <util/c_types.h>
#include <util/cmdline.h>
#include <util/config.h>
#include <util/namespace.h>
#include <util/pointer_offset_size.h>
#include <util/std_expr.h>
#include <util/symbol_table.h>

TEST_CASE("Build subexpression to access element at offset into array")
{
// this test does require a proper architecture to be set so that byte extract
// uses adequate endianness
cmdlinet cmdline;
config.set(cmdline);

symbol_tablet symbol_table;
namespacet ns(symbol_table);

const signedbv_typet t(32);

array_typet array_type(t, from_integer(2, size_type()));
symbol_exprt a("array", array_type);

{
const exprt result = get_subexpression_at_offset(a, 0, t, ns);
REQUIRE(result == index_exprt(a, from_integer(0, index_type())));
}

{
const exprt result = get_subexpression_at_offset(a, 32 / 8, t, ns);
REQUIRE(result == index_exprt(a, from_integer(1, index_type())));
}

{
const exprt result =
get_subexpression_at_offset(a, from_integer(0, size_type()), t, ns);
REQUIRE(result == index_exprt(a, from_integer(0, index_type())));
}

{
const exprt result =
get_subexpression_at_offset(a, size_of_expr(t, ns), t, ns);
REQUIRE(result == index_exprt(a, from_integer(1, index_type())));
}

{
const signedbv_typet small_t(8);
const exprt result = get_subexpression_at_offset(a, 1, small_t, ns);
REQUIRE(
result == byte_extract_exprt(
byte_extract_id(),
index_exprt(a, from_integer(0, index_type())),
from_integer(1, index_type()),
small_t));
}
}

TEST_CASE("Build subexpression to access element at offset into struct")
{
// this test does require a proper architecture to be set so that byte extract
// uses adequate endianness
cmdlinet cmdline;
config.set(cmdline);

symbol_tablet symbol_table;
namespacet ns(symbol_table);

const signedbv_typet t(32);

struct_typet st;
st.components().emplace_back("foo", t);
st.components().emplace_back("bar", t);

symbol_exprt s("struct", st);

{
const exprt result = get_subexpression_at_offset(s, 0, t, ns);
REQUIRE(result == member_exprt(s, "foo", t));
}

{
const exprt result = get_subexpression_at_offset(s, 32 / 8, t, ns);
REQUIRE(result == member_exprt(s, "bar", t));
}

{
const exprt result =
get_subexpression_at_offset(s, from_integer(0, size_type()), t, ns);
REQUIRE(result == member_exprt(s, "foo", t));
}

{
const exprt result =
get_subexpression_at_offset(s, size_of_expr(t, ns), t, ns);
REQUIRE(result == member_exprt(s, "bar", t));
}

{
const signedbv_typet small_t(8);
const exprt result = get_subexpression_at_offset(s, 1, small_t, ns);
REQUIRE(
result == byte_extract_exprt(
byte_extract_id(),
member_exprt(s, "foo", t),
from_integer(1, index_type()),
small_t));
}
}