Skip to content

Simplify byte-extract from struct or union expressions #5873

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 49 additions & 0 deletions regression/cbmc/simplify-union/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#include <assert.h>
#include <stdlib.h>

struct O
{
char *ptr;
unsigned len;
};

struct E
{
int e;
};

union U {
struct O ok;
struct E err;
};

struct S
{
union U u;
_Bool success;
};

void alloc(struct S *s, unsigned size)
{
char *p = malloc(sizeof(char) * size);
if(p != 0)
{
s->u.ok = (struct O){p, size};
s->success = 1;
}
else
{
s->success = 0;
}
}

int main()
{
struct S s;
alloc(&s, 2);
if(s.success)
{
assert(s.u.ok.len == 2);
s.u.ok.ptr = "a";
}
}
11 changes: 11 additions & 0 deletions regression/cbmc/simplify-union/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
CORE
main.c

^Generated 1 VCC\(s\), 0 remaining after simplification$
^VERIFICATION SUCCESSFUL$
^EXIT=0$
^SIGNAL=0$
--
^warning: ignoring
--
The len member should constant propagate with the help of simplification.
2 changes: 1 addition & 1 deletion scripts/expected_doxygen_warnings.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ warning: Included by graph for 'invariant.h' not generated, too many nodes (172)
warning: Included by graph for 'irep.h' not generated, too many nodes (80), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
warning: Included by graph for 'message.h' not generated, too many nodes (97), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
warning: Included by graph for 'namespace.h' not generated, too many nodes (88), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
warning: Included by graph for 'pointer_expr.h' not generated, too many nodes (121), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
warning: Included by graph for 'pointer_expr.h' not generated, too many nodes (122), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
warning: Included by graph for 'prefix.h' not generated, too many nodes (62), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
warning: Included by graph for 'simplify_expr.h' not generated, too many nodes (67), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
warning: Included by graph for 'std_code.h' not generated, too many nodes (71), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
Expand Down
5 changes: 5 additions & 0 deletions src/pointer-analysis/value_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,11 @@ void value_sett::get_value_set_rec(
get_value_set_rec(*it, dest, suffix, original_type, ns);
}
}
else if(expr.id() == ID_union)
{
get_value_set_rec(
to_union_expr(expr).op(), dest, suffix, original_type, ns);
}
else if(expr.id()==ID_with)
{
const with_exprt &with_expr = to_with_expr(expr);
Expand Down
22 changes: 22 additions & 0 deletions src/util/pointer_offset_size.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ Author: Daniel Kroening, [email protected]
#include "c_types.h"
#include "invariant.h"
#include "namespace.h"
#include "pointer_expr.h"
#include "simplify_expr.h"
#include "ssa_expr.h"
#include "std_expr.h"
Expand Down Expand Up @@ -681,6 +682,27 @@ optionalt<exprt> get_subexpression_at_offset(
}
}
}
else if(
object_descriptor_exprt(expr).root_object().id() == ID_union &&
source_type.id() == ID_union)
{
const union_typet &union_type = to_union_type(source_type);

for(const auto &component : union_type.components())
{
const auto m_size_bits = pointer_offset_bits(component.type(), ns);
if(!m_size_bits.has_value())
continue;

// if this member completely contains the target, recurse into it
if(offset_bytes * 8 + *target_size_bits <= *m_size_bits)
{
const member_exprt member(expr, component.get_name(), component.type());
return get_subexpression_at_offset(
member, offset_bytes, target_type_raw, ns);
}
}
}

return make_byte_extract(
expr, from_integer(offset_bytes, index_type()), target_type_raw);
Expand Down
65 changes: 65 additions & 0 deletions src/util/simplify_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1765,6 +1765,71 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
return std::move(*tmp);
}

// push byte extracts into struct or union expressions, just like
// lower_byte_extract does (this is the same code, except recursive calls use
// simplify rather than lower_byte_extract)
if(expr.op().id() == ID_struct || expr.op().id() == ID_union)
{
if(expr.type().id() == ID_struct || expr.type().id() == ID_struct_tag)
{
const struct_typet &struct_type = to_struct_type(ns.follow(expr.type()));
const struct_typet::componentst &components = struct_type.components();

bool failed = false;
struct_exprt s({}, expr.type());

for(const auto &comp : components)
{
auto component_bits = pointer_offset_bits(comp.type(), ns);

// the next member would be misaligned, abort
if(
!component_bits.has_value() || *component_bits == 0 ||
*component_bits % 8 != 0)
{
failed = true;
break;
}

auto member_offset_opt =
member_offset_expr(struct_type, comp.get_name(), ns);

if(!member_offset_opt.has_value())
{
failed = true;
break;
}

exprt new_offset = simplify_rec(
plus_exprt{expr.offset(),
typecast_exprt::conditional_cast(
member_offset_opt.value(), expr.offset().type())});

byte_extract_exprt tmp = expr;
tmp.type() = comp.type();
tmp.offset() = new_offset;

s.add_to_operands(simplify_byte_extract(tmp));
}

if(!failed)
return s;
}
else if(expr.type().id() == ID_union || expr.type().id() == ID_union_tag)
{
const union_typet &union_type = to_union_type(ns.follow(expr.type()));
auto widest_member_opt = union_type.find_widest_union_component(ns);
if(widest_member_opt.has_value())
{
byte_extract_exprt be = expr;
be.type() = widest_member_opt->first.type();
return union_exprt{widest_member_opt->first.get_name(),
simplify_byte_extract(be),
expr.type()};
}
}
}

// try to refine it down to extracting from a member or an index in an array
auto subexpr =
get_subexpression_at_offset(expr.op(), *offset, expr.type(), ns);
Expand Down