Skip to content

Commit a6f29ba

Browse files
committed
Simplify byte-extract from struct or union expressions
With rewrite_union introducing rewriting union accesses to byte-extract operations, simplifying such expressions must cover as many cases as possible. Constants were mostly already being rewritten via expr2bits, but that wouldn't be able to handle expressions involving pointers (even when they are effectively constants when using address-of operations). The additional simplification rules mimic what lower_byte_extract already did at a later stage, but such late rewrites would not benefit constant propagation done by goto-symex.
1 parent e317683 commit a6f29ba

File tree

2 files changed

+87
-0
lines changed

2 files changed

+87
-0
lines changed

src/util/pointer_offset_size.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Author: Daniel Kroening, [email protected]
1616
#include "c_types.h"
1717
#include "invariant.h"
1818
#include "namespace.h"
19+
#include "pointer_expr.h"
1920
#include "simplify_expr.h"
2021
#include "ssa_expr.h"
2122
#include "std_expr.h"
@@ -681,6 +682,27 @@ optionalt<exprt> get_subexpression_at_offset(
681682
}
682683
}
683684
}
685+
else if(
686+
object_descriptor_exprt(expr).root_object().id() == ID_union &&
687+
source_type.id() == ID_union)
688+
{
689+
const union_typet &union_type = to_union_type(source_type);
690+
691+
for(const auto &component : union_type.components())
692+
{
693+
const auto m_size_bits = pointer_offset_bits(component.type(), ns);
694+
if(!m_size_bits.has_value())
695+
continue;
696+
697+
// if this member completely contains the target, recurse into it
698+
if(offset_bytes * 8 + *target_size_bits <= *m_size_bits)
699+
{
700+
const member_exprt member(expr, component.get_name(), component.type());
701+
return get_subexpression_at_offset(
702+
member, offset_bytes, target_type_raw, ns);
703+
}
704+
}
705+
}
684706

685707
return byte_extract_exprt(
686708
byte_extract_id(),

src/util/simplify_expr.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1709,6 +1709,71 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
17091709
return std::move(*tmp);
17101710
}
17111711

1712+
// push byte extracts into struct or union expressions, just like
1713+
// lower_byte_extract does (this is the same code, except recursive calls use
1714+
// simplify rather than lower_byte_extract)
1715+
if(expr.op().id() == ID_struct || expr.op().id() == ID_union)
1716+
{
1717+
if(expr.type().id() == ID_struct || expr.type().id() == ID_struct_tag)
1718+
{
1719+
const struct_typet &struct_type = to_struct_type(ns.follow(expr.type()));
1720+
const struct_typet::componentst &components = struct_type.components();
1721+
1722+
bool failed = false;
1723+
struct_exprt s({}, expr.type());
1724+
1725+
for(const auto &comp : components)
1726+
{
1727+
auto component_bits = pointer_offset_bits(comp.type(), ns);
1728+
1729+
// the next member would be misaligned, abort
1730+
if(
1731+
!component_bits.has_value() || *component_bits == 0 ||
1732+
*component_bits % 8 != 0)
1733+
{
1734+
failed = true;
1735+
break;
1736+
}
1737+
1738+
auto member_offset_opt =
1739+
member_offset_expr(struct_type, comp.get_name(), ns);
1740+
1741+
if(!member_offset_opt.has_value())
1742+
{
1743+
failed = true;
1744+
break;
1745+
}
1746+
1747+
exprt new_offset = simplify_rec(
1748+
plus_exprt{expr.offset(),
1749+
typecast_exprt::conditional_cast(
1750+
member_offset_opt.value(), expr.offset().type())});
1751+
1752+
byte_extract_exprt tmp = expr;
1753+
tmp.type() = comp.type();
1754+
tmp.offset() = new_offset;
1755+
1756+
s.add_to_operands(simplify_byte_extract(tmp));
1757+
}
1758+
1759+
if(!failed)
1760+
return s;
1761+
}
1762+
else if(expr.type().id() == ID_union || expr.type().id() == ID_union_tag)
1763+
{
1764+
const union_typet &union_type = to_union_type(ns.follow(expr.type()));
1765+
auto widest_member_opt = union_type.find_widest_union_component(ns);
1766+
if(widest_member_opt.has_value())
1767+
{
1768+
byte_extract_exprt be = expr;
1769+
be.type() = widest_member_opt->first.type();
1770+
return union_exprt{widest_member_opt->first.get_name(),
1771+
simplify_byte_extract(be),
1772+
expr.type()};
1773+
}
1774+
}
1775+
}
1776+
17121777
// try to refine it down to extracting from a member or an index in an array
17131778
auto subexpr =
17141779
get_subexpression_at_offset(expr.op(), *offset, expr.type(), ns);

0 commit comments

Comments
 (0)