Skip to content

Commit c412a6b

Browse files
committed
Simplify byte-extract from struct or union expressions
With rewrite_union rewriting union accesses to byte-extract operations, simplifying such expressions must cover as many cases as possible. Constants were mostly already being rewritten via expr2bits, but that wouldn't be able to handle expressions involving pointers (even when they are effectively constants when using address-of operations). The additional simplification rules mimic what lower_byte_extract already did at a later stage, but such late rewrites would not benefit constant propagation done by goto-symex.
1 parent abba3d7 commit c412a6b

File tree

6 files changed

+153
-1
lines changed

6 files changed

+153
-1
lines changed

regression/cbmc/simplify-union/main.c

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#include <assert.h>
2+
#include <stdlib.h>
3+
4+
struct O
5+
{
6+
char *ptr;
7+
unsigned len;
8+
};
9+
10+
struct E
11+
{
12+
int e;
13+
};
14+
15+
union U {
16+
struct O ok;
17+
struct E err;
18+
};
19+
20+
struct S
21+
{
22+
union U u;
23+
_Bool success;
24+
};
25+
26+
void alloc(struct S *s, unsigned size)
27+
{
28+
char *p = malloc(sizeof(char) * size);
29+
if(p != 0)
30+
{
31+
s->u.ok = (struct O){p, size};
32+
s->success = 1;
33+
}
34+
else
35+
{
36+
s->success = 0;
37+
}
38+
}
39+
40+
int main()
41+
{
42+
struct S s;
43+
alloc(&s, 2);
44+
if(s.success)
45+
{
46+
assert(s.u.ok.len == 2);
47+
s.u.ok.ptr = "a";
48+
}
49+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
CORE
2+
main.c
3+
4+
^Generated 1 VCC\(s\), 0 remaining after simplification$
5+
^VERIFICATION SUCCESSFUL$
6+
^EXIT=0$
7+
^SIGNAL=0$
8+
--
9+
^warning: ignoring
10+
--
11+
The len member should constant propagate with the help of simplification.

scripts/expected_doxygen_warnings.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ warning: Included by graph for 'invariant.h' not generated, too many nodes (186)
3131
warning: Included by graph for 'irep.h' not generated, too many nodes (62), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
3232
warning: Included by graph for 'message.h' not generated, too many nodes (118), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
3333
warning: Included by graph for 'namespace.h' not generated, too many nodes (110), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
34-
warning: Included by graph for 'pointer_expr.h' not generated, too many nodes (116), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
34+
warning: Included by graph for 'pointer_expr.h' not generated, too many nodes (117), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
3535
warning: Included by graph for 'prefix.h' not generated, too many nodes (85), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
3636
warning: Included by graph for 'simplify_expr.h' not generated, too many nodes (79), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
3737
warning: Included by graph for 'std_code.h' not generated, too many nodes (79), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.

src/pointer-analysis/value_set.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,11 @@ void value_sett::get_value_set_rec(
784784
get_value_set_rec(*it, dest, suffix, original_type, ns);
785785
}
786786
}
787+
else if(expr.id() == ID_union)
788+
{
789+
get_value_set_rec(
790+
to_union_expr(expr).op(), dest, suffix, original_type, ns);
791+
}
787792
else if(expr.id()==ID_with)
788793
{
789794
const with_exprt &with_expr = to_with_expr(expr);

src/util/pointer_offset_size.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Author: Daniel Kroening, [email protected]
1616
#include "c_types.h"
1717
#include "invariant.h"
1818
#include "namespace.h"
19+
#include "pointer_expr.h"
1920
#include "simplify_expr.h"
2021
#include "ssa_expr.h"
2122
#include "std_expr.h"
@@ -681,6 +682,27 @@ optionalt<exprt> get_subexpression_at_offset(
681682
}
682683
}
683684
}
685+
else if(
686+
object_descriptor_exprt(expr).root_object().id() == ID_union &&
687+
source_type.id() == ID_union)
688+
{
689+
const union_typet &union_type = to_union_type(source_type);
690+
691+
for(const auto &component : union_type.components())
692+
{
693+
const auto m_size_bits = pointer_offset_bits(component.type(), ns);
694+
if(!m_size_bits.has_value())
695+
continue;
696+
697+
// if this member completely contains the target, recurse into it
698+
if(offset_bytes * 8 + *target_size_bits <= *m_size_bits)
699+
{
700+
const member_exprt member(expr, component.get_name(), component.type());
701+
return get_subexpression_at_offset(
702+
member, offset_bytes, target_type_raw, ns);
703+
}
704+
}
705+
}
684706

685707
return byte_extract_exprt(
686708
byte_extract_id(),

src/util/simplify_expr.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1733,6 +1733,71 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
17331733
return std::move(*tmp);
17341734
}
17351735

1736+
// push byte extracts into struct or union expressions, just like
1737+
// lower_byte_extract does (this is the same code, except recursive calls use
1738+
// simplify rather than lower_byte_extract)
1739+
if(expr.op().id() == ID_struct || expr.op().id() == ID_union)
1740+
{
1741+
if(expr.type().id() == ID_struct || expr.type().id() == ID_struct_tag)
1742+
{
1743+
const struct_typet &struct_type = to_struct_type(ns.follow(expr.type()));
1744+
const struct_typet::componentst &components = struct_type.components();
1745+
1746+
bool failed = false;
1747+
struct_exprt s({}, expr.type());
1748+
1749+
for(const auto &comp : components)
1750+
{
1751+
auto component_bits = pointer_offset_bits(comp.type(), ns);
1752+
1753+
// the next member would be misaligned, abort
1754+
if(
1755+
!component_bits.has_value() || *component_bits == 0 ||
1756+
*component_bits % 8 != 0)
1757+
{
1758+
failed = true;
1759+
break;
1760+
}
1761+
1762+
auto member_offset_opt =
1763+
member_offset_expr(struct_type, comp.get_name(), ns);
1764+
1765+
if(!member_offset_opt.has_value())
1766+
{
1767+
failed = true;
1768+
break;
1769+
}
1770+
1771+
exprt new_offset = simplify_rec(
1772+
plus_exprt{expr.offset(),
1773+
typecast_exprt::conditional_cast(
1774+
member_offset_opt.value(), expr.offset().type())});
1775+
1776+
byte_extract_exprt tmp = expr;
1777+
tmp.type() = comp.type();
1778+
tmp.offset() = new_offset;
1779+
1780+
s.add_to_operands(simplify_byte_extract(tmp));
1781+
}
1782+
1783+
if(!failed)
1784+
return s;
1785+
}
1786+
else if(expr.type().id() == ID_union || expr.type().id() == ID_union_tag)
1787+
{
1788+
const union_typet &union_type = to_union_type(ns.follow(expr.type()));
1789+
auto widest_member_opt = union_type.find_widest_union_component(ns);
1790+
if(widest_member_opt.has_value())
1791+
{
1792+
byte_extract_exprt be = expr;
1793+
be.type() = widest_member_opt->first.type();
1794+
return union_exprt{widest_member_opt->first.get_name(),
1795+
simplify_byte_extract(be),
1796+
expr.type()};
1797+
}
1798+
}
1799+
}
1800+
17361801
// try to refine it down to extracting from a member or an index in an array
17371802
auto subexpr =
17381803
get_subexpression_at_offset(expr.op(), *offset, expr.type(), ns);

0 commit comments

Comments
 (0)