Skip to content

Commit 971178b

Browse files
committed
Simplify byte-extract from struct or union expressions
With rewrite_union introducing rewriting union accesses to byte-extract operations, simplifying such expressions must cover as many cases as possible. Constants were mostly already being rewritten via expr2bits, but that wouldn't be able to handle expressions involving pointers (even when they are effectively constants when using address-of operations). The additional simplification rules mimic what lower_byte_extract already did at a later stage, but such late rewrites would not benefit constant propagation done by goto-symex.
1 parent b26d347 commit 971178b

File tree

6 files changed

+153
-1
lines changed

6 files changed

+153
-1
lines changed

regression/cbmc/simplify-union/main.c

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
#include <assert.h>
2+
#include <stdlib.h>
3+
4+
struct O
5+
{
6+
char *ptr;
7+
unsigned len;
8+
};
9+
10+
struct E
11+
{
12+
int e;
13+
};
14+
15+
union U {
16+
struct O ok;
17+
struct E err;
18+
};
19+
20+
struct S
21+
{
22+
union U u;
23+
_Bool success;
24+
};
25+
26+
void alloc(struct S *s, unsigned size)
27+
{
28+
char *p = malloc(sizeof(char) * size);
29+
if(p != 0)
30+
{
31+
s->u.ok = (struct O){p, size};
32+
s->success = 1;
33+
}
34+
else
35+
{
36+
s->success = 0;
37+
}
38+
}
39+
40+
int main()
41+
{
42+
struct S s;
43+
alloc(&s, 2);
44+
if(s.success)
45+
{
46+
assert(s.u.ok.len == 2);
47+
s.u.ok.ptr = "a";
48+
}
49+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
CORE
2+
main.c
3+
4+
^Generated 1 VCC\(s\), 0 remaining after simplification$
5+
^VERIFICATION SUCCESSFUL$
6+
^EXIT=0$
7+
^SIGNAL=0$
8+
--
9+
^warning: ignoring
10+
--
11+
The len member should constant propagate with the help of simplification.

scripts/expected_doxygen_warnings.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ warning: Included by graph for 'invariant.h' not generated, too many nodes (187)
9292
warning: Included by graph for 'irep.h' not generated, too many nodes (62), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
9393
warning: Included by graph for 'message.h' not generated, too many nodes (117), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
9494
warning: Included by graph for 'namespace.h' not generated, too many nodes (109), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
95-
warning: Included by graph for 'pointer_expr.h' not generated, too many nodes (101), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
95+
warning: Included by graph for 'pointer_expr.h' not generated, too many nodes (102), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
9696
warning: Included by graph for 'prefix.h' not generated, too many nodes (86), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
9797
warning: Included by graph for 'simplify_expr.h' not generated, too many nodes (77), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.
9898
warning: Included by graph for 'std_code.h' not generated, too many nodes (78), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES.

src/pointer-analysis/value_set.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,11 @@ void value_sett::get_value_set_rec(
784784
get_value_set_rec(*it, dest, suffix, original_type, ns);
785785
}
786786
}
787+
else if(expr.id() == ID_union)
788+
{
789+
get_value_set_rec(
790+
to_union_expr(expr).op(), dest, suffix, original_type, ns);
791+
}
787792
else if(expr.id()==ID_with)
788793
{
789794
const with_exprt &with_expr = to_with_expr(expr);

src/util/pointer_offset_size.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Author: Daniel Kroening, [email protected]
1616
#include "c_types.h"
1717
#include "invariant.h"
1818
#include "namespace.h"
19+
#include "pointer_expr.h"
1920
#include "simplify_expr.h"
2021
#include "ssa_expr.h"
2122
#include "std_expr.h"
@@ -681,6 +682,27 @@ optionalt<exprt> get_subexpression_at_offset(
681682
}
682683
}
683684
}
685+
else if(
686+
object_descriptor_exprt(expr).root_object().id() == ID_union &&
687+
source_type.id() == ID_union)
688+
{
689+
const union_typet &union_type = to_union_type(source_type);
690+
691+
for(const auto &component : union_type.components())
692+
{
693+
const auto m_size_bits = pointer_offset_bits(component.type(), ns);
694+
if(!m_size_bits.has_value())
695+
continue;
696+
697+
// if this member completely contains the target, recurse into it
698+
if(offset_bytes * 8 + *target_size_bits <= *m_size_bits)
699+
{
700+
const member_exprt member(expr, component.get_name(), component.type());
701+
return get_subexpression_at_offset(
702+
member, offset_bytes, target_type_raw, ns);
703+
}
704+
}
705+
}
684706

685707
return byte_extract_exprt(
686708
byte_extract_id(),

src/util/simplify_expr.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1709,6 +1709,71 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr)
17091709
return std::move(*tmp);
17101710
}
17111711

1712+
// push byte extracts into struct or union expressions, just like
1713+
// lower_byte_extract does (this is the same code, except recursive calls use
1714+
// simplify rather than lower_byte_extract)
1715+
if(expr.op().id() == ID_struct || expr.op().id() == ID_union)
1716+
{
1717+
if(expr.type().id() == ID_struct || expr.type().id() == ID_struct_tag)
1718+
{
1719+
const struct_typet &struct_type = to_struct_type(ns.follow(expr.type()));
1720+
const struct_typet::componentst &components = struct_type.components();
1721+
1722+
bool failed = false;
1723+
struct_exprt s({}, expr.type());
1724+
1725+
for(const auto &comp : components)
1726+
{
1727+
auto component_bits = pointer_offset_bits(comp.type(), ns);
1728+
1729+
// the next member would be misaligned, abort
1730+
if(
1731+
!component_bits.has_value() || *component_bits == 0 ||
1732+
*component_bits % 8 != 0)
1733+
{
1734+
failed = true;
1735+
break;
1736+
}
1737+
1738+
auto member_offset_opt =
1739+
member_offset_expr(struct_type, comp.get_name(), ns);
1740+
1741+
if(!member_offset_opt.has_value())
1742+
{
1743+
failed = true;
1744+
break;
1745+
}
1746+
1747+
exprt new_offset = simplify_rec(
1748+
plus_exprt{expr.offset(),
1749+
typecast_exprt::conditional_cast(
1750+
member_offset_opt.value(), expr.offset().type())});
1751+
1752+
byte_extract_exprt tmp = expr;
1753+
tmp.type() = comp.type();
1754+
tmp.offset() = new_offset;
1755+
1756+
s.add_to_operands(simplify_byte_extract(tmp));
1757+
}
1758+
1759+
if(!failed)
1760+
return s;
1761+
}
1762+
else if(expr.type().id() == ID_union || expr.type().id() == ID_union_tag)
1763+
{
1764+
const union_typet &union_type = to_union_type(ns.follow(expr.type()));
1765+
auto widest_member_opt = union_type.find_widest_union_component(ns);
1766+
if(widest_member_opt.has_value())
1767+
{
1768+
byte_extract_exprt be = expr;
1769+
be.type() = widest_member_opt->first.type();
1770+
return union_exprt{widest_member_opt->first.get_name(),
1771+
simplify_byte_extract(be),
1772+
expr.type()};
1773+
}
1774+
}
1775+
}
1776+
17121777
// try to refine it down to extracting from a member or an index in an array
17131778
auto subexpr =
17141779
get_subexpression_at_offset(expr.op(), *offset, expr.type(), ns);

0 commit comments

Comments
 (0)