From 6544f7c267d583c108372cd1c5b981c2ab0ab915 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Sat, 27 Feb 2021 01:49:19 +0000 Subject: [PATCH] Simplify byte-extract from struct or union expressions With rewrite_union rewriting union accesses to byte-extract operations, simplifying such expressions must cover as many cases as possible. Constants were mostly already being rewritten via expr2bits, but that wouldn't be able to handle expressions involving pointers (even when they are effectively constants when using address-of operations). The additional simplification rules mimic what lower_byte_extract already did at a later stage, but such late rewrites would not benefit constant propagation done by goto-symex. --- regression/cbmc/simplify-union/main.c | 49 ++++++++++++++++++ regression/cbmc/simplify-union/test.desc | 11 ++++ scripts/expected_doxygen_warnings.txt | 2 +- src/pointer-analysis/value_set.cpp | 5 ++ src/util/pointer_offset_size.cpp | 22 ++++++++ src/util/simplify_expr.cpp | 65 ++++++++++++++++++++++++ 6 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 regression/cbmc/simplify-union/main.c create mode 100644 regression/cbmc/simplify-union/test.desc diff --git a/regression/cbmc/simplify-union/main.c b/regression/cbmc/simplify-union/main.c new file mode 100644 index 00000000000..7a4a43522b4 --- /dev/null +++ b/regression/cbmc/simplify-union/main.c @@ -0,0 +1,49 @@ +#include +#include + +struct O +{ + char *ptr; + unsigned len; +}; + +struct E +{ + int e; +}; + +union U { + struct O ok; + struct E err; +}; + +struct S +{ + union U u; + _Bool success; +}; + +void alloc(struct S *s, unsigned size) +{ + char *p = malloc(sizeof(char) * size); + if(p != 0) + { + s->u.ok = (struct O){p, size}; + s->success = 1; + } + else + { + s->success = 0; + } +} + +int main() +{ + struct S s; + alloc(&s, 2); + if(s.success) + { + assert(s.u.ok.len == 2); + s.u.ok.ptr = "a"; + } +} diff --git a/regression/cbmc/simplify-union/test.desc b/regression/cbmc/simplify-union/test.desc new file mode 100644 index 00000000000..de8c67be832 --- /dev/null +++ b/regression/cbmc/simplify-union/test.desc @@ -0,0 +1,11 @@ +CORE +main.c + +^Generated 1 VCC\(s\), 0 remaining after simplification$ +^VERIFICATION SUCCESSFUL$ +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +-- +The len member should constant propagate with the help of simplification. diff --git a/scripts/expected_doxygen_warnings.txt b/scripts/expected_doxygen_warnings.txt index fc1b1075f9c..cb512d21ff6 100644 --- a/scripts/expected_doxygen_warnings.txt +++ b/scripts/expected_doxygen_warnings.txt @@ -7,7 +7,7 @@ warning: Included by graph for 'invariant.h' not generated, too many nodes (172) warning: Included by graph for 'irep.h' not generated, too many nodes (80), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. warning: Included by graph for 'message.h' not generated, too many nodes (97), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. warning: Included by graph for 'namespace.h' not generated, too many nodes (88), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. -warning: Included by graph for 'pointer_expr.h' not generated, too many nodes (121), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. +warning: Included by graph for 'pointer_expr.h' not generated, too many nodes (122), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. warning: Included by graph for 'prefix.h' not generated, too many nodes (62), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. warning: Included by graph for 'simplify_expr.h' not generated, too many nodes (67), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. warning: Included by graph for 'std_code.h' not generated, too many nodes (71), threshold is 60. Consider increasing DOT_GRAPH_MAX_NODES. diff --git a/src/pointer-analysis/value_set.cpp b/src/pointer-analysis/value_set.cpp index 972027af251..ab470f94bfc 100644 --- a/src/pointer-analysis/value_set.cpp +++ b/src/pointer-analysis/value_set.cpp @@ -784,6 +784,11 @@ void value_sett::get_value_set_rec( get_value_set_rec(*it, dest, suffix, original_type, ns); } } + else if(expr.id() == ID_union) + { + get_value_set_rec( + to_union_expr(expr).op(), dest, suffix, original_type, ns); + } else if(expr.id()==ID_with) { const with_exprt &with_expr = to_with_expr(expr); diff --git a/src/util/pointer_offset_size.cpp b/src/util/pointer_offset_size.cpp index 0ca699855cc..55dd1519a65 100644 --- a/src/util/pointer_offset_size.cpp +++ b/src/util/pointer_offset_size.cpp @@ -16,6 +16,7 @@ Author: Daniel Kroening, kroening@kroening.com #include "c_types.h" #include "invariant.h" #include "namespace.h" +#include "pointer_expr.h" #include "simplify_expr.h" #include "ssa_expr.h" #include "std_expr.h" @@ -681,6 +682,27 @@ optionalt get_subexpression_at_offset( } } } + else if( + object_descriptor_exprt(expr).root_object().id() == ID_union && + source_type.id() == ID_union) + { + const union_typet &union_type = to_union_type(source_type); + + for(const auto &component : union_type.components()) + { + const auto m_size_bits = pointer_offset_bits(component.type(), ns); + if(!m_size_bits.has_value()) + continue; + + // if this member completely contains the target, recurse into it + if(offset_bytes * 8 + *target_size_bits <= *m_size_bits) + { + const member_exprt member(expr, component.get_name(), component.type()); + return get_subexpression_at_offset( + member, offset_bytes, target_type_raw, ns); + } + } + } return make_byte_extract( expr, from_integer(offset_bytes, index_type()), target_type_raw); diff --git a/src/util/simplify_expr.cpp b/src/util/simplify_expr.cpp index 8a0756ce077..53798fd905b 100644 --- a/src/util/simplify_expr.cpp +++ b/src/util/simplify_expr.cpp @@ -1765,6 +1765,71 @@ simplify_exprt::simplify_byte_extract(const byte_extract_exprt &expr) return std::move(*tmp); } + // push byte extracts into struct or union expressions, just like + // lower_byte_extract does (this is the same code, except recursive calls use + // simplify rather than lower_byte_extract) + if(expr.op().id() == ID_struct || expr.op().id() == ID_union) + { + if(expr.type().id() == ID_struct || expr.type().id() == ID_struct_tag) + { + const struct_typet &struct_type = to_struct_type(ns.follow(expr.type())); + const struct_typet::componentst &components = struct_type.components(); + + bool failed = false; + struct_exprt s({}, expr.type()); + + for(const auto &comp : components) + { + auto component_bits = pointer_offset_bits(comp.type(), ns); + + // the next member would be misaligned, abort + if( + !component_bits.has_value() || *component_bits == 0 || + *component_bits % 8 != 0) + { + failed = true; + break; + } + + auto member_offset_opt = + member_offset_expr(struct_type, comp.get_name(), ns); + + if(!member_offset_opt.has_value()) + { + failed = true; + break; + } + + exprt new_offset = simplify_rec( + plus_exprt{expr.offset(), + typecast_exprt::conditional_cast( + member_offset_opt.value(), expr.offset().type())}); + + byte_extract_exprt tmp = expr; + tmp.type() = comp.type(); + tmp.offset() = new_offset; + + s.add_to_operands(simplify_byte_extract(tmp)); + } + + if(!failed) + return s; + } + else if(expr.type().id() == ID_union || expr.type().id() == ID_union_tag) + { + const union_typet &union_type = to_union_type(ns.follow(expr.type())); + auto widest_member_opt = union_type.find_widest_union_component(ns); + if(widest_member_opt.has_value()) + { + byte_extract_exprt be = expr; + be.type() = widest_member_opt->first.type(); + return union_exprt{widest_member_opt->first.get_name(), + simplify_byte_extract(be), + expr.type()}; + } + } + } + // try to refine it down to extracting from a member or an index in an array auto subexpr = get_subexpression_at_offset(expr.op(), *offset, expr.type(), ns);