diff --git a/src/pointer-analysis/value_set_dereference.cpp b/src/pointer-analysis/value_set_dereference.cpp index bf552be57f6..549b247dceb 100644 --- a/src/pointer-analysis/value_set_dereference.cpp +++ b/src/pointer-analysis/value_set_dereference.cpp @@ -468,9 +468,10 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to( } else { + // try to build a member/index expression - do not use byte_extract exprt subexpr = get_subexpression_at_offset( root_object_subexpression, o.offset(), dereference_type, ns); - if(subexpr.is_not_nil()) + if(subexpr.is_not_nil() && subexpr.id() != byte_extract_id()) { // Successfully found a member, array index, or combination thereof // that matches the desired type and offset: diff --git a/src/util/pointer_offset_size.cpp b/src/util/pointer_offset_size.cpp index 653ab08812f..7c2fc632bc1 100644 --- a/src/util/pointer_offset_size.cpp +++ b/src/util/pointer_offset_size.cpp @@ -12,6 +12,8 @@ Author: Daniel Kroening, kroening@kroening.com #include "pointer_offset_size.h" #include "arith_tools.h" +#include "base_type.h" +#include "byte_operators.h" #include "c_types.h" #include "invariant.h" #include "namespace.h" @@ -676,67 +678,82 @@ exprt build_sizeof_expr( exprt get_subexpression_at_offset( const exprt &expr, - mp_integer offset, + const mp_integer &offset_bytes, const typet &target_type_raw, const namespacet &ns) { - exprt result = expr; - const typet &source_type=ns.follow(result.type()); - const typet &target_type=ns.follow(target_type_raw); + if(offset_bytes == 0 && base_type_eq(expr.type(), target_type_raw, ns)) + { + exprt result = expr; + + if(expr.type() != target_type_raw) + result.type() = target_type_raw; - if(offset==0 && source_type==target_type) return result; + } + + const typet &source_type = ns.follow(expr.type()); + const auto target_size_bits = pointer_offset_bits(target_type_raw, ns); + if(!target_size_bits.has_value()) + return nil_exprt(); if(source_type.id()==ID_struct) { - const auto &st=to_struct_type(source_type); - const struct_typet::componentst &components=st.components(); - member_offset_iterator offsets(st, ns); - while(offsets->firstsecond!=-1 && - offsets->second<=offset) + const struct_typet &struct_type = to_struct_type(source_type); + + mp_integer m_offset_bits = 0; + for(const auto &component : struct_type.components()) { - auto nextit=offsets; - ++nextit; - if((offsets->first+1)==components.size() || offsetsecond) + const auto m_size_bits = pointer_offset_bits(component.type(), ns); + if(!m_size_bits.has_value()) + return nil_exprt(); + + // if this member completely contains the target, and this member is + // byte-aligned, recurse into it + if( + offset_bytes * 8 >= m_offset_bits && m_offset_bits % 8 == 0 && + offset_bytes * 8 + *target_size_bits <= m_offset_bits + *m_size_bits) { - // This field might be, or might contain, the answer. - result= - member_exprt( - result, - components[offsets->first].get_name(), - components[offsets->first].type()); - return - get_subexpression_at_offset( - result, offset-offsets->second, target_type, ns); + const member_exprt member(expr, component.get_name(), component.type()); + return get_subexpression_at_offset( + member, offset_bytes - m_offset_bits / 8, target_type_raw, ns); } - ++offsets; + + m_offset_bits += *m_size_bits; } - return nil_exprt(); } else if(source_type.id()==ID_array) { - // A cell of the array might be, or contain, the subexpression - // we're looking for. - const auto &at=to_array_type(source_type); + const array_typet &array_type = to_array_type(source_type); - auto elem_size = pointer_offset_size(at.subtype(), ns); + const auto elem_size_bits = pointer_offset_bits(array_type.subtype(), ns); - if(!elem_size.has_value()) - return nil_exprt(); - - mp_integer cellidx = offset / (*elem_size); - - if(cellidx < 0 || !cellidx.is_long()) + // no arrays of non-byte-aligned, zero-, or unknown-sized objects + if( + !elem_size_bits.has_value() || *elem_size_bits == 0 || + *elem_size_bits % 8 != 0) + { return nil_exprt(); + } - offset = offset % (*elem_size); - - result=index_exprt(result, from_integer(cellidx, unsignedbv_typet(64))); - return get_subexpression_at_offset(result, offset, target_type, ns); + if(*target_size_bits <= *elem_size_bits) + { + const mp_integer elem_size_bytes = *elem_size_bits / 8; + return get_subexpression_at_offset( + index_exprt( + expr, from_integer(offset_bytes / elem_size_bytes, index_type())), + offset_bytes % elem_size_bytes, + target_type_raw, + ns); + } } - else - return nil_exprt(); + + const byte_extract_exprt be( + byte_extract_id(), + expr, + from_integer(offset_bytes, index_type()), + target_type_raw); + return simplify_expr(be, ns); } exprt get_subexpression_at_offset( diff --git a/src/util/pointer_offset_size.h b/src/util/pointer_offset_size.h index 340f02a049d..2219cbe1e55 100644 --- a/src/util/pointer_offset_size.h +++ b/src/util/pointer_offset_size.h @@ -81,7 +81,7 @@ exprt build_sizeof_expr( exprt get_subexpression_at_offset( const exprt &expr, - mp_integer offset, + const mp_integer &offset, const typet &target_type, const namespacet &ns); diff --git a/src/util/simplify_expr.cpp b/src/util/simplify_expr.cpp index 60b0bfdcb5b..499ec065ae4 100644 --- a/src/util/simplify_expr.cpp +++ b/src/util/simplify_expr.cpp @@ -1679,17 +1679,26 @@ bool simplify_exprt::simplify_byte_extract(byte_extract_exprt &expr) if(to_integer(expr.offset(), offset) || offset<0) return true; - // byte extract of full object is object // don't do any of the following if endianness doesn't match, as // bytes need to be swapped - if( - offset == 0 && base_type_eq(expr.type(), expr.op().type(), ns) && - byte_extract_id() == expr.id()) + if(offset == 0 && byte_extract_id() == expr.id()) { - exprt tmp=expr.op(); - expr.swap(tmp); + // byte extract of full object is object + if(base_type_eq(expr.type(), expr.op().type(), ns)) + { + exprt tmp = expr.op(); + expr.swap(tmp); - return false; + return false; + } + else if( + expr.type().id() == ID_pointer && expr.op().type().id() == ID_pointer) + { + typecast_exprt tc(expr.op(), expr.type()); + expr.swap(tc); + + return false; + } } // no proper simplification for expr.type()==void diff --git a/unit/Makefile b/unit/Makefile index b6fe09fc521..ab66d42db41 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -40,6 +40,7 @@ SRC += analyses/ai/ai.cpp \ util/irep_sharing.cpp \ util/message.cpp \ util/optional.cpp \ + util/pointer_offset_size.cpp \ util/replace_symbol.cpp \ util/sharing_map.cpp \ util/sharing_node.cpp \ diff --git a/unit/util/pointer_offset_size.cpp b/unit/util/pointer_offset_size.cpp new file mode 100644 index 00000000000..64f301f3a8b --- /dev/null +++ b/unit/util/pointer_offset_size.cpp @@ -0,0 +1,120 @@ +/*******************************************************************\ + + Module: Unit tests of expression size/offset computation + + Author: Michael Tautschnig + +\*******************************************************************/ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +TEST_CASE("Build subexpression to access element at offset into array") +{ + // this test does require a proper architecture to be set so that byte extract + // uses adequate endianness + cmdlinet cmdline; + config.set(cmdline); + + symbol_tablet symbol_table; + namespacet ns(symbol_table); + + const signedbv_typet t(32); + + array_typet array_type(t, from_integer(2, size_type())); + symbol_exprt a("array", array_type); + + { + const exprt result = get_subexpression_at_offset(a, 0, t, ns); + REQUIRE(result == index_exprt(a, from_integer(0, index_type()))); + } + + { + const exprt result = get_subexpression_at_offset(a, 32 / 8, t, ns); + REQUIRE(result == index_exprt(a, from_integer(1, index_type()))); + } + + { + const exprt result = + get_subexpression_at_offset(a, from_integer(0, size_type()), t, ns); + REQUIRE(result == index_exprt(a, from_integer(0, index_type()))); + } + + { + const exprt result = + get_subexpression_at_offset(a, size_of_expr(t, ns), t, ns); + REQUIRE(result == index_exprt(a, from_integer(1, index_type()))); + } + + { + const signedbv_typet small_t(8); + const exprt result = get_subexpression_at_offset(a, 1, small_t, ns); + REQUIRE( + result == byte_extract_exprt( + byte_extract_id(), + index_exprt(a, from_integer(0, index_type())), + from_integer(1, index_type()), + small_t)); + } +} + +TEST_CASE("Build subexpression to access element at offset into struct") +{ + // this test does require a proper architecture to be set so that byte extract + // uses adequate endianness + cmdlinet cmdline; + config.set(cmdline); + + symbol_tablet symbol_table; + namespacet ns(symbol_table); + + const signedbv_typet t(32); + + struct_typet st; + st.components().emplace_back("foo", t); + st.components().emplace_back("bar", t); + + symbol_exprt s("struct", st); + + { + const exprt result = get_subexpression_at_offset(s, 0, t, ns); + REQUIRE(result == member_exprt(s, "foo", t)); + } + + { + const exprt result = get_subexpression_at_offset(s, 32 / 8, t, ns); + REQUIRE(result == member_exprt(s, "bar", t)); + } + + { + const exprt result = + get_subexpression_at_offset(s, from_integer(0, size_type()), t, ns); + REQUIRE(result == member_exprt(s, "foo", t)); + } + + { + const exprt result = + get_subexpression_at_offset(s, size_of_expr(t, ns), t, ns); + REQUIRE(result == member_exprt(s, "bar", t)); + } + + { + const signedbv_typet small_t(8); + const exprt result = get_subexpression_at_offset(s, 1, small_t, ns); + REQUIRE( + result == byte_extract_exprt( + byte_extract_id(), + member_exprt(s, "foo", t), + from_integer(1, index_type()), + small_t)); + } +}