Skip to content

Support byte-extracting from unions of non-fixed size #5252

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 109 additions & 40 deletions src/solvers/lowering/byte_operators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,41 @@ Author: Daniel Kroening, [email protected]
#include <util/simplify_expr.h>
#include <util/string_constant.h>

/// Determine the member of maximum fixed bit width in a union type. If no
/// member, or no member of fixed and non-zero width can be found, return
/// nullopt.
/// \param union_type: Type to determine the member of.
/// \param ns: Namespace to resolve tag types.
/// \return Pair of a componentt pointing to the maximum fixed bit-width
/// member of \p union_type and the bit width of that member.
static optionalt<std::pair<struct_union_typet::componentt, mp_integer>>
find_widest_union_component(const union_typet &union_type, const namespacet &ns)
{
const union_typet::componentst &components = union_type.components();

mp_integer max_width = 0;
typet max_comp_type;
irep_idt max_comp_name;

for(const auto &comp : components)
{
auto element_width = pointer_offset_bits(comp.type(), ns);

if(!element_width.has_value() || *element_width <= max_width)
continue;

max_width = *element_width;
max_comp_type = comp.type();
max_comp_name = comp.get_name();
}

if(max_width == 0)
return {};
else
return std::make_pair(
struct_union_typet::componentt{max_comp_name, max_comp_type}, max_width);
}

static exprt bv_to_expr(
const exprt &bitvector_expr,
const typet &target_type,
Expand Down Expand Up @@ -106,6 +141,53 @@ static struct_exprt bv_to_struct_expr(
return struct_exprt{std::move(operands), struct_type};
}

/// Convert a bitvector-typed expression \p bitvector_expr to a union-typed
/// expression. See \ref bv_to_expr for an overview.
static union_exprt bv_to_union_expr(
const exprt &bitvector_expr,
const union_typet &union_type,
const endianness_mapt &endianness_map,
const namespacet &ns)
{
const union_typet::componentst &components = union_type.components();

// empty union, handled the same way as done in expr_initializert
if(components.empty())
return union_exprt{irep_idt{}, nil_exprt{}, union_type};

const auto widest_member = find_widest_union_component(union_type, ns);

std::size_t component_bits;
if(widest_member.has_value())
component_bits = numeric_cast_v<std::size_t>(widest_member->second);
else
component_bits = to_bitvector_type(bitvector_expr.type()).get_width();

if(component_bits == 0)
{
return union_exprt{components.front().get_name(),
constant_exprt{irep_idt{}, components.front().type()},
union_type};
}

const auto bounds = map_bounds(endianness_map, 0, component_bits - 1);
bitvector_typet type{bitvector_expr.type().id(), component_bits};
const irep_idt &component_name = widest_member.has_value()
? widest_member->first.get_name()
: components.front().get_name();
const typet &component_type = widest_member.has_value()
? widest_member->first.type()
: components.front().type();
return union_exprt{
component_name,
bv_to_expr(
extractbits_exprt{bitvector_expr, bounds.ub, bounds.lb, std::move(type)},
component_type,
endianness_map,
ns),
union_type};
}

/// Convert a bitvector-typed expression \p bitvector_expr to an array-typed
/// expression. See \ref bv_to_expr for an overview.
static array_exprt bv_to_array_expr(
Expand Down Expand Up @@ -294,6 +376,21 @@ static exprt bv_to_expr(
result.type() = target_type;
return std::move(result);
}
else if(target_type.id() == ID_union)
{
return bv_to_union_expr(
bitvector_expr, to_union_type(target_type), endianness_map, ns);
}
else if(target_type.id() == ID_union_tag)
{
union_exprt result = bv_to_union_expr(
bitvector_expr,
ns.follow_tag(to_union_tag_type(target_type)),
endianness_map,
ns);
result.type() = target_type;
return std::move(result);
}
else if(target_type.id() == ID_array)
{
return bv_to_array_expr(
Expand Down Expand Up @@ -1124,31 +1221,18 @@ exprt lower_byte_extract(const byte_extract_exprt &src, const namespacet &ns)
else if(src.type().id() == ID_union || src.type().id() == ID_union_tag)
{
const union_typet &union_type = to_union_type(ns.follow(src.type()));
const union_typet::componentst &components = union_type.components();

mp_integer max_width = 0;
typet max_comp_type;
irep_idt max_comp_name;

for(const auto &comp : components)
{
auto element_width = pointer_offset_bits(comp.type(), ns);

if(!element_width.has_value() || *element_width <= max_width)
continue;

max_width = *element_width;
max_comp_type = comp.type();
max_comp_name = comp.get_name();
}
const auto widest_member = find_widest_union_component(union_type, ns);

if(max_width > 0)
if(widest_member.has_value())
{
byte_extract_exprt tmp(unpacked);
tmp.type() = max_comp_type;
tmp.type() = widest_member->first.type();

return union_exprt(
max_comp_name, lower_byte_extract(tmp, ns), src.type());
widest_member->first.get_name(),
lower_byte_extract(tmp, ns),
src.type());
}
}

Expand Down Expand Up @@ -1945,34 +2029,19 @@ static exprt lower_byte_update_union(
const optionalt<exprt> &non_const_update_bound,
const namespacet &ns)
{
const union_typet::componentst &components = union_type.components();

mp_integer max_width = 0;
typet max_comp_type;
irep_idt max_comp_name;

for(const auto &comp : components)
{
auto element_width = pointer_offset_bits(comp.type(), ns);

if(!element_width.has_value() || *element_width <= max_width)
continue;

max_width = *element_width;
max_comp_type = comp.type();
max_comp_name = comp.get_name();
}
const auto widest_member = find_widest_union_component(union_type, ns);

PRECONDITION_WITH_DIAGNOSTICS(
max_width > 0,
widest_member.has_value(),
"lower_byte_update of union of unknown size is not supported");

byte_update_exprt bu = src;
bu.set_op(member_exprt{src.op(), max_comp_name, max_comp_type});
bu.type() = max_comp_type;
bu.set_op(member_exprt{
src.op(), widest_member->first.get_name(), widest_member->first.type()});
bu.type() = widest_member->first.type();

return union_exprt{
max_comp_name,
widest_member->first.get_name(),
lower_byte_update(bu, value_as_byte_array, non_const_update_bound, ns),
src.type()};
}
Expand Down
53 changes: 53 additions & 0 deletions unit/solvers/lowering/byte_operators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,59 @@ SCENARIO("byte_extract_lowering", "[core][solvers][lowering][byte_extract]")
}
}

GIVEN("A an unbounded union byte_extract over a bounded operand")
{
const exprt deadbeef = from_integer(0xdeadbeef, unsignedbv_typet(32));
const byte_extract_exprt be1(
ID_byte_extract_little_endian,
deadbeef,
from_integer(1, index_type()),
union_typet(
{{"unbounded_array",
array_typet(
unsignedbv_typet(16), exprt(ID_infinity, size_type()))}}));

THEN("byte_extract lowering does not raise an exception")
{
const exprt lower_be1 = lower_byte_extract(be1, ns);

REQUIRE(!has_subexpr(lower_be1, ID_byte_extract_little_endian));
REQUIRE(lower_be1.type() == be1.type());

byte_extract_exprt be2 = be1;
be2.id(ID_byte_extract_big_endian);
const exprt lower_be2 = lower_byte_extract(be2, ns);

REQUIRE(!has_subexpr(lower_be2, ID_byte_extract_big_endian));
REQUIRE(lower_be2.type() == be2.type());
}
}

GIVEN("A an empty union byte_extract over a bounded operand")
{
const exprt deadbeef = from_integer(0xdeadbeef, unsignedbv_typet(32));
const byte_extract_exprt be1(
ID_byte_extract_little_endian,
deadbeef,
from_integer(1, index_type()),
union_typet{});

THEN("byte_extract lowering does not raise an exception")
{
const exprt lower_be1 = lower_byte_extract(be1, ns);

REQUIRE(!has_subexpr(lower_be1, ID_byte_extract_little_endian));
REQUIRE(lower_be1.type() == be1.type());

byte_extract_exprt be2 = be1;
be2.id(ID_byte_extract_big_endian);
const exprt lower_be2 = lower_byte_extract(be2, ns);

REQUIRE(!has_subexpr(lower_be2, ID_byte_extract_big_endian));
REQUIRE(lower_be2.type() == be2.type());
}
}

GIVEN("A a byte_extract from a string constant")
{
string_constantt s("ABCD");
Expand Down