Skip to content

Value-set dereference: use cond_exprt to avoid quadratic guards #4555

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

31 changes: 31 additions & 0 deletions src/goto-symex/build_goto_trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,37 @@ static exprt build_full_lhs_rec(
else
return std::move(tmp2);
}
else if(id == ID_cond)
{
const auto &original_cond_expr = to_cond_expr(src_original);
const auto &ssa_cond_expr = to_cond_expr(src_ssa);
cond_exprt result(
{}, src_original.type(), original_cond_expr.is_exclusive());
for(std::size_t i = 0; i < original_cond_expr.get_n_cases(); ++i)
{
exprt condition =
decision_procedure.get(to_cond_expr(src_ssa).condition(i));
if(condition.is_false())
continue;

exprt value = build_full_lhs_rec(
decision_procedure,
ns,
original_cond_expr.value(i),
ssa_cond_expr.value(i));

if(
condition.is_true() &&
(result.get_n_cases() == 0 || result.is_exclusive()))
{
return value;
}

result.add_case(condition, value);
}

return std::move(result);
}
else if(id==ID_typecast)
{
typecast_exprt tmp=to_typecast_expr(src_original);
Expand Down
7 changes: 7 additions & 0 deletions src/goto-symex/goto_symex.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,13 @@ class goto_symext
const exprt &rhs,
exprt::operandst &,
assignment_typet);
/// Assignment to a left-hand side that is a cond_exprt.
/// The definition visits each case of \p lhs and recurses into
/// symex_assign_rec on that case's value, with the case condition added to
/// the guard operands for the duration of the recursive call.
void symex_assign_cond(
statet &,
const cond_exprt &lhs,
const exprt &full_lhs,
const exprt &rhs,
exprt::operandst &,
assignment_typet);
void symex_assign_byte_extract(
statet &,
const byte_extract_exprt &lhs,
Expand Down
10 changes: 10 additions & 0 deletions src/goto-symex/goto_symex_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -579,6 +579,16 @@ void goto_symex_statet::rename_address(exprt &expr, const namespacet &ns)

if_expr.type()=if_expr.true_case().type();
}
else if(expr.id() == ID_cond)
{
cond_exprt &cond_expr = to_cond_expr(expr);
for(std::size_t i = 0; i < cond_expr.get_n_cases(); ++i)
{
cond_expr.condition(i) =
rename<level>(std::move(cond_expr.condition(i)), ns).get();
rename_address<level>(cond_expr.value(i), ns);
}
}
else if(expr.id()==ID_member)
{
member_exprt &member_expr=to_member_expr(expr);
Expand Down
41 changes: 41 additions & 0 deletions src/goto-symex/symex_assign.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ void goto_symext::symex_assign_rec(
else if(lhs.id()==ID_if)
symex_assign_if(
state, to_if_expr(lhs), full_lhs, rhs, guard, assignment_type);
else if(lhs.id() == ID_cond)
symex_assign_cond(
state, to_cond_expr(lhs), full_lhs, rhs, guard, assignment_type);
else if(lhs.id()==ID_typecast)
symex_assign_typecast(
state, to_typecast_expr(lhs), full_lhs, rhs, guard, assignment_type);
Expand Down Expand Up @@ -613,6 +616,44 @@ void goto_symext::symex_assign_if(
}
}

/// Performs an assignment whose left-hand side is a cond_exprt by assigning
/// to each case's value in turn via symex_assign_rec, with the guard extended
/// by that case's renamed and simplified condition.
/// Cases whose condition simplifies to false are skipped, and iteration stops
/// after a case whose condition simplifies to true. For a non-exclusive
/// cond_exprt, the negation of each visited condition stays on the guard for
/// the cases that follow it.
/// \param state: state used to rename each case condition
/// \param lhs: conditional left-hand side whose case values are assigned to
/// \param full_lhs: forwarded unchanged to symex_assign_rec
/// \param rhs: forwarded unchanged to symex_assign_rec
/// \param guard: guard conjuncts; grown while the cases are processed and
///   resized back to its size on entry before returning
/// \param assignment_type: forwarded unchanged to symex_assign_rec
void goto_symext::symex_assign_cond(
statet &state,
const cond_exprt &lhs,
const exprt &full_lhs,
const exprt &rhs,
exprt::operandst &guard,
assignment_typet assignment_type)
{
// Remember how many conjuncts the caller passed in so that anything pushed
// below can be dropped again at the end.
std::size_t old_guard_size = guard.size();

for(std::size_t i = 0; i < lhs.get_n_cases(); ++i)
{
// Condition of the current case, renamed in the current state; it is
// simplified next and then used as the extra conjunct for this case.
exprt renamed_guard = state.rename(lhs.condition(i), ns).get();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 It's confusing that the variable names don't distinguish between the incoming guard and the extra bit coming from the current case we're considering. Maybe renamed_case_guard?

do_simplify(renamed_guard);
if(!renamed_guard.is_false())
{
guard.push_back(renamed_guard);
symex_assign_rec(
state, lhs.value(i), full_lhs, rhs, guard, assignment_type);
guard.pop_back();
}

// If this one is a certainty the remaining cases are irrelevant:
if(renamed_guard.is_true())
break;

// If the conditions are non-exclusive, further cases can only happen if
// this one did not. If they are exclusive then they can be tested
// independently.
if(!lhs.is_exclusive())
guard.push_back(not_exprt(renamed_guard));
}

// Restore the guard to its state before entering this function:
INVARIANT(guard.size() >= old_guard_size, "must not shrink the guard!");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/shrink/grow/?

guard.resize(old_guard_size);
}

void goto_symext::symex_assign_byte_extract(
statet &state,
const byte_extract_exprt &lhs,
Expand Down
55 changes: 55 additions & 0 deletions src/goto-symex/symex_clean_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Author: Daniel Kroening, kroening@kroening.com
#include <util/arith_tools.h>
#include <util/byte_operators.h>
#include <util/c_types.h>
#include <util/expr_iterator.h>
#include <util/pointer_offset_size.h>
#include <util/simplify_expr.h>

Expand Down Expand Up @@ -49,6 +50,27 @@ process_array_expr(exprt &expr, bool do_simplify, const namespacet &ns)

if_expr.type()=if_expr.true_case().type();
}
else if(expr.id() == ID_cond)
{
cond_exprt &cond_expr = to_cond_expr(expr);
typet result_type;
for(std::size_t i = 0; i < cond_expr.get_n_cases(); ++i)
{
process_array_expr(cond_expr.value(i), do_simplify, ns);
if(i == 0)
result_type = cond_expr.value(i).type();
else if(cond_expr.value(i).type() != result_type)
{
cond_expr.value(i) = byte_extract_exprt(
byte_extract_id(),
cond_expr.value(i),
from_integer(0, index_type()),
result_type);
}
}

cond_expr.type() = result_type;
}
else if(expr.id()==ID_address_of)
{
// strip
Expand Down Expand Up @@ -169,6 +191,33 @@ replace_nondet(exprt &expr, symex_nondet_generatort &build_symex_nondet)
}
}

/// Rewrites every cond_exprt reached by the depth iterator over \p expr into
/// a chain of nested if_exprts, in place.
/// The value of the last case becomes the innermost result and its condition
/// is not consulted; each earlier case i wraps the expression built so far as
/// if_exprt(condition(i), value(i), <expression built so far>).
/// \param expr: expression to rewrite; modified through the iterator's
///   mutate() accessor
static void lower_cond_expr(exprt &expr)
{
for(auto it = expr.depth_begin(), itend = expr.depth_end(); it != itend; ++it)
{
if(it->id() == ID_cond)
{
exprt new_expr;
const auto &cond_expr = to_cond_expr(*it);
INVARIANT(cond_expr.get_n_cases() >= 1, "cond_expr should not be empty");
// Walk the cases from last to first. The loop header has no termination
// test: the index is unsigned, so the loop is left via the explicit break
// once case 0 has been handled.
for(std::size_t i = cond_expr.get_n_cases() - 1;; --i)
{
// The last case seeds the result; its condition is dropped.
if(i == cond_expr.get_n_cases() - 1)
new_expr = cond_expr.value(i);
else
{
new_expr =
if_exprt(cond_expr.condition(i), cond_expr.value(i), new_expr);
}
if(i == 0)
break;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd use for(std::size_t i = cond_expr.get_n_cases(); i > 0; --i) and then if(i == cond_expr.get_n_cases()) as well as cond_expr.value(i - 1) and cond_expr.condition(i - 1)`.

}

it.mutate() = std::move(new_expr);
}
}
}

void goto_symext::clean_expr(
exprt &expr,
statet &state,
Expand All @@ -177,6 +226,12 @@ void goto_symext::clean_expr(
replace_nondet(expr, path_storage.build_symex_nondet);
dereference(expr, state, write);

// We know how to handle cond_exprt on the LHS (symex_assign_rec does this),
// but cond_exprt on the RHS is currently patchily handled, especially by the
// Java string solver. For now, lower such expressions to nested if_exprts.
if(!write)
lower_cond_expr(expr);

// make sure all remaining byte extract operations use the root
// object to avoid nesting of with/update and byte_update when on
// lhs
Expand Down
12 changes: 12 additions & 0 deletions src/goto-symex/symex_dereference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,18 @@ exprt goto_symext::address_arithmetic(

result=if_expr;
}
else if(expr.id() == ID_cond)
{
cond_exprt cond_expr = to_cond_expr(expr);
for(std::size_t i = 0; i < cond_expr.get_n_cases(); ++i)
{
dereference_rec(cond_expr.condition(i), state, false);
cond_expr.value(i) =
address_arithmetic(cond_expr.value(i), state, keep_array);
}

result = std::move(cond_expr);
}
else if(expr.id()==ID_symbol ||
expr.id()==ID_string_constant ||
expr.id()==ID_label ||
Expand Down
15 changes: 15 additions & 0 deletions src/goto-symex/symex_other.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,21 @@ void goto_symext::havoc_rec(
guard_f.add(not_exprt(if_expr.cond()));
havoc_rec(state, guard_f, if_expr.false_case());
}
else if(dest.id() == ID_cond)
{
const auto &cond_expr = to_cond_expr(dest);
for(std::size_t i = 0; i < cond_expr.get_n_cases(); ++i)
{
guardt guard = state.guard;
if(!cond_expr.is_exclusive())
{
for(std::size_t j = 0; j < i; ++j)
guard.add(not_exprt(cond_expr.condition(j)));
}
guard.add(cond_expr.condition(i));
havoc_rec(state, guard, cond_expr.value(i));
}
}
else if(dest.id()==ID_typecast)
{
havoc_rec(state, guard, to_typecast_expr(dest).op());
Expand Down
12 changes: 12 additions & 0 deletions src/pointer-analysis/value_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,12 @@ void value_sett::get_value_set_rec(
get_value_set_rec(expr.op1(), dest, suffix, original_type, ns);
get_value_set_rec(expr.op2(), dest, suffix, original_type, ns);
}
else if(expr.id() == ID_cond)
{
const auto &cond_expr = to_cond_expr(expr);
for(std::size_t i = 0; i < cond_expr.get_n_cases(); ++i)
get_value_set_rec(cond_expr.value(i), dest, suffix, original_type, ns);
}
else if(expr.id()==ID_address_of)
{
if(expr.operands().size()!=1)
Expand Down Expand Up @@ -1169,6 +1175,12 @@ void value_sett::get_reference_set_rec(
get_reference_set_rec(expr.op2(), dest, ns);
return;
}
else if(expr.id() == ID_cond)
{
const auto &cond_expr = to_cond_expr(expr);
for(std::size_t i = 0; i < cond_expr.get_n_cases(); ++i)
get_reference_set_rec(cond_expr.value(i), dest, ns);
}

insert(dest, exprt(ID_unknown, expr.type()));
}
Expand Down
76 changes: 54 additions & 22 deletions src/pointer-analysis/value_set_dereference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,21 @@ exprt value_set_dereferencet::dereference(const exprt &pointer)
throw "dereference expected pointer type, but got "+
pointer.type().pretty();

// we may get ifs due to recursive calls
// we may get ifs or conds due to recursive calls
if(pointer.id()==ID_if)
{
const if_exprt &if_expr=to_if_expr(pointer);
exprt true_case = dereference(if_expr.true_case());
exprt false_case = dereference(if_expr.false_case());
return if_exprt(if_expr.cond(), true_case, false_case);
}
else if(pointer.id() == ID_cond)
{
cond_exprt result = to_cond_expr(pointer);
for(std::size_t i = 0; i < result.get_n_cases(); ++i)
result.value(i) = dereference(result.value(i));
return std::move(result);
}

// type of the object
const typet &type=pointer.type().subtype();
Expand Down Expand Up @@ -107,12 +114,12 @@ exprt value_set_dereferencet::dereference(const exprt &pointer)
may_fail=true;
}

exprt failure_value;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find it too non-trivial to check that failure_value is never used uninitialised. See comment below.


if(may_fail)
{
// first see if we have a "failed object" for this pointer

exprt failure_value;

if(
const symbolt *failed_symbol =
dereference_callback.get_or_create_failed_symbol(pointer))
Expand All @@ -138,36 +145,61 @@ exprt value_set_dereferencet::dereference(const exprt &pointer)
failure_value=symbol.symbol_expr();
failure_value.set(ID_C_invalid_object, true);
}

valuet value;
value.value=failure_value;
value.pointer_guard=true_exprt();
values.push_front(value);
}

// now build big case split, but we only do "good" objects

exprt value=nil_exprt();

for(std::list<valuet>::const_iterator
it=values.begin();
it!=values.end();
it++)
optionalt<exprt> value_without_condition;
cond_exprt cond({}, type, true);
for(const auto &alias_value : values)
{
if(it->value.is_not_nil())
if(alias_value.value.is_not_nil())
{
if(value.is_nil()) // first?
value=it->value;
if(alias_value.pointer_guard.is_false())
{
INVARIANT(
!value_without_condition.has_value(),
"can't discriminate between two different catch-all aliases");
value_without_condition = alias_value.value;
}
else
value=if_exprt(it->pointer_guard, it->value, value);
{
cond.add_case(alias_value.pointer_guard, alias_value.value);
INVARIANT(
alias_value.value.type() == type,
"deref value types should match the pointer being derefd");
}
}
}

#if 0
std::cout << "R: " << format(value) << "\n\n";
#endif
// I'd like to put an invariant here that values without a pointer guard, such
// as integer_address, cannot co-occur with failed objects, but this isn't the
// case. There's no way to write a GOTO condition to discriminate between the
// two however, so purely by historical accident, the failed object takes
// precedence:

if(may_fail || value_without_condition.has_value())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can't we just combine value_without_condition and failure_value into a single expression?

{
// The cases must be disjoint, so add
// "!(p == &o1 || p == &o2 || p == &o3 || ...) => failure-value"
exprt::operandst other_case_conditions;
for(std::size_t i = 0; i < cond.get_n_cases(); ++i)
other_case_conditions.push_back(cond.condition(i));
cond.add_case(
not_exprt(disjunction(other_case_conditions)),
may_fail ? failure_value : *value_without_condition);
}

return value;
#if 0
std::cout << "R: " << format(cond) << "\n\n";
#endif

if(cond.get_n_cases() == 0)
return nil_exprt();
else if(cond.get_n_cases() == 1)
return cond.value(0);
else
return std::move(cond);
}

/// Check if the two types have matching number of ID_pointer levels, with
Expand Down
Loading