Skip to content

Commit a082d67

Browse files
author
Owen
committed
filter value sets at gotos and assumes
When a conditional change of control flow in goto-symex (i.e. gotos and assumes), if there is a pointer-typed symbol in the condition then try to filter its value set separately for each branch based on which values are actually possible on that branch. We only do this when there is only one pointer-typed symbol in the condition. The intention is to make the value-sets more accurate. In particular, this lays the groundwork for dealing with virtual method dispatch much more efficiently. See #2122 for an approach that was rejected. For example in java, code like `x.equals(y)`, where `x` could point to an Object or a String, gets turned into code like ``` if (x.@class_identifier == 'java.lang.Object') x . java.lang.Object.equals(y) else if (x.@class_identifier == 'java.lang.String') x . java.lang.String.equals(y) ``` When symex goes into java.lang.String.equals it doesn't filter the value-set so that `this` (which is `x`) only points to the String, not the Object. This causes symex to add complicated expressions to the formula for field accesses to fields that x won't have if it points to an Object.
1 parent 4bc7a5b commit a082d67

File tree

4 files changed

+194
-0
lines changed

4 files changed

+194
-0
lines changed

src/goto-symex/symex_goto.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,15 @@ void goto_symext::symex_goto(statet &state)
230230

231231
symex_transition(state, state_pc, backward);
232232

233+
// update our value-set when particular objects are only compatible with
234+
// one or other branch
235+
try_filter_value_sets(
236+
state,
237+
instruction.get_condition(),
238+
backward ? &state.value_set : &goto_state_list.back().second.value_set,
239+
!backward ? &state.value_set : &goto_state_list.back().second.value_set,
240+
ns);
241+
233242
// produce new guard symbol
234243
exprt guard_expr;
235244

src/goto-symex/symex_main.cpp

+161
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Author: Daniel Kroening, [email protected]
1515
#include <memory>
1616

1717
#include <util/exception_utils.h>
18+
#include <util/expr_iterator.h>
1819
#include <util/expr_util.h>
1920
#include <util/make_unique.h>
2021
#include <util/mathematical_expr.h>
@@ -124,6 +125,12 @@ void goto_symext::vcc(
124125

125126
void goto_symext::symex_assume(statet &state, const exprt &cond)
126127
{
128+
if(!cond.is_false())
129+
{
130+
// try to update the value sets using information contained in the guard
131+
try_filter_value_sets(state, cond, &state.value_set, nullptr, ns);
132+
}
133+
127134
exprt simplified_cond=cond;
128135

129136
clean_expr(simplified_cond, state, false);
@@ -539,3 +546,157 @@ void goto_symext::symex_step(
539546
UNREACHABLE;
540547
}
541548
}
549+
550+
/// Check if an expression only contains one unique symbol (possibly repeated
551+
/// multiple times)
552+
/// \param expr: The expression to examine
553+
/// \return If only one unique symbol occurs in \p expr then return it;
554+
/// otherwise return an empty optionalt
555+
static optionalt<symbol_exprt>
556+
find_unique_pointer_typed_symbol(const exprt &expr)
557+
{
558+
optionalt<symbol_exprt> return_value;
559+
for(auto it = expr.depth_cbegin(); it != expr.depth_cend(); ++it)
560+
{
561+
const symbol_exprt *symbol_expr = expr_try_dynamic_cast<symbol_exprt>(*it);
562+
if(symbol_expr && can_cast_type<pointer_typet>(symbol_expr->type()))
563+
{
564+
// If we already have a potential return value, check if it is the same
565+
// symbol, and return an empty optionalt if not
566+
if(return_value && *symbol_expr != *return_value)
567+
{
568+
return {};
569+
}
570+
return_value = *symbol_expr;
571+
}
572+
}
573+
574+
// Either expr contains no pointer-typed symbols or it contains one unique
575+
// pointer-typed symbol, possibly repeated multiple times
576+
return return_value;
577+
}
578+
579+
/// This is a simplified version of value_set_dereferencet::build_reference_to.
580+
/// It ignores the ID_dynamic_object case (which doesn't occur in goto-symex)
581+
/// and gives up for integer addresses and non-trivial symbols
582+
/// \param value_set_element: An element of a value-set
583+
/// \param type: the type of the expression that might point to
584+
/// \p value_set_element
585+
/// \return An expression for the value of the pointer indicated by \p
586+
/// value_set_element if it is easy to determine, or an empty optionalt
587+
/// otherwise
588+
static optionalt<exprt>
589+
value_set_element_to_expr(exprt value_set_element, pointer_typet type)
590+
{
591+
const object_descriptor_exprt *object_descriptor =
592+
expr_try_dynamic_cast<object_descriptor_exprt>(value_set_element);
593+
if(!object_descriptor)
594+
{
595+
return {};
596+
}
597+
598+
const exprt &root_object = object_descriptor->root_object();
599+
const exprt &object = object_descriptor->object();
600+
601+
if(root_object.id() == ID_null_object)
602+
{
603+
return null_pointer_exprt{type};
604+
}
605+
else if(root_object.id() == ID_integer_address)
606+
{
607+
return {};
608+
}
609+
else
610+
{
611+
// We should do something like
612+
// value_set_dereference::dereference_type_compare, which deals with
613+
// arrays having types containing void
614+
if(object_descriptor->offset().is_zero() && object.type() == type.subtype())
615+
{
616+
return address_of_exprt(object);
617+
}
618+
else
619+
{
620+
return {};
621+
}
622+
}
623+
}
624+
625+
void goto_symext::try_filter_value_sets(
626+
goto_symex_statet &state,
627+
exprt condition,
628+
value_sett *value_set_for_true_case,
629+
value_sett *value_set_for_false_case,
630+
const namespacet &ns)
631+
{
632+
condition = state.rename<L1>(std::move(condition), ns);
633+
634+
optionalt<symbol_exprt> symbol_expr =
635+
find_unique_pointer_typed_symbol(condition);
636+
637+
if(!symbol_expr)
638+
{
639+
return;
640+
}
641+
642+
const pointer_typet &symbol_type = to_pointer_type(symbol_expr->type());
643+
644+
value_setst::valuest value_set_elements;
645+
state.value_set.get_value_set(*symbol_expr, value_set_elements, ns);
646+
647+
// Try evaluating the condition with the symbol replaced by a pointer to each
648+
// one of its possible values in turn. If that leads to a true for some
649+
// value_set_element then we can delete it from the value set that will be
650+
// used if the condition is false, and vice versa.
651+
for(const auto &value_set_element : value_set_elements)
652+
{
653+
optionalt<exprt> possible_value =
654+
value_set_element_to_expr(value_set_element, symbol_type);
655+
656+
if(!possible_value)
657+
{
658+
continue;
659+
}
660+
661+
exprt modified_condition(condition);
662+
663+
address_of_aware_replace_symbolt replace_symbol{};
664+
replace_symbol.insert(*symbol_expr, *possible_value);
665+
replace_symbol(modified_condition);
666+
667+
// This do_simplify() is needed for the following reason: if condition is
668+
// `*p == a` and we replace `p` with `&a` then we get `*&a == a`. Suppose
669+
// our constant propagation knows that `a` is `. Without this call to
670+
// do_simplify(), state.rename() turns this into `*&a == 1` (because
671+
// rename() doesn't do constant propagation inside addresses), which
672+
// do_simplify() turns into `a == 1`, which cannot be evaluated as true
673+
// without another round of constant propagation.
674+
// It would be sufficient to replace this call to do_simplify() with
675+
// something that just replaces `*&x` with `x` whenever it finds it.
676+
do_simplify(modified_condition);
677+
678+
const bool record_events = state.record_events;
679+
state.record_events = false;
680+
modified_condition = state.rename(std::move(modified_condition), ns);
681+
state.record_events = record_events;
682+
683+
do_simplify(modified_condition);
684+
685+
if(value_set_for_true_case && modified_condition.is_false())
686+
{
687+
value_sett::entryt *entry = const_cast<value_sett::entryt *>(
688+
value_set_for_true_case->get_entry_for_symbol(
689+
symbol_expr->get_identifier(), symbol_type, "", ns));
690+
value_set_for_true_case->erase_value_from_entry(
691+
*entry, value_set_element);
692+
}
693+
else if(value_set_for_false_case && modified_condition.is_true())
694+
{
695+
value_sett::entryt *entry = const_cast<value_sett::entryt *>(
696+
value_set_for_false_case->get_entry_for_symbol(
697+
symbol_expr->get_identifier(), symbol_type, "", ns));
698+
value_set_for_false_case->erase_value_from_entry(
699+
*entry, value_set_element);
700+
}
701+
}
702+
}

src/pointer-analysis/value_set.cpp

+22
Original file line numberDiff line numberDiff line change
@@ -1615,3 +1615,25 @@ exprt value_sett::make_member(
16151615

16161616
return member_expr;
16171617
}
1618+
1619+
void value_sett::erase_value_from_entry(
1620+
entryt &entry,
1621+
const exprt &value_to_erase)
1622+
{
1623+
std::vector<object_map_dt::key_type> keys_to_erase;
1624+
1625+
for(const auto &key_value : entry.object_map.read())
1626+
{
1627+
const auto &rhs_object = to_expr(key_value);
1628+
if(rhs_object == value_to_erase)
1629+
{
1630+
keys_to_erase.emplace_back(key_value.first);
1631+
}
1632+
}
1633+
1634+
DATA_INVARIANT(
1635+
keys_to_erase.size() == 1,
1636+
"value_sett::erase_value_from_entry() should erase exactly one value");
1637+
1638+
entry.object_map.write().erase(keys_to_erase[0]);
1639+
}

src/pointer-analysis/value_set.h

+2
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,8 @@ class value_sett
473473
const std::string &suffix,
474474
const namespacet &ns) const;
475475

476+
void erase_value_from_entry(entryt &entry, const exprt &value_to_erase);
477+
476478
protected:
477479
/// Reads the set of objects pointed to by `expr`, including making
478480
/// recursive lookups for dereference operations etc.

0 commit comments

Comments
 (0)