Skip to content

Commit ef6cf42

Browse files
author
Owen
committed
filter value sets at gotos and assumes
When a conditional change of control flow in goto-symex (i.e. gotos and assumes), if there is a pointer-typed symbol in the condition then try to filter its value set separately for each branch based on which values are actually possible on that branch. We only do this when there is only one pointer-typed symbol in the condition. The intention is to make the value-sets more accurate. In particular, this lays the groundwork for dealing with virtual method dispatch much more efficiently. See diffblue#2122 for an approach that was rejected. For example in java, code like `x.equals(y)`, where `x` could point to an Object or a String, gets turned into code like ``` if (x.@class_identifier == 'java.lang.Object') x . java.lang.Object.equals(y) else if (x.@class_identifier == 'java.lang.String') x . java.lang.String.equals(y) ``` When symex goes into java.lang.String.equals it doesn't filter the value-set so that `this` (which is `x`) only points to the String, not the Object. This causes symex to add complicated expressions to the formula for field accesses to fields that x won't have if it points to an Object.
1 parent f6f9cf9 commit ef6cf42

File tree

5 files changed

+204
-0
lines changed

5 files changed

+204
-0
lines changed

src/goto-symex/goto_symex.h

+22
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,28 @@ class goto_symext
308308
virtual void symex_other(statet &state);
309309

310310
void symex_assert(const goto_programt::instructiont &, statet &);
311+
312+
/// Try to filter value sets based on whether possible values of a
313+
/// pointer-typed symbol make the condition true or false. We only do this
314+
/// when there is only one pointer-typed symbol in \p condition.
315+
/// \param state: The current state
316+
/// \param condition: The condition which is being evaluated, which it expects
317+
/// will not have been cleaned or renamed. In practice, it's fine if it has
318+
/// been cleaned and renamed up to level L1.
319+
/// \param value_set_for_true_case: A pointer to the value set that possible
320+
/// values should be removed from if they make the condition false, or
321+
/// nullptr if this shouldn't be done
322+
/// \param value_set_for_false_case: A pointer to the value set that possible
323+
/// values should be removed from if they make the condition true, or
324+
/// nullptr if this shouldn't be done
325+
/// \param ns: A namespace
326+
void try_filter_value_sets(
327+
goto_symex_statet &state,
328+
exprt condition,
329+
value_sett *value_set_for_true_case,
330+
value_sett *value_set_for_false_case,
331+
const namespacet &ns);
332+
311333
virtual void vcc(
312334
const exprt &,
313335
const std::string &msg,

src/goto-symex/symex_goto.cpp

+13
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ Author: Daniel Kroening, [email protected]
2020
#include <util/std_expr.h>
2121

2222
#include <analyses/dirty.h>
23+
#include <util/replace_symbol.h>
2324
#include <util/simplify_expr.h>
2425

2526
void goto_symext::symex_goto(statet &state)
@@ -214,6 +215,18 @@ void goto_symext::symex_goto(statet &state)
214215

215216
symex_transition(state, state_pc, backward);
216217

218+
if(!new_guard.is_true())
219+
{
220+
// update our value-set when particular objects are only compatible with
221+
// one or other branch
222+
try_filter_value_sets(
223+
state,
224+
instruction.get_condition(),
225+
backward ? &state.value_set : &goto_state_list.back().value_set,
226+
!backward ? &state.value_set : &goto_state_list.back().value_set,
227+
ns);
228+
}
229+
217230
// adjust guards
218231
if(new_guard.is_true())
219232
{

src/goto-symex/symex_main.cpp

+148
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Author: Daniel Kroening, [email protected]
1515
#include <memory>
1616

1717
#include <util/exception_utils.h>
18+
#include <util/expr_iterator.h>
1819
#include <util/expr_util.h>
1920
#include <util/make_unique.h>
2021
#include <util/mathematical_expr.h>
@@ -125,6 +126,13 @@ void goto_symext::vcc(
125126

126127
void goto_symext::symex_assume(statet &state, const exprt &cond)
127128
{
129+
if(!cond.is_false())
130+
{
131+
// try to update the value sets using information contained in the guard
132+
try_filter_value_sets(
133+
state, cond, &state.value_set, nullptr, ns);
134+
}
135+
128136
exprt simplified_cond=cond;
129137

130138
clean_expr(simplified_cond, state, false);
@@ -534,3 +542,143 @@ void goto_symext::symex_step(
534542
UNREACHABLE;
535543
}
536544
}
545+
546+
/// Check if an expression only contains one unique symbol (possibly repeated
547+
/// multiple times)
548+
/// \param expr: The expression to examine
549+
/// \return If only one unique symbol occurs in \p expr then return it;
550+
/// otherwise return an empty optionalt
551+
static optionalt<symbol_exprt>
552+
find_unique_pointer_typed_symbol(const exprt &expr)
553+
{
554+
optionalt<symbol_exprt> return_value;
555+
for(auto it = expr.depth_cbegin(); it != expr.depth_cend(); ++it)
556+
{
557+
const symbol_exprt *symbol_expr = expr_try_dynamic_cast<symbol_exprt>(*it);
558+
if(symbol_expr && can_cast_type<pointer_typet>(symbol_expr->type()))
559+
{
560+
// If we already have a potential return value, check if it is the same
561+
// symbol, and return an empty optionalt if not
562+
if(return_value && *symbol_expr != *return_value)
563+
{
564+
return {};
565+
}
566+
return_value = *symbol_expr;
567+
}
568+
}
569+
570+
// Either expr contains no pointer-typed symbols or it contains one unique
571+
// pointer-typed symbol, possibly repeated multiple times
572+
return return_value;
573+
}
574+
575+
/// This is a simplified version of value_set_dereferencet::build_reference_to.
576+
/// It ignores the ID_dynamic_object case (which doesn't occur in goto-symex)
577+
/// and gives up for integer addresses and non-trivial symbols
578+
/// \param value_set_element: An element of a value-set
579+
/// \param type: the type of the expression that might point to
580+
/// \p value_set_element
581+
/// \return An expression for the value of the pointer indicated by \p
582+
/// value_set_element if it is easy to determine, or an empty optionalt
583+
/// otherwise
584+
static optionalt<exprt>
585+
value_set_element_to_expr(exprt value_set_element, pointer_typet type)
586+
{
587+
const object_descriptor_exprt *object_descriptor =
588+
expr_try_dynamic_cast<object_descriptor_exprt>(value_set_element);
589+
if(!object_descriptor)
590+
{
591+
return {};
592+
}
593+
594+
const exprt &root_object = object_descriptor->root_object();
595+
const exprt &object = object_descriptor->object();
596+
597+
if(root_object.id() == ID_null_object)
598+
{
599+
return null_pointer_exprt{type};
600+
}
601+
else if(root_object.id() == ID_integer_address)
602+
{
603+
return {};
604+
}
605+
else
606+
{
607+
// We should do something like
608+
// value_set_dereference::dereference_type_compare, which deals with
609+
// arrays having types containing void
610+
if(object_descriptor->offset().is_zero() && object.type() == type.subtype())
611+
{
612+
return address_of_exprt(object);
613+
}
614+
else
615+
{
616+
return {};
617+
}
618+
}
619+
}
620+
621+
void goto_symext::try_filter_value_sets(
622+
goto_symex_statet &state,
623+
exprt condition,
624+
value_sett *value_set_for_true_case,
625+
value_sett *value_set_for_false_case,
626+
const namespacet &ns)
627+
{
628+
condition = state.rename<L1>(std::move(condition), ns);
629+
630+
optionalt<symbol_exprt> symbol_expr =
631+
find_unique_pointer_typed_symbol(condition);
632+
633+
if(!symbol_expr)
634+
{
635+
return;
636+
}
637+
638+
const pointer_typet &symbol_type = to_pointer_type(symbol_expr->type());
639+
640+
value_setst::valuest value_set_elements;
641+
state.value_set.get_value_set(*symbol_expr, value_set_elements, ns);
642+
643+
// Try evaluating the condition with the symbol replaced by a pointer to each
644+
// one of its possible values in turn. If that leads to a true for some
645+
// value_set_element then we can delete it from the value set that will be
646+
// used if the condition is false, and vice versa.
647+
for(const auto &value_set_element : value_set_elements)
648+
{
649+
optionalt<exprt> possible_value =
650+
value_set_element_to_expr(value_set_element, symbol_type);
651+
652+
if(!possible_value)
653+
{
654+
continue;
655+
}
656+
657+
exprt modified_condition(condition);
658+
659+
address_of_aware_replace_symbolt replace_symbol{};
660+
replace_symbol.insert(*symbol_expr, *possible_value);
661+
replace_symbol(modified_condition);
662+
663+
do_simplify(modified_condition);
664+
modified_condition = state.rename(std::move(modified_condition), ns);
665+
do_simplify(modified_condition);
666+
667+
if(value_set_for_true_case && modified_condition.is_false())
668+
{
669+
value_sett::entryt *entry = const_cast<value_sett::entryt *>(
670+
value_set_for_true_case->get_entry_for_symbol(
671+
symbol_expr->get_identifier(), ns.follow(symbol_type), ""));
672+
value_set_for_true_case->erase_value_from_entry(
673+
*entry, value_set_element);
674+
}
675+
else if(value_set_for_false_case && modified_condition.is_true())
676+
{
677+
value_sett::entryt *entry = const_cast<value_sett::entryt *>(
678+
value_set_for_false_case->get_entry_for_symbol(
679+
symbol_expr->get_identifier(), ns.follow(symbol_type), ""));
680+
value_set_for_false_case->erase_value_from_entry(
681+
*entry, value_set_element);
682+
}
683+
}
684+
}

src/pointer-analysis/value_set.cpp

+19
Original file line numberDiff line numberDiff line change
@@ -1607,3 +1607,22 @@ exprt value_sett::make_member(
16071607

16081608
return member_expr;
16091609
}
1610+
1611+
void value_sett::erase_value_from_entry(entryt &entry, const exprt &value_to_erase)
1612+
{
1613+
std::vector<object_map_dt::key_type> keys_to_erase;
1614+
for(const auto &key_value : entry.object_map.read())
1615+
{
1616+
const auto &rhs_object = to_expr(key_value);
1617+
if(rhs_object == value_to_erase)
1618+
{
1619+
keys_to_erase.emplace_back(key_value.first);
1620+
}
1621+
}
1622+
DATA_INVARIANT(
1623+
!keys_to_erase.empty(), "erase_value() should erase at least one value");
1624+
for(const auto &key : keys_to_erase)
1625+
{
1626+
entry.object_map.write().erase(key);
1627+
}
1628+
}

src/pointer-analysis/value_set.h

+2
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,8 @@ class value_sett
465465
const typet &expr_type,
466466
const std::string &suffix) const;
467467

468+
void erase_value_from_entry(entryt &entry, const exprt &value_to_erase);
469+
468470
protected:
469471
/// Reads the set of objects pointed to by `expr`, including making
470472
/// recursive lookups for dereference operations etc.

0 commit comments

Comments
 (0)