Skip to content

Cache dereferences during symex to avoid repeatedly inserting large dereferences into SSA #5964

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions jbmc/src/jbmc/jbmc_parse_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,9 @@ void jbmc_parse_optionst::get_command_line_options(optionst &options)
options.set_option("validate-goto-model", true);
}

options.set_option(
"symex-cache-dereferences", cmdline.isset("symex-cache-dereferences"));

PARSE_OPTIONS_GOTO_TRACE(cmdline, options);

if(cmdline.isset("no-lazy-methods"))
Expand Down
14 changes: 14 additions & 0 deletions regression/cbmc/dereference-cache-flag/test-no-cache.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
CORE
test.c
--show-vcc
\(main::1::p!0@1#2 = address_of
^EXIT=0$
^SIGNAL=0$
--
--
This is just checking that the --symex-cache-dereferences flag actually turns
dereference caching on and off.

I would like to match the expression more precisely, but the order of
comparisons for address of depends on platform-specific logic (unordered_xxx),
and the corresponding regex would look ridiculous.
10 changes: 10 additions & 0 deletions regression/cbmc/dereference-cache-flag/test-with-cache.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
test.c
--show-vcc --symex-cache-dereferences
symex::dereference_cache!0#1 = 0
^EXIT=0$
^SIGNAL=0$
--
--
This is just checking that the --symex-cache-dereferences flag actually
turns dereference caching on.
7 changes: 7 additions & 0 deletions regression/cbmc/dereference-cache-flag/test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#include <assert.h>

int main(void)
{
int cond, x, y, *p = cond ? &x : &y;
assert(*p == 0);
}
3 changes: 3 additions & 0 deletions src/cbmc/cbmc_parse_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,9 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options)
"max-node-refinement",
cmdline.get_value("max-node-refinement"));

options.set_option(
"symex-cache-dereferences", cmdline.isset("symex-cache-dereferences"));

if(cmdline.isset("incremental-loop"))
{
options.set_option(
Expand Down
6 changes: 4 additions & 2 deletions src/goto-checker/bmc_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ void run_property_decider(
"(incremental-loop):" \
"(unwind-min):" \
"(unwind-max):" \
"(ignore-properties-before-unwind-min)"
"(ignore-properties-before-unwind-min)" \
"(symex-cache-dereferences)" \

#define HELP_BMC \
" --paths [strategy] explore paths one at a time\n" \
Expand Down Expand Up @@ -251,7 +252,8 @@ void run_property_decider(
" iteration are allowed to fail due to\n" \
" complexity violations before the loop\n" \
" gets blacklisted\n" \
" --graphml-witness filename write the witness in GraphML format to filename\n" // NOLINT(*)
" --graphml-witness filename write the witness in GraphML format to filename\n" /* NOLINT(*) */ \
" --symex-cache-dereferences enable caching of repeated dereferences" \
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good if new options followed the pattern of having OPT_ and HELP_ macros.

// clang-format on

#endif // CPROVER_GOTO_CHECKER_BMC_UTIL_H
4 changes: 4 additions & 0 deletions src/goto-symex/goto_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ class goto_statet
symex_level2t level2;

public:
/// This is used for eliminating repeated complicated dereferences.
/// \see goto_symext::dereference_rec
sharing_mapt<exprt, symbol_exprt, false, irep_hash> dereference_cache;

const symex_level2t &get_level2() const
{
return level2;
Expand Down
7 changes: 6 additions & 1 deletion src/goto-symex/goto_symex.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,12 @@ class goto_symext
exprt make_auto_object(const typet &, statet &);
virtual void dereference(exprt &, statet &, bool write);

void dereference_rec(exprt &, statet &, bool write);
symbol_exprt cache_dereference(exprt &dereference_result, statet &state);
void dereference_rec(
exprt &expr,
statet &state,
bool write,
bool is_in_quantifier);
exprt address_arithmetic(
const exprt &,
statet &,
Expand Down
8 changes: 8 additions & 0 deletions src/goto-symex/symex_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ struct symex_configt final
/// enables certain analyses that otherwise aren't run.
bool complexity_limits_active;

/// \brief Whether or not to replace multiple occurrences of the same
/// dereference with a single symbol that contains the result of that
/// dereference. Can sometimes lead to a significant performance
/// improvement, but sometimes also makes things worse.
/// See https://github.com/diffblue/cbmc/pull/5964 for performance data.
/// Used in goto_symext::dereference_rec
bool cache_dereferences;

/// \brief Construct a symex_configt using options specified in an
/// \ref optionst
explicit symex_configt(const optionst &options);
Expand Down
107 changes: 94 additions & 13 deletions src/goto-symex/symex_dereference.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@ Author: Daniel Kroening, [email protected]
#include <util/exception_utils.h>
#include <util/expr_iterator.h>
#include <util/expr_util.h>
#include <util/fresh_symbol.h>
#include <util/invariant.h>
#include <util/pointer_offset_size.h>

#include <pointer-analysis/value_set_dereference.h>

#include "expr_skeleton.h"
#include "symex_assign.h"
#include "symex_dereference_state.h"

/// Transforms an lvalue expression by replacing any dereference operations it
Expand Down Expand Up @@ -71,7 +74,7 @@ exprt goto_symext::address_arithmetic(

// there could be further dereferencing in the offset
exprt offset=be.offset();
dereference_rec(offset, state, false);
dereference_rec(offset, state, false, false);

result=plus_exprt(result, offset);

Expand Down Expand Up @@ -107,14 +110,14 @@ exprt goto_symext::address_arithmetic(
// just grab the pointer, but be wary of further dereferencing
// in the pointer itself
result=to_dereference_expr(expr).pointer();
dereference_rec(result, state, false);
dereference_rec(result, state, false, false);
}
else if(expr.id()==ID_if)
{
if_exprt if_expr=to_if_expr(expr);

// the condition is not an address
dereference_rec(if_expr.cond(), state, false);
dereference_rec(if_expr.cond(), state, false, false);

// recursive call
if_expr.true_case() =
Expand All @@ -131,7 +134,7 @@ exprt goto_symext::address_arithmetic(
{
// give up, just dereference
result=expr;
dereference_rec(result, state, false);
dereference_rec(result, state, false, false);

// turn &array into &array[0]
if(result.type().id() == ID_array && !keep_array)
Expand Down Expand Up @@ -191,14 +194,72 @@ exprt goto_symext::address_arithmetic(
return result;
}

symbol_exprt
goto_symext::cache_dereference(exprt &dereference_result, statet &state)
{
auto const cache_key = [&] {
auto cache_key =
state.field_sensitivity.apply(ns, state, dereference_result, false);
if(auto let_expr = expr_try_dynamic_cast<let_exprt>(dereference_result))
{
let_expr->value() = state.rename<L2>(let_expr->value(), ns).get();
}
else
{
cache_key = state.rename<L2>(cache_key, ns).get();
}
return cache_key;
}();

if(auto cached = state.dereference_cache.find(cache_key))
{
return *cached;
}

auto const &cache_symbol = get_fresh_aux_symbol(
cache_key.type(),
"symex",
"dereference_cache",
dereference_result.source_location(),
language_mode,
ns,
state.symbol_table);

// we need to lift possible lets
// (come from the value set to avoid repeating complex pointer comparisons)
auto cache_value = cache_key;
lift_lets(state, cache_value);

auto assign = symex_assignt{
state, symex_targett::assignment_typet::STATE, ns, symex_config, target};

auto cache_symbol_expr = cache_symbol.symbol_expr();
assign.assign_symbol(
to_ssa_expr(state.rename<L1>(cache_symbol_expr, ns).get()),
expr_skeletont{},
cache_value,
{});

state.dereference_cache.insert(cache_key, cache_symbol_expr);
return cache_symbol_expr;
}

/// If \p expr is a \ref dereference_exprt, replace it with explicit references
/// to the objects it may point to. Otherwise recursively apply this function to
/// \p expr's operands, with special cases for address-of (handled by \ref
/// goto_symext::address_arithmetic) and certain common expression patterns
/// such as `&struct.flexible_array[0]` (see inline comments in code).
/// Note that \p write is used to alter behaviour when this function is
/// operating on the LHS of an assignment. Similarly \p is_in_quantifier
/// indicates when the dereference is inside a quantifier (related to scoping
/// when dereference caching is enabled).
/// For full details of this method's pointer replacement and potential side-
/// effects see \ref goto_symext::dereference
void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
void goto_symext::dereference_rec(
exprt &expr,
statet &state,
bool write,
bool is_in_quantifier)
{
if(expr.id()==ID_dereference)
{
Expand All @@ -221,7 +282,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
tmp1.swap(to_dereference_expr(expr).pointer());

// first make sure there are no dereferences in there
dereference_rec(tmp1, state, false);
dereference_rec(tmp1, state, false, is_in_quantifier);

// Depending on the nature of the pointer expression, the recursive deref
// operation might have introduced a construct such as
Expand Down Expand Up @@ -271,10 +332,29 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
dereference.dereference(tmp1, symex_config.show_points_to_sets);
// std::cout << "**** " << format(tmp2) << '\n';

expr.swap(tmp2);

// this may yield a new auto-object
trigger_auto_object(expr, state);
trigger_auto_object(tmp2, state);

// Check various conditions for when we should try to cache the expression.
// 1. Caching dereferences must be enabled.
// 2. Do not cache inside LHS of writes.
// 3. Do not cache inside quantifiers (references variables outside their
// scope).
// 4. Only cache "complicated" expressions, i.e. of the form
// [let p = <expr> in ]
// (p == &something ? something : ...))
// Otherwise we should just return it unchanged.
if(
symex_config.cache_dereferences && !write && !is_in_quantifier &&
(tmp2.id() == ID_if || tmp2.id() == ID_let))
{
expr = cache_dereference(tmp2, state);
}
else
{
expr = std::move(tmp2);
}
}
else if(
expr.id() == ID_index && to_index_expr(expr).array().id() == ID_member &&
Expand All @@ -293,7 +373,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
tmp.add_source_location()=expr.source_location();

// recursive call
dereference_rec(tmp, state, write);
dereference_rec(tmp, state, write, is_in_quantifier);

expr.swap(tmp);
}
Expand Down Expand Up @@ -331,17 +411,18 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
to_address_of_expr(tc_op).object(),
from_integer(0, index_type())));

dereference_rec(expr, state, write);
dereference_rec(expr, state, write, is_in_quantifier);
}
else
{
dereference_rec(tc_op, state, write);
dereference_rec(tc_op, state, write, is_in_quantifier);
}
}
else
{
bool is_quantifier = expr.id() == ID_forall || expr.id() == ID_exists;
Forall_operands(it, expr)
dereference_rec(*it, state, write);
dereference_rec(*it, state, write, is_in_quantifier || is_quantifier);
}
}

Expand Down Expand Up @@ -409,7 +490,7 @@ void goto_symext::dereference(exprt &expr, statet &state, bool write)
});

// start the recursion!
dereference_rec(expr, state, write);
dereference_rec(expr, state, write, false);
// dereferencing may introduce new symbol_exprt
// (like __CPROVER_memory)
expr = state.rename<L1>(std::move(expr), ns).get();
Expand Down
3 changes: 2 additions & 1 deletion src/goto-symex/symex_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ symex_configt::symex_configt(const optionst &options)
"max-field-sensitivity-array-size")
: DEFAULT_MAX_FIELD_SENSITIVITY_ARRAY_SIZE),
complexity_limits_active(
options.get_signed_int_option("symex-complexity-limit") > 0)
options.get_signed_int_option("symex-complexity-limit") > 0),
cache_dereferences{options.get_bool_option("symex-cache-dereferences")}
{
}

Expand Down