diff --git a/jbmc/src/jbmc/jbmc_parse_options.cpp b/jbmc/src/jbmc/jbmc_parse_options.cpp index 75988d1eab0..8a95b6b4057 100644 --- a/jbmc/src/jbmc/jbmc_parse_options.cpp +++ b/jbmc/src/jbmc/jbmc_parse_options.cpp @@ -425,6 +425,9 @@ void jbmc_parse_optionst::get_command_line_options(optionst &options) options.set_option("validate-goto-model", true); } + options.set_option( + "symex-cache-dereferences", cmdline.isset("symex-cache-dereferences")); + PARSE_OPTIONS_GOTO_TRACE(cmdline, options); if(cmdline.isset("no-lazy-methods")) diff --git a/regression/cbmc/dereference-cache-flag/test-no-cache.desc b/regression/cbmc/dereference-cache-flag/test-no-cache.desc new file mode 100644 index 00000000000..23ee595f1e5 --- /dev/null +++ b/regression/cbmc/dereference-cache-flag/test-no-cache.desc @@ -0,0 +1,14 @@ +CORE +test.c +--show-vcc +\(main::1::p!0@1#2 = address_of +^EXIT=0$ +^SIGNAL=0$ +-- +-- +This is just checking that the --symex-cache-dereferences flag actually turns +dereference caching on and off. + +I would like to match the expression more precisely, but the order of +comparisons for address of depends on platform-specific logic (unordered_xxx), +and the corresponding regex would look ridiculous. diff --git a/regression/cbmc/dereference-cache-flag/test-with-cache.desc b/regression/cbmc/dereference-cache-flag/test-with-cache.desc new file mode 100644 index 00000000000..b4342f18c76 --- /dev/null +++ b/regression/cbmc/dereference-cache-flag/test-with-cache.desc @@ -0,0 +1,10 @@ +CORE +test.c +--show-vcc --symex-cache-dereferences +symex::dereference_cache!0#1 = 0 +^EXIT=0$ +^SIGNAL=0$ +-- +-- +This is just checking that the --symex-cache-dereferences flag actually +turns dereference caching on. diff --git a/regression/cbmc/dereference-cache-flag/test.c b/regression/cbmc/dereference-cache-flag/test.c new file mode 100644 index 00000000000..59cc9b35b23 --- /dev/null +++ b/regression/cbmc/dereference-cache-flag/test.c @@ -0,0 +1,7 @@ +#include + +int main(void) +{ + int cond, x, y, *p = cond ? &x : &y; + assert(*p == 0); +} diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index 5b7ba9223e1..0bce24fc6e1 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -342,6 +342,9 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) "max-node-refinement", cmdline.get_value("max-node-refinement")); + options.set_option( + "symex-cache-dereferences", cmdline.isset("symex-cache-dereferences")); + if(cmdline.isset("incremental-loop")) { options.set_option( diff --git a/src/goto-checker/bmc_util.h b/src/goto-checker/bmc_util.h index 0b820ecc455..3f922af1cc2 100644 --- a/src/goto-checker/bmc_util.h +++ b/src/goto-checker/bmc_util.h @@ -197,7 +197,8 @@ void run_property_decider( "(incremental-loop):" \ "(unwind-min):" \ "(unwind-max):" \ - "(ignore-properties-before-unwind-min)" + "(ignore-properties-before-unwind-min)" \ + "(symex-cache-dereferences)" \ #define HELP_BMC \ " --paths [strategy] explore paths one at a time\n" \ @@ -251,7 +252,8 @@ void run_property_decider( " iteration are allowed to fail due to\n" \ " complexity violations before the loop\n" \ " gets blacklisted\n" \ - " --graphml-witness filename write the witness in GraphML format to filename\n" // NOLINT(*) + " --graphml-witness filename write the witness in GraphML format to filename\n" /* NOLINT(*) */ \ + " --symex-cache-dereferences enable caching of repeated dereferences" \ // clang-format on #endif // CPROVER_GOTO_CHECKER_BMC_UTIL_H diff --git a/src/goto-symex/goto_state.h b/src/goto-symex/goto_state.h index f2ce7660229..62405bf2ce6 100644 --- a/src/goto-symex/goto_state.h +++ b/src/goto-symex/goto_state.h @@ -38,6 +38,10 @@ class goto_statet symex_level2t level2; public: + /// This is used for eliminating repeated complicated dereferences. + /// \see goto_symext::dereference_rec + sharing_mapt dereference_cache; + const symex_level2t &get_level2() const { return level2; diff --git a/src/goto-symex/goto_symex.h b/src/goto-symex/goto_symex.h index 5663bde1703..49f3edae121 100644 --- a/src/goto-symex/goto_symex.h +++ b/src/goto-symex/goto_symex.h @@ -299,7 +299,12 @@ class goto_symext exprt make_auto_object(const typet &, statet &); virtual void dereference(exprt &, statet &, bool write); - void dereference_rec(exprt &, statet &, bool write); + symbol_exprt cache_dereference(exprt &dereference_result, statet &state); + void dereference_rec( + exprt &expr, + statet &state, + bool write, + bool is_in_quantifier); exprt address_arithmetic( const exprt &, statet &, diff --git a/src/goto-symex/symex_config.h b/src/goto-symex/symex_config.h index b7bd52e83f6..70650744356 100644 --- a/src/goto-symex/symex_config.h +++ b/src/goto-symex/symex_config.h @@ -55,6 +55,14 @@ struct symex_configt final /// enables certain analyses that otherwise aren't run. bool complexity_limits_active; + /// \brief Whether or not to replace multiple occurrences of the same + /// dereference with a single symbol that contains the result of that + /// dereference. Can sometimes lead to a significant performance + /// improvement, but sometimes also makes things worse. + /// See https://github.com/diffblue/cbmc/pull/5964 for performance data. + /// Used in goto_symext::dereference_rec + bool cache_dereferences; + /// \brief Construct a symex_configt using options specified in an /// \ref optionst explicit symex_configt(const optionst &options); diff --git a/src/goto-symex/symex_dereference.cpp b/src/goto-symex/symex_dereference.cpp index 9b3d0370dd5..a88321abd48 100644 --- a/src/goto-symex/symex_dereference.cpp +++ b/src/goto-symex/symex_dereference.cpp @@ -17,11 +17,14 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include #include #include +#include "expr_skeleton.h" +#include "symex_assign.h" #include "symex_dereference_state.h" /// Transforms an lvalue expression by replacing any dereference operations it @@ -71,7 +74,7 @@ exprt goto_symext::address_arithmetic( // there could be further dereferencing in the offset exprt offset=be.offset(); - dereference_rec(offset, state, false); + dereference_rec(offset, state, false, false); result=plus_exprt(result, offset); @@ -107,14 +110,14 @@ exprt goto_symext::address_arithmetic( // just grab the pointer, but be wary of further dereferencing // in the pointer itself result=to_dereference_expr(expr).pointer(); - dereference_rec(result, state, false); + dereference_rec(result, state, false, false); } else if(expr.id()==ID_if) { if_exprt if_expr=to_if_expr(expr); // the condition is not an address - dereference_rec(if_expr.cond(), state, false); + dereference_rec(if_expr.cond(), state, false, false); // recursive call if_expr.true_case() = @@ -131,7 +134,7 @@ exprt goto_symext::address_arithmetic( { // give up, just dereference result=expr; - dereference_rec(result, state, false); + dereference_rec(result, state, false, false); // turn &array into &array[0] if(result.type().id() == ID_array && !keep_array) @@ -191,14 +194,72 @@ exprt goto_symext::address_arithmetic( return result; } +symbol_exprt +goto_symext::cache_dereference(exprt &dereference_result, statet &state) +{ + auto const cache_key = [&] { + auto cache_key = + state.field_sensitivity.apply(ns, state, dereference_result, false); + if(auto let_expr = expr_try_dynamic_cast(dereference_result)) + { + let_expr->value() = state.rename(let_expr->value(), ns).get(); + } + else + { + cache_key = state.rename(cache_key, ns).get(); + } + return cache_key; + }(); + + if(auto cached = state.dereference_cache.find(cache_key)) + { + return *cached; + } + + auto const &cache_symbol = get_fresh_aux_symbol( + cache_key.type(), + "symex", + "dereference_cache", + dereference_result.source_location(), + language_mode, + ns, + state.symbol_table); + + // we need to lift possible lets + // (come from the value set to avoid repeating complex pointer comparisons) + auto cache_value = cache_key; + lift_lets(state, cache_value); + + auto assign = symex_assignt{ + state, symex_targett::assignment_typet::STATE, ns, symex_config, target}; + + auto cache_symbol_expr = cache_symbol.symbol_expr(); + assign.assign_symbol( + to_ssa_expr(state.rename(cache_symbol_expr, ns).get()), + expr_skeletont{}, + cache_value, + {}); + + state.dereference_cache.insert(cache_key, cache_symbol_expr); + return cache_symbol_expr; +} + /// If \p expr is a \ref dereference_exprt, replace it with explicit references /// to the objects it may point to. Otherwise recursively apply this function to /// \p expr's operands, with special cases for address-of (handled by \ref /// goto_symext::address_arithmetic) and certain common expression patterns /// such as `&struct.flexible_array[0]` (see inline comments in code). +/// Note that \p write is used to alter behaviour when this function is +/// operating on the LHS of an assignment. Similarly \p is_in_quantifier +/// indicates when the dereference is inside a quantifier (related to scoping +/// when dereference caching is enabled). /// For full details of this method's pointer replacement and potential side- /// effects see \ref goto_symext::dereference -void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) +void goto_symext::dereference_rec( + exprt &expr, + statet &state, + bool write, + bool is_in_quantifier) { if(expr.id()==ID_dereference) { @@ -221,7 +282,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) tmp1.swap(to_dereference_expr(expr).pointer()); // first make sure there are no dereferences in there - dereference_rec(tmp1, state, false); + dereference_rec(tmp1, state, false, is_in_quantifier); // Depending on the nature of the pointer expression, the recursive deref // operation might have introduced a construct such as @@ -271,10 +332,29 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) dereference.dereference(tmp1, symex_config.show_points_to_sets); // std::cout << "**** " << format(tmp2) << '\n'; - expr.swap(tmp2); // this may yield a new auto-object - trigger_auto_object(expr, state); + trigger_auto_object(tmp2, state); + + // Check various conditions for when we should try to cache the expression. + // 1. Caching dereferences must be enabled. + // 2. Do not cache inside LHS of writes. + // 3. Do not cache inside quantifiers (references variables outside their + // scope). + // 4. Only cache "complicated" expressions, i.e. of the form + // [let p = in ] + // (p == &something ? something : ...)) + // Otherwise we should just return it unchanged. + if( + symex_config.cache_dereferences && !write && !is_in_quantifier && + (tmp2.id() == ID_if || tmp2.id() == ID_let)) + { + expr = cache_dereference(tmp2, state); + } + else + { + expr = std::move(tmp2); + } } else if( expr.id() == ID_index && to_index_expr(expr).array().id() == ID_member && @@ -293,7 +373,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) tmp.add_source_location()=expr.source_location(); // recursive call - dereference_rec(tmp, state, write); + dereference_rec(tmp, state, write, is_in_quantifier); expr.swap(tmp); } @@ -331,17 +411,18 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write) to_address_of_expr(tc_op).object(), from_integer(0, index_type()))); - dereference_rec(expr, state, write); + dereference_rec(expr, state, write, is_in_quantifier); } else { - dereference_rec(tc_op, state, write); + dereference_rec(tc_op, state, write, is_in_quantifier); } } else { + bool is_quantifier = expr.id() == ID_forall || expr.id() == ID_exists; Forall_operands(it, expr) - dereference_rec(*it, state, write); + dereference_rec(*it, state, write, is_in_quantifier || is_quantifier); } } @@ -409,7 +490,7 @@ void goto_symext::dereference(exprt &expr, statet &state, bool write) }); // start the recursion! - dereference_rec(expr, state, write); + dereference_rec(expr, state, write, false); // dereferencing may introduce new symbol_exprt // (like __CPROVER_memory) expr = state.rename(std::move(expr), ns).get(); diff --git a/src/goto-symex/symex_main.cpp b/src/goto-symex/symex_main.cpp index b5053888497..f2fbcd50315 100644 --- a/src/goto-symex/symex_main.cpp +++ b/src/goto-symex/symex_main.cpp @@ -56,7 +56,8 @@ symex_configt::symex_configt(const optionst &options) "max-field-sensitivity-array-size") : DEFAULT_MAX_FIELD_SENSITIVITY_ARRAY_SIZE), complexity_limits_active( - options.get_signed_int_option("symex-complexity-limit") > 0) + options.get_signed_int_option("symex-complexity-limit") > 0), + cache_dereferences{options.get_bool_option("symex-cache-dereferences")} { }