Skip to content

Commit 4ecf01d

Browse files
authored
Merge pull request #5964 from hannes-steffenhagen-diffblue/cse-dereference-cleanup
Cache dereferences during symex to avoid repeatedly inserting large dereferences into SSA
2 parents ab404f6 + d554326 commit 4ecf01d

File tree

11 files changed

+155
-17
lines changed

11 files changed

+155
-17
lines changed

jbmc/src/jbmc/jbmc_parse_options.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,9 @@ void jbmc_parse_optionst::get_command_line_options(optionst &options)
425425
options.set_option("validate-goto-model", true);
426426
}
427427

428+
options.set_option(
429+
"symex-cache-dereferences", cmdline.isset("symex-cache-dereferences"));
430+
428431
PARSE_OPTIONS_GOTO_TRACE(cmdline, options);
429432

430433
if(cmdline.isset("no-lazy-methods"))
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
CORE
2+
test.c
3+
--show-vcc
4+
\(main::1::p!0@1#2 = address_of
5+
^EXIT=0$
6+
^SIGNAL=0$
7+
--
8+
--
9+
This is just checking that the --symex-cache-dereferences flag actually turns
10+
dereference caching on and off.
11+
12+
I would like to match the expression more precisely, but the order of
13+
comparisons for address of depends on platform-specific logic (unordered_xxx),
14+
and the corresponding regex would look ridiculous.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
CORE
2+
test.c
3+
--show-vcc --symex-cache-dereferences
4+
symex::dereference_cache!0#1 = 0
5+
^EXIT=0$
6+
^SIGNAL=0$
7+
--
8+
--
9+
This is just checking that the --symex-cache-dereferences flag actually
10+
turns dereference caching on.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#include <assert.h>
2+
3+
int main(void)
4+
{
5+
int cond, x, y, *p = cond ? &x : &y;
6+
assert(*p == 0);
7+
}

src/cbmc/cbmc_parse_options.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,9 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options)
342342
"max-node-refinement",
343343
cmdline.get_value("max-node-refinement"));
344344

345+
options.set_option(
346+
"symex-cache-dereferences", cmdline.isset("symex-cache-dereferences"));
347+
345348
if(cmdline.isset("incremental-loop"))
346349
{
347350
options.set_option(

src/goto-checker/bmc_util.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,8 @@ void run_property_decider(
197197
"(incremental-loop):" \
198198
"(unwind-min):" \
199199
"(unwind-max):" \
200-
"(ignore-properties-before-unwind-min)"
200+
"(ignore-properties-before-unwind-min)" \
201+
"(symex-cache-dereferences)" \
201202

202203
#define HELP_BMC \
203204
" --paths [strategy] explore paths one at a time\n" \
@@ -251,7 +252,8 @@ void run_property_decider(
251252
" iteration are allowed to fail due to\n" \
252253
" complexity violations before the loop\n" \
253254
" gets blacklisted\n" \
254-
" --graphml-witness filename write the witness in GraphML format to filename\n" // NOLINT(*)
255+
" --graphml-witness filename write the witness in GraphML format to filename\n" /* NOLINT(*) */ \
256+
" --symex-cache-dereferences enable caching of repeated dereferences" \
255257
// clang-format on
256258

257259
#endif // CPROVER_GOTO_CHECKER_BMC_UTIL_H

src/goto-symex/goto_state.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@ class goto_statet
3838
symex_level2t level2;
3939

4040
public:
41+
/// This is used for eliminating repeated complicated dereferences.
42+
/// \see goto_symext::dereference_rec
43+
sharing_mapt<exprt, symbol_exprt, false, irep_hash> dereference_cache;
44+
4145
const symex_level2t &get_level2() const
4246
{
4347
return level2;

src/goto-symex/goto_symex.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,12 @@ class goto_symext
299299
exprt make_auto_object(const typet &, statet &);
300300
virtual void dereference(exprt &, statet &, bool write);
301301

302-
void dereference_rec(exprt &, statet &, bool write);
302+
symbol_exprt cache_dereference(exprt &dereference_result, statet &state);
303+
void dereference_rec(
304+
exprt &expr,
305+
statet &state,
306+
bool write,
307+
bool is_in_quantifier);
303308
exprt address_arithmetic(
304309
const exprt &,
305310
statet &,

src/goto-symex/symex_config.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@ struct symex_configt final
5555
/// enables certain analyses that otherwise aren't run.
5656
bool complexity_limits_active;
5757

58+
/// \brief Whether or not to replace multiple occurrences of the same
59+
/// dereference with a single symbol that contains the result of that
60+
/// dereference. Can sometimes lead to a significant performance
61+
/// improvement, but sometimes also makes things worse.
62+
/// See https://github.com/diffblue/cbmc/pull/5964 for performance data.
63+
/// Used in goto_symext::dereference_rec
64+
bool cache_dereferences;
65+
5866
/// \brief Construct a symex_configt using options specified in an
5967
/// \ref optionst
6068
explicit symex_configt(const optionst &options);

src/goto-symex/symex_dereference.cpp

Lines changed: 94 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,14 @@ Author: Daniel Kroening, [email protected]
1717
#include <util/exception_utils.h>
1818
#include <util/expr_iterator.h>
1919
#include <util/expr_util.h>
20+
#include <util/fresh_symbol.h>
2021
#include <util/invariant.h>
2122
#include <util/pointer_offset_size.h>
2223

2324
#include <pointer-analysis/value_set_dereference.h>
2425

26+
#include "expr_skeleton.h"
27+
#include "symex_assign.h"
2528
#include "symex_dereference_state.h"
2629

2730
/// Transforms an lvalue expression by replacing any dereference operations it
@@ -71,7 +74,7 @@ exprt goto_symext::address_arithmetic(
7174

7275
// there could be further dereferencing in the offset
7376
exprt offset=be.offset();
74-
dereference_rec(offset, state, false);
77+
dereference_rec(offset, state, false, false);
7578

7679
result=plus_exprt(result, offset);
7780

@@ -107,14 +110,14 @@ exprt goto_symext::address_arithmetic(
107110
// just grab the pointer, but be wary of further dereferencing
108111
// in the pointer itself
109112
result=to_dereference_expr(expr).pointer();
110-
dereference_rec(result, state, false);
113+
dereference_rec(result, state, false, false);
111114
}
112115
else if(expr.id()==ID_if)
113116
{
114117
if_exprt if_expr=to_if_expr(expr);
115118

116119
// the condition is not an address
117-
dereference_rec(if_expr.cond(), state, false);
120+
dereference_rec(if_expr.cond(), state, false, false);
118121

119122
// recursive call
120123
if_expr.true_case() =
@@ -131,7 +134,7 @@ exprt goto_symext::address_arithmetic(
131134
{
132135
// give up, just dereference
133136
result=expr;
134-
dereference_rec(result, state, false);
137+
dereference_rec(result, state, false, false);
135138

136139
// turn &array into &array[0]
137140
if(result.type().id() == ID_array && !keep_array)
@@ -191,14 +194,72 @@ exprt goto_symext::address_arithmetic(
191194
return result;
192195
}
193196

197+
symbol_exprt
198+
goto_symext::cache_dereference(exprt &dereference_result, statet &state)
199+
{
200+
auto const cache_key = [&] {
201+
auto cache_key =
202+
state.field_sensitivity.apply(ns, state, dereference_result, false);
203+
if(auto let_expr = expr_try_dynamic_cast<let_exprt>(dereference_result))
204+
{
205+
let_expr->value() = state.rename<L2>(let_expr->value(), ns).get();
206+
}
207+
else
208+
{
209+
cache_key = state.rename<L2>(cache_key, ns).get();
210+
}
211+
return cache_key;
212+
}();
213+
214+
if(auto cached = state.dereference_cache.find(cache_key))
215+
{
216+
return *cached;
217+
}
218+
219+
auto const &cache_symbol = get_fresh_aux_symbol(
220+
cache_key.type(),
221+
"symex",
222+
"dereference_cache",
223+
dereference_result.source_location(),
224+
language_mode,
225+
ns,
226+
state.symbol_table);
227+
228+
// we need to lift possible lets
229+
// (come from the value set to avoid repeating complex pointer comparisons)
230+
auto cache_value = cache_key;
231+
lift_lets(state, cache_value);
232+
233+
auto assign = symex_assignt{
234+
state, symex_targett::assignment_typet::STATE, ns, symex_config, target};
235+
236+
auto cache_symbol_expr = cache_symbol.symbol_expr();
237+
assign.assign_symbol(
238+
to_ssa_expr(state.rename<L1>(cache_symbol_expr, ns).get()),
239+
expr_skeletont{},
240+
cache_value,
241+
{});
242+
243+
state.dereference_cache.insert(cache_key, cache_symbol_expr);
244+
return cache_symbol_expr;
245+
}
246+
194247
/// If \p expr is a \ref dereference_exprt, replace it with explicit references
195248
/// to the objects it may point to. Otherwise recursively apply this function to
196249
/// \p expr's operands, with special cases for address-of (handled by \ref
197250
/// goto_symext::address_arithmetic) and certain common expression patterns
198251
/// such as `&struct.flexible_array[0]` (see inline comments in code).
252+
/// Note that \p write is used to alter behaviour when this function is
253+
/// operating on the LHS of an assignment. Similarly \p is_in_quantifier
254+
/// indicates when the dereference is inside a quantifier (related to scoping
255+
/// when dereference caching is enabled).
199256
/// For full details of this method's pointer replacement and potential side-
200257
/// effects see \ref goto_symext::dereference
201-
void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
258+
void goto_symext::dereference_rec(
259+
exprt &expr,
260+
statet &state,
261+
bool write,
262+
bool is_in_quantifier)
202263
{
203264
if(expr.id()==ID_dereference)
204265
{
@@ -221,7 +282,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
221282
tmp1.swap(to_dereference_expr(expr).pointer());
222283

223284
// first make sure there are no dereferences in there
224-
dereference_rec(tmp1, state, false);
285+
dereference_rec(tmp1, state, false, is_in_quantifier);
225286

226287
// Depending on the nature of the pointer expression, the recursive deref
227288
// operation might have introduced a construct such as
@@ -271,10 +332,29 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
271332
dereference.dereference(tmp1, symex_config.show_points_to_sets);
272333
// std::cout << "**** " << format(tmp2) << '\n';
273334

274-
expr.swap(tmp2);
275335

276336
// this may yield a new auto-object
277-
trigger_auto_object(expr, state);
337+
trigger_auto_object(tmp2, state);
338+
339+
// Check various conditions for when we should try to cache the expression.
340+
// 1. Caching dereferences must be enabled.
341+
// 2. Do not cache inside LHS of writes.
342+
// 3. Do not cache inside quantifiers (references variables outside their
343+
// scope).
344+
// 4. Only cache "complicated" expressions, i.e. of the form
345+
// [let p = <expr> in ]
346+
// (p == &something ? something : ...))
347+
// Otherwise we should just return it unchanged.
348+
if(
349+
symex_config.cache_dereferences && !write && !is_in_quantifier &&
350+
(tmp2.id() == ID_if || tmp2.id() == ID_let))
351+
{
352+
expr = cache_dereference(tmp2, state);
353+
}
354+
else
355+
{
356+
expr = std::move(tmp2);
357+
}
278358
}
279359
else if(
280360
expr.id() == ID_index && to_index_expr(expr).array().id() == ID_member &&
@@ -293,7 +373,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
293373
tmp.add_source_location()=expr.source_location();
294374

295375
// recursive call
296-
dereference_rec(tmp, state, write);
376+
dereference_rec(tmp, state, write, is_in_quantifier);
297377

298378
expr.swap(tmp);
299379
}
@@ -331,17 +411,18 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
331411
to_address_of_expr(tc_op).object(),
332412
from_integer(0, index_type())));
333413

334-
dereference_rec(expr, state, write);
414+
dereference_rec(expr, state, write, is_in_quantifier);
335415
}
336416
else
337417
{
338-
dereference_rec(tc_op, state, write);
418+
dereference_rec(tc_op, state, write, is_in_quantifier);
339419
}
340420
}
341421
else
342422
{
423+
bool is_quantifier = expr.id() == ID_forall || expr.id() == ID_exists;
343424
Forall_operands(it, expr)
344-
dereference_rec(*it, state, write);
425+
dereference_rec(*it, state, write, is_in_quantifier || is_quantifier);
345426
}
346427
}
347428

@@ -409,7 +490,7 @@ void goto_symext::dereference(exprt &expr, statet &state, bool write)
409490
});
410491

411492
// start the recursion!
412-
dereference_rec(expr, state, write);
493+
dereference_rec(expr, state, write, false);
413494
// dereferencing may introduce new symbol_exprt
414495
// (like __CPROVER_memory)
415496
expr = state.rename<L1>(std::move(expr), ns).get();

src/goto-symex/symex_main.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,8 @@ symex_configt::symex_configt(const optionst &options)
5656
"max-field-sensitivity-array-size")
5757
: DEFAULT_MAX_FIELD_SENSITIVITY_ARRAY_SIZE),
5858
complexity_limits_active(
59-
options.get_signed_int_option("symex-complexity-limit") > 0)
59+
options.get_signed_int_option("symex-complexity-limit") > 0),
60+
cache_dereferences{options.get_bool_option("symex-cache-dereferences")}
6061
{
6162
}
6263

0 commit comments

Comments
 (0)