Skip to content

Commit b96df4b

Browse files
Hannes SteffenhagenHannes Steffenhagen
Hannes Steffenhagen
authored and
Hannes Steffenhagen
committed
Add CSE dereference feature
1 parent e17c9db commit b96df4b

10 files changed

+144
-19
lines changed

regression/cbmc/double_deref/README

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,9 @@ double_deref_with_cast.desc -- a double-deref with an intervening cast (*(int*)*
2525
double_deref_with_member.desc -- a double-deref with an intervening member expression (p->field1->field2), should use a let-expression
2626
double_deref_with_pointer_arithmetic.desc -- a double-deref with intervening pointer arithmetic (p[idx1][idx2]), should use a let-expression
2727
*_single_alias.desc -- variants of the above where the first dereference points to a single possible object, so no let-expression is necessary
28+
29+
Why no-dereference-cache?
30+
-------------------------
31+
32+
Because these tests test for specific VCCs to do with multiple nested complex
33+
dereferences and the dereference caching rewrites these.

regression/cbmc/double_deref/double_deref.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CORE
1+
CORE no-dereference-cache
22
double_deref.c
33
--show-vcc
44
\{1\} \(main::1::pptr!0@1#2 = address_of\(main::1::ptr[12]!0@1\) \? main::argc!0@1#1 = [12] : main::argc!0@1#1 = [12]\)

regression/cbmc/double_deref/double_deref_with_cast.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CORE
1+
CORE no-dereference-cache
22
double_deref_with_cast.c
33
--show-vcc
44
\{1\} \(cast\(main::1::pptr!0@1#2, signedbv\[32\]\*\*\) = address_of\(main::1::ptr2!0@1\) \? main::argc!0@1#1 = 2 \: main::argc!0@1#1 = 1\)

regression/cbmc/double_deref/double_deref_with_member.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CORE
1+
CORE no-dereference-cache
22
double_deref_with_member.c
33
--show-vcc
44
^\{1\} \(main::1::cptr!0@1#2 = address_of\(main::1::container2!0@1\) \? main::argc!0@1#1 = 2 : main::argc!0@1#1 = 1\)

regression/cbmc/double_deref/double_deref_with_pointer_arithmetic.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CORE
1+
CORE no-dereference-cache
22
double_deref_with_pointer_arithmetic.c
33
--show-vcc
44
^\{-[0-9]+\} derefd_pointer::derefd_pointer!0#1 = \{ symex_dynamic::dynamic_object1#3\[\[0\]\], symex_dynamic::dynamic_object1#3\[\[1\]\] \}\[cast\(mod #source_location=""\(main::argc!0@1#1, 2\), signedbv\[64\]\)\]

regression/cbmc/double_deref/double_deref_with_pointer_arithmetic_single_alias.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
CORE
1+
CORE no-dereference-cache
22
double_deref_with_pointer_arithmetic_single_alias.c
33
--show-vcc
44
\{1\} \(derefd_pointer::derefd_pointer!0#1 = address_of\(symex_dynamic::dynamic_object2\) \? main::argc!0@1#1 = 1 : symex::invalid_object!0#0 = main::argc!0@1#1\)

src/goto-symex/goto_state.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,27 @@ Author: Romain Brenguier, [email protected]
1212

1313
#include <util/format_expr.h>
1414

15+
optionalt<symbol_exprt>
16+
dereference_cachet::lookup(const exprt &dereference) const
17+
{
18+
auto it = cache.find(dereference);
19+
if(it == cache.end())
20+
{
21+
return nullopt;
22+
}
23+
else
24+
{
25+
return {it->second};
26+
}
27+
}
28+
29+
void dereference_cachet::insert(
30+
exprt new_cached_expr,
31+
symbol_exprt new_cache_symbol)
32+
{
33+
cache.emplace(std::move(new_cached_expr), std::move(new_cache_symbol));
34+
}
35+
1536
/// Print the constant propagation map in a human-friendly format.
1637
/// This is primarily for use from the debugger; please don't delete me just
1738
/// because there aren't any current callers.

src/goto-symex/goto_state.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ Author: Romain Brenguier, [email protected]
1212
#ifndef CPROVER_GOTO_SYMEX_GOTO_STATE_H
1313
#define CPROVER_GOTO_SYMEX_GOTO_STATE_H
1414

15+
#include <unordered_map>
1516
#include <util/sharing_map.h>
1617

1718
#include <analyses/guard.h>
@@ -24,6 +25,19 @@ Author: Romain Brenguier, [email protected]
2425
// by the parent class.
2526
class goto_symex_statet;
2627

28+
/// This is used for eliminating repeated complicated dereferences.
29+
/// This is pretty much just a simple wrapper around a map.
30+
/// \see goto_symext::dereference_rec
31+
struct dereference_cachet
32+
{
33+
private:
34+
std::unordered_map<exprt, symbol_exprt, irep_hash> cache;
35+
36+
public:
37+
optionalt<symbol_exprt> lookup(const exprt &pointer) const;
38+
void insert(exprt new_cached_pointer_expr, symbol_exprt new_cache_symbol);
39+
};
40+
2741
/// Container for data that varies per program point, e.g. the constant
2842
/// propagator state, when state needs to branch. This is copied out of
2943
/// goto_symex_statet at a control-flow fork and then back into it at a
@@ -38,6 +52,8 @@ class goto_statet
3852
symex_level2t level2;
3953

4054
public:
55+
dereference_cachet dereference_cache;
56+
4157
const symex_level2t &get_level2() const
4258
{
4359
return level2;

src/goto-symex/goto_symex.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,12 @@ class goto_symext
299299
exprt make_auto_object(const typet &, statet &);
300300
virtual void dereference(exprt &, statet &, bool write);
301301

302-
void dereference_rec(exprt &, statet &, bool write);
302+
symbol_exprt cache_dereference(exprt &dereference_result, statet &state);
303+
void dereference_rec(
304+
exprt &expr,
305+
statet &state,
306+
bool write,
307+
bool is_in_quantifier);
303308
exprt address_arithmetic(
304309
const exprt &,
305310
statet &,

src/goto-symex/symex_dereference.cpp

Lines changed: 90 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,14 @@ Author: Daniel Kroening, [email protected]
1717
#include <util/exception_utils.h>
1818
#include <util/expr_iterator.h>
1919
#include <util/expr_util.h>
20+
#include <util/fresh_symbol.h>
2021
#include <util/invariant.h>
2122
#include <util/pointer_offset_size.h>
2223

2324
#include <pointer-analysis/value_set_dereference.h>
2425

26+
#include "expr_skeleton.h"
27+
#include "symex_assign.h"
2528
#include "symex_dereference_state.h"
2629

2730
/// Transforms an lvalue expression by replacing any dereference operations it
@@ -71,7 +74,7 @@ exprt goto_symext::address_arithmetic(
7174

7275
// there could be further dereferencing in the offset
7376
exprt offset=be.offset();
74-
dereference_rec(offset, state, false);
77+
dereference_rec(offset, state, false, false);
7578

7679
result=plus_exprt(result, offset);
7780

@@ -107,14 +110,14 @@ exprt goto_symext::address_arithmetic(
107110
// just grab the pointer, but be wary of further dereferencing
108111
// in the pointer itself
109112
result=to_dereference_expr(expr).pointer();
110-
dereference_rec(result, state, false);
113+
dereference_rec(result, state, false, false);
111114
}
112115
else if(expr.id()==ID_if)
113116
{
114117
if_exprt if_expr=to_if_expr(expr);
115118

116119
// the condition is not an address
117-
dereference_rec(if_expr.cond(), state, false);
120+
dereference_rec(if_expr.cond(), state, false, false);
118121

119122
// recursive call
120123
if_expr.true_case() =
@@ -131,7 +134,7 @@ exprt goto_symext::address_arithmetic(
131134
{
132135
// give up, just dereference
133136
result=expr;
134-
dereference_rec(result, state, false);
137+
dereference_rec(result, state, false, false);
135138

136139
// turn &array into &array[0]
137140
if(result.type().id() == ID_array && !keep_array)
@@ -191,14 +194,68 @@ exprt goto_symext::address_arithmetic(
191194
return result;
192195
}
193196

197+
symbol_exprt
198+
goto_symext::cache_dereference(exprt &dereference_result, statet &state)
199+
{
200+
auto const cache_key = [&] {
201+
auto cache_key =
202+
state.field_sensitivity.apply(ns, state, dereference_result, false);
203+
if(auto let_expr = expr_try_dynamic_cast<let_exprt>(dereference_result))
204+
{
205+
let_expr->value() = state.rename<L2>(let_expr->value(), ns).get();
206+
}
207+
else
208+
{
209+
cache_key = state.rename<L2>(cache_key, ns).get();
210+
}
211+
return cache_key;
212+
}();
213+
214+
if(auto cached = state.dereference_cache.lookup(cache_key))
215+
{
216+
return *cached;
217+
}
218+
219+
auto const &cache_symbol = get_fresh_aux_symbol(
220+
cache_key.type(),
221+
"symex",
222+
"dereference_cache",
223+
dereference_result.source_location(),
224+
ID_C,
225+
ns,
226+
state.symbol_table);
227+
228+
// we need to lift possible lets
229+
// (come from the value set to avoid repeating complex pointer comparisons)
230+
auto cache_value = cache_key;
231+
lift_lets(state, cache_value);
232+
233+
exprt::operandst guard{};
234+
auto assign = symex_assignt{
235+
state, symex_targett::assignment_typet::STATE, ns, symex_config, target};
236+
237+
assign.assign_symbol(
238+
to_ssa_expr(state.rename<L1>(cache_symbol.symbol_expr(), ns).get()),
239+
expr_skeletont{},
240+
cache_value,
241+
guard);
242+
243+
state.dereference_cache.insert(cache_key, cache_symbol.symbol_expr());
244+
return cache_symbol.symbol_expr();
245+
}
246+
194247
/// If \p expr is a \ref dereference_exprt, replace it with explicit references
195248
/// to the objects it may point to. Otherwise recursively apply this function to
196249
/// \p expr's operands, with special cases for address-of (handled by \ref
197250
/// goto_symext::address_arithmetic) and certain common expression patterns
198251
/// such as `&struct.flexible_array[0]` (see inline comments in code).
199252
/// For full details of this method's pointer replacement and potential side-
200253
/// effects see \ref goto_symext::dereference
201-
void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
254+
void goto_symext::dereference_rec(
255+
exprt &expr,
256+
statet &state,
257+
bool write,
258+
bool is_in_quantifier)
202259
{
203260
if(expr.id()==ID_dereference)
204261
{
@@ -221,7 +278,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
221278
tmp1.swap(to_dereference_expr(expr).pointer());
222279

223280
// first make sure there are no dereferences in there
224-
dereference_rec(tmp1, state, false);
281+
dereference_rec(tmp1, state, false, is_in_quantifier);
225282

226283
// Depending on the nature of the pointer expression, the recursive deref
227284
// operation might have introduced a construct such as
@@ -271,10 +328,29 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
271328
dereference.dereference(tmp1, symex_config.show_points_to_sets);
272329
// std::cout << "**** " << format(tmp2) << '\n';
273330

274-
expr.swap(tmp2);
275331

276332
// this may yield a new auto-object
277-
trigger_auto_object(expr, state);
333+
trigger_auto_object(tmp2, state);
334+
335+
// If the dereference result is not a complicated expression
336+
// (i.e. of the form
337+
// [let p = <expr> in ]
338+
// (p == &something ? something : ...))
339+
// we should just return it unchanged.
340+
// also if we are on the lhs of an assignment we should also not attempt to
341+
// go to the cache we cannot do this for quantified expressions because this
342+
// would result in us referencing quantifier variables outside the scope of
343+
// the quantifier.
344+
if(
345+
!write && !is_in_quantifier &&
346+
(tmp2.id() == ID_if || tmp2.id() == ID_let))
347+
{
348+
expr = cache_dereference(tmp2, state);
349+
}
350+
else
351+
{
352+
expr = tmp2;
353+
}
278354
}
279355
else if(
280356
expr.id() == ID_index && to_index_expr(expr).array().id() == ID_member &&
@@ -293,7 +369,7 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
293369
tmp.add_source_location()=expr.source_location();
294370

295371
// recursive call
296-
dereference_rec(tmp, state, write);
372+
dereference_rec(tmp, state, write, is_in_quantifier);
297373

298374
expr.swap(tmp);
299375
}
@@ -331,17 +407,18 @@ void goto_symext::dereference_rec(exprt &expr, statet &state, bool write)
331407
to_address_of_expr(tc_op).object(),
332408
from_integer(0, index_type())));
333409

334-
dereference_rec(expr, state, write);
410+
dereference_rec(expr, state, write, is_in_quantifier);
335411
}
336412
else
337413
{
338-
dereference_rec(tc_op, state, write);
414+
dereference_rec(tc_op, state, write, is_in_quantifier);
339415
}
340416
}
341417
else
342418
{
419+
bool is_quantifier = expr.id() == ID_forall || expr.id() == ID_exists;
343420
Forall_operands(it, expr)
344-
dereference_rec(*it, state, write);
421+
dereference_rec(*it, state, write, is_in_quantifier || is_quantifier);
345422
}
346423
}
347424

@@ -409,7 +486,7 @@ void goto_symext::dereference(exprt &expr, statet &state, bool write)
409486
});
410487

411488
// start the recursion!
412-
dereference_rec(expr, state, write);
489+
dereference_rec(expr, state, write, false);
413490
// dereferencing may introduce new symbol_exprt
414491
// (like __CPROVER_memory)
415492
expr = state.rename<L1>(std::move(expr), ns).get();

0 commit comments

Comments
 (0)