Skip to content

Commit 3b7d59a

Browse files
authored
Merge pull request #4097 from smowton/smowton/admin/clean-and-document-symex-deref
Clean and document symex-dereference
2 parents 51ccac8 + 115cb7e commit 3b7d59a

11 files changed

+175
-226
lines changed

src/goto-symex/goto_symex.h

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -222,23 +222,14 @@ class goto_symext
222222

223223
void trigger_auto_object(const exprt &, statet &);
224224
void initialize_auto_object(const exprt &, statet &);
225-
void process_array_expr(statet &, exprt &, bool);
225+
void process_array_expr(statet &, exprt &);
226226
exprt make_auto_object(const typet &, statet &);
227-
virtual void dereference(exprt &, statet &, bool write);
228-
229-
void dereference_rec(exprt &, statet &, guardt &, bool write);
230-
231-
void dereference_rec_address_of(
232-
exprt &,
233-
statet &,
234-
guardt &);
235-
236-
static bool is_index_member_symbol_if(const exprt &expr);
227+
virtual void dereference(exprt &, statet &);
237228

229+
void dereference_rec(exprt &, statet &);
238230
exprt address_arithmetic(
239231
const exprt &,
240232
statet &,
241-
guardt &,
242233
bool keep_array);
243234

244235
virtual void symex_goto(statet &);

src/goto-symex/symex_clean_expr.cpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -120,18 +120,14 @@ process_array_expr(exprt &expr, bool do_simplify, const namespacet &ns)
120120
}
121121
}
122122

123-
void goto_symext::process_array_expr(statet &state, exprt &expr, bool is_lhs)
123+
void goto_symext::process_array_expr(statet &state, exprt &expr)
124124
{
125125
symex_dereference_statet symex_dereference_state(*this, state);
126126

127127
value_set_dereferencet dereference(
128128
ns, state.symbol_table, symex_dereference_state, language_mode, false);
129129

130-
expr = dereference.dereference(
131-
expr,
132-
guardt{true_exprt()},
133-
is_lhs ? value_set_dereferencet::modet::WRITE
134-
: value_set_dereferencet::modet::READ);
130+
expr = dereference.dereference(expr, guardt{true_exprt()});
135131

136132
::process_array_expr(expr, symex_config.simplify_opt, ns);
137133
}
@@ -180,7 +176,7 @@ void goto_symext::clean_expr(
180176
const bool write)
181177
{
182178
replace_nondet(expr, path_storage.build_symex_nondet);
183-
dereference(expr, state, write);
179+
dereference(expr, state);
184180

185181
// make sure all remaining byte extract operations use the root
186182
// object to avoid nesting of with/update and byte_update when on

src/goto-symex/symex_dereference.cpp

Lines changed: 78 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -23,76 +23,22 @@ Author: Daniel Kroening, [email protected]
2323

2424
#include "symex_dereference_state.h"
2525

26-
void goto_symext::dereference_rec_address_of(
27-
exprt &expr,
28-
statet &state,
29-
guardt &guard)
30-
{
31-
// Could be member, could be if, could be index.
32-
33-
if(expr.id()==ID_member)
34-
dereference_rec_address_of(
35-
to_member_expr(expr).struct_op(), state, guard);
36-
else if(expr.id()==ID_if)
37-
{
38-
// the condition is not an address
39-
dereference_rec(
40-
to_if_expr(expr).cond(), state, guard, false);
41-
42-
// add to guard?
43-
dereference_rec_address_of(
44-
to_if_expr(expr).true_case(), state, guard);
45-
dereference_rec_address_of(
46-
to_if_expr(expr).false_case(), state, guard);
47-
}
48-
else if(expr.id()==ID_index)
49-
{
50-
// the index is not an address
51-
dereference_rec(
52-
to_index_expr(expr).index(), state, guard, false);
53-
54-
// the array _is_ an address
55-
dereference_rec_address_of(
56-
to_index_expr(expr).array(), state, guard);
57-
}
58-
else
59-
{
60-
// give up and dereference
61-
62-
dereference_rec(expr, state, guard, false);
63-
}
64-
}
65-
66-
bool goto_symext::is_index_member_symbol_if(const exprt &expr)
67-
{
68-
// Could be member, could be if, could be index.
69-
70-
if(expr.id()==ID_member)
71-
{
72-
return is_index_member_symbol_if(
73-
to_member_expr(expr).struct_op());
74-
}
75-
else if(expr.id()==ID_if)
76-
{
77-
return
78-
is_index_member_symbol_if(to_if_expr(expr).true_case()) &&
79-
is_index_member_symbol_if(to_if_expr(expr).false_case());
80-
}
81-
else if(expr.id()==ID_index)
82-
{
83-
return is_index_member_symbol_if(to_index_expr(expr).array());
84-
}
85-
else if(expr.id()==ID_symbol)
86-
return true;
87-
else
88-
return false;
89-
}
90-
91-
/// Evaluate an ID_address_of expression
26+
/// Transforms an lvalue expression by replacing any dereference operations it
27+
/// contains with explicit references to the objects they may point to (using
28+
/// \ref goto_symext::dereference_rec), and translates `byte_extract,` `member`
29+
/// and `index` operations into integer offsets from a root symbol (if any).
30+
/// These are ultimately expressed in the form
31+
/// `(target_type*)((char*)(&underlying_symbol) + offset)`.
32+
/// \param expr: expression to replace with normalised, dereference-free form
33+
/// \param state: working state. See \ref goto_symext::dereference for possible
34+
/// side-effects of a dereference operation.
35+
/// \param keep_array: if true and an underlying object is an array, return
36+
/// its address (`&array`); otherwise return the address of its first element
37+
/// (`&array[0]).
38+
/// \return the transformed lvalue expression
9239
exprt goto_symext::address_arithmetic(
9340
const exprt &expr,
9441
statet &state,
95-
guardt &guard,
9642
bool keep_array)
9743
{
9844
exprt result;
@@ -106,7 +52,7 @@ exprt goto_symext::address_arithmetic(
10652
const byte_extract_exprt &be=to_byte_extract_expr(expr);
10753

10854
// recursive call
109-
result=address_arithmetic(be.op(), state, guard, keep_array);
55+
result = address_arithmetic(be.op(), state, keep_array);
11056

11157
if(be.op().type().id() == ID_array && result.id() == ID_address_of)
11258
{
@@ -124,7 +70,7 @@ exprt goto_symext::address_arithmetic(
12470

12571
// there could be further dereferencing in the offset
12672
exprt offset=be.offset();
127-
dereference_rec(offset, state, guard, false);
73+
dereference_rec(offset, state);
12874

12975
result=plus_exprt(result, offset);
13076

@@ -149,7 +95,7 @@ exprt goto_symext::address_arithmetic(
14995
byte_extract_id(), ode.root_object(), ode.offset(), expr.type());
15096

15197
// recursive call
152-
result=address_arithmetic(be, state, guard, keep_array);
98+
result = address_arithmetic(be, state, keep_array);
15399

154100
do_simplify(result);
155101
}
@@ -160,20 +106,20 @@ exprt goto_symext::address_arithmetic(
160106
// just grab the pointer, but be wary of further dereferencing
161107
// in the pointer itself
162108
result=to_dereference_expr(expr).pointer();
163-
dereference_rec(result, state, guard, false);
109+
dereference_rec(result, state);
164110
}
165111
else if(expr.id()==ID_if)
166112
{
167113
if_exprt if_expr=to_if_expr(expr);
168114

169115
// the condition is not an address
170-
dereference_rec(if_expr.cond(), state, guard, false);
116+
dereference_rec(if_expr.cond(), state);
171117

172118
// recursive call
173-
if_expr.true_case()=
174-
address_arithmetic(if_expr.true_case(), state, guard, keep_array);
175-
if_expr.false_case()=
176-
address_arithmetic(if_expr.false_case(), state, guard, keep_array);
119+
if_expr.true_case() =
120+
address_arithmetic(if_expr.true_case(), state, keep_array);
121+
if_expr.false_case() =
122+
address_arithmetic(if_expr.false_case(), state, keep_array);
177123

178124
result=if_expr;
179125
}
@@ -184,7 +130,7 @@ exprt goto_symext::address_arithmetic(
184130
{
185131
// give up, just dereference
186132
result=expr;
187-
dereference_rec(result, state, guard, false);
133+
dereference_rec(result, state);
188134

189135
// turn &array into &array[0]
190136
if(result.type().id() == ID_array && !keep_array)
@@ -208,7 +154,7 @@ exprt goto_symext::address_arithmetic(
208154
from_integer(offset, index_type()),
209155
expr.type());
210156

211-
result=address_arithmetic(be, state, guard, keep_array);
157+
result = address_arithmetic(be, state, keep_array);
212158

213159
do_simplify(result);
214160
}
@@ -219,7 +165,7 @@ exprt goto_symext::address_arithmetic(
219165
{
220166
const typecast_exprt &tc_expr = to_typecast_expr(expr);
221167

222-
result = address_arithmetic(tc_expr.op(), state, guard, keep_array);
168+
result = address_arithmetic(tc_expr.op(), state, keep_array);
223169

224170
// treat &array as &array[0]
225171
const typet &expr_type = expr.type();
@@ -244,11 +190,14 @@ exprt goto_symext::address_arithmetic(
244190
return result;
245191
}
246192

247-
void goto_symext::dereference_rec(
248-
exprt &expr,
249-
statet &state,
250-
guardt &guard,
251-
const bool write)
193+
/// If \p expr is a \ref dereference_exprt, replace it with explicit references
194+
/// to the objects it may point to. Otherwise recursively apply this function to
195+
/// \p expr's operands, with special cases for address-of (handled by \ref
196+
/// goto_symext::address_arithmetic) and certain common expression patterns
197+
/// such as `&struct.flexible_array[0]` (see inline comments in code).
198+
/// For full details of this method's pointer replacement and potential side-
199+
/// effects see \ref goto_symext::dereference
200+
void goto_symext::dereference_rec(exprt &expr, statet &state)
252201
{
253202
if(expr.id()==ID_dereference)
254203
{
@@ -272,7 +221,7 @@ void goto_symext::dereference_rec(
272221
tmp1.swap(to_dereference_expr(expr).pointer());
273222

274223
// first make sure there are no dereferences in there
275-
dereference_rec(tmp1, state, guard, false);
224+
dereference_rec(tmp1, state);
276225

277226
// we need to set up some elaborate call-backs
278227
symex_dereference_statet symex_dereference_state(*this, state);
@@ -285,13 +234,7 @@ void goto_symext::dereference_rec(
285234
expr_is_not_null);
286235

287236
// std::cout << "**** " << format(tmp1) << '\n';
288-
exprt tmp2=
289-
dereference.dereference(
290-
tmp1,
291-
guard,
292-
write?
293-
value_set_dereferencet::modet::WRITE:
294-
value_set_dereferencet::modet::READ);
237+
exprt tmp2 = dereference.dereference(tmp1, guardt(true_exprt()));
295238
// std::cout << "**** " << format(tmp2) << '\n';
296239

297240
expr.swap(tmp2);
@@ -317,7 +260,7 @@ void goto_symext::dereference_rec(
317260
tmp.add_source_location()=expr.source_location();
318261

319262
// recursive call
320-
dereference_rec(tmp, state, guard, write);
263+
dereference_rec(tmp, state);
321264

322265
expr.swap(tmp);
323266
}
@@ -336,7 +279,6 @@ void goto_symext::dereference_rec(
336279
expr = address_arithmetic(
337280
object,
338281
state,
339-
guard,
340282
to_pointer_type(expr.type()).subtype().id() == ID_array);
341283
}
342284
else if(expr.id()==ID_typecast)
@@ -357,24 +299,58 @@ void goto_symext::dereference_rec(
357299
to_address_of_expr(tc_op).object(),
358300
from_integer(0, index_type())));
359301

360-
dereference_rec(expr, state, guard, write);
302+
dereference_rec(expr, state);
361303
}
362304
else
363305
{
364-
dereference_rec(tc_op, state, guard, write);
306+
dereference_rec(tc_op, state);
365307
}
366308
}
367309
else
368310
{
369311
Forall_operands(it, expr)
370-
dereference_rec(*it, state, guard, write);
312+
dereference_rec(*it, state);
371313
}
372314
}
373315

374-
void goto_symext::dereference(
375-
exprt &expr,
376-
statet &state,
377-
const bool write)
316+
/// Replace all dereference operations within \p expr with explicit references
317+
/// to the objects they may refer to. For example, the expression `*p1 + *p2`
318+
/// might be rewritten to `obj1 + (p2 == &obj2 ? obj2 : obj3)` in the case where
319+
/// `p1` is known to point to `obj1` and `p2` points to either `obj2` or `obj3`.
320+
/// The expression, and any object references introduced, are renamed to L1 in
321+
/// the process (so in fact we would get `obj1!0@3 + (p2!0@1 == ....` rather
322+
/// than the exact example given above).
323+
///
324+
/// It may have two kinds of side-effect:
325+
///
326+
/// 1. When an expression may (or must) point to something which cannot legally
327+
/// be dereferenced, such as a null pointer or an integer cast to a pointer,
328+
/// a "failed object" is created instead, via one of two routes:
329+
///
330+
/// a. if the `add_failed_symbols` pass has been run then a pointer-typed
331+
/// symbol `x` will have a corresponding failed symbol `x$object`. This
332+
/// is replicated according to L1 renaming on demand, so for example on
333+
/// the first failed dereference of `x!5@10` we will create
334+
/// `x$object!5@10` and add that to the symbol table.
335+
/// This addition is made by
336+
/// \ref symex_dereference_statet::get_or_create_failed_symbol
337+
///
338+
/// b. if such a failed symbol can't be found then symex will create one of
339+
/// its own, called `symex::failed_symbol` with some suffix. This is done
340+
/// by \ref value_set_dereferencet::dereference
341+
///
342+
/// In either case any newly-created symbol is added to \p state's symbol
343+
/// table and \p expr is altered to refer to it. Typically when \p expr has
344+
/// some legal targets as well this results in an expression like
345+
/// `ptr == &real_obj ? real_obj : ptr$object`.
346+
///
347+
/// 2. Any object whose base-name ends with `auto_object` is automatically
348+
/// initialised when dereferenced for the first time, creating a tree of
349+
/// pointers leading to fresh objects each time such a pointer is
350+
/// dereferenced. If new objects are created by this mechanism then
351+
/// state will be altered (by `symex_assign`) to initialise them.
352+
/// See \ref auto_objects.cpp for details.
353+
void goto_symext::dereference(exprt &expr, statet &state)
378354
{
379355
// The expression needs to be renamed to level 1
380356
// in order to distinguish addresses of local variables
@@ -384,8 +360,7 @@ void goto_symext::dereference(
384360
state.rename(expr, ns, goto_symex_statet::L1);
385361

386362
// start the recursion!
387-
guardt guard{true_exprt{}};
388-
dereference_rec(expr, state, guard, write);
363+
dereference_rec(expr, state);
389364
// dereferencing may introduce new symbol_exprt
390365
// (like __CPROVER_memory)
391366
state.rename(expr, ns, goto_symex_statet::L1);

0 commit comments

Comments
 (0)