Skip to content

Commit ab42137

Browse files
committed
Use local-safe-pointers analysis to improve Symex pointer resolution
This uses local_safe_pointerst to determine when symex dereferences a pointer that cannot be null. When it does the null result is excluded from the possible values, and therefore a $invalid_object reference may be excluded from the result of dereferencing such a pointer. This can improve constant propagation.
1 parent b5a0859 commit ab42137

File tree

13 files changed

+230
-12
lines changed

13 files changed

+230
-12
lines changed
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
CORE
2+
test.class
3+
--function test.main
4+
^EXIT=10$
5+
^SIGNAL=0$
6+
^VERIFICATION FAILED$
7+
Null pointer check: FAILURE$
8+
assertion at file test\.java line 21 .*: SUCCESS$
9+
--
10+
^warning: ignoring
11+
--
12+
JBMC should reports failure, as it might dereference a null pointer, but as Java
13+
is a safe language we should statically determine that if we reach the assertion
14+
it cannot be violated.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
public class test {
2+
3+
public int field;
4+
5+
public test(int value) {
6+
7+
field = value;
8+
9+
}
10+
11+
public static void main(boolean unknown) {
12+
13+
test t = new test(12345);
14+
test maybe_t = unknown ? null : t;
15+
16+
// t could still be null, but symex ought to notice that as the Java frontend introduces
17+
// checks before all pointer dereference operations, it can statically eliminate
18+
// the assertion below.
19+
20+
int field_value = maybe_t.field;
21+
assert field_value == 12345;
22+
23+
}
24+
25+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
CORE
2+
test.class
3+
--show-vcc --function test.main
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
--
7+
^warning: ignoring
8+
assertion at file test\.java line 22
9+
--
10+
Because symex should statically determine the assertion can't be violated there should not be a goal for it
11+
by the time --show-vccs prints the equation.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
static void noop() { }
2+
3+
int main(int argc, char **argv) {
4+
5+
int x;
6+
int *maybe_null = argc & 1 ? &x : 0;
7+
8+
// Should work (guarded by assume):
9+
int *ptr1 = maybe_null;
10+
__CPROVER_assume(ptr1 != 0);
11+
int deref1 = *ptr1;
12+
13+
// Should work (guarded by else):
14+
int *ptr2 = maybe_null;
15+
if(ptr2 == 0)
16+
{
17+
}
18+
else
19+
{
20+
int deref2 = *ptr2;
21+
}
22+
23+
// Should work (guarded by if):
24+
int *ptr3 = maybe_null;
25+
if(ptr3 != 0)
26+
{
27+
int deref3 = *ptr3;
28+
}
29+
30+
// Shouldn't work yet despite being safe (guarded by backward goto):
31+
int *ptr4 = maybe_null;
32+
goto check;
33+
34+
deref:
35+
int deref4 = *ptr4;
36+
goto end_test4;
37+
38+
check:
39+
__CPROVER_assume(ptr4 != 0);
40+
goto deref;
41+
42+
end_test4:
43+
44+
// Shouldn't work yet despite being safe (guarded by confluence):
45+
int *ptr5 = maybe_null;
46+
if(argc == 5)
47+
__CPROVER_assume(ptr5 != 0);
48+
else
49+
__CPROVER_assume(ptr5 != 0);
50+
int deref5 = *ptr5;
51+
52+
// Shouldn't work (unsafe as only guarded on one branch):
53+
int *ptr6 = maybe_null;
54+
if(argc == 6)
55+
__CPROVER_assume(ptr6 != 0);
56+
else
57+
{
58+
}
59+
int deref6 = *ptr6;
60+
61+
// Shouldn't work due to suspicion write to ptr6 might alter ptr7:
62+
int *ptr7 = maybe_null;
63+
__CPROVER_assume(ptr7 != 0);
64+
ptr6 = 0;
65+
int deref7 = *ptr7;
66+
67+
// Shouldn't work due to suspicion noop() call might alter ptr8:
68+
int *ptr8 = maybe_null;
69+
__CPROVER_assume(ptr8 != 0);
70+
noop();
71+
int deref8 = *ptr8;
72+
73+
assert(0);
74+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
CORE
2+
main.c
3+
--show-vcc
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
ptr4\$object
7+
ptr5\$object
8+
ptr6\$object
9+
ptr7\$object
10+
ptr8\$object
11+
--
12+
ptr[1-3]\$object
13+
^warning: ignoring
14+
--
15+
ptrX\$object appearing in the VCCs indicates symex was unsure whether the pointer had a valid
16+
target, and uses the $object symbol as a referred object of last resort. ptr1-3 should be judged
17+
not-null by symex, so their $object symbols do not occur.

src/goto-symex/goto_symex.h

+4
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ Author: Daniel Kroening, [email protected]
1212
#ifndef CPROVER_GOTO_SYMEX_GOTO_SYMEX_H
1313
#define CPROVER_GOTO_SYMEX_GOTO_SYMEX_H
1414

15+
#include <analyses/local_safe_pointers.h>
16+
1517
#include <util/options.h>
1618
#include <util/message.h>
1719

@@ -465,6 +467,8 @@ class goto_symext
465467
void rewrite_quantifiers(exprt &, statet &);
466468

467469
path_storaget &path_storage;
470+
471+
std::unordered_map<irep_idt, local_safe_pointerst> safe_pointers;
468472
};
469473

470474
#endif // CPROVER_GOTO_SYMEX_GOTO_SYMEX_H

src/goto-symex/symex_dereference.cpp

+22-1
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,22 @@ void goto_symext::dereference_rec(
236236
if(expr.operands().size()!=1)
237237
throw "dereference takes one operand";
238238

239+
bool expr_is_not_null = false;
240+
241+
if(state.threads.size() == 1)
242+
{
243+
const irep_idt &expr_function = state.source.pc->function;
244+
if(!expr_function.empty())
245+
{
246+
dereference_exprt to_check = to_dereference_expr(expr);
247+
state.get_original_name(to_check);
248+
249+
expr_is_not_null =
250+
safe_pointers.at(expr_function).is_safe_dereference(
251+
to_check, state.source.pc);
252+
}
253+
}
254+
239255
exprt tmp1;
240256
tmp1.swap(expr.op0());
241257

@@ -246,7 +262,12 @@ void goto_symext::dereference_rec(
246262
symex_dereference_statet symex_dereference_state(*this, state);
247263

248264
value_set_dereferencet dereference(
249-
ns, state.symbol_table, options, symex_dereference_state, language_mode);
265+
ns,
266+
state.symbol_table,
267+
options,
268+
symex_dereference_state,
269+
language_mode,
270+
expr_is_not_null);
250271

251272
// std::cout << "**** " << format(tmp1) << '\n';
252273
exprt tmp2=

src/goto-symex/symex_function_call.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,11 @@ void goto_symext::symex_function_call_code(
222222

223223
state.dirty.populate_dirty_for_function(identifier, goto_function);
224224

225+
auto emplace_safe_pointers_result =
226+
safe_pointers.emplace(identifier, local_safe_pointerst{});
227+
if(emplace_safe_pointers_result.second)
228+
emplace_safe_pointers_result.first->second(goto_function.body);
229+
225230
const bool stop_recursing=get_unwind_recursion(
226231
identifier,
227232
state.source.thread_nr,

src/goto-symex/symex_main.cpp

+9-2
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,15 @@ void goto_symext::initialize_entry_point(
132132

133133
INVARIANT(
134134
!pc->function.empty(), "all symexed instructions should have a function");
135-
state.dirty.populate_dirty_for_function(
136-
pc->function, get_goto_function(pc->function));
135+
136+
const goto_functiont &entry_point_function = get_goto_function(pc->function);
137+
138+
auto emplace_safe_pointers_result =
139+
safe_pointers.emplace(pc->function, local_safe_pointerst{});
140+
if(emplace_safe_pointers_result.second)
141+
emplace_safe_pointers_result.first->second(entry_point_function.body);
142+
143+
state.dirty.populate_dirty_for_function(pc->function, entry_point_function);
137144

138145
symex_transition(state, state.source.pc);
139146
}

src/pointer-analysis/goto_program_dereference.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ class goto_program_dereferencet:protected dereference_callbackt
3434
options(_options),
3535
ns(_ns),
3636
value_sets(_value_sets),
37-
dereference(_ns, _new_symbol_table, _options, *this, ID_nil) { }
37+
dereference(_ns, _new_symbol_table, _options, *this, ID_nil, false)
38+
{
39+
}
3840

3941
void dereference_program(
4042
goto_programt &goto_program,

src/pointer-analysis/value_set_dereference.cpp

+13-5
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,14 @@ exprt value_set_dereferencet::dereference(
104104

105105
#if 0
106106
std::cout << "V: " << format(value.pointer_guard) << " --> ";
107-
std::cout << format(value.value) << '\n';
107+
std::cout << format(value.value);
108+
if(value.ignore)
109+
std::cout << " (ignored)";
110+
std::cout << '\n';
108111
#endif
109112

110-
values.push_back(value);
113+
if(!value.ignore)
114+
values.push_back(value);
111115
}
112116

113117
// can this fail?
@@ -313,7 +317,11 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to(
313317

314318
if(root_object.id() == ID_null_object)
315319
{
316-
if(options.get_bool_option("pointer-check"))
320+
if(exclude_null_derefs)
321+
{
322+
result.ignore = true;
323+
}
324+
else if(options.get_bool_option("pointer-check"))
317325
{
318326
guardt tmp_guard(guard);
319327

@@ -409,9 +417,9 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to(
409417
// This is stuff like *((char *)5).
410418
// This is turned into an access to __CPROVER_memory[...].
411419

412-
if(language_mode==ID_java)
420+
if(language_mode == ID_java)
413421
{
414-
result.value=nil_exprt();
422+
result.ignore = true;
415423
return result;
416424
}
417425

src/pointer-analysis/value_set_dereference.h

+33-3
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,24 @@ class value_set_dereferencet
3535
* \param _options Options, in particular whether pointer checks are
3636
to be performed
3737
* \param _dereference_callback Callback object for error reporting
38+
* \param _language_mode Mode for any new symbols created to represent
39+
a dereference failure
40+
* \param _exclude_null_derefs Ignore value-set entries that indicate a given
41+
dereference may follow a null pointer
3842
*/
3943
value_set_dereferencet(
4044
const namespacet &_ns,
4145
symbol_tablet &_new_symbol_table,
4246
const optionst &_options,
4347
dereference_callbackt &_dereference_callback,
44-
const irep_idt _language_mode):
48+
const irep_idt _language_mode,
49+
bool _exclude_null_derefs):
4550
ns(_ns),
4651
new_symbol_table(_new_symbol_table),
4752
options(_options),
4853
dereference_callback(_dereference_callback),
49-
language_mode(_language_mode)
54+
language_mode(_language_mode),
55+
exclude_null_derefs(_exclude_null_derefs)
5056
{ }
5157

5258
virtual ~value_set_dereferencet() { }
@@ -82,6 +88,9 @@ class value_set_dereferencet
8288
/// language_mode: ID_java, ID_C or another language identifier
8389
/// if we know the source language in use, irep_idt() otherwise.
8490
const irep_idt language_mode;
91+
/// Flag indicating whether `value_set_dereferencet::dereference` should
92+
/// disregard an apparent attempt to dereference NULL
93+
const bool exclude_null_derefs;
8594
static unsigned invalid_counter;
8695

8796
bool dereference_type_compare(
@@ -92,17 +101,38 @@ class value_set_dereferencet
92101
exprt &dest,
93102
const exprt &offset) const;
94103

104+
/// Return value for `build_reference_to`; see that method for documentation.
95105
class valuet
96106
{
97107
public:
98108
exprt value;
99109
exprt pointer_guard;
110+
bool ignore;
100111

101-
valuet():value(nil_exprt()), pointer_guard(false_exprt())
112+
valuet():value(nil_exprt()), pointer_guard(false_exprt()), ignore(false)
102113
{
103114
}
104115
};
105116

117+
/// Get a guard and expression to access `what` under `guard`.
118+
/// \param what: value set entry to convert to an expression: either
119+
/// ID_unknown, ID_invalid, or an object_descriptor_exprt giving a referred
120+
/// object and offset.
121+
/// \param mode: whether the pointer is being read or written; used to create
122+
/// pointer validity checks if need be
123+
/// \param pointer: pointer expression that may point to `what`
124+
/// \param guard: guard under which the pointer is dereferenced
125+
/// \return
126+
/// * If we were explicitly instructed to ignore `what` as a possible
127+
/// pointer target: a `valuet` with `ignore` = true, and `value` and
128+
/// `pointer_guard` set to nil.
129+
/// * If we could build an expression corresponding to `what`:
130+
/// A `valuet` with non-nil `value`, and `pointer_guard` set to an
131+
/// appropriate check to determine if `pointer_expr` really points to
132+
/// `what` (for example, we might return
133+
/// `{.value = global, .pointer_guard = (pointer_expr == &global)}`
134+
/// * Otherwise, if we couldn't build an expression (e.g. for `what` ==
135+
/// ID_unknown), a `valuet` with nil `value` and `ignore` == false.
106136
valuet build_reference_to(
107137
const exprt &what,
108138
const modet mode,

0 commit comments

Comments
 (0)