Skip to content

Commit d0cf433

Browse files
committed
Strengthen local_safe_pointers to handle common Java operations
The local-safe-pointers analysis can already inform symex that certain pointers are known not to be null at particular program points. This strengthens the analysis to spot more cases such that it can determine no null pointer is dereferenced in the new regression test `jbmc/clean_derefs`, which exercises many common Java operations including array accesses. The specific improvements to local-safe-pointers: * Look through typecasts. This was already done for GOTO instructions, but now works for ASSUME as well. * Search for not-null expressions using base_type_eq instead of just irept::operator<. This means that when symex uses namespacet::follow to remove a symbol_typet that does not prevent local-safe-pointers from noting it is not null in a particular context.
1 parent e7b1a8a commit d0cf433

File tree

9 files changed

+202
-75
lines changed

9 files changed

+202
-75
lines changed
310 Bytes
Binary file not shown.
1.09 KB
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
2+
public class Test {
3+
4+
public int field;
5+
6+
public Test(int f) { field = f; }
7+
8+
public static void main(int nondet) {
9+
10+
// This test creates and uses objects in a few different contexts to check
11+
// that in all cases goto-symex realises that the dereferenced object cannot
12+
// be null (due to the NullPointerException check that guards all derefs),
13+
// therefore symex never generates a reference to any $failed_object symbol
14+
// that for a C program would stand in for an unknown or invalid object such
15+
// as a dereferenced null pointer.
16+
17+
Test t1 = new Test(nondet);
18+
Test t2 = new Test(nondet);
19+
20+
Test[] tarray = new Test[5];
21+
tarray[0] = null;
22+
tarray[1] = t2;
23+
tarray[2] = t1;
24+
25+
t1.field++;
26+
(nondet == 10 ? t1 : t2).field--;
27+
tarray[nondet % 5].field++;
28+
(nondet == 20 ? t1 : tarray[2]).field++;
29+
30+
Container c1 = new Container(t1);
31+
Container c2 = new Container(t2);
32+
Container c3 = new Container(null);
33+
34+
c1.t.field++;
35+
(nondet == 30 ? c1 : c3).t.field++;
36+
(nondet == 40 ? c2.t : tarray[3]).field--;
37+
38+
assert false;
39+
40+
}
41+
42+
}
43+
44+
class Container {
45+
46+
public Container(Test _t) { t = _t; }
47+
48+
public Test t;
49+
50+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
CORE
2+
Test.class
3+
--function Test.main --show-vcc
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
--
7+
invalid
8+
--
9+
This checks that no invalid_object references are generated -- these would be created if
10+
symex thought it might follow a null or other invalid pointer, whereas Java dereferences
11+
should always be checked not-null beforehand.

src/analyses/local_safe_pointers.cpp

+96-65
Original file line numberDiff line numberDiff line change
@@ -11,68 +11,68 @@ Author: Diffblue Ltd
1111

1212
#include "local_safe_pointers.h"
1313

14+
#include <util/base_type.h>
1415
#include <util/expr_iterator.h>
16+
#include <util/expr_util.h>
1517
#include <util/format_expr.h>
1618

17-
/// If `expr` is of the form `x != nullptr`, return x. Otherwise return null
18-
static const exprt *get_null_checked_expr(const exprt &expr)
19-
{
20-
if(expr.id() == ID_notequal)
21-
{
22-
const exprt *op0 = &expr.op0(), *op1 = &expr.op1();
23-
if(op0->type().id() == ID_pointer &&
24-
*op0 == null_pointer_exprt(to_pointer_type(op0->type())))
25-
{
26-
std::swap(op0, op1);
27-
}
28-
29-
if(op1->type().id() == ID_pointer &&
30-
*op1 == null_pointer_exprt(to_pointer_type(op1->type())))
31-
{
32-
while(op0->id() == ID_typecast)
33-
op0 = &op0->op0();
34-
return op0;
35-
}
36-
}
37-
38-
return nullptr;
39-
}
40-
41-
/// Return structure for `get_conditional_checked_expr`
19+
/// Return structure for `get_null_checked_expr` and
20+
/// `get_conditional_checked_expr`
4221
struct goto_null_checkt
4322
{
44-
/// If true, the given GOTO tests that a pointer expression is non-null on the
45-
/// taken branch; otherwise, on the not-taken branch.
23+
/// If true, the given GOTO/ASSUME tests that a pointer expression is non-null
24+
/// on the taken branch or passing case; otherwise, on the not-taken branch
25+
/// or on failure.
4626
bool checked_when_taken;
4727

4828
/// Null-tested pointer expression
4929
exprt checked_expr;
5030
};
5131

52-
/// Check if a GOTO guard expression tests if a pointer is null
53-
/// \param goto_guard: expression to check
32+
/// Check if `expr` tests if a pointer is null
33+
/// \param expr: expression to check
5434
/// \return a `goto_null_checkt` indicating what expression is tested and
5535
/// whether the check applies on the taken or not-taken branch, or an empty
5636
/// optionalt if this isn't a null check.
57-
static optionalt<goto_null_checkt>
58-
get_conditional_checked_expr(const exprt &goto_guard)
37+
static optionalt<goto_null_checkt> get_null_checked_expr(const exprt &expr)
5938
{
60-
exprt normalized_guard = goto_guard;
39+
exprt normalized_expr = expr;
40+
// If true, then a null check is made when test `expr` passes; if false,
41+
// one is made when it fails.
6142
bool checked_when_taken = true;
62-
while(normalized_guard.id() == ID_not || normalized_guard.id() == ID_equal)
43+
44+
// Reduce some roundabout ways of saying "x != null", e.g. "!(x == null)".
45+
while(normalized_expr.id() == ID_not)
6346
{
64-
if(normalized_guard.id() == ID_not)
65-
normalized_guard = normalized_guard.op0();
66-
else
67-
normalized_guard.id(ID_notequal);
47+
normalized_expr = normalized_expr.op0();
6848
checked_when_taken = !checked_when_taken;
6949
}
7050

71-
const exprt *checked_expr = get_null_checked_expr(normalized_guard);
72-
if(!checked_expr)
73-
return {};
74-
else
75-
return goto_null_checkt { checked_when_taken, *checked_expr };
51+
if(normalized_expr.id() == ID_equal)
52+
{
53+
normalized_expr.id(ID_notequal);
54+
checked_when_taken = !checked_when_taken;
55+
}
56+
57+
if(normalized_expr.id() == ID_notequal)
58+
{
59+
const exprt &op0 = skip_typecast(normalized_expr.op0());
60+
const exprt &op1 = skip_typecast(normalized_expr.op1());
61+
62+
if(op0.type().id() == ID_pointer &&
63+
op0 == null_pointer_exprt(to_pointer_type(op0.type())))
64+
{
65+
return { { checked_when_taken, op1 } };
66+
}
67+
68+
if(op1.type().id() == ID_pointer &&
69+
op1 == null_pointer_exprt(to_pointer_type(op1.type())))
70+
{
71+
return { { checked_when_taken, op0 } };
72+
}
73+
}
74+
75+
return {};
7676
}
7777

7878
/// Compute safe dereference expressions for a given GOTO program. This
@@ -82,7 +82,8 @@ get_conditional_checked_expr(const exprt &goto_guard)
8282
/// \param goto_program: program to analyse
8383
void local_safe_pointerst::operator()(const goto_programt &goto_program)
8484
{
85-
std::set<exprt> checked_expressions;
85+
std::set<exprt, base_type_comparet> checked_expressions(
86+
base_type_comparet{ns});
8687

8788
for(const auto &instruction : goto_program.instructions)
8889
{
@@ -91,11 +92,23 @@ void local_safe_pointerst::operator()(const goto_programt &goto_program)
9192
checked_expressions.clear();
9293
// Retrieve working set for forward GOTOs:
9394
else if(instruction.is_target())
94-
checked_expressions = non_null_expressions[instruction.location_number];
95+
{
96+
auto findit = non_null_expressions.find(instruction.location_number);
97+
if(findit != non_null_expressions.end())
98+
checked_expressions = findit->second;
99+
else
100+
{
101+
checked_expressions =
102+
std::set<exprt, base_type_comparet>(base_type_comparet{ns});
103+
}
104+
}
95105

96106
// Save the working set at this program point:
97107
if(!checked_expressions.empty())
98-
non_null_expressions[instruction.location_number] = checked_expressions;
108+
{
109+
non_null_expressions.emplace(
110+
instruction.location_number, checked_expressions);
111+
}
99112

100113
switch(instruction.type)
101114
{
@@ -113,35 +126,44 @@ void local_safe_pointerst::operator()(const goto_programt &goto_program)
113126

114127
// Possible checks:
115128
case ASSUME:
129+
if(auto assume_check = get_null_checked_expr(instruction.guard))
116130
{
117-
const exprt *checked_expr;
118-
if((checked_expr = get_null_checked_expr(instruction.guard)) != nullptr)
119-
{
120-
checked_expressions.insert(*checked_expr);
121-
}
122-
break;
131+
if(assume_check->checked_when_taken)
132+
checked_expressions.insert(assume_check->checked_expr);
123133
}
124134

135+
break;
136+
125137
case GOTO:
126138
if(!instruction.is_backwards_goto())
127139
{
128-
if(auto conditional_check =
129-
get_conditional_checked_expr(instruction.guard))
130-
{
131-
auto &taken_checked_expressions =
132-
non_null_expressions[instruction.get_target()->location_number];
133-
taken_checked_expressions = checked_expressions;
140+
// Copy current state to GOTO target:
134141

135-
if(conditional_check->checked_when_taken)
136-
taken_checked_expressions.insert(conditional_check->checked_expr);
137-
else
138-
checked_expressions.insert(conditional_check->checked_expr);
142+
auto target_emplace_result =
143+
non_null_expressions.emplace(
144+
instruction.get_target()->location_number, checked_expressions);
139145

140-
break;
146+
// If the target already has a state entry then it is a control-flow
147+
// merge point and everything will be assumed maybe-null in any case.
148+
if(target_emplace_result.second)
149+
{
150+
if(auto conditional_check = get_null_checked_expr(instruction.guard))
151+
{
152+
// Add the GOTO condition to either the target or current state,
153+
// as appropriate:
154+
if(conditional_check->checked_when_taken)
155+
{
156+
target_emplace_result.first->second.insert(
157+
conditional_check->checked_expr);
158+
}
159+
else
160+
checked_expressions.insert(conditional_check->checked_expr);
161+
}
141162
}
142-
// Otherwise fall through to...
143163
}
144164

165+
break;
166+
145167
default:
146168
// Pessimistically assume all other instructions might overwrite any
147169
// pointer with a possibly-null value.
@@ -157,7 +179,7 @@ void local_safe_pointerst::operator()(const goto_programt &goto_program)
157179
/// operator())
158180
/// \param ns: global namespace
159181
void local_safe_pointerst::output(
160-
std::ostream &out, const goto_programt &goto_program, const namespacet &ns)
182+
std::ostream &out, const goto_programt &goto_program)
161183
{
162184
forall_goto_program_instructions(i_it, goto_program)
163185
{
@@ -199,7 +221,7 @@ void local_safe_pointerst::output(
199221
/// operator())
200222
/// \param ns: global namespace
201223
void local_safe_pointerst::output_safe_dereferences(
202-
std::ostream &out, const goto_programt &goto_program, const namespacet &ns)
224+
std::ostream &out, const goto_programt &goto_program)
203225
{
204226
forall_goto_program_instructions(i_it, goto_program)
205227
{
@@ -251,3 +273,12 @@ bool local_safe_pointerst::is_non_null_at_program_point(
251273
tocheck = &tocheck->op0();
252274
return findit->second.count(*tocheck) != 0;
253275
}
276+
277+
bool local_safe_pointerst::base_type_comparet::operator()(
278+
const exprt &e1, const exprt &e2) const
279+
{
280+
if(base_type_eq(e1, e2, ns))
281+
return false;
282+
else
283+
return e1 < e2;
284+
}

src/analyses/local_safe_pointers.h

+40-5
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,45 @@ Author: Diffblue Ltd
2323
/// possibly-aliasing operations are handled pessimistically.
2424
class local_safe_pointerst
2525
{
26-
std::map<unsigned, std::set<exprt>> non_null_expressions;
26+
/// Comparator that regards base_type_eq expressions as equal, and otherwise
27+
/// uses the natural (operator<) ordering on irept.
28+
/// An expression is base_type_eq another one if their types, and types of
29+
/// their subexpressions, are identical except that one may use a symbol_typet
30+
/// while the other uses that type's expanded (namespacet::follow'd) form.
31+
class base_type_comparet
32+
{
33+
const namespacet &ns;
34+
35+
public:
36+
explicit base_type_comparet(const namespacet &ns)
37+
: ns(ns)
38+
{
39+
}
40+
41+
base_type_comparet(const base_type_comparet &other)
42+
: ns(other.ns)
43+
{
44+
}
45+
46+
base_type_comparet &operator=(const base_type_comparet &other)
47+
{
48+
INVARIANT(&ns == &other.ns, "base_type_comparet: clashing namespaces");
49+
return *this;
50+
}
51+
52+
bool operator()(const exprt &e1, const exprt &e2) const;
53+
};
54+
55+
std::map<unsigned, std::set<exprt, base_type_comparet>> non_null_expressions;
56+
57+
const namespacet &ns;
58+
59+
public:
60+
local_safe_pointerst(const namespacet &ns)
61+
: ns(ns)
62+
{
63+
}
2764

28-
public:
2965
void operator()(const goto_programt &goto_program);
3066

3167
bool is_non_null_at_program_point(
@@ -38,11 +74,10 @@ class local_safe_pointerst
3874
return is_non_null_at_program_point(deref.op0(), program_point);
3975
}
4076

41-
void output(
42-
std::ostream &stream, const goto_programt &program, const namespacet &ns);
77+
void output(std::ostream &stream, const goto_programt &program);
4378

4479
void output_safe_dereferences(
45-
std::ostream &stream, const goto_programt &program, const namespacet &ns);
80+
std::ostream &stream, const goto_programt &program);
4681
};
4782

4883
#endif // CPROVER_ANALYSES_LOCAL_SAFE_POINTERS_H

src/goto-instrument/goto_instrument_parse_options.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -297,17 +297,17 @@ int goto_instrument_parse_optionst::doit()
297297

298298
forall_goto_functions(it, goto_model.goto_functions)
299299
{
300-
local_safe_pointerst local_safe_pointers;
300+
local_safe_pointerst local_safe_pointers(ns);
301301
local_safe_pointers(it->second.body);
302302
std::cout << ">>>>\n";
303303
std::cout << ">>>> " << it->first << '\n';
304304
std::cout << ">>>>\n";
305305
if(cmdline.isset("show-local-safe-pointers"))
306-
local_safe_pointers.output(std::cout, it->second.body, ns);
306+
local_safe_pointers.output(std::cout, it->second.body);
307307
else
308308
{
309309
local_safe_pointers.output_safe_dereferences(
310-
std::cout, it->second.body, ns);
310+
std::cout, it->second.body);
311311
}
312312
std::cout << '\n';
313313
}

src/goto-symex/symex_function_call.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ void goto_symext::symex_function_call_code(
223223
state.dirty.populate_dirty_for_function(identifier, goto_function);
224224

225225
auto emplace_safe_pointers_result =
226-
safe_pointers.emplace(identifier, local_safe_pointerst{});
226+
safe_pointers.emplace(identifier, local_safe_pointerst{ns});
227227
if(emplace_safe_pointers_result.second)
228228
emplace_safe_pointers_result.first->second(goto_function.body);
229229

0 commit comments

Comments
 (0)