Skip to content

Commit 72b6b60

Browse files
committed
Symex: ignore null dereferences when targeting Java
In Java (and other safe languages) we know that any path that led to dereferencing a null pointer would have thrown or asserted beforehand (i.e. all pointer dereferences are checked). Therefore when dereferencing a pointer with value-set {NULL, someobj} there is no need to generate (for example) `ptr == &someobj ? someobj.somefield : invalid_object.somefield`, but rather we can simply produce `someobj.somefield`. In principle this could also be extended to C programs that have `--pointer-check` enabled, to other safe languages, and to specific pointer accesses that are analysed never-null at a particular program point.
1 parent 1d0cd01 commit 72b6b60

File tree

11 files changed

+157
-11
lines changed

11 files changed

+157
-11
lines changed
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
CORE
2+
test.class
3+
4+
^EXIT=10$
5+
^SIGNAL=0$
6+
^VERIFICATION FAILED$
7+
Throw null: FAILURE$
8+
assertion at file test\.java line 21 .*: SUCCESS$
9+
--
10+
^warning: ignoring
11+
--
12+
The test should fail, as it might dereference a null pointer, but as Java is a safe language we should
13+
statically determine that if we reach the assertion it cannot be violated.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
public class test {
2+
3+
public int field;
4+
5+
public test(int value) {
6+
7+
field = value;
8+
9+
}
10+
11+
public static void main(boolean unknown) {
12+
13+
test t = new test(12345);
14+
test maybe_t = unknown ? null : t;
15+
16+
// t could still be null, but symex ought to know that as this is Java source,
17+
// we must have a null check prior to dereferencing, so it can statically eliminate
18+
// the assertion below.
19+
20+
int field_value = maybe_t.field;
21+
assert field_value == 12345;
22+
23+
}
24+
25+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
CORE
2+
test.class
3+
--show-vcc
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
--
7+
^warning: ignoring
8+
assertion at file test\.java line 22
9+
--
10+
Because symex should statically determine the assertion can't be violated there should not be a goal for it
11+
by the time --show-vccs prints the equation.

src/cbmc/bmc.h

+10
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,16 @@ class bmct:public safety_checkert
116116
symex.add_recursion_unwind_handler(handler);
117117
}
118118

119+
void set_program_cannot_dereference_null(bool value)
120+
{
121+
symex.set_program_cannot_dereference_null(value);
122+
}
123+
124+
void set_program_cannot_dereference_int_as_pointer(bool value)
125+
{
126+
symex.set_program_cannot_dereference_int_as_pointer(value);
127+
}
128+
119129
static int do_language_agnostic_bmc(
120130
const path_strategy_choosert &path_strategy_chooser,
121131
const optionst &opts,

src/goto-symex/goto_symex.h

+18
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ class goto_symext
6262
ns(outer_symbol_table),
6363
target(_target),
6464
atomic_section_counter(0),
65+
program_cannot_dereference_null(false),
66+
program_cannot_dereference_int_as_pointer(false),
6567
log(mh),
6668
guard_identifier("goto_symex::\\guard"),
6769
path_storage(path_storage)
@@ -169,6 +171,16 @@ class goto_symext
169171
/// successor state to continue executing, and resume symex from that state.
170172
bool should_pause_symex;
171173

174+
void set_program_cannot_dereference_null(bool value)
175+
{
176+
program_cannot_dereference_null = value;
177+
}
178+
179+
void set_program_cannot_dereference_int_as_pointer(bool value)
180+
{
181+
program_cannot_dereference_int_as_pointer = value;
182+
}
183+
172184
protected:
173185
/// Initialise the symbolic execution and the given state with <code>pc</code>
174186
/// as entry point.
@@ -228,6 +240,12 @@ class goto_symext
228240
symex_target_equationt &target;
229241
unsigned atomic_section_counter;
230242

243+
/// Indicates if the source program always guards against certain kinds of
244+
/// pointer abuse that would normally make symex conservatively assume it
245+
/// cannot resolve a dereference to a constant, for example:
246+
bool program_cannot_dereference_null;
247+
bool program_cannot_dereference_int_as_pointer;
248+
231249
mutable messaget log;
232250

233251
friend class symex_dereference_statet;

src/goto-symex/symex_dereference.cpp

+7-1
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,13 @@ void goto_symext::dereference_rec(
246246
symex_dereference_statet symex_dereference_state(*this, state);
247247

248248
value_set_dereferencet dereference(
249-
ns, state.symbol_table, options, symex_dereference_state, language_mode);
249+
ns,
250+
state.symbol_table,
251+
options,
252+
symex_dereference_state,
253+
language_mode,
254+
program_cannot_dereference_null,
255+
program_cannot_dereference_int_as_pointer);
250256

251257
// std::cout << "**** " << format(tmp1) << '\n';
252258
exprt tmp2=

src/jbmc/jbmc_parse_options.cpp

+11-1
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,13 @@ void jbmc_parse_optionst::eval_verbosity()
9595
ui_message_handler.set_verbosity(v);
9696
}
9797

98+
/// Set symex flags that always hold for Java programs:
99+
static void jbmc_configure_bmc(bmct &bmc, const symbol_tablet &symbol_table)
100+
{
101+
bmc.set_program_cannot_dereference_null(true);
102+
bmc.set_program_cannot_dereference_int_as_pointer(true);
103+
}
104+
98105
void jbmc_parse_optionst::get_command_line_options(optionst &options)
99106
{
100107
if(config.set(cmdline))
@@ -502,10 +509,13 @@ int jbmc_parse_optionst::doit()
502509
return 0;
503510
}
504511

505-
std::function<void(bmct &, const symbol_tablet &)> configure_bmc = nullptr;
512+
std::function<void(bmct &, const symbol_tablet &)> configure_bmc =
513+
jbmc_configure_bmc;
506514
if(options.get_bool_option("java-unwind-enum-static"))
507515
{
508516
configure_bmc = [](bmct &bmc, const symbol_tablet &symbol_table) {
517+
jbmc_configure_bmc(bmc, symbol_table);
518+
509519
bmc.add_loop_unwind_handler(
510520
[&symbol_table](
511521
const irep_idt &function_id,

src/pointer-analysis/goto_program_dereference.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ class goto_program_dereferencet:protected dereference_callbackt
3434
options(_options),
3535
ns(_ns),
3636
value_sets(_value_sets),
37-
dereference(_ns, _new_symbol_table, _options, *this, ID_nil) { }
37+
dereference(_ns, _new_symbol_table, _options, *this, ID_nil, false, false)
38+
{
39+
}
3840

3941
void dereference_program(
4042
goto_programt &goto_program,

src/pointer-analysis/value_set_dereference.cpp

+13-5
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,14 @@ exprt value_set_dereferencet::dereference(
106106

107107
#if 0
108108
std::cout << "V: " << format(value.pointer_guard) << " --> ";
109-
std::cout << format(value.value) << '\n';
109+
std::cout << format(value.value);
110+
if(value.ignore)
111+
std::cout << " (ignored)";
112+
std::cout << '\n';
110113
#endif
111114

112-
values.push_back(value);
115+
if(!value.ignore)
116+
values.push_back(value);
113117
}
114118

115119
// can this fail?
@@ -288,7 +292,11 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to(
288292

289293
if(root_object.id() == ID_null_object)
290294
{
291-
if(options.get_bool_option("pointer-check"))
295+
if(exclude_null_derefs)
296+
{
297+
result.ignore = true;
298+
}
299+
else if(options.get_bool_option("pointer-check"))
292300
{
293301
guardt tmp_guard(guard);
294302

@@ -384,9 +392,9 @@ value_set_dereferencet::valuet value_set_dereferencet::build_reference_to(
384392
// This is stuff like *((char *)5).
385393
// This is turned into an access to __CPROVER_memory[...].
386394

387-
if(language_mode==ID_java)
395+
if(exclude_int_as_pointer_derefs)
388396
{
389-
result.value=nil_exprt();
397+
result.ignore = true;
390398
return result;
391399
}
392400

src/pointer-analysis/value_set_dereference.h

+46-3
Original file line numberDiff line numberDiff line change
@@ -35,18 +35,29 @@ class value_set_dereferencet
3535
* \param _options Options, in particular whether pointer checks are
3636
to be performed
3737
* \param _dereference_callback Callback object for error reporting
38+
* \param _language_mode Mode for any new symbols created to represent
39+
a dereference failure
40+
* \param _exclude_null_derefs Ignore value-set entries that indicate a given
41+
dereference may follow a null pointer
42+
* \param _exclude_int_as_pointer_derefs Ignore value-set entries that
43+
indicate a given dereference may follow an integer cast to a pointer
44+
(e.g. `(char*)5`)
3845
*/
3946
value_set_dereferencet(
4047
const namespacet &_ns,
4148
symbol_tablet &_new_symbol_table,
4249
const optionst &_options,
4350
dereference_callbackt &_dereference_callback,
44-
const irep_idt _language_mode):
51+
const irep_idt _language_mode,
52+
bool _exclude_null_derefs,
53+
bool _exclude_int_as_pointer_derefs):
4554
ns(_ns),
4655
new_symbol_table(_new_symbol_table),
4756
options(_options),
4857
dereference_callback(_dereference_callback),
49-
language_mode(_language_mode)
58+
language_mode(_language_mode),
59+
exclude_null_derefs(_exclude_null_derefs),
60+
exclude_int_as_pointer_derefs(_exclude_int_as_pointer_derefs)
5061
{ }
5162

5263
virtual ~value_set_dereferencet() { }
@@ -82,6 +93,13 @@ class value_set_dereferencet
8293
/// language_mode: ID_java, ID_C or another language identifier
8394
/// if we know the source language in use, irep_idt() otherwise.
8495
const irep_idt language_mode;
96+
/// Flag indicating whether `value_set_dereferencet::dereference` should
97+
/// disregard an apparent attempt to dereference NULL
98+
const bool exclude_null_derefs;
99+
/// Flag indicating whether `value_set_dereferencet::dereference` should
100+
/// disregard an apparent attempt to dereference an integer cast to a pointer
101+
/// (e.g. `(char*)5`)
102+
const bool exclude_int_as_pointer_derefs;
85103
static unsigned invalid_counter;
86104

87105
bool dereference_type_compare(
@@ -92,17 +110,38 @@ class value_set_dereferencet
92110
exprt &dest,
93111
const exprt &offset) const;
94112

113+
/// Return value for `build_reference_to`; see that method for documentation.
95114
class valuet
96115
{
97116
public:
98117
exprt value;
99118
exprt pointer_guard;
119+
bool ignore;
100120

101-
valuet():value(nil_exprt()), pointer_guard(false_exprt())
121+
valuet():value(nil_exprt()), pointer_guard(false_exprt()), ignore(false)
102122
{
103123
}
104124
};
105125

126+
/// Get a guard and expression to access `what` under `guard`.
127+
/// \param what: value set entry to convert to an expression: either
128+
/// ID_unknown, ID_invalid, or an object_descriptor_exprt giving a referred
129+
/// object and offset.
130+
/// \param mode: whether the pointer is being read or written; used to create
131+
/// pointer validity checks if need be
132+
/// \param pointer: pointer expression that may point to `what`
133+
/// \param guard: guard under which the pointer is dereferenced
134+
/// \return
135+
/// * If we were explicitly instructed to ignore `what` as a possible
136+
/// pointer target: a `valuet` with `ignore` = true, and `value` and
137+
/// `pointer_guard` set to nil.
138+
/// * If we could build an expression corresponding to `what`:
139+
/// A `valuet` with non-nil `value`, and `pointer_guard` set to an
140+
/// appropriate check to determine if `pointer_expr` really points to
141+
/// `what` (for example, we might return
142+
/// `{.value = global, .pointer_guard = (pointer_expr == &global)}`
143+
/// * Otherwise, if we couldn't build an expression (e.g. for `what` ==
144+
/// ID_unknown), a `valuet` with nil `value` and `ignore` == false.
106145
valuet build_reference_to(
107146
const exprt &what,
108147
const modet mode,
@@ -138,6 +177,10 @@ class value_set_dereferencet
138177
const typet &type,
139178
const guardt &guard,
140179
const exprt &offset);
180+
181+
/// Returns true if due to language guarantees or some pre-processing pass
182+
/// we always know pointers being dereferenced cannot be null.
183+
bool null_dereference_is_impossible() const;
141184
};
142185

143186
#endif // CPROVER_POINTER_ANALYSIS_VALUE_SET_DEREFERENCE_H

0 commit comments

Comments
 (0)