Skip to content

Commit 37c9da4

Browse files
committed
Java instanceof: avoid dereferencing null pointer
Previously our code was of the form tmp = x->@class_identifier is_instanceof = x != null && tmp == "A" || tmp == "B" || ... This was harmless as the value read from a null pointer was never used if it was null, but would present a spurious dereference of a possibly-null pointer, introducing false uncertainty about the vallue of `tmp`. Therefore we now generate: if x == null: is_instanceof = false else: tmp = x->@class_identifier is_instanceof = tmp == "A" || tmp == "B" || ...
1 parent 6882da6 commit 37c9da4

File tree

1 file changed

+96
-37
lines changed

1 file changed

+96
-37
lines changed

jbmc/src/java_bytecode/remove_instanceof.cpp

+96-37
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Author: Chris Smowton, [email protected]
1313

1414
#include <goto-programs/class_hierarchy.h>
1515
#include <goto-programs/class_identifier.h>
16+
#include <goto-programs/goto_convert.h>
1617

1718
#include <util/fresh_symbol.h>
1819
#include <java_bytecode/java_types.h>
@@ -39,7 +40,7 @@ class remove_instanceoft
3940
namespacet ns;
4041
class_hierarchyt class_hierarchy;
4142

42-
std::size_t lower_instanceof(
43+
bool lower_instanceof(
4344
exprt &, goto_programt &, goto_programt::targett);
4445
};
4546

@@ -49,18 +50,18 @@ class remove_instanceoft
4950
/// \param expr: Expression to lower (the code or the guard of an instruction)
5051
/// \param goto_program: program the expression belongs to
5152
/// \param this_inst: instruction the expression is found at
52-
/// \return number of instanceof expressions that have been replaced
53-
std::size_t remove_instanceoft::lower_instanceof(
53+
/// \return true if any instanceof instructionw was replaced
54+
bool remove_instanceoft::lower_instanceof(
5455
exprt &expr,
5556
goto_programt &goto_program,
5657
goto_programt::targett this_inst)
5758
{
5859
if(expr.id()!=ID_java_instanceof)
5960
{
60-
std::size_t replacements=0;
61+
bool changed = false;
6162
Forall_operands(it, expr)
62-
replacements+=lower_instanceof(*it, goto_program, this_inst);
63-
return replacements;
63+
changed |= lower_instanceof(*it, goto_program, this_inst);
64+
return changed;
6465
}
6566

6667
INVARIANT(
@@ -94,46 +95,99 @@ std::size_t remove_instanceoft::lower_instanceof(
9495
return a.compare(b) < 0;
9596
});
9697

97-
// Insert an instruction before the new check that assigns the clsid we're
98-
// checking for to a temporary, as GOTO program if-expressions should
99-
// not contain derefs.
100-
// We actually insert the assignment instruction after the existing one.
101-
// This will briefly be ill-formed (use before def of instanceof_tmp) but the
102-
// two will subsequently switch places. This makes sure that the inserted
103-
// assignement doesn't end up before any labels pointing at this instruction.
98+
// Make temporaries to store the class identifier (avoids repeated derefs) and
99+
// the instanceof result:
100+
104101
symbol_typet jlo=to_symbol_type(java_lang_object_type().subtype());
105-
exprt object_clsid=get_class_identifier_field(check_ptr, jlo, ns);
102+
exprt object_clsid = get_class_identifier_field(check_ptr, jlo, ns);
106103

107-
symbolt &newsym = get_fresh_aux_symbol(
104+
symbolt &clsid_tmp_sym = get_fresh_aux_symbol(
108105
object_clsid.type(),
109106
id2string(this_inst->function),
110-
"instanceof_tmp",
107+
"class_identifier_tmp",
108+
source_locationt(),
109+
ID_java,
110+
symbol_table);
111+
112+
symbolt &instanceof_result_sym = get_fresh_aux_symbol(
113+
bool_typet(),
114+
id2string(this_inst->function),
115+
"instanceof_result_tmp",
111116
source_locationt(),
112117
ID_java,
113118
symbol_table);
114119

115-
auto newinst=goto_program.insert_after(this_inst);
116-
newinst->make_assignment();
117-
newinst->code=code_assignt(newsym.symbol_expr(), object_clsid);
118-
newinst->source_location=this_inst->source_location;
119-
newinst->function=this_inst->function;
120+
// Create
121+
// if(expr == null)
122+
// instanceof_result = false;
123+
// else
124+
// string clsid = expr->@class_identifier
125+
// instanceof_result = clsid == "A" || clsid == "B" || ...
120126

121-
// Replace the instanceof construct with a conjunction of non-null and the
122-
// disjunction of all possible object types. According to the Java
123-
// specification, null instanceof T is false for all possible values of T.
127+
// According to the Java specification, null instanceof T is false for all
128+
// possible values of T.
124129
// (http://docs.oracle.com/javase/specs/jls/se7/html/jls-15.html#jls-15.20.2)
125-
notequal_exprt non_null_expr(
126-
check_ptr, null_pointer_exprt(to_pointer_type(check_ptr.type())));
130+
131+
code_ifthenelset is_null_branch;
132+
is_null_branch.cond() =
133+
equal_exprt(
134+
check_ptr, null_pointer_exprt(to_pointer_type(check_ptr.type())));
135+
is_null_branch.then_case() =
136+
code_assignt(instanceof_result_sym.symbol_expr(), false_exprt());
137+
138+
code_blockt else_block;
139+
else_block.add(code_declt(clsid_tmp_sym.symbol_expr()));
140+
else_block.add(code_assignt(clsid_tmp_sym.symbol_expr(), object_clsid));
141+
127142
exprt::operandst or_ops;
128143
for(const auto &clsname : children)
129144
{
130145
constant_exprt clsexpr(clsname, string_typet());
131-
equal_exprt test(newsym.symbol_expr(), clsexpr);
146+
equal_exprt test(clsid_tmp_sym.symbol_expr(), clsexpr);
132147
or_ops.push_back(test);
133148
}
134-
expr = and_exprt(non_null_expr, disjunction(or_ops));
149+
else_block.add(
150+
code_assignt(instanceof_result_sym.symbol_expr(), disjunction(or_ops)));
151+
152+
is_null_branch.else_case() = std::move(else_block);
153+
154+
// Replace the instanceof construct with instanceof_result:
155+
expr = instanceof_result_sym.symbol_expr();
156+
157+
std::ostringstream convert_output;
158+
stream_message_handlert convert_message_handler(convert_output);
159+
160+
// Insert the new test block before it:
161+
goto_programt new_check_program;
162+
goto_convert(
163+
is_null_branch,
164+
symbol_table,
165+
new_check_program,
166+
convert_message_handler,
167+
ID_java);
168+
169+
INVARIANT(
170+
convert_output.str().empty(),
171+
"remove_instanceof generated test should be goto-converted without error, "
172+
"but goto_convert reported: " + convert_output.str());
135173

136-
return 1;
174+
goto_program.destructive_insert(this_inst, new_check_program);
175+
176+
return true;
177+
}
178+
179+
static bool contains_instanceof(const exprt &e)
180+
{
181+
if(e.id() == ID_java_instanceof)
182+
return true;
183+
184+
for(const exprt &subexpr : e.operands())
185+
{
186+
if(contains_instanceof(subexpr))
187+
return true;
188+
}
189+
190+
return false;
137191
}
138192

139193
/// Replaces expressions like e instanceof A with e.\@class_identifier == "A"
@@ -146,15 +200,20 @@ bool remove_instanceoft::lower_instanceof(
146200
goto_programt &goto_program,
147201
goto_programt::targett target)
148202
{
149-
std::size_t replacements=
150-
lower_instanceof(target->code, goto_program, target)+
151-
lower_instanceof(target->guard, goto_program, target);
203+
if(target->is_target() &&
204+
(contains_instanceof(target->code) || contains_instanceof(target->guard)))
205+
{
206+
// If this is a branch target, add a skip beforehand so we can splice new
207+
// GOTO programs before the target instruction without inserting into the
208+
// wrong basic block.
209+
goto_program.insert_before_swap(target);
210+
target->make_skip();
211+
// Actually alter the now-moved instruction:
212+
++target;
213+
}
152214

153-
if(replacements==0)
154-
return false;
155-
// Swap the original instruction with the last assignment added after it
156-
target->swap(*std::next(target, replacements));
157-
return true;
215+
return lower_instanceof(target->code, goto_program, target) |
216+
lower_instanceof(target->guard, goto_program, target);
158217
}
159218

160219
/// Replace every instanceof in the passed function body with an explicit

0 commit comments

Comments
 (0)