Skip to content

SEC-472 Java instanceof: avoid dereferencing null pointer #2360

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions jbmc/src/janalyzer/janalyzer_parse_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,9 +670,9 @@ bool janalyzer_parse_optionst::process_goto_program(const optionst &options)
remove_virtual_functions(goto_model);
// remove Java throw and catch
// This introduces instanceof, so order is important:
remove_exceptions(goto_model);
remove_exceptions(goto_model, get_message_handler());
// remove rtti
remove_instanceof(goto_model);
remove_instanceof(goto_model, get_message_handler());

// do partial inlining
status() << "Partial Inlining" << eom;
Expand Down
26 changes: 19 additions & 7 deletions jbmc/src/java_bytecode/remove_exceptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,12 @@ class remove_exceptionst
explicit remove_exceptionst(
symbol_table_baset &_symbol_table,
function_may_throwt _function_may_throw,
bool remove_added_instanceof)
bool remove_added_instanceof,
message_handlert &message_handler)
: symbol_table(_symbol_table),
function_may_throw(_function_may_throw),
remove_added_instanceof(remove_added_instanceof)
remove_added_instanceof(remove_added_instanceof),
message_handler(message_handler)
{
}

Expand All @@ -102,6 +104,7 @@ class remove_exceptionst
symbol_table_baset &symbol_table;
function_may_throwt function_may_throw;
bool remove_added_instanceof;
message_handlert &message_handler;

symbol_exprt get_inflight_exception_global();

Expand Down Expand Up @@ -342,7 +345,7 @@ void remove_exceptionst::add_exception_dispatch_sequence(
t_exc->guard=check;

if(remove_added_instanceof)
remove_instanceof(t_exc, goto_program, symbol_table);
remove_instanceof(t_exc, goto_program, symbol_table, message_handler);
}
}
}
Expand Down Expand Up @@ -574,6 +577,7 @@ void remove_exceptionst::operator()(goto_programt &goto_program)
void remove_exceptions(
symbol_table_baset &symbol_table,
goto_functionst &goto_functions,
message_handlert &message_handler,
remove_exceptions_typest type)
{
const namespacet ns(symbol_table);
Expand All @@ -586,7 +590,8 @@ void remove_exceptions(
remove_exceptionst remove_exceptions(
symbol_table,
function_may_throw,
type == remove_exceptions_typest::REMOVE_ADDED_INSTANCEOF);
type == remove_exceptions_typest::REMOVE_ADDED_INSTANCEOF,
message_handler);
remove_exceptions(goto_functions);
}

Expand All @@ -600,12 +605,14 @@ void remove_exceptions(
/// \param symbol_table: global symbol table. The `@inflight_exception` global
/// may be added if not already present. It will not be initialised; that is
/// the caller's responsibility.
/// \param message_handler: logging output
/// \param type: specifies whether instanceof operations generated by this pass
/// should be lowered to class-identifier comparisons (using
/// remove_instanceof).
void remove_exceptions(
goto_programt &goto_program,
symbol_table_baset &symbol_table,
message_handlert &message_handler,
remove_exceptions_typest type)
{
remove_exceptionst::function_may_throwt any_function_may_throw =
Expand All @@ -614,12 +621,17 @@ void remove_exceptions(
remove_exceptionst remove_exceptions(
symbol_table,
any_function_may_throw,
type == remove_exceptions_typest::REMOVE_ADDED_INSTANCEOF);
type == remove_exceptions_typest::REMOVE_ADDED_INSTANCEOF,
message_handler);
remove_exceptions(goto_program);
}

/// removes throws/CATCH-POP/CATCH-PUSH
void remove_exceptions(goto_modelt &goto_model, remove_exceptions_typest type)
void remove_exceptions(
goto_modelt &goto_model,
message_handlert &message_handler,
remove_exceptions_typest type)
{
remove_exceptions(goto_model.symbol_table, goto_model.goto_functions, type);
remove_exceptions(
goto_model.symbol_table, goto_model.goto_functions, message_handler, type);
}
4 changes: 4 additions & 0 deletions jbmc/src/java_bytecode/remove_exceptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ Date: December 2016

#include <goto-programs/goto_model.h>

#include <util/message.h>

#define INFLIGHT_EXCEPTION_VARIABLE_BASENAME "@inflight_exception"
#define INFLIGHT_EXCEPTION_VARIABLE_NAME \
"java::" INFLIGHT_EXCEPTION_VARIABLE_BASENAME
Expand All @@ -32,11 +34,13 @@ enum class remove_exceptions_typest
void remove_exceptions(
goto_programt &goto_program,
symbol_table_baset &symbol_table,
message_handlert &message_handler,
remove_exceptions_typest type =
remove_exceptions_typest::DONT_REMOVE_INSTANCEOF);

void remove_exceptions(
goto_modelt &goto_model,
message_handlert &message_handler,
remove_exceptions_typest type =
remove_exceptions_typest::DONT_REMOVE_INSTANCEOF);

Expand Down
159 changes: 112 additions & 47 deletions jbmc/src/java_bytecode/remove_instanceof.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Author: Chris Smowton, [email protected]

#include <goto-programs/class_hierarchy.h>
#include <goto-programs/class_identifier.h>
#include <goto-programs/goto_convert.h>

#include <util/fresh_symbol.h>
#include <java_bytecode/java_types.h>
Expand All @@ -22,8 +23,11 @@ Author: Chris Smowton, [email protected]
class remove_instanceoft
{
public:
explicit remove_instanceoft(symbol_table_baset &symbol_table)
: symbol_table(symbol_table), ns(symbol_table)
remove_instanceoft(
symbol_table_baset &symbol_table, message_handlert &message_handler)
: symbol_table(symbol_table)
, ns(symbol_table)
, message_handler(message_handler)
{
class_hierarchy(symbol_table);
}
Expand All @@ -38,8 +42,9 @@ class remove_instanceoft
symbol_table_baset &symbol_table;
namespacet ns;
class_hierarchyt class_hierarchy;
message_handlert &message_handler;

std::size_t lower_instanceof(
bool lower_instanceof(
exprt &, goto_programt &, goto_programt::targett);
};

Expand All @@ -49,18 +54,18 @@ class remove_instanceoft
/// \param expr: Expression to lower (the code or the guard of an instruction)
/// \param goto_program: program the expression belongs to
/// \param this_inst: instruction the expression is found at
/// \return number of instanceof expressions that have been replaced
std::size_t remove_instanceoft::lower_instanceof(
/// \return true if any instanceof instructionw was replaced
bool remove_instanceoft::lower_instanceof(
exprt &expr,
goto_programt &goto_program,
goto_programt::targett this_inst)
{
if(expr.id()!=ID_java_instanceof)
{
std::size_t replacements=0;
bool changed = false;
Forall_operands(it, expr)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe chage to ranged for while you're at it

replacements+=lower_instanceof(*it, goto_program, this_inst);
return replacements;
changed |= lower_instanceof(*it, goto_program, this_inst);
return changed;
}

INVARIANT(
Expand Down Expand Up @@ -94,46 +99,91 @@ std::size_t remove_instanceoft::lower_instanceof(
return a.compare(b) < 0;
});

// Insert an instruction before the new check that assigns the clsid we're
// checking for to a temporary, as GOTO program if-expressions should
// not contain derefs.
// We actually insert the assignment instruction after the existing one.
// This will briefly be ill-formed (use before def of instanceof_tmp) but the
// two will subsequently switch places. This makes sure that the inserted
// assignement doesn't end up before any labels pointing at this instruction.
// Make temporaries to store the class identifier (avoids repeated derefs) and
// the instanceof result:

symbol_typet jlo=to_symbol_type(java_lang_object_type().subtype());
exprt object_clsid=get_class_identifier_field(check_ptr, jlo, ns);
exprt object_clsid = get_class_identifier_field(check_ptr, jlo, ns);

symbolt &newsym = get_fresh_aux_symbol(
symbolt &clsid_tmp_sym = get_fresh_aux_symbol(
object_clsid.type(),
id2string(this_inst->function),
"instanceof_tmp",
"class_identifier_tmp",
source_locationt(),
ID_java,
symbol_table);

symbolt &instanceof_result_sym = get_fresh_aux_symbol(
bool_typet(),
id2string(this_inst->function),
"instanceof_result_tmp",
source_locationt(),
ID_java,
symbol_table);

auto newinst=goto_program.insert_after(this_inst);
newinst->make_assignment();
newinst->code=code_assignt(newsym.symbol_expr(), object_clsid);
newinst->source_location=this_inst->source_location;
newinst->function=this_inst->function;
// Create
// if(expr == null)
// instanceof_result = false;
// else
// string clsid = expr->@class_identifier
// instanceof_result = clsid == "A" || clsid == "B" || ...

// Replace the instanceof construct with a conjunction of non-null and the
// disjunction of all possible object types. According to the Java
// specification, null instanceof T is false for all possible values of T.
// According to the Java specification, null instanceof T is false for all
// possible values of T.
// (http://docs.oracle.com/javase/specs/jls/se7/html/jls-15.html#jls-15.20.2)
notequal_exprt non_null_expr(
check_ptr, null_pointer_exprt(to_pointer_type(check_ptr.type())));

code_ifthenelset is_null_branch;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure goto_convertshould be used here... Would it be significantly longer to rewrite this as a proper goto program transformation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not much, but it seems worse than building the if/else structure I intend then letting goto-convert build the CFG as it sees fit. See also making stub methods in jbmc_parse_optionst, which is already done by making a codet and then goto_converting it

is_null_branch.cond() =
equal_exprt(
check_ptr, null_pointer_exprt(to_pointer_type(check_ptr.type())));
is_null_branch.then_case() =
code_assignt(instanceof_result_sym.symbol_expr(), false_exprt());

code_blockt else_block;
else_block.add(code_declt(clsid_tmp_sym.symbol_expr()));
else_block.add(code_assignt(clsid_tmp_sym.symbol_expr(), object_clsid));

exprt::operandst or_ops;
for(const auto &clsname : children)
{
constant_exprt clsexpr(clsname, string_typet());
equal_exprt test(newsym.symbol_expr(), clsexpr);
equal_exprt test(clsid_tmp_sym.symbol_expr(), clsexpr);
or_ops.push_back(test);
}
expr = and_exprt(non_null_expr, disjunction(or_ops));
else_block.add(
code_assignt(instanceof_result_sym.symbol_expr(), disjunction(or_ops)));

is_null_branch.else_case() = std::move(else_block);

// Replace the instanceof construct with instanceof_result:
expr = instanceof_result_sym.symbol_expr();

return 1;
// Insert the new test block before it:
goto_programt new_check_program;
goto_convert(
is_null_branch,
symbol_table,
new_check_program,
message_handler,
ID_java);

goto_program.destructive_insert(this_inst, new_check_program);

return true;
}

static bool contains_instanceof(const exprt &e)
{
if(e.id() == ID_java_instanceof)
return true;

for(const exprt &subexpr : e.operands())
{
if(contains_instanceof(subexpr))
return true;
}

return false;
}

/// Replaces expressions like e instanceof A with e.\@class_identifier == "A"
Expand All @@ -146,15 +196,20 @@ bool remove_instanceoft::lower_instanceof(
goto_programt &goto_program,
goto_programt::targett target)
{
std::size_t replacements=
lower_instanceof(target->code, goto_program, target)+
lower_instanceof(target->guard, goto_program, target);
if(target->is_target() &&
(contains_instanceof(target->code) || contains_instanceof(target->guard)))
{
// If this is a branch target, add a skip beforehand so we can splice new
// GOTO programs before the target instruction without inserting into the
// wrong basic block.
goto_program.insert_before_swap(target);
target->make_skip();
// Actually alter the now-moved instruction:
++target;
}

if(replacements==0)
return false;
// Swap the original instruction with the last assignment added after it
target->swap(*std::next(target, replacements));
return true;
return lower_instanceof(target->code, goto_program, target) |
lower_instanceof(target->guard, goto_program, target);
}

/// Replace every instanceof in the passed function body with an explicit
Expand Down Expand Up @@ -185,12 +240,14 @@ bool remove_instanceoft::lower_instanceof(goto_programt &goto_program)
/// \param target: The instruction to work on.
/// \param goto_program: The function body containing the instruction.
/// \param symbol_table: The symbol table to add symbols to.
/// \param message_handler: logging output
void remove_instanceof(
goto_programt::targett target,
goto_programt &goto_program,
symbol_table_baset &symbol_table)
symbol_table_baset &symbol_table,
message_handlert &message_handler)
{
remove_instanceoft rem(symbol_table);
remove_instanceoft rem(symbol_table, message_handler);
rem.lower_instanceof(goto_program, target);
}

Expand All @@ -199,11 +256,13 @@ void remove_instanceof(
/// \remarks Extra auxiliary variables may be introduced into symbol_table.
/// \param function: The function to work on.
/// \param symbol_table: The symbol table to add symbols to.
/// \param message_handler: logging output
void remove_instanceof(
goto_functionst::goto_functiont &function,
symbol_table_baset &symbol_table)
symbol_table_baset &symbol_table,
message_handlert &message_handler)
{
remove_instanceoft rem(symbol_table);
remove_instanceoft rem(symbol_table, message_handler);
rem.lower_instanceof(function.body);
}

Expand All @@ -212,11 +271,13 @@ void remove_instanceof(
/// \remarks Extra auxiliary variables may be introduced into symbol_table.
/// \param goto_functions: The functions to work on.
/// \param symbol_table: The symbol table to add symbols to.
/// \param message_handler: logging output
void remove_instanceof(
goto_functionst &goto_functions,
symbol_table_baset &symbol_table)
symbol_table_baset &symbol_table,
message_handlert &message_handler)
{
remove_instanceoft rem(symbol_table);
remove_instanceoft rem(symbol_table, message_handler);
bool changed=false;
for(auto &f : goto_functions.function_map)
changed=rem.lower_instanceof(f.second.body) || changed;
Expand All @@ -228,8 +289,12 @@ void remove_instanceof(
/// class-identifier test.
/// \remarks Extra auxiliary variables may be introduced into symbol_table.
/// \param goto_model: The functions to work on and the symbol table to add
/// \param message_handler: logging output
/// symbols to.
void remove_instanceof(goto_modelt &goto_model)
void remove_instanceof(
goto_modelt &goto_model,
message_handlert &message_handler)
{
remove_instanceof(goto_model.goto_functions, goto_model.symbol_table);
remove_instanceof(
goto_model.goto_functions, goto_model.symbol_table, message_handler);
}
Loading