From 419bc1b5fe1b0fbb1857a0de3fe2a64938333341 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 8 Jun 2018 16:50:46 +0100 Subject: [PATCH 1/2] Java instanceof: avoid dereferencing null pointer Previously our code was of the form tmp = x->@class_identifier is_instanceof = x != null && tmp == "A" || tmp == "B" || ... This was harmless as the value read from a null pointer was never used if it was null, but would present a spurious dereference of a possibly-null pointer, introducing false uncertainty about the vallue of `tmp`. Therefore we now generate: if x == null: is_instanceof = false else: tmp = x->@class_identifier is_instanceof = tmp == "A" || tmp == "B" || ... --- jbmc/src/java_bytecode/remove_instanceof.cpp | 134 ++++++++++++++----- 1 file changed, 97 insertions(+), 37 deletions(-) diff --git a/jbmc/src/java_bytecode/remove_instanceof.cpp b/jbmc/src/java_bytecode/remove_instanceof.cpp index cb93fcbbb98..78f268ead1c 100644 --- a/jbmc/src/java_bytecode/remove_instanceof.cpp +++ b/jbmc/src/java_bytecode/remove_instanceof.cpp @@ -13,6 +13,7 @@ Author: Chris Smowton, chris.smowton@diffblue.com #include #include +#include #include #include @@ -39,7 +40,7 @@ class remove_instanceoft namespacet ns; class_hierarchyt class_hierarchy; - std::size_t lower_instanceof( + bool lower_instanceof( exprt &, goto_programt &, goto_programt::targett); }; @@ -49,18 +50,18 @@ class remove_instanceoft /// \param expr: Expression to lower (the code or the guard of an instruction) /// \param goto_program: program the expression belongs to /// \param this_inst: instruction the expression is found at -/// \return number of instanceof expressions that have been replaced -std::size_t remove_instanceoft::lower_instanceof( +/// \return true if any instanceof instructionw was replaced +bool remove_instanceoft::lower_instanceof( exprt &expr, goto_programt &goto_program, goto_programt::targett this_inst) { if(expr.id()!=ID_java_instanceof) { - std::size_t replacements=0; + bool changed = false; Forall_operands(it, expr) - replacements+=lower_instanceof(*it, goto_program, this_inst); - return replacements; + changed |= lower_instanceof(*it, goto_program, this_inst); + return changed; } INVARIANT( @@ -94,46 +95,100 @@ std::size_t remove_instanceoft::lower_instanceof( return a.compare(b) < 0; }); - // Insert an instruction before the new check that assigns the clsid we're - // checking for to a temporary, as GOTO program if-expressions should - // not contain derefs. - // We actually insert the assignment instruction after the existing one. - // This will briefly be ill-formed (use before def of instanceof_tmp) but the - // two will subsequently switch places. This makes sure that the inserted - // assignement doesn't end up before any labels pointing at this instruction. + // Make temporaries to store the class identifier (avoids repeated derefs) and + // the instanceof result: + symbol_typet jlo=to_symbol_type(java_lang_object_type().subtype()); - exprt object_clsid=get_class_identifier_field(check_ptr, jlo, ns); + exprt object_clsid = get_class_identifier_field(check_ptr, jlo, ns); - symbolt &newsym = get_fresh_aux_symbol( + symbolt &clsid_tmp_sym = get_fresh_aux_symbol( object_clsid.type(), id2string(this_inst->function), - "instanceof_tmp", + "class_identifier_tmp", + source_locationt(), + ID_java, + symbol_table); + + symbolt &instanceof_result_sym = get_fresh_aux_symbol( + bool_typet(), + id2string(this_inst->function), + "instanceof_result_tmp", source_locationt(), ID_java, symbol_table); - auto newinst=goto_program.insert_after(this_inst); - newinst->make_assignment(); - newinst->code=code_assignt(newsym.symbol_expr(), object_clsid); - newinst->source_location=this_inst->source_location; - newinst->function=this_inst->function; + // Create + // if(expr == null) + // instanceof_result = false; + // else + // string clsid = expr->@class_identifier + // instanceof_result = clsid == "A" || clsid == "B" || ... - // Replace the instanceof construct with a conjunction of non-null and the - // disjunction of all possible object types. According to the Java - // specification, null instanceof T is false for all possible values of T. + // According to the Java specification, null instanceof T is false for all + // possible values of T. // (http://docs.oracle.com/javase/specs/jls/se7/html/jls-15.html#jls-15.20.2) - notequal_exprt non_null_expr( - check_ptr, null_pointer_exprt(to_pointer_type(check_ptr.type()))); + + code_ifthenelset is_null_branch; + is_null_branch.cond() = + equal_exprt( + check_ptr, null_pointer_exprt(to_pointer_type(check_ptr.type()))); + is_null_branch.then_case() = + code_assignt(instanceof_result_sym.symbol_expr(), false_exprt()); + + code_blockt else_block; + else_block.add(code_declt(clsid_tmp_sym.symbol_expr())); + else_block.add(code_assignt(clsid_tmp_sym.symbol_expr(), object_clsid)); + exprt::operandst or_ops; for(const auto &clsname : children) { constant_exprt clsexpr(clsname, string_typet()); - equal_exprt test(newsym.symbol_expr(), clsexpr); + equal_exprt test(clsid_tmp_sym.symbol_expr(), clsexpr); or_ops.push_back(test); } - expr = and_exprt(non_null_expr, disjunction(or_ops)); + else_block.add( + code_assignt(instanceof_result_sym.symbol_expr(), disjunction(or_ops))); + + is_null_branch.else_case() = std::move(else_block); + + // Replace the instanceof construct with instanceof_result: + expr = instanceof_result_sym.symbol_expr(); + + std::ostringstream convert_output; + stream_message_handlert convert_message_handler(convert_output); + convert_message_handler.set_verbosity(messaget::message_levelt::M_WARNING); + + // Insert the new test block before it: + goto_programt new_check_program; + goto_convert( + is_null_branch, + symbol_table, + new_check_program, + convert_message_handler, + ID_java); + + INVARIANT( + convert_output.str().empty(), + "remove_instanceof generated test should be goto-converted without error, " + "but goto_convert reported: " + convert_output.str()); - return 1; + goto_program.destructive_insert(this_inst, new_check_program); + + return true; +} + +static bool contains_instanceof(const exprt &e) +{ + if(e.id() == ID_java_instanceof) + return true; + + for(const exprt &subexpr : e.operands()) + { + if(contains_instanceof(subexpr)) + return true; + } + + return false; } /// Replaces expressions like e instanceof A with e.\@class_identifier == "A" @@ -146,15 +201,20 @@ bool remove_instanceoft::lower_instanceof( goto_programt &goto_program, goto_programt::targett target) { - std::size_t replacements= - lower_instanceof(target->code, goto_program, target)+ - lower_instanceof(target->guard, goto_program, target); + if(target->is_target() && + (contains_instanceof(target->code) || contains_instanceof(target->guard))) + { + // If this is a branch target, add a skip beforehand so we can splice new + // GOTO programs before the target instruction without inserting into the + // wrong basic block. + goto_program.insert_before_swap(target); + target->make_skip(); + // Actually alter the now-moved instruction: + ++target; + } - if(replacements==0) - return false; - // Swap the original instruction with the last assignment added after it - target->swap(*std::next(target, replacements)); - return true; + return lower_instanceof(target->code, goto_program, target) | + lower_instanceof(target->guard, goto_program, target); } /// Replace every instanceof in the passed function body with an explicit From 2369df322175810a642a3e44a8777f9008363243 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Fri, 22 Jun 2018 09:55:12 +0100 Subject: [PATCH 2/2] Add message handler to remove_instanceof and _exceptions This enables reporting errors from remove_instanceof usage of goto_convert. --- .../src/janalyzer/janalyzer_parse_options.cpp | 4 +- jbmc/src/java_bytecode/remove_exceptions.cpp | 26 ++++++++--- jbmc/src/java_bytecode/remove_exceptions.h | 4 ++ jbmc/src/java_bytecode/remove_instanceof.cpp | 45 ++++++++++--------- jbmc/src/java_bytecode/remove_instanceof.h | 16 ++++--- jbmc/src/jbmc/jbmc_parse_options.cpp | 7 ++- jbmc/src/jdiff/jdiff_parse_options.cpp | 4 +- .../java_replace_nondet/replace_nondet.cpp | 2 +- 8 files changed, 68 insertions(+), 40 deletions(-) diff --git a/jbmc/src/janalyzer/janalyzer_parse_options.cpp b/jbmc/src/janalyzer/janalyzer_parse_options.cpp index 32299505d7a..6d98ee0cbee 100644 --- a/jbmc/src/janalyzer/janalyzer_parse_options.cpp +++ b/jbmc/src/janalyzer/janalyzer_parse_options.cpp @@ -670,9 +670,9 @@ bool janalyzer_parse_optionst::process_goto_program(const optionst &options) remove_virtual_functions(goto_model); // remove Java throw and catch // This introduces instanceof, so order is important: - remove_exceptions(goto_model); + remove_exceptions(goto_model, get_message_handler()); // remove rtti - remove_instanceof(goto_model); + remove_instanceof(goto_model, get_message_handler()); // do partial inlining status() << "Partial Inlining" << eom; diff --git a/jbmc/src/java_bytecode/remove_exceptions.cpp b/jbmc/src/java_bytecode/remove_exceptions.cpp index f189eea79ee..41aaef59ac0 100644 --- a/jbmc/src/java_bytecode/remove_exceptions.cpp +++ b/jbmc/src/java_bytecode/remove_exceptions.cpp @@ -88,10 +88,12 @@ class remove_exceptionst explicit remove_exceptionst( symbol_table_baset &_symbol_table, function_may_throwt _function_may_throw, - bool remove_added_instanceof) + bool remove_added_instanceof, + message_handlert &message_handler) : symbol_table(_symbol_table), function_may_throw(_function_may_throw), - remove_added_instanceof(remove_added_instanceof) + remove_added_instanceof(remove_added_instanceof), + message_handler(message_handler) { } @@ -102,6 +104,7 @@ class remove_exceptionst symbol_table_baset &symbol_table; function_may_throwt function_may_throw; bool remove_added_instanceof; + message_handlert &message_handler; symbol_exprt get_inflight_exception_global(); @@ -342,7 +345,7 @@ void remove_exceptionst::add_exception_dispatch_sequence( t_exc->guard=check; if(remove_added_instanceof) - remove_instanceof(t_exc, goto_program, symbol_table); + remove_instanceof(t_exc, goto_program, symbol_table, message_handler); } } } @@ -574,6 +577,7 @@ void remove_exceptionst::operator()(goto_programt &goto_program) void remove_exceptions( symbol_table_baset &symbol_table, goto_functionst &goto_functions, + message_handlert &message_handler, remove_exceptions_typest type) { const namespacet ns(symbol_table); @@ -586,7 +590,8 @@ void remove_exceptions( remove_exceptionst remove_exceptions( symbol_table, function_may_throw, - type == remove_exceptions_typest::REMOVE_ADDED_INSTANCEOF); + type == remove_exceptions_typest::REMOVE_ADDED_INSTANCEOF, + message_handler); remove_exceptions(goto_functions); } @@ -600,12 +605,14 @@ void remove_exceptions( /// \param symbol_table: global symbol table. The `@inflight_exception` global /// may be added if not already present. It will not be initialised; that is /// the caller's responsibility. +/// \param message_handler: logging output /// \param type: specifies whether instanceof operations generated by this pass /// should be lowered to class-identifier comparisons (using /// remove_instanceof). void remove_exceptions( goto_programt &goto_program, symbol_table_baset &symbol_table, + message_handlert &message_handler, remove_exceptions_typest type) { remove_exceptionst::function_may_throwt any_function_may_throw = @@ -614,12 +621,17 @@ void remove_exceptions( remove_exceptionst remove_exceptions( symbol_table, any_function_may_throw, - type == remove_exceptions_typest::REMOVE_ADDED_INSTANCEOF); + type == remove_exceptions_typest::REMOVE_ADDED_INSTANCEOF, + message_handler); remove_exceptions(goto_program); } /// removes throws/CATCH-POP/CATCH-PUSH -void remove_exceptions(goto_modelt &goto_model, remove_exceptions_typest type) +void remove_exceptions( + goto_modelt &goto_model, + message_handlert &message_handler, + remove_exceptions_typest type) { - remove_exceptions(goto_model.symbol_table, goto_model.goto_functions, type); + remove_exceptions( + goto_model.symbol_table, goto_model.goto_functions, message_handler, type); } diff --git a/jbmc/src/java_bytecode/remove_exceptions.h b/jbmc/src/java_bytecode/remove_exceptions.h index 76358c640cc..7c0b364b67a 100644 --- a/jbmc/src/java_bytecode/remove_exceptions.h +++ b/jbmc/src/java_bytecode/remove_exceptions.h @@ -16,6 +16,8 @@ Date: December 2016 #include +#include + #define INFLIGHT_EXCEPTION_VARIABLE_BASENAME "@inflight_exception" #define INFLIGHT_EXCEPTION_VARIABLE_NAME \ "java::" INFLIGHT_EXCEPTION_VARIABLE_BASENAME @@ -32,11 +34,13 @@ enum class remove_exceptions_typest void remove_exceptions( goto_programt &goto_program, symbol_table_baset &symbol_table, + message_handlert &message_handler, remove_exceptions_typest type = remove_exceptions_typest::DONT_REMOVE_INSTANCEOF); void remove_exceptions( goto_modelt &goto_model, + message_handlert &message_handler, remove_exceptions_typest type = remove_exceptions_typest::DONT_REMOVE_INSTANCEOF); diff --git a/jbmc/src/java_bytecode/remove_instanceof.cpp b/jbmc/src/java_bytecode/remove_instanceof.cpp index 78f268ead1c..d980983bf41 100644 --- a/jbmc/src/java_bytecode/remove_instanceof.cpp +++ b/jbmc/src/java_bytecode/remove_instanceof.cpp @@ -23,8 +23,11 @@ Author: Chris Smowton, chris.smowton@diffblue.com class remove_instanceoft { public: - explicit remove_instanceoft(symbol_table_baset &symbol_table) - : symbol_table(symbol_table), ns(symbol_table) + remove_instanceoft( + symbol_table_baset &symbol_table, message_handlert &message_handler) + : symbol_table(symbol_table) + , ns(symbol_table) + , message_handler(message_handler) { class_hierarchy(symbol_table); } @@ -39,6 +42,7 @@ class remove_instanceoft symbol_table_baset &symbol_table; namespacet ns; class_hierarchyt class_hierarchy; + message_handlert &message_handler; bool lower_instanceof( exprt &, goto_programt &, goto_programt::targett); @@ -154,24 +158,15 @@ bool remove_instanceoft::lower_instanceof( // Replace the instanceof construct with instanceof_result: expr = instanceof_result_sym.symbol_expr(); - std::ostringstream convert_output; - stream_message_handlert convert_message_handler(convert_output); - convert_message_handler.set_verbosity(messaget::message_levelt::M_WARNING); - // Insert the new test block before it: goto_programt new_check_program; goto_convert( is_null_branch, symbol_table, new_check_program, - convert_message_handler, + message_handler, ID_java); - INVARIANT( - convert_output.str().empty(), - "remove_instanceof generated test should be goto-converted without error, " - "but goto_convert reported: " + convert_output.str()); - goto_program.destructive_insert(this_inst, new_check_program); return true; @@ -245,12 +240,14 @@ bool remove_instanceoft::lower_instanceof(goto_programt &goto_program) /// \param target: The instruction to work on. /// \param goto_program: The function body containing the instruction. /// \param symbol_table: The symbol table to add symbols to. +/// \param message_handler: logging output void remove_instanceof( goto_programt::targett target, goto_programt &goto_program, - symbol_table_baset &symbol_table) + symbol_table_baset &symbol_table, + message_handlert &message_handler) { - remove_instanceoft rem(symbol_table); + remove_instanceoft rem(symbol_table, message_handler); rem.lower_instanceof(goto_program, target); } @@ -259,11 +256,13 @@ void remove_instanceof( /// \remarks Extra auxiliary variables may be introduced into symbol_table. /// \param function: The function to work on. /// \param symbol_table: The symbol table to add symbols to. +/// \param message_handler: logging output void remove_instanceof( goto_functionst::goto_functiont &function, - symbol_table_baset &symbol_table) + symbol_table_baset &symbol_table, + message_handlert &message_handler) { - remove_instanceoft rem(symbol_table); + remove_instanceoft rem(symbol_table, message_handler); rem.lower_instanceof(function.body); } @@ -272,11 +271,13 @@ void remove_instanceof( /// \remarks Extra auxiliary variables may be introduced into symbol_table. /// \param goto_functions: The functions to work on. /// \param symbol_table: The symbol table to add symbols to. +/// \param message_handler: logging output void remove_instanceof( goto_functionst &goto_functions, - symbol_table_baset &symbol_table) + symbol_table_baset &symbol_table, + message_handlert &message_handler) { - remove_instanceoft rem(symbol_table); + remove_instanceoft rem(symbol_table, message_handler); bool changed=false; for(auto &f : goto_functions.function_map) changed=rem.lower_instanceof(f.second.body) || changed; @@ -288,8 +289,12 @@ void remove_instanceof( /// class-identifier test. /// \remarks Extra auxiliary variables may be introduced into symbol_table. /// \param goto_model: The functions to work on and the symbol table to add +/// \param message_handler: logging output /// symbols to. -void remove_instanceof(goto_modelt &goto_model) +void remove_instanceof( + goto_modelt &goto_model, + message_handlert &message_handler) { - remove_instanceof(goto_model.goto_functions, goto_model.symbol_table); + remove_instanceof( + goto_model.goto_functions, goto_model.symbol_table, message_handler); } diff --git a/jbmc/src/java_bytecode/remove_instanceof.h b/jbmc/src/java_bytecode/remove_instanceof.h index 8ff3da10328..7b1f08c1261 100644 --- a/jbmc/src/java_bytecode/remove_instanceof.h +++ b/jbmc/src/java_bytecode/remove_instanceof.h @@ -12,24 +12,28 @@ Author: Chris Smowton, chris.smowton@diffblue.com #ifndef CPROVER_JAVA_BYTECODE_REMOVE_INSTANCEOF_H #define CPROVER_JAVA_BYTECODE_REMOVE_INSTANCEOF_H -#include - #include #include +#include +#include + void remove_instanceof( goto_programt::targett target, goto_programt &goto_program, - symbol_table_baset &symbol_table); + symbol_table_baset &symbol_table, + message_handlert &); void remove_instanceof( goto_functionst::goto_functiont &function, - symbol_table_baset &symbol_table); + symbol_table_baset &symbol_table, + message_handlert &); void remove_instanceof( goto_functionst &goto_functions, - symbol_table_baset &symbol_table); + symbol_table_baset &symbol_table, + message_handlert &); -void remove_instanceof(goto_modelt &model); +void remove_instanceof(goto_modelt &model, message_handlert &); #endif diff --git a/jbmc/src/jbmc/jbmc_parse_options.cpp b/jbmc/src/jbmc/jbmc_parse_options.cpp index a36b61d4757..2bbdfc2391f 100644 --- a/jbmc/src/jbmc/jbmc_parse_options.cpp +++ b/jbmc/src/jbmc/jbmc_parse_options.cpp @@ -755,7 +755,7 @@ void jbmc_parse_optionst::process_goto_function( try { // Removal of RTTI inspection: - remove_instanceof(goto_function, symbol_table); + remove_instanceof(goto_function, symbol_table, get_message_handler()); // Java virtual functions -> explicit dispatch tables: remove_virtual_functions(function); @@ -769,6 +769,7 @@ void jbmc_parse_optionst::process_goto_function( remove_exceptions( goto_function.body, symbol_table, + get_message_handler(), remove_exceptions_typest::REMOVE_ADDED_INSTANCEOF); } @@ -916,7 +917,9 @@ bool jbmc_parse_optionst::process_goto_functions( // remove catch and throw // (introduces instanceof but request it is removed) remove_exceptions( - goto_model, remove_exceptions_typest::REMOVE_ADDED_INSTANCEOF); + goto_model, + get_message_handler(), + remove_exceptions_typest::REMOVE_ADDED_INSTANCEOF); // instrument library preconditions instrument_preconditions(goto_model); diff --git a/jbmc/src/jdiff/jdiff_parse_options.cpp b/jbmc/src/jdiff/jdiff_parse_options.cpp index 9eaee6b0cf3..43888b72f23 100644 --- a/jbmc/src/jdiff/jdiff_parse_options.cpp +++ b/jbmc/src/jdiff/jdiff_parse_options.cpp @@ -346,9 +346,9 @@ bool jdiff_parse_optionst::process_goto_program( // Java virtual functions -> explicit dispatch tables: remove_virtual_functions(goto_model); // remove catch and throw - remove_exceptions(goto_model); + remove_exceptions(goto_model, get_message_handler()); // Java instanceof -> clsid comparison: - remove_instanceof(goto_model); + remove_instanceof(goto_model, get_message_handler()); mm_io(goto_model); diff --git a/jbmc/unit/java_bytecode/java_replace_nondet/replace_nondet.cpp b/jbmc/unit/java_bytecode/java_replace_nondet/replace_nondet.cpp index 3011b7c7282..5249bacba73 100644 --- a/jbmc/unit/java_bytecode/java_replace_nondet/replace_nondet.cpp +++ b/jbmc/unit/java_bytecode/java_replace_nondet/replace_nondet.cpp @@ -95,7 +95,7 @@ TEST_CASE( goto_model_functiont model_function( symbol_table, functions, function_name, goto_function); - remove_instanceof(goto_function, symbol_table); + remove_instanceof(goto_function, symbol_table, null_message_handler); remove_virtual_functions(model_function);