diff --git a/jbmc/regression/jbmc/remove_virtual_function_typecast/test.desc b/jbmc/regression/jbmc/remove_virtual_function_typecast/test.desc index 19f4520679f..4ac473a6115 100644 --- a/jbmc/regression/jbmc/remove_virtual_function_typecast/test.desc +++ b/jbmc/regression/jbmc/remove_virtual_function_typecast/test.desc @@ -10,3 +10,5 @@ VirtualFunctions.class -- -- This doesn't work under symex-driven lazy loading because it is incompatible with lazy-methods (default) +Note the space before the checks for virtual function call on a and b, this +verifies there is no cast. diff --git a/jbmc/src/java_bytecode/java_bytecode_convert_method.cpp b/jbmc/src/java_bytecode/java_bytecode_convert_method.cpp index e5919ef1aa0..171c305d450 100644 --- a/jbmc/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_convert_method.cpp @@ -36,16 +36,17 @@ Author: Daniel Kroening, kroening@kroening.com #include #include +#include #include #include #include -#include #include #include -#include +#include #include +#include /// Given a string of the format '?blah?', will return true when compared /// against a string that matches appart from any characters that are '?' @@ -428,11 +429,14 @@ void java_bytecode_convert_methodt::convert( id2string(class_symbol.name)+"."+id2string(m.name)+":"+m.descriptor; method_id=method_identifier; - symbolt &method_symbol=*symbol_table.get_writeable(method_identifier); - // Obtain a std::vector of java_method_typet::parametert objects from the // (function) type of the symbol - java_method_typet method_type = to_java_method_type(method_symbol.type); + // Don't try to hang on to this reference into the symbol table, as we're + // about to create symbols for the method's parameters, which would invalidate + // the reference. Instead, copy the type here, set it up, then assign it back + // to the symbol later. + java_method_typet method_type = + to_java_method_type(symbol_table.lookup_ref(method_identifier).type); method_type.set(ID_C_class, class_symbol.name); method_type.set_is_final(m.is_final); method_return_type = method_type.return_type(); @@ -555,6 +559,8 @@ void java_bytecode_convert_methodt::convert( param_index+=java_local_variable_slots(param.type()); } + symbolt &method_symbol = symbol_table.get_writeable_ref(method_identifier); + // The parameter slots detected in this function should agree with what // java_parameter_count() thinks about this method INVARIANT( @@ -641,20 +647,39 @@ static irep_idt get_if_cmp_operator(const irep_idt &stmt) throw "unhandled java comparison instruction"; } -static member_exprt to_member(const exprt &pointer, const exprt &fieldref) +static member_exprt +to_member(const exprt &pointer, const exprt &fieldref, const namespacet &ns) { struct_tag_typet class_type(fieldref.get(ID_class)); - const typecast_exprt pointer2(pointer, java_reference_type(class_type)); + const exprt typed_pointer = + typecast_exprt::conditional_cast(pointer, java_reference_type(class_type)); + + const irep_idt &component_name = fieldref.get(ID_component_name); - dereference_exprt obj_deref=checked_dereference(pointer2, class_type); + exprt accessed_object = checked_dereference(typed_pointer, class_type); + + // The field access is described as being against a particular type, but it + // may in fact belong to any ancestor type. + while(1) + { + struct_typet object_type = + to_struct_type(ns.follow(accessed_object.type())); + auto component = object_type.get_component(component_name); - const member_exprt member_expr( - obj_deref, - fieldref.get(ID_component_name), - fieldref.type()); + if(component.is_not_nil()) + return member_exprt(accessed_object, component); - return member_expr; + // Component not present: check the immediate parent type. + + const auto &components = object_type.components(); + INVARIANT( + components.size() != 0, + "infer_opaque_type_fields should guarantee that a member access has a " + "corresponding field"); + + accessed_object = member_exprt(accessed_object, components.front()); + } } /// Find all goto statements in 'repl' that target 'old_label' and redirect them @@ -1530,7 +1555,7 @@ code_blockt java_bytecode_convert_methodt::convert_instructions( else if(statement=="getfield") { PRECONDITION(op.size() == 1 && results.size() == 1); - results[0]=java_bytecode_promotion(to_member(op[0], arg0)); + results[0] = java_bytecode_promotion(to_member(op[0], arg0, ns)); } else if(statement=="getstatic") { @@ -2070,34 +2095,63 @@ void java_bytecode_convert_methodt::convert_invoke( const bool is_virtual( statement == "invokevirtual" || statement == "invokeinterface"); + const irep_idt &invoked_method_id = arg0.get(ID_identifier); + INVARIANT( + !invoked_method_id.empty(), + "invoke statement arg0 must have an identifier"); + + auto method_symbol = symbol_table.symbols.find(invoked_method_id); + + // Use the most precise type available: the actual symbol has generic info, + // whereas the type given by the invoke instruction doesn't and is therefore + // less accurate. + if(method_symbol != symbol_table.symbols.end()) + { + const auto &restored_type = + original_return_type(symbol_table, invoked_method_id); + // Note the number of parameters might change here due to constructors using + // invokespecial will have zero arguments (the `this` is added below) + // but the symbol for the will have the this parameter. + INVARIANT( + to_java_method_type(arg0.type()).return_type().id() == + restored_type.return_type().id(), + "Function return type must not change in kind"); + arg0.type() = restored_type; + } + + // Note arg0 and arg0.type() are subject to many side-effects in this method, + // then finally used to populate the call instruction. java_method_typet &method_type = to_java_method_type(arg0.type()); + java_method_typet::parameterst ¶meters(method_type.parameters()); if(use_this) { + irep_idt classname = arg0.get(ID_C_class); + if(parameters.empty() || !parameters[0].get_this()) { - irep_idt classname = arg0.get(ID_C_class); typet thistype = struct_tag_typet(classname); - // Note invokespecial is used for super-method calls as well as - // constructors. - if(statement == "invokespecial") - { - if(is_constructor(arg0.get(ID_identifier))) - { - if(needed_lazy_methods) - needed_lazy_methods->add_needed_class(classname); - method_type.set_is_constructor(); - } - else - method_type.set(ID_java_super_method_call, true); - } reference_typet object_ref_type = java_reference_type(thistype); java_method_typet::parametert this_p(object_ref_type); this_p.set_this(); this_p.set_base_name(ID_this); parameters.insert(parameters.begin(), this_p); } + + // Note invokespecial is used for super-method calls as well as + // constructors. + if(statement == "invokespecial") + { + if(is_constructor(invoked_method_id)) + { + if(needed_lazy_methods) + needed_lazy_methods->add_needed_class(classname); + method_type.set_is_constructor(); + } + else + method_type.set(ID_java_super_method_call, true); + } } code_function_callt call; @@ -2160,18 +2214,17 @@ void java_bytecode_convert_methodt::convert_invoke( // accessible from the original caller, but not from the generated test. // Therefore we must assume that the method is not accessible. // We set opaque methods as final to avoid assuming they can be overridden. - irep_idt id = arg0.get(ID_identifier); if( - symbol_table.symbols.find(id) == symbol_table.symbols.end() && + method_symbol == symbol_table.symbols.end() && !(is_virtual && is_method_inherited(arg0.get(ID_C_class), arg0.get(ID_component_name)))) { symbolt symbol; - symbol.name = id; + symbol.name = invoked_method_id; symbol.base_name = arg0.get(ID_C_base_name); symbol.pretty_name = id2string(arg0.get(ID_C_class)).substr(6) + "." + id2string(symbol.base_name) + "()"; - symbol.type = arg0.type(); + symbol.type = method_type; symbol.type.set(ID_access, ID_private); to_java_method_type(symbol.type).set_is_final(true); symbol.value.make_nil(); @@ -2196,10 +2249,10 @@ void java_bytecode_convert_methodt::convert_invoke( else { // static binding - call.function() = symbol_exprt(arg0.get(ID_identifier), arg0.type()); + call.function() = symbol_exprt(invoked_method_id, method_type); if(needed_lazy_methods) { - needed_lazy_methods->add_needed_method(arg0.get(ID_identifier)); + needed_lazy_methods->add_needed_method(invoked_method_id); // Calling a static method causes static initialization: needed_lazy_methods->add_needed_class(arg0.get(ID_C_class)); } @@ -2543,7 +2596,7 @@ code_blockt java_bytecode_convert_methodt::convert_putfield( block, bytecode_write_typet::FIELD, arg0.get(ID_component_name)); - block.add(code_assignt(to_member(op[0], arg0), op[1])); + block.add(code_assignt(to_member(op[0], arg0, ns), op[1])); return block; } @@ -2918,8 +2971,7 @@ optionalt java_bytecode_convert_methodt::convert_invoke_dynamic( if(return_type.id() == ID_empty) return {}; - const auto value = - zero_initializer(return_type, location, namespacet(symbol_table)); + const auto value = zero_initializer(return_type, location, ns); if(!value.has_value()) { error().source_location = location; diff --git a/jbmc/src/java_bytecode/java_bytecode_convert_method_class.h b/jbmc/src/java_bytecode/java_bytecode_convert_method_class.h index c7ac52e570d..94202ee9d65 100644 --- a/jbmc/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/jbmc/src/java_bytecode/java_bytecode_convert_method_class.h @@ -43,6 +43,7 @@ class java_bytecode_convert_methodt:public messaget bool threading_support) : messaget(_message_handler), symbol_table(symbol_table), + ns(symbol_table), max_array_length(_max_array_length), throw_assertion_error(throw_assertion_error), threading_support(threading_support), @@ -69,6 +70,7 @@ class java_bytecode_convert_methodt:public messaget protected: symbol_table_baset &symbol_table; + namespacet ns; const size_t max_array_length; const bool throw_assertion_error; const bool threading_support; diff --git a/jbmc/src/java_bytecode/java_bytecode_typecheck.h b/jbmc/src/java_bytecode/java_bytecode_typecheck.h index 4d56e105bf8..beaaaa21cb9 100644 --- a/jbmc/src/java_bytecode/java_bytecode_typecheck.h +++ b/jbmc/src/java_bytecode/java_bytecode_typecheck.h @@ -69,7 +69,6 @@ class java_bytecode_typecheckt:public typecheckt void typecheck_code(codet &); void typecheck_type(typet &); void typecheck_expr_symbol(symbol_exprt &); - void typecheck_expr_member(member_exprt &); void typecheck_expr_java_new(side_effect_exprt &); void typecheck_expr_java_new_array(side_effect_exprt &); diff --git a/jbmc/src/java_bytecode/java_bytecode_typecheck_expr.cpp b/jbmc/src/java_bytecode/java_bytecode_typecheck_expr.cpp index 88ba3ab02e4..aca1c14b6a6 100644 --- a/jbmc/src/java_bytecode/java_bytecode_typecheck_expr.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_typecheck_expr.cpp @@ -52,8 +52,6 @@ void java_bytecode_typecheckt::typecheck_expr(exprt &expr) else if(statement==ID_java_new_array) typecheck_expr_java_new_array(to_side_effect_expr(expr)); } - else if(expr.id()==ID_member) - typecheck_expr_member(to_member_expr(expr)); } void java_bytecode_typecheckt::typecheck_expr_java_new(side_effect_exprt &expr) @@ -120,46 +118,3 @@ void java_bytecode_typecheckt::typecheck_expr_symbol(symbol_exprt &expr) expr.type()=symbol.type; } } - -void java_bytecode_typecheckt::typecheck_expr_member(member_exprt &expr) -{ - // The member might be in a parent class or an opaque class, which we resolve - // here. - const irep_idt component_name=expr.get_component_name(); - - while(1) - { - typet base_type(ns.follow(expr.struct_op().type())); - - if(base_type.id()!=ID_struct) - break; // give up - - struct_typet &struct_type= - to_struct_type(base_type); - - if(struct_type.has_component(component_name)) - return; // done - - // look at parent - struct_typet::componentst &components= - struct_type.components(); - - if(components.empty()) - break; - - const struct_typet::componentt &c=components.front(); - - member_exprt m(expr.struct_op(), c.get_name(), c.type()); - m.add_source_location()=expr.source_location(); - - expr.struct_op()=m; - } - - warning().source_location=expr.source_location(); - warning() << "failed to find field `" - << component_name << "` in class hierarchy" << eom; - - // We replace by a non-det of same type - side_effect_expr_nondett nondet(expr.type(), expr.source_location()); - expr.swap(nondet); -} diff --git a/jbmc/src/java_bytecode/java_pointer_casts.cpp b/jbmc/src/java_bytecode/java_pointer_casts.cpp index 702211faeb1..7a89e4367ed 100644 --- a/jbmc/src/java_bytecode/java_pointer_casts.cpp +++ b/jbmc/src/java_bytecode/java_pointer_casts.cpp @@ -118,8 +118,12 @@ exprt make_clean_pointer_cast( return bare_ptr; exprt superclass_ptr=bare_ptr; + // Looking at base types discards generic qualifiers (because those are + // recorded on the pointer, not the pointee), so it may still be necessary + // to use a cast to reintroduce the qualifier (for example, the base might + // be recorded as a List, when we're looking for a List) if(find_superclass_with_type(superclass_ptr, target_base, ns)) - return superclass_ptr; + return typecast_exprt::conditional_cast(superclass_ptr, target_type); return typecast_exprt(bare_ptr, target_type); } diff --git a/jbmc/unit/java-testing-utils/require_goto_statements.cpp b/jbmc/unit/java-testing-utils/require_goto_statements.cpp index aa8d93e513d..871fd74c3d2 100644 --- a/jbmc/unit/java-testing-utils/require_goto_statements.cpp +++ b/jbmc/unit/java-testing-utils/require_goto_statements.cpp @@ -11,11 +11,46 @@ Author: Diffblue Ltd. #include #include -#include #include +#include #include +#include +#include #include +/// Given an expression, attempt to find the underlying symbol it represents +/// by skipping over type casts and removing balanced dereference/address_of +/// operations +optionalt +root_object(const exprt &lhs_expr, const symbol_tablet &symbol_table) +{ + auto expr = skip_typecast(lhs_expr); + int dereference_balance = 0; + while(!can_cast_expr(expr)) + { + if(const auto deref = expr_try_dynamic_cast(expr)) + { + ++dereference_balance; + expr = skip_typecast(deref->pointer()); + } + else if( + const auto address_of = expr_try_dynamic_cast(expr)) + { + --dereference_balance; + expr = skip_typecast(address_of->object()); + } + else + { + return {}; + } + } + if(dereference_balance != 0) + { + return {}; + } + return to_symbol_expr(expr); +} + /// Expand value of a function to include all child codets /// \param function_value: The value of the function (e.g. got by looking up /// the function in the symbol table and getting the value) @@ -104,8 +139,8 @@ require_goto_statements::find_struct_component_assignments( ode.build(superclass_expr, ns); if( superclass_expr.get_component_name() == supercomponent_name && - to_symbol_expr(ode.root_object()).get_identifier() == - structure_name) + root_object(ode.root_object(), symbol_table)->get_identifier() == + structure_name) { if( code_assign.rhs() == @@ -126,9 +161,11 @@ require_goto_statements::find_struct_component_assignments( // member_exprt member_expr: // - component name: \p component_name // - operand (component of): symbol for \p structure_name + + const auto &root_object = + ::root_object(member_expr.struct_op(), symbol_table); if( - member_expr.op().id() == ID_symbol && - to_symbol_expr(member_expr.op()).get_identifier() == structure_name && + root_object && root_object->get_identifier() == structure_name && member_expr.get_component_name() == component_name) { if( @@ -317,42 +354,32 @@ const irep_idt &require_goto_statements::require_struct_component_assignment( superclass_name, component_name, symbol_table); + INFO( + "looking for component assignment " << component_name << " in " + << structure_name); REQUIRE(component_assignments.non_null_assignments.size() == 1); - // We are expecting that the resulting statement can be of two forms: + // We are expecting that the resulting statement can be of the form: // 1. structure_name.(@superclass_name if given).component = - // (struct cast_type_name *) tmp_object_factory$1; + // (optional type cast *) tmp_object_factory$1; // followed by a direct assignment like this: // tmp_object_factory$1 = &tmp_object_factory$2; - // 2. structure_name.component = &tmp_object_factory$1; + // 2. structure_name.component = (optional cast *)&tmp_object_factory$1 exprt component_assignment_rhs_expr = - component_assignments.non_null_assignments[0].rhs(); + skip_typecast(component_assignments.non_null_assignments[0].rhs()); - // If the rhs is a typecast (case 1 above), deconstruct it to get the name of - // the variable and find the assignment to it - if(component_assignment_rhs_expr.id() == ID_typecast) + // If the rhs is not an address of must be in case 1 + if(!can_cast_expr(component_assignment_rhs_expr)) { - const auto &component_assignment_rhs = - to_typecast_expr(component_assignment_rhs_expr); - - // Check the type we are casting to - if(typecast_name.has_value()) - { - REQUIRE(component_assignment_rhs.type().id() == ID_pointer); - REQUIRE( - to_struct_tag_type( - to_pointer_type(component_assignment_rhs.type()).subtype()) - .get(ID_identifier) == typecast_name.value()); - } - const auto &component_reference_tmp_name = - to_symbol_expr(component_assignment_rhs.op()).get_identifier(); + to_symbol_expr(component_assignment_rhs_expr).get_identifier(); const auto &component_reference_assignments = require_goto_statements::find_pointer_assignments( component_reference_tmp_name, entry_point_instructions) .non_null_assignments; REQUIRE(component_reference_assignments.size() == 1); - component_assignment_rhs_expr = component_reference_assignments[0].rhs(); + component_assignment_rhs_expr = + skip_typecast(component_reference_assignments[0].rhs()); } // The rhs assigns an address of a variable, get its name @@ -480,7 +507,8 @@ require_goto_statements::require_entry_point_argument_assignment( const auto &argument_assignment = argument_assignments.non_null_assignments[0]; const auto &argument_tmp_name = - to_symbol_expr(to_address_of_expr(argument_assignment.rhs()).object()) + to_symbol_expr( + to_address_of_expr(skip_typecast(argument_assignment.rhs())).object()) .get_identifier(); return argument_tmp_name; } diff --git a/src/goto-programs/remove_virtual_functions.cpp b/src/goto-programs/remove_virtual_functions.cpp index 34d4cd42002..c54647bee3f 100644 --- a/src/goto-programs/remove_virtual_functions.cpp +++ b/src/goto-programs/remove_virtual_functions.cpp @@ -113,16 +113,35 @@ static void create_static_function_call( const namespacet &ns) { call.function() = function_symbol; - // Cast the `this` pointer to the correct type for the new callee: - const auto &callee_type = - to_code_type(ns.lookup(function_symbol.get_identifier()).type); - const code_typet::parametert *this_param = callee_type.get_this(); + // Cast the pointers to the correct type for the new callee: + // Note the `this` pointer is expected to change type, but other pointers + // could also change due to e.g. using a different alias to refer to the same + // type (in Java, for example, we see ArrayList.add(ArrayList::E arg) + // overriding Collection.add(Collection::E arg)) + const auto &callee_parameters = + to_code_type(ns.lookup(function_symbol.get_identifier()).type).parameters(); + auto &call_args = call.arguments(); + INVARIANT( - this_param != nullptr, - "Virtual function callees must have a `this` argument"); - typet need_type = this_param->type(); - if(!type_eq(call.arguments()[0].type(), need_type, ns)) - call.arguments()[0].make_typecast(need_type); + callee_parameters.size() == call_args.size(), + "function overrides must have matching argument counts"); + + for(std::size_t i = 0; i < call_args.size(); ++i) + { + const typet &need_type = callee_parameters[i].type(); + + if(!type_eq(call_args[i].type(), need_type, ns)) + { + // If this wasn't language agnostic code we'd also like to check + // compatibility-- for example, Java overrides may have differing generic + // qualifiers, but not different base types. + INVARIANT( + call_args[i].type().id() == ID_pointer, + "where overriding function argument types differ, " + "those arguments must be pointer-typed"); + call_args[i].make_typecast(need_type); + } + } } /// Replace virtual function call with a static function call diff --git a/src/goto-symex/symex_assign.cpp b/src/goto-symex/symex_assign.cpp index 05939f6a44b..4048c4834c0 100644 --- a/src/goto-symex/symex_assign.cpp +++ b/src/goto-symex/symex_assign.cpp @@ -28,6 +28,10 @@ void goto_symext::symex_assign( exprt lhs=code.lhs(); exprt rhs=code.rhs(); + DATA_INVARIANT( + base_type_eq(lhs.type(), rhs.type(), ns), + "assignments must be type consistent"); + clean_expr(lhs, state, true); clean_expr(rhs, state, false); diff --git a/src/util/allocate_objects.cpp b/src/util/allocate_objects.cpp index d9d088791d4..2da68a84dac 100644 --- a/src/util/allocate_objects.cpp +++ b/src/util/allocate_objects.cpp @@ -212,12 +212,8 @@ exprt allocate_objectst::allocate_non_dynamic_object( aux_symbol.is_static_lifetime = static_lifetime; symbols_created.push_back(&aux_symbol); - exprt aoe = - ns.follow(aux_symbol.symbol_expr().type()) != - ns.follow(target_expr.type().subtype()) - ? (exprt)typecast_exprt( - address_of_exprt(aux_symbol.symbol_expr()), target_expr.type()) - : address_of_exprt(aux_symbol.symbol_expr()); + exprt aoe = typecast_exprt::conditional_cast( + address_of_exprt(aux_symbol.symbol_expr()), target_expr.type()); code_assignt code(target_expr, aoe); code.add_source_location() = source_location; diff --git a/src/util/expr_initializer.cpp b/src/util/expr_initializer.cpp index 0ea2c515ee1..cb981543d0a 100644 --- a/src/util/expr_initializer.cpp +++ b/src/util/expr_initializer.cpp @@ -270,10 +270,13 @@ exprt expr_initializert::expr_initializer_rec( } else if(type_id==ID_c_enum_tag) { - return - expr_initializer_rec( - ns.follow_tag(to_c_enum_tag_type(type)), - source_location); + exprt result = expr_initializer_rec( + ns.follow_tag(to_c_enum_tag_type(type)), source_location); + + // use the tag type + result.type() = type; + + return result; } else if(type_id==ID_struct_tag) {