From e163ab60766b90187e69420ac83f7e2071a57e73 Mon Sep 17 00:00:00 2001 From: Chris Smowton Date: Tue, 30 Jan 2018 17:28:26 +0000 Subject: [PATCH] Java frontend: create synthetic static initialisers for stub globals Previously the Java frontend discovered static field references during method conversion, and when it found them it created global symbols with nondet values, causing an object tree to be created in __CPROVER_initialize. This caused two problems: (1) they were created late, meaning when incrementally loading functions, __CPROVER_initialize may already have been created when a new stub static field is discovered, and (2) by creating potentially large trees of potential objects in __CPROVER_initialize, symex would be compelled to accrue a lot of possibly-unused state. This change moves the object tree creation into a synthetic static initialiser, which both defers executing the initialisation until their class is actually used, and also creates the static initialisers before method conversion, such that its already_run variable is mentioned in __CPROVER_initialize when it is created (the initialize function is now both smaller and "right first time"). --- .../cbmc-java/generic_class_bound1/test.desc | 2 +- .../java_bytecode_convert_class.cpp | 4 + .../java_bytecode_convert_method.cpp | 38 +--- .../java_bytecode_convert_method_class.h | 4 - src/java_bytecode/java_bytecode_language.cpp | 156 ++++++++++++-- src/java_bytecode/java_bytecode_language.h | 29 +-- src/java_bytecode/java_entry_point.cpp | 13 -- .../java_static_initializers.cpp | 198 ++++++++++++++++-- src/java_bytecode/java_static_initializers.h | 36 +++- src/java_bytecode/object_factory_parameters.h | 39 ++++ src/java_bytecode/synthetic_methods_map.h | 21 ++ 11 files changed, 424 insertions(+), 116 deletions(-) create mode 100644 src/java_bytecode/object_factory_parameters.h create mode 100644 src/java_bytecode/synthetic_methods_map.h diff --git a/regression/cbmc-java/generic_class_bound1/test.desc b/regression/cbmc-java/generic_class_bound1/test.desc index 3ac218b6d84..0195fb3ecc4 100644 --- a/regression/cbmc-java/generic_class_bound1/test.desc +++ b/regression/cbmc-java/generic_class_bound1/test.desc @@ -7,5 +7,5 @@ Gn.class .*file Gn.java line 9 function java::Gn.foo1:\(LGn;\)V bytecode-index 1 block 1: FAILED .*file Gn.java line 10 function java::Gn.foo1:\(LGn;\)V bytecode-index 4 block 2: FAILED .*file Gn.java line 11 function java::Gn.foo1:\(LGn;\)V bytecode-index 5 block 3: FAILED -.*file Gn.java line 13 function java::Gn.main:\(\[Ljava/lang/String;\)V bytecode-index 2 block 1: SATISFIED +.*file Gn.java line 13 function java::Gn.main:\(\[Ljava/lang/String;\)V bytecode-index 2 block 2: SATISFIED -- diff --git a/src/java_bytecode/java_bytecode_convert_class.cpp b/src/java_bytecode/java_bytecode_convert_class.cpp index fa1dbc19636..8942853075a 100644 --- a/src/java_bytecode/java_bytecode_convert_class.cpp +++ b/src/java_bytecode/java_bytecode_convert_class.cpp @@ -377,6 +377,10 @@ void java_bytecode_convert_classt::convert( new_symbol.name=id2string(class_symbol.name)+"."+id2string(f.name); new_symbol.base_name=f.name; new_symbol.type=field_type; + // Annotating the type with ID_C_class to provide a static field -> class + // link matches the method used by java_bytecode_convert_method::convert + // for methods. + new_symbol.type.set(ID_C_class, class_symbol.name); new_symbol.pretty_name=id2string(class_symbol.pretty_name)+ "."+id2string(f.name); new_symbol.mode=ID_java; diff --git a/src/java_bytecode/java_bytecode_convert_method.cpp b/src/java_bytecode/java_bytecode_convert_method.cpp index 9cda1dd2c65..ec771081f2f 100644 --- a/src/java_bytecode/java_bytecode_convert_method.cpp +++ b/src/java_bytecode/java_bytecode_convert_method.cpp @@ -906,34 +906,6 @@ static void gather_symbol_live_ranges( } } -/// See above -/// \par parameters: `se`: Symbol expression referring to a static field -/// `basename`: The static field's basename -/// \return Creates a symbol table entry for the static field if one doesn't -/// exist already. -void java_bytecode_convert_methodt::check_static_field_stub( - const symbol_exprt &symbol_expr, - const irep_idt &basename) -{ - const auto &id=symbol_expr.get_identifier(); - if(symbol_table.symbols.find(id)==symbol_table.symbols.end()) - { - // Create a stub, to be overwritten if/when the real class is loaded. - symbolt new_symbol; - new_symbol.is_static_lifetime=true; - new_symbol.is_lvalue=true; - new_symbol.is_state_var=true; - new_symbol.name=id; - new_symbol.base_name=basename; - new_symbol.type=symbol_expr.type(); - new_symbol.pretty_name=new_symbol.name; - new_symbol.mode=ID_java; - new_symbol.is_type=false; - new_symbol.value.make_nil(); - symbol_table.add(new_symbol); - } -} - /// Each static access to classname should be prefixed with a check for /// necessary static init; this returns a call implementing that check. /// \param classname: Class name @@ -2026,8 +1998,9 @@ codet java_bytecode_convert_methodt::convert_instructions( field_name.find("$assertionsDisabled")!=std::string::npos; symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); - // If external, create a symbol table entry for this static field: - check_static_field_stub(symbol_expr, field_name); + INVARIANT( + symbol_table.has_symbol(symbol_expr.get_identifier()), + "getstatic symbol should have been created before method conversion"); if(needed_lazy_methods) { @@ -2081,8 +2054,9 @@ codet java_bytecode_convert_methodt::convert_instructions( const auto &field_name=arg0.get_string(ID_component_name); symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name); - // If external, create a symbol table entry for this static field: - check_static_field_stub(symbol_expr, field_name); + INVARIANT( + symbol_table.has_symbol(symbol_expr.get_identifier()), + "putstatic symbol should have been created before method conversion"); if(needed_lazy_methods && arg0.type().id() == ID_symbol) { diff --git a/src/java_bytecode/java_bytecode_convert_method_class.h b/src/java_bytecode/java_bytecode_convert_method_class.h index 0d1c34d82db..fbcf46cedb8 100644 --- a/src/java_bytecode/java_bytecode_convert_method_class.h +++ b/src/java_bytecode/java_bytecode_convert_method_class.h @@ -246,10 +246,6 @@ class java_bytecode_convert_methodt:public messaget const bytecode_infot &get_bytecode_info(const irep_idt &statement); - void check_static_field_stub( - const symbol_exprt &se, - const irep_idt &basename); - bool class_needs_clinit(const irep_idt &classname); exprt get_or_create_clinit_wrapper(const irep_idt &classname); codet get_clinit_call(const irep_idt &classname); diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index 0400d10ed08..c655eb7d02b 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -223,7 +223,8 @@ static void infer_opaque_type_fields( namespacet ns(symbol_table); for(const auto &method : parse_tree.parsed_class.methods) { - for(const auto &instruction : method.instructions) + for(const java_bytecode_parse_treet::instructiont &instruction : + method.instructions) { if(instruction.statement == "getfield" || instruction.statement == "putfield") @@ -359,7 +360,8 @@ static void generate_constant_global_variables( { for(auto &method : parse_tree.parsed_class.methods) { - for(auto &instruction : method.instructions) + for(java_bytecode_parse_treet::instructiont &instruction : + method.instructions) { // ldc* instructions are Java bytecode "load constant" ops, which can // retrieve a numeric constant, String literal, or Class literal. @@ -381,6 +383,87 @@ static void generate_constant_global_variables( } } +/// Add a stub global symbol to the symbol table, initialising pointer-typed +/// symbols with null and primitive-typed ones with an arbitrary (nondet) value. +/// \param symbol_table: table to add to +/// \param symbol_id: new symbol fully-qualified identifier +/// \param symbol_basename: new symbol basename +/// \param symbol_type: new symbol type +/// \param class_id: class id that directly encloses this static field +static void create_stub_global_symbol( + symbol_table_baset &symbol_table, + const irep_idt &symbol_id, + const irep_idt &symbol_basename, + const typet &symbol_type, + const irep_idt &class_id) +{ + symbolt new_symbol; + new_symbol.is_static_lifetime = true; + new_symbol.is_lvalue = true; + new_symbol.is_state_var = true; + new_symbol.name = symbol_id; + new_symbol.base_name = symbol_basename; + new_symbol.type = symbol_type; + new_symbol.type.set(ID_C_class, class_id); + new_symbol.pretty_name = new_symbol.name; + new_symbol.mode = ID_java; + new_symbol.is_type = false; + // If pointer-typed, initialise to null and a static initialiser will be + // created to initialise on first reference. If primitive-typed, specify + // nondeterministic initialisation by setting a nil value. + if(symbol_type.id() == ID_pointer) + new_symbol.value = null_pointer_exprt(to_pointer_type(symbol_type)); + else + new_symbol.value.make_nil(); + bool add_failed = symbol_table.add(new_symbol); + INVARIANT( + !add_failed, "caller should have checked symbol not already in table"); +} + +/// Search for getstatic and putstatic instructions in a class' bytecode and +/// create stub symbols for any static fields that aren't already in the symbol +/// table. The new symbols are null-initialised for reference-typed globals / +/// static fields, and nondet-initialised for primitives. +/// \param parse_tree: class bytecode +/// \param symbol_table: symbol table; may gain new symbols +static void create_stub_global_symbols( + const java_bytecode_parse_treet &parse_tree, + symbol_table_baset &symbol_table) +{ + namespacet ns(symbol_table); + for(const auto &method : parse_tree.parsed_class.methods) + { + for(const java_bytecode_parse_treet::instructiont &instruction : + method.instructions) + { + if(instruction.statement == "getstatic" || + instruction.statement == "putstatic") + { + INVARIANT( + instruction.args.size() > 0, + "get/putstatic should have at least one argument"); + irep_idt component = instruction.args[0].get_string(ID_component_name); + INVARIANT( + !component.empty(), "get/putstatic should specify a component"); + irep_idt class_id = instruction.args[0].get_string(ID_class); + INVARIANT( + !class_id.empty(), "get/putstatic should specify a class"); + irep_idt identifier = id2string(class_id) + "." + id2string(component); + + if(!symbol_table.has_symbol(identifier)) + { + create_stub_global_symbol( + symbol_table, + identifier, + component, + instruction.args[0].type(), + class_id); + } + } + } + } +} + bool java_bytecode_languaget::typecheck( symbol_tablet &symbol_table, const std::string &module) @@ -477,10 +560,31 @@ bool java_bytecode_languaget::typecheck( << " String or Class constant symbols" << messaget::eom; + // For each reference to a stub global (that is, a global variable declared on + // a class we don't have bytecode for, and therefore don't know the static + // initialiser for), create a synthetic static initialiser (clinit method) + // to nondet initialise it. + // Note this must be done before making static initialiser wrappers below, as + // this makes a Classname.clinit method, then the next pass makes a wrapper + // that ensures it is only run once, and that static initialisation happens + // in class-graph topological order. + + { + journalling_symbol_tablet symbol_table_journal = + journalling_symbol_tablet::wrap(symbol_table); + for(const auto &c : java_class_loader.class_map) + { + create_stub_global_symbols(c.second, symbol_table_journal); + } + + stub_global_initializer_factory.create_stub_global_initializer_symbols( + symbol_table, symbol_table_journal.get_inserted(), synthetic_methods); + } + // For each class that will require a static initializer wrapper, create a // function named package.classname::clinit_wrapper, and a corresponding // global tracking whether it has run or not: - create_static_initializer_wrappers(symbol_table); + create_static_initializer_wrappers(symbol_table, synthetic_methods); // Now incrementally elaborate methods // that are reachable from this entry point. @@ -495,24 +599,12 @@ bool java_bytecode_languaget::typecheck( journalling_symbol_tablet journalling_symbol_table = journalling_symbol_tablet::wrap(symbol_table); + // Convert all synthetic methods: + for(const auto &function_id_and_type : synthetic_methods) { - // Convert all static initialisers: - std::vector static_initialisers; - - // Careful not to add symbols while iterating over the symbol table! - for(const auto &symbol : symbol_table.symbols) - { - if(is_clinit_wrapper_function(symbol.second.name)) - static_initialisers.push_back(symbol.second.name); - } - - for(const auto &static_init_wrapper_name : static_initialisers) - { - convert_single_method( - static_init_wrapper_name, journalling_symbol_table); - } + convert_single_method( + function_id_and_type.first, journalling_symbol_table); } - // Convert all methods for which we have bytecode now for(const auto &method_sig : method_bytecode) { @@ -714,6 +806,8 @@ bool java_bytecode_languaget::convert_single_method( // Get bytecode for specified function if we have it method_bytecodet::opt_reft cmb = method_bytecode.get(function_id); + synthetic_methods_mapt::iterator synthetic_method_it; + // Check if have a string solver implementation if(string_preprocess.implements_function(function_id)) { @@ -735,11 +829,29 @@ bool java_bytecode_languaget::convert_single_method( symbol.value = generated_code; return false; } - else if(is_clinit_wrapper_function(function_id)) + else if( + (synthetic_method_it = synthetic_methods.find(function_id)) != + synthetic_methods.end()) { + // Synthetic method (i.e. one generated by the Java frontend and which + // doesn't occur in the source bytecode): symbolt &symbol = symbol_table.get_writeable_ref(function_id); - symbol.value = get_clinit_wrapper_body(function_id, symbol_table); - // Notify lazy methods of other static init functions called: + switch(synthetic_method_it->second) + { + case synthetic_method_typet::STATIC_INITIALIZER_WRAPPER: + symbol.value = get_clinit_wrapper_body(function_id, symbol_table); + break; + case synthetic_method_typet::STUB_CLASS_STATIC_INITIALIZER: + symbol.value = + stub_global_initializer_factory.get_stub_initializer_body( + function_id, + symbol_table, + object_factory_parameters, + get_pointer_type_selector()); + break; + } + // Notify lazy methods of static calls made from the newly generated + // function: notify_static_method_calls(symbol.value, needed_lazy_methods); return false; } diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index 63078525407..d267cb9b218 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -19,7 +19,10 @@ Author: Daniel Kroening, kroening@kroening.com #include "ci_lazy_methods.h" #include "ci_lazy_methods_needed.h" #include "java_class_loader.h" +#include "java_static_initializers.h" #include "java_string_library_preprocess.h" +#include "object_factory_parameters.h" +#include "synthetic_methods_map.h" #include @@ -53,10 +56,6 @@ Author: Daniel Kroening, kroening@kroening.com " the purpose of lazy method loading\n" /* NOLINT(*) */ \ " A '.*' wildcard is allowed to specify all class members\n" -#define MAX_NONDET_ARRAY_LENGTH_DEFAULT 5 -#define MAX_NONDET_STRING_LENGTH std::numeric_limits::max() -#define MAX_NONDET_TREE_DEPTH 5 - class symbolt; enum lazy_methods_modet @@ -66,26 +65,6 @@ enum lazy_methods_modet LAZY_METHODS_MODE_CONTEXT_SENSITIVE }; -struct object_factory_parameterst final -{ - /// Maximum value for the non-deterministically-chosen length of an array. - size_t max_nondet_array_length=MAX_NONDET_ARRAY_LENGTH_DEFAULT; - - /// Maximum value for the non-deterministically-chosen length of a string. - size_t max_nondet_string_length=MAX_NONDET_STRING_LENGTH; - - /// Maximum depth for object hierarchy on input. - /// Used to prevent object factory to loop infinitely during the - /// generation of code that allocates/initializes data structures of recursive - /// data types or unbounded depth. We bound the maximum number of times we - /// dereference a pointer using a 'depth counter'. We set a pointer to null if - /// such depth becomes >= than this maximum value. - size_t max_nondet_tree_depth=MAX_NONDET_TREE_DEPTH; - - /// Force string content to be ASCII printable characters when set to true. - bool string_printable = false; -}; - class java_bytecode_languaget:public languaget { public: @@ -192,6 +171,8 @@ class java_bytecode_languaget:public languaget private: const std::unique_ptr pointer_type_selector; + synthetic_methods_mapt synthetic_methods; + stub_global_initializer_factoryt stub_global_initializer_factory; }; std::unique_ptr new_java_bytecode_language(); diff --git a/src/java_bytecode/java_entry_point.cpp b/src/java_bytecode/java_entry_point.cpp index 9b204f5745e..e8e2343937d 100644 --- a/src/java_bytecode/java_entry_point.cpp +++ b/src/java_bytecode/java_entry_point.cpp @@ -82,17 +82,6 @@ static bool should_init_symbol(const symbolt &sym) return is_java_string_literal_id(sym.name); } -static bool is_non_null_library_global(const irep_idt &symbolid) -{ - static const std::unordered_set non_null_globals= - { - "java::java.lang.System.out", - "java::java.lang.System.err", - "java::java.lang.System.in" - }; - return non_null_globals.count(symbolid); -} - static void java_static_lifetime_init( symbol_table_baset &symbol_table, const source_locationt &source_location, @@ -129,8 +118,6 @@ static void java_static_lifetime_init( allow_null=false; if(allow_null && is_java_string_literal_id(nameid)) allow_null=false; - if(allow_null && is_non_null_library_global(nameid)) - allow_null=false; } gen_nondet_init( sym.symbol_expr(), diff --git a/src/java_bytecode/java_static_initializers.cpp b/src/java_bytecode/java_static_initializers.cpp index e5739c1de09..9246ef4cba5 100644 --- a/src/java_bytecode/java_static_initializers.cpp +++ b/src/java_bytecode/java_static_initializers.cpp @@ -7,6 +7,7 @@ Author: Chris Smowton, chris.smowton@diffblue.com \*******************************************************************/ #include "java_static_initializers.h" +#include "java_object_factory.h" #include #include #include @@ -16,6 +17,7 @@ Author: Chris Smowton, chris.smowton@diffblue.com // Disable linter here to allow a std::string constant, since that holds // a length, whereas a cstr would require strlen every time. const std::string clinit_wrapper_suffix = "::clinit_wrapper"; // NOLINT(*) +const std::string clinit_function_suffix = ".:()V"; // NOLINT(*) /// Get the Java static initializer wrapper name for a given class (the wrapper /// checks if static initialization has already been done before invoking the @@ -28,18 +30,6 @@ irep_idt clinit_wrapper_name(const irep_idt &class_name) return id2string(class_name) + clinit_wrapper_suffix; } -/// Get the class name from a clinit wrapper name. This is the opposite of -/// `clinit_wrapper_name`. -/// \param wrapper_name: clinit wrapper name -/// \return class name -static irep_idt clinit_wrapper_name_to_class_name(const irep_idt &wrapper_name) -{ - const std::string &wrapper_str = id2string(wrapper_name); - PRECONDITION(wrapper_str.size() > clinit_wrapper_suffix.size()); - return wrapper_str.substr( - 0, wrapper_str.size() - clinit_wrapper_suffix.size()); -} - /// Check if function_id is a clinit wrapper /// \param function_id: some function identifier /// \return true if the passed identifier is a clinit wrapper @@ -63,7 +53,7 @@ static irep_idt clinit_already_run_variable_name(const irep_idt &class_name) /// \return Static initializer symbol name static irep_idt clinit_function_name(const irep_idt &class_name) { - return id2string(class_name) + ".:()V"; + return id2string(class_name) + clinit_function_suffix; } /// Checks whether a static initializer wrapper is needed for a given class, @@ -97,8 +87,13 @@ static bool needs_clinit_wrapper( /// \param class_name: class symbol name /// \param symbol_table: global symbol table; will gain a clinit_wrapper symbol /// and a corresponding has-run-already global. +/// \param synthetic_methods: synthetic method type map. The new clinit wrapper +/// symbol will be recorded, such that we get a callback to produce its body +/// if and when required. static void create_clinit_wrapper_symbols( - const irep_idt &class_name, symbol_tablet &symbol_table) + const irep_idt &class_name, + symbol_tablet &symbol_table, + synthetic_methods_mapt &synthetic_methods) { const irep_idt &already_run_name = clinit_already_run_variable_name(class_name); @@ -122,9 +117,21 @@ static void create_clinit_wrapper_symbols( wrapper_method_symbol.pretty_name = wrapper_method_symbol.name; wrapper_method_symbol.base_name = "clinit_wrapper"; wrapper_method_symbol.type = wrapper_method_type; + // Note this use of a type-comment to provide a back-link from a method + // to its associated class is the same one used in + // java_bytecode_convert_methodt::convert + wrapper_method_symbol.type.set(ID_C_class, class_name); wrapper_method_symbol.mode = ID_java; failed = symbol_table.add(wrapper_method_symbol); INVARIANT(!failed, "clinit-wrapper symbol should be fresh"); + + auto insert_result = synthetic_methods.emplace( + wrapper_method_symbol.name, + synthetic_method_typet::STATIC_INITIALIZER_WRAPPER); + INVARIANT( + insert_result.second, + "synthetic methods map should not already contain entry for " + "clinit wrapper"); } /// Produces the static initialiser wrapper body for the given function. @@ -133,7 +140,7 @@ static void create_clinit_wrapper_symbols( /// \param symbol_table: global symbol table /// \return the body of the static initialiser wrapper codet get_clinit_wrapper_body( - const irep_idt &function_id, const symbol_tablet &symbol_table) + const irep_idt &function_id, const symbol_table_baset &symbol_table) { // Assume that class C extends class C' and implements interfaces // I1, ..., In. We now create the following function (possibly recursively @@ -154,7 +161,10 @@ codet get_clinit_wrapper_body( // java::C::(); // } // } - irep_idt class_name = clinit_wrapper_name_to_class_name(function_id); + const symbolt &wrapper_method_symbol = symbol_table.lookup_ref(function_id); + irep_idt class_name = wrapper_method_symbol.type.get(ID_C_class); + INVARIANT( + !class_name.empty(), "wrapper function should be annotated with its class"); const symbolt &already_run_symbol = symbol_table.lookup_ref(clinit_already_run_variable_name(class_name)); @@ -204,7 +214,9 @@ codet get_clinit_wrapper_body( /// Create static initializer wrappers for all classes that need them. /// \param symbol_table: global symbol table -void create_static_initializer_wrappers(symbol_tablet &symbol_table) +void create_static_initializer_wrappers( + symbol_tablet &symbol_table, + synthetic_methods_mapt &synthetic_methods) { // Top-sort the class hierarchy, such that we visit parents before children, // and can so identify parents that need static initialisation by whether we @@ -218,6 +230,156 @@ void create_static_initializer_wrappers(symbol_tablet &symbol_table) { const irep_idt &class_identifier = class_graph[node].class_identifier; if(needs_clinit_wrapper(class_identifier, symbol_table)) - create_clinit_wrapper_symbols(class_identifier, symbol_table); + { + create_clinit_wrapper_symbols( + class_identifier, symbol_table, synthetic_methods); + } + } +} + +/// Advance map iterator to next distinct key +/// \param in: iterator to advance +/// \param end: end of container iterator +template +static itertype advance_to_next_key(itertype in, itertype end) +{ + PRECONDITION(in != end); + auto initial_key = in->first; + while(in != end && in->first == initial_key) + ++in; + return in; +} + +/// Create static initializer symbols for each distinct class that has stub +/// globals. +/// \param symbol_table: global symbol table. Will gain static initializer +/// method symbols for each class that has a stub global in `stub_globals_set` +/// \param stub_globals_set: set of stub global symbols +/// \param synthetic_methods: map of synthetic method types. We record the new +/// static initialiser such that we get a callback to provide its body as and +/// when it is required. +void stub_global_initializer_factoryt::create_stub_global_initializer_symbols( + symbol_tablet &symbol_table, + const std::unordered_set &stub_globals_set, + synthetic_methods_mapt &synthetic_methods) +{ + // Populate map from class id -> stub globals: + for(const irep_idt &stub_global : stub_globals_set) + { + const irep_idt class_id = + symbol_table.lookup_ref(stub_global).type.get(ID_C_class); + INVARIANT( + !class_id.empty(), + "static field should be annotated with its defining class"); + stub_globals_by_class.insert({class_id, stub_global}); + } + + // For each distinct class that has stub globals, create a clinit symbol: + + for(auto it = stub_globals_by_class.begin(), + itend = stub_globals_by_class.end(); + it != itend; + it = advance_to_next_key(it, itend)) + { + const irep_idt static_init_name = clinit_function_name(it->first); + + INVARIANT( + !symbol_table.has_symbol(static_init_name), + "a class cannot be both incomplete, and so have stub static fields, and " + "also define a static initializer"); + + code_typet thunk_type; + thunk_type.return_type() = void_typet(); + + symbolt static_init_symbol; + static_init_symbol.name = static_init_name; + static_init_symbol.pretty_name = static_init_name; + static_init_symbol.base_name = "clinit():V"; + static_init_symbol.mode = ID_java; + static_init_symbol.type = thunk_type; + // Note this use of a type-comment to provide a back-link from a method + // to its associated class is the same one used in + // java_bytecode_convert_methodt::convert + static_init_symbol.type.set(ID_C_class, it->first); + + bool failed = symbol_table.add(static_init_symbol); + INVARIANT(!failed, "symbol should not already exist"); + + auto insert_result = synthetic_methods.emplace( + static_init_symbol.name, + synthetic_method_typet::STUB_CLASS_STATIC_INITIALIZER); + INVARIANT( + insert_result.second, + "synthetic methods map should not already contain entry for " + "stub static initializer"); + } +} + +/// Check if a symbol is a well-known non-null global +/// \param symbolid: symbol id to check +/// \return true if this static field is known never to be null +static bool is_non_null_library_global(const irep_idt &symbolid) +{ + static const std::unordered_set non_null_globals = + { + "java::java.lang.System.out", + "java::java.lang.System.err", + "java::java.lang.System.in" + }; + return non_null_globals.count(symbolid); +} + +/// Create the body of a synthetic static initializer (clinit method), +/// which initialise stub globals in the same manner as +/// java_static_lifetime_init, only delayed until first reference by virtue of +/// being given in a static initializer rather than directly in +/// __CPROVER_initialize. +/// \param function_id: synthetic static initializer id +/// \param symbol_table: global symbol table; may gain local variables created +/// for the new static initializer +/// \param object_factory_parameters: object factory parameters used to populate +/// the stub globals and objects reachable from them +/// \param pointer_type_selector: used to choose concrete types for abstract- +/// typed globals and fields. +/// \return synthetic static initializer body. +codet stub_global_initializer_factoryt::get_stub_initializer_body( + const irep_idt &function_id, + symbol_table_baset &symbol_table, + const object_factory_parameterst &object_factory_parameters, + const select_pointer_typet &pointer_type_selector) +{ + const symbolt &stub_initializer_symbol = symbol_table.lookup_ref(function_id); + irep_idt class_id = stub_initializer_symbol.type.get(ID_C_class); + INVARIANT( + !class_id.empty(), + "synthetic static initializer should be annotated with its class"); + code_blockt static_init_body; + + // Add a standard nondet initialisation for each global declared on this + // class. Note this is the same invocation used in + // java_static_lifetime_init. + + auto class_globals = stub_globals_by_class.equal_range(class_id); + INVARIANT( + class_globals.first != class_globals.second, + "class with synthetic clinit should have at least one global to init"); + + for(auto it = class_globals.first; it != class_globals.second; ++it) + { + const symbol_exprt new_global_symbol = + symbol_table.lookup_ref(it->second).symbol_expr(); + gen_nondet_init( + new_global_symbol, + static_init_body, + symbol_table, + source_locationt(), + false, + allocation_typet::DYNAMIC, + !is_non_null_library_global(it->second), + object_factory_parameters, + pointer_type_selector, + update_in_placet::NO_UPDATE_IN_PLACE); } + + return static_init_body; } diff --git a/src/java_bytecode/java_static_initializers.h b/src/java_bytecode/java_static_initializers.h index 12f537d0c90..3c8b1a6e898 100644 --- a/src/java_bytecode/java_static_initializers.h +++ b/src/java_bytecode/java_static_initializers.h @@ -9,16 +9,48 @@ Author: Chris Smowton, chris.smowton@diffblue.com #ifndef CPROVER_JAVA_BYTECODE_JAVA_STATIC_INITIALIZERS_H #define CPROVER_JAVA_BYTECODE_JAVA_STATIC_INITIALIZERS_H +#include + #include #include +#include +#include +#include irep_idt clinit_wrapper_name(const irep_idt &class_name); bool is_clinit_wrapper_function(const irep_idt &function_id); -void create_static_initializer_wrappers(symbol_tablet &symbol_table); +void create_static_initializer_wrappers( + symbol_tablet &symbol_table, + synthetic_methods_mapt &synthetic_methods); codet get_clinit_wrapper_body( - const irep_idt &function_id, const symbol_tablet &symbol_table); + const irep_idt &function_id, const symbol_table_baset &symbol_table); + +class stub_global_initializer_factoryt +{ + /// Maps class symbols onto the stub globals that belong to them + std::unordered_multimap + stub_globals_by_class; + +public: + void create_stub_global_initializer_symbols( + symbol_tablet &symbol_table, + const std::unordered_set &stub_globals_set, + synthetic_methods_mapt &synthetic_methods); + + codet get_stub_initializer_body( + const irep_idt &function_id, + symbol_table_baset &symbol_table, + const object_factory_parameterst &object_factory_parameters, + const select_pointer_typet &pointer_type_selector); +}; + +void create_stub_global_initializers( + symbol_tablet &symbol_table, + const std::unordered_set &stub_globals_set, + const object_factory_parameterst &object_factory_parameters, + const select_pointer_typet &pointer_type_selector); #endif diff --git a/src/java_bytecode/object_factory_parameters.h b/src/java_bytecode/object_factory_parameters.h new file mode 100644 index 00000000000..dfc498c9661 --- /dev/null +++ b/src/java_bytecode/object_factory_parameters.h @@ -0,0 +1,39 @@ +/*******************************************************************\ + +Module: + +Author: Daniel Kroening, kroening@kroening.com + +\*******************************************************************/ + +#ifndef CPROVER_JAVA_BYTECODE_OBJECT_FACTORY_PARAMETERS_H +#define CPROVER_JAVA_BYTECODE_OBJECT_FACTORY_PARAMETERS_H + +#include +#include + +#define MAX_NONDET_ARRAY_LENGTH_DEFAULT 5 +#define MAX_NONDET_STRING_LENGTH std::numeric_limits::max() +#define MAX_NONDET_TREE_DEPTH 5 + +struct object_factory_parameterst final +{ + /// Maximum value for the non-deterministically-chosen length of an array. + size_t max_nondet_array_length=MAX_NONDET_ARRAY_LENGTH_DEFAULT; + + /// Maximum value for the non-deterministically-chosen length of a string. + size_t max_nondet_string_length=MAX_NONDET_STRING_LENGTH; + + /// Maximum depth for object hierarchy on input. + /// Used to prevent object factory to loop infinitely during the + /// generation of code that allocates/initializes data structures of recursive + /// data types or unbounded depth. We bound the maximum number of times we + /// dereference a pointer using a 'depth counter'. We set a pointer to null if + /// such depth becomes >= than this maximum value. + size_t max_nondet_tree_depth=MAX_NONDET_TREE_DEPTH; + + /// Force string content to be ASCII printable characters when set to true. + bool string_printable = false; +}; + +#endif diff --git a/src/java_bytecode/synthetic_methods_map.h b/src/java_bytecode/synthetic_methods_map.h new file mode 100644 index 00000000000..86f12e2506b --- /dev/null +++ b/src/java_bytecode/synthetic_methods_map.h @@ -0,0 +1,21 @@ +/*******************************************************************\ + +Module: Java Static Initializers + +Author: Chris Smowton, chris.smowton@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_JAVA_BYTECODE_SYNTHETIC_METHODS_MAP_H +#define CPROVER_JAVA_BYTECODE_SYNTHETIC_METHODS_MAP_H + +enum class synthetic_method_typet +{ + STATIC_INITIALIZER_WRAPPER, + STUB_CLASS_STATIC_INITIALIZER +}; + +typedef std::unordered_map + synthetic_methods_mapt; + +#endif