diff --git a/jbmc/src/java_bytecode/README.md b/jbmc/src/java_bytecode/README.md index 675a57b691e..9bdbaa6e495 100644 --- a/jbmc/src/java_bytecode/README.md +++ b/jbmc/src/java_bytecode/README.md @@ -674,10 +674,11 @@ determine which loading strategy to use. If [lazy_methods_mode](\ref java_bytecode_languaget::lazy_methods_mode) is \ref java_bytecode_languaget::LAZY_METHODS_MODE_EAGER then eager loading is used. Under eager loading -\ref java_bytecode_languaget::convert_single_method(const irep_idt &, symbol_table_baset &) +\ref java_bytecode_languaget::convert_single_method(const irep_idt &, symbol_table_baset &, lazy_class_to_declared_symbols_mapt &) is called once for each method listed in method_bytecode (described above). This then calls -\ref java_bytecode_languaget::convert_single_method(const irep_idt &, symbol_table_baset &, optionalt) +\ref java_bytecode_languaget::convert_single_method(const irep_idt &, symbol_table_baset &, optionalt, lazy_class_to_declared_symbols_mapt &); + without a ci_lazy_methods_neededt object, which calls \ref java_bytecode_convert_method, passing in the method parse tree. This in turn uses \ref java_bytecode_convert_methodt to turn the bytecode into symbols diff --git a/jbmc/src/java_bytecode/java_bytecode_language.cpp b/jbmc/src/java_bytecode/java_bytecode_language.cpp index f63975719e5..c5f46b549e7 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_language.cpp @@ -115,6 +115,23 @@ static prefix_filtert get_context(const optionst &options) return prefix_filtert(std::move(context_include), std::move(context_exclude)); } +std::unordered_multimap & +lazy_class_to_declared_symbols_mapt::get(const symbol_tablet &symbol_table) +{ + if(!initialized) + { + map = class_to_declared_symbols(symbol_table); + initialized = true; + } + return map; +} + +void lazy_class_to_declared_symbols_mapt::reinitialize() +{ + initialized = false; + map.clear(); +} + /// Consume options that are java bytecode specific. void java_bytecode_languaget::set_language_options(const optionst &options) { @@ -203,7 +220,26 @@ void java_bytecode_languaget::set_language_options(const optionst &options) java_cp_include_files=".*"; nondet_static = options.get_bool_option("nondet-static"); - static_values_file = options.get_option("static-values"); + if(options.is_set("static-values")) + { + const std::string filename = options.get_option("static-values"); + jsont tmp_json; + if(parse_json(filename, *message_handler, tmp_json)) + { + warning() << "Provided JSON file for static-values cannot be parsed; it" + << " will be ignored." << messaget::eom; + } + else + { + if(!tmp_json.is_object()) + { + warning() << "Provided JSON file for static-values is not a JSON " + << "object; it will be ignored." << messaget::eom; + } + else + static_values_json = std::move(to_json_object(tmp_json)); + } + } ignore_manifest_main_class = options.get_bool_option("ignore-manifest-main-class"); @@ -853,7 +889,9 @@ bool java_bytecode_languaget::typecheck( symbol_table, synthetic_methods, threading_support, - !static_values_file.empty()); + static_values_json.has_value()); + + lazy_class_to_declared_symbols_mapt class_to_declared_symbols; // Now incrementally elaborate methods // that are reachable from this entry point. @@ -876,18 +914,21 @@ bool java_bytecode_languaget::typecheck( for(const auto &function_id_and_type : synthetic_methods) { convert_single_method( - function_id_and_type.first, journalling_symbol_table); + function_id_and_type.first, + journalling_symbol_table, + class_to_declared_symbols); } // Convert all methods for which we have bytecode now for(const auto &method_sig : method_bytecode) { - convert_single_method(method_sig.first, journalling_symbol_table); + convert_single_method( + method_sig.first, journalling_symbol_table, class_to_declared_symbols); } // Now convert all newly added string methods for(const auto &fn_name : journalling_symbol_table.get_inserted()) { if(string_preprocess.implements_function(fn_name)) - convert_single_method(fn_name, symbol_table); + convert_single_method(fn_name, symbol_table, class_to_declared_symbols); } } break; @@ -982,12 +1023,17 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion( symbol_table_buildert symbol_table_builder = symbol_table_buildert::wrap(symbol_table); + lazy_class_to_declared_symbols_mapt class_to_declared_symbols; + const method_convertert method_converter = - [this, &symbol_table_builder]( + [this, &symbol_table_builder, &class_to_declared_symbols]( const irep_idt &function_id, ci_lazy_methods_neededt lazy_methods_needed) { return convert_single_method( - function_id, symbol_table_builder, std::move(lazy_methods_needed)); + function_id, + symbol_table_builder, + std::move(lazy_methods_needed), + class_to_declared_symbols); }; ci_lazy_methodst method_gather( @@ -1050,7 +1096,8 @@ void java_bytecode_languaget::convert_lazy_method( journalling_symbol_tablet symbol_table= journalling_symbol_tablet::wrap(symtab); - convert_single_method(function_id, symbol_table); + lazy_class_to_declared_symbols_mapt class_to_declared_symbols; + convert_single_method(function_id, symbol_table, class_to_declared_symbols); // Instrument runtime exceptions (unless symbol is a stub) if(symbol.value.is_not_nil()) @@ -1118,10 +1165,13 @@ static void notify_static_method_calls( /// \param symbol_table: global symbol table /// \param needed_lazy_methods: optionally a collection of needed methods to /// update with any methods touched during the conversion +/// \param class_to_declared_symbols: maps classes to the symbols that +/// they declare. bool java_bytecode_languaget::convert_single_method( const irep_idt &function_id, symbol_table_baset &symbol_table, - optionalt needed_lazy_methods) + optionalt needed_lazy_methods, + lazy_class_to_declared_symbols_mapt &class_to_declared_symbols) { // Do not convert if method is not in context if(method_in_context && !(*method_in_context)(id2string(function_id))) @@ -1177,7 +1227,7 @@ bool java_bytecode_languaget::convert_single_method( function_id, symbol_table, nondet_static, - !static_values_file.empty(), + static_values_json.has_value(), object_factory_parameters, get_pointer_type_selector(), get_message_handler()); @@ -1186,7 +1236,7 @@ bool java_bytecode_languaget::convert_single_method( function_id, symbol_table, nondet_static, - !static_values_file.empty(), + static_values_json.has_value(), object_factory_parameters, get_pointer_type_selector(), get_message_handler()); @@ -1197,14 +1247,16 @@ bool java_bytecode_languaget::convert_single_method( declaring_class(symbol_table.lookup_ref(function_id)); INVARIANT( class_name, "user_specified_clinit must be declared by a class."); + INVARIANT( + static_values_json.has_value(), "static-values JSON must be available"); writable_symbol.value = get_user_specified_clinit_body( *class_name, - static_values_file, + *static_values_json, symbol_table, - get_message_handler(), needed_lazy_methods, max_user_array_length, - references); + references, + class_to_declared_symbols.get(symbol_table)); break; } case synthetic_method_typet::STUB_CLASS_STATIC_INITIALIZER: diff --git a/jbmc/src/java_bytecode/java_bytecode_language.h b/jbmc/src/java_bytecode/java_bytecode_language.h index 5a344a9be18..dacfdd80089 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.h +++ b/jbmc/src/java_bytecode/java_bytecode_language.h @@ -121,6 +121,31 @@ enum lazy_methods_modet LAZY_METHODS_MODE_EXTERNAL_DRIVER }; +/// Map classes to the symbols they declare but is only computed once it is +/// needed and the map is then kept. +/// Note that it only includes function and field symbols (and not for example, +/// local variables), these are produced in the convert-class phase. +/// Calling `get` before the symbol table is properly filled with these symbols, +/// would make later calls return an outdated map. The +/// lazy_class_to_declared_symbols_mapt would then need to be reinitialized. +/// Similarly if some transformation creates or deletes function or field +/// symbols in the symbol table, then the map would get out of date and would +/// need to be reinitialized. +class lazy_class_to_declared_symbols_mapt +{ +public: + lazy_class_to_declared_symbols_mapt() = default; + + std::unordered_multimap & + get(const symbol_tablet &symbol_table); + + void reinitialize(); + +private: + bool initialized = false; + std::unordered_multimap map; +}; + #define JAVA_CLASS_MODEL_SUFFIX "@class_model" class java_bytecode_languaget:public languaget @@ -205,15 +230,20 @@ class java_bytecode_languaget:public languaget protected: void convert_single_method( const irep_idt &function_id, - symbol_table_baset &symbol_table) + symbol_table_baset &symbol_table, + lazy_class_to_declared_symbols_mapt &class_to_declared_symbols) { convert_single_method( - function_id, symbol_table, optionalt()); + function_id, + symbol_table, + optionalt(), + class_to_declared_symbols); } bool convert_single_method( const irep_idt &function_id, symbol_table_baset &symbol_table, - optionalt needed_lazy_methods); + optionalt needed_lazy_methods, + lazy_class_to_declared_symbols_mapt &class_to_declared_symbols); bool do_ci_lazy_method_conversion(symbol_tablet &); const select_pointer_typet &get_pointer_type_selector() const; @@ -236,10 +266,10 @@ class java_bytecode_languaget:public languaget java_string_library_preprocesst string_preprocess; std::string java_cp_include_files; bool nondet_static; - /// Path to a JSON file which contains initial values of static fields (right + /// JSON which contains initial values of static fields (right /// after the static initializer of the class was run). This is read from the - /// --static-values command-line option. - std::string static_values_file; + /// file specified by the --static-values command-line option. + optionalt static_values_json; // list of classes to force load even without reference from the entry point std::vector java_load_classes; diff --git a/jbmc/src/java_bytecode/java_static_initializers.cpp b/jbmc/src/java_bytecode/java_static_initializers.cpp index eae2a77a4d2..97484ebba14 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.cpp +++ b/jbmc/src/java_bytecode/java_static_initializers.cpp @@ -767,65 +767,72 @@ code_ifthenelset get_clinit_wrapper_body( return code_ifthenelset(std::move(check_already_run), std::move(init_body)); } +/// \return map associating classes to the symbols they declare +std::unordered_multimap +class_to_declared_symbols(const symbol_tablet &symbol_table) +{ + std::unordered_multimap result; + for(const auto &symbol_pair : symbol_table) + { + const symbolt &symbol = symbol_pair.second; + if(optionalt declaring = declaring_class(symbol)) + result.emplace(*declaring, symbol); + } + return result; +} + code_blockt get_user_specified_clinit_body( const irep_idt &class_id, - const std::string &static_values_file, + const json_objectt &static_values_json, symbol_table_baset &symbol_table, - message_handlert &message_handler, optionalt needed_lazy_methods, size_t max_user_array_length, - std::unordered_map &references) + std::unordered_map &references, + const std::unordered_multimap + &class_to_declared_symbols_map) { - jsont json; - if( - !static_values_file.empty() && - !parse_json(static_values_file, message_handler, json) && json.is_object()) + const irep_idt &real_clinit_name = clinit_function_name(class_id); + const auto class_entry = + static_values_json.find(id2string(strip_java_namespace_prefix(class_id))); + if(class_entry != static_values_json.end()) { - const auto &json_object = to_json_object(json); - const auto class_entry = - json_object.find(id2string(strip_java_namespace_prefix(class_id))); - if(class_entry != json_object.end()) + const auto &class_json_value = class_entry->second; + if(class_json_value.is_object()) { - const auto &class_json_value = class_entry->second; - if(class_json_value.is_object()) + const auto &class_json_object = to_json_object(class_json_value); + std::map static_field_values; + for(const auto &class_symbol_pair : + equal_range(class_to_declared_symbols_map, class_id)) { - const auto &class_json_object = to_json_object(class_json_value); - std::map static_field_values; - for(const auto &symbol_pair : symbol_table) + const symbolt &symbol = class_symbol_pair.second; + if(symbol.is_static_lifetime) { - const symbolt &symbol = symbol_pair.second; - if( - declaring_class(symbol) && *declaring_class(symbol) == class_id && - symbol.is_static_lifetime) + const symbol_exprt &static_field_expr = symbol.symbol_expr(); + const auto &static_field_entry = + class_json_object.find(id2string(symbol.base_name)); + if(static_field_entry != class_json_object.end()) { - const symbol_exprt &static_field_expr = symbol.symbol_expr(); - const auto &static_field_entry = - class_json_object.find(id2string(symbol.base_name)); - if(static_field_entry != class_json_object.end()) - { - static_field_values.insert( - {static_field_expr, static_field_entry->second}); - } + static_field_values.insert( + {static_field_expr, static_field_entry->second}); } } - code_blockt body; - for(const auto &value_pair : static_field_values) - { - assign_from_json( - value_pair.first, - value_pair.second, - clinit_function_name(class_id), - body, - symbol_table, - needed_lazy_methods, - max_user_array_length, - references); - } - return body; } + code_blockt body; + for(const auto &value_pair : static_field_values) + { + assign_from_json( + value_pair.first, + value_pair.second, + real_clinit_name, + body, + symbol_table, + needed_lazy_methods, + max_user_array_length, + references); + } + return body; } } - const irep_idt &real_clinit_name = clinit_function_name(class_id); if(const auto clinit_func = symbol_table.lookup(real_clinit_name)) return code_blockt{{code_function_callt{clinit_func->symbol_expr()}}}; return code_blockt{}; @@ -996,21 +1003,21 @@ code_blockt stub_global_initializer_factoryt::get_stub_initializer_body( // class. Note this is the same invocation used in // java_static_lifetime_init. - auto class_globals = stub_globals_by_class.equal_range(*class_id); + auto class_globals = equal_range(stub_globals_by_class, *class_id); INVARIANT( - class_globals.first != class_globals.second, + !class_globals.empty(), "class with synthetic clinit should have at least one global to init"); java_object_factory_parameterst parameters = object_factory_parameters; parameters.function_id = function_id; - for(auto it = class_globals.first; it != class_globals.second; ++it) + for(const auto &pair : class_globals) { const symbol_exprt new_global_symbol = - symbol_table.lookup_ref(it->second).symbol_expr(); + symbol_table.lookup_ref(pair.second).symbol_expr(); parameters.min_null_tree_depth = - is_non_null_library_global(it->second) + is_non_null_library_global(pair.second) ? object_factory_parameters.min_null_tree_depth + 1 : object_factory_parameters.min_null_tree_depth; diff --git a/jbmc/src/java_bytecode/java_static_initializers.h b/jbmc/src/java_bytecode/java_static_initializers.h index ef77a778381..5171c99c6d2 100644 --- a/jbmc/src/java_bytecode/java_static_initializers.h +++ b/jbmc/src/java_bytecode/java_static_initializers.h @@ -52,6 +52,10 @@ code_ifthenelset get_clinit_wrapper_body( const select_pointer_typet &pointer_type_selector, message_handlert &message_handler); +/// \return map associating classes to the symbols they declare +std::unordered_multimap +class_to_declared_symbols(const symbol_tablet &symbol_table); + /// Create the body of a user_specified_clinit function for a given class, which /// includes assignments for all static fields of the class to values read from /// an input file. If the file could not be parsed or an entry for this class @@ -62,27 +66,28 @@ code_ifthenelset get_clinit_wrapper_body( /// assigned default values (zero or null). /// \param class_id: the id of the class to create a user_specified_clinit /// function body for. -/// \param static_values_file: input file containing values of static fields. +/// \param static_values_json: JSON object containing values of static fields. /// The format is expected to be a map whose keys are class names, and whose -/// values are maps from static field names to values. Currently only JSON -/// is supported as a file format. +/// values are maps from static field names to values. /// \param symbol_table: used to look up and create new symbols -/// \param message_handler: used to log any errors with parsing the input file /// \param needed_lazy_methods: used to mark any runtime types given in the /// input file as needed /// \param max_user_array_length: maximum value for constant or variable length /// arrays. Any arrays that were specified to be of nondeterministic length in /// the input file will be limited by this value. /// \param references: map to keep track of reference-equal objets. +/// \param class_to_declared_symbols_map: map classes to the symbols that +/// they declare. /// \return the body of the user_specified_clinit function as a code block. code_blockt get_user_specified_clinit_body( const irep_idt &class_id, - const std::string &static_values_file, + const json_objectt &static_values_json, symbol_table_baset &symbol_table, - message_handlert &message_handler, optionalt needed_lazy_methods, size_t max_user_array_length, - std::unordered_map &references); + std::unordered_map &references, + const std::unordered_multimap + &class_to_declared_symbols_map); class stub_global_initializer_factoryt { diff --git a/src/util/range.h b/src/util/range.h index 54501a23bae..c714cdb0464 100644 --- a/src/util/range.h +++ b/src/util/range.h @@ -533,4 +533,15 @@ auto make_range(containert &container) -> ranget container.begin(), container.end()); } +/// Utility function to make equal_range method of multimap easier to use by +/// returning a ranget object. For instance, we can write: +/// `for(auto value : equal_range(map, key).filter(...).map(...)) {...}`. +template +ranget +equal_range(const multimapt &multimap, const typename multimapt::key_type &key) +{ + auto iterator_pair = multimap.equal_range(key); + return make_range(iterator_pair.first, iterator_pair.second); +} + #endif // CPROVER_UTIL_RANGE_H