Skip to content

Avoid redundant computations in get_user_specified_clinit #5078

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

5 changes: 3 additions & 2 deletions jbmc/src/java_bytecode/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -674,10 +674,11 @@ determine which loading strategy to use.
If [lazy_methods_mode](\ref java_bytecode_languaget::lazy_methods_mode) is
\ref java_bytecode_languaget::LAZY_METHODS_MODE_EAGER then eager loading is
used. Under eager loading
\ref java_bytecode_languaget::convert_single_method(const irep_idt &, symbol_table_baset &)
\ref java_bytecode_languaget::convert_single_method(const irep_idt &, symbol_table_baset &, lazy_class_to_declared_symbols_mapt &)
is called once for each method listed in method_bytecode (described above). This
then calls
\ref java_bytecode_languaget::convert_single_method(const irep_idt &, symbol_table_baset &, optionalt<ci_lazy_methods_neededt>)
\ref java_bytecode_languaget::convert_single_method(const irep_idt &, symbol_table_baset &, optionalt<ci_lazy_methods_neededt>, lazy_class_to_declared_symbols_mapt &);

without a ci_lazy_methods_neededt object, which calls
\ref java_bytecode_convert_method, passing in the method parse tree. This in
turn uses \ref java_bytecode_convert_methodt to turn the bytecode into symbols
Expand Down
80 changes: 66 additions & 14 deletions jbmc/src/java_bytecode/java_bytecode_language.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,23 @@ static prefix_filtert get_context(const optionst &options)
return prefix_filtert(std::move(context_include), std::move(context_exclude));
}

std::unordered_multimap<irep_idt, symbolt> &
lazy_class_to_declared_symbols_mapt::get(const symbol_tablet &symbol_table)
{
if(!initialized)
{
map = class_to_declared_symbols(symbol_table);
initialized = true;
}
return map;
}

void lazy_class_to_declared_symbols_mapt::reinitialize()
{
initialized = false;
map.clear();
}

/// Consume options that are java bytecode specific.
void java_bytecode_languaget::set_language_options(const optionst &options)
{
Expand Down Expand Up @@ -203,7 +220,26 @@ void java_bytecode_languaget::set_language_options(const optionst &options)
java_cp_include_files=".*";

nondet_static = options.get_bool_option("nondet-static");
static_values_file = options.get_option("static-values");
if(options.is_set("static-values"))
{
const std::string filename = options.get_option("static-values");
jsont tmp_json;
if(parse_json(filename, *message_handler, tmp_json))
{
warning() << "Provided JSON file for static-values cannot be parsed; it"
<< " will be ignored." << messaget::eom;
}
else
{
if(!tmp_json.is_object())
{
warning() << "Provided JSON file for static-values is not a JSON "
<< "object; it will be ignored." << messaget::eom;
}
else
static_values_json = std::move(to_json_object(tmp_json));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha, this does get done. Suggest squash into earlier commit that changes the type.

}
}

ignore_manifest_main_class =
options.get_bool_option("ignore-manifest-main-class");
Expand Down Expand Up @@ -853,7 +889,9 @@ bool java_bytecode_languaget::typecheck(
symbol_table,
synthetic_methods,
threading_support,
!static_values_file.empty());
static_values_json.has_value());

lazy_class_to_declared_symbols_mapt class_to_declared_symbols;

// Now incrementally elaborate methods
// that are reachable from this entry point.
Expand All @@ -876,18 +914,21 @@ bool java_bytecode_languaget::typecheck(
for(const auto &function_id_and_type : synthetic_methods)
{
convert_single_method(
function_id_and_type.first, journalling_symbol_table);
function_id_and_type.first,
journalling_symbol_table,
class_to_declared_symbols);
}
// Convert all methods for which we have bytecode now
for(const auto &method_sig : method_bytecode)
{
convert_single_method(method_sig.first, journalling_symbol_table);
convert_single_method(
method_sig.first, journalling_symbol_table, class_to_declared_symbols);
}
// Now convert all newly added string methods
for(const auto &fn_name : journalling_symbol_table.get_inserted())
{
if(string_preprocess.implements_function(fn_name))
convert_single_method(fn_name, symbol_table);
convert_single_method(fn_name, symbol_table, class_to_declared_symbols);
}
}
break;
Expand Down Expand Up @@ -982,12 +1023,17 @@ bool java_bytecode_languaget::do_ci_lazy_method_conversion(
symbol_table_buildert symbol_table_builder =
symbol_table_buildert::wrap(symbol_table);

lazy_class_to_declared_symbols_mapt class_to_declared_symbols;

const method_convertert method_converter =
[this, &symbol_table_builder](
[this, &symbol_table_builder, &class_to_declared_symbols](
const irep_idt &function_id,
ci_lazy_methods_neededt lazy_methods_needed) {
return convert_single_method(
function_id, symbol_table_builder, std::move(lazy_methods_needed));
function_id,
symbol_table_builder,
std::move(lazy_methods_needed),
class_to_declared_symbols);
};

ci_lazy_methodst method_gather(
Expand Down Expand Up @@ -1050,7 +1096,8 @@ void java_bytecode_languaget::convert_lazy_method(
journalling_symbol_tablet symbol_table=
journalling_symbol_tablet::wrap(symtab);

convert_single_method(function_id, symbol_table);
lazy_class_to_declared_symbols_mapt class_to_declared_symbols;
convert_single_method(function_id, symbol_table, class_to_declared_symbols);

// Instrument runtime exceptions (unless symbol is a stub)
if(symbol.value.is_not_nil())
Expand Down Expand Up @@ -1118,10 +1165,13 @@ static void notify_static_method_calls(
/// \param symbol_table: global symbol table
/// \param needed_lazy_methods: optionally a collection of needed methods to
/// update with any methods touched during the conversion
/// \param class_to_declared_symbols: maps classes to the symbols that
/// they declare.
bool java_bytecode_languaget::convert_single_method(
const irep_idt &function_id,
symbol_table_baset &symbol_table,
optionalt<ci_lazy_methods_neededt> needed_lazy_methods)
optionalt<ci_lazy_methods_neededt> needed_lazy_methods,
lazy_class_to_declared_symbols_mapt &class_to_declared_symbols)
{
// Do not convert if method is not in context
if(method_in_context && !(*method_in_context)(id2string(function_id)))
Expand Down Expand Up @@ -1177,7 +1227,7 @@ bool java_bytecode_languaget::convert_single_method(
function_id,
symbol_table,
nondet_static,
!static_values_file.empty(),
static_values_json.has_value(),
object_factory_parameters,
get_pointer_type_selector(),
get_message_handler());
Expand All @@ -1186,7 +1236,7 @@ bool java_bytecode_languaget::convert_single_method(
function_id,
symbol_table,
nondet_static,
!static_values_file.empty(),
static_values_json.has_value(),
object_factory_parameters,
get_pointer_type_selector(),
get_message_handler());
Expand All @@ -1197,14 +1247,16 @@ bool java_bytecode_languaget::convert_single_method(
declaring_class(symbol_table.lookup_ref(function_id));
INVARIANT(
class_name, "user_specified_clinit must be declared by a class.");
INVARIANT(
static_values_json.has_value(), "static-values JSON must be available");
writable_symbol.value = get_user_specified_clinit_body(
*class_name,
static_values_file,
*static_values_json,
symbol_table,
get_message_handler(),
needed_lazy_methods,
max_user_array_length,
references);
references,
class_to_declared_symbols.get(symbol_table));
break;
}
case synthetic_method_typet::STUB_CLASS_STATIC_INITIALIZER:
Expand Down
42 changes: 36 additions & 6 deletions jbmc/src/java_bytecode/java_bytecode_language.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,31 @@ enum lazy_methods_modet
LAZY_METHODS_MODE_EXTERNAL_DRIVER
};

/// Map classes to the symbols they declare but is only computed once it is
/// needed and the map is then kept.
/// Note that it only includes function and field symbols (and not for example,
/// local variables), these are produced in the convert-class phase.
/// Calling `get` before the symbol table is properly filled with these symbols,
/// would make later calls return an outdated map. The
/// lazy_class_to_declared_symbols_mapt would then need to be reinitialized.
/// Similarly if some transformation creates or deletes function or field
/// symbols in the symbol table, then the map would get out of date and would
/// need to be reinitialized.
class lazy_class_to_declared_symbols_mapt
{
public:
lazy_class_to_declared_symbols_mapt() = default;

std::unordered_multimap<irep_idt, symbolt> &
get(const symbol_tablet &symbol_table);

void reinitialize();

private:
bool initialized = false;
std::unordered_multimap<irep_idt, symbolt> map;
};

#define JAVA_CLASS_MODEL_SUFFIX "@class_model"

class java_bytecode_languaget:public languaget
Expand Down Expand Up @@ -205,15 +230,20 @@ class java_bytecode_languaget:public languaget
protected:
void convert_single_method(
const irep_idt &function_id,
symbol_table_baset &symbol_table)
symbol_table_baset &symbol_table,
lazy_class_to_declared_symbols_mapt &class_to_declared_symbols)
{
convert_single_method(
function_id, symbol_table, optionalt<ci_lazy_methods_neededt>());
function_id,
symbol_table,
optionalt<ci_lazy_methods_neededt>(),
class_to_declared_symbols);
}
bool convert_single_method(
const irep_idt &function_id,
symbol_table_baset &symbol_table,
optionalt<ci_lazy_methods_neededt> needed_lazy_methods);
optionalt<ci_lazy_methods_neededt> needed_lazy_methods,
lazy_class_to_declared_symbols_mapt &class_to_declared_symbols);

bool do_ci_lazy_method_conversion(symbol_tablet &);
const select_pointer_typet &get_pointer_type_selector() const;
Expand All @@ -236,10 +266,10 @@ class java_bytecode_languaget:public languaget
java_string_library_preprocesst string_preprocess;
std::string java_cp_include_files;
bool nondet_static;
/// Path to a JSON file which contains initial values of static fields (right
/// JSON which contains initial values of static fields (right
/// after the static initializer of the class was run). This is read from the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 read from the ... option -> read from the file specified by the ... option?

/// --static-values command-line option.
std::string static_values_file;
/// file specified by the --static-values command-line option.
optionalt<json_objectt> static_values_json;

// list of classes to force load even without reference from the entry point
std::vector<irep_idt> java_load_classes;
Expand Down
103 changes: 55 additions & 48 deletions jbmc/src/java_bytecode/java_static_initializers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,65 +767,72 @@ code_ifthenelset get_clinit_wrapper_body(
return code_ifthenelset(std::move(check_already_run), std::move(init_body));
}

/// \return map associating classes to the symbols they declare
std::unordered_multimap<irep_idt, symbolt>
class_to_declared_symbols(const symbol_tablet &symbol_table)
{
std::unordered_multimap<irep_idt, symbolt> result;
for(const auto &symbol_pair : symbol_table)
{
const symbolt &symbol = symbol_pair.second;
if(optionalt<irep_idt> declaring = declaring_class(symbol))
result.emplace(*declaring, symbol);
}
return result;
}

code_blockt get_user_specified_clinit_body(
const irep_idt &class_id,
const std::string &static_values_file,
const json_objectt &static_values_json,
symbol_table_baset &symbol_table,
message_handlert &message_handler,
optionalt<ci_lazy_methods_neededt> needed_lazy_methods,
size_t max_user_array_length,
std::unordered_map<std::string, object_creation_referencet> &references)
std::unordered_map<std::string, object_creation_referencet> &references,
const std::unordered_multimap<irep_idt, symbolt>
&class_to_declared_symbols_map)
{
jsont json;
if(
!static_values_file.empty() &&
!parse_json(static_values_file, message_handler, json) && json.is_object())
const irep_idt &real_clinit_name = clinit_function_name(class_id);
const auto class_entry =
static_values_json.find(id2string(strip_java_namespace_prefix(class_id)));
if(class_entry != static_values_json.end())
{
const auto &json_object = to_json_object(json);
const auto class_entry =
json_object.find(id2string(strip_java_namespace_prefix(class_id)));
if(class_entry != json_object.end())
const auto &class_json_value = class_entry->second;
if(class_json_value.is_object())
{
const auto &class_json_value = class_entry->second;
if(class_json_value.is_object())
const auto &class_json_object = to_json_object(class_json_value);
std::map<symbol_exprt, jsont> static_field_values;
for(const auto &class_symbol_pair :
equal_range(class_to_declared_symbols_map, class_id))
{
const auto &class_json_object = to_json_object(class_json_value);
std::map<symbol_exprt, jsont> static_field_values;
for(const auto &symbol_pair : symbol_table)
const symbolt &symbol = class_symbol_pair.second;
if(symbol.is_static_lifetime)
{
const symbolt &symbol = symbol_pair.second;
if(
declaring_class(symbol) && *declaring_class(symbol) == class_id &&
symbol.is_static_lifetime)
const symbol_exprt &static_field_expr = symbol.symbol_expr();
const auto &static_field_entry =
class_json_object.find(id2string(symbol.base_name));
if(static_field_entry != class_json_object.end())
{
const symbol_exprt &static_field_expr = symbol.symbol_expr();
const auto &static_field_entry =
class_json_object.find(id2string(symbol.base_name));
if(static_field_entry != class_json_object.end())
{
static_field_values.insert(
{static_field_expr, static_field_entry->second});
}
static_field_values.insert(
{static_field_expr, static_field_entry->second});
}
}
code_blockt body;
for(const auto &value_pair : static_field_values)
{
assign_from_json(
value_pair.first,
value_pair.second,
clinit_function_name(class_id),
body,
symbol_table,
needed_lazy_methods,
max_user_array_length,
references);
}
return body;
}
code_blockt body;
for(const auto &value_pair : static_field_values)
{
assign_from_json(
value_pair.first,
value_pair.second,
real_clinit_name,
body,
symbol_table,
needed_lazy_methods,
max_user_array_length,
references);
}
return body;
}
}
const irep_idt &real_clinit_name = clinit_function_name(class_id);
if(const auto clinit_func = symbol_table.lookup(real_clinit_name))
return code_blockt{{code_function_callt{clinit_func->symbol_expr()}}};
return code_blockt{};
Expand Down Expand Up @@ -996,21 +1003,21 @@ code_blockt stub_global_initializer_factoryt::get_stub_initializer_body(
// class. Note this is the same invocation used in
// java_static_lifetime_init.

auto class_globals = stub_globals_by_class.equal_range(*class_id);
auto class_globals = equal_range(stub_globals_by_class, *class_id);
INVARIANT(
class_globals.first != class_globals.second,
!class_globals.empty(),
"class with synthetic clinit should have at least one global to init");

java_object_factory_parameterst parameters = object_factory_parameters;
parameters.function_id = function_id;

for(auto it = class_globals.first; it != class_globals.second; ++it)
for(const auto &pair : class_globals)
{
const symbol_exprt new_global_symbol =
symbol_table.lookup_ref(it->second).symbol_expr();
symbol_table.lookup_ref(pair.second).symbol_expr();

parameters.min_null_tree_depth =
is_non_null_library_global(it->second)
is_non_null_library_global(pair.second)
? object_factory_parameters.min_null_tree_depth + 1
: object_factory_parameters.min_null_tree_depth;

Expand Down
Loading