Skip to content

Java frontend: create synthetic static initialisers for stub globals #1781

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion regression/cbmc-java/generic_class_bound1/test.desc
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ Gn.class
.*file Gn.java line 9 function java::Gn.foo1:\(LGn;\)V bytecode-index 1 block 1: FAILED
.*file Gn.java line 10 function java::Gn.foo1:\(LGn;\)V bytecode-index 4 block 2: FAILED
.*file Gn.java line 11 function java::Gn.foo1:\(LGn;\)V bytecode-index 5 block 3: FAILED
.*file Gn.java line 13 function java::Gn.main:\(\[Ljava/lang/String;\)V bytecode-index 2 block 1: SATISFIED
.*file Gn.java line 13 function java::Gn.main:\(\[Ljava/lang/String;\)V bytecode-index 2 block 2: SATISFIED
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not especially important but do you know why?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, for example on accessing the System static class for the first time (e.g. for System.out.println) it will call a synthetic System.clinit method, and the new function call constitutes a coverage goal. Previously the init of System.out et al would have happened during __CPROVER_initialize, i.e. at start-of-day, regardless of whether the objects were actually used or not.

--
4 changes: 4 additions & 0 deletions src/java_bytecode/java_bytecode_convert_class.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,10 @@ void java_bytecode_convert_classt::convert(
new_symbol.name=id2string(class_symbol.name)+"."+id2string(f.name);
new_symbol.base_name=f.name;
new_symbol.type=field_type;
// Annotating the type with ID_C_class to provide a static field -> class
// link matches the method used by java_bytecode_convert_method::convert
// for methods.
new_symbol.type.set(ID_C_class, class_symbol.name);
new_symbol.pretty_name=id2string(class_symbol.pretty_name)+
"."+id2string(f.name);
new_symbol.mode=ID_java;
Expand Down
38 changes: 6 additions & 32 deletions src/java_bytecode/java_bytecode_convert_method.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -906,34 +906,6 @@ static void gather_symbol_live_ranges(
}
}

/// See above
/// \par parameters: `se`: Symbol expression referring to a static field
/// `basename`: The static field's basename
/// \return Creates a symbol table entry for the static field if one doesn't
/// exist already.
void java_bytecode_convert_methodt::check_static_field_stub(
const symbol_exprt &symbol_expr,
const irep_idt &basename)
{
const auto &id=symbol_expr.get_identifier();
if(symbol_table.symbols.find(id)==symbol_table.symbols.end())
{
// Create a stub, to be overwritten if/when the real class is loaded.
symbolt new_symbol;
new_symbol.is_static_lifetime=true;
new_symbol.is_lvalue=true;
new_symbol.is_state_var=true;
new_symbol.name=id;
new_symbol.base_name=basename;
new_symbol.type=symbol_expr.type();
new_symbol.pretty_name=new_symbol.name;
new_symbol.mode=ID_java;
new_symbol.is_type=false;
new_symbol.value.make_nil();
symbol_table.add(new_symbol);
}
}

/// Each static access to classname should be prefixed with a check for
/// necessary static init; this returns a call implementing that check.
/// \param classname: Class name
Expand Down Expand Up @@ -2026,8 +1998,9 @@ codet java_bytecode_convert_methodt::convert_instructions(
field_name.find("$assertionsDisabled")!=std::string::npos;
symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name);

// If external, create a symbol table entry for this static field:
check_static_field_stub(symbol_expr, field_name);
INVARIANT(
symbol_table.has_symbol(symbol_expr.get_identifier()),
"getstatic symbol should have been created before method conversion");

if(needed_lazy_methods)
{
Expand Down Expand Up @@ -2081,8 +2054,9 @@ codet java_bytecode_convert_methodt::convert_instructions(
const auto &field_name=arg0.get_string(ID_component_name);
symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name);

// If external, create a symbol table entry for this static field:
check_static_field_stub(symbol_expr, field_name);
INVARIANT(
symbol_table.has_symbol(symbol_expr.get_identifier()),
"putstatic symbol should have been created before method conversion");

if(needed_lazy_methods && arg0.type().id() == ID_symbol)
{
Expand Down
4 changes: 0 additions & 4 deletions src/java_bytecode/java_bytecode_convert_method_class.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,6 @@ class java_bytecode_convert_methodt:public messaget

const bytecode_infot &get_bytecode_info(const irep_idt &statement);

void check_static_field_stub(
const symbol_exprt &se,
const irep_idt &basename);

bool class_needs_clinit(const irep_idt &classname);
exprt get_or_create_clinit_wrapper(const irep_idt &classname);
codet get_clinit_call(const irep_idt &classname);
Expand Down
156 changes: 134 additions & 22 deletions src/java_bytecode/java_bytecode_language.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,8 @@ static void infer_opaque_type_fields(
namespacet ns(symbol_table);
for(const auto &method : parse_tree.parsed_class.methods)
{
for(const auto &instruction : method.instructions)
for(const java_bytecode_parse_treet::instructiont &instruction :
method.instructions)
{
if(instruction.statement == "getfield" ||
instruction.statement == "putfield")
Expand Down Expand Up @@ -359,7 +360,8 @@ static void generate_constant_global_variables(
{
for(auto &method : parse_tree.parsed_class.methods)
{
for(auto &instruction : method.instructions)
for(java_bytecode_parse_treet::instructiont &instruction :
method.instructions)
{
// ldc* instructions are Java bytecode "load constant" ops, which can
// retrieve a numeric constant, String literal, or Class literal.
Expand All @@ -381,6 +383,87 @@ static void generate_constant_global_variables(
}
}

/// Add a stub global symbol to the symbol table, initialising pointer-typed
/// symbols with null and primitive-typed ones with an arbitrary (nondet) value.
/// \param symbol_table: table to add to
/// \param symbol_id: new symbol fully-qualified identifier
/// \param symbol_basename: new symbol basename
/// \param symbol_type: new symbol type
/// \param class_id: class id that directly encloses this static field
static void create_stub_global_symbol(
symbol_table_baset &symbol_table,
const irep_idt &symbol_id,
const irep_idt &symbol_basename,
const typet &symbol_type,
const irep_idt &class_id)
{
symbolt new_symbol;
new_symbol.is_static_lifetime = true;
new_symbol.is_lvalue = true;
new_symbol.is_state_var = true;
new_symbol.name = symbol_id;
new_symbol.base_name = symbol_basename;
new_symbol.type = symbol_type;
new_symbol.type.set(ID_C_class, class_id);
new_symbol.pretty_name = new_symbol.name;
new_symbol.mode = ID_java;
new_symbol.is_type = false;
// If pointer-typed, initialise to null and a static initialiser will be
// created to initialise on first reference. If primitive-typed, specify
// nondeterministic initialisation by setting a nil value.
if(symbol_type.id() == ID_pointer)
new_symbol.value = null_pointer_exprt(to_pointer_type(symbol_type));
else
new_symbol.value.make_nil();
bool add_failed = symbol_table.add(new_symbol);
INVARIANT(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might just be a personal preference, but this feels more like a PRECONDITION(symbol_table.has_symbol(symbol_id)); of the method. This of course would involve two calls to the symbol table

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm going to stick with the single-lookup version here

!add_failed, "caller should have checked symbol not already in table");
}

/// Search for getstatic and putstatic instructions in a class' bytecode and
/// create stub symbols for any static fields that aren't already in the symbol
/// table. The new symbols are null-initialised for reference-typed globals /
/// static fields, and nondet-initialised for primitives.
/// \param parse_tree: class bytecode
/// \param symbol_table: symbol table; may gain new symbols
static void create_stub_global_symbols(
const java_bytecode_parse_treet &parse_tree,
symbol_table_baset &symbol_table)
{
namespacet ns(symbol_table);
for(const auto &method : parse_tree.parsed_class.methods)
{
for(const java_bytecode_parse_treet::instructiont &instruction :
method.instructions)
{
if(instruction.statement == "getstatic" ||
instruction.statement == "putstatic")
{
INVARIANT(
instruction.args.size() > 0,
"get/putstatic should have at least one argument");
irep_idt component = instruction.args[0].get_string(ID_component_name);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a way to avoid direct irep operations here? Though I see this is a common variant, so perhaps out of scope of this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this should be a separate task of adding a std_expr like file that describes Java bytecode parse tree ireps

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created TG-2412 in the vague hope we'll get round to this...

INVARIANT(
!component.empty(), "get/putstatic should specify a component");
irep_idt class_id = instruction.args[0].get_string(ID_class);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto

INVARIANT(
!class_id.empty(), "get/putstatic should specify a class");
irep_idt identifier = id2string(class_id) + "." + id2string(component);

if(!symbol_table.has_symbol(identifier))
{
create_stub_global_symbol(
symbol_table,
identifier,
component,
instruction.args[0].type(),
class_id);
}
}
}
}
}

bool java_bytecode_languaget::typecheck(
symbol_tablet &symbol_table,
const std::string &module)
Expand Down Expand Up @@ -477,10 +560,31 @@ bool java_bytecode_languaget::typecheck(
<< " String or Class constant symbols"
<< messaget::eom;

// For each reference to a stub global (that is, a global variable declared on
// a class we don't have bytecode for, and therefore don't know the static
// initialiser for), create a synthetic static initialiser (clinit method)
// to nondet initialise it.
// Note this must be done before making static initialiser wrappers below, as
// this makes a Classname.clinit method, then the next pass makes a wrapper
// that ensures it is only run once, and that static initialisation happens
// in class-graph topological order.

{
journalling_symbol_tablet symbol_table_journal =
journalling_symbol_tablet::wrap(symbol_table);
for(const auto &c : java_class_loader.class_map)
{
create_stub_global_symbols(c.second, symbol_table_journal);
}

stub_global_initializer_factory.create_stub_global_initializer_symbols(
symbol_table, symbol_table_journal.get_inserted(), synthetic_methods);
}

// For each class that will require a static initializer wrapper, create a
// function named package.classname::clinit_wrapper, and a corresponding
// global tracking whether it has run or not:
create_static_initializer_wrappers(symbol_table);
create_static_initializer_wrappers(symbol_table, synthetic_methods);

// Now incrementally elaborate methods
// that are reachable from this entry point.
Expand All @@ -495,24 +599,12 @@ bool java_bytecode_languaget::typecheck(
journalling_symbol_tablet journalling_symbol_table =
journalling_symbol_tablet::wrap(symbol_table);

// Convert all synthetic methods:
for(const auto &function_id_and_type : synthetic_methods)
{
// Convert all static initialisers:
std::vector<irep_idt> static_initialisers;

// Careful not to add symbols while iterating over the symbol table!
for(const auto &symbol : symbol_table.symbols)
{
if(is_clinit_wrapper_function(symbol.second.name))
static_initialisers.push_back(symbol.second.name);
}

for(const auto &static_init_wrapper_name : static_initialisers)
{
convert_single_method(
static_init_wrapper_name, journalling_symbol_table);
}
convert_single_method(
function_id_and_type.first, journalling_symbol_table);
}

// Convert all methods for which we have bytecode now
for(const auto &method_sig : method_bytecode)
{
Expand Down Expand Up @@ -714,6 +806,8 @@ bool java_bytecode_languaget::convert_single_method(
// Get bytecode for specified function if we have it
method_bytecodet::opt_reft cmb = method_bytecode.get(function_id);

synthetic_methods_mapt::iterator synthetic_method_it;

// Check if have a string solver implementation
if(string_preprocess.implements_function(function_id))
{
Expand All @@ -735,11 +829,29 @@ bool java_bytecode_languaget::convert_single_method(
symbol.value = generated_code;
return false;
}
else if(is_clinit_wrapper_function(function_id))
else if(
(synthetic_method_it = synthetic_methods.find(function_id)) !=
synthetic_methods.end())
{
// Synthetic method (i.e. one generated by the Java frontend and which
// doesn't occur in the source bytecode):
symbolt &symbol = symbol_table.get_writeable_ref(function_id);
symbol.value = get_clinit_wrapper_body(function_id, symbol_table);
// Notify lazy methods of other static init functions called:
switch(synthetic_method_it->second)
{
case synthetic_method_typet::STATIC_INITIALIZER_WRAPPER:
symbol.value = get_clinit_wrapper_body(function_id, symbol_table);
break;
case synthetic_method_typet::STUB_CLASS_STATIC_INITIALIZER:
symbol.value =
stub_global_initializer_factory.get_stub_initializer_body(
function_id,
symbol_table,
object_factory_parameters,
get_pointer_type_selector());
break;
}
// Notify lazy methods of static calls made from the newly generated
// function:
notify_static_method_calls(symbol.value, needed_lazy_methods);
return false;
}
Expand Down
29 changes: 5 additions & 24 deletions src/java_bytecode/java_bytecode_language.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ Author: Daniel Kroening, [email protected]
#include "ci_lazy_methods.h"
#include "ci_lazy_methods_needed.h"
#include "java_class_loader.h"
#include "java_static_initializers.h"
#include "java_string_library_preprocess.h"
#include "object_factory_parameters.h"
#include "synthetic_methods_map.h"

#include <java_bytecode/select_pointer_type.h>

Expand Down Expand Up @@ -53,10 +56,6 @@ Author: Daniel Kroening, [email protected]
" the purpose of lazy method loading\n" /* NOLINT(*) */ \
" A '.*' wildcard is allowed to specify all class members\n"

#define MAX_NONDET_ARRAY_LENGTH_DEFAULT 5
#define MAX_NONDET_STRING_LENGTH std::numeric_limits<std::int32_t>::max()
#define MAX_NONDET_TREE_DEPTH 5

class symbolt;

enum lazy_methods_modet
Expand All @@ -66,26 +65,6 @@ enum lazy_methods_modet
LAZY_METHODS_MODE_CONTEXT_SENSITIVE
};

struct object_factory_parameterst final
{
/// Maximum value for the non-deterministically-chosen length of an array.
size_t max_nondet_array_length=MAX_NONDET_ARRAY_LENGTH_DEFAULT;

/// Maximum value for the non-deterministically-chosen length of a string.
size_t max_nondet_string_length=MAX_NONDET_STRING_LENGTH;

/// Maximum depth for object hierarchy on input.
/// Used to prevent object factory to loop infinitely during the
/// generation of code that allocates/initializes data structures of recursive
/// data types or unbounded depth. We bound the maximum number of times we
/// dereference a pointer using a 'depth counter'. We set a pointer to null if
/// such depth becomes >= than this maximum value.
size_t max_nondet_tree_depth=MAX_NONDET_TREE_DEPTH;

/// Force string content to be ASCII printable characters when set to true.
bool string_printable = false;
};

class java_bytecode_languaget:public languaget
{
public:
Expand Down Expand Up @@ -192,6 +171,8 @@ class java_bytecode_languaget:public languaget

private:
const std::unique_ptr<const select_pointer_typet> pointer_type_selector;
synthetic_methods_mapt synthetic_methods;
stub_global_initializer_factoryt stub_global_initializer_factory;
};

std::unique_ptr<languaget> new_java_bytecode_language();
Expand Down
13 changes: 0 additions & 13 deletions src/java_bytecode/java_entry_point.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,17 +82,6 @@ static bool should_init_symbol(const symbolt &sym)
return is_java_string_literal_id(sym.name);
}

static bool is_non_null_library_global(const irep_idt &symbolid)
{
static const std::unordered_set<irep_idt, irep_id_hash> non_null_globals=
{
"java::java.lang.System.out",
"java::java.lang.System.err",
"java::java.lang.System.in"
};
return non_null_globals.count(symbolid);
}

static void java_static_lifetime_init(
symbol_table_baset &symbol_table,
const source_locationt &source_location,
Expand Down Expand Up @@ -129,8 +118,6 @@ static void java_static_lifetime_init(
allow_null=false;
if(allow_null && is_java_string_literal_id(nameid))
allow_null=false;
if(allow_null && is_non_null_library_global(nameid))
allow_null=false;
}
gen_nondet_init(
sym.symbol_expr(),
Expand Down
Loading