Skip to content

Commit e163ab6

Browse files
committed
Java frontend: create synthetic static initialisers for stub globals
Previously the Java frontend discovered static field references during method conversion, and when it found them it created global symbols with nondet values, causing an object tree to be created in __CPROVER_initialize. This caused two problems: (1) they were created late, meaning when incrementally loading functions, __CPROVER_initialize may already have been created when a new stub static field is discovered, and (2) by creating potentially large trees of potential objects in __CPROVER_initialize, symex would be compelled to accrue a lot of possibly-unused state. This change moves the object tree creation into a synthetic static initialiser, which both defers executing the initialisation until their class is actually used, and also creates the static initialisers before method conversion, such that its already_run variable is mentioned in __CPROVER_initialize when it is created (the initialize function is now both smaller and "right first time").
1 parent 150f826 commit e163ab6

11 files changed

+424
-116
lines changed

regression/cbmc-java/generic_class_bound1/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@ Gn.class
77
.*file Gn.java line 9 function java::Gn.foo1:\(LGn;\)V bytecode-index 1 block 1: FAILED
88
.*file Gn.java line 10 function java::Gn.foo1:\(LGn;\)V bytecode-index 4 block 2: FAILED
99
.*file Gn.java line 11 function java::Gn.foo1:\(LGn;\)V bytecode-index 5 block 3: FAILED
10-
.*file Gn.java line 13 function java::Gn.main:\(\[Ljava/lang/String;\)V bytecode-index 2 block 1: SATISFIED
10+
.*file Gn.java line 13 function java::Gn.main:\(\[Ljava/lang/String;\)V bytecode-index 2 block 2: SATISFIED
1111
--

src/java_bytecode/java_bytecode_convert_class.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,10 @@ void java_bytecode_convert_classt::convert(
377377
new_symbol.name=id2string(class_symbol.name)+"."+id2string(f.name);
378378
new_symbol.base_name=f.name;
379379
new_symbol.type=field_type;
380+
// Annotating the type with ID_C_class to provide a static field -> class
381+
// link matches the method used by java_bytecode_convert_method::convert
382+
// for methods.
383+
new_symbol.type.set(ID_C_class, class_symbol.name);
380384
new_symbol.pretty_name=id2string(class_symbol.pretty_name)+
381385
"."+id2string(f.name);
382386
new_symbol.mode=ID_java;

src/java_bytecode/java_bytecode_convert_method.cpp

Lines changed: 6 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -906,34 +906,6 @@ static void gather_symbol_live_ranges(
906906
}
907907
}
908908

909-
/// See above
910-
/// \par parameters: `se`: Symbol expression referring to a static field
911-
/// `basename`: The static field's basename
912-
/// \return Creates a symbol table entry for the static field if one doesn't
913-
/// exist already.
914-
void java_bytecode_convert_methodt::check_static_field_stub(
915-
const symbol_exprt &symbol_expr,
916-
const irep_idt &basename)
917-
{
918-
const auto &id=symbol_expr.get_identifier();
919-
if(symbol_table.symbols.find(id)==symbol_table.symbols.end())
920-
{
921-
// Create a stub, to be overwritten if/when the real class is loaded.
922-
symbolt new_symbol;
923-
new_symbol.is_static_lifetime=true;
924-
new_symbol.is_lvalue=true;
925-
new_symbol.is_state_var=true;
926-
new_symbol.name=id;
927-
new_symbol.base_name=basename;
928-
new_symbol.type=symbol_expr.type();
929-
new_symbol.pretty_name=new_symbol.name;
930-
new_symbol.mode=ID_java;
931-
new_symbol.is_type=false;
932-
new_symbol.value.make_nil();
933-
symbol_table.add(new_symbol);
934-
}
935-
}
936-
937909
/// Each static access to classname should be prefixed with a check for
938910
/// necessary static init; this returns a call implementing that check.
939911
/// \param classname: Class name
@@ -2026,8 +1998,9 @@ codet java_bytecode_convert_methodt::convert_instructions(
20261998
field_name.find("$assertionsDisabled")!=std::string::npos;
20271999
symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name);
20282000

2029-
// If external, create a symbol table entry for this static field:
2030-
check_static_field_stub(symbol_expr, field_name);
2001+
INVARIANT(
2002+
symbol_table.has_symbol(symbol_expr.get_identifier()),
2003+
"getstatic symbol should have been created before method conversion");
20312004

20322005
if(needed_lazy_methods)
20332006
{
@@ -2081,8 +2054,9 @@ codet java_bytecode_convert_methodt::convert_instructions(
20812054
const auto &field_name=arg0.get_string(ID_component_name);
20822055
symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name);
20832056

2084-
// If external, create a symbol table entry for this static field:
2085-
check_static_field_stub(symbol_expr, field_name);
2057+
INVARIANT(
2058+
symbol_table.has_symbol(symbol_expr.get_identifier()),
2059+
"putstatic symbol should have been created before method conversion");
20862060

20872061
if(needed_lazy_methods && arg0.type().id() == ID_symbol)
20882062
{

src/java_bytecode/java_bytecode_convert_method_class.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -246,10 +246,6 @@ class java_bytecode_convert_methodt:public messaget
246246

247247
const bytecode_infot &get_bytecode_info(const irep_idt &statement);
248248

249-
void check_static_field_stub(
250-
const symbol_exprt &se,
251-
const irep_idt &basename);
252-
253249
bool class_needs_clinit(const irep_idt &classname);
254250
exprt get_or_create_clinit_wrapper(const irep_idt &classname);
255251
codet get_clinit_call(const irep_idt &classname);

src/java_bytecode/java_bytecode_language.cpp

Lines changed: 134 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,8 @@ static void infer_opaque_type_fields(
223223
namespacet ns(symbol_table);
224224
for(const auto &method : parse_tree.parsed_class.methods)
225225
{
226-
for(const auto &instruction : method.instructions)
226+
for(const java_bytecode_parse_treet::instructiont &instruction :
227+
method.instructions)
227228
{
228229
if(instruction.statement == "getfield" ||
229230
instruction.statement == "putfield")
@@ -359,7 +360,8 @@ static void generate_constant_global_variables(
359360
{
360361
for(auto &method : parse_tree.parsed_class.methods)
361362
{
362-
for(auto &instruction : method.instructions)
363+
for(java_bytecode_parse_treet::instructiont &instruction :
364+
method.instructions)
363365
{
364366
// ldc* instructions are Java bytecode "load constant" ops, which can
365367
// retrieve a numeric constant, String literal, or Class literal.
@@ -381,6 +383,87 @@ static void generate_constant_global_variables(
381383
}
382384
}
383385

386+
/// Add a stub global symbol to the symbol table, initialising pointer-typed
387+
/// symbols with null and primitive-typed ones with an arbitrary (nondet) value.
388+
/// \param symbol_table: table to add to
389+
/// \param symbol_id: new symbol fully-qualified identifier
390+
/// \param symbol_basename: new symbol basename
391+
/// \param symbol_type: new symbol type
392+
/// \param class_id: class id that directly encloses this static field
393+
static void create_stub_global_symbol(
394+
symbol_table_baset &symbol_table,
395+
const irep_idt &symbol_id,
396+
const irep_idt &symbol_basename,
397+
const typet &symbol_type,
398+
const irep_idt &class_id)
399+
{
400+
symbolt new_symbol;
401+
new_symbol.is_static_lifetime = true;
402+
new_symbol.is_lvalue = true;
403+
new_symbol.is_state_var = true;
404+
new_symbol.name = symbol_id;
405+
new_symbol.base_name = symbol_basename;
406+
new_symbol.type = symbol_type;
407+
new_symbol.type.set(ID_C_class, class_id);
408+
new_symbol.pretty_name = new_symbol.name;
409+
new_symbol.mode = ID_java;
410+
new_symbol.is_type = false;
411+
// If pointer-typed, initialise to null and a static initialiser will be
412+
// created to initialise on first reference. If primitive-typed, specify
413+
// nondeterministic initialisation by setting a nil value.
414+
if(symbol_type.id() == ID_pointer)
415+
new_symbol.value = null_pointer_exprt(to_pointer_type(symbol_type));
416+
else
417+
new_symbol.value.make_nil();
418+
bool add_failed = symbol_table.add(new_symbol);
419+
INVARIANT(
420+
!add_failed, "caller should have checked symbol not already in table");
421+
}
422+
423+
/// Search for getstatic and putstatic instructions in a class' bytecode and
424+
/// create stub symbols for any static fields that aren't already in the symbol
425+
/// table. The new symbols are null-initialised for reference-typed globals /
426+
/// static fields, and nondet-initialised for primitives.
427+
/// \param parse_tree: class bytecode
428+
/// \param symbol_table: symbol table; may gain new symbols
429+
static void create_stub_global_symbols(
430+
const java_bytecode_parse_treet &parse_tree,
431+
symbol_table_baset &symbol_table)
432+
{
433+
namespacet ns(symbol_table);
434+
for(const auto &method : parse_tree.parsed_class.methods)
435+
{
436+
for(const java_bytecode_parse_treet::instructiont &instruction :
437+
method.instructions)
438+
{
439+
if(instruction.statement == "getstatic" ||
440+
instruction.statement == "putstatic")
441+
{
442+
INVARIANT(
443+
instruction.args.size() > 0,
444+
"get/putstatic should have at least one argument");
445+
irep_idt component = instruction.args[0].get_string(ID_component_name);
446+
INVARIANT(
447+
!component.empty(), "get/putstatic should specify a component");
448+
irep_idt class_id = instruction.args[0].get_string(ID_class);
449+
INVARIANT(
450+
!class_id.empty(), "get/putstatic should specify a class");
451+
irep_idt identifier = id2string(class_id) + "." + id2string(component);
452+
453+
if(!symbol_table.has_symbol(identifier))
454+
{
455+
create_stub_global_symbol(
456+
symbol_table,
457+
identifier,
458+
component,
459+
instruction.args[0].type(),
460+
class_id);
461+
}
462+
}
463+
}
464+
}
465+
}
466+
384467
bool java_bytecode_languaget::typecheck(
385468
symbol_tablet &symbol_table,
386469
const std::string &module)
@@ -477,10 +560,31 @@ bool java_bytecode_languaget::typecheck(
477560
<< " String or Class constant symbols"
478561
<< messaget::eom;
479562

563+
// For each reference to a stub global (that is, a global variable declared on
564+
// a class we don't have bytecode for, and therefore don't know the static
565+
// initialiser for), create a synthetic static initialiser (clinit method)
566+
// to nondet initialise it.
567+
// Note this must be done before making static initialiser wrappers below, as
568+
// this makes a Classname.clinit method, then the next pass makes a wrapper
569+
// that ensures it is only run once, and that static initialisation happens
570+
// in class-graph topological order.
571+
572+
{
573+
journalling_symbol_tablet symbol_table_journal =
574+
journalling_symbol_tablet::wrap(symbol_table);
575+
for(const auto &c : java_class_loader.class_map)
576+
{
577+
create_stub_global_symbols(c.second, symbol_table_journal);
578+
}
579+
580+
stub_global_initializer_factory.create_stub_global_initializer_symbols(
581+
symbol_table, symbol_table_journal.get_inserted(), synthetic_methods);
582+
}
583+
480584
// For each class that will require a static initializer wrapper, create a
481585
// function named package.classname::clinit_wrapper, and a corresponding
482586
// global tracking whether it has run or not:
483-
create_static_initializer_wrappers(symbol_table);
587+
create_static_initializer_wrappers(symbol_table, synthetic_methods);
484588

485589
// Now incrementally elaborate methods
486590
// that are reachable from this entry point.
@@ -495,24 +599,12 @@ bool java_bytecode_languaget::typecheck(
495599
journalling_symbol_tablet journalling_symbol_table =
496600
journalling_symbol_tablet::wrap(symbol_table);
497601

602+
// Convert all synthetic methods:
603+
for(const auto &function_id_and_type : synthetic_methods)
498604
{
499-
// Convert all static initialisers:
500-
std::vector<irep_idt> static_initialisers;
501-
502-
// Careful not to add symbols while iterating over the symbol table!
503-
for(const auto &symbol : symbol_table.symbols)
504-
{
505-
if(is_clinit_wrapper_function(symbol.second.name))
506-
static_initialisers.push_back(symbol.second.name);
507-
}
508-
509-
for(const auto &static_init_wrapper_name : static_initialisers)
510-
{
511-
convert_single_method(
512-
static_init_wrapper_name, journalling_symbol_table);
513-
}
605+
convert_single_method(
606+
function_id_and_type.first, journalling_symbol_table);
514607
}
515-
516608
// Convert all methods for which we have bytecode now
517609
for(const auto &method_sig : method_bytecode)
518610
{
@@ -714,6 +806,8 @@ bool java_bytecode_languaget::convert_single_method(
714806
// Get bytecode for specified function if we have it
715807
method_bytecodet::opt_reft cmb = method_bytecode.get(function_id);
716808

809+
synthetic_methods_mapt::iterator synthetic_method_it;
810+
717811
// Check if have a string solver implementation
718812
if(string_preprocess.implements_function(function_id))
719813
{
@@ -735,11 +829,29 @@ bool java_bytecode_languaget::convert_single_method(
735829
symbol.value = generated_code;
736830
return false;
737831
}
738-
else if(is_clinit_wrapper_function(function_id))
832+
else if(
833+
(synthetic_method_it = synthetic_methods.find(function_id)) !=
834+
synthetic_methods.end())
739835
{
836+
// Synthetic method (i.e. one generated by the Java frontend and which
837+
// doesn't occur in the source bytecode):
740838
symbolt &symbol = symbol_table.get_writeable_ref(function_id);
741-
symbol.value = get_clinit_wrapper_body(function_id, symbol_table);
742-
// Notify lazy methods of other static init functions called:
839+
switch(synthetic_method_it->second)
840+
{
841+
case synthetic_method_typet::STATIC_INITIALIZER_WRAPPER:
842+
symbol.value = get_clinit_wrapper_body(function_id, symbol_table);
843+
break;
844+
case synthetic_method_typet::STUB_CLASS_STATIC_INITIALIZER:
845+
symbol.value =
846+
stub_global_initializer_factory.get_stub_initializer_body(
847+
function_id,
848+
symbol_table,
849+
object_factory_parameters,
850+
get_pointer_type_selector());
851+
break;
852+
}
853+
// Notify lazy methods of static calls made from the newly generated
854+
// function:
743855
notify_static_method_calls(symbol.value, needed_lazy_methods);
744856
return false;
745857
}

src/java_bytecode/java_bytecode_language.h

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ Author: Daniel Kroening, [email protected]
1919
#include "ci_lazy_methods.h"
2020
#include "ci_lazy_methods_needed.h"
2121
#include "java_class_loader.h"
22+
#include "java_static_initializers.h"
2223
#include "java_string_library_preprocess.h"
24+
#include "object_factory_parameters.h"
25+
#include "synthetic_methods_map.h"
2326

2427
#include <java_bytecode/select_pointer_type.h>
2528

@@ -53,10 +56,6 @@ Author: Daniel Kroening, [email protected]
5356
" the purpose of lazy method loading\n" /* NOLINT(*) */ \
5457
" A '.*' wildcard is allowed to specify all class members\n"
5558

56-
#define MAX_NONDET_ARRAY_LENGTH_DEFAULT 5
57-
#define MAX_NONDET_STRING_LENGTH std::numeric_limits<std::int32_t>::max()
58-
#define MAX_NONDET_TREE_DEPTH 5
59-
6059
class symbolt;
6160

6261
enum lazy_methods_modet
@@ -66,26 +65,6 @@ enum lazy_methods_modet
6665
LAZY_METHODS_MODE_CONTEXT_SENSITIVE
6766
};
6867

69-
struct object_factory_parameterst final
70-
{
71-
/// Maximum value for the non-deterministically-chosen length of an array.
72-
size_t max_nondet_array_length=MAX_NONDET_ARRAY_LENGTH_DEFAULT;
73-
74-
/// Maximum value for the non-deterministically-chosen length of a string.
75-
size_t max_nondet_string_length=MAX_NONDET_STRING_LENGTH;
76-
77-
/// Maximum depth for object hierarchy on input.
78-
/// Used to prevent object factory to loop infinitely during the
79-
/// generation of code that allocates/initializes data structures of recursive
80-
/// data types or unbounded depth. We bound the maximum number of times we
81-
/// dereference a pointer using a 'depth counter'. We set a pointer to null if
82-
/// such depth becomes >= than this maximum value.
83-
size_t max_nondet_tree_depth=MAX_NONDET_TREE_DEPTH;
84-
85-
/// Force string content to be ASCII printable characters when set to true.
86-
bool string_printable = false;
87-
};
88-
8968
class java_bytecode_languaget:public languaget
9069
{
9170
public:
@@ -192,6 +171,8 @@ class java_bytecode_languaget:public languaget
192171

193172
private:
194173
const std::unique_ptr<const select_pointer_typet> pointer_type_selector;
174+
synthetic_methods_mapt synthetic_methods;
175+
stub_global_initializer_factoryt stub_global_initializer_factory;
195176
};
196177

197178
std::unique_ptr<languaget> new_java_bytecode_language();

src/java_bytecode/java_entry_point.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -82,17 +82,6 @@ static bool should_init_symbol(const symbolt &sym)
8282
return is_java_string_literal_id(sym.name);
8383
}
8484

85-
static bool is_non_null_library_global(const irep_idt &symbolid)
86-
{
87-
static const std::unordered_set<irep_idt, irep_id_hash> non_null_globals=
88-
{
89-
"java::java.lang.System.out",
90-
"java::java.lang.System.err",
91-
"java::java.lang.System.in"
92-
};
93-
return non_null_globals.count(symbolid);
94-
}
95-
9685
static void java_static_lifetime_init(
9786
symbol_table_baset &symbol_table,
9887
const source_locationt &source_location,
@@ -129,8 +118,6 @@ static void java_static_lifetime_init(
129118
allow_null=false;
130119
if(allow_null && is_java_string_literal_id(nameid))
131120
allow_null=false;
132-
if(allow_null && is_non_null_library_global(nameid))
133-
allow_null=false;
134121
}
135122
gen_nondet_init(
136123
sym.symbol_expr(),

0 commit comments

Comments
 (0)