Skip to content

Commit 0df054c

Browse files
authored
Merge pull request diffblue#1781 from smowton/smowton/feature/java-create-stub-globals-earlier
Java frontend: create synthetic static initialisers for stub globals
2 parents fbcb423 + e163ab6 commit 0df054c

11 files changed

+424
-116
lines changed

regression/cbmc-java/generic_class_bound1/test.desc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,5 @@ Gn.class
77
.*file Gn.java line 9 function java::Gn.foo1:\(LGn;\)V bytecode-index 1 block 1: FAILED
88
.*file Gn.java line 10 function java::Gn.foo1:\(LGn;\)V bytecode-index 4 block 2: FAILED
99
.*file Gn.java line 11 function java::Gn.foo1:\(LGn;\)V bytecode-index 5 block 3: FAILED
10-
.*file Gn.java line 13 function java::Gn.main:\(\[Ljava/lang/String;\)V bytecode-index 2 block 1: SATISFIED
10+
.*file Gn.java line 13 function java::Gn.main:\(\[Ljava/lang/String;\)V bytecode-index 2 block 2: SATISFIED
1111
--

src/java_bytecode/java_bytecode_convert_class.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,10 @@ void java_bytecode_convert_classt::convert(
377377
new_symbol.name=id2string(class_symbol.name)+"."+id2string(f.name);
378378
new_symbol.base_name=f.name;
379379
new_symbol.type=field_type;
380+
// Annotating the type with ID_C_class to provide a static field -> class
381+
// link matches the method used by java_bytecode_convert_method::convert
382+
// for methods.
383+
new_symbol.type.set(ID_C_class, class_symbol.name);
380384
new_symbol.pretty_name=id2string(class_symbol.pretty_name)+
381385
"."+id2string(f.name);
382386
new_symbol.mode=ID_java;

src/java_bytecode/java_bytecode_convert_method.cpp

Lines changed: 6 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -906,34 +906,6 @@ static void gather_symbol_live_ranges(
906906
}
907907
}
908908

909-
/// See above
910-
/// \par parameters: `se`: Symbol expression referring to a static field
911-
/// `basename`: The static field's basename
912-
/// \return Creates a symbol table entry for the static field if one doesn't
913-
/// exist already.
914-
void java_bytecode_convert_methodt::check_static_field_stub(
915-
const symbol_exprt &symbol_expr,
916-
const irep_idt &basename)
917-
{
918-
const auto &id=symbol_expr.get_identifier();
919-
if(symbol_table.symbols.find(id)==symbol_table.symbols.end())
920-
{
921-
// Create a stub, to be overwritten if/when the real class is loaded.
922-
symbolt new_symbol;
923-
new_symbol.is_static_lifetime=true;
924-
new_symbol.is_lvalue=true;
925-
new_symbol.is_state_var=true;
926-
new_symbol.name=id;
927-
new_symbol.base_name=basename;
928-
new_symbol.type=symbol_expr.type();
929-
new_symbol.pretty_name=new_symbol.name;
930-
new_symbol.mode=ID_java;
931-
new_symbol.is_type=false;
932-
new_symbol.value.make_nil();
933-
symbol_table.add(new_symbol);
934-
}
935-
}
936-
937909
/// Each static access to classname should be prefixed with a check for
938910
/// necessary static init; this returns a call implementing that check.
939911
/// \param classname: Class name
@@ -2026,8 +1998,9 @@ codet java_bytecode_convert_methodt::convert_instructions(
20261998
field_name.find("$assertionsDisabled")!=std::string::npos;
20271999
symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name);
20282000

2029-
// If external, create a symbol table entry for this static field:
2030-
check_static_field_stub(symbol_expr, field_name);
2001+
INVARIANT(
2002+
symbol_table.has_symbol(symbol_expr.get_identifier()),
2003+
"getstatic symbol should have been created before method conversion");
20312004

20322005
if(needed_lazy_methods)
20332006
{
@@ -2081,8 +2054,9 @@ codet java_bytecode_convert_methodt::convert_instructions(
20812054
const auto &field_name=arg0.get_string(ID_component_name);
20822055
symbol_expr.set_identifier(arg0.get_string(ID_class)+"."+field_name);
20832056

2084-
// If external, create a symbol table entry for this static field:
2085-
check_static_field_stub(symbol_expr, field_name);
2057+
INVARIANT(
2058+
symbol_table.has_symbol(symbol_expr.get_identifier()),
2059+
"putstatic symbol should have been created before method conversion");
20862060

20872061
if(needed_lazy_methods && arg0.type().id() == ID_symbol)
20882062
{

src/java_bytecode/java_bytecode_convert_method_class.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -246,10 +246,6 @@ class java_bytecode_convert_methodt:public messaget
246246

247247
const bytecode_infot &get_bytecode_info(const irep_idt &statement);
248248

249-
void check_static_field_stub(
250-
const symbol_exprt &se,
251-
const irep_idt &basename);
252-
253249
bool class_needs_clinit(const irep_idt &classname);
254250
exprt get_or_create_clinit_wrapper(const irep_idt &classname);
255251
codet get_clinit_call(const irep_idt &classname);

src/java_bytecode/java_bytecode_language.cpp

Lines changed: 134 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,8 @@ static void infer_opaque_type_fields(
223223
namespacet ns(symbol_table);
224224
for(const auto &method : parse_tree.parsed_class.methods)
225225
{
226-
for(const auto &instruction : method.instructions)
226+
for(const java_bytecode_parse_treet::instructiont &instruction :
227+
method.instructions)
227228
{
228229
if(instruction.statement == "getfield" ||
229230
instruction.statement == "putfield")
@@ -359,7 +360,8 @@ static void generate_constant_global_variables(
359360
{
360361
for(auto &method : parse_tree.parsed_class.methods)
361362
{
362-
for(auto &instruction : method.instructions)
363+
for(java_bytecode_parse_treet::instructiont &instruction :
364+
method.instructions)
363365
{
364366
// ldc* instructions are Java bytecode "load constant" ops, which can
365367
// retrieve a numeric constant, String literal, or Class literal.
@@ -381,6 +383,87 @@ static void generate_constant_global_variables(
381383
}
382384
}
383385

386+
/// Add a stub global symbol to the symbol table, initialising pointer-typed
387+
/// symbols with null and primitive-typed ones with an arbitrary (nondet) value.
388+
/// \param symbol_table: table to add to
389+
/// \param symbol_id: new symbol fully-qualified identifier
390+
/// \param symbol_basename: new symbol basename
391+
/// \param symbol_type: new symbol type
392+
/// \param class_id: class id that directly encloses this static field
393+
static void create_stub_global_symbol(
394+
symbol_table_baset &symbol_table,
395+
const irep_idt &symbol_id,
396+
const irep_idt &symbol_basename,
397+
const typet &symbol_type,
398+
const irep_idt &class_id)
399+
{
400+
symbolt new_symbol;
401+
new_symbol.is_static_lifetime = true;
402+
new_symbol.is_lvalue = true;
403+
new_symbol.is_state_var = true;
404+
new_symbol.name = symbol_id;
405+
new_symbol.base_name = symbol_basename;
406+
new_symbol.type = symbol_type;
407+
new_symbol.type.set(ID_C_class, class_id);
408+
new_symbol.pretty_name = new_symbol.name;
409+
new_symbol.mode = ID_java;
410+
new_symbol.is_type = false;
411+
// If pointer-typed, initialise to null and a static initialiser will be
412+
// created to initialise on first reference. If primitive-typed, specify
413+
// nondeterministic initialisation by setting a nil value.
414+
if(symbol_type.id() == ID_pointer)
415+
new_symbol.value = null_pointer_exprt(to_pointer_type(symbol_type));
416+
else
417+
new_symbol.value.make_nil();
418+
bool add_failed = symbol_table.add(new_symbol);
419+
INVARIANT(
420+
!add_failed, "caller should have checked symbol not already in table");
421+
}
422+
423+
/// Search for getstatic and putstatic instructions in a class' bytecode and
424+
/// create stub symbols for any static fields that aren't already in the symbol
425+
/// table. The new symbols are null-initialised for reference-typed globals /
426+
/// static fields, and nondet-initialised for primitives.
427+
/// \param parse_tree: class bytecode
428+
/// \param symbol_table: symbol table; may gain new symbols
429+
static void create_stub_global_symbols(
430+
const java_bytecode_parse_treet &parse_tree,
431+
symbol_table_baset &symbol_table)
432+
{
433+
namespacet ns(symbol_table);
434+
for(const auto &method : parse_tree.parsed_class.methods)
435+
{
436+
for(const java_bytecode_parse_treet::instructiont &instruction :
437+
method.instructions)
438+
{
439+
if(instruction.statement == "getstatic" ||
440+
instruction.statement == "putstatic")
441+
{
442+
INVARIANT(
443+
instruction.args.size() > 0,
444+
"get/putstatic should have at least one argument");
445+
irep_idt component = instruction.args[0].get_string(ID_component_name);
446+
INVARIANT(
447+
!component.empty(), "get/putstatic should specify a component");
448+
irep_idt class_id = instruction.args[0].get_string(ID_class);
449+
INVARIANT(
450+
!class_id.empty(), "get/putstatic should specify a class");
451+
irep_idt identifier = id2string(class_id) + "." + id2string(component);
452+
453+
if(!symbol_table.has_symbol(identifier))
454+
{
455+
create_stub_global_symbol(
456+
symbol_table,
457+
identifier,
458+
component,
459+
instruction.args[0].type(),
460+
class_id);
461+
}
462+
}
463+
}
464+
}
465+
}
466+
384467
bool java_bytecode_languaget::typecheck(
385468
symbol_tablet &symbol_table,
386469
const std::string &module)
@@ -477,10 +560,31 @@ bool java_bytecode_languaget::typecheck(
477560
<< " String or Class constant symbols"
478561
<< messaget::eom;
479562

563+
// For each reference to a stub global (that is, a global variable declared on
564+
// a class we don't have bytecode for, and therefore don't know the static
565+
// initialiser for), create a synthetic static initialiser (clinit method)
566+
// to nondet initialise it.
567+
// Note this must be done before making static initialiser wrappers below, as
568+
// this makes a Classname.clinit method, then the next pass makes a wrapper
569+
// that ensures it is only run once, and that static initialisation happens
570+
// in class-graph topological order.
571+
572+
{
573+
journalling_symbol_tablet symbol_table_journal =
574+
journalling_symbol_tablet::wrap(symbol_table);
575+
for(const auto &c : java_class_loader.class_map)
576+
{
577+
create_stub_global_symbols(c.second, symbol_table_journal);
578+
}
579+
580+
stub_global_initializer_factory.create_stub_global_initializer_symbols(
581+
symbol_table, symbol_table_journal.get_inserted(), synthetic_methods);
582+
}
583+
480584
// For each class that will require a static initializer wrapper, create a
481585
// function named package.classname::clinit_wrapper, and a corresponding
482586
// global tracking whether it has run or not:
483-
create_static_initializer_wrappers(symbol_table);
587+
create_static_initializer_wrappers(symbol_table, synthetic_methods);
484588

485589
// Now incrementally elaborate methods
486590
// that are reachable from this entry point.
@@ -495,24 +599,12 @@ bool java_bytecode_languaget::typecheck(
495599
journalling_symbol_tablet journalling_symbol_table =
496600
journalling_symbol_tablet::wrap(symbol_table);
497601

602+
// Convert all synthetic methods:
603+
for(const auto &function_id_and_type : synthetic_methods)
498604
{
499-
// Convert all static initialisers:
500-
std::vector<irep_idt> static_initialisers;
501-
502-
// Careful not to add symbols while iterating over the symbol table!
503-
for(const auto &symbol : symbol_table.symbols)
504-
{
505-
if(is_clinit_wrapper_function(symbol.second.name))
506-
static_initialisers.push_back(symbol.second.name);
507-
}
508-
509-
for(const auto &static_init_wrapper_name : static_initialisers)
510-
{
511-
convert_single_method(
512-
static_init_wrapper_name, journalling_symbol_table);
513-
}
605+
convert_single_method(
606+
function_id_and_type.first, journalling_symbol_table);
514607
}
515-
516608
// Convert all methods for which we have bytecode now
517609
for(const auto &method_sig : method_bytecode)
518610
{
@@ -714,6 +806,8 @@ bool java_bytecode_languaget::convert_single_method(
714806
// Get bytecode for specified function if we have it
715807
method_bytecodet::opt_reft cmb = method_bytecode.get(function_id);
716808

809+
synthetic_methods_mapt::iterator synthetic_method_it;
810+
717811
// Check if have a string solver implementation
718812
if(string_preprocess.implements_function(function_id))
719813
{
@@ -735,11 +829,29 @@ bool java_bytecode_languaget::convert_single_method(
735829
symbol.value = generated_code;
736830
return false;
737831
}
738-
else if(is_clinit_wrapper_function(function_id))
832+
else if(
833+
(synthetic_method_it = synthetic_methods.find(function_id)) !=
834+
synthetic_methods.end())
739835
{
836+
// Synthetic method (i.e. one generated by the Java frontend and which
837+
// doesn't occur in the source bytecode):
740838
symbolt &symbol = symbol_table.get_writeable_ref(function_id);
741-
symbol.value = get_clinit_wrapper_body(function_id, symbol_table);
742-
// Notify lazy methods of other static init functions called:
839+
switch(synthetic_method_it->second)
840+
{
841+
case synthetic_method_typet::STATIC_INITIALIZER_WRAPPER:
842+
symbol.value = get_clinit_wrapper_body(function_id, symbol_table);
843+
break;
844+
case synthetic_method_typet::STUB_CLASS_STATIC_INITIALIZER:
845+
symbol.value =
846+
stub_global_initializer_factory.get_stub_initializer_body(
847+
function_id,
848+
symbol_table,
849+
object_factory_parameters,
850+
get_pointer_type_selector());
851+
break;
852+
}
853+
// Notify lazy methods of static calls made from the newly generated
854+
// function:
743855
notify_static_method_calls(symbol.value, needed_lazy_methods);
744856
return false;
745857
}

src/java_bytecode/java_bytecode_language.h

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ Author: Daniel Kroening, [email protected]
1919
#include "ci_lazy_methods.h"
2020
#include "ci_lazy_methods_needed.h"
2121
#include "java_class_loader.h"
22+
#include "java_static_initializers.h"
2223
#include "java_string_library_preprocess.h"
24+
#include "object_factory_parameters.h"
25+
#include "synthetic_methods_map.h"
2326

2427
#include <java_bytecode/select_pointer_type.h>
2528

@@ -53,10 +56,6 @@ Author: Daniel Kroening, [email protected]
5356
" the purpose of lazy method loading\n" /* NOLINT(*) */ \
5457
" A '.*' wildcard is allowed to specify all class members\n"
5558

56-
#define MAX_NONDET_ARRAY_LENGTH_DEFAULT 5
57-
#define MAX_NONDET_STRING_LENGTH std::numeric_limits<std::int32_t>::max()
58-
#define MAX_NONDET_TREE_DEPTH 5
59-
6059
class symbolt;
6160

6261
enum lazy_methods_modet
@@ -66,26 +65,6 @@ enum lazy_methods_modet
6665
LAZY_METHODS_MODE_CONTEXT_SENSITIVE
6766
};
6867

69-
struct object_factory_parameterst final
70-
{
71-
/// Maximum value for the non-deterministically-chosen length of an array.
72-
size_t max_nondet_array_length=MAX_NONDET_ARRAY_LENGTH_DEFAULT;
73-
74-
/// Maximum value for the non-deterministically-chosen length of a string.
75-
size_t max_nondet_string_length=MAX_NONDET_STRING_LENGTH;
76-
77-
/// Maximum depth for object hierarchy on input.
78-
/// Used to prevent object factory to loop infinitely during the
79-
/// generation of code that allocates/initializes data structures of recursive
80-
/// data types or unbounded depth. We bound the maximum number of times we
81-
/// dereference a pointer using a 'depth counter'. We set a pointer to null if
82-
/// such depth becomes >= than this maximum value.
83-
size_t max_nondet_tree_depth=MAX_NONDET_TREE_DEPTH;
84-
85-
/// Force string content to be ASCII printable characters when set to true.
86-
bool string_printable = false;
87-
};
88-
8968
class java_bytecode_languaget:public languaget
9069
{
9170
public:
@@ -192,6 +171,8 @@ class java_bytecode_languaget:public languaget
192171

193172
private:
194173
const std::unique_ptr<const select_pointer_typet> pointer_type_selector;
174+
synthetic_methods_mapt synthetic_methods;
175+
stub_global_initializer_factoryt stub_global_initializer_factory;
195176
};
196177

197178
std::unique_ptr<languaget> new_java_bytecode_language();

src/java_bytecode/java_entry_point.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -82,17 +82,6 @@ static bool should_init_symbol(const symbolt &sym)
8282
return is_java_string_literal_id(sym.name);
8383
}
8484

85-
static bool is_non_null_library_global(const irep_idt &symbolid)
86-
{
87-
static const std::unordered_set<irep_idt, irep_id_hash> non_null_globals=
88-
{
89-
"java::java.lang.System.out",
90-
"java::java.lang.System.err",
91-
"java::java.lang.System.in"
92-
};
93-
return non_null_globals.count(symbolid);
94-
}
95-
9685
static void java_static_lifetime_init(
9786
symbol_table_baset &symbol_table,
9887
const source_locationt &source_location,
@@ -129,8 +118,6 @@ static void java_static_lifetime_init(
129118
allow_null=false;
130119
if(allow_null && is_java_string_literal_id(nameid))
131120
allow_null=false;
132-
if(allow_null && is_non_null_library_global(nameid))
133-
allow_null=false;
134121
}
135122
gen_nondet_init(
136123
sym.symbol_expr(),

0 commit comments

Comments
 (0)