From 48914be33a97ee8d564c7e2acf1385bdc79f0a78 Mon Sep 17 00:00:00 2001 From: Kurt Degiorgio Date: Mon, 12 Feb 2018 09:44:45 +0000 Subject: [PATCH 1/2] Modifies the instrumented clinit_wrapper function The previous version of clinit_wrapper did not conform to the JVM specification (section 5.5) as it did not support concurrency. More specifically, the clinit_wrapper has to be carefully synchronised as other threads may try to initialize a given class or interface at the same-time. This commit makes the clinit_wrapper thread-safe by introducing a critical section and two state variables. The commit implements a simplification of the algorithm defined in section 5.5 of the JVM specification. For instance, exceptions thrown during the execution of static initializers are not properly handled. --- .../java_static_initializers.cpp | 454 +++++++++++++++--- src/java_bytecode/java_static_initializers.h | 3 +- 2 files changed, 379 insertions(+), 78 deletions(-) diff --git a/src/java_bytecode/java_static_initializers.cpp b/src/java_bytecode/java_static_initializers.cpp index eefa2a42eb7..c3721978606 100644 --- a/src/java_bytecode/java_static_initializers.cpp +++ b/src/java_bytecode/java_static_initializers.cpp @@ -14,6 +14,34 @@ Author: Chris Smowton, chris.smowton@diffblue.com #include #include #include +#include + +/// The three states in which a `` method for a class can be before, +/// after, and during static class initialization. +/// +/// According to the JVM Spec document (section 5.5), the JVM needs to +/// maintain, for every class initializer, a state indicating whether the +/// initializer has been executed, is being executed, or has raised errors. +/// The spec mandates that the JVM consider 4 different states (not +/// initialized, being initialized, ready for use, and initialization error). +/// The `clinit_statet` is a simplification of those 4 states where: +/// +/// `NOT_INIT` corresponds to "not initialized" +/// `IN_PROGRESS` corresponds to "some thread is currently running the +/// `` and no other thread should run it" +/// `INIT_COMPLETE` corresponds to "the `` has been executed and the +/// class is ready to be used, or it has errored" +/// +/// The last state corresponds to a fusion of the two original states "ready +/// for use" and "initialization error". +enum class clinit_statest +{ + NOT_INIT, + IN_PROGRESS, + INIT_COMPLETE +}; + +const typet clinit_states_type = java_byte_type(); // Disable linter here to allow a std::string constant, since that holds // a length, whereas a cstr would require strlen every time. @@ -39,13 +67,35 @@ bool is_clinit_wrapper_function(const irep_idt &function_id) return has_suffix(id2string(function_id), clinit_wrapper_suffix); } -/// Get name of the static-initialization-already-done global variable for a -/// given class. -/// \param class_name: class symbol name -/// \return static initializer wrapper-already run global name -static irep_idt clinit_already_run_variable_name(const irep_idt &class_name) +/// Add a new symbol to the symbol table. +/// Note: assumes that a symbol with this name does not exist. +/// /param name: name of the symbol to be generated +/// /param type: type of the symbol to be generated +/// /param value: initial value of the symbol to be generated +/// /param is_thread_local: if true this symbol will be set as thread local +/// /param is_static_lifetime: if true this symbol will be set as static +/// /return returns new symbol. +static symbolt add_new_symbol( + symbol_table_baset &symbol_table, + const irep_idt &name, + const typet &type, + const exprt &value, + const bool is_thread_local, + const bool is_static_lifetime) { - return id2string(class_name) + "::clinit_already_run"; + symbolt new_symbol; + new_symbol.name = name; + new_symbol.pretty_name = name; + new_symbol.base_name = name; + new_symbol.type = type; + new_symbol.value = value; + new_symbol.is_lvalue = true; + new_symbol.is_state_var = true; + new_symbol.is_static_lifetime = is_static_lifetime; + new_symbol.is_thread_local = is_thread_local; + new_symbol.mode = ID_java; + symbol_table.add(new_symbol); + return new_symbol; } /// Get name of the real static initializer for a given class. Doesn't check @@ -57,6 +107,64 @@ static irep_idt clinit_function_name(const irep_idt &class_name) return id2string(class_name) + clinit_function_suffix; } +/// Get name of the static-initialization-state global variable for a +/// given class. +/// \param class_name: class symbol name +/// \return static initializer wrapper-state variable global name +static irep_idt clinit_state_var_name(const irep_idt &class_name) +{ + return id2string(class_name) + CPROVER_PREFIX "clinit_state"; +} + +/// Get name of the static-initialization-state local state variable for a +/// given class. +/// \param class_name: class symbol name +/// \return static initializer wrapper-state local state variable name +static irep_idt clinit_thread_local_state_var_name(const irep_idt &class_name) +{ + return id2string(class_name) + CPROVER_PREFIX "clinit_threadlocal_state"; +} + +/// Get name of the static-initialization local variable for a given class. +/// \param class_name: class symbol name +/// \return static initializer wrapper-state local variable +static irep_idt clinit_local_init_complete_var_name(const irep_idt &class_name) +{ + return id2string(class_name) + CPROVER_PREFIX "clinit_wrapper::init_complete"; +} + +/// Generates a code_assignt for clinit_statest +/// /param expr: +/// expression to be used as the LHS of generated assignment. +/// /param state: +/// execution state of the clint_wrapper, used as the RHS of the generated +/// assignment. +/// /return returns a code_assignt, assigning \p expr to the integer +/// representation of \p state +static code_assignt +gen_clinit_assignexpr(const exprt &expr, const clinit_statest state) +{ + mp_integer initv(static_cast(state)); + constant_exprt init_s = from_integer(initv, clinit_states_type); + return code_assignt(expr, init_s); +} + +/// Generates an equal_exprt for clinit_statest +/// /param expr: +/// expression to be used as the LHS of generated eqaul exprt. +/// /param state: +/// execution state of the clint_wrapper, used as the RHS of the generated +/// equal exprt. +/// /return returns a equal_exprt, equating \p expr to the integer +/// representation of \p state +static equal_exprt +gen_clinit_eqexpr(const exprt &expr, const clinit_statest state) +{ + mp_integer initv(static_cast(state)); + constant_exprt init_s = from_integer(initv, clinit_states_type); + return equal_exprt(expr, init_s); +} + /// Checks whether a static initializer wrapper is needed for a given class, /// i.e. if the given class or any superclass has a static initializer. /// \param class_name: class symbol name @@ -96,21 +204,29 @@ static void create_clinit_wrapper_symbols( symbol_tablet &symbol_table, synthetic_methods_mapt &synthetic_methods) { - const irep_idt &already_run_name = - clinit_already_run_variable_name(class_name); - symbolt already_run_symbol; - already_run_symbol.name = already_run_name; - already_run_symbol.pretty_name = already_run_name; - already_run_symbol.base_name = "clinit_already_run"; - already_run_symbol.type = bool_typet(); - already_run_symbol.value = false_exprt(); - already_run_symbol.is_lvalue = true; - already_run_symbol.is_state_var = true; - already_run_symbol.is_static_lifetime = true; - already_run_symbol.mode = ID_java; - bool failed = symbol_table.add(already_run_symbol); - INVARIANT(!failed, "clinit-already-run symbol should be fresh"); - + exprt not_init_value = from_integer( + static_cast(clinit_statest::NOT_INIT), clinit_states_type); + + // Create two global static synthetic "fields" for the class "id" + // these two variables hold the state of the class initialization algorithm + // across calls to the clinit_wrapper + add_new_symbol( + symbol_table, + clinit_state_var_name(class_name), + clinit_states_type, + not_init_value, + false, + true); + + add_new_symbol( + symbol_table, + clinit_thread_local_state_var_name(class_name), + clinit_states_type, + not_init_value, + true, + true); + + // Create symbol for the "clinit_wrapper" symbolt wrapper_method_symbol; code_typet wrapper_method_type; wrapper_method_type.return_type() = void_typet(); @@ -126,7 +242,7 @@ static void create_clinit_wrapper_symbols( // java_bytecode_convert_methodt::convert wrapper_method_symbol.type.set(ID_C_class, class_name); wrapper_method_symbol.mode = ID_java; - failed = symbol_table.add(wrapper_method_symbol); + bool failed = symbol_table.add(wrapper_method_symbol); INVARIANT(!failed, "clinit-wrapper symbol should be fresh"); auto insert_result = synthetic_methods.emplace( @@ -138,82 +254,266 @@ static void create_clinit_wrapper_symbols( "clinit wrapper"); } -/// Produces the static initialiser wrapper body for the given function. +/// Produces the static initialiser wrapper body for the given function. This +/// static initializer implements (a simplification of) the algorithm defined +/// in Section 5.5 of the JVM Specs. This function, or wrapper, checks whether +/// static init has already taken place, calls the actual `` method if +/// not, and possibly recursively initializes super-classes and interfaces. +/// Assume that C is the class to be initialized and that C extends C' and +/// implements interfaces I1 ... In, then the algorithm is as follows: +/// +/// \code +/// +/// bool init_complete; +/// if(java::C::__CPROVER_PREFIX_clinit_thread_local_state == INIT_COMPLETE) +/// { +/// return; +/// } +/// java::C::__CPROVER_PREFIX_clinit_thread_local_state = INIT_COMPLETE; +/// +/// // This thread atomically checks and sets the global variable +/// // 'clinit_state' in order to ensure that only this thread runs the +/// // static initializer. The assume() statement below will prevent the SAT +/// // solver from producing a thread schedule where more than 1 thread is +/// // running the initializer. At the end of this function the only +/// // thread that runs the static initializer will update the variable. +/// // Alternatively we could have done a busy wait / spin-lock, but that +/// // would achieve the same effect and blow up the size of the SAT formula. +/// ATOMIC_BEGIN +/// assume(java::C::__CPROVER_PREFIX_clinit_state != IN_PROGRESS) +/// if(java::C::__CPROVER_PREFIX_clinit_state == NOT_INIT) +/// { +/// java::C::__CPROVER_PREFIX_clinit_state = IN_PROGRESS +/// init_complete = false; +/// } +/// else if(java::C::__CPROVER_PREFIX_clinit_state == INIT_COMPLETE) +/// { +/// init_complete = true; +/// } +/// ATOMIC_END +/// +/// if(init_complete) +/// return; +/// +/// java::C'::clinit_wrapper(); +/// java::I1::clinit_wrapper(); +/// java::I2::clinit_wrapper(); +/// // ... +/// java::In::clinit_wrapper(); +/// +/// java::C::(); +/// +/// // Setting this variable to INIT_COMPLETE will let other threads "cross" +/// // beyond the assume() statement above in this function. +/// ATOMIC_START +/// C::__CPROVER_PREFIX_clinit_state = INIT_COMPLETE; +/// ATOMIC_END +/// +/// return; +/// +/// \endcode +/// +/// Note: The current implementation does not deal with exceptions. +/// /// \param function_id: clinit wrapper function id (the wrapper_method_symbol /// name created by `create_clinit_wrapper_symbols`) /// \param symbol_table: global symbol table /// \return the body of the static initialiser wrapper codet get_clinit_wrapper_body( - const irep_idt &function_id, const symbol_table_baset &symbol_table) + const irep_idt &function_id, + symbol_table_baset &symbol_table) { - // Assume that class C extends class C' and implements interfaces - // I1, ..., In. We now create the following function (possibly recursively - // creating the clinit_wrapper functions for C' and I1, ..., In): - // - // java::C::clinit_wrapper() - // { - // if (java::C::clinit_already_run == false) - // { - // java::C::clinit_already_run = true; // before recursive calls! - // - // java::C'::clinit_wrapper(); - // java::I1::clinit_wrapper(); - // java::I2::clinit_wrapper(); - // // ... - // java::In::clinit_wrapper(); - // - // java::C::(); - // } - // } const symbolt &wrapper_method_symbol = symbol_table.lookup_ref(function_id); irep_idt class_name = wrapper_method_symbol.type.get(ID_C_class); INVARIANT( !class_name.empty(), "wrapper function should be annotated with its class"); - const symbolt &already_run_symbol = - symbol_table.lookup_ref(clinit_already_run_variable_name(class_name)); - equal_exprt check_already_run( - already_run_symbol.symbol_expr(), - false_exprt()); + const symbolt &clinit_state_sym = + symbol_table.lookup_ref(clinit_state_var_name(class_name)); + const symbolt &clinit_thread_local_state_sym = + symbol_table.lookup_ref(clinit_thread_local_state_var_name(class_name)); + + // Create a function-local variable "init_complete". This variable is used to + // avoid inspecting the global state (clinit_state_sym) outside of + // the critical-section. + const symbolt &init_complete = add_new_symbol( + symbol_table, + clinit_local_init_complete_var_name(class_name), + bool_typet(), + nil_exprt(), + true, + false); + + code_blockt function_body; + codet atomic_begin(ID_atomic_begin); + codet atomic_end(ID_atomic_end); + +#if 0 + // This code defines source locations for every codet generated below for + // the static initializer wrapper. Enable this for debugging the symex going + // through the clinit_wrapper. + // + // java::C::clinit_wrapper() + // You will additionally need to associate the `location` with the + // `function_body` and then manually set lines of code for each of the + // statements of the function, using something along the lines of: + // `mycodet.then_case().add_source_location().set_line(17);`/ + + source_locationt &location = function_body.add_source_location(); + location.set_file (""); + location.set_line (""); + location.set_function (clinit_wrapper_name); + std::string comment = + "Automatically generated function. States are:\n" + " 0 = class not initialized, init val of clinit_state/clinit_local_state\n" + " 1 = class initialization in progress, by this or another thread\n" + " 2 = initialization finished with success, by this or another thread\n"; + static_assert((int) clinit_statest::NOT_INIT==0, "Check commment above"); + static_assert((int) clinit_statest::IN_PROGRESS==1, "Check comment above"); + static_assert((int) clinit_statest::INIT_COMPLETE==2, "Check comment above"); +#endif + + // bool init_complete; + { + code_declt decl(init_complete.symbol_expr()); + function_body.add(decl); + } + + // if(C::__CPROVER_PREFIX_clinit_thread_local_state == INIT_COMPLETE) return; + { + code_ifthenelset conditional; + conditional.cond() = gen_clinit_eqexpr( + clinit_thread_local_state_sym.symbol_expr(), + clinit_statest::INIT_COMPLETE); + conditional.then_case() = code_returnt(); + function_body.add(conditional); + } - // the entire body of the function is an if-then-else - code_ifthenelset wrapper_body; + // C::__CPROVER_PREFIX_clinit_thread_local_state = INIT_COMPLETE; + { + code_assignt assign = gen_clinit_assignexpr( + clinit_thread_local_state_sym.symbol_expr(), + clinit_statest::INIT_COMPLETE); + function_body.add(assign); + } - // add the condition to the if - wrapper_body.cond()=check_already_run; + // ATOMIC_BEGIN + { + function_body.add(atomic_begin); + } - // add the "already-run = false" statement - code_blockt init_body; - code_assignt set_already_run(already_run_symbol.symbol_expr(), true_exprt()); - init_body.move_to_operands(set_already_run); + // Assume: clinit_state_sym != IN_PROGRESS + { + exprt assumption = gen_clinit_eqexpr( + clinit_state_sym.symbol_expr(), clinit_statest::IN_PROGRESS); + assumption = not_exprt(assumption); + code_assumet assume(assumption); + function_body.add(assume); + } - // iterate through the base types and add recursive calls to the - // clinit_wrapper() - const symbolt &class_symbol = symbol_table.lookup_ref(class_name); - for(const auto &base : to_class_type(class_symbol.type).bases()) + // If(C::__CPROVER_PREFIX_clinit_state == NOT_INIT) + // { + // C::__CPROVER_PREFIX_clinit_state = IN_PROGRESS; + // init_complete = false; + // } + // else If(C::__CPROVER_PREFIX_clinit_state == INIT_COMPLETE) + // { + // init_complete = true; + // } { - const auto base_name = to_symbol_type(base.type()).get_identifier(); - irep_idt base_init_routine = clinit_wrapper_name(base_name); - auto findit = symbol_table.symbols.find(base_init_routine); - if(findit == symbol_table.symbols.end()) - continue; - code_function_callt call_base; - call_base.function() = findit->second.symbol_expr(); - init_body.move_to_operands(call_base); + code_ifthenelset not_init_conditional; + code_blockt then_block; + not_init_conditional.cond() = gen_clinit_eqexpr( + clinit_state_sym.symbol_expr(), clinit_statest::NOT_INIT); + then_block.add( + gen_clinit_assignexpr( + clinit_state_sym.symbol_expr(), clinit_statest::IN_PROGRESS)); + then_block.add(code_assignt(init_complete.symbol_expr(), false_exprt())); + not_init_conditional.then_case() = then_block; + + code_ifthenelset init_conditional; + code_blockt init_conditional_body; + init_conditional.cond() = gen_clinit_eqexpr( + clinit_state_sym.symbol_expr(), clinit_statest::INIT_COMPLETE); + init_conditional_body.add( + code_assignt(init_complete.symbol_expr(), true_exprt())); + init_conditional.then_case() = init_conditional_body; + not_init_conditional.else_case() = init_conditional; + function_body.add(not_init_conditional); + } + + // ATOMIC_END + { + function_body.add(atomic_end); + } + + // if(init_complete) return; + { + code_ifthenelset conditional; + conditional.cond() = init_complete.symbol_expr(); + conditional.then_case() = code_returnt(); + function_body.add(conditional); + } + + // Initialize the super-class C' and + // the implemented interfaces l_1 ... l_n. + // see JVMS p.359 step 7, for the exact definition of + // the sequence l_1 to l_n. + // This is achieved by iterating through the base types and + // adding recursive calls to the clinit_wrapper() + // + // java::C'::clinit_wrapper(); + // java::I1::clinit_wrapper(); + // java::I2::clinit_wrapper(); + // // ... + // java::In::clinit_wrapper(); + // + // java::C::(); + // + code_blockt init_body; + { + // iterate through the base types and add recursive calls to the + // clinit_wrapper() + const symbolt &class_symbol = symbol_table.lookup_ref(class_name); + for(const auto &base : to_class_type(class_symbol.type).bases()) + { + const auto base_name = to_symbol_type(base.type()).get_identifier(); + irep_idt base_init_routine = clinit_wrapper_name(base_name); + auto findit = symbol_table.symbols.find(base_init_routine); + if(findit == symbol_table.symbols.end()) + continue; + code_function_callt call_base; + call_base.function() = findit->second.symbol_expr(); + init_body.move_to_operands(call_base); + } + + // call java::C::(), if the class has one static initializer + const irep_idt &real_clinit_name = clinit_function_name(class_name); + auto find_sym_it = symbol_table.symbols.find(real_clinit_name); + if(find_sym_it != symbol_table.symbols.end()) + { + code_function_callt call_real_init; + call_real_init.function() = find_sym_it->second.symbol_expr(); + init_body.move_to_operands(call_real_init); + } } + function_body.append(init_body); - // call java::C::(), if the class has one static initializer - const irep_idt &real_clinit_name = clinit_function_name(class_name); - auto find_sym_it = symbol_table.symbols.find(real_clinit_name); - if(find_sym_it!=symbol_table.symbols.end()) + // ATOMIC_START + // C::__CPROVER_PREFIX_clinit_state = INIT_COMPLETE; + // ATOMIC_END + // return; { - code_function_callt call_real_init; - call_real_init.function()=find_sym_it->second.symbol_expr(); - init_body.move_to_operands(call_real_init); + // synchronization prologue + function_body.add(atomic_begin); + function_body.add( + gen_clinit_assignexpr( + clinit_state_sym.symbol_expr(), clinit_statest::INIT_COMPLETE)); + function_body.add(atomic_end); + function_body.add(code_returnt()); } - wrapper_body.then_case()=init_body; - return wrapper_body; + return function_body; } /// Create static initializer wrappers for all classes that need them. diff --git a/src/java_bytecode/java_static_initializers.h b/src/java_bytecode/java_static_initializers.h index 992e9b6425b..1827acf25ce 100644 --- a/src/java_bytecode/java_static_initializers.h +++ b/src/java_bytecode/java_static_initializers.h @@ -27,7 +27,8 @@ void create_static_initializer_wrappers( synthetic_methods_mapt &synthetic_methods); codet get_clinit_wrapper_body( - const irep_idt &function_id, const symbol_table_baset &symbol_table); + const irep_idt &function_id, + symbol_table_baset &symbol_table); class stub_global_initializer_factoryt { From 94bbbba0e7587be0f6fe25b36022cc7d5469d2a1 Mon Sep 17 00:00:00 2001 From: Kurt Degiorgio Date: Wed, 2 May 2018 14:34:51 +0100 Subject: [PATCH 2/2] Added new cmd option to jbmc, 'java-threading' Setting this option will enable support for analysing multi-threaded java programs. For now, this means that a thread-safe version of clinit_wrapper is generated. This was made opt-in instead of opt-out as the inclusion of thread-safe clinit wrapper has negative implications for performance. Commit also adds appropriate regression tests that use this flag to test the resulting modifications to the clinit_wrapper --- regression/cbmc-java/static_init1/test1.desc | 8 + regression/cbmc-java/static_init2/test1.desc | 8 + .../cbmc-java/static_init_order/test3.desc | 7 + .../cbmc-java/static_init_order/test4.desc | 7 + src/java_bytecode/java_bytecode_language.cpp | 14 +- src/java_bytecode/java_bytecode_language.h | 1 + .../java_static_initializers.cpp | 221 +++++++++++++----- src/java_bytecode/java_static_initializers.h | 7 +- src/jbmc/jbmc_parse_options.cpp | 1 + src/jbmc/jbmc_parse_options.h | 1 + 10 files changed, 215 insertions(+), 60 deletions(-) create mode 100644 regression/cbmc-java/static_init1/test1.desc create mode 100644 regression/cbmc-java/static_init2/test1.desc create mode 100644 regression/cbmc-java/static_init_order/test3.desc create mode 100644 regression/cbmc-java/static_init_order/test4.desc diff --git a/regression/cbmc-java/static_init1/test1.desc b/regression/cbmc-java/static_init1/test1.desc new file mode 100644 index 00000000000..8bc396cff4d --- /dev/null +++ b/regression/cbmc-java/static_init1/test1.desc @@ -0,0 +1,8 @@ +CORE +static_init.class +--function static_init.main --java-threading +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc-java/static_init2/test1.desc b/regression/cbmc-java/static_init2/test1.desc new file mode 100644 index 00000000000..8bc396cff4d --- /dev/null +++ b/regression/cbmc-java/static_init2/test1.desc @@ -0,0 +1,8 @@ +CORE +static_init.class +--function static_init.main --java-threading +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +^warning: ignoring diff --git a/regression/cbmc-java/static_init_order/test3.desc b/regression/cbmc-java/static_init_order/test3.desc new file mode 100644 index 00000000000..6ec4686fa57 --- /dev/null +++ b/regression/cbmc-java/static_init_order/test3.desc @@ -0,0 +1,7 @@ +CORE +static_init_order.class +--function static_init_order.test1 --trace --java-threading +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- diff --git a/regression/cbmc-java/static_init_order/test4.desc b/regression/cbmc-java/static_init_order/test4.desc new file mode 100644 index 00000000000..d008222606e --- /dev/null +++ b/regression/cbmc-java/static_init_order/test4.desc @@ -0,0 +1,7 @@ +CORE +static_init_order.class +--function static_init_order.test2 --java-threading +^EXIT=10$ +^SIGNAL=0$ +^VERIFICATION FAILED$ +-- diff --git a/src/java_bytecode/java_bytecode_language.cpp b/src/java_bytecode/java_bytecode_language.cpp index cfcb797cc40..72deab72194 100644 --- a/src/java_bytecode/java_bytecode_language.cpp +++ b/src/java_bytecode/java_bytecode_language.cpp @@ -69,6 +69,11 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd) else lazy_methods_mode=LAZY_METHODS_MODE_EAGER; + if(cmd.isset("java-threading")) + threading_support = true; + else + threading_support = false; + if(cmd.isset("java-throw-runtime-exceptions")) { throw_runtime_exceptions = true; @@ -701,7 +706,8 @@ bool java_bytecode_languaget::typecheck( // For each class that will require a static initializer wrapper, create a // function named package.classname::clinit_wrapper, and a corresponding // global tracking whether it has run or not: - create_static_initializer_wrappers(symbol_table, synthetic_methods); + create_static_initializer_wrappers( + symbol_table, synthetic_methods, threading_support); // Now incrementally elaborate methods // that are reachable from this entry point. @@ -961,7 +967,11 @@ bool java_bytecode_languaget::convert_single_method( switch(synthetic_method_it->second) { case synthetic_method_typet::STATIC_INITIALIZER_WRAPPER: - symbol.value = get_clinit_wrapper_body(function_id, symbol_table); + if(threading_support) + symbol.value = get_thread_safe_clinit_wrapper_body( + function_id, symbol_table); + else + symbol.value = get_clinit_wrapper_body(function_id, symbol_table); break; case synthetic_method_typet::STUB_CLASS_STATIC_INITIALIZER: symbol.value = diff --git a/src/java_bytecode/java_bytecode_language.h b/src/java_bytecode/java_bytecode_language.h index bb3912fa150..29c347fe227 100644 --- a/src/java_bytecode/java_bytecode_language.h +++ b/src/java_bytecode/java_bytecode_language.h @@ -159,6 +159,7 @@ class java_bytecode_languaget:public languaget irep_idt main_class; std::vector main_jar_classes; java_class_loadert java_class_loader; + bool threading_support; bool assume_inputs_non_null; // assume inputs variables to be non-null object_factory_parameterst object_factory_parameters; size_t max_user_array_length; // max size for user code created arrays diff --git a/src/java_bytecode/java_static_initializers.cpp b/src/java_bytecode/java_static_initializers.cpp index c3721978606..c2410c8d2ff 100644 --- a/src/java_bytecode/java_static_initializers.cpp +++ b/src/java_bytecode/java_static_initializers.cpp @@ -17,7 +17,8 @@ Author: Chris Smowton, chris.smowton@diffblue.com #include /// The three states in which a `` method for a class can be before, -/// after, and during static class initialization. +/// after, and during static class initialization. These states are only used +/// when the thread safe version of the clinit wrapper is generated. /// /// According to the JVM Spec document (section 5.5), the JVM needs to /// maintain, for every class initializer, a state indicating whether the @@ -33,7 +34,9 @@ Author: Chris Smowton, chris.smowton@diffblue.com /// class is ready to be used, or it has errored" /// /// The last state corresponds to a fusion of the two original states "ready -/// for use" and "initialization error". +/// for use" and "initialization error". The basis for fusing these states is +/// that for simplification reasons both implementations of the clinit wrapper +/// do not handle exceptions, hence the error state is not possible. enum class clinit_statest { NOT_INIT, @@ -75,7 +78,7 @@ bool is_clinit_wrapper_function(const irep_idt &function_id) /// /param is_thread_local: if true this symbol will be set as thread local /// /param is_static_lifetime: if true this symbol will be set as static /// /return returns new symbol. -static symbolt add_new_symbol( +static symbolt add_new_variable_symbol( symbol_table_baset &symbol_table, const irep_idt &name, const typet &type, @@ -98,6 +101,15 @@ static symbolt add_new_symbol( return new_symbol; } +/// Get name of the static-initialization-already-done global variable for a +/// given class. +/// \param class_name: class symbol name +/// \return static initializer wrapper-already run global name +static irep_idt clinit_already_run_variable_name(const irep_idt &class_name) +{ + return id2string(class_name) + "::clinit_already_run"; +} + /// Get name of the real static initializer for a given class. Doesn't check /// if a static initializer actually exists. /// \param class_name: class symbol name @@ -142,7 +154,7 @@ static irep_idt clinit_local_init_complete_var_name(const irep_idt &class_name) /// /return returns a code_assignt, assigning \p expr to the integer /// representation of \p state static code_assignt -gen_clinit_assignexpr(const exprt &expr, const clinit_statest state) +gen_clinit_assign(const exprt &expr, const clinit_statest state) { mp_integer initv(static_cast(state)); constant_exprt init_s = from_integer(initv, clinit_states_type); @@ -165,6 +177,40 @@ gen_clinit_eqexpr(const exprt &expr, const clinit_statest state) return equal_exprt(expr, init_s); } +/// Generates codet that iterates through the base types of the class specified +/// by class_name, C, and recursively adds calls to their clinit wrapper. +/// Finally a call to the clinint wrapper of C is made. +/// \param symbol_table: symbol table +/// \param class_name: name of the class to generate clinit wrapper calls for +/// \param [out] init_body: appended with calls to clinit wrapper +static void clinit_wrapper_do_recursive_calls( + const symbol_tablet &symbol_table, + const irep_idt &class_name, + code_blockt &init_body) +{ + const symbolt &class_symbol = symbol_table.lookup_ref(class_name); + for(const auto &base : to_class_type(class_symbol.type).bases()) + { + const auto base_name = to_symbol_type(base.type()).get_identifier(); + irep_idt base_init_routine = clinit_wrapper_name(base_name); + auto findit = symbol_table.symbols.find(base_init_routine); + if(findit == symbol_table.symbols.end()) + continue; + code_function_callt call_base; + call_base.function() = findit->second.symbol_expr(); + init_body.move_to_operands(call_base); + } + + const irep_idt &real_clinit_name = clinit_function_name(class_name); + auto find_sym_it = symbol_table.symbols.find(real_clinit_name); + if(find_sym_it != symbol_table.symbols.end()) + { + code_function_callt call_real_init; + call_real_init.function() = find_sym_it->second.symbol_expr(); + init_body.move_to_operands(call_real_init); + } +} + /// Checks whether a static initializer wrapper is needed for a given class, /// i.e. if the given class or any superclass has a static initializer. /// \param class_name: class symbol name @@ -199,32 +245,51 @@ static bool needs_clinit_wrapper( /// \param synthetic_methods: synthetic method type map. The new clinit wrapper /// symbol will be recorded, such that we get a callback to produce its body /// if and when required. +/// \param thread_safe: if true state variables required to make the +/// clinit_wrapper thread safe will be created. static void create_clinit_wrapper_symbols( const irep_idt &class_name, symbol_tablet &symbol_table, - synthetic_methods_mapt &synthetic_methods) + synthetic_methods_mapt &synthetic_methods, + const bool thread_safe) { - exprt not_init_value = from_integer( - static_cast(clinit_statest::NOT_INIT), clinit_states_type); + if(thread_safe) + { + exprt not_init_value = from_integer( + static_cast(clinit_statest::NOT_INIT), clinit_states_type); - // Create two global static synthetic "fields" for the class "id" - // these two variables hold the state of the class initialization algorithm - // across calls to the clinit_wrapper - add_new_symbol( - symbol_table, - clinit_state_var_name(class_name), - clinit_states_type, - not_init_value, - false, - true); + // Create two global static synthetic "fields" for the class "id" + // these two variables hold the state of the class initialization algorithm + // across calls to the clinit_wrapper + add_new_variable_symbol( + symbol_table, + clinit_state_var_name(class_name), + clinit_states_type, + not_init_value, + false, + true); - add_new_symbol( - symbol_table, - clinit_thread_local_state_var_name(class_name), - clinit_states_type, - not_init_value, - true, - true); + add_new_variable_symbol( + symbol_table, + clinit_thread_local_state_var_name(class_name), + clinit_states_type, + not_init_value, + true, + true); + } + else + { + const irep_idt &already_run_name = + clinit_already_run_variable_name(class_name); + + add_new_variable_symbol( + symbol_table, + already_run_name, + bool_typet(), + false_exprt(), + false, + true); + } // Create symbol for the "clinit_wrapper" symbolt wrapper_method_symbol; @@ -254,6 +319,8 @@ static void create_clinit_wrapper_symbols( "clinit wrapper"); } +/// Thread safe version of the static initialiser. +/// /// Produces the static initialiser wrapper body for the given function. This /// static initializer implements (a simplification of) the algorithm defined /// in Section 5.5 of the JVM Specs. This function, or wrapper, checks whether @@ -319,7 +386,7 @@ static void create_clinit_wrapper_symbols( /// name created by `create_clinit_wrapper_symbols`) /// \param symbol_table: global symbol table /// \return the body of the static initialiser wrapper -codet get_clinit_wrapper_body( +codet get_thread_safe_clinit_wrapper_body( const irep_idt &function_id, symbol_table_baset &symbol_table) { @@ -336,7 +403,7 @@ codet get_clinit_wrapper_body( // Create a function-local variable "init_complete". This variable is used to // avoid inspecting the global state (clinit_state_sym) outside of // the critical-section. - const symbolt &init_complete = add_new_symbol( + const symbolt &init_complete = add_new_variable_symbol( symbol_table, clinit_local_init_complete_var_name(class_name), bool_typet(), @@ -391,7 +458,7 @@ codet get_clinit_wrapper_body( // C::__CPROVER_PREFIX_clinit_thread_local_state = INIT_COMPLETE; { - code_assignt assign = gen_clinit_assignexpr( + code_assignt assign = gen_clinit_assign( clinit_thread_local_state_sym.symbol_expr(), clinit_statest::INIT_COMPLETE); function_body.add(assign); @@ -426,7 +493,7 @@ codet get_clinit_wrapper_body( not_init_conditional.cond() = gen_clinit_eqexpr( clinit_state_sym.symbol_expr(), clinit_statest::NOT_INIT); then_block.add( - gen_clinit_assignexpr( + gen_clinit_assign( clinit_state_sym.symbol_expr(), clinit_statest::IN_PROGRESS)); then_block.add(code_assignt(init_complete.symbol_expr(), false_exprt())); not_init_conditional.then_case() = then_block; @@ -470,34 +537,11 @@ codet get_clinit_wrapper_body( // // java::C::(); // - code_blockt init_body; { - // iterate through the base types and add recursive calls to the - // clinit_wrapper() - const symbolt &class_symbol = symbol_table.lookup_ref(class_name); - for(const auto &base : to_class_type(class_symbol.type).bases()) - { - const auto base_name = to_symbol_type(base.type()).get_identifier(); - irep_idt base_init_routine = clinit_wrapper_name(base_name); - auto findit = symbol_table.symbols.find(base_init_routine); - if(findit == symbol_table.symbols.end()) - continue; - code_function_callt call_base; - call_base.function() = findit->second.symbol_expr(); - init_body.move_to_operands(call_base); - } - - // call java::C::(), if the class has one static initializer - const irep_idt &real_clinit_name = clinit_function_name(class_name); - auto find_sym_it = symbol_table.symbols.find(real_clinit_name); - if(find_sym_it != symbol_table.symbols.end()) - { - code_function_callt call_real_init; - call_real_init.function() = find_sym_it->second.symbol_expr(); - init_body.move_to_operands(call_real_init); - } + code_blockt init_body; + clinit_wrapper_do_recursive_calls(symbol_table, class_name, init_body); + function_body.append(init_body); } - function_body.append(init_body); // ATOMIC_START // C::__CPROVER_PREFIX_clinit_state = INIT_COMPLETE; @@ -507,7 +551,7 @@ codet get_clinit_wrapper_body( // synchronization prologue function_body.add(atomic_begin); function_body.add( - gen_clinit_assignexpr( + gen_clinit_assign( clinit_state_sym.symbol_expr(), clinit_statest::INIT_COMPLETE)); function_body.add(atomic_end); function_body.add(code_returnt()); @@ -516,14 +560,77 @@ codet get_clinit_wrapper_body( return function_body; } +/// Produces the static initialiser wrapper body for the given function. +/// Note: this version of the clinit wrapper is not thread safe. +/// \param function_id: clinit wrapper function id (the wrapper_method_symbol +/// name created by `create_clinit_wrapper_symbols`) +/// \param symbol_table: global symbol table +/// \return the body of the static initialiser wrapper/ +codet get_clinit_wrapper_body( + const irep_idt &function_id, + symbol_table_baset &symbol_table) +{ + // Assume that class C extends class C' and implements interfaces + // I1, ..., In. We now create the following function (possibly recursively + // creating the clinit_wrapper functions for C' and I1, ..., In): + // + // java::C::clinit_wrapper() + // { + // if (java::C::clinit_already_run == false) + // { + // java::C::clinit_already_run = true; // before recursive calls! + // + // java::C'::clinit_wrapper(); + // java::I1::clinit_wrapper(); + // java::I2::clinit_wrapper(); + // // ... + // java::In::clinit_wrapper(); + // + // java::C::(); + // } + // } + const symbolt &wrapper_method_symbol = symbol_table.lookup_ref(function_id); + irep_idt class_name = wrapper_method_symbol.type.get(ID_C_class); + INVARIANT( + !class_name.empty(), "wrapper function should be annotated with its class"); + + const symbolt &already_run_symbol = + symbol_table.lookup_ref(clinit_already_run_variable_name(class_name)); + + equal_exprt check_already_run( + already_run_symbol.symbol_expr(), + false_exprt()); + + // the entire body of the function is an if-then-else + code_ifthenelset wrapper_body; + + // add the condition to the if + wrapper_body.cond() = check_already_run; + + // add the "already-run = false" statement + code_blockt init_body; + code_assignt set_already_run(already_run_symbol.symbol_expr(), true_exprt()); + init_body.move_to_operands(set_already_run); + + clinit_wrapper_do_recursive_calls(symbol_table, class_name, init_body); + + wrapper_body.then_case() = init_body; + + return wrapper_body; +} + + /// Create static initializer wrappers for all classes that need them. /// \param symbol_table: global symbol table /// \param synthetic_methods: synthetic methods map. Will be extended noting /// that any wrapper belongs to this code, and so `get_clinit_wrapper_body` /// should be used to produce the method body when required. +/// \param thread_safe: if true state variables required to make the +/// clinit_wrapper thread safe will be created. void create_static_initializer_wrappers( symbol_tablet &symbol_table, - synthetic_methods_mapt &synthetic_methods) + synthetic_methods_mapt &synthetic_methods, + const bool thread_safe) { // Top-sort the class hierarchy, such that we visit parents before children, // and can so identify parents that need static initialisation by whether we @@ -539,7 +646,7 @@ void create_static_initializer_wrappers( if(needs_clinit_wrapper(class_identifier, symbol_table)) { create_clinit_wrapper_symbols( - class_identifier, symbol_table, synthetic_methods); + class_identifier, symbol_table, synthetic_methods, thread_safe); } } } diff --git a/src/java_bytecode/java_static_initializers.h b/src/java_bytecode/java_static_initializers.h index 1827acf25ce..f8daab5b0cd 100644 --- a/src/java_bytecode/java_static_initializers.h +++ b/src/java_bytecode/java_static_initializers.h @@ -24,7 +24,12 @@ bool is_clinit_wrapper_function(const irep_idt &function_id); void create_static_initializer_wrappers( symbol_tablet &symbol_table, - synthetic_methods_mapt &synthetic_methods); + synthetic_methods_mapt &synthetic_methods, + const bool thread_safe); + +codet get_thread_safe_clinit_wrapper_body( + const irep_idt &function_id, + symbol_table_baset &symbol_table); codet get_clinit_wrapper_body( const irep_idt &function_id, diff --git a/src/jbmc/jbmc_parse_options.cpp b/src/jbmc/jbmc_parse_options.cpp index e02564809b7..26427ffe703 100644 --- a/src/jbmc/jbmc_parse_options.cpp +++ b/src/jbmc/jbmc_parse_options.cpp @@ -1047,6 +1047,7 @@ void jbmc_parse_optionst::help() JAVA_BYTECODE_LANGUAGE_OPTIONS_HELP // This one is handled by jbmc_parse_options not by the Java frontend, // hence its presence here: + " --java-threading enable experimental support for java multi-threading\n"// NOLINT(*) " --java-unwind-enum-static try to unwind loops in static initialization of enums\n" // NOLINT(*) // Currently only supported in the JBMC frontend: " --symex-driven-lazy-loading only load functions when first entered by symbolic execution\n" // NOLINT(*) diff --git a/src/jbmc/jbmc_parse_options.h b/src/jbmc/jbmc_parse_options.h index dbb9cf7253e..22871752ab4 100644 --- a/src/jbmc/jbmc_parse_options.h +++ b/src/jbmc/jbmc_parse_options.h @@ -75,6 +75,7 @@ class optionst; JAVA_BYTECODE_LANGUAGE_OPTIONS \ "(java-unwind-enum-static)" \ "(localize-faults)(localize-faults-method):" \ + "(java-threading)" \ OPT_GOTO_TRACE \ "(symex-driven-lazy-loading)" // clang-format on