Skip to content

Commit dc7c5d2

Browse files
committed
Implement lambda code synthesis
This scans invokedynamic instructions during class-loading, creating a synthetic type for each one that we can understand, and registers its constructor (which captures) and method (which passes captured and actual parameters to the real lambda function) as synthetic methods. These are then created on demand using the same mechanism as e.g. synthetic static initializer methods.
1 parent 515c3ab commit dc7c5d2

8 files changed

+586
-60
lines changed

jbmc/src/java_bytecode/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ SRC = assignments_from_json.cpp \
3737
java_string_literals.cpp \
3838
java_types.cpp \
3939
java_utils.cpp \
40+
lambda_synthesis.cpp \
4041
load_method_by_regex.cpp \
4142
mz_zip_archive.cpp \
4243
remove_exceptions.cpp \

jbmc/src/java_bytecode/java_bytecode_convert_method.cpp

Lines changed: 60 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Author: Daniel Kroening, [email protected]
2121
#include "java_string_literal_expr.h"
2222
#include "java_types.h"
2323
#include "java_utils.h"
24+
#include "lambda_synthesis.h"
2425
#include "pattern.h"
2526
#include "remove_exceptions.h"
2627

@@ -303,24 +304,6 @@ java_method_typet member_type_lazy(
303304
return to_java_method_type(*member_type_from_descriptor);
304305
}
305306

306-
/// Retrieves the symbol of the lambda method associated with the given
307-
/// lambda method handle (bootstrap method).
308-
/// \param lambda_method_handles: Vector of lambda method handles (bootstrap
309-
/// methods) of the class where the lambda is called
310-
/// \param index: Index of the lambda method handle in the vector
311-
/// \return Symbol of the lambda method if the method handle has a known type
312-
optionalt<symbolt> java_bytecode_convert_methodt::get_lambda_method_symbol(
313-
const java_class_typet::java_lambda_method_handlest &lambda_method_handles,
314-
const size_t index)
315-
{
316-
const irept &lambda_method_handle = lambda_method_handles.at(index);
317-
// If the lambda method handle has an unknown type, it does not refer to
318-
// any symbol (it has an empty identifier)
319-
if(!lambda_method_handle.id().empty())
320-
return symbol_table.lookup_ref(lambda_method_handle.id());
321-
return {};
322-
}
323-
324307
/// This creates a method symbol in the symtab, but doesn't actually perform
325308
/// method conversion just yet. The caller should call
326309
/// java_bytecode_convert_method later to give the symbol/method a body.
@@ -629,9 +612,7 @@ void java_bytecode_convert_methodt::convert(
629612
current_method = method_symbol.name;
630613
method_has_this = method_type.has_this();
631614
if((!m.is_abstract) && (!m.is_native))
632-
method_symbol.value = convert_instructions(
633-
m,
634-
to_java_class_type(class_symbol.type).lambda_method_handles());
615+
method_symbol.value = convert_instructions(m);
635616
}
636617

637618
static irep_idt get_if_cmp_operator(const irep_idt &stmt)
@@ -1010,9 +991,8 @@ static std::size_t get_bytecode_type_width(const typet &ty)
1010991
return to_bitvector_type(ty).get_width();
1011992
}
1012993

1013-
code_blockt java_bytecode_convert_methodt::convert_instructions(
1014-
const methodt &method,
1015-
const java_class_typet::java_lambda_method_handlest &lambda_method_handles)
994+
code_blockt
995+
java_bytecode_convert_methodt::convert_instructions(const methodt &method)
1016996
{
1017997
const instructionst &instructions=method.instructions;
1018998

@@ -1264,10 +1244,9 @@ code_blockt java_bytecode_convert_methodt::convert_instructions(
12641244
}
12651245
else if(statement=="invokedynamic")
12661246
{
1267-
// not used in Java
12681247
if(
1269-
const auto res = convert_invoke_dynamic(
1270-
lambda_method_handles, i_it->source_location, arg0))
1248+
const auto res =
1249+
convert_invoke_dynamic(i_it->source_location, i_it->address, arg0, c))
12711250
{
12721251
results.resize(1);
12731252
results[0] = *res;
@@ -2907,40 +2886,71 @@ code_blockt java_bytecode_convert_methodt::convert_astore(
29072886
}
29082887

29092888
optionalt<exprt> java_bytecode_convert_methodt::convert_invoke_dynamic(
2910-
const java_class_typet::java_lambda_method_handlest &lambda_method_handles,
29112889
const source_locationt &location,
2912-
const exprt &arg0)
2890+
std::size_t instruction_address,
2891+
const exprt &arg0,
2892+
codet &result_code)
29132893
{
29142894
const java_method_typet &method_type = to_java_method_type(arg0.type());
2895+
const java_method_typet::parameterst &parameters(method_type.parameters());
2896+
const typet &return_type = method_type.return_type();
29152897

2916-
const optionalt<symbolt> &lambda_method_symbol = get_lambda_method_symbol(
2917-
lambda_method_handles,
2918-
method_type.get_int(ID_java_lambda_method_handle_index));
2919-
if(lambda_method_symbol.has_value())
2920-
debug() << "Converting invokedynamic for lambda: "
2921-
<< lambda_method_symbol.value().name << eom;
2922-
else
2923-
debug() << "Converting invokedynamic for lambda with unknown handle "
2924-
"type"
2925-
<< eom;
2898+
// Note these must be popped regardless of whether we understand the lambda
2899+
// method or not
2900+
code_function_callt::argumentst arguments = pop(parameters.size());
29262901

2927-
const java_method_typet::parameterst &parameters(method_type.parameters());
2902+
irep_idt synthetic_class_name =
2903+
lambda_synthetic_class_name(method_id, instruction_address);
2904+
2905+
if(!symbol_table.has_symbol(synthetic_class_name))
2906+
{
2907+
// We failed to parse the invokedynamic handle as a Java 8+ lambda;
2908+
// give up and return null.
2909+
const auto value = zero_initializer(return_type, location, ns);
2910+
if(!value.has_value())
2911+
{
2912+
error().source_location = location;
2913+
error() << "failed to zero-initialize return type" << eom;
2914+
throw 0;
2915+
}
2916+
return value;
2917+
}
29282918

2929-
pop(parameters.size());
2919+
// Construct an instance of the synthetic class created for this invokedynamic
2920+
// site:
29302921

2931-
const typet &return_type = method_type.return_type();
2922+
irep_idt constructor_name = id2string(synthetic_class_name) + ".<init>";
29322923

2933-
if(return_type.id() == ID_empty)
2934-
return {};
2924+
const symbolt &constructor_symbol = ns.lookup(constructor_name);
29352925

2936-
const auto value = zero_initializer(return_type, location, ns);
2937-
if(!value.has_value())
2926+
code_blockt result;
2927+
2928+
// SyntheticType lambda_new = new SyntheticType;
2929+
const reference_typet ref_type =
2930+
java_reference_type(struct_tag_typet(synthetic_class_name));
2931+
side_effect_exprt java_new_expr(ID_java_new, ref_type, location);
2932+
const exprt new_instance = tmp_variable("lambda_new", ref_type);
2933+
result.add(code_assignt(new_instance, java_new_expr, location));
2934+
2935+
// lambda_new.<init>(capture_1, capture_2, ...);
2936+
// Add the implicit 'this' parameter:
2937+
arguments.insert(arguments.begin(), new_instance);
2938+
code_function_callt constructor_call(
2939+
constructor_symbol.symbol_expr(), arguments);
2940+
constructor_call.add_source_location() = location;
2941+
result.add(constructor_call);
2942+
if(needed_lazy_methods)
29382943
{
2939-
error().source_location = location;
2940-
error() << "failed to zero-initialize return type" << eom;
2941-
throw 0;
2944+
needed_lazy_methods->add_needed_method(constructor_symbol.name);
2945+
needed_lazy_methods->add_needed_class(synthetic_class_name);
29422946
}
2943-
return value;
2947+
2948+
result_code = std::move(result);
2949+
2950+
if(return_type.id() == ID_empty)
2951+
return {};
2952+
else
2953+
return new_instance;
29442954
}
29452955

29462956
void java_bytecode_convert_methodt::draw_edges_from_ret_to_jsr(

jbmc/src/java_bytecode/java_bytecode_convert_method_class.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -293,16 +293,10 @@ class java_bytecode_convert_methodt:public messaget
293293
const address_mapt &amap,
294294
bool allow_merge = true);
295295

296-
optionalt<symbolt> get_lambda_method_symbol(
297-
const java_class_typet::java_lambda_method_handlest &lambda_method_handles,
298-
const size_t index);
299-
300296
// conversion
301297
void convert(const symbolt &class_symbol, const methodt &);
302298

303-
code_blockt convert_instructions(
304-
const methodt &,
305-
const java_class_typet::java_lambda_method_handlest &);
299+
code_blockt convert_instructions(const methodt &);
306300

307301
codet get_clinit_call(const irep_idt &classname);
308302

@@ -346,9 +340,10 @@ class java_bytecode_convert_methodt:public messaget
346340
&ret_instructions) const;
347341

348342
optionalt<exprt> convert_invoke_dynamic(
349-
const java_class_typet::java_lambda_method_handlest &lambda_method_handles,
350343
const source_locationt &location,
351-
const exprt &arg0);
344+
std::size_t instruction_address,
345+
const exprt &arg0,
346+
codet &result_code);
352347

353348
code_blockt convert_astore(
354349
const irep_idt &statement,

jbmc/src/java_bytecode/java_bytecode_language.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ Author: Daniel Kroening, [email protected]
4040
#include "java_string_literal_expr.h"
4141
#include "java_string_literals.h"
4242
#include "java_utils.h"
43+
#include "lambda_synthesis.h"
4344

4445
#include "expr2java.h"
4546
#include "load_method_by_regex.h"
@@ -719,6 +720,25 @@ bool java_bytecode_languaget::typecheck(
719720
}
720721
}
721722

723+
// Now add synthetic classes for every invokedynamic instruction found (it
724+
// makes this easier that all interface types and their methods have been
725+
// created above):
726+
for(const auto &id_and_symbol : symbol_table)
727+
{
728+
if(id_and_symbol.second.type.id() != ID_code)
729+
continue;
730+
auto cmb = method_bytecode.get(id_and_symbol.first);
731+
if(!cmb)
732+
continue;
733+
734+
create_invokedynamic_synthetic_classes(
735+
id_and_symbol.first,
736+
cmb->get().method.instructions,
737+
symbol_table,
738+
synthetic_methods,
739+
get_message_handler());
740+
}
741+
722742
// Now that all classes have been created in the symbol table we can populate
723743
// the class hierarchy:
724744
class_hierarchy(symbol_table);
@@ -1031,6 +1051,15 @@ static void notify_static_method_calls(
10311051
expr_dynamic_cast<symbol_exprt>(fn_call->function());
10321052
needed_lazy_methods->add_needed_method(fn_sym.get_identifier());
10331053
}
1054+
else if(
1055+
it->id() == ID_side_effect &&
1056+
to_side_effect_expr(*it).get_statement() == ID_function_call)
1057+
{
1058+
const auto &call_expr = to_side_effect_expr_function_call(*it);
1059+
const symbol_exprt &fn_sym =
1060+
expr_dynamic_cast<symbol_exprt>(call_expr.function());
1061+
needed_lazy_methods->add_needed_method(fn_sym.get_identifier());
1062+
}
10341063
}
10351064
}
10361065
}
@@ -1136,6 +1165,14 @@ bool java_bytecode_languaget::convert_single_method(
11361165
get_pointer_type_selector(),
11371166
get_message_handler());
11381167
break;
1168+
case synthetic_method_typet::INVOKEDYNAMIC_CAPTURE_CONSTRUCTOR:
1169+
writable_symbol.value = get_invokedynamic_synthetic_constructor(
1170+
function_id, symbol_table, get_message_handler());
1171+
break;
1172+
case synthetic_method_typet::INVOKEDYNAMIC_METHOD:
1173+
writable_symbol.value = get_invokedynamic_synthetic_method(
1174+
function_id, symbol_table, get_message_handler());
1175+
break;
11391176
}
11401177
// Notify lazy methods of static calls made from the newly generated
11411178
// function:

0 commit comments

Comments
 (0)