Skip to content

Commit a94e7a9

Browse files
committed
Implement lambda code synthesis
This scans invokedynamic instructions during class-loading, creating a synthetic type for each one that we can understand, and registers its constructor (which captures) and method (which passes captured and actual parameters to the real lambda function) as synthetic methods. These are then created on demand using the same mechanism as e.g. synthetic static initializer methods. Prior to this change invokedynamic instructions were always stubbed to return null. With this change they actually work, so you can verify programs that use lambdas, such as assert ((MyInterface)(x -> x + 1))(1) == 2
1 parent 6efd03d commit a94e7a9

8 files changed

+593
-59
lines changed

jbmc/src/java_bytecode/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ SRC = assignments_from_json.cpp \
3737
java_string_literals.cpp \
3838
java_types.cpp \
3939
java_utils.cpp \
40+
lambda_synthesis.cpp \
4041
load_method_by_regex.cpp \
4142
mz_zip_archive.cpp \
4243
remove_exceptions.cpp \

jbmc/src/java_bytecode/java_bytecode_convert_method.cpp

Lines changed: 60 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Author: Daniel Kroening, [email protected]
2121
#include "java_string_literal_expr.h"
2222
#include "java_types.h"
2323
#include "java_utils.h"
24+
#include "lambda_synthesis.h"
2425
#include "pattern.h"
2526
#include "remove_exceptions.h"
2627

@@ -303,24 +304,6 @@ java_method_typet member_type_lazy(
303304
return to_java_method_type(*member_type_from_descriptor);
304305
}
305306

306-
/// Retrieves the symbol of the lambda method associated with the given
307-
/// lambda method handle (bootstrap method).
308-
/// \param lambda_method_handles: Vector of lambda method handles (bootstrap
309-
/// methods) of the class where the lambda is called
310-
/// \param index: Index of the lambda method handle in the vector
311-
/// \return Symbol of the lambda method if the method handle has a known type
312-
optionalt<symbolt> java_bytecode_convert_methodt::get_lambda_method_symbol(
313-
const java_class_typet::java_lambda_method_handlest &lambda_method_handles,
314-
const size_t index)
315-
{
316-
const irept &lambda_method_handle = lambda_method_handles.at(index);
317-
// If the lambda method handle has an unknown type, it does not refer to
318-
// any symbol (it has an empty identifier)
319-
if(!lambda_method_handle.id().empty())
320-
return symbol_table.lookup_ref(lambda_method_handle.id());
321-
return {};
322-
}
323-
324307
/// This creates a method symbol in the symtab, but doesn't actually perform
325308
/// method conversion just yet. The caller should call
326309
/// java_bytecode_convert_method later to give the symbol/method a body.
@@ -631,8 +614,7 @@ void java_bytecode_convert_methodt::convert(
631614
if((!m.is_abstract) && (!m.is_native))
632615
{
633616
code_blockt code(convert_parameter_annotations(m, method_type));
634-
code.append(convert_instructions(
635-
m, to_java_class_type(class_symbol.type).lambda_method_handles()));
617+
code.append(convert_instructions(m));
636618
method_symbol.value = std::move(code);
637619
}
638620
}
@@ -1061,9 +1043,8 @@ code_blockt java_bytecode_convert_methodt::convert_parameter_annotations(
10611043
return code;
10621044
}
10631045

1064-
code_blockt java_bytecode_convert_methodt::convert_instructions(
1065-
const methodt &method,
1066-
const java_class_typet::java_lambda_method_handlest &lambda_method_handles)
1046+
code_blockt
1047+
java_bytecode_convert_methodt::convert_instructions(const methodt &method)
10671048
{
10681049
const instructionst &instructions=method.instructions;
10691050

@@ -1317,10 +1298,9 @@ code_blockt java_bytecode_convert_methodt::convert_instructions(
13171298
}
13181299
else if(bytecode == BC_invokedynamic)
13191300
{
1320-
// not used in Java
13211301
if(
1322-
const auto res = convert_invoke_dynamic(
1323-
lambda_method_handles, i_it->source_location, arg0))
1302+
const auto res =
1303+
convert_invoke_dynamic(i_it->source_location, i_it->address, arg0, c))
13241304
{
13251305
results.resize(1);
13261306
results[0] = *res;
@@ -2957,40 +2937,71 @@ code_blockt java_bytecode_convert_methodt::convert_astore(
29572937
}
29582938

29592939
optionalt<exprt> java_bytecode_convert_methodt::convert_invoke_dynamic(
2960-
const java_class_typet::java_lambda_method_handlest &lambda_method_handles,
29612940
const source_locationt &location,
2962-
const exprt &arg0)
2941+
std::size_t instruction_address,
2942+
const exprt &arg0,
2943+
codet &result_code)
29632944
{
29642945
const java_method_typet &method_type = to_java_method_type(arg0.type());
2946+
const java_method_typet::parameterst &parameters(method_type.parameters());
2947+
const typet &return_type = method_type.return_type();
29652948

2966-
const optionalt<symbolt> &lambda_method_symbol = get_lambda_method_symbol(
2967-
lambda_method_handles,
2968-
method_type.get_int(ID_java_lambda_method_handle_index));
2969-
if(lambda_method_symbol.has_value())
2970-
debug() << "Converting invokedynamic for lambda: "
2971-
<< lambda_method_symbol.value().name << eom;
2972-
else
2973-
debug() << "Converting invokedynamic for lambda with unknown handle "
2974-
"type"
2975-
<< eom;
2949+
// Note these must be popped regardless of whether we understand the lambda
2950+
// method or not
2951+
code_function_callt::argumentst arguments = pop(parameters.size());
29762952

2977-
const java_method_typet::parameterst &parameters(method_type.parameters());
2953+
irep_idt synthetic_class_name =
2954+
lambda_synthetic_class_name(method_id, instruction_address);
29782955

2979-
pop(parameters.size());
2956+
if(!symbol_table.has_symbol(synthetic_class_name))
2957+
{
2958+
// We failed to parse the invokedynamic handle as a Java 8+ lambda;
2959+
// give up and return null.
2960+
const auto value = zero_initializer(return_type, location, ns);
2961+
if(!value.has_value())
2962+
{
2963+
error().source_location = location;
2964+
error() << "failed to zero-initialize return type" << eom;
2965+
throw 0;
2966+
}
2967+
return value;
2968+
}
29802969

2981-
const typet &return_type = method_type.return_type();
2970+
// Construct an instance of the synthetic class created for this invokedynamic
2971+
// site:
29822972

2983-
if(return_type.id() == ID_empty)
2984-
return {};
2973+
irep_idt constructor_name = id2string(synthetic_class_name) + ".<init>";
2974+
2975+
const symbolt &constructor_symbol = ns.lookup(constructor_name);
29852976

2986-
const auto value = zero_initializer(return_type, location, ns);
2987-
if(!value.has_value())
2977+
code_blockt result;
2978+
2979+
// SyntheticType lambda_new = new SyntheticType;
2980+
const reference_typet ref_type =
2981+
java_reference_type(struct_tag_typet(synthetic_class_name));
2982+
side_effect_exprt java_new_expr(ID_java_new, ref_type, location);
2983+
const exprt new_instance = tmp_variable("lambda_new", ref_type);
2984+
result.add(code_assignt(new_instance, java_new_expr, location));
2985+
2986+
// lambda_new.<init>(capture_1, capture_2, ...);
2987+
// Add the implicit 'this' parameter:
2988+
arguments.insert(arguments.begin(), new_instance);
2989+
code_function_callt constructor_call(
2990+
constructor_symbol.symbol_expr(), arguments);
2991+
constructor_call.add_source_location() = location;
2992+
result.add(constructor_call);
2993+
if(needed_lazy_methods)
29882994
{
2989-
error().source_location = location;
2990-
error() << "failed to zero-initialize return type" << eom;
2991-
throw 0;
2995+
needed_lazy_methods->add_needed_method(constructor_symbol.name);
2996+
needed_lazy_methods->add_needed_class(synthetic_class_name);
29922997
}
2993-
return value;
2998+
2999+
result_code = std::move(result);
3000+
3001+
if(return_type.id() == ID_empty)
3002+
return {};
3003+
else
3004+
return new_instance;
29943005
}
29953006

29963007
void java_bytecode_convert_methodt::draw_edges_from_ret_to_jsr(

jbmc/src/java_bytecode/java_bytecode_convert_method_class.h

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -293,20 +293,14 @@ class java_bytecode_convert_methodt:public messaget
293293
const address_mapt &amap,
294294
bool allow_merge = true);
295295

296-
optionalt<symbolt> get_lambda_method_symbol(
297-
const java_class_typet::java_lambda_method_handlest &lambda_method_handles,
298-
const size_t index);
299-
300296
// conversion
301297
void convert(const symbolt &class_symbol, const methodt &);
302298

303299
code_blockt convert_parameter_annotations(
304300
const methodt &method,
305301
const java_method_typet &method_type);
306302

307-
code_blockt convert_instructions(
308-
const methodt &,
309-
const java_class_typet::java_lambda_method_handlest &);
303+
code_blockt convert_instructions(const methodt &);
310304

311305
codet get_clinit_call(const irep_idt &classname);
312306

@@ -350,9 +344,10 @@ class java_bytecode_convert_methodt:public messaget
350344
&ret_instructions) const;
351345

352346
optionalt<exprt> convert_invoke_dynamic(
353-
const java_class_typet::java_lambda_method_handlest &lambda_method_handles,
354347
const source_locationt &location,
355-
const exprt &arg0);
348+
std::size_t instruction_address,
349+
const exprt &arg0,
350+
codet &result_code);
356351

357352
code_blockt convert_astore(
358353
const irep_idt &statement,

jbmc/src/java_bytecode/java_bytecode_language.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ Author: Daniel Kroening, [email protected]
4040
#include "java_string_literal_expr.h"
4141
#include "java_string_literals.h"
4242
#include "java_utils.h"
43+
#include "lambda_synthesis.h"
4344

4445
#include "expr2java.h"
4546
#include "load_method_by_regex.h"
@@ -719,6 +720,25 @@ bool java_bytecode_languaget::typecheck(
719720
}
720721
}
721722

723+
// Now add synthetic classes for every invokedynamic instruction found (it
724+
// makes this easier that all interface types and their methods have been
725+
// created above):
726+
for(const auto &id_and_symbol : symbol_table)
727+
{
728+
if(id_and_symbol.second.type.id() != ID_code)
729+
continue;
730+
auto cmb = method_bytecode.get(id_and_symbol.first);
731+
if(!cmb)
732+
continue;
733+
734+
create_invokedynamic_synthetic_classes(
735+
id_and_symbol.first,
736+
cmb->get().method.instructions,
737+
symbol_table,
738+
synthetic_methods,
739+
get_message_handler());
740+
}
741+
722742
// Now that all classes have been created in the symbol table we can populate
723743
// the class hierarchy:
724744
class_hierarchy(symbol_table);
@@ -1031,6 +1051,15 @@ static void notify_static_method_calls(
10311051
expr_dynamic_cast<symbol_exprt>(fn_call->function());
10321052
needed_lazy_methods->add_needed_method(fn_sym.get_identifier());
10331053
}
1054+
else if(
1055+
it->id() == ID_side_effect &&
1056+
to_side_effect_expr(*it).get_statement() == ID_function_call)
1057+
{
1058+
const auto &call_expr = to_side_effect_expr_function_call(*it);
1059+
const symbol_exprt &fn_sym =
1060+
expr_dynamic_cast<symbol_exprt>(call_expr.function());
1061+
needed_lazy_methods->add_needed_method(fn_sym.get_identifier());
1062+
}
10341063
}
10351064
}
10361065
}
@@ -1136,6 +1165,14 @@ bool java_bytecode_languaget::convert_single_method(
11361165
get_pointer_type_selector(),
11371166
get_message_handler());
11381167
break;
1168+
case synthetic_method_typet::INVOKEDYNAMIC_CAPTURE_CONSTRUCTOR:
1169+
writable_symbol.value = get_invokedynamic_synthetic_constructor(
1170+
function_id, symbol_table, get_message_handler());
1171+
break;
1172+
case synthetic_method_typet::INVOKEDYNAMIC_METHOD:
1173+
writable_symbol.value = get_invokedynamic_synthetic_method(
1174+
function_id, symbol_table, get_message_handler());
1175+
break;
11391176
}
11401177
// Notify lazy methods of static calls made from the newly generated
11411178
// function:

0 commit comments

Comments
 (0)