Skip to content

Commit 9bb474d

Browse files
committed
Fixes parameter detection on bytecode->goto translation
The JVM spec document (Sec. 3.6) mandates that Java method arguments occupy the lowest local variables. The only sound way to detect which variable slots are function parameters is by counting the number and type of arguments in the method descriptor. This commit adds a new function `java_method_parameter_slots` which can determine the number of variable slots used by a method's parameters. It also fixes `setup_local_variables` (which were previously checking whether `v.start_pc=0` to determine if an LVT entry is a parameter).
1 parent 77bda4e commit 9bb474d

5 files changed

+125
-63
lines changed

src/java_bytecode/java_bytecode_convert_method.cpp

+38-46
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Author: Daniel Kroening, [email protected]
3030
#include "java_bytecode_convert_method_class.h"
3131
#include "bytecode_info.h"
3232
#include "java_types.h"
33+
#include "java_utils.h"
3334
#include "java_string_library_preprocess.h"
3435

3536
#include <limits>
@@ -106,25 +107,10 @@ static bool operator==(const irep_idt &what, const patternt &pattern)
106107
return pattern==what;
107108
}
108109

109-
const size_t SLOTS_PER_INTEGER(1u);
110-
const size_t INTEGER_WIDTH(64u);
111-
static size_t count_slots(
112-
const size_t value,
113-
const code_typet::parametert &param)
114-
{
115-
const std::size_t width(param.type().get_unsigned_int(ID_width));
116-
return value+SLOTS_PER_INTEGER+width/INTEGER_WIDTH;
117-
}
118-
119-
static size_t get_variable_slots(const code_typet::parametert &param)
120-
{
121-
return count_slots(0, param);
122-
}
123-
124110
static irep_idt strip_java_namespace_prefix(const irep_idt to_strip)
125111
{
126112
const auto to_strip_str=id2string(to_strip);
127-
assert(has_prefix(to_strip_str, "java::"));
113+
PRECONDITION(has_prefix(to_strip_str, "java::"));
128114
return to_strip_str.substr(6, std::string::npos);
129115
}
130116

@@ -308,12 +294,22 @@ void java_bytecode_convert_methodt::convert(
308294
method_return_type=code_type.return_type();
309295
code_typet::parameterst &parameters=code_type.parameters();
310296

297+
// Determine the number of local variable slots used by the JVM to maintan the
298+
// formal parameters
299+
slots_for_parameters = java_method_parameter_slots(code_type);
300+
301+
debug() << "Generating codet: class "
302+
<< class_symbol.name << ", method "
303+
<< m.name << eom;
304+
305+
// We now set up the local variables for the method parameters
311306
variables.clear();
312307

313-
// find parameter names in the local variable table:
308+
// Find parameter names in the local variable table:
314309
for(const auto &v : m.local_variable_table)
315310
{
316-
if(v.start_pc!=0) // Local?
311+
// Skip this variable if it is not a method parameter
312+
if(!is_parameter(v))
317313
continue;
318314

319315
typet t=java_type_from_string(v.signature);
@@ -335,8 +331,10 @@ void java_bytecode_convert_methodt::convert(
335331
for(const auto &param : parameters)
336332
{
337333
variables[param_index].resize(1);
338-
param_index+=get_variable_slots(param);
334+
param_index+=java_local_variable_slots(param.type());
339335
}
336+
INVARIANT(param_index==slots_for_parameters,
337+
"java_parameter_count and local computation must agree");
340338

341339
// assign names to parameters
342340
param_index=0;
@@ -348,8 +346,6 @@ void java_bytecode_convert_methodt::convert(
348346
{
349347
base_name="this";
350348
identifier=id2string(method_identifier)+"::"+id2string(base_name);
351-
param.set_base_name(base_name);
352-
param.set_identifier(identifier);
353349
}
354350
else
355351
{
@@ -364,10 +360,9 @@ void java_bytecode_convert_methodt::convert(
364360
base_name="arg"+std::to_string(param_index)+suffix;
365361
identifier=id2string(method_identifier)+"::"+id2string(base_name);
366362
}
367-
368-
param.set_base_name(base_name);
369-
param.set_identifier(identifier);
370363
}
364+
param.set_base_name(base_name);
365+
param.set_identifier(identifier);
371366

372367
// add to symbol table
373368
parameter_symbolt parameter_symbol;
@@ -377,41 +372,34 @@ void java_bytecode_convert_methodt::convert(
377372
parameter_symbol.type=param.type();
378373
symbol_table.add(parameter_symbol);
379374

380-
// add as a JVM variable
381-
std::size_t slots=get_variable_slots(param);
375+
// Add as a JVM local variable
382376
variables[param_index][0].symbol_expr=parameter_symbol.symbol_expr();
383377
variables[param_index][0].is_parameter=true;
384378
variables[param_index][0].start_pc=0;
385379
variables[param_index][0].length=std::numeric_limits<size_t>::max();
386-
variables[param_index][0].is_parameter=true;
387-
param_index+=slots;
388-
assert(param_index>0);
380+
param_index+=java_local_variable_slots(param.type());
389381
}
390382

383+
// The parameter slots detected in this function should agree with what
384+
// java_parameter_count() thinks about this method
385+
INVARIANT(param_index==slots_for_parameters,
386+
"java_parameter_count and local computation must agree");
387+
391388
const bool is_virtual=!m.is_static && !m.is_final;
392389

393-
#if 0
394-
class_type.methods().push_back(class_typet::methodt());
395-
class_typet::methodt &method=class_type.methods().back();
396-
#else
390+
// Construct a methodt, which lives within the class type; this object is
391+
// never used for anything useful and could be removed
397392
class_typet::methodt method;
398-
#endif
399-
400393
method.set_base_name(m.base_name);
401394
method.set_name(method_identifier);
402-
403395
method.set(ID_abstract, m.is_abstract);
404396
method.set(ID_is_virtual, is_virtual);
405-
397+
method.type()=member_type;
406398
if(is_constructor(method))
407399
method.set(ID_constructor, true);
408400

409-
method.type()=member_type;
410-
411401
// we add the symbol for the method
412-
413402
symbolt method_symbol;
414-
415403
method_symbol.name=method.get_name();
416404
method_symbol.base_name=method.get_base_name();
417405
method_symbol.mode=ID_java;
@@ -430,15 +418,13 @@ void java_bytecode_convert_methodt::convert(
430418
method_symbol.type.set(ID_constructor, true);
431419
current_method=method_symbol.name;
432420
method_has_this=code_type.has_this();
433-
434-
tmp_vars.clear();
435421
if((!m.is_abstract) && (!m.is_native))
436-
method_symbol.value=convert_instructions(
437-
m, code_type, method_symbol.name);
422+
method_symbol.value=convert_instructions(m, code_type, method_symbol.name);
438423

439424
// Replace the existing stub symbol with the real deal:
440425
const auto s_it=symbol_table.symbols.find(method.get_name());
441-
assert(s_it!=symbol_table.symbols.end());
426+
INVARIANT(s_it!=symbol_table.symbols.end(),
427+
"the symbol was there earlier on this function; it must be there now");
442428
symbol_table.symbols.erase(s_it);
443429

444430
symbol_table.add(method_symbol);
@@ -1138,6 +1124,12 @@ codet java_bytecode_convert_methodt::convert_instructions(
11381124
}
11391125
}
11401126

1127+
// Clean the list of temporary variables created by a call to `tmp_variable`.
1128+
// These are local variables in the goto function used to represent temporary
1129+
// values of the JVM operand stack, newly allocated objects before the
1130+
// constructor is called, ...
1131+
tmp_vars.clear();
1132+
11411133
// Now that the control flow graph is built, set up our local variables
11421134
// (these require the graph to determine live ranges)
11431135
setup_local_variables(method, address_map);

src/java_bytecode/java_bytecode_convert_method_class.h

+16-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ class java_bytecode_convert_methodt:public messaget
4141
symbol_table(_symbol_table),
4242
max_array_length(_max_array_length),
4343
lazy_methods(_lazy_methods),
44-
string_preprocess(_string_preprocess)
44+
string_preprocess(_string_preprocess),
45+
slots_for_parameters(0)
4546
{
4647
}
4748

@@ -66,6 +67,11 @@ class java_bytecode_convert_methodt:public messaget
6667
typet method_return_type;
6768
java_string_library_preprocesst &string_preprocess;
6869

70+
/// Number of local variable slots used by the JVM to pass parameters upon
71+
/// invocation of the method under translation.
72+
/// Initialized in `convert`.
73+
unsigned slots_for_parameters;
74+
6975
public:
7076
struct holet
7177
{
@@ -76,6 +82,7 @@ class java_bytecode_convert_methodt:public messaget
7682
struct local_variable_with_holest
7783
{
7884
local_variablet var;
85+
bool is_parameter;
7986
std::vector<holet> holes;
8087
};
8188

@@ -149,6 +156,14 @@ class java_bytecode_convert_methodt:public messaget
149156

150157
bool is_constructor(const class_typet::methodt &method);
151158

159+
/// Returns true iff the slot index of the local variable of a method (coming
160+
/// from the LVT) is a parameter of that method. Assumes that
161+
/// `slots_for_parameters` is initialized upon call.
162+
bool is_parameter(const local_variablet &v)
163+
{
164+
return v.index < slots_for_parameters;
165+
}
166+
152167
struct converted_instructiont
153168
{
154169
converted_instructiont(

src/java_bytecode/java_local_variable_table.cpp

+30-16
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Author: Chris Smowton, [email protected]
1111

1212
#include "java_bytecode_convert_method_class.h"
1313
#include "java_types.h"
14+
#include "java_utils.h"
1415

1516
#include <util/string2int.h>
1617

@@ -55,7 +56,7 @@ struct procedure_local_cfg_baset<
5556
for(const auto &table_entry : method.exception_table)
5657
{
5758
auto findit=amap.find(table_entry.start_pc);
58-
assert(findit!=amap.end() &&
59+
INVARIANT(findit!=amap.end(),
5960
"Exception table entry doesn't point to an instruction?");
6061
for(; findit->first<table_entry.end_pc; ++findit)
6162
{
@@ -186,9 +187,11 @@ static bool is_store_to_slot(
186187
else
187188
{
188189
// Store shorthands, like "store_0", "store_1"
189-
assert(prevstatement[6]=='_' && prevstatement.size()==8);
190+
INVARIANT(prevstatement[6]=='_' && prevstatement.size()==8,
191+
"expected store instruction looks like store_0, store_1...");
190192
storeslot=prevstatement[7];
191-
assert(isdigit(storeslot[0]));
193+
INVARIANT(isdigit(storeslot[0]),
194+
"store_? instructions should end in a digit");
192195
}
193196
auto storeslotidx=safe_string2unsigned(storeslot);
194197
return storeslotidx==slotidx;
@@ -203,7 +206,7 @@ static void maybe_add_hole(
203206
unsigned from,
204207
unsigned to)
205208
{
206-
assert(to>=from);
209+
PRECONDITION(to>=from);
207210
if(to!=from)
208211
var.holes.push_back({from, to-from});
209212
}
@@ -230,7 +233,7 @@ static void populate_variable_address_map(
230233
idx!=idxlim;
231234
++idx)
232235
{
233-
assert((!live_variable_at_address[idx]) && "Local variable table clash?");
236+
INVARIANT(!live_variable_at_address[idx], "Local variable table clash?");
234237
live_variable_at_address[idx]=&*it;
235238
}
236239
}
@@ -261,20 +264,28 @@ static void populate_predecessor_map(
261264
messaget msg(msg_handler);
262265
for(auto it=firstvar, itend=varlimit; it!=itend; ++it)
263266
{
264-
// Parameters are irrelevant to us and shouldn't be changed:
265-
if(it->var.start_pc==0)
267+
// All entries of the "local_variable_table_with_holest" processed in this
268+
// function concern the same Java Local Variable Table slot/register. This
269+
// is because "find_initialisers()" has already sorted them.
270+
INVARIANT(it->var.index==firstvar->var.index,
271+
"all entries are for the same local variable slot");
272+
273+
// Parameters are irrelevant to us and shouldn't be changed. This is because
274+
// they cannot have live predecessor ranges: they are initialized by the JVM
275+
// and no other live variable range can flow into them.
276+
if(it->is_parameter)
266277
continue;
267278

268279
// Find the last instruction within the live range:
269280
unsigned end_pc=it->var.start_pc+it->var.length;
270281
auto amapit=amap.find(end_pc);
271-
assert(amapit!=amap.begin());
282+
INVARIANT(amapit!=amap.begin(),
283+
"current bytecode shall not be the first");
272284
auto old_amapit=amapit;
273285
--amapit;
274286
if(old_amapit==amap.end())
275287
{
276-
assert(
277-
end_pc>amapit->first &&
288+
INVARIANT(end_pc>amapit->first,
278289
"Instruction live range doesn't align to instruction boundary?");
279290
}
280291

@@ -287,7 +298,8 @@ static void populate_predecessor_map(
287298
auto pred_var=
288299
(pred<live_variable_at_address.size() ?
289300
live_variable_at_address[pred] :
290-
0);
301+
nullptr);
302+
291303
if(pred_var==&*it)
292304
{
293305
// Flow from within same live range?
@@ -299,7 +311,8 @@ static void populate_predecessor_map(
299311
// Check if this is an initialiser, and if so expand the live range
300312
// to include it, but don't check its predecessors:
301313
auto inst_before_this=amapit;
302-
assert(inst_before_this!=amap.begin());
314+
INVARIANT(inst_before_this!=amap.begin(),
315+
"we shall not be on the first bytecode of the method");
303316
--inst_before_this;
304317
if(amapit->first!=it->var.start_pc || inst_before_this->first!=pred)
305318
{
@@ -360,7 +373,7 @@ static unsigned get_common_dominator(
360373
const std::set<local_variable_with_holest*> &merge_vars,
361374
const java_cfg_dominatorst &dominator_analysis)
362375
{
363-
assert(!merge_vars.empty());
376+
PRECONDITION(!merge_vars.empty());
364377

365378
unsigned first_pc=UINT_MAX;
366379
for(auto v : merge_vars)
@@ -652,10 +665,11 @@ void java_bytecode_convert_methodt::setup_local_variables(
652665
dominator_analysis(dominator_args);
653666

654667
// Find out which local variable table entries should be merged:
655-
// Wrap each entry so we have somewhere to record live ranges with holes:
668+
// Wrap each entry so we have a data structure to work during function calls,
669+
// where we record live ranges with holes:
656670
std::vector<local_variable_with_holest> vars_with_holes;
657671
for(const auto &v : m.local_variable_table)
658-
vars_with_holes.push_back({v, {}});
672+
vars_with_holes.push_back({v, is_parameter(v), {}});
659673

660674
// Merge variable records:
661675
find_initialisers(vars_with_holes, amap, dominator_analysis);
@@ -673,7 +687,7 @@ void java_bytecode_convert_methodt::setup_local_variables(
673687
// length values in the local variable table
674688
for(const auto &v : vars_with_holes)
675689
{
676-
if(v.var.start_pc==0) // Parameter?
690+
if(v.is_parameter)
677691
continue;
678692

679693
typet t=java_type_from_string(v.var.signature);

0 commit comments

Comments
 (0)