Skip to content

Commit a3e19f7

Browse files
authored
Merge pull request diffblue#1644 from NathanJPhillips/feature/string-functions-on-demand
Load string functions on demand
2 parents 9b1ef1a + ea7646b commit a3e19f7

14 files changed

+464
-310
lines changed

src/java_bytecode/ci_lazy_methods.cpp

Lines changed: 51 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
#include <java_bytecode/java_entry_point.h>
1212
#include <java_bytecode/java_class_loader.h>
1313
#include <java_bytecode/java_utils.h>
14-
#include <util/safe_pointer.h>
1514
#include <util/suffix.h>
1615
#include <java_bytecode/java_string_library_preprocess.h>
1716

@@ -61,14 +60,14 @@ ci_lazy_methodst::ci_lazy_methodst(
6160
/// from the main entry point (usually provided with the --function command-
6261
/// line option)
6362
/// \param symbol_table: global symbol table
64-
/// \param [out] lazy_methods: map from method names to relevant symbol and
63+
/// \param [out] method_bytecode: map from method names to relevant symbol and
6564
/// parsed-method objects.
6665
/// \param method_converter: Function for converting methods on demand.
6766
/// \return Returns false on success
6867
bool ci_lazy_methodst::operator()(
6968
symbol_tablet &symbol_table,
70-
lazy_methodst &lazy_methods,
71-
method_convertert method_converter)
69+
method_bytecodet &method_bytecode,
70+
const method_convertert &method_converter)
7271
{
7372
std::vector<irep_idt> method_worklist1;
7473
std::vector<irep_idt> method_worklist2;
@@ -141,21 +140,17 @@ bool ci_lazy_methodst::operator()(
141140
{
142141
if(!methods_already_populated.insert(mname).second)
143142
continue;
144-
auto findit=lazy_methods.find(mname);
145-
if(findit==lazy_methods.end())
143+
debug() << "CI lazy methods: elaborate " << mname << eom;
144+
if(
145+
method_converter(
146+
mname,
147+
// Note this wraps *references* to method_worklist2 & needed_classes
148+
ci_lazy_methods_neededt(
149+
method_worklist2, needed_classes, symbol_table)))
146150
{
147-
debug() << "Skip " << mname << eom;
151+
// Couldn't convert this function
148152
continue;
149153
}
150-
debug() << "CI lazy methods: elaborate " << mname << eom;
151-
const auto &parsed_method=findit->second;
152-
// Note this wraps *references* to method_worklist2, needed_classes:
153-
ci_lazy_methods_neededt new_lazy_methods(
154-
method_worklist2,
155-
needed_classes,
156-
symbol_table);
157-
method_converter(
158-
*parsed_method.first, *parsed_method.second, new_lazy_methods);
159154
gather_virtual_callsites(
160155
symbol_table.lookup_ref(mname).value,
161156
virtual_callsites);
@@ -189,15 +184,23 @@ bool ci_lazy_methodst::operator()(
189184

190185
for(const auto &sym : symbol_table.symbols)
191186
{
187+
// Don't keep global variables (unless they're gathered below from a
188+
// function that references them)
192189
if(sym.second.is_static_lifetime)
193190
continue;
194-
if(lazy_methods.count(sym.first) &&
195-
!methods_already_populated.count(sym.first))
196-
{
197-
continue;
198-
}
199191
if(sym.second.type.id()==ID_code)
192+
{
193+
// Don't keep functions that belong to this language that we haven't
194+
// converted above
195+
if(
196+
method_bytecode.contains_method(sym.first) &&
197+
!methods_already_populated.count(sym.first))
198+
{
199+
continue;
200+
}
201+
// If this is a function then add all the things used in it
200202
gather_needed_globals(sym.second.value, symbol_table, keep_symbols);
203+
}
201204
keep_symbols.add(sym.second);
202205
}
203206

@@ -263,13 +266,13 @@ void ci_lazy_methodst::resolve_method_names(
263266
/// \param entry_points: list of fully-qualified function names that
264267
/// we should assume are reachable
265268
/// \param ns: global namespace
266-
/// \param [out] lazy_methods: Populated with all Java reference types whose
267-
/// references may be passed, directly or indirectly, to any of the functions
268-
/// in `entry_points`.
269+
/// \param [out] needed_lazy_methods: Populated with all Java reference types
270+
/// whose references may be passed, directly or indirectly, to any of the
271+
/// functions in `entry_points`.
269272
void ci_lazy_methodst::initialize_needed_classes(
270273
const std::vector<irep_idt> &entry_points,
271274
const namespacet &ns,
272-
ci_lazy_methods_neededt &lazy_methods)
275+
ci_lazy_methods_neededt &needed_lazy_methods)
273276
{
274277
for(const auto &mname : entry_points)
275278
{
@@ -281,67 +284,66 @@ void ci_lazy_methodst::initialize_needed_classes(
281284
{
282285
const pointer_typet &original_pointer=to_pointer_type(param.type());
283286
initialize_all_needed_classes_from_pointer(
284-
original_pointer, ns, lazy_methods);
287+
original_pointer, ns, needed_lazy_methods);
285288
}
286289
}
287290
}
288291

289292
// Also add classes whose instances are magically
290293
// created by the JVM and so won't be spotted by
291294
// looking for constructors and calls as usual:
292-
lazy_methods.add_needed_class("java::java.lang.String");
293-
lazy_methods.add_needed_class("java::java.lang.Class");
294-
lazy_methods.add_needed_class("java::java.lang.Object");
295+
needed_lazy_methods.add_needed_class("java::java.lang.String");
296+
needed_lazy_methods.add_needed_class("java::java.lang.Class");
297+
needed_lazy_methods.add_needed_class("java::java.lang.Object");
295298

296299
// As in class_loader, ensure these classes stay available
297300
for(const auto &id : extra_needed_classes)
298-
lazy_methods.add_needed_class("java::" + id2string(id));
301+
needed_lazy_methods.add_needed_class("java::" + id2string(id));
299302
}
300303

301304
/// Build up list of methods for types for a pointer and any types it
302305
/// might be substituted for. See
303306
/// `initialize_needed_classes` for more details.
304307
/// \param pointer_type: The type to gather methods for.
305308
/// \param ns: global namespace
306-
/// \param [out] lazy_methods: Populated with all Java reference types whose
307-
/// references may be passed, directly or indirectly, to any of the functions
308-
/// in `entry_points
309+
/// \param [out] needed_lazy_methods: Populated with all Java reference types
310+
/// whose references may be passed, directly or indirectly, to any of the
311+
/// functions in `entry_points`
309312
void ci_lazy_methodst::initialize_all_needed_classes_from_pointer(
310313
const pointer_typet &pointer_type,
311314
const namespacet &ns,
312-
ci_lazy_methods_neededt &lazy_methods)
315+
ci_lazy_methods_neededt &needed_lazy_methods)
313316
{
314-
initialize_needed_classes_from_pointer(
315-
pointer_type, ns, lazy_methods);
317+
initialize_needed_classes_from_pointer(pointer_type, ns, needed_lazy_methods);
316318

317319
const pointer_typet &subbed_pointer_type=
318320
pointer_type_selector.convert_pointer_type(pointer_type, ns);
319321

320322
if(subbed_pointer_type!=pointer_type)
321323
{
322324
initialize_needed_classes_from_pointer(
323-
subbed_pointer_type, ns, lazy_methods);
325+
subbed_pointer_type, ns, needed_lazy_methods);
324326
}
325327
}
326328

327329
/// Build up list of methods for types for a specific pointer type. See
328330
/// `initialize_needed_classes` for more details.
329331
/// \param pointer_type: The type to gather methods for.
330332
/// \param ns: global namespace
331-
/// \param [out] lazy_methods: Populated with all Java reference types whose
332-
/// references may be passed, directly or indirectly, to any of the functions
333-
/// in `entry_points
333+
/// \param [out] needed_lazy_methods: Populated with all Java reference types
334+
/// whose references may be passed, directly or indirectly, to any of the
335+
/// functions in `entry_points`
334336
void ci_lazy_methodst::initialize_needed_classes_from_pointer(
335337
const pointer_typet &pointer_type,
336338
const namespacet &ns,
337-
ci_lazy_methods_neededt &lazy_methods)
339+
ci_lazy_methods_neededt &needed_lazy_methods)
338340
{
339341
const symbol_typet &class_type=to_symbol_type(pointer_type.subtype());
340342
const auto &param_classid=class_type.get_identifier();
341343

342-
if(lazy_methods.add_needed_class(param_classid))
344+
if(needed_lazy_methods.add_needed_class(param_classid))
343345
{
344-
gather_field_types(pointer_type.subtype(), ns, lazy_methods);
346+
gather_field_types(pointer_type.subtype(), ns, needed_lazy_methods);
345347
}
346348
}
347349

@@ -462,30 +464,30 @@ void ci_lazy_methodst::gather_needed_globals(
462464
gather_needed_globals(*opit, symbol_table, needed);
463465
}
464466

465-
/// See param lazy_methods
467+
/// See param needed_lazy_methods
466468
/// \param class_type: root of class tree to search
467469
/// \param ns: global namespace
468-
/// \param [out] lazy_methods: Popualted with all Java reference types reachable
469-
/// starting at `class_type`. For example if `class_type` is
470+
/// \param [out] needed_lazy_methods: Populated with all Java reference types
471+
/// reachable starting at `class_type`. For example if `class_type` is
470472
/// `symbol_typet("java::A")` and A has a B field, then `B` (but not `A`) will
471473
/// be noted as a needed class.
472474
void ci_lazy_methodst::gather_field_types(
473475
const typet &class_type,
474476
const namespacet &ns,
475-
ci_lazy_methods_neededt &lazy_methods)
477+
ci_lazy_methods_neededt &needed_lazy_methods)
476478
{
477479
const auto &underlying_type=to_struct_type(ns.follow(class_type));
478480
for(const auto &field : underlying_type.components())
479481
{
480482
if(field.type().id()==ID_struct || field.type().id()==ID_symbol)
481-
gather_field_types(field.type(), ns, lazy_methods);
483+
gather_field_types(field.type(), ns, needed_lazy_methods);
482484
else if(field.type().id()==ID_pointer)
483485
{
484486
// Skip array primitive pointers, for example:
485487
if(field.type().subtype().id()!=ID_symbol)
486488
continue;
487489
initialize_all_needed_classes_from_pointer(
488-
to_pointer_type(field.type()), ns, lazy_methods);
490+
to_pointer_type(field.type()), ns, needed_lazy_methods);
489491
}
490492
}
491493
}

src/java_bytecode/ci_lazy_methods.h

Lines changed: 67 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -26,18 +26,67 @@
2626

2727
class java_string_library_preprocesst;
2828

29-
typedef std::pair<
30-
const symbolt *,
31-
const java_bytecode_parse_treet::methodt *>
32-
lazy_method_valuet;
29+
// Map from method id to class_method_and_bytecodet
30+
class method_bytecodet
31+
{
32+
public:
33+
/// Class id, method id and parsed bytecode (methodt) of a single method
34+
struct class_method_and_bytecodet
35+
{
36+
irep_idt class_id;
37+
irep_idt method_id;
38+
const java_bytecode_parse_treet::methodt &method;
39+
};
3340

34-
typedef std::map<irep_idt, lazy_method_valuet>
35-
lazy_methodst;
41+
typedef optionalt<std::reference_wrapper<const class_method_and_bytecodet>>
42+
opt_reft;
43+
44+
private:
45+
typedef std::map<irep_idt, class_method_and_bytecodet> mapt;
46+
mapt map;
47+
48+
public:
49+
bool contains_method(const irep_idt &method_id) const
50+
{
51+
return map.count(method_id) != 0;
52+
}
53+
54+
void add(const class_method_and_bytecodet &method_class_and_bytecode)
55+
{
56+
map.emplace(
57+
std::make_pair(
58+
method_class_and_bytecode.method_id, method_class_and_bytecode));
59+
}
60+
61+
void add(
62+
const irep_idt &class_id,
63+
const irep_idt &method_id,
64+
const java_bytecode_parse_treet::methodt &method)
65+
{
66+
add(class_method_and_bytecodet{class_id, method_id, method});
67+
}
68+
69+
mapt::const_iterator begin() const
70+
{
71+
return map.begin();
72+
}
73+
mapt::const_iterator end() const
74+
{
75+
return map.end();
76+
}
77+
78+
opt_reft get(const irep_idt &method_id)
79+
{
80+
const auto it = map.find(method_id);
81+
if(it == map.end())
82+
return opt_reft();
83+
return std::cref(it->second);
84+
}
85+
};
3686

37-
typedef std::function<void(
38-
const symbolt &,
39-
const java_bytecode_parse_treet::methodt &,
40-
ci_lazy_methods_neededt)> method_convertert;
87+
typedef std::function<
88+
bool(const irep_idt &function_id, ci_lazy_methods_neededt)>
89+
method_convertert;
4190

4291
class ci_lazy_methodst:public messaget
4392
{
@@ -55,8 +104,8 @@ class ci_lazy_methodst:public messaget
55104
// not const since messaget
56105
bool operator()(
57106
symbol_tablet &symbol_table,
58-
lazy_methodst &lazy_methods,
59-
method_convertert method_converter);
107+
method_bytecodet &method_bytecode,
108+
const method_convertert &method_converter);
60109

61110
private:
62111
void resolve_method_names(
@@ -66,17 +115,17 @@ class ci_lazy_methodst:public messaget
66115
void initialize_needed_classes(
67116
const std::vector<irep_idt> &entry_points,
68117
const namespacet &ns,
69-
ci_lazy_methods_neededt &lazy_methods);
118+
ci_lazy_methods_neededt &needed_lazy_methods);
70119

71120
void initialize_all_needed_classes_from_pointer(
72121
const pointer_typet &pointer_type,
73122
const namespacet &ns,
74-
ci_lazy_methods_neededt &lazy_methods);
123+
ci_lazy_methods_neededt &needed_lazy_methods);
75124

76125
void initialize_needed_classes_from_pointer(
77126
const pointer_typet &pointer_type,
78127
const namespacet &ns,
79-
ci_lazy_methods_neededt &lazy_methods);
128+
ci_lazy_methods_neededt &needed_lazy_methods);
80129

81130
void gather_virtual_callsites(
82131
const exprt &e,
@@ -93,9 +142,10 @@ class ci_lazy_methodst:public messaget
93142
const symbol_tablet &symbol_table,
94143
symbol_tablet &needed);
95144

96-
void gather_field_types(const typet &class_type,
145+
void gather_field_types(
146+
const typet &class_type,
97147
const namespacet &ns,
98-
ci_lazy_methods_neededt &lazy_methods);
148+
ci_lazy_methods_neededt &needed_lazy_methods);
99149

100150
irep_idt get_virtual_method_target(
101151
const std::set<irep_idt> &needed_classes,

0 commit comments

Comments
 (0)