Skip to content

Commit b0059c9

Browse files
committed
Java class literals: init using a library hook when possible
The Java library definition of java.lang.Class, when nondet initialized, can be quite time consuming due to the enumeration constant dictionary's internal array. Instead, let's give the library a chance to set Class constants (literals like MyClass.class) itself. While we're at it we might as well gain the ability to prove some properties of the literals. We currently supply those class attributes that are easy to come by in the parse tree; with a little more effort in the parser IsLocalClass and IsMemberClass could be determined.
1 parent 34b0ac6 commit b0059c9

19 files changed

+221
-8
lines changed
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
2+
public @interface ExampleAnnotation {
3+
4+
}
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
public enum ExampleEnum {
2+
3+
}
4+
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
2+
interface ExampleInterface {
3+
4+
}
875 Bytes
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
public class Test {
3+
4+
public static void main() {
5+
6+
assert ExampleAnnotation.class.isAnnotation();
7+
assert int[].class.isArray();
8+
assert ExampleInterface.class.isInterface();
9+
assert ExampleEnum.class.isEnum();
10+
11+
// Note use of '==' not '.equals', as we expect the same exact literal,
12+
// which in jbmc always have the same address.
13+
// Note int[].class.getName() is not tested yet, as arrays' types are
14+
// printed as their raw signature, to be addressed separately.
15+
// Note also primitive types (e.g. int.class) are not addresses here, as
16+
// they are created through box types' static initializers (e.g. Integer
17+
// has a static member TYPE that holds the Class for `int.class`)
18+
19+
assert ExampleAnnotation.class.getName() == "ExampleAnnotation";
20+
assert ExampleInterface.class.getName() == "ExampleInterface";
21+
assert ExampleEnum.class.getName() == "ExampleEnum";
22+
23+
}
24+
25+
}
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package java.lang;
2+
3+
public class Class {
4+
5+
private String name;
6+
7+
private boolean isAnnotation;
8+
private boolean isArray;
9+
private boolean isInterface;
10+
private boolean isSynthetic;
11+
private boolean isLocalClass;
12+
private boolean isMemberClass;
13+
private boolean isEnum;
14+
15+
public void cproverInitializeClassLiteral(
16+
String name,
17+
boolean isAnnotation,
18+
boolean isArray,
19+
boolean isInterface,
20+
boolean isSynthetic,
21+
boolean isLocalClass,
22+
boolean isMemberClass,
23+
boolean isEnum) {
24+
25+
this.name = name;
26+
this.isAnnotation = isAnnotation;
27+
this.isArray = isArray;
28+
this.isInterface = isInterface;
29+
this.isSynthetic = isSynthetic;
30+
this.isLocalClass = isLocalClass;
31+
this.isMemberClass = isMemberClass;
32+
this.isEnum = isEnum;
33+
34+
}
35+
36+
public String getName() { return name; }
37+
38+
public boolean isAnnotation() { return isAnnotation; }
39+
public boolean isArray() { return isArray; }
40+
public boolean isInterface() { return isInterface; }
41+
public boolean isSynthetic() { return isSynthetic; }
42+
public boolean isLocalClass() { return isLocalClass; }
43+
public boolean isMemberClass() { return isMemberClass; }
44+
public boolean isEnum() { return isEnum; }
45+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CORE
2+
Test.class
3+
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
^VERIFICATION SUCCESSFUL$
7+
--
8+
^warning: ignoring

jbmc/src/java_bytecode/ci_lazy_methods.cpp

+26-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "java_string_library_preprocess.h"
1414
#include "remove_exceptions.h"
1515

16+
#include <util/expr_iterator.h>
1617
#include <util/suffix.h>
1718

1819
#include <goto-programs/resolve_inherited_component.h>
@@ -52,6 +53,19 @@ ci_lazy_methodst::ci_lazy_methodst(
5253
class_hierarchy(symbol_table);
5354
}
5455

56+
static bool references_class_model(const exprt &expr)
57+
{
58+
symbol_typet class_type("java::java.lang.Class");
59+
60+
for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it)
61+
{
62+
if(can_cast_expr<symbol_exprt>(*it) && it->type() == class_type)
63+
return true;
64+
}
65+
66+
return false;
67+
}
68+
5569
/// Uses a simple context-insensitive ('ci') analysis to determine which methods
5670
/// may be reachable from the main entry point. In brief, static methods are
5771
/// reachable if we find a callsite in another reachable site, while virtual
@@ -122,6 +136,7 @@ bool ci_lazy_methodst::operator()(
122136

123137
std::unordered_set<irep_idt> methods_already_populated;
124138
std::unordered_set<exprt, irep_hash> virtual_function_calls;
139+
bool class_initializer_seen = false;
125140

126141
bool any_new_classes = true;
127142
while(any_new_classes)
@@ -149,8 +164,17 @@ bool ci_lazy_methodst::operator()(
149164
// Couldn't convert this function
150165
continue;
151166
}
152-
gather_virtual_callsites(
153-
symbol_table.lookup_ref(mname).value, virtual_function_calls);
167+
const exprt &method_body = symbol_table.lookup_ref(mname).value;
168+
169+
gather_virtual_callsites(method_body, virtual_function_calls);
170+
171+
if(!class_initializer_seen && references_class_model(method_body))
172+
{
173+
class_initializer_seen = true;
174+
methods_to_convert_later.insert(
175+
get_java_class_literal_initializer_signature());
176+
}
177+
154178
any_new_methods=true;
155179
}
156180
}

jbmc/src/java_bytecode/java_bytecode_convert_class.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,9 @@ void java_bytecode_convert_classt::convert(
265265
class_type.set_tag(c.name);
266266
class_type.set(ID_base_name, c.name);
267267
class_type.set(ID_abstract, c.is_abstract);
268+
class_type.set(ID_is_annotation, c.is_annotation);
269+
class_type.set(ID_interface, c.is_interface);
270+
class_type.set(ID_synthetic, c.is_synthetic);
268271
class_type.set_final(c.is_final);
269272
if(c.is_enum)
270273
{

jbmc/src/java_bytecode/java_bytecode_language.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -785,7 +785,8 @@ bool java_bytecode_languaget::generate_support_functions(
785785
get_message_handler(),
786786
assume_inputs_non_null,
787787
object_factory_parameters,
788-
get_pointer_type_selector());
788+
get_pointer_type_selector(),
789+
string_refinement_enabled);
789790
}
790791

791792
/// Uses a simple context-insensitive ('ci') analysis to determine which methods

jbmc/src/java_bytecode/java_bytecode_parse_tree.h

+3
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,9 @@ class java_bytecode_parse_treet
204204
bool is_enum=false;
205205
bool is_public=false, is_protected=false, is_private=false;
206206
bool is_final = false;
207+
bool is_interface = false;
208+
bool is_synthetic = false;
209+
bool is_annotation = false;
207210
bool attribute_bootstrapmethods_read = false;
208211
size_t enum_elements=0;
209212

jbmc/src/java_bytecode/java_bytecode_parser.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -451,9 +451,11 @@ bool java_bytecode_parsert::parse()
451451
#define ACC_BRIDGE 0x0040
452452
#define ACC_VARARGS 0x0080
453453
#define ACC_NATIVE 0x0100
454+
#define ACC_INTERFACE 0x0200
454455
#define ACC_ABSTRACT 0x0400
455456
#define ACC_STRICT 0x0800
456457
#define ACC_SYNTHETIC 0x1000
458+
#define ACC_ANNOTATION 0x2000
457459
#define ACC_ENUM 0x4000
458460

459461
#ifdef _MSC_VER
@@ -496,6 +498,9 @@ void java_bytecode_parsert::rClassFile()
496498
parsed_class.is_protected=(access_flags&ACC_PROTECTED)!=0;
497499
parsed_class.is_private=(access_flags&ACC_PRIVATE)!=0;
498500
parsed_class.is_final = (access_flags & ACC_FINAL) != 0;
501+
parsed_class.is_interface = (access_flags & ACC_INTERFACE) != 0;
502+
parsed_class.is_synthetic = (access_flags & ACC_SYNTHETIC) != 0;
503+
parsed_class.is_annotation = (access_flags & ACC_ANNOTATION) != 0;
499504
parsed_class.name=
500505
constant(this_class).type().get(ID_C_base_name);
501506

jbmc/src/java_bytecode/java_entry_point.cpp

+85-4
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Author: Daniel Kroening, [email protected]
1717
#include <linking/static_lifetime_init.h>
1818

1919
#include "java_object_factory.h"
20+
#include "java_string_literals.h"
2021
#include "java_utils.h"
2122

2223
static void create_initialize(symbol_table_baset &symbol_table)
@@ -65,12 +66,44 @@ static bool should_init_symbol(const symbolt &sym)
6566
return is_java_string_literal_id(sym.name);
6667
}
6768

69+
irep_idt get_java_class_literal_initializer_signature()
70+
{
71+
static irep_idt signature =
72+
"java::java.lang.Class.cproverInitializeClassLiteral:"
73+
"(Ljava/lang/String;ZZZZZZZ)V";
74+
return signature;
75+
}
76+
77+
/// If symbol is a class literal, and an appropriate initializer method exists,
78+
/// return a pointer to its symbol. If not, return null.
79+
/// \param symbol: possible class literal symbol
80+
/// \param symbol_table: table to search
81+
/// \return pointer to the initializer method symbol or null
82+
static const symbolt *get_class_literal_initializer(
83+
const symbolt &symbol,
84+
const symbol_table_baset &symbol_table)
85+
{
86+
if(symbol.value.is_not_nil())
87+
return nullptr;
88+
if(symbol.type != symbol_typet("java::java.lang.Class"))
89+
return nullptr;
90+
if(!has_suffix(id2string(symbol.name), "@class_model"))
91+
return nullptr;
92+
return symbol_table.lookup(get_java_class_literal_initializer_signature());
93+
}
94+
95+
static constant_exprt constant_bool(bool val)
96+
{
97+
return from_integer(val ? 1 : 0, java_boolean_type());
98+
}
99+
68100
static void java_static_lifetime_init(
69101
symbol_table_baset &symbol_table,
70102
const source_locationt &source_location,
71103
bool assume_init_pointers_not_null,
72104
const object_factory_parameterst &object_factory_parameters,
73-
const select_pointer_typet &pointer_type_selector)
105+
const select_pointer_typet &pointer_type_selector,
106+
bool string_refinement_enabled)
74107
{
75108
symbolt &initialize_symbol=*symbol_table.get_writeable(INITIALIZE_FUNCTION);
76109
code_blockt &code_block=to_code_block(to_code(initialize_symbol.value));
@@ -88,7 +121,53 @@ static void java_static_lifetime_init(
88121
const symbolt &sym=*symbol_table.lookup(symname);
89122
if(should_init_symbol(sym))
90123
{
91-
if(sym.value.is_nil() && sym.type!=empty_typet())
124+
if(const symbolt *class_literal_init_method =
125+
get_class_literal_initializer(sym, symbol_table))
126+
{
127+
const std::string &name_str = id2string(sym.name);
128+
irep_idt class_name =
129+
name_str.substr(0, name_str.size() - strlen("@class_model"));
130+
const symbolt &class_symbol = symbol_table.lookup_ref(class_name);
131+
132+
bool class_is_array = has_prefix(name_str, "java::array[");
133+
134+
exprt name_literal(ID_java_string_literal);
135+
name_literal.set(ID_value, to_class_type(class_symbol.type).get_tag());
136+
137+
symbol_exprt class_name_literal =
138+
get_or_create_string_literal_symbol(
139+
name_literal, symbol_table, string_refinement_enabled);
140+
141+
// Call the literal initializer method instead of a nondet initializer:
142+
143+
// For arguments we can't parse yet:
144+
side_effect_expr_nondett nondet_bool(java_boolean_type());
145+
146+
// Argument order is: name, isAnnotation, isArray, isInterface,
147+
// isSynthetic, isLocalClass, isMemberClass, isEnum
148+
149+
code_function_callt initializer_call;
150+
initializer_call.function() = class_literal_init_method->symbol_expr();
151+
152+
code_function_callt::argumentst &args = initializer_call.arguments();
153+
154+
args.push_back(address_of_exprt(sym.symbol_expr())); /* this */
155+
args.push_back(address_of_exprt(class_name_literal));
156+
args.push_back(
157+
constant_bool(class_symbol.type.get_bool(ID_is_annotation)));
158+
args.push_back(constant_bool(class_is_array));
159+
args.push_back(
160+
constant_bool(class_symbol.type.get_bool(ID_interface)));
161+
args.push_back(
162+
constant_bool(class_symbol.type.get_bool(ID_synthetic)));
163+
args.push_back(nondet_bool);
164+
args.push_back(nondet_bool);
165+
args.push_back(
166+
constant_bool(class_symbol.type.get_bool(ID_enumeration)));
167+
168+
code_block.move_to_operands(initializer_call);
169+
}
170+
else if(sym.value.is_nil() && sym.type!=empty_typet())
92171
{
93172
bool allow_null=!assume_init_pointers_not_null;
94173
if(allow_null)
@@ -403,7 +482,8 @@ bool java_entry_point(
403482
message_handlert &message_handler,
404483
bool assume_init_pointers_not_null,
405484
const object_factory_parameterst &object_factory_parameters,
406-
const select_pointer_typet &pointer_type_selector)
485+
const select_pointer_typet &pointer_type_selector,
486+
bool string_refinement_enabled)
407487
{
408488
// check if the entry point is already there
409489
if(symbol_table.symbols.find(goto_functionst::entry_point())!=
@@ -426,7 +506,8 @@ bool java_entry_point(
426506
symbol.location,
427507
assume_init_pointers_not_null,
428508
object_factory_parameters,
429-
pointer_type_selector);
509+
pointer_type_selector,
510+
string_refinement_enabled);
430511

431512
return generate_java_start_function(
432513
symbol,

jbmc/src/java_bytecode/java_entry_point.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ bool java_entry_point(
2525
class message_handlert &message_handler,
2626
bool assume_init_pointers_not_null,
2727
const object_factory_parameterst &object_factory_parameters,
28-
const select_pointer_typet &pointer_type_selector);
28+
const select_pointer_typet &pointer_type_selector,
29+
bool string_refinement_enabled);
2930

3031
struct main_function_resultt
3132
{
@@ -60,6 +61,8 @@ struct main_function_resultt
6061
}
6162
};
6263

64+
irep_idt get_java_class_literal_initializer_signature();
65+
6366
/// Figures out the entry point of the code to verify
6467
main_function_resultt get_main_symbol(
6568
const symbol_table_baset &symbol_table,

src/util/irep_ids.def

+3
Original file line numberDiff line numberDiff line change
@@ -662,8 +662,11 @@ IREP_ID_TWO(overflow_shl, overflow-shl)
662662
IREP_ID_TWO(C_no_initialization_required, #no_initialization_required)
663663
IREP_ID_TWO(overlay_class, java::com.diffblue.OverlayClassImplementation)
664664
IREP_ID_TWO(overlay_method, java::com.diffblue.OverlayMethodImplementation)
665+
IREP_ID_ONE(is_annotation)
665666
IREP_ID_TWO(C_annotations, #annotations)
666667
IREP_ID_ONE(final)
668+
IREP_ID_ONE(synthetic)
669+
IREP_ID_ONE(interface)
667670

668671
// Projects depending on this code base that wish to extend the list of
669672
// available ids should provide a file local_irep_ids.h in their source tree and

0 commit comments

Comments
 (0)