Skip to content

Commit 5a33d91

Browse files
Merge pull request #582 from peterschrammel/update-from-master
Update test-gen-support from master
2 parents 60744ee + 1cbd032 commit 5a33d91

28 files changed

+694
-481
lines changed

.travis.yml

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ matrix:
2828
packages:
2929
- libwww-perl
3030
- clang-3.7
31+
- libstdc++-5-dev
3132
- libubsan0
3233
before_install:
3334
- mkdir bin ; ln -s /usr/bin/clang-3.7 bin/gcc

src/java_bytecode/java_bytecode_convert_class.cpp

+74-4
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,15 @@ class java_bytecode_convert_classt:public messaget
3030
bool _disable_runtime_checks,
3131
size_t _max_array_length,
3232
lazy_methodst& _lazy_methods,
33-
lazy_methods_modet _lazy_methods_mode):
33+
lazy_methods_modet _lazy_methods_mode,
34+
bool _string_refinement_enabled):
3435
messaget(_message_handler),
3536
symbol_table(_symbol_table),
3637
disable_runtime_checks(_disable_runtime_checks),
3738
max_array_length(_max_array_length),
3839
lazy_methods(_lazy_methods),
39-
lazy_methods_mode(_lazy_methods_mode)
40+
lazy_methods_mode(_lazy_methods_mode),
41+
string_refinement_enabled(_string_refinement_enabled)
4042
{
4143
}
4244

@@ -46,6 +48,9 @@ class java_bytecode_convert_classt:public messaget
4648

4749
if(parse_tree.loading_successful)
4850
convert(parse_tree.parsed_class);
51+
else if(string_refinement_enabled &&
52+
parse_tree.parsed_class.name=="java.lang.String")
53+
add_string_type();
4954
else
5055
generate_class_stub(parse_tree.parsed_class.name);
5156
}
@@ -59,13 +64,15 @@ class java_bytecode_convert_classt:public messaget
5964
const size_t max_array_length;
6065
lazy_methodst &lazy_methods;
6166
lazy_methods_modet lazy_methods_mode;
67+
bool string_refinement_enabled;
6268

6369
// conversion
6470
void convert(const classt &c);
6571
void convert(symbolt &class_symbol, const fieldt &f);
6672

6773
void generate_class_stub(const irep_idt &class_name);
6874
void add_array_types();
75+
void add_string_type();
6976
};
7077

7178
/*******************************************************************\
@@ -360,15 +367,17 @@ bool java_bytecode_convert_class(
360367
bool disable_runtime_checks,
361368
size_t max_array_length,
362369
lazy_methodst &lazy_methods,
363-
lazy_methods_modet lazy_methods_mode)
370+
lazy_methods_modet lazy_methods_mode,
371+
bool string_refinement_enabled)
364372
{
365373
java_bytecode_convert_classt java_bytecode_convert_class(
366374
symbol_table,
367375
message_handler,
368376
disable_runtime_checks,
369377
max_array_length,
370378
lazy_methods,
371-
lazy_methods_mode);
379+
lazy_methods_mode,
380+
string_refinement_enabled);
372381

373382
try
374383
{
@@ -392,3 +401,64 @@ bool java_bytecode_convert_class(
392401

393402
return true;
394403
}
404+
405+
/*******************************************************************\
406+
407+
Function: java_bytecode_convert_classt::add_string_type
408+
409+
Purpose: Implements the java.lang.String type in the case that
410+
we provide an internal implementation.
411+
412+
\*******************************************************************/
413+
414+
void java_bytecode_convert_classt::add_string_type()
415+
{
416+
class_typet string_type;
417+
string_type.set_tag("java.lang.String");
418+
string_type.components().resize(3);
419+
string_type.components()[0].set_name("@java.lang.Object");
420+
string_type.components()[0].set_pretty_name("@java.lang.Object");
421+
string_type.components()[0].type()=symbol_typet("java::java.lang.Object");
422+
string_type.components()[1].set_name("length");
423+
string_type.components()[1].set_pretty_name("length");
424+
string_type.components()[1].type()=java_int_type();
425+
string_type.components()[2].set_name("data");
426+
string_type.components()[2].set_pretty_name("data");
427+
// Use a pointer-to-unbounded-array instead of a pointer-to-char.
428+
// Saves some casting in the string refinement algorithm but may
429+
// be unnecessary.
430+
string_type.components()[2].type()=pointer_typet(
431+
array_typet(java_char_type(), infinity_exprt(java_int_type())));
432+
string_type.add_base(symbol_typet("java::java.lang.Object"));
433+
434+
symbolt string_symbol;
435+
string_symbol.name="java::java.lang.String";
436+
string_symbol.base_name="java.lang.String";
437+
string_symbol.type=string_type;
438+
string_symbol.is_type=true;
439+
440+
symbol_table.add(string_symbol);
441+
442+
// Also add a stub of `String.equals` so that remove-virtual-functions
443+
// generates a check for Object.equals vs. String.equals.
444+
// No need to fill it in, as pass_preprocess will replace the calls again.
445+
symbolt string_equals_symbol;
446+
string_equals_symbol.name=
447+
"java::java.lang.String.equals:(Ljava/lang/Object;)Z";
448+
string_equals_symbol.base_name="java.lang.String.equals";
449+
string_equals_symbol.pretty_name="java.lang.String.equals";
450+
string_equals_symbol.mode=ID_java;
451+
452+
code_typet string_equals_type;
453+
string_equals_type.return_type()=java_boolean_type();
454+
code_typet::parametert thisparam;
455+
thisparam.set_this();
456+
thisparam.type()=pointer_typet(symbol_typet(string_symbol.name));
457+
code_typet::parametert otherparam;
458+
otherparam.type()=pointer_typet(symbol_typet("java::java.lang.Object"));
459+
string_equals_type.parameters().push_back(thisparam);
460+
string_equals_type.parameters().push_back(otherparam);
461+
string_equals_symbol.type=std::move(string_equals_type);
462+
463+
symbol_table.add(string_equals_symbol);
464+
}

src/java_bytecode/java_bytecode_convert_class.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ bool java_bytecode_convert_class(
2222
bool disable_runtime_checks,
2323
size_t max_array_length,
2424
lazy_methodst &,
25-
lazy_methods_modet);
25+
lazy_methods_modet,
26+
bool string_refinement_enabled);
2627

2728
#endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_CLASS_H

src/java_bytecode/java_bytecode_language.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd)
4343
{
4444
disable_runtime_checks=cmd.isset("disable-runtime-check");
4545
assume_inputs_non_null=cmd.isset("java-assume-inputs-non-null");
46+
string_refinement_enabled=cmd.isset("string-refine");
4647
if(cmd.isset("java-max-input-array-length"))
4748
max_nondet_array_length=
4849
std::stoi(cmd.get_value("java-max-input-array-length"));
@@ -494,7 +495,8 @@ bool java_bytecode_languaget::typecheck(
494495
disable_runtime_checks,
495496
max_user_array_length,
496497
lazy_methods,
497-
lazy_methods_mode))
498+
lazy_methods_mode,
499+
string_refinement_enabled))
498500
return true;
499501
}
500502

@@ -509,7 +511,7 @@ bool java_bytecode_languaget::typecheck(
509511

510512
// now typecheck all
511513
if(java_bytecode_typecheck(
512-
symbol_table, get_message_handler()))
514+
symbol_table, get_message_handler(), string_refinement_enabled))
513515
return true;
514516

515517
return false;

src/java_bytecode/java_bytecode_language.h

+1
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ class java_bytecode_languaget:public languaget
104104
size_t max_user_array_length; // max size for user code created arrays
105105
lazy_methodst lazy_methods;
106106
lazy_methods_modet lazy_methods_mode;
107+
bool string_refinement_enabled;
107108
};
108109

109110
languaget *new_java_bytecode_language();

src/java_bytecode/java_bytecode_typecheck.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -126,10 +126,11 @@ Function: java_bytecode_typecheck
126126

127127
bool java_bytecode_typecheck(
128128
symbol_tablet &symbol_table,
129-
message_handlert &message_handler)
129+
message_handlert &message_handler,
130+
bool string_refinement_enabled)
130131
{
131132
java_bytecode_typecheckt java_bytecode_typecheck(
132-
symbol_table, message_handler);
133+
symbol_table, message_handler, string_refinement_enabled);
133134
return java_bytecode_typecheck.typecheck_main();
134135
}
135136

src/java_bytecode/java_bytecode_typecheck.h

+7-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ Author: Daniel Kroening, [email protected]
2121

2222
bool java_bytecode_typecheck(
2323
symbol_tablet &symbol_table,
24-
message_handlert &message_handler);
24+
message_handlert &message_handler,
25+
bool string_refinement_enabled);
2526

2627
bool java_bytecode_typecheck(
2728
exprt &expr,
@@ -33,10 +34,12 @@ class java_bytecode_typecheckt:public typecheckt
3334
public:
3435
java_bytecode_typecheckt(
3536
symbol_tablet &_symbol_table,
36-
message_handlert &_message_handler):
37+
message_handlert &_message_handler,
38+
bool _string_refinement_enabled):
3739
typecheckt(_message_handler),
3840
symbol_table(_symbol_table),
39-
ns(symbol_table)
41+
ns(symbol_table),
42+
string_refinement_enabled(_string_refinement_enabled)
4043
{
4144
}
4245

@@ -48,6 +51,7 @@ class java_bytecode_typecheckt:public typecheckt
4851
protected:
4952
symbol_tablet &symbol_table;
5053
const namespacet ns;
54+
bool string_refinement_enabled;
5155

5256
void typecheck_type_symbol(symbolt &);
5357
void typecheck_non_type_symbol(symbolt &);

src/java_bytecode/java_bytecode_typecheck_expr.cpp

+107-3
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,14 @@ Author: Daniel Kroening, [email protected]
1010

1111
#include <util/std_expr.h>
1212
#include <util/prefix.h>
13+
#include <util/arith_tools.h>
14+
#include <util/unicode.h>
15+
16+
#include <linking/zero_initializer.h>
1317

1418
#include "java_bytecode_typecheck.h"
1519
#include "java_pointer_casts.h"
20+
#include "java_types.h"
1621

1722
/*******************************************************************\
1823
@@ -114,6 +119,27 @@ static std::string escape_non_alnum(const std::string &toescape)
114119

115120
/*******************************************************************\
116121
122+
Function: utf16_to_array
123+
124+
Inputs: `in`: wide string to convert
125+
126+
Outputs: Returns a Java char array containing the same wchars.
127+
128+
Purpose: Convert UCS-2 or UTF-16 to an array expression.
129+
130+
\*******************************************************************/
131+
132+
static array_exprt utf16_to_array(const std::wstring &in)
133+
{
134+
const auto jchar=java_char_type();
135+
array_exprt ret(array_typet(jchar, infinity_exprt(java_int_type())));
136+
for(const auto c : in)
137+
ret.copy_to_operands(from_integer(c, jchar));
138+
return ret;
139+
}
140+
141+
/*******************************************************************\
142+
117143
Function: java_bytecode_typecheckt::typecheck_expr_java_string_literal
118144
119145
Inputs:
@@ -136,28 +162,106 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr)
136162
auto findit=symbol_table.symbols.find(escaped_symbol_name);
137163
if(findit!=symbol_table.symbols.end())
138164
{
139-
expr=findit->second.symbol_expr();
165+
expr=address_of_exprt(findit->second.symbol_expr());
140166
return;
141167
}
142168

143169
// Create a new symbol:
144170
symbolt new_symbol;
145171
new_symbol.name=escaped_symbol_name;
146-
new_symbol.type=pointer_typet(string_type);
172+
new_symbol.type=string_type;
147173
new_symbol.base_name="Literal";
148174
new_symbol.pretty_name=value;
149175
new_symbol.mode=ID_java;
150176
new_symbol.is_type=false;
151177
new_symbol.is_lvalue=true;
152178
new_symbol.is_static_lifetime=true; // These are basically const global data.
153179

180+
// Regardless of string refinement setting, at least initialize
181+
// the literal with @clsid = String and @lock = false:
182+
symbol_typet jlo_symbol("java::java.lang.Object");
183+
const auto &jlo_struct=to_struct_type(ns.follow(jlo_symbol));
184+
struct_exprt jlo_init(jlo_symbol);
185+
const auto &jls_struct=to_struct_type(ns.follow(string_type));
186+
187+
jlo_init.copy_to_operands(
188+
constant_exprt(
189+
"java::java.lang.String",
190+
jlo_struct.components()[0].type()));
191+
jlo_init.copy_to_operands(
192+
from_integer(
193+
0,
194+
jlo_struct.components()[1].type()));
195+
196+
// If string refinement *is* around, populate the actual
197+
// contents as well:
198+
if(string_refinement_enabled)
199+
{
200+
struct_exprt literal_init(new_symbol.type);
201+
literal_init.move_to_operands(jlo_init);
202+
203+
// Initialize the string with a constant utf-16 array:
204+
symbolt array_symbol;
205+
array_symbol.name=escaped_symbol_name+"_constarray";
206+
array_symbol.type=array_typet(
207+
java_char_type(), infinity_exprt(java_int_type()));
208+
array_symbol.base_name="Literal_constarray";
209+
array_symbol.pretty_name=value;
210+
array_symbol.mode=ID_java;
211+
array_symbol.is_type=false;
212+
array_symbol.is_lvalue=true;
213+
// These are basically const global data:
214+
array_symbol.is_static_lifetime=true;
215+
array_symbol.is_state_var=true;
216+
auto literal_array=utf16_to_array(
217+
utf8_to_utf16_little_endian(id2string(value)));
218+
array_symbol.value=literal_array;
219+
220+
if(symbol_table.add(array_symbol))
221+
throw "failed to add constarray symbol to symbol table";
222+
223+
literal_init.copy_to_operands(
224+
from_integer(literal_array.operands().size(),
225+
jls_struct.components()[1].type()));
226+
literal_init.copy_to_operands(
227+
address_of_exprt(array_symbol.symbol_expr()));
228+
229+
new_symbol.value=literal_init;
230+
}
231+
else if(jls_struct.components().size()>=1 &&
232+
jls_struct.components()[0].get_name()=="@java.lang.Object")
233+
{
234+
// Case where something defined java.lang.String, so it has
235+
// a proper base class (always java.lang.Object in practical
236+
// JDKs seen so far)
237+
struct_exprt literal_init(new_symbol.type);
238+
literal_init.move_to_operands(jlo_init);
239+
for(const auto &comp : jls_struct.components())
240+
{
241+
if(comp.get_name()=="@java.lang.Object")
242+
continue;
243+
// Other members of JDK's java.lang.String we don't understand
244+
// without string-refinement. Just zero-init them; consider using
245+
// test-gen-like nondet object trees instead.
246+
literal_init.copy_to_operands(
247+
zero_initializer(comp.type(), expr.source_location(), ns));
248+
}
249+
new_symbol.value=literal_init;
250+
}
251+
else if(jls_struct.get_bool(ID_incomplete_class))
252+
{
253+
// Case where java.lang.String was stubbed, and so directly defines
254+
// @class_identifier and @lock:
255+
new_symbol.value=jlo_init;
256+
}
257+
154258
if(symbol_table.add(new_symbol))
155259
{
156260
error() << "failed to add string literal symbol to symbol table" << eom;
157261
throw 0;
158262
}
159263

160-
expr=new_symbol.symbol_expr();
264+
expr=address_of_exprt(new_symbol.symbol_expr());
161265
}
162266

163267
/*******************************************************************\

0 commit comments

Comments
 (0)