Skip to content

Update test-gen-support from master #582

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ matrix:
packages:
- libwww-perl
- clang-3.7
- libstdc++-5-dev
- libubsan0
before_install:
- mkdir bin ; ln -s /usr/bin/clang-3.7 bin/gcc
Expand Down
78 changes: 74 additions & 4 deletions src/java_bytecode/java_bytecode_convert_class.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@ class java_bytecode_convert_classt:public messaget
bool _disable_runtime_checks,
size_t _max_array_length,
lazy_methodst& _lazy_methods,
lazy_methods_modet _lazy_methods_mode):
lazy_methods_modet _lazy_methods_mode,
bool _string_refinement_enabled):
messaget(_message_handler),
symbol_table(_symbol_table),
disable_runtime_checks(_disable_runtime_checks),
max_array_length(_max_array_length),
lazy_methods(_lazy_methods),
lazy_methods_mode(_lazy_methods_mode)
lazy_methods_mode(_lazy_methods_mode),
string_refinement_enabled(_string_refinement_enabled)
{
}

Expand All @@ -46,6 +48,9 @@ class java_bytecode_convert_classt:public messaget

if(parse_tree.loading_successful)
convert(parse_tree.parsed_class);
else if(string_refinement_enabled &&
parse_tree.parsed_class.name=="java.lang.String")
add_string_type();
else
generate_class_stub(parse_tree.parsed_class.name);
}
Expand All @@ -59,13 +64,15 @@ class java_bytecode_convert_classt:public messaget
const size_t max_array_length;
lazy_methodst &lazy_methods;
lazy_methods_modet lazy_methods_mode;
bool string_refinement_enabled;

// conversion
void convert(const classt &c);
void convert(symbolt &class_symbol, const fieldt &f);

void generate_class_stub(const irep_idt &class_name);
void add_array_types();
void add_string_type();
};

/*******************************************************************\
Expand Down Expand Up @@ -360,15 +367,17 @@ bool java_bytecode_convert_class(
bool disable_runtime_checks,
size_t max_array_length,
lazy_methodst &lazy_methods,
lazy_methods_modet lazy_methods_mode)
lazy_methods_modet lazy_methods_mode,
bool string_refinement_enabled)
{
java_bytecode_convert_classt java_bytecode_convert_class(
symbol_table,
message_handler,
disable_runtime_checks,
max_array_length,
lazy_methods,
lazy_methods_mode);
lazy_methods_mode,
string_refinement_enabled);

try
{
Expand All @@ -392,3 +401,64 @@ bool java_bytecode_convert_class(

return true;
}

/*******************************************************************\

Function: java_bytecode_convert_classt::add_string_type

Purpose: Implements the java.lang.String type in the case that
we provide an internal implementation.

\*******************************************************************/

void java_bytecode_convert_classt::add_string_type()
{
class_typet string_type;
string_type.set_tag("java.lang.String");
string_type.components().resize(3);
string_type.components()[0].set_name("@java.lang.Object");
string_type.components()[0].set_pretty_name("@java.lang.Object");
string_type.components()[0].type()=symbol_typet("java::java.lang.Object");
string_type.components()[1].set_name("length");
string_type.components()[1].set_pretty_name("length");
string_type.components()[1].type()=java_int_type();
string_type.components()[2].set_name("data");
string_type.components()[2].set_pretty_name("data");
// Use a pointer-to-unbounded-array instead of a pointer-to-char.
// Saves some casting in the string refinement algorithm but may
// be unnecessary.
string_type.components()[2].type()=pointer_typet(
array_typet(java_char_type(), infinity_exprt(java_int_type())));
string_type.add_base(symbol_typet("java::java.lang.Object"));

symbolt string_symbol;
string_symbol.name="java::java.lang.String";
string_symbol.base_name="java.lang.String";
string_symbol.type=string_type;
string_symbol.is_type=true;

symbol_table.add(string_symbol);

// Also add a stub of `String.equals` so that remove-virtual-functions
// generates a check for Object.equals vs. String.equals.
// No need to fill it in, as pass_preprocess will replace the calls again.
symbolt string_equals_symbol;
string_equals_symbol.name=
"java::java.lang.String.equals:(Ljava/lang/Object;)Z";
string_equals_symbol.base_name="java.lang.String.equals";
string_equals_symbol.pretty_name="java.lang.String.equals";
string_equals_symbol.mode=ID_java;

code_typet string_equals_type;
string_equals_type.return_type()=java_boolean_type();
code_typet::parametert thisparam;
thisparam.set_this();
thisparam.type()=pointer_typet(symbol_typet(string_symbol.name));
code_typet::parametert otherparam;
otherparam.type()=pointer_typet(symbol_typet("java::java.lang.Object"));
string_equals_type.parameters().push_back(thisparam);
string_equals_type.parameters().push_back(otherparam);
string_equals_symbol.type=std::move(string_equals_type);

symbol_table.add(string_equals_symbol);
}
3 changes: 2 additions & 1 deletion src/java_bytecode/java_bytecode_convert_class.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ bool java_bytecode_convert_class(
bool disable_runtime_checks,
size_t max_array_length,
lazy_methodst &,
lazy_methods_modet);
lazy_methods_modet,
bool string_refinement_enabled);

#endif // CPROVER_JAVA_BYTECODE_JAVA_BYTECODE_CONVERT_CLASS_H
6 changes: 4 additions & 2 deletions src/java_bytecode/java_bytecode_language.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ void java_bytecode_languaget::get_language_options(const cmdlinet &cmd)
{
disable_runtime_checks=cmd.isset("disable-runtime-check");
assume_inputs_non_null=cmd.isset("java-assume-inputs-non-null");
string_refinement_enabled=cmd.isset("string-refine");
if(cmd.isset("java-max-input-array-length"))
max_nondet_array_length=
std::stoi(cmd.get_value("java-max-input-array-length"));
Expand Down Expand Up @@ -494,7 +495,8 @@ bool java_bytecode_languaget::typecheck(
disable_runtime_checks,
max_user_array_length,
lazy_methods,
lazy_methods_mode))
lazy_methods_mode,
string_refinement_enabled))
return true;
}

Expand All @@ -509,7 +511,7 @@ bool java_bytecode_languaget::typecheck(

// now typecheck all
if(java_bytecode_typecheck(
symbol_table, get_message_handler()))
symbol_table, get_message_handler(), string_refinement_enabled))
return true;

return false;
Expand Down
1 change: 1 addition & 0 deletions src/java_bytecode/java_bytecode_language.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ class java_bytecode_languaget:public languaget
size_t max_user_array_length; // max size for user code created arrays
lazy_methodst lazy_methods;
lazy_methods_modet lazy_methods_mode;
bool string_refinement_enabled;
};

languaget *new_java_bytecode_language();
Expand Down
5 changes: 3 additions & 2 deletions src/java_bytecode/java_bytecode_typecheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,11 @@ Function: java_bytecode_typecheck

bool java_bytecode_typecheck(
symbol_tablet &symbol_table,
message_handlert &message_handler)
message_handlert &message_handler,
bool string_refinement_enabled)
{
java_bytecode_typecheckt java_bytecode_typecheck(
symbol_table, message_handler);
symbol_table, message_handler, string_refinement_enabled);
return java_bytecode_typecheck.typecheck_main();
}

Expand Down
10 changes: 7 additions & 3 deletions src/java_bytecode/java_bytecode_typecheck.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ Author: Daniel Kroening, [email protected]

bool java_bytecode_typecheck(
symbol_tablet &symbol_table,
message_handlert &message_handler);
message_handlert &message_handler,
bool string_refinement_enabled);

bool java_bytecode_typecheck(
exprt &expr,
Expand All @@ -33,10 +34,12 @@ class java_bytecode_typecheckt:public typecheckt
public:
java_bytecode_typecheckt(
symbol_tablet &_symbol_table,
message_handlert &_message_handler):
message_handlert &_message_handler,
bool _string_refinement_enabled):
typecheckt(_message_handler),
symbol_table(_symbol_table),
ns(symbol_table)
ns(symbol_table),
string_refinement_enabled(_string_refinement_enabled)
{
}

Expand All @@ -48,6 +51,7 @@ class java_bytecode_typecheckt:public typecheckt
protected:
symbol_tablet &symbol_table;
const namespacet ns;
bool string_refinement_enabled;

void typecheck_type_symbol(symbolt &);
void typecheck_non_type_symbol(symbolt &);
Expand Down
110 changes: 107 additions & 3 deletions src/java_bytecode/java_bytecode_typecheck_expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,14 @@ Author: Daniel Kroening, [email protected]

#include <util/std_expr.h>
#include <util/prefix.h>
#include <util/arith_tools.h>
#include <util/unicode.h>

#include <linking/zero_initializer.h>

#include "java_bytecode_typecheck.h"
#include "java_pointer_casts.h"
#include "java_types.h"

/*******************************************************************\

Expand Down Expand Up @@ -114,6 +119,27 @@ static std::string escape_non_alnum(const std::string &toescape)

/*******************************************************************\

Function: utf16_to_array

Inputs: `in`: wide string to convert

Outputs: Returns a Java char array containing the same wchars.

Purpose: Convert UCS-2 or UTF-16 to an array expression.

\*******************************************************************/

static array_exprt utf16_to_array(const std::wstring &in)
{
const auto jchar=java_char_type();
array_exprt ret(array_typet(jchar, infinity_exprt(java_int_type())));
for(const auto c : in)
ret.copy_to_operands(from_integer(c, jchar));
return ret;
}

/*******************************************************************\

Function: java_bytecode_typecheckt::typecheck_expr_java_string_literal

Inputs:
Expand All @@ -136,28 +162,106 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr)
auto findit=symbol_table.symbols.find(escaped_symbol_name);
if(findit!=symbol_table.symbols.end())
{
expr=findit->second.symbol_expr();
expr=address_of_exprt(findit->second.symbol_expr());
return;
}

// Create a new symbol:
symbolt new_symbol;
new_symbol.name=escaped_symbol_name;
new_symbol.type=pointer_typet(string_type);
new_symbol.type=string_type;
new_symbol.base_name="Literal";
new_symbol.pretty_name=value;
new_symbol.mode=ID_java;
new_symbol.is_type=false;
new_symbol.is_lvalue=true;
new_symbol.is_static_lifetime=true; // These are basically const global data.

// Regardless of string refinement setting, at least initialize
// the literal with @clsid = String and @lock = false:
symbol_typet jlo_symbol("java::java.lang.Object");
const auto &jlo_struct=to_struct_type(ns.follow(jlo_symbol));
struct_exprt jlo_init(jlo_symbol);
const auto &jls_struct=to_struct_type(ns.follow(string_type));

jlo_init.copy_to_operands(
constant_exprt(
"java::java.lang.String",
jlo_struct.components()[0].type()));
jlo_init.copy_to_operands(
from_integer(
0,
jlo_struct.components()[1].type()));

// If string refinement *is* around, populate the actual
// contents as well:
if(string_refinement_enabled)
{
struct_exprt literal_init(new_symbol.type);
literal_init.move_to_operands(jlo_init);

// Initialize the string with a constant utf-16 array:
symbolt array_symbol;
array_symbol.name=escaped_symbol_name+"_constarray";
array_symbol.type=array_typet(
java_char_type(), infinity_exprt(java_int_type()));
array_symbol.base_name="Literal_constarray";
array_symbol.pretty_name=value;
array_symbol.mode=ID_java;
array_symbol.is_type=false;
array_symbol.is_lvalue=true;
// These are basically const global data:
array_symbol.is_static_lifetime=true;
array_symbol.is_state_var=true;
auto literal_array=utf16_to_array(
utf8_to_utf16_little_endian(id2string(value)));
array_symbol.value=literal_array;

if(symbol_table.add(array_symbol))
throw "failed to add constarray symbol to symbol table";

literal_init.copy_to_operands(
from_integer(literal_array.operands().size(),
jls_struct.components()[1].type()));
literal_init.copy_to_operands(
address_of_exprt(array_symbol.symbol_expr()));

new_symbol.value=literal_init;
}
else if(jls_struct.components().size()>=1 &&
jls_struct.components()[0].get_name()=="@java.lang.Object")
{
// Case where something defined java.lang.String, so it has
// a proper base class (always java.lang.Object in practical
// JDKs seen so far)
struct_exprt literal_init(new_symbol.type);
literal_init.move_to_operands(jlo_init);
for(const auto &comp : jls_struct.components())
{
if(comp.get_name()=="@java.lang.Object")
continue;
// Other members of JDK's java.lang.String we don't understand
// without string-refinement. Just zero-init them; consider using
// test-gen-like nondet object trees instead.
literal_init.copy_to_operands(
zero_initializer(comp.type(), expr.source_location(), ns));
}
new_symbol.value=literal_init;
}
else if(jls_struct.get_bool(ID_incomplete_class))
{
// Case where java.lang.String was stubbed, and so directly defines
// @class_identifier and @lock:
new_symbol.value=jlo_init;
}

if(symbol_table.add(new_symbol))
{
error() << "failed to add string literal symbol to symbol table" << eom;
throw 0;
}

expr=new_symbol.symbol_expr();
expr=address_of_exprt(new_symbol.symbol_expr());
}

/*******************************************************************\
Expand Down
Loading