Skip to content

Commit d95cb12

Browse files
committed
Move string literal initialisation into separate file
1 parent 515ebdd commit d95cb12

File tree

4 files changed

+226
-163
lines changed

4 files changed

+226
-163
lines changed

src/java_bytecode/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ SRC = bytecode_info.cpp \
2424
java_pointer_casts.cpp \
2525
java_root_class.cpp \
2626
java_string_library_preprocess.cpp \
27+
java_string_literals.cpp \
2728
java_types.cpp \
2829
java_utils.cpp \
2930
generate_java_generic_type.cpp \

src/java_bytecode/java_bytecode_typecheck_expr.cpp

+4-163
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,6 @@ Author: Daniel Kroening, [email protected]
1111

1212
#include "java_bytecode_typecheck.h"
1313

14-
#include <iomanip>
15-
1614
#include <util/arith_tools.h>
1715
#include <util/unicode.h>
1816

@@ -23,6 +21,7 @@ Author: Daniel Kroening, [email protected]
2321
#include "java_utils.h"
2422
#include "java_root_class.h"
2523
#include "java_string_library_preprocess.h"
24+
#include "java_string_literals.h"
2625

2726
void java_bytecode_typecheckt::typecheck_expr(exprt &expr)
2827
{
@@ -71,169 +70,11 @@ void java_bytecode_typecheckt::typecheck_expr_java_new_array(
7170
typecheck_type(type);
7271
}
7372

74-
static std::string escape_non_alnum(const std::string &toescape)
75-
{
76-
std::ostringstream escaped;
77-
for(auto &ch : toescape)
78-
{
79-
if(ch=='_')
80-
escaped << "__";
81-
else if(isalnum(ch))
82-
escaped << ch;
83-
else
84-
escaped << '_'
85-
<< std::hex
86-
<< std::setfill('0')
87-
<< std::setw(2)
88-
<< (unsigned int)ch;
89-
}
90-
return escaped.str();
91-
}
92-
93-
/// Convert UCS-2 or UTF-16 to an array expression.
94-
/// \par parameters: `in`: wide string to convert
95-
/// \return Returns a Java char array containing the same wchars.
96-
static array_exprt utf16_to_array(const std::wstring &in)
97-
{
98-
const auto jchar=java_char_type();
99-
array_exprt ret(
100-
array_typet(jchar, from_integer(in.length(), java_int_type())));
101-
for(const auto c : in)
102-
ret.copy_to_operands(from_integer(c, jchar));
103-
return ret;
104-
}
105-
10673
void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr)
10774
{
108-
const irep_idt value=expr.get(ID_value);
109-
const symbol_typet string_type("java::java.lang.String");
110-
111-
const std::string escaped_symbol_name = escape_non_alnum(id2string(value));
112-
const std::string escaped_symbol_name_with_prefix =
113-
JAVA_STRING_LITERAL_PREFIX "." + escaped_symbol_name;
114-
115-
auto findit = symbol_table.symbols.find(escaped_symbol_name_with_prefix);
116-
if(findit!=symbol_table.symbols.end())
117-
{
118-
expr=address_of_exprt(findit->second.symbol_expr());
119-
return;
120-
}
121-
122-
// Create a new symbol:
123-
symbolt new_symbol;
124-
new_symbol.name = escaped_symbol_name_with_prefix;
125-
new_symbol.type=string_type;
126-
new_symbol.base_name = escaped_symbol_name;
127-
new_symbol.pretty_name=value;
128-
new_symbol.mode=ID_java;
129-
new_symbol.is_type=false;
130-
new_symbol.is_lvalue=true;
131-
new_symbol.is_static_lifetime=true; // These are basically const global data.
132-
133-
// Regardless of string refinement setting, at least initialize
134-
// the literal with @clsid = String and @lock = false:
135-
symbol_typet jlo_symbol("java::java.lang.Object");
136-
const auto &jlo_struct=to_struct_type(ns.follow(jlo_symbol));
137-
struct_exprt jlo_init(jlo_symbol);
138-
const auto &jls_struct=to_struct_type(ns.follow(string_type));
139-
java_root_class_init(jlo_init, jlo_struct, false, "java::java.lang.String");
140-
141-
// If string refinement *is* around, populate the actual
142-
// contents as well:
143-
if(string_refinement_enabled)
144-
{
145-
struct_exprt literal_init(new_symbol.type);
146-
literal_init.operands().resize(jls_struct.components().size());
147-
const std::size_t jlo_nb = jls_struct.component_number("@java.lang.Object");
148-
literal_init.operands()[jlo_nb] = jlo_init;
149-
150-
const std::size_t length_nb = jls_struct.component_number("length");
151-
const typet &length_type = jls_struct.components()[length_nb].type();
152-
const exprt length = from_integer(id2string(value).size(), length_type);
153-
literal_init.operands()[length_nb] = length;
154-
155-
// Initialize the string with a constant utf-16 array:
156-
symbolt array_symbol;
157-
array_symbol.name = escaped_symbol_name_with_prefix + "_constarray";
158-
array_symbol.base_name = escaped_symbol_name + "_constarray";
159-
array_symbol.pretty_name=value;
160-
array_symbol.mode=ID_java;
161-
array_symbol.is_type=false;
162-
array_symbol.is_lvalue=true;
163-
// These are basically const global data:
164-
array_symbol.is_static_lifetime=true;
165-
array_symbol.is_state_var=true;
166-
array_symbol.value =
167-
utf16_to_array(utf8_to_utf16_little_endian(id2string(value)));
168-
array_symbol.type = array_symbol.value.type();
169-
170-
if(symbol_table.add(array_symbol))
171-
throw "failed to add constarray symbol to symbol table";
172-
173-
const symbol_exprt array_expr = array_symbol.symbol_expr();
174-
const address_of_exprt array_pointer(
175-
index_exprt(array_expr, from_integer(0, java_int_type())));
176-
177-
const std::size_t data_nb = jls_struct.component_number("data");
178-
literal_init.operands()[data_nb] = array_pointer;
179-
180-
// Associate array with pointer
181-
symbolt return_symbol;
182-
return_symbol.name = escaped_symbol_name_with_prefix + "_return_value";
183-
return_symbol.base_name = escaped_symbol_name + "_return_value";
184-
return_symbol.pretty_name =
185-
escaped_symbol_name.length() > 10
186-
? escaped_symbol_name.substr(0, 10) + "..._return_value"
187-
: escaped_symbol_name + "_return_value";
188-
return_symbol.mode = ID_java;
189-
return_symbol.is_type = false;
190-
return_symbol.is_lvalue = true;
191-
return_symbol.is_static_lifetime = true;
192-
return_symbol.is_state_var = true;
193-
return_symbol.value = make_function_application(
194-
ID_cprover_associate_array_to_pointer_func,
195-
{array_symbol.value, array_pointer},
196-
java_int_type(),
197-
symbol_table);
198-
return_symbol.type = return_symbol.value.type();
199-
if(symbol_table.add(return_symbol))
200-
throw "failed to add return symbol to symbol table";
201-
new_symbol.value=literal_init;
202-
}
203-
else if(jls_struct.components().size()>=1 &&
204-
jls_struct.components()[0].get_name()=="@java.lang.Object")
205-
{
206-
// Case where something defined java.lang.String, so it has
207-
// a proper base class (always java.lang.Object in practical
208-
// JDKs seen so far)
209-
struct_exprt literal_init(new_symbol.type);
210-
literal_init.move_to_operands(jlo_init);
211-
for(const auto &comp : jls_struct.components())
212-
{
213-
if(comp.get_name()=="@java.lang.Object")
214-
continue;
215-
// Other members of JDK's java.lang.String we don't understand
216-
// without string-refinement. Just zero-init them; consider using
217-
// test-gen-like nondet object trees instead.
218-
literal_init.copy_to_operands(
219-
zero_initializer(comp.type(), expr.source_location(), ns));
220-
}
221-
new_symbol.value=literal_init;
222-
}
223-
else if(jls_struct.get_bool(ID_incomplete_class))
224-
{
225-
// Case where java.lang.String was stubbed, and so directly defines
226-
// @class_identifier and @lock:
227-
new_symbol.value=jlo_init;
228-
}
229-
230-
if(symbol_table.add(new_symbol))
231-
{
232-
error() << "failed to add string literal symbol to symbol table" << eom;
233-
throw 0;
234-
}
235-
236-
expr=address_of_exprt(new_symbol.symbol_expr());
75+
expr = address_of_exprt(
76+
get_or_create_string_literal_symbol(
77+
expr, symbol_table, string_refinement_enabled));
23778
}
23879

23980
void java_bytecode_typecheckt::typecheck_expr_symbol(symbol_exprt &expr)
+201
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
/*******************************************************************\
2+
3+
Module: Java string literal processing
4+
5+
Author: Chris Smowton, [email protected]
6+
7+
\*******************************************************************/
8+
9+
#include "java_string_literals.h"
10+
#include "java_root_class.h"
11+
#include "java_types.h"
12+
#include "java_utils.h"
13+
14+
#include <linking/zero_initializer.h>
15+
16+
#include <util/arith_tools.h>
17+
#include <util/namespace.h>
18+
#include <util/unicode.h>
19+
20+
#include <iomanip>
21+
#include <sstream>
22+
23+
/// Replace non-alphanumeric characters with `_xx` escapes, where xx are hex
24+
/// digits. Underscores are replaced by `__`.
25+
/// \param to_escape: string to escape
26+
/// \return string with non-alphanumeric characters escaped
27+
static std::string escape_non_alnum(const std::string &to_escape)
28+
{
29+
std::ostringstream escaped;
30+
for(auto &ch : to_escape)
31+
{
32+
if(ch=='_')
33+
escaped << "__";
34+
else if(isalnum(ch))
35+
escaped << ch;
36+
else
37+
escaped << '_'
38+
<< std::hex
39+
<< std::setfill('0')
40+
<< std::setw(2)
41+
<< (unsigned int)ch;
42+
}
43+
return escaped.str();
44+
}
45+
46+
/// Convert UCS-2 or UTF-16 to an array expression.
47+
/// \par parameters: `in`: wide string to convert
48+
/// \return Returns a Java char array containing the same wchars.
49+
static array_exprt utf16_to_array(const std::wstring &in)
50+
{
51+
const auto jchar=java_char_type();
52+
array_exprt ret(
53+
array_typet(jchar, from_integer(in.length(), java_int_type())));
54+
for(const auto c : in)
55+
ret.copy_to_operands(from_integer(c, jchar));
56+
return ret;
57+
}
58+
59+
/// Creates or gets an existing constant global symbol for a given string
60+
/// literal.
61+
/// \param string_expr: string literal expression to convert
62+
/// \param symbol_table: global symbol table. If not already present, constant
63+
/// global symbols will be added.
64+
/// \param string_refinement_enabled: if true, string refinement's string data
65+
/// structure will also be initialised and added to the symbol table.
66+
/// \return a symbol_expr corresponding to the new or existing literal symbol.
67+
symbol_exprt get_or_create_string_literal_symbol(
68+
const exprt &string_expr,
69+
symbol_table_baset &symbol_table,
70+
bool string_refinement_enabled)
71+
{
72+
PRECONDITION(string_expr.id() == ID_java_string_literal);
73+
const irep_idt value = string_expr.get(ID_value);
74+
const symbol_typet string_type("java::java.lang.String");
75+
76+
const std::string escaped_symbol_name = escape_non_alnum(id2string(value));
77+
const std::string escaped_symbol_name_with_prefix =
78+
JAVA_STRING_LITERAL_PREFIX "." + escaped_symbol_name;
79+
80+
auto findit = symbol_table.symbols.find(escaped_symbol_name_with_prefix);
81+
if(findit != symbol_table.symbols.end())
82+
return findit->second.symbol_expr();
83+
84+
// Create a new symbol:
85+
symbolt new_symbol;
86+
new_symbol.name = escaped_symbol_name_with_prefix;
87+
new_symbol.type = string_type;
88+
new_symbol.base_name = escaped_symbol_name;
89+
new_symbol.pretty_name = value;
90+
new_symbol.mode = ID_java;
91+
new_symbol.is_type = false;
92+
new_symbol.is_lvalue = true;
93+
new_symbol.is_static_lifetime = true;
94+
95+
namespacet ns(symbol_table);
96+
97+
// Regardless of string refinement setting, at least initialize
98+
// the literal with @clsid = String and @lock = false:
99+
symbol_typet jlo_symbol("java::java.lang.Object");
100+
const auto &jlo_struct = to_struct_type(ns.follow(jlo_symbol));
101+
struct_exprt jlo_init(jlo_symbol);
102+
const auto &jls_struct = to_struct_type(ns.follow(string_type));
103+
java_root_class_init(jlo_init, jlo_struct, false, "java::java.lang.String");
104+
105+
// If string refinement *is* around, populate the actual
106+
// contents as well:
107+
if(string_refinement_enabled)
108+
{
109+
struct_exprt literal_init(new_symbol.type);
110+
literal_init.operands().resize(jls_struct.components().size());
111+
const std::size_t jlo_nb = jls_struct.component_number("@java.lang.Object");
112+
literal_init.operands()[jlo_nb] = jlo_init;
113+
114+
const std::size_t length_nb = jls_struct.component_number("length");
115+
const typet &length_type = jls_struct.components()[length_nb].type();
116+
const exprt length = from_integer(id2string(value).size(), length_type);
117+
literal_init.operands()[length_nb] = length;
118+
119+
// Initialize the string with a constant utf-16 array:
120+
symbolt array_symbol;
121+
array_symbol.name = escaped_symbol_name_with_prefix + "_constarray";
122+
array_symbol.base_name = escaped_symbol_name + "_constarray";
123+
array_symbol.pretty_name = value;
124+
array_symbol.mode = ID_java;
125+
array_symbol.is_type = false;
126+
array_symbol.is_lvalue = true;
127+
// These are basically const global data:
128+
array_symbol.is_static_lifetime = true;
129+
array_symbol.is_state_var = true;
130+
array_symbol.value =
131+
utf16_to_array(utf8_to_utf16_little_endian(id2string(value)));
132+
array_symbol.type = array_symbol.value.type();
133+
134+
if(symbol_table.add(array_symbol))
135+
throw "failed to add constarray symbol to symbol table";
136+
137+
const symbol_exprt array_expr = array_symbol.symbol_expr();
138+
const address_of_exprt array_pointer(
139+
index_exprt(array_expr, from_integer(0, java_int_type())));
140+
141+
const std::size_t data_nb = jls_struct.component_number("data");
142+
literal_init.operands()[data_nb] = array_pointer;
143+
144+
// Associate array with pointer
145+
symbolt return_symbol;
146+
return_symbol.name = escaped_symbol_name_with_prefix + "_return_value";
147+
return_symbol.base_name = escaped_symbol_name + "_return_value";
148+
return_symbol.pretty_name =
149+
escaped_symbol_name.length() > 10
150+
? escaped_symbol_name.substr(0, 10) + "..._return_value"
151+
: escaped_symbol_name + "_return_value";
152+
return_symbol.mode = ID_java;
153+
return_symbol.is_type = false;
154+
return_symbol.is_lvalue = true;
155+
return_symbol.is_static_lifetime = true;
156+
return_symbol.is_state_var = true;
157+
return_symbol.value = make_function_application(
158+
ID_cprover_associate_array_to_pointer_func,
159+
{array_symbol.value, array_pointer},
160+
java_int_type(),
161+
symbol_table);
162+
return_symbol.type = return_symbol.value.type();
163+
if(symbol_table.add(return_symbol))
164+
throw "failed to add return symbol to symbol table";
165+
new_symbol.value = literal_init;
166+
}
167+
else if(jls_struct.components().size()>=1 &&
168+
jls_struct.components()[0].get_name()=="@java.lang.Object")
169+
{
170+
// Case where something defined java.lang.String, so it has
171+
// a proper base class (always java.lang.Object in practical
172+
// JDKs seen so far)
173+
struct_exprt literal_init(new_symbol.type);
174+
literal_init.move_to_operands(jlo_init);
175+
for(const auto &comp : jls_struct.components())
176+
{
177+
if(comp.get_name()=="@java.lang.Object")
178+
continue;
179+
// Other members of JDK's java.lang.String we don't understand
180+
// without string-refinement. Just zero-init them; consider using
181+
// test-gen-like nondet object trees instead.
182+
literal_init.copy_to_operands(
183+
zero_initializer(comp.type(), string_expr.source_location(), ns));
184+
}
185+
new_symbol.value = literal_init;
186+
}
187+
else if(jls_struct.get_bool(ID_incomplete_class))
188+
{
189+
// Case where java.lang.String was stubbed, and so directly defines
190+
// @class_identifier and @lock:
191+
new_symbol.value = jlo_init;
192+
}
193+
194+
bool add_failed = symbol_table.add(new_symbol);
195+
INVARIANT(
196+
!add_failed,
197+
"string literal symbol was already checked not to be "
198+
"in the symbol table, so adding it should succeed");
199+
200+
return new_symbol.symbol_expr();
201+
}

0 commit comments

Comments
 (0)