10
10
11
11
#include < util/std_expr.h>
12
12
#include < util/prefix.h>
13
+ #include < util/arith_tools.h>
14
+ #include < util/unicode.h>
15
+
16
+ #include < linking/zero_initializer.h>
13
17
14
18
#include " java_bytecode_typecheck.h"
15
19
#include " java_pointer_casts.h"
20
+ #include " java_types.h"
16
21
17
22
/* ******************************************************************\
18
23
@@ -114,6 +119,27 @@ static std::string escape_non_alnum(const std::string &toescape)
114
119
115
120
/* ******************************************************************\
116
121
122
+ Function: utf16_to_array
123
+
124
+ Inputs: `in`: wide string to convert
125
+
126
+ Outputs: Returns a Java char array containing the same wchars.
127
+
128
+ Purpose: Convert UCS-2 or UTF-16 to an array expression.
129
+
130
+ \*******************************************************************/
131
+
132
+ static array_exprt utf16_to_array (const std::wstring &in)
133
+ {
134
+ const auto jchar=java_char_type ();
135
+ array_exprt ret (array_typet (jchar, infinity_exprt (java_int_type ())));
136
+ for (const auto c : in)
137
+ ret.copy_to_operands (from_integer (c, jchar));
138
+ return ret;
139
+ }
140
+
141
+ /* ******************************************************************\
142
+
117
143
Function: java_bytecode_typecheckt::typecheck_expr_java_string_literal
118
144
119
145
Inputs:
@@ -136,28 +162,106 @@ void java_bytecode_typecheckt::typecheck_expr_java_string_literal(exprt &expr)
136
162
auto findit=symbol_table.symbols .find (escaped_symbol_name);
137
163
if (findit!=symbol_table.symbols .end ())
138
164
{
139
- expr=findit->second .symbol_expr ();
165
+ expr=address_of_exprt ( findit->second .symbol_expr () );
140
166
return ;
141
167
}
142
168
143
169
// Create a new symbol:
144
170
symbolt new_symbol;
145
171
new_symbol.name =escaped_symbol_name;
146
- new_symbol.type =pointer_typet ( string_type) ;
172
+ new_symbol.type =string_type;
147
173
new_symbol.base_name =" Literal" ;
148
174
new_symbol.pretty_name =value;
149
175
new_symbol.mode =ID_java;
150
176
new_symbol.is_type =false ;
151
177
new_symbol.is_lvalue =true ;
152
178
new_symbol.is_static_lifetime =true ; // These are basically const global data.
153
179
180
+ // Regardless of string refinement setting, at least initialize
181
+ // the literal with @clsid = String and @lock = false:
182
+ symbol_typet jlo_symbol (" java::java.lang.Object" );
183
+ const auto &jlo_struct=to_struct_type (ns.follow (jlo_symbol));
184
+ struct_exprt jlo_init (jlo_symbol);
185
+ const auto &jls_struct=to_struct_type (ns.follow (string_type));
186
+
187
+ jlo_init.copy_to_operands (
188
+ constant_exprt (
189
+ " java::java.lang.String" ,
190
+ jlo_struct.components ()[0 ].type ()));
191
+ jlo_init.copy_to_operands (
192
+ from_integer (
193
+ 0 ,
194
+ jlo_struct.components ()[1 ].type ()));
195
+
196
+ // If string refinement *is* around, populate the actual
197
+ // contents as well:
198
+ if (string_refinement_enabled)
199
+ {
200
+ struct_exprt literal_init (new_symbol.type );
201
+ literal_init.move_to_operands (jlo_init);
202
+
203
+ // Initialize the string with a constant utf-16 array:
204
+ symbolt array_symbol;
205
+ array_symbol.name =escaped_symbol_name+" _constarray" ;
206
+ array_symbol.type =array_typet (
207
+ java_char_type (), infinity_exprt (java_int_type ()));
208
+ array_symbol.base_name =" Literal_constarray" ;
209
+ array_symbol.pretty_name =value;
210
+ array_symbol.mode =ID_java;
211
+ array_symbol.is_type =false ;
212
+ array_symbol.is_lvalue =true ;
213
+ // These are basically const global data:
214
+ array_symbol.is_static_lifetime =true ;
215
+ array_symbol.is_state_var =true ;
216
+ auto literal_array=utf16_to_array (
217
+ utf8_to_utf16_little_endian (id2string (value)));
218
+ array_symbol.value =literal_array;
219
+
220
+ if (symbol_table.add (array_symbol))
221
+ throw " failed to add constarray symbol to symbol table" ;
222
+
223
+ literal_init.copy_to_operands (
224
+ from_integer (literal_array.operands ().size (),
225
+ jls_struct.components ()[1 ].type ()));
226
+ literal_init.copy_to_operands (
227
+ address_of_exprt (array_symbol.symbol_expr ()));
228
+
229
+ new_symbol.value =literal_init;
230
+ }
231
+ else if (jls_struct.components ().size ()>=1 &&
232
+ jls_struct.components ()[0 ].get_name ()==" @java.lang.Object" )
233
+ {
234
+ // Case where something defined java.lang.String, so it has
235
+ // a proper base class (always java.lang.Object in practical
236
+ // JDKs seen so far)
237
+ struct_exprt literal_init (new_symbol.type );
238
+ literal_init.move_to_operands (jlo_init);
239
+ for (const auto &comp : jls_struct.components ())
240
+ {
241
+ if (comp.get_name ()==" @java.lang.Object" )
242
+ continue ;
243
+ // Other members of JDK's java.lang.String we don't understand
244
+ // without string-refinement. Just zero-init them; consider using
245
+ // test-gen-like nondet object trees instead.
246
+ literal_init.copy_to_operands (
247
+ zero_initializer (comp.type (), expr.source_location (), ns));
248
+ }
249
+ new_symbol.value =literal_init;
250
+ }
251
+ else if (jls_struct.get_bool (ID_incomplete_class))
252
+ {
253
+ // Case where java.lang.String was stubbed, and so directly defines
254
+ // @class_identifier and @lock:
255
+ new_symbol.value =jlo_init;
256
+ }
257
+
154
258
if (symbol_table.add (new_symbol))
155
259
{
156
260
error () << " failed to add string literal symbol to symbol table" << eom;
157
261
throw 0 ;
158
262
}
159
263
160
- expr=new_symbol.symbol_expr ();
264
+ expr=address_of_exprt ( new_symbol.symbol_expr () );
161
265
}
162
266
163
267
/* ******************************************************************\
0 commit comments