Skip to content

Commit e4e26d8

Browse files
Merge pull request #675 from allredj/testgen-string-refine-corrections-on-preprocess
Corrections to the string solver
2 parents 29d67d3 + 6958a56 commit e4e26d8

18 files changed

+1768
-985
lines changed

src/goto-programs/string_refine_preprocess.cpp

+403-403
Large diffs are not rendered by default.

src/goto-programs/string_refine_preprocess.h

+22-49
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Date: September 2016
1414

1515
#include <goto-programs/goto_model.h>
1616
#include <util/ui_message.h>
17+
#include <util/string_expr.h>
1718

1819
class string_refine_preprocesst:public messaget
1920
{
@@ -29,28 +30,21 @@ class string_refine_preprocesst:public messaget
2930
typedef std::unordered_map<irep_idt, irep_idt, irep_id_hash> id_mapt;
3031
typedef std::unordered_map<exprt, exprt, irep_hash> expr_mapt;
3132

32-
// String builders maps the different names of a same StringBuilder object
33-
// to a unique expression.
34-
expr_mapt string_builders;
35-
3633
// Map name of Java string functions to there equivalent in the solver
3734
id_mapt side_effect_functions;
3835
id_mapt string_functions;
3936
id_mapt c_string_functions;
4037
id_mapt string_function_calls;
41-
id_mapt string_of_char_array_functions;
42-
id_mapt string_of_char_array_function_calls;
43-
id_mapt side_effect_char_array_functions;
4438

4539
std::unordered_map<irep_idt, std::string, irep_id_hash> signatures;
46-
expr_mapt hidden_strings;
47-
expr_mapt java_to_cprover_strings;
4840

4941
// unique id for each newly created symbols
5042
int next_symbol_id;
5143

5244
void initialize_string_function_table();
5345

46+
static bool check_java_type(const typet &type, const std::string &tag);
47+
5448
static bool is_java_string_pointer_type(const typet &type);
5549

5650
static bool is_java_string_type(const typet &type);
@@ -61,6 +55,20 @@ class string_refine_preprocesst:public messaget
6155

6256
static bool is_java_char_sequence_type(const typet &type);
6357

58+
static bool is_java_char_sequence_pointer_type(const typet &type);
59+
60+
static bool is_java_char_array_type(const typet &type);
61+
62+
static bool is_java_char_array_pointer_type(const typet &type);
63+
64+
static bool implements_java_char_sequence(const typet &type)
65+
{
66+
return
67+
is_java_char_sequence_pointer_type(type) ||
68+
is_java_string_builder_pointer_type(type) ||
69+
is_java_string_pointer_type(type);
70+
}
71+
6472
symbol_exprt fresh_array(
6573
const typet &type, const source_locationt &location);
6674
symbol_exprt fresh_string(
@@ -110,6 +118,9 @@ class string_refine_preprocesst:public messaget
110118
void get_data_and_length_type_of_string(
111119
const exprt &expr, typet &data_type, typet &length_type);
112120

121+
void get_data_and_length_type_of_char_array(
122+
const exprt &expr, typet &data_type, typet &length_type);
123+
113124
function_application_exprt build_function_application(
114125
const irep_idt &function_name,
115126
const typet &type,
@@ -136,27 +147,16 @@ class string_refine_preprocesst:public messaget
136147
const source_locationt &location,
137148
const std::string &signature);
138149

139-
void make_assign(
140-
goto_programt &goto_program,
141-
goto_programt::targett &target,
142-
const exprt &lhs,
143-
const code_typet &function_type,
144-
const irep_idt &function_name,
145-
const exprt::operandst &arg,
146-
const source_locationt &loc,
147-
const std::string &sig);
148-
149150
exprt make_cprover_string_assign(
150151
goto_programt &goto_program,
151152
goto_programt::targett &target,
152153
const exprt &rhs,
153154
const source_locationt &location);
154155

155-
void make_string_copy(
156+
string_exprt make_cprover_char_array_assign(
156157
goto_programt &goto_program,
157158
goto_programt::targett &target,
158-
const exprt &lhs,
159-
const exprt &argument,
159+
const exprt &rhs,
160160
const source_locationt &location);
161161

162162
void make_string_function(
@@ -192,33 +192,6 @@ class string_refine_preprocesst:public messaget
192192
void make_to_char_array_function(
193193
goto_programt &goto_program, goto_programt::targett &);
194194

195-
exprt make_cprover_char_array_assign(
196-
goto_programt &goto_program,
197-
goto_programt::targett &target,
198-
const exprt &rhs,
199-
const source_locationt &location);
200-
201-
void make_char_array_function(
202-
goto_programt &goto_program,
203-
goto_programt::targett &target,
204-
const irep_idt &function_name,
205-
const std::string &signature,
206-
std::size_t index,
207-
bool assign_first_arg=false,
208-
bool skip_first_arg=false);
209-
210-
void make_char_array_function_call(
211-
goto_programt &goto_program,
212-
goto_programt::targett &target,
213-
const irep_idt &function_name,
214-
const std::string &signature);
215-
216-
void make_char_array_side_effect(
217-
goto_programt &goto_program,
218-
goto_programt::targett &target,
219-
const irep_idt &function_name,
220-
const std::string &signature);
221-
222195
void replace_string_calls(goto_functionst::function_mapt::iterator f_it);
223196
};
224197

src/java_bytecode/java_bytecode_convert_class.cpp

+19-8
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,16 @@ class java_bytecode_convert_classt:public messaget
5050
convert(parse_tree.parsed_class);
5151
else if(string_refinement_enabled &&
5252
parse_tree.parsed_class.name=="java.lang.String")
53-
add_string_type();
53+
add_string_type("java.lang.String");
54+
else if(string_refinement_enabled &&
55+
parse_tree.parsed_class.name=="java.lang.StringBuilder")
56+
add_string_type("java.lang.StringBuilder");
57+
else if(string_refinement_enabled &&
58+
parse_tree.parsed_class.name=="java.lang.CharSequence")
59+
add_string_type("java.lang.CharSequence");
60+
else if(string_refinement_enabled &&
61+
parse_tree.parsed_class.name=="java.lang.StringBuffer")
62+
add_string_type("java.lang.StringBuffer");
5463
else
5564
generate_class_stub(parse_tree.parsed_class.name);
5665
}
@@ -72,7 +81,7 @@ class java_bytecode_convert_classt:public messaget
7281

7382
void generate_class_stub(const irep_idt &class_name);
7483
void add_array_types();
75-
void add_string_type();
84+
void add_string_type(const irep_idt &class_name);
7685
};
7786

7887
/*******************************************************************\
@@ -406,15 +415,17 @@ bool java_bytecode_convert_class(
406415
407416
Function: java_bytecode_convert_classt::add_string_type
408417
418+
Inputs: a name for the class such as "java.lang.String"
419+
409420
Purpose: Implements the java.lang.String type in the case that
410421
we provide an internal implementation.
411422
412423
\*******************************************************************/
413424

414-
void java_bytecode_convert_classt::add_string_type()
425+
void java_bytecode_convert_classt::add_string_type(const irep_idt &class_name)
415426
{
416427
class_typet string_type;
417-
string_type.set_tag("java.lang.String");
428+
string_type.set_tag(class_name);
418429
string_type.components().resize(3);
419430
string_type.components()[0].set_name("@java.lang.Object");
420431
string_type.components()[0].set_pretty_name("@java.lang.Object");
@@ -432,8 +443,8 @@ void java_bytecode_convert_classt::add_string_type()
432443
string_type.add_base(symbol_typet("java::java.lang.Object"));
433444

434445
symbolt string_symbol;
435-
string_symbol.name="java::java.lang.String";
436-
string_symbol.base_name="java.lang.String";
446+
string_symbol.name="java::"+id2string(class_name);
447+
string_symbol.base_name=id2string(class_name);
437448
string_symbol.type=string_type;
438449
string_symbol.is_type=true;
439450

@@ -445,8 +456,8 @@ void java_bytecode_convert_classt::add_string_type()
445456
symbolt string_equals_symbol;
446457
string_equals_symbol.name=
447458
"java::java.lang.String.equals:(Ljava/lang/Object;)Z";
448-
string_equals_symbol.base_name="java.lang.String.equals";
449-
string_equals_symbol.pretty_name="java.lang.String.equals";
459+
string_equals_symbol.base_name=id2string(class_name)+".equals";
460+
string_equals_symbol.pretty_name=id2string(class_name)+".equals";
450461
string_equals_symbol.mode=ID_java;
451462

452463
code_typet string_equals_type;

src/solvers/refinement/string_constraint.h

-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@ class string_constraintt: public exprt
4848
return operands()[4];
4949
}
5050

51-
5251
private:
5352
string_constraintt();
5453

src/solvers/refinement/string_constraint_generator.h

+26-19
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ Author: Romain Brenguier, [email protected]
1414
#define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H
1515

1616
#include <util/string_expr.h>
17+
#include <util/replace_expr.h>
1718
#include <util/refined_string_type.h>
1819
#include <solvers/refinement/string_constraint.h>
1920

@@ -65,21 +66,20 @@ class string_constraint_generatort
6566
symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type);
6667
symbol_exprt fresh_boolean(const irep_idt &prefix);
6768
string_exprt fresh_string(const refined_string_typet &type);
69+
string_exprt get_string_expr(const exprt &expr);
70+
string_exprt convert_java_string_to_string_exprt(
71+
const exprt &underlying);
72+
plus_exprt plus_exprt_with_overflow_check(const exprt &op1, const exprt &op2);
6873

69-
// We maintain a map from symbols to strings.
70-
std::map<irep_idt, string_exprt> symbol_to_string;
74+
// Maps unresolved symbols to the string_exprt that was created for them
75+
std::map<irep_idt, string_exprt> unresolved_symbols;
7176

72-
string_exprt find_or_add_string_of_symbol(const symbol_exprt &sym);
7377

74-
void assign_to_symbol(
75-
const symbol_exprt &sym, const string_exprt &expr)
76-
{
77-
symbol_to_string[sym.get_identifier()]=expr;
78-
}
78+
string_exprt find_or_add_string_of_symbol(
79+
const symbol_exprt &sym,
80+
const refined_string_typet &ref_type);
7981

80-
string_exprt add_axioms_for_string_expr(const exprt &expr);
81-
void set_string_symbol_equal_to_expr(
82-
const symbol_exprt &sym, const exprt &str);
82+
string_exprt add_axioms_for_refined_string(const exprt &expr);
8383

8484
exprt add_axioms_for_function_application(
8585
const function_application_exprt &expr);
@@ -96,6 +96,8 @@ class string_constraint_generatort
9696
const std::size_t MAX_FLOAT_LENGTH=15;
9797
const std::size_t MAX_DOUBLE_LENGTH=30;
9898

99+
std::map<function_application_exprt, exprt> function_application_cache;
100+
99101
static irep_idt extract_java_string(const symbol_exprt &s);
100102

101103
exprt axiom_for_is_positive_index(const exprt &x);
@@ -117,6 +119,9 @@ class string_constraint_generatort
117119
// The specification is partial: the actual value is not actually computed
118120
// but we ensure that hash codes of equal strings are equal.
119121
exprt add_axioms_for_hash_code(const function_application_exprt &f);
122+
// To each string on which hash_code was called we associate a symbol
123+
// representing the return value of the hash_code function.
124+
std::map<string_exprt, exprt> hash_code_of_string;
120125

121126
exprt add_axioms_for_is_empty(const function_application_exprt &f);
122127
exprt add_axioms_for_is_prefix(
@@ -224,7 +229,9 @@ class string_constraint_generatort
224229
// the start for negative number
225230
string_exprt add_axioms_from_float(const function_application_exprt &f);
226231
string_exprt add_axioms_from_float(
227-
const exprt &f, bool double_precision=false);
232+
const exprt &f,
233+
const refined_string_typet &ref_type,
234+
bool double_precision);
228235

229236
// Add axioms corresponding to the String.valueOf(D) java function
230237
// TODO: the specifications is only partial
@@ -253,6 +260,7 @@ class string_constraint_generatort
253260
string_exprt add_axioms_for_code_point(
254261
const exprt &code_point, const refined_string_typet &ref_type);
255262
string_exprt add_axioms_for_java_char_array(const exprt &char_array);
263+
exprt add_axioms_for_char_pointer(const function_application_exprt &fun);
256264
string_exprt add_axioms_for_if(const if_exprt &expr);
257265
exprt add_axioms_for_char_literal(const function_application_exprt &f);
258266

@@ -270,6 +278,8 @@ class string_constraint_generatort
270278
const function_application_exprt &f);
271279

272280
exprt add_axioms_for_parse_int(const function_application_exprt &f);
281+
exprt add_axioms_for_correct_number_format(
282+
const string_exprt &str, std::size_t max_size=10);
273283
exprt add_axioms_for_to_char_array(const function_application_exprt &f);
274284
exprt add_axioms_for_compare_to(const function_application_exprt &f);
275285

@@ -278,6 +288,9 @@ class string_constraint_generatort
278288
// string pointers
279289
symbol_exprt add_axioms_for_intern(const function_application_exprt &f);
280290

291+
// Pool used for the intern method
292+
std::map<string_exprt, symbol_exprt> intern_of_string;
293+
281294
// Tells which language is used. C and Java are supported
282295
irep_idt mode;
283296

@@ -293,14 +306,8 @@ class string_constraint_generatort
293306
exprt int_of_hex_char(const exprt &chr) const;
294307
exprt is_high_surrogate(const exprt &chr) const;
295308
exprt is_low_surrogate(const exprt &chr) const;
296-
static exprt character_equals_ignore_case(
309+
exprt character_equals_ignore_case(
297310
exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z);
298-
299-
// Pool used for the intern method
300-
std::map<string_exprt, symbol_exprt> pool;
301-
302-
// Used to determine whether hashcode should be equal
303-
std::map<string_exprt, symbol_exprt> hash;
304311
};
305312

306313
#endif

src/solvers/refinement/string_constraint_generator_code_points.cpp

+10-8
Original file line numberDiff line numberDiff line change
@@ -160,17 +160,18 @@ exprt string_constraint_generatort::add_axioms_for_code_point_at(
160160
{
161161
typet return_type=f.type();
162162
assert(return_type.id()==ID_signedbv);
163-
string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]);
163+
string_exprt str=get_string_expr(args(f, 2)[0]);
164164
const exprt &pos=args(f, 2)[1];
165165

166166
symbol_exprt result=fresh_symbol("char", return_type);
167167
exprt index1=from_integer(1, str.length().type());
168168
const exprt &char1=str[pos];
169-
const exprt &char2=str[plus_exprt(pos, index1)];
169+
const exprt &char2=str[plus_exprt_with_overflow_check(pos, index1)];
170170
exprt char1_as_int=typecast_exprt(char1, return_type);
171171
exprt char2_as_int=typecast_exprt(char2, return_type);
172172
exprt pair=pair_value(char1_as_int, char2_as_int, return_type);
173-
exprt is_low=is_low_surrogate(str[plus_exprt(pos, index1)]);
173+
exprt is_low=is_low_surrogate(
174+
str[plus_exprt_with_overflow_check(pos, index1)]);
174175
exprt return_pair=and_exprt(is_high_surrogate(str[pos]), is_low);
175176

176177
axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair)));
@@ -199,7 +200,7 @@ exprt string_constraint_generatort::add_axioms_for_code_point_before(
199200
typet return_type=f.type();
200201
assert(return_type.id()==ID_signedbv);
201202
symbol_exprt result=fresh_symbol("char", return_type);
202-
string_exprt str=add_axioms_for_string_expr(args[0]);
203+
string_exprt str=get_string_expr(args[0]);
203204

204205
const exprt &char1=
205206
str[minus_exprt(args[1], from_integer(2, str.length().type()))];
@@ -234,7 +235,7 @@ Function: string_constraint_generatort::add_axioms_for_code_point_count
234235
exprt string_constraint_generatort::add_axioms_for_code_point_count(
235236
const function_application_exprt &f)
236237
{
237-
string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]);
238+
string_exprt str=get_string_expr(args(f, 3)[0]);
238239
const exprt &begin=args(f, 3)[1];
239240
const exprt &end=args(f, 3)[2];
240241
const typet &return_type=f.type();
@@ -265,14 +266,15 @@ Function: string_constraint_generatort::add_axioms_for_offset_by_code_point
265266
exprt string_constraint_generatort::add_axioms_for_offset_by_code_point(
266267
const function_application_exprt &f)
267268
{
268-
string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]);
269+
string_exprt str=get_string_expr(args(f, 3)[0]);
269270
const exprt &index=args(f, 3)[1];
270271
const exprt &offset=args(f, 3)[2];
271272
const typet &return_type=f.type();
272273
symbol_exprt result=fresh_symbol("offset_by_code_point", return_type);
273274

274-
exprt minimum=plus_exprt(index, offset);
275-
exprt maximum=plus_exprt(index, plus_exprt(offset, offset));
275+
exprt minimum=plus_exprt_with_overflow_check(index, offset);
276+
exprt maximum=plus_exprt_with_overflow_check(
277+
index, plus_exprt_with_overflow_check(offset, offset));
276278
axioms.push_back(binary_relation_exprt(result, ID_le, maximum));
277279
axioms.push_back(binary_relation_exprt(result, ID_ge, minimum));
278280

0 commit comments

Comments
 (0)