Skip to content

Commit f5dc2e6

Browse files
Class for generating string constraints from string functions
1 parent bb4fe7d commit f5dc2e6

11 files changed

+3864
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
/*******************************************************************\
2+
3+
Module: Generates string constraints to link results from string functions
4+
with their arguments. This is inspired by the PASS paper at HVC'13:
5+
"PASS: String Solving with Parameterized Array and Interval Automaton"
6+
by Guodong Li and Indradeep Ghosh, which gives examples of constraints
7+
for several functions.
8+
9+
Author: Romain Brenguier, [email protected]
10+
11+
\*******************************************************************/
12+
13+
#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H
14+
#define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H
15+
16+
#include <ansi-c/c_types.h>
17+
#include <solvers/refinement/refined_string_type.h>
18+
#include <solvers/refinement/string_expr.h>
19+
#include <solvers/refinement/string_constraint.h>
20+
21+
class string_constraint_generatort
22+
{
23+
public:
24+
// This module keeps a list of axioms. It has methods which generate
25+
// string constraints for different string funcitons and add them
26+
// to the axiom list.
27+
28+
string_constraint_generatort():
29+
mode(ID_unknown), refined_string_type(char_type())
30+
{ }
31+
32+
void set_mode(irep_idt _mode)
33+
{
34+
// only C and java modes supported
35+
assert((_mode==ID_java) || (_mode==ID_C));
36+
mode=_mode;
37+
refined_string_type=refined_string_typet(get_char_type());
38+
}
39+
40+
irep_idt &get_mode() { return mode; }
41+
42+
typet get_char_type() const;
43+
typet get_index_type() const
44+
{
45+
if(mode==ID_java)
46+
return refined_string_typet::java_index_type();
47+
assert(mode==ID_C);
48+
return refined_string_typet::index_type();
49+
}
50+
51+
const refined_string_typet &get_refined_string_type() const
52+
{
53+
return refined_string_type;
54+
}
55+
56+
// Axioms are of three kinds: universally quantified string constraint,
57+
// not contains string constraints and simple formulas.
58+
std::list<exprt> axioms;
59+
60+
// Boolean symbols for the results of some string functions
61+
std::list<symbol_exprt> boolean_symbols;
62+
63+
// Symbols used in existential quantifications
64+
std::list<symbol_exprt> index_symbols;
65+
66+
// Used to store information about witnesses for not_contains constraints
67+
std::map<string_not_contains_constraintt, symbol_exprt> witness;
68+
69+
exprt get_witness_of(
70+
const string_not_contains_constraintt &c,
71+
const exprt &univ_val) const
72+
{
73+
return index_exprt(witness.at(c), univ_val);
74+
}
75+
76+
symbol_exprt fresh_exist_index(const irep_idt &prefix);
77+
symbol_exprt fresh_univ_index(const irep_idt &prefix);
78+
symbol_exprt fresh_boolean(const irep_idt &prefix);
79+
80+
// We maintain a map from symbols to strings.
81+
std::map<irep_idt, string_exprt> symbol_to_string;
82+
83+
string_exprt find_or_add_string_of_symbol(const symbol_exprt &sym);
84+
85+
void assign_to_symbol(
86+
const symbol_exprt &sym, const string_exprt &expr)
87+
{
88+
symbol_to_string[sym.get_identifier()]=expr;
89+
}
90+
91+
string_exprt add_axioms_for_string_expr(const exprt &expr);
92+
void set_string_symbol_equal_to_expr(
93+
const symbol_exprt &sym, const exprt &str);
94+
95+
exprt add_axioms_for_function_application(
96+
const function_application_exprt &expr);
97+
98+
constant_exprt constant_char(int i) const;
99+
100+
private:
101+
// The integer with the longest string is Integer.MIN_VALUE which is -2^31,
102+
// that is -2147483648 so takes 11 characters to write.
103+
// The long with the longest string is Long.MIN_VALUE which is -2^63,
104+
// approximately -9.223372037*10^18 so takes 20 characters to write.
105+
const std::size_t MAX_INTEGER_LENGTH=11;
106+
const std::size_t MAX_LONG_LENGTH=20;
107+
const std::size_t MAX_FLOAT_LENGTH=15;
108+
const std::size_t MAX_DOUBLE_LENGTH=30;
109+
110+
irep_idt extract_java_string(const symbol_exprt &s) const;
111+
112+
exprt axiom_for_is_positive_index(const exprt &x);
113+
114+
// The following functions add axioms for the returned value
115+
// to be equal to the result of the function given as argument.
116+
// They are not accessed directly from other classes: they call
117+
// `add_axioms_for_function_application` which determines which of
118+
// these methods should be called.
119+
120+
exprt add_axioms_for_char_at(const function_application_exprt &f);
121+
exprt add_axioms_for_code_point_at(const function_application_exprt &f);
122+
exprt add_axioms_for_code_point_before(const function_application_exprt &f);
123+
exprt add_axioms_for_contains(const function_application_exprt &f);
124+
exprt add_axioms_for_equals(const function_application_exprt &f);
125+
exprt add_axioms_for_equals_ignore_case(const function_application_exprt &f);
126+
exprt add_axioms_for_data(const function_application_exprt &f);
127+
128+
// Add axioms corresponding to the String.hashCode java function
129+
// The specification is partial: the actual value is not actually computed
130+
// but we ensure that hash codes of equal strings are equal.
131+
exprt add_axioms_for_hash_code(const function_application_exprt &f);
132+
133+
exprt add_axioms_for_is_empty(const function_application_exprt &f);
134+
exprt add_axioms_for_is_prefix(
135+
const string_exprt &prefix, const string_exprt &str, const exprt &offset);
136+
exprt add_axioms_for_is_prefix(
137+
const function_application_exprt &f, bool swap_arguments=false);
138+
exprt add_axioms_for_is_suffix(
139+
const function_application_exprt &f, bool swap_arguments=false);
140+
exprt add_axioms_for_length(const function_application_exprt &f);
141+
string_exprt add_axioms_for_empty_string(const function_application_exprt &f);
142+
string_exprt add_axioms_for_char_set(const function_application_exprt &expr);
143+
string_exprt add_axioms_for_copy(const function_application_exprt &f);
144+
string_exprt add_axioms_for_concat(
145+
const string_exprt &s1, const string_exprt &s2);
146+
string_exprt add_axioms_for_concat(const function_application_exprt &f);
147+
string_exprt add_axioms_for_concat_int(const function_application_exprt &f);
148+
string_exprt add_axioms_for_concat_long(const function_application_exprt &f);
149+
string_exprt add_axioms_for_concat_bool(const function_application_exprt &f);
150+
string_exprt add_axioms_for_concat_char(const function_application_exprt &f);
151+
string_exprt add_axioms_for_concat_double(
152+
const function_application_exprt &f);
153+
string_exprt add_axioms_for_concat_float(const function_application_exprt &f);
154+
string_exprt add_axioms_for_concat_code_point(
155+
const function_application_exprt &f);
156+
string_exprt add_axioms_for_constant(irep_idt sval);
157+
string_exprt add_axioms_for_delete(
158+
const string_exprt &str, const exprt &start, const exprt &end);
159+
string_exprt add_axioms_for_delete(const function_application_exprt &expr);
160+
string_exprt add_axioms_for_delete_char_at(
161+
const function_application_exprt &expr);
162+
string_exprt add_axioms_for_insert(
163+
const string_exprt &s1, const string_exprt &s2, const exprt &offset);
164+
string_exprt add_axioms_for_insert(const function_application_exprt &f);
165+
string_exprt add_axioms_for_insert_int(const function_application_exprt &f);
166+
string_exprt add_axioms_for_insert_long(const function_application_exprt &f);
167+
string_exprt add_axioms_for_insert_bool(const function_application_exprt &f);
168+
string_exprt add_axioms_for_insert_char(const function_application_exprt &f);
169+
string_exprt add_axioms_for_insert_double(
170+
const function_application_exprt &f);
171+
string_exprt add_axioms_for_insert_float(const function_application_exprt &f);
172+
string_exprt add_axioms_for_insert_char_array(
173+
const function_application_exprt &f);
174+
string_exprt add_axioms_from_literal(const function_application_exprt &f);
175+
string_exprt add_axioms_from_int(const function_application_exprt &f);
176+
string_exprt add_axioms_from_int(const exprt &i, size_t max_size);
177+
string_exprt add_axioms_from_int_hex(const exprt &i);
178+
string_exprt add_axioms_from_int_hex(const function_application_exprt &f);
179+
string_exprt add_axioms_from_long(const function_application_exprt &f);
180+
string_exprt add_axioms_from_long(const exprt &i, size_t max_size);
181+
string_exprt add_axioms_from_bool(const function_application_exprt &f);
182+
string_exprt add_axioms_from_bool(const exprt &i);
183+
string_exprt add_axioms_from_char(const function_application_exprt &f);
184+
string_exprt add_axioms_from_char(const exprt &i);
185+
string_exprt add_axioms_from_char_array(const function_application_exprt &f);
186+
string_exprt add_axioms_from_char_array(
187+
const exprt &length,
188+
const exprt &data,
189+
const exprt &offset,
190+
const exprt &count);
191+
exprt add_axioms_for_index_of(
192+
const string_exprt &str,
193+
const exprt &c,
194+
const exprt &from_index);
195+
196+
// Add axioms corresponding to the String.indexOf:(String;I) java function
197+
// TODO: the specifications are only partial:
198+
// we add axioms stating that the returned value is either -1 or greater than
199+
// from_index and the string beggining there has prefix substring
200+
exprt add_axioms_for_index_of_string(
201+
const string_exprt &str,
202+
const string_exprt &substring,
203+
const exprt &from_index);
204+
205+
// Add axioms corresponding to the String.indexOf java functions
206+
// TODO: the specifications are only partial for the ones that look for
207+
// strings
208+
exprt add_axioms_for_index_of(const function_application_exprt &f);
209+
210+
// Add axioms corresponding to the String.lastIndexOf:(String;I) java function
211+
// TODO: the specifications are only partial
212+
exprt add_axioms_for_last_index_of_string(
213+
const string_exprt &str,
214+
const string_exprt &substring,
215+
const exprt &from_index);
216+
217+
// Add axioms corresponding to the String.lastIndexOf:(CI) java function
218+
exprt add_axioms_for_last_index_of(
219+
const string_exprt &str,
220+
const exprt &c,
221+
const exprt &from_index);
222+
223+
// Add axioms corresponding to the String.lastIndexOf java functions
224+
// TODO: the specifications is only partial
225+
exprt add_axioms_for_last_index_of(const function_application_exprt &f);
226+
227+
// TODO: the specifications of these functions is only partial
228+
// We currently only specify that the string for NaN is "NaN", for infinity
229+
// and minus infinity the string are "Infinity" and "-Infinity respectively
230+
// otherwise the string contains only characters in [0123456789.] and '-' at
231+
// the start for negative number
232+
string_exprt add_axioms_from_float(const function_application_exprt &f);
233+
string_exprt add_axioms_from_float(
234+
const exprt &f, bool double_precision=false);
235+
236+
// Add axioms corresponding to the String.valueOf(D) java function
237+
// TODO: the specifications is only partial
238+
string_exprt add_axioms_from_double(const function_application_exprt &f);
239+
240+
string_exprt add_axioms_for_replace(const function_application_exprt &f);
241+
string_exprt add_axioms_for_set_length(const function_application_exprt &f);
242+
243+
// TODO: the specification may not be correct for the case where the
244+
// string is shorter than end. An actual java program should throw an
245+
// exception in that case
246+
string_exprt add_axioms_for_substring(
247+
const string_exprt &str, const exprt &start, const exprt &end);
248+
string_exprt add_axioms_for_substring(const function_application_exprt &expr);
249+
250+
string_exprt add_axioms_for_to_lower_case(
251+
const function_application_exprt &expr);
252+
string_exprt add_axioms_for_to_upper_case(
253+
const function_application_exprt &expr);
254+
string_exprt add_axioms_for_trim(const function_application_exprt &expr);
255+
256+
// Add axioms corresponding to the String.valueOf([CII) function
257+
// TODO: not working correctly at the moment
258+
string_exprt add_axioms_for_value_of(const function_application_exprt &f);
259+
260+
string_exprt add_axioms_for_code_point(const exprt &code_point);
261+
string_exprt add_axioms_for_java_char_array(const exprt &char_array);
262+
string_exprt add_axioms_for_if(const if_exprt &expr);
263+
exprt add_axioms_for_char_literal(const function_application_exprt &f);
264+
265+
// Add axioms corresponding the String.codePointCount java function
266+
// TODO: this function is underspecified, we do not compute the exact value
267+
// but over approximate it.
268+
exprt add_axioms_for_code_point_count(const function_application_exprt &f);
269+
270+
// Add axioms corresponding the String.offsetByCodePointCount java function
271+
// TODO: this function is underspecified, it should return the index within
272+
// this String that is offset from the given first argument by second argument
273+
// code points and we approximate this by saying the result is
274+
// between index + offset and index + 2 * offset
275+
exprt add_axioms_for_offset_by_code_point(
276+
const function_application_exprt &f);
277+
278+
exprt add_axioms_for_parse_int(const function_application_exprt &f);
279+
exprt add_axioms_for_to_char_array(const function_application_exprt &f);
280+
exprt add_axioms_for_compare_to(const function_application_exprt &f);
281+
282+
// Add axioms corresponding to the String.intern java function
283+
// TODO: this does not work at the moment because of the way we treat
284+
// string pointers
285+
symbol_exprt add_axioms_for_intern(const function_application_exprt &f);
286+
287+
// Tells which language is used. C and Java are supported
288+
irep_idt mode;
289+
290+
// Type of strings used in the refinement
291+
refined_string_typet refined_string_type;
292+
293+
// assert that the number of argument is equal to nb and extract them
294+
static const function_application_exprt::argumentst &args(
295+
const function_application_exprt &expr, size_t nb)
296+
{
297+
const function_application_exprt::argumentst &args=expr.arguments();
298+
assert(args.size()==nb);
299+
return args;
300+
}
301+
302+
exprt int_of_hex_char(exprt chr) const;
303+
exprt is_high_surrogate(const exprt &chr) const;
304+
exprt is_low_surrogate(const exprt &chr) const;
305+
static exprt character_equals_ignore_case(
306+
exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z);
307+
308+
// Pool used for the intern method
309+
std::map<string_exprt, symbol_exprt> pool;
310+
311+
// Used to determine whether hashcode should be equal
312+
std::map<string_exprt, symbol_exprt> hash;
313+
};
314+
315+
#endif

0 commit comments

Comments
 (0)