|
| 1 | +/*******************************************************************\ |
| 2 | +
|
| 3 | +Module: Generates string constraints to link results from string functions |
| 4 | + with their arguments. This is inspired by the PASS paper at HVC'13: |
| 5 | + "PASS: String Solving with Parameterized Array and Interval Automaton" |
| 6 | + by Guodong Li and Indradeep Ghosh, which gives examples of constraints |
| 7 | + for several functions. |
| 8 | +
|
| 9 | +Author: Romain Brenguier, [email protected] |
| 10 | +
|
| 11 | +\*******************************************************************/ |
| 12 | + |
| 13 | +#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H |
| 14 | +#define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H |
| 15 | + |
| 16 | +#include <ansi-c/c_types.h> |
| 17 | +#include <solvers/refinement/refined_string_type.h> |
| 18 | +#include <solvers/refinement/string_expr.h> |
| 19 | +#include <solvers/refinement/string_constraint.h> |
| 20 | + |
| 21 | +class string_constraint_generatort |
| 22 | +{ |
| 23 | +public: |
| 24 | + // This module keeps a list of axioms. It has methods which generate |
| 25 | + // string constraints for different string funcitons and add them |
| 26 | + // to the axiom list. |
| 27 | + |
| 28 | + string_constraint_generatort(): |
| 29 | + mode(ID_unknown), refined_string_type(char_type()) |
| 30 | + { } |
| 31 | + |
| 32 | + void set_mode(irep_idt _mode) |
| 33 | + { |
| 34 | + // only C and java modes supported |
| 35 | + assert((_mode==ID_java) || (_mode==ID_C)); |
| 36 | + mode=_mode; |
| 37 | + refined_string_type=refined_string_typet(get_char_type()); |
| 38 | + } |
| 39 | + |
| 40 | + irep_idt &get_mode() { return mode; } |
| 41 | + |
| 42 | + typet get_char_type() const; |
| 43 | + typet get_index_type() const |
| 44 | + { |
| 45 | + if(mode==ID_java) |
| 46 | + return refined_string_typet::java_index_type(); |
| 47 | + assert(mode==ID_C); |
| 48 | + return refined_string_typet::index_type(); |
| 49 | + } |
| 50 | + |
| 51 | + const refined_string_typet &get_refined_string_type() const |
| 52 | + { |
| 53 | + return refined_string_type; |
| 54 | + } |
| 55 | + |
| 56 | + // Axioms are of three kinds: universally quantified string constraint, |
| 57 | + // not contains string constraints and simple formulas. |
| 58 | + std::list<exprt> axioms; |
| 59 | + |
| 60 | + // Boolean symbols for the results of some string functions |
| 61 | + std::list<symbol_exprt> boolean_symbols; |
| 62 | + |
| 63 | + // Symbols used in existential quantifications |
| 64 | + std::list<symbol_exprt> index_symbols; |
| 65 | + |
| 66 | + // Used to store information about witnesses for not_contains constraints |
| 67 | + std::map<string_not_contains_constraintt, symbol_exprt> witness; |
| 68 | + |
| 69 | + exprt get_witness_of( |
| 70 | + const string_not_contains_constraintt &c, |
| 71 | + const exprt &univ_val) const |
| 72 | + { |
| 73 | + return index_exprt(witness.at(c), univ_val); |
| 74 | + } |
| 75 | + |
| 76 | + symbol_exprt fresh_exist_index(const irep_idt &prefix); |
| 77 | + symbol_exprt fresh_univ_index(const irep_idt &prefix); |
| 78 | + symbol_exprt fresh_boolean(const irep_idt &prefix); |
| 79 | + |
| 80 | + // We maintain a map from symbols to strings. |
| 81 | + std::map<irep_idt, string_exprt> symbol_to_string; |
| 82 | + |
| 83 | + string_exprt find_or_add_string_of_symbol(const symbol_exprt &sym); |
| 84 | + |
| 85 | + void assign_to_symbol( |
| 86 | + const symbol_exprt &sym, const string_exprt &expr) |
| 87 | + { |
| 88 | + symbol_to_string[sym.get_identifier()]=expr; |
| 89 | + } |
| 90 | + |
| 91 | + string_exprt add_axioms_for_string_expr(const exprt &expr); |
| 92 | + void set_string_symbol_equal_to_expr( |
| 93 | + const symbol_exprt &sym, const exprt &str); |
| 94 | + |
| 95 | + exprt add_axioms_for_function_application( |
| 96 | + const function_application_exprt &expr); |
| 97 | + |
| 98 | + constant_exprt constant_char(int i) const; |
| 99 | + |
| 100 | +private: |
| 101 | + // The integer with the longest string is Integer.MIN_VALUE which is -2^31, |
| 102 | + // that is -2147483648 so takes 11 characters to write. |
| 103 | + // The long with the longest string is Long.MIN_VALUE which is -2^63, |
| 104 | + // approximately -9.223372037*10^18 so takes 20 characters to write. |
| 105 | + const std::size_t MAX_INTEGER_LENGTH=11; |
| 106 | + const std::size_t MAX_LONG_LENGTH=20; |
| 107 | + const std::size_t MAX_FLOAT_LENGTH=15; |
| 108 | + const std::size_t MAX_DOUBLE_LENGTH=30; |
| 109 | + |
| 110 | + irep_idt extract_java_string(const symbol_exprt &s) const; |
| 111 | + |
| 112 | + exprt axiom_for_is_positive_index(const exprt &x); |
| 113 | + |
| 114 | + // The following functions add axioms for the returned value |
| 115 | + // to be equal to the result of the function given as argument. |
| 116 | + // They are not accessed directly from other classes: they call |
| 117 | + // `add_axioms_for_function_application` which determines which of |
| 118 | + // these methods should be called. |
| 119 | + |
| 120 | + exprt add_axioms_for_char_at(const function_application_exprt &f); |
| 121 | + exprt add_axioms_for_code_point_at(const function_application_exprt &f); |
| 122 | + exprt add_axioms_for_code_point_before(const function_application_exprt &f); |
| 123 | + exprt add_axioms_for_contains(const function_application_exprt &f); |
| 124 | + exprt add_axioms_for_equals(const function_application_exprt &f); |
| 125 | + exprt add_axioms_for_equals_ignore_case(const function_application_exprt &f); |
| 126 | + exprt add_axioms_for_data(const function_application_exprt &f); |
| 127 | + |
| 128 | + // Add axioms corresponding to the String.hashCode java function |
| 129 | + // The specification is partial: the actual value is not actually computed |
| 130 | + // but we ensure that hash codes of equal strings are equal. |
| 131 | + exprt add_axioms_for_hash_code(const function_application_exprt &f); |
| 132 | + |
| 133 | + exprt add_axioms_for_is_empty(const function_application_exprt &f); |
| 134 | + exprt add_axioms_for_is_prefix( |
| 135 | + const string_exprt &prefix, const string_exprt &str, const exprt &offset); |
| 136 | + exprt add_axioms_for_is_prefix( |
| 137 | + const function_application_exprt &f, bool swap_arguments=false); |
| 138 | + exprt add_axioms_for_is_suffix( |
| 139 | + const function_application_exprt &f, bool swap_arguments=false); |
| 140 | + exprt add_axioms_for_length(const function_application_exprt &f); |
| 141 | + string_exprt add_axioms_for_empty_string(const function_application_exprt &f); |
| 142 | + string_exprt add_axioms_for_char_set(const function_application_exprt &expr); |
| 143 | + string_exprt add_axioms_for_copy(const function_application_exprt &f); |
| 144 | + string_exprt add_axioms_for_concat( |
| 145 | + const string_exprt &s1, const string_exprt &s2); |
| 146 | + string_exprt add_axioms_for_concat(const function_application_exprt &f); |
| 147 | + string_exprt add_axioms_for_concat_int(const function_application_exprt &f); |
| 148 | + string_exprt add_axioms_for_concat_long(const function_application_exprt &f); |
| 149 | + string_exprt add_axioms_for_concat_bool(const function_application_exprt &f); |
| 150 | + string_exprt add_axioms_for_concat_char(const function_application_exprt &f); |
| 151 | + string_exprt add_axioms_for_concat_double( |
| 152 | + const function_application_exprt &f); |
| 153 | + string_exprt add_axioms_for_concat_float(const function_application_exprt &f); |
| 154 | + string_exprt add_axioms_for_concat_code_point( |
| 155 | + const function_application_exprt &f); |
| 156 | + string_exprt add_axioms_for_constant(irep_idt sval); |
| 157 | + string_exprt add_axioms_for_delete( |
| 158 | + const string_exprt &str, const exprt &start, const exprt &end); |
| 159 | + string_exprt add_axioms_for_delete(const function_application_exprt &expr); |
| 160 | + string_exprt add_axioms_for_delete_char_at( |
| 161 | + const function_application_exprt &expr); |
| 162 | + string_exprt add_axioms_for_insert( |
| 163 | + const string_exprt &s1, const string_exprt &s2, const exprt &offset); |
| 164 | + string_exprt add_axioms_for_insert(const function_application_exprt &f); |
| 165 | + string_exprt add_axioms_for_insert_int(const function_application_exprt &f); |
| 166 | + string_exprt add_axioms_for_insert_long(const function_application_exprt &f); |
| 167 | + string_exprt add_axioms_for_insert_bool(const function_application_exprt &f); |
| 168 | + string_exprt add_axioms_for_insert_char(const function_application_exprt &f); |
| 169 | + string_exprt add_axioms_for_insert_double( |
| 170 | + const function_application_exprt &f); |
| 171 | + string_exprt add_axioms_for_insert_float(const function_application_exprt &f); |
| 172 | + string_exprt add_axioms_for_insert_char_array( |
| 173 | + const function_application_exprt &f); |
| 174 | + string_exprt add_axioms_from_literal(const function_application_exprt &f); |
| 175 | + string_exprt add_axioms_from_int(const function_application_exprt &f); |
| 176 | + string_exprt add_axioms_from_int(const exprt &i, size_t max_size); |
| 177 | + string_exprt add_axioms_from_int_hex(const exprt &i); |
| 178 | + string_exprt add_axioms_from_int_hex(const function_application_exprt &f); |
| 179 | + string_exprt add_axioms_from_long(const function_application_exprt &f); |
| 180 | + string_exprt add_axioms_from_long(const exprt &i, size_t max_size); |
| 181 | + string_exprt add_axioms_from_bool(const function_application_exprt &f); |
| 182 | + string_exprt add_axioms_from_bool(const exprt &i); |
| 183 | + string_exprt add_axioms_from_char(const function_application_exprt &f); |
| 184 | + string_exprt add_axioms_from_char(const exprt &i); |
| 185 | + string_exprt add_axioms_from_char_array(const function_application_exprt &f); |
| 186 | + string_exprt add_axioms_from_char_array( |
| 187 | + const exprt &length, |
| 188 | + const exprt &data, |
| 189 | + const exprt &offset, |
| 190 | + const exprt &count); |
| 191 | + exprt add_axioms_for_index_of( |
| 192 | + const string_exprt &str, |
| 193 | + const exprt &c, |
| 194 | + const exprt &from_index); |
| 195 | + |
| 196 | + // Add axioms corresponding to the String.indexOf:(String;I) java function |
| 197 | + // TODO: the specifications are only partial: |
| 198 | + // we add axioms stating that the returned value is either -1 or greater than |
| 199 | + // from_index and the string beggining there has prefix substring |
| 200 | + exprt add_axioms_for_index_of_string( |
| 201 | + const string_exprt &str, |
| 202 | + const string_exprt &substring, |
| 203 | + const exprt &from_index); |
| 204 | + |
| 205 | + // Add axioms corresponding to the String.indexOf java functions |
| 206 | + // TODO: the specifications are only partial for the ones that look for |
| 207 | + // strings |
| 208 | + exprt add_axioms_for_index_of(const function_application_exprt &f); |
| 209 | + |
| 210 | + // Add axioms corresponding to the String.lastIndexOf:(String;I) java function |
| 211 | + // TODO: the specifications are only partial |
| 212 | + exprt add_axioms_for_last_index_of_string( |
| 213 | + const string_exprt &str, |
| 214 | + const string_exprt &substring, |
| 215 | + const exprt &from_index); |
| 216 | + |
| 217 | + // Add axioms corresponding to the String.lastIndexOf:(CI) java function |
| 218 | + exprt add_axioms_for_last_index_of( |
| 219 | + const string_exprt &str, |
| 220 | + const exprt &c, |
| 221 | + const exprt &from_index); |
| 222 | + |
| 223 | + // Add axioms corresponding to the String.lastIndexOf java functions |
| 224 | + // TODO: the specifications is only partial |
| 225 | + exprt add_axioms_for_last_index_of(const function_application_exprt &f); |
| 226 | + |
| 227 | + // TODO: the specifications of these functions is only partial |
| 228 | + // We currently only specify that the string for NaN is "NaN", for infinity |
| 229 | + // and minus infinity the string are "Infinity" and "-Infinity respectively |
| 230 | + // otherwise the string contains only characters in [0123456789.] and '-' at |
| 231 | + // the start for negative number |
| 232 | + string_exprt add_axioms_from_float(const function_application_exprt &f); |
| 233 | + string_exprt add_axioms_from_float( |
| 234 | + const exprt &f, bool double_precision=false); |
| 235 | + |
| 236 | + // Add axioms corresponding to the String.valueOf(D) java function |
| 237 | + // TODO: the specifications is only partial |
| 238 | + string_exprt add_axioms_from_double(const function_application_exprt &f); |
| 239 | + |
| 240 | + string_exprt add_axioms_for_replace(const function_application_exprt &f); |
| 241 | + string_exprt add_axioms_for_set_length(const function_application_exprt &f); |
| 242 | + |
| 243 | + // TODO: the specification may not be correct for the case where the |
| 244 | + // string is shorter than end. An actual java program should throw an |
| 245 | + // exception in that case |
| 246 | + string_exprt add_axioms_for_substring( |
| 247 | + const string_exprt &str, const exprt &start, const exprt &end); |
| 248 | + string_exprt add_axioms_for_substring(const function_application_exprt &expr); |
| 249 | + |
| 250 | + string_exprt add_axioms_for_to_lower_case( |
| 251 | + const function_application_exprt &expr); |
| 252 | + string_exprt add_axioms_for_to_upper_case( |
| 253 | + const function_application_exprt &expr); |
| 254 | + string_exprt add_axioms_for_trim(const function_application_exprt &expr); |
| 255 | + |
| 256 | + // Add axioms corresponding to the String.valueOf([CII) function |
| 257 | + // TODO: not working correctly at the moment |
| 258 | + string_exprt add_axioms_for_value_of(const function_application_exprt &f); |
| 259 | + |
| 260 | + string_exprt add_axioms_for_code_point(const exprt &code_point); |
| 261 | + string_exprt add_axioms_for_java_char_array(const exprt &char_array); |
| 262 | + string_exprt add_axioms_for_if(const if_exprt &expr); |
| 263 | + exprt add_axioms_for_char_literal(const function_application_exprt &f); |
| 264 | + |
| 265 | + // Add axioms corresponding the String.codePointCount java function |
| 266 | + // TODO: this function is underspecified, we do not compute the exact value |
| 267 | + // but over approximate it. |
| 268 | + exprt add_axioms_for_code_point_count(const function_application_exprt &f); |
| 269 | + |
| 270 | + // Add axioms corresponding the String.offsetByCodePointCount java function |
| 271 | + // TODO: this function is underspecified, it should return the index within |
| 272 | + // this String that is offset from the given first argument by second argument |
| 273 | + // code points and we approximate this by saying the result is |
| 274 | + // between index + offset and index + 2 * offset |
| 275 | + exprt add_axioms_for_offset_by_code_point( |
| 276 | + const function_application_exprt &f); |
| 277 | + |
| 278 | + exprt add_axioms_for_parse_int(const function_application_exprt &f); |
| 279 | + exprt add_axioms_for_to_char_array(const function_application_exprt &f); |
| 280 | + exprt add_axioms_for_compare_to(const function_application_exprt &f); |
| 281 | + |
| 282 | + // Add axioms corresponding to the String.intern java function |
| 283 | + // TODO: this does not work at the moment because of the way we treat |
| 284 | + // string pointers |
| 285 | + symbol_exprt add_axioms_for_intern(const function_application_exprt &f); |
| 286 | + |
| 287 | + // Tells which language is used. C and Java are supported |
| 288 | + irep_idt mode; |
| 289 | + |
| 290 | + // Type of strings used in the refinement |
| 291 | + refined_string_typet refined_string_type; |
| 292 | + |
| 293 | + // assert that the number of argument is equal to nb and extract them |
| 294 | + static const function_application_exprt::argumentst &args( |
| 295 | + const function_application_exprt &expr, size_t nb) |
| 296 | + { |
| 297 | + const function_application_exprt::argumentst &args=expr.arguments(); |
| 298 | + assert(args.size()==nb); |
| 299 | + return args; |
| 300 | + } |
| 301 | + |
| 302 | + exprt int_of_hex_char(exprt chr) const; |
| 303 | + exprt is_high_surrogate(const exprt &chr) const; |
| 304 | + exprt is_low_surrogate(const exprt &chr) const; |
| 305 | + static exprt character_equals_ignore_case( |
| 306 | + exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z); |
| 307 | + |
| 308 | + // Pool used for the intern method |
| 309 | + std::map<string_exprt, symbol_exprt> pool; |
| 310 | + |
| 311 | + // Used to determine whether hashcode should be equal |
| 312 | + std::map<string_exprt, symbol_exprt> hash; |
| 313 | +}; |
| 314 | + |
| 315 | +#endif |
0 commit comments