From e8cd7be80c12e93b76584db1c4c4b37411e90faf Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 23 Dec 2016 12:20:10 +0000 Subject: [PATCH 1/8] Making check methods of refinement virtual MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We make `check_SAT` and `check_UNSAT` methods of the bitvector refinement virtual so that it can be overloaded by string refinement. --- src/solvers/refinement/bv_refinement.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/solvers/refinement/bv_refinement.h b/src/solvers/refinement/bv_refinement.h index de18da27361..00379373265 100644 --- a/src/solvers/refinement/bv_refinement.h +++ b/src/solvers/refinement/bv_refinement.h @@ -83,8 +83,8 @@ class bv_refinementt:public bv_pointerst void get_values(approximationt &approximation); bool is_in_conflict(approximationt &approximation); - void check_SAT(); - void check_UNSAT(); + virtual void check_SAT(); + virtual void check_UNSAT(); bool progress; // we refine the theory of arrays From 5018806dd46826f9f19e52ab1dbaee137d86ec14 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 23 Dec 2016 12:34:16 +0000 Subject: [PATCH 2/8] Adding identifiers for string expressions and functions --- src/util/irep_ids.txt | 63 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/src/util/irep_ids.txt b/src/util/irep_ids.txt index bd6c082973f..9bcb7807671 100644 --- a/src/util/irep_ids.txt +++ b/src/util/irep_ids.txt @@ -738,3 +738,66 @@ java_bytecode_index java_instanceof java_super_method_call java_enum_static_unwind +string_constraint +string_not_contains_constraint +cprover_char_literal_func +cprover_string_literal_func +cprover_string_char_at_func +cprover_string_char_set_func +cprover_string_code_point_at_func +cprover_string_code_point_before_func +cprover_string_code_point_count_func +cprover_string_offset_by_code_point_func +cprover_string_compare_to_func +cprover_string_concat_func +cprover_string_concat_int_func +cprover_string_concat_long_func +cprover_string_concat_char_func +cprover_string_concat_bool_func +cprover_string_concat_double_func +cprover_string_concat_float_func +cprover_string_concat_code_point_func +cprover_string_contains_func +cprover_string_copy_func +cprover_string_delete_func +cprover_string_delete_char_at_func +cprover_string_equal_func +cprover_string_equals_ignore_case_func +cprover_string_empty_string_func +cprover_string_endswith_func +cprover_string_format_func +cprover_string_hash_code_func +cprover_string_index_of_func +cprover_string_intern_func +cprover_string_insert_func +cprover_string_insert_int_func +cprover_string_insert_long_func +cprover_string_insert_bool_func +cprover_string_insert_char_func +cprover_string_insert_float_func +cprover_string_insert_double_func +cprover_string_insert_char_array_func +cprover_string_is_prefix_func +cprover_string_is_suffix_func +cprover_string_is_empty_func +cprover_string_last_index_of_func +cprover_string_length_func +cprover_string_data_func +cprover_string_of_int_func +cprover_string_of_int_hex_func +cprover_string_of_long_func +cprover_string_of_bool_func +cprover_string_of_float_func +cprover_string_of_double_func +cprover_string_of_char_func +cprover_string_of_char_array_func +cprover_string_parse_int_func +cprover_string_replace_func +cprover_string_set_length_func +cprover_string_startswith_func +cprover_string_substring_func +cprover_string_to_char_array_func +cprover_string_to_lower_case_func +cprover_string_to_upper_case_func +cprover_string_trim_func +cprover_string_value_of_func \ No newline at end of file From 188d9189f1c0ac462f493bc359d6e312638edf84 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Tue, 24 Jan 2017 16:28:05 +0000 Subject: [PATCH 3/8] Added a to_unsigned_integer function to arith_tools This function converts a positive integer expression to an unsigned int and returns true in case of an error. --- src/util/arith_tools.cpp | 26 ++++++++++++++++++++++++++ src/util/arith_tools.h | 3 +++ 2 files changed, 29 insertions(+) diff --git a/src/util/arith_tools.cpp b/src/util/arith_tools.cpp index 6acac029673..4df13e8b3a0 100644 --- a/src/util/arith_tools.cpp +++ b/src/util/arith_tools.cpp @@ -114,6 +114,32 @@ bool to_integer(const constant_exprt &expr, mp_integer &int_value) /*******************************************************************\ +Function: to_unsigned_integer + + Inputs: a constant expression and a reference to an unsigned int + + Outputs: an error flag + + Purpose: convert a positive integer expression to an unsigned int + +\*******************************************************************/ + +bool to_unsigned_integer(const constant_exprt &expr, unsigned &uint_value) +{ + mp_integer i; + if(to_integer(expr, i)) + return true; + if(i<0) + return true; + else + { + uint_value=integer2unsigned(i); + return false; + } +} + +/*******************************************************************\ + Function: from_integer Inputs: diff --git a/src/util/arith_tools.h b/src/util/arith_tools.h index 2dbef68b7a3..54f83a2d923 100644 --- a/src/util/arith_tools.h +++ b/src/util/arith_tools.h @@ -22,6 +22,9 @@ bool to_integer(const exprt &expr, mp_integer &int_value); // returns 'true' on error bool to_integer(const constant_exprt &expr, mp_integer &int_value); +// returns 'true' on error +bool to_unsigned_integer(const constant_exprt &expr, unsigned &uint_value); + // assert(false) in case of unsupported type constant_exprt from_integer(const mp_integer &int_value, const typet &type); From 756dea2e9f1da63c2be21e64c831ecde85ffa7d9 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 23 Dec 2016 12:23:28 +0000 Subject: [PATCH 4/8] Class for string types used in the string solver This defines the type for string expressions used by the string solver. These string expressions contains a field length, of type index_type, a field content of type content_type. We also define functions to recognise the C and java string types. --- .../refinement/refined_string_type.cpp | 144 ++++++++++++++++++ src/solvers/refinement/refined_string_type.h | 112 ++++++++++++++ 2 files changed, 256 insertions(+) create mode 100644 src/solvers/refinement/refined_string_type.cpp create mode 100644 src/solvers/refinement/refined_string_type.h diff --git a/src/solvers/refinement/refined_string_type.cpp b/src/solvers/refinement/refined_string_type.cpp new file mode 100644 index 00000000000..cfe44ba4c29 --- /dev/null +++ b/src/solvers/refinement/refined_string_type.cpp @@ -0,0 +1,144 @@ +/********************************************************************\ + +Module: Type for string expressions used by the string solver. + These string expressions contain a field `length`, of type + `index_type`, a field `content` of type `content_type`. + This module also defines functions to recognise the C and java + string types. + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include +#include +#include + +/*******************************************************************\ + +Constructor: refined_string_typet::refined_string_typet + + Inputs: type of characters + +\*******************************************************************/ + +refined_string_typet::refined_string_typet(typet char_type) +{ + infinity_exprt infinite_index(refined_string_typet::index_type()); + array_typet char_array(char_type, infinite_index); + components().emplace_back("length", refined_string_typet::index_type()); + components().emplace_back("content", char_array); +} + +/*******************************************************************\ + +Function: refined_string_typet::is_c_string_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of C strings + +\*******************************************************************/ + +bool refined_string_typet::is_c_string_type(const typet &type) +{ + return + type.id()==ID_struct && + to_struct_type(type).get_tag()==CPROVER_PREFIX"string"; +} + +/*******************************************************************\ + +Function: refined_string_typet::is_java_string_pointer_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string pointers + +\*******************************************************************/ + +bool refined_string_typet::is_java_string_pointer_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + return is_java_string_type(subtype); + } + return false; +} + +/*******************************************************************\ + +Function: refined_string_typet::is_java_string_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string + +\*******************************************************************/ + +bool refined_string_typet::is_java_string_type(const typet &type) +{ + if(type.id()==ID_symbol) + { + irep_idt tag=to_symbol_type(type).get_identifier(); + return tag=="java::java.lang.String"; + } + else if(type.id()==ID_struct) + { + irep_idt tag=to_struct_type(type).get_tag(); + return tag=="java.lang.String"; + } + return false; +} + +/*******************************************************************\ + +Function: refined_string_typet::is_java_string_builder_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java string builder + +\*******************************************************************/ + +bool refined_string_typet::is_java_string_builder_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + if(subtype.id()==ID_struct) + { + irep_idt tag=to_struct_type(subtype).get_tag(); + return tag=="java.lang.StringBuilder"; + } + } + return false; +} + +/*******************************************************************\ + +Function: refined_string_typet::is_java_char_sequence_type + + Inputs: a type + + Outputs: Boolean telling whether the type is that of java char sequence + +\*******************************************************************/ + +bool refined_string_typet::is_java_char_sequence_type(const typet &type) +{ + if(type.id()==ID_pointer) + { + const pointer_typet &pt=to_pointer_type(type); + const typet &subtype=pt.subtype(); + if(subtype.id()==ID_struct) + { + const irep_idt &tag=to_struct_type(subtype).get_tag(); + return tag=="java.lang.CharSequence"; + } + } + return false; +} diff --git a/src/solvers/refinement/refined_string_type.h b/src/solvers/refinement/refined_string_type.h new file mode 100644 index 00000000000..5ad67cc2c31 --- /dev/null +++ b/src/solvers/refinement/refined_string_type.h @@ -0,0 +1,112 @@ +/********************************************************************\ + +Module: Type for string expressions used by the string solver. + These string expressions contain a field `length`, of type + `index_type`, a field `content` of type `content_type`. + This module also defines functions to recognise the C and java + string types. + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_SOLVERS_REFINEMENT_REFINED_STRING_TYPE_H +#define CPROVER_SOLVERS_REFINEMENT_REFINED_STRING_TYPE_H + +#include +#include +#include +#include +#include +#include + +// Internal type used for string refinement +class refined_string_typet: public struct_typet +{ +public: + explicit refined_string_typet(typet char_type); + + // Type for the content (list of characters) of a string + const array_typet &get_content_type() const + { + assert(components().size()==2); + return to_array_type(components()[1].type()); + } + + const typet &get_char_type() + { + assert(components().size()==2); + return components()[0].type(); + } + + const typet &get_index_type() const + { + return get_content_type().size().type(); + } + + static typet index_type() + { + return signed_int_type(); + } + + static typet java_index_type() + { + return java_int_type(); + } + + // For C the unrefined string type is __CPROVER_string, for java it is a + // pointer to a struct with tag java.lang.String + + static bool is_c_string_type(const typet &type); + + static bool is_java_string_pointer_type(const typet &type); + + static bool is_java_string_type(const typet &type); + + static bool is_java_string_builder_type(const typet &type); + + static bool is_java_char_sequence_type(const typet &type); + + static typet get_char_type(const exprt &expr) + { + if(is_c_string_type(expr.type())) + return char_type(); + else + return java_char_type(); + } + + static typet get_index_type(const exprt &expr) + { + if(is_c_string_type(expr.type())) + return index_type(); + else + return java_index_type(); + } + + static bool is_unrefined_string_type(const typet &type) + { + return ( + is_c_string_type(type) || + is_java_string_pointer_type(type) || + is_java_string_builder_type(type) || + is_java_char_sequence_type(type)); + } + + static bool is_unrefined_string(const exprt &expr) + { + return (is_unrefined_string_type(expr.type())); + } + + constant_exprt index_of_int(int i) const + { + return from_integer(i, get_index_type()); + } +}; + +const refined_string_typet &to_refined_string_type(const typet &type) +{ + assert(type.id()==ID_struct); + return static_cast(type); +} + +#endif From c45f8cbc65c3064b353ca4d3a9d8fc6a0803d6b7 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 23 Dec 2016 12:28:20 +0000 Subject: [PATCH 5/8] Class for string expressions to be used by the string solver --- src/solvers/refinement/string_expr.cpp | 55 +++++++++ src/solvers/refinement/string_expr.h | 150 +++++++++++++++++++++++++ 2 files changed, 205 insertions(+) create mode 100644 src/solvers/refinement/string_expr.cpp create mode 100644 src/solvers/refinement/string_expr.h diff --git a/src/solvers/refinement/string_expr.cpp b/src/solvers/refinement/string_expr.cpp new file mode 100644 index 00000000000..0c284621d8d --- /dev/null +++ b/src/solvers/refinement/string_expr.cpp @@ -0,0 +1,55 @@ +/*******************************************************************\ + +Module: String expressions for the string solver + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include + +unsigned string_exprt::next_symbol_id=1; + +/*******************************************************************\ + +Function: string_exprt::fresh_symbol + + Inputs: a prefix and a type + + Outputs: a symbol of type tp whose name starts with + "string_refinement#" followed by prefix + + Purpose: generate a new symbol expression of the given type with some prefix + +\*******************************************************************/ + +symbol_exprt string_exprt::fresh_symbol( + const irep_idt &prefix, const typet &type) +{ + std::ostringstream buf; + buf << "string_refinement#" << prefix << "#" << (next_symbol_id++); + irep_idt name(buf.str()); + return symbol_exprt(name, type); +} + +/*******************************************************************\ + +Constructor: string_exprt + + Inputs: a type for characters + + Purpose: construct a string expression whose length and content are new + variables + +\*******************************************************************/ + +string_exprt::string_exprt(typet char_type): + struct_exprt(refined_string_typet(char_type)) +{ + refined_string_typet t(char_type); + symbol_exprt length= + fresh_symbol("string_length", refined_string_typet::index_type()); + symbol_exprt content=fresh_symbol("string_content", t.get_content_type()); + move_to_operands(length, content); +} + diff --git a/src/solvers/refinement/string_expr.h b/src/solvers/refinement/string_expr.h new file mode 100644 index 00000000000..29d6b12eae7 --- /dev/null +++ b/src/solvers/refinement/string_expr.h @@ -0,0 +1,150 @@ +/******************************************************************\ + +Module: String expressions for the string solver + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_EXPR_H +#define CPROVER_SOLVERS_REFINEMENT_STRING_EXPR_H + +#include + +#include +#include +#include + + +// Expressions that encode strings +class string_exprt: public struct_exprt +{ +public: + // Initialize string from the type of characters + explicit string_exprt(typet char_type); + + // Default uses C character type + string_exprt() : string_exprt(char_type()) {} + + // Generate a new symbol of the given type with a prefix + static symbol_exprt fresh_symbol( + const irep_idt &prefix, const typet &type=bool_typet()); + + // Expression corresponding to the length of the string + const exprt &length() const { return op0(); } + exprt &length() { return op0(); } + + // Expression corresponding to the content (array of characters) of the string + const exprt &content() const { return op1(); } + exprt &content() { return op1(); } + + // Type of the expression as a refined string type + const refined_string_typet &refined_type() const + { + return to_refined_string_type(type()); + } + + static exprt within_bounds(const exprt &idx, const exprt &bound); + + // Expression of the character at position idx in the string + index_exprt operator[] (const exprt &idx) const + { + return index_exprt(content(), idx); + } + + index_exprt operator[] (int i) const + { + return index_exprt(content(), refined_type().index_of_int(i)); + } + + // Comparison on the length of the strings + binary_relation_exprt axiom_for_is_longer_than( + const string_exprt &rhs) const + { + return binary_relation_exprt(length(), ID_ge, rhs.length()); + } + + binary_relation_exprt axiom_for_is_longer_than( + const exprt &rhs) const + { + return binary_relation_exprt(length(), ID_ge, rhs); + } + + binary_relation_exprt axiom_for_is_strictly_longer_than( + const exprt &rhs) const + { + return binary_relation_exprt(rhs, ID_lt, length()); + } + + binary_relation_exprt axiom_for_is_strictly_longer_than( + const string_exprt &rhs) const + { + return binary_relation_exprt(rhs.length(), ID_lt, length()); + } + + binary_relation_exprt axiom_for_is_strictly_longer_than(int i) const + { + return axiom_for_is_strictly_longer_than(refined_type().index_of_int(i)); + } + + binary_relation_exprt axiom_for_is_shorter_than( + const string_exprt &rhs) const + { + return binary_relation_exprt(length(), ID_le, rhs.length()); + } + + binary_relation_exprt axiom_for_is_shorter_than( + const exprt &rhs) const + { + return binary_relation_exprt(length(), ID_le, rhs); + } + + binary_relation_exprt axiom_for_is_shorter_than(int i) const + { + return axiom_for_is_shorter_than(refined_type().index_of_int(i)); + } + + binary_relation_exprt axiom_for_is_strictly_shorter_than( + const string_exprt &rhs) const + { + return binary_relation_exprt(length(), ID_lt, rhs.length()); + } + + binary_relation_exprt axiom_for_is_strictly_shorter_than( + const exprt &rhs) const + { + return binary_relation_exprt(length(), ID_lt, rhs); + } + + equal_exprt axiom_for_has_same_length_as( + const string_exprt &rhs) const + { + return equal_exprt(length(), rhs.length()); + } + + equal_exprt axiom_for_has_length(const exprt &rhs) const + { + return equal_exprt(length(), rhs); + } + + equal_exprt axiom_for_has_length(int i) const + { + return axiom_for_has_length(refined_type().index_of_int(i)); + } + + static irep_idt extract_java_string(const symbol_exprt &s); + + static unsigned next_symbol_id; + + friend inline string_exprt &to_string_expr(exprt &expr); +}; + + +inline string_exprt &to_string_expr(exprt &expr) +{ + assert(expr.id()==ID_struct); + return static_cast(expr); +} + + +#endif From bb4fe7d76db5da5e10d4b3ca108a72babb0d463b Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 23 Dec 2016 12:30:56 +0000 Subject: [PATCH 6/8] Class for string constraints. String constraints are formula about strings. They can contains universaly quantified variables. --- src/solvers/refinement/string_constraint.h | 183 +++++++++++++++++++++ 1 file changed, 183 insertions(+) create mode 100644 src/solvers/refinement/string_constraint.h diff --git a/src/solvers/refinement/string_constraint.h b/src/solvers/refinement/string_constraint.h new file mode 100644 index 00000000000..1f5a56ae9b7 --- /dev/null +++ b/src/solvers/refinement/string_constraint.h @@ -0,0 +1,183 @@ +/*******************************************************************\ + +Module: Defines string constraints. These are formulas talking about strings. + We implemented two forms of constraints: `string_constraintt` + are formulas of the form $\forall univ_var \in [lb,ub[. prem => body$, + and not_contains_constraintt of the form: + $\forall x in [lb,ub[. p(x) => \exists y in [lb,ub[. s1[x+y] != s2[y]$. + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_H +#define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_H + +#include +#include +#include + +class string_constraintt: public exprt +{ +public: + // String constraints are of the form + // forall univ_var in [lower_bound,upper_bound[. premise => body + + const exprt &premise() const + { + return op0(); + } + + const exprt &body() const + { + return op1(); + } + + const symbol_exprt &univ_var() const + { + return to_symbol_expr(op2()); + } + + const exprt &upper_bound() const + { + return op3(); + } + + const exprt &lower_bound() const + { + return operands()[4]; + } + + + private: + string_constraintt(); + + public: + string_constraintt( + const symbol_exprt &_univ_var, + const exprt &bound_inf, + const exprt &bound_sup, + const exprt &prem, + const exprt &body): + exprt(ID_string_constraint) + { + copy_to_operands(prem, body); + copy_to_operands(_univ_var, bound_sup, bound_inf); + } + + // Default bound inferior is 0 + string_constraintt( + const symbol_exprt &_univ_var, + const exprt &bound_sup, + const exprt &prem, + const exprt &body): + string_constraintt( + _univ_var, + from_integer(0, _univ_var.type()), + bound_sup, + prem, + body) + {} + + // Default premise is true + string_constraintt( + const symbol_exprt &_univ_var, + const exprt &bound_sup, + const exprt &body): + string_constraintt(_univ_var, bound_sup, true_exprt(), body) + {} + + exprt univ_within_bounds() const + { + return and_exprt( + binary_relation_exprt(lower_bound(), ID_le, univ_var()), + binary_relation_exprt(upper_bound(), ID_gt, univ_var())); + } +}; + +extern inline const string_constraintt &to_string_constraint(const exprt &expr) +{ + assert(expr.id()==ID_string_constraint && expr.operands().size()==5); + return static_cast(expr); +} + +extern inline string_constraintt &to_string_constraint(exprt &expr) +{ + assert(expr.id()==ID_string_constraint && expr.operands().size()==5); + return static_cast(expr); +} + +class string_not_contains_constraintt: public exprt +{ +public: + // string_not contains_constraintt are formula of the form: + // forall x in [lb,ub[. p(x) => exists y in [lb,ub[. s1[x+y] != s2[y] + + string_not_contains_constraintt( + exprt univ_lower_bound, + exprt univ_bound_sup, + exprt premise, + exprt exists_bound_inf, + exprt exists_bound_sup, + exprt s0, + exprt s1) : + exprt(ID_string_not_contains_constraint) + { + copy_to_operands(univ_lower_bound, univ_bound_sup, premise); + copy_to_operands(exists_bound_inf, exists_bound_sup, s0); + copy_to_operands(s1); + }; + + const exprt &univ_lower_bound() const + { + return op0(); + } + + const exprt &univ_upper_bound() const + { + return op1(); + } + + const exprt &premise() const + { + return op2(); + } + + const exprt &exists_lower_bound() const + { + return op3(); + } + + const exprt &exists_upper_bound() const + { + return operands()[4]; + } + + const exprt &s0() const + { + return operands()[5]; + } + + const exprt &s1() const + { + return operands()[6]; + } +}; + +inline const string_not_contains_constraintt +&to_string_not_contains_constraint(const exprt &expr) +{ + assert(expr.id()==ID_string_not_contains_constraint); + assert(expr.operands().size()==7); + return static_cast(expr); +} + +inline string_not_contains_constraintt +&to_string_not_contains_constraint(exprt &expr) +{ + assert(expr.id()==ID_string_not_contains_constraint); + assert(expr.operands().size()==7); + return static_cast(expr); +} + +#endif From f5dc2e6c1244bcb82d8e5d4faa3b60f9fca72106 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 23 Dec 2016 12:40:52 +0000 Subject: [PATCH 7/8] Class for generating string constraints from string functions --- .../refinement/string_constraint_generator.h | 315 ++++++++ ...tring_constraint_generator_code_points.cpp | 283 +++++++ ...string_constraint_generator_comparison.cpp | 344 +++++++++ .../string_constraint_generator_concat.cpp | 222 ++++++ .../string_constraint_generator_constants.cpp | 143 ++++ .../string_constraint_generator_indexof.cpp | 285 +++++++ .../string_constraint_generator_insert.cpp | 220 ++++++ .../string_constraint_generator_main.cpp | 700 ++++++++++++++++++ .../string_constraint_generator_testing.cpp | 246 ++++++ ...ng_constraint_generator_transformation.cpp | 460 ++++++++++++ .../string_constraint_generator_valueof.cpp | 646 ++++++++++++++++ 11 files changed, 3864 insertions(+) create mode 100644 src/solvers/refinement/string_constraint_generator.h create mode 100644 src/solvers/refinement/string_constraint_generator_code_points.cpp create mode 100644 src/solvers/refinement/string_constraint_generator_comparison.cpp create mode 100644 src/solvers/refinement/string_constraint_generator_concat.cpp create mode 100644 src/solvers/refinement/string_constraint_generator_constants.cpp create mode 100644 src/solvers/refinement/string_constraint_generator_indexof.cpp create mode 100644 src/solvers/refinement/string_constraint_generator_insert.cpp create mode 100644 src/solvers/refinement/string_constraint_generator_main.cpp create mode 100644 src/solvers/refinement/string_constraint_generator_testing.cpp create mode 100644 src/solvers/refinement/string_constraint_generator_transformation.cpp create mode 100644 src/solvers/refinement/string_constraint_generator_valueof.cpp diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h new file mode 100644 index 00000000000..a78f1473510 --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator.h @@ -0,0 +1,315 @@ +/*******************************************************************\ + +Module: Generates string constraints to link results from string functions + with their arguments. This is inspired by the PASS paper at HVC'13: + "PASS: String Solving with Parameterized Array and Interval Automaton" + by Guodong Li and Indradeep Ghosh, which gives examples of constraints + for several functions. + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H +#define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H + +#include +#include +#include +#include + +class string_constraint_generatort +{ +public: + // This module keeps a list of axioms. It has methods which generate + // string constraints for different string funcitons and add them + // to the axiom list. + + string_constraint_generatort(): + mode(ID_unknown), refined_string_type(char_type()) + { } + + void set_mode(irep_idt _mode) + { + // only C and java modes supported + assert((_mode==ID_java) || (_mode==ID_C)); + mode=_mode; + refined_string_type=refined_string_typet(get_char_type()); + } + + irep_idt &get_mode() { return mode; } + + typet get_char_type() const; + typet get_index_type() const + { + if(mode==ID_java) + return refined_string_typet::java_index_type(); + assert(mode==ID_C); + return refined_string_typet::index_type(); + } + + const refined_string_typet &get_refined_string_type() const + { + return refined_string_type; + } + + // Axioms are of three kinds: universally quantified string constraint, + // not contains string constraints and simple formulas. + std::list axioms; + + // Boolean symbols for the results of some string functions + std::list boolean_symbols; + + // Symbols used in existential quantifications + std::list index_symbols; + + // Used to store information about witnesses for not_contains constraints + std::map witness; + + exprt get_witness_of( + const string_not_contains_constraintt &c, + const exprt &univ_val) const + { + return index_exprt(witness.at(c), univ_val); + } + + symbol_exprt fresh_exist_index(const irep_idt &prefix); + symbol_exprt fresh_univ_index(const irep_idt &prefix); + symbol_exprt fresh_boolean(const irep_idt &prefix); + + // We maintain a map from symbols to strings. + std::map symbol_to_string; + + string_exprt find_or_add_string_of_symbol(const symbol_exprt &sym); + + void assign_to_symbol( + const symbol_exprt &sym, const string_exprt &expr) + { + symbol_to_string[sym.get_identifier()]=expr; + } + + string_exprt add_axioms_for_string_expr(const exprt &expr); + void set_string_symbol_equal_to_expr( + const symbol_exprt &sym, const exprt &str); + + exprt add_axioms_for_function_application( + const function_application_exprt &expr); + + constant_exprt constant_char(int i) const; + +private: + // The integer with the longest string is Integer.MIN_VALUE which is -2^31, + // that is -2147483648 so takes 11 characters to write. + // The long with the longest string is Long.MIN_VALUE which is -2^63, + // approximately -9.223372037*10^18 so takes 20 characters to write. + const std::size_t MAX_INTEGER_LENGTH=11; + const std::size_t MAX_LONG_LENGTH=20; + const std::size_t MAX_FLOAT_LENGTH=15; + const std::size_t MAX_DOUBLE_LENGTH=30; + + irep_idt extract_java_string(const symbol_exprt &s) const; + + exprt axiom_for_is_positive_index(const exprt &x); + + // The following functions add axioms for the returned value + // to be equal to the result of the function given as argument. + // They are not accessed directly from other classes: they call + // `add_axioms_for_function_application` which determines which of + // these methods should be called. + + exprt add_axioms_for_char_at(const function_application_exprt &f); + exprt add_axioms_for_code_point_at(const function_application_exprt &f); + exprt add_axioms_for_code_point_before(const function_application_exprt &f); + exprt add_axioms_for_contains(const function_application_exprt &f); + exprt add_axioms_for_equals(const function_application_exprt &f); + exprt add_axioms_for_equals_ignore_case(const function_application_exprt &f); + exprt add_axioms_for_data(const function_application_exprt &f); + + // Add axioms corresponding to the String.hashCode java function + // The specification is partial: the actual value is not actually computed + // but we ensure that hash codes of equal strings are equal. + exprt add_axioms_for_hash_code(const function_application_exprt &f); + + exprt add_axioms_for_is_empty(const function_application_exprt &f); + exprt add_axioms_for_is_prefix( + const string_exprt &prefix, const string_exprt &str, const exprt &offset); + exprt add_axioms_for_is_prefix( + const function_application_exprt &f, bool swap_arguments=false); + exprt add_axioms_for_is_suffix( + const function_application_exprt &f, bool swap_arguments=false); + exprt add_axioms_for_length(const function_application_exprt &f); + string_exprt add_axioms_for_empty_string(const function_application_exprt &f); + string_exprt add_axioms_for_char_set(const function_application_exprt &expr); + string_exprt add_axioms_for_copy(const function_application_exprt &f); + string_exprt add_axioms_for_concat( + const string_exprt &s1, const string_exprt &s2); + string_exprt add_axioms_for_concat(const function_application_exprt &f); + string_exprt add_axioms_for_concat_int(const function_application_exprt &f); + string_exprt add_axioms_for_concat_long(const function_application_exprt &f); + string_exprt add_axioms_for_concat_bool(const function_application_exprt &f); + string_exprt add_axioms_for_concat_char(const function_application_exprt &f); + string_exprt add_axioms_for_concat_double( + const function_application_exprt &f); + string_exprt add_axioms_for_concat_float(const function_application_exprt &f); + string_exprt add_axioms_for_concat_code_point( + const function_application_exprt &f); + string_exprt add_axioms_for_constant(irep_idt sval); + string_exprt add_axioms_for_delete( + const string_exprt &str, const exprt &start, const exprt &end); + string_exprt add_axioms_for_delete(const function_application_exprt &expr); + string_exprt add_axioms_for_delete_char_at( + const function_application_exprt &expr); + string_exprt add_axioms_for_insert( + const string_exprt &s1, const string_exprt &s2, const exprt &offset); + string_exprt add_axioms_for_insert(const function_application_exprt &f); + string_exprt add_axioms_for_insert_int(const function_application_exprt &f); + string_exprt add_axioms_for_insert_long(const function_application_exprt &f); + string_exprt add_axioms_for_insert_bool(const function_application_exprt &f); + string_exprt add_axioms_for_insert_char(const function_application_exprt &f); + string_exprt add_axioms_for_insert_double( + const function_application_exprt &f); + string_exprt add_axioms_for_insert_float(const function_application_exprt &f); + string_exprt add_axioms_for_insert_char_array( + const function_application_exprt &f); + string_exprt add_axioms_from_literal(const function_application_exprt &f); + string_exprt add_axioms_from_int(const function_application_exprt &f); + string_exprt add_axioms_from_int(const exprt &i, size_t max_size); + string_exprt add_axioms_from_int_hex(const exprt &i); + string_exprt add_axioms_from_int_hex(const function_application_exprt &f); + string_exprt add_axioms_from_long(const function_application_exprt &f); + string_exprt add_axioms_from_long(const exprt &i, size_t max_size); + string_exprt add_axioms_from_bool(const function_application_exprt &f); + string_exprt add_axioms_from_bool(const exprt &i); + string_exprt add_axioms_from_char(const function_application_exprt &f); + string_exprt add_axioms_from_char(const exprt &i); + string_exprt add_axioms_from_char_array(const function_application_exprt &f); + string_exprt add_axioms_from_char_array( + const exprt &length, + const exprt &data, + const exprt &offset, + const exprt &count); + exprt add_axioms_for_index_of( + const string_exprt &str, + const exprt &c, + const exprt &from_index); + + // Add axioms corresponding to the String.indexOf:(String;I) java function + // TODO: the specifications are only partial: + // we add axioms stating that the returned value is either -1 or greater than + // from_index and the string beggining there has prefix substring + exprt add_axioms_for_index_of_string( + const string_exprt &str, + const string_exprt &substring, + const exprt &from_index); + + // Add axioms corresponding to the String.indexOf java functions + // TODO: the specifications are only partial for the ones that look for + // strings + exprt add_axioms_for_index_of(const function_application_exprt &f); + + // Add axioms corresponding to the String.lastIndexOf:(String;I) java function + // TODO: the specifications are only partial + exprt add_axioms_for_last_index_of_string( + const string_exprt &str, + const string_exprt &substring, + const exprt &from_index); + + // Add axioms corresponding to the String.lastIndexOf:(CI) java function + exprt add_axioms_for_last_index_of( + const string_exprt &str, + const exprt &c, + const exprt &from_index); + + // Add axioms corresponding to the String.lastIndexOf java functions + // TODO: the specifications is only partial + exprt add_axioms_for_last_index_of(const function_application_exprt &f); + + // TODO: the specifications of these functions is only partial + // We currently only specify that the string for NaN is "NaN", for infinity + // and minus infinity the string are "Infinity" and "-Infinity respectively + // otherwise the string contains only characters in [0123456789.] and '-' at + // the start for negative number + string_exprt add_axioms_from_float(const function_application_exprt &f); + string_exprt add_axioms_from_float( + const exprt &f, bool double_precision=false); + + // Add axioms corresponding to the String.valueOf(D) java function + // TODO: the specifications is only partial + string_exprt add_axioms_from_double(const function_application_exprt &f); + + string_exprt add_axioms_for_replace(const function_application_exprt &f); + string_exprt add_axioms_for_set_length(const function_application_exprt &f); + + // TODO: the specification may not be correct for the case where the + // string is shorter than end. An actual java program should throw an + // exception in that case + string_exprt add_axioms_for_substring( + const string_exprt &str, const exprt &start, const exprt &end); + string_exprt add_axioms_for_substring(const function_application_exprt &expr); + + string_exprt add_axioms_for_to_lower_case( + const function_application_exprt &expr); + string_exprt add_axioms_for_to_upper_case( + const function_application_exprt &expr); + string_exprt add_axioms_for_trim(const function_application_exprt &expr); + + // Add axioms corresponding to the String.valueOf([CII) function + // TODO: not working correctly at the moment + string_exprt add_axioms_for_value_of(const function_application_exprt &f); + + string_exprt add_axioms_for_code_point(const exprt &code_point); + string_exprt add_axioms_for_java_char_array(const exprt &char_array); + string_exprt add_axioms_for_if(const if_exprt &expr); + exprt add_axioms_for_char_literal(const function_application_exprt &f); + + // Add axioms corresponding the String.codePointCount java function + // TODO: this function is underspecified, we do not compute the exact value + // but over approximate it. + exprt add_axioms_for_code_point_count(const function_application_exprt &f); + + // Add axioms corresponding the String.offsetByCodePointCount java function + // TODO: this function is underspecified, it should return the index within + // this String that is offset from the given first argument by second argument + // code points and we approximate this by saying the result is + // between index + offset and index + 2 * offset + exprt add_axioms_for_offset_by_code_point( + const function_application_exprt &f); + + exprt add_axioms_for_parse_int(const function_application_exprt &f); + exprt add_axioms_for_to_char_array(const function_application_exprt &f); + exprt add_axioms_for_compare_to(const function_application_exprt &f); + + // Add axioms corresponding to the String.intern java function + // TODO: this does not work at the moment because of the way we treat + // string pointers + symbol_exprt add_axioms_for_intern(const function_application_exprt &f); + + // Tells which language is used. C and Java are supported + irep_idt mode; + + // Type of strings used in the refinement + refined_string_typet refined_string_type; + + // assert that the number of argument is equal to nb and extract them + static const function_application_exprt::argumentst &args( + const function_application_exprt &expr, size_t nb) + { + const function_application_exprt::argumentst &args=expr.arguments(); + assert(args.size()==nb); + return args; + } + + exprt int_of_hex_char(exprt chr) const; + exprt is_high_surrogate(const exprt &chr) const; + exprt is_low_surrogate(const exprt &chr) const; + static exprt character_equals_ignore_case( + exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z); + + // Pool used for the intern method + std::map pool; + + // Used to determine whether hashcode should be equal + std::map hash; +}; + +#endif diff --git a/src/solvers/refinement/string_constraint_generator_code_points.cpp b/src/solvers/refinement/string_constraint_generator_code_points.cpp new file mode 100644 index 00000000000..807587cbc45 --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator_code_points.cpp @@ -0,0 +1,283 @@ +/*******************************************************************\ + +Module: Generates string constraints for Java functions dealing with + code points + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include + +/******************************************************************* \ + +Function: string_constraint_generatort::add_axioms_for_code_point + + Inputs: an expression representing a java code point + + Outputs: a new string expression + + Purpose: add axioms for the conversion of an integer representing a java + code point to a utf-16 string + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_code_point( + const exprt &code_point) +{ + string_exprt res(get_char_type()); + const typet &type=code_point.type(); + assert(type.id()==ID_signedbv); + + // We add axioms: + // a1 : code_point<0x010000 => |res|=1 + // a2 : code_point>=0x010000 => |res|=2 + // a3 : code_point<0x010000 => res[0]=code_point + // a4 : code_point>=0x010000 => res[0]=0xD800+(code_point-0x10000)/0x0400 + // a5 : code_point>=0x010000 => res[1]=0xDC00+(code_point-0x10000)/0x0400 + // For more explenations about this conversion, see: + // https://en.wikipedia.org/wiki/UTF-16 + + exprt hex010000=from_integer(0x010000, type); + exprt hexD800=from_integer(0xD800, type); + exprt hexDC00=from_integer(0xDC00, type); + exprt hex0400=from_integer(0x0400, type); + + binary_relation_exprt small(code_point, ID_lt, hex010000); + implies_exprt a1(small, res.axiom_for_has_length(1)); + axioms.push_back(a1); + + implies_exprt a2(not_exprt(small), res.axiom_for_has_length(2)); + axioms.push_back(a2); + + typecast_exprt code_point_as_char(code_point, get_char_type()); + implies_exprt a3(small, equal_exprt(res[0], code_point_as_char)); + axioms.push_back(a3); + + plus_exprt first_char( + hexD800, div_exprt(minus_exprt(code_point, hex010000), hex0400)); + implies_exprt a4( + not_exprt(small), + equal_exprt(res[0], typecast_exprt(first_char, get_char_type()))); + axioms.push_back(a4); + + plus_exprt second_char(hexDC00, mod_exprt(code_point, hex0400)); + implies_exprt a5( + not_exprt(small), + equal_exprt(res[1], typecast_exprt(second_char, get_char_type()))); + axioms.push_back(a5); + + return res; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::is_high_surrogate + + Inputs: a character expression + + Outputs: a Boolean expression + + Purpose: the output is true when the character is a high surrogate for + UTF-16 encoding, see https://en.wikipedia.org/wiki/UTF-16 for + more explenation about the encoding; + this is true when the character is in the range 0xD800..0xDBFF + +\*******************************************************************/ + +exprt string_constraint_generatort::is_high_surrogate(const exprt &chr) const +{ + return and_exprt( + binary_relation_exprt(chr, ID_ge, constant_char(0xD800)), + binary_relation_exprt(chr, ID_le, constant_char(0xDBFF))); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::is_low_surrogate + + Inputs: a character expression + + Outputs: a Boolean expression + + Purpose: the output is true when the character is a low surrogate for + UTF-16 encoding, see https://en.wikipedia.org/wiki/UTF-16 for + more explenation about the encoding; + this is true when the character is in the range 0xDC00..0xDFFF + +\*******************************************************************/ + +exprt string_constraint_generatort::is_low_surrogate(const exprt &chr) const +{ + return and_exprt( + binary_relation_exprt(chr, ID_ge, constant_char(0xDC00)), + binary_relation_exprt(chr, ID_le, constant_char(0xDFFF))); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::pair_value + + Inputs: two character expressions and a return type + char1 and char2 should be of type return_type + + Outputs: an integer expression of type return_type + + Purpose: the output corresponds to the unicode character given by the + pair of characters of inputs assuming it has been encoded in + UTF-16, see https://en.wikipedia.org/wiki/UTF-16 for + more explenation about the encoding; + the operation we perform is: + pair_value=0x10000+(((char1%0x0800)*0x0400)+char2%0x0400) + +\*******************************************************************/ + +exprt pair_value(exprt char1, exprt char2, typet return_type) +{ + exprt hex010000=from_integer(0x010000, return_type); + exprt hex0800=from_integer(0x0800, return_type); + exprt hex0400=from_integer(0x0400, return_type); + mult_exprt m1(mod_exprt(char1, hex0800), hex0400); + mod_exprt m2(char2, hex0400); + plus_exprt pair_value(hex010000, plus_exprt(m1, m2)); + return pair_value; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_code_point_at + + Inputs: function application with two arguments: a string and an index + + Outputs: a integer expression corresponding to a code point + + Purpose: add axioms corresponding to the String.codePointAt java function + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_code_point_at( + const function_application_exprt &f) +{ + typet return_type=f.type(); + assert(return_type.id()==ID_signedbv); + string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + const exprt &pos=args(f, 2)[1]; + + symbol_exprt result=string_exprt::fresh_symbol("char", return_type); + exprt index1=from_integer(1, get_index_type()); + const exprt &char1=str[pos]; + const exprt &char2=str[plus_exprt(pos, index1)]; + exprt char1_as_int=typecast_exprt(char1, return_type); + exprt char2_as_int=typecast_exprt(char2, return_type); + exprt pair=pair_value(char1_as_int, char2_as_int, return_type); + exprt is_low=is_low_surrogate(str[plus_exprt(pos, index1)]); + exprt return_pair=and_exprt(is_high_surrogate(str[pos]), is_low); + + axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); + axioms.push_back( + implies_exprt(not_exprt(return_pair), equal_exprt(result, char1_as_int))); + return result; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_code_point_before + + Inputs: function application with two arguments: a string and an index + + Outputs: a integer expression corresponding to a code point + + Purpose: add axioms corresponding to the String.codePointBefore java function + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_code_point_before( + const function_application_exprt &f) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(args.size()==2); + typet return_type=f.type(); + assert(return_type.id()==ID_signedbv); + symbol_exprt result=string_exprt::fresh_symbol("char", return_type); + string_exprt str=add_axioms_for_string_expr(args[0]); + + const exprt &char1= + str[minus_exprt(args[1], from_integer(2, get_index_type()))]; + const exprt &char2= + str[minus_exprt(args[1], from_integer(1, get_index_type()))]; + exprt char1_as_int=typecast_exprt(char1, return_type); + exprt char2_as_int=typecast_exprt(char2, return_type); + + exprt pair=pair_value(char1_as_int, char2_as_int, return_type); + exprt return_pair=and_exprt( + is_high_surrogate(char1), is_low_surrogate(char2)); + + axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair))); + axioms.push_back( + implies_exprt(not_exprt(return_pair), equal_exprt(result, char2_as_int))); + return result; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_code_point_count + + Inputs: function application with three arguments: a string and two indexes + + Outputs: an integer expression + + Purpose: add axioms giving approximate bounds on the result of the + String.codePointCount java function + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_code_point_count( + const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + const exprt &begin=args(f, 3)[1]; + const exprt &end=args(f, 3)[2]; + const typet &return_type=f.type(); + symbol_exprt result= + string_exprt::fresh_symbol("code_point_count", return_type); + minus_exprt length(end, begin); + div_exprt minimum(length, from_integer(2, get_index_type())); + axioms.push_back(binary_relation_exprt(result, ID_le, length)); + axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); + + return result; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_offset_by_code_point + + Inputs: function application with three arguments: a string and two indexes + + Outputs: a new string expression + + Purpose: add axioms giving approximate bounds on the result of the + String.offsetByCodePointCount java function. + We approximate the result by saying the result is + between index + offset and index + 2 * offset + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_offset_by_code_point( + const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + const exprt &index=args(f, 3)[1]; + const exprt &offset=args(f, 3)[2]; + const typet &return_type=f.type(); + symbol_exprt result= + string_exprt::fresh_symbol("offset_by_code_point", return_type); + + exprt minimum=plus_exprt(index, offset); + exprt maximum=plus_exprt(index, plus_exprt(offset, offset)); + axioms.push_back(binary_relation_exprt(result, ID_le, maximum)); + axioms.push_back(binary_relation_exprt(result, ID_ge, minimum)); + + return result; +} + diff --git a/src/solvers/refinement/string_constraint_generator_comparison.cpp b/src/solvers/refinement/string_constraint_generator_comparison.cpp new file mode 100644 index 00000000000..1cec8cf5442 --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator_comparison.cpp @@ -0,0 +1,344 @@ +/*******************************************************************\ + +Module: Generates string constraints for function comparing strings, + such as: equals, equalsIgnoreCase, compareTo, hashCode, intern + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_equals + + Inputs: function application with two string arguments + + Outputs: a expression of Boolean type + + Purpose: add axioms stating that the result is true exactly when the strings + represented by the arguments are equal + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_equals( + const function_application_exprt &f) +{ + assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); + symbol_exprt eq=fresh_boolean("equal"); + typecast_exprt tc_eq(eq, f.type()); + + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + + // We want to write: + // eq <=> (s1.length=s2.length &&forall i s1.length=s2.length + // a2 : forall i s1[i]=s2[i] + // a3 : !eq => s1.length!=s2.length + // || (witness |s1|=|s2| + // a2 : forall qvar, 0<=qvar<|s1|, + // eq => char_equal_ignore_case(s1[qvar],s2[qvar]); + // a3 : !eq => |s1|!=s2 || (0 <=witness<|s1| &&!char_equal_ignore_case) + + implies_exprt a1(eq, s1.axiom_for_has_same_length_as(s2)); + axioms.push_back(a1); + + symbol_exprt qvar=fresh_univ_index("QA_equal_ignore_case"); + exprt constr2=character_equals_ignore_case( + s1[qvar], s2[qvar], char_a, char_A, char_Z); + string_constraintt a2(qvar, s1.length(), eq, constr2); + axioms.push_back(a2); + + symbol_exprt witness=fresh_exist_index("witness_unequal_ignore_case"); + exprt zero=from_integer(0, get_index_type()); + and_exprt bound_witness( + binary_relation_exprt(witness, ID_lt, s1.length()), + binary_relation_exprt(witness, ID_ge, zero)); + exprt witness_eq=character_equals_ignore_case( + s1[witness], s2[witness], char_a, char_A, char_Z); + not_exprt witness_diff(witness_eq); + implies_exprt a3( + not_exprt(eq), + or_exprt( + notequal_exprt(s1.length(), s2.length()), + and_exprt(bound_witness, witness_diff))); + axioms.push_back(a3); + + return tc_eq; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_hash_code + + Inputs: function application with a string argument + + Outputs: a integer expression corresponding to the hash code of the string + + Purpose: add axioms stating that if two strings are equal then their hash + codes are equals + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_hash_code( + const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + typet return_type=f.type(); + + // initialisation of the missing pool variable + std::map::iterator it; + for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + if(hash.find(it->second)==hash.end()) + hash[it->second]=string_exprt::fresh_symbol("hash", return_type); + + // for each string s. either: + // c1: hash(str)=hash(s) + // c2: |str|!=|s| + // c3: (|str|==|s| &&exists i<|s|. s[i]!=str[i]) + + // WARNING: the specification may be incomplete + for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + { + symbol_exprt i=fresh_exist_index("index_hash"); + equal_exprt c1(hash[it->second], hash[str]); + not_exprt c2(equal_exprt(it->second.length(), str.length())); + and_exprt c3( + equal_exprt(it->second.length(), str.length()), + and_exprt( + not_exprt(equal_exprt(str[i], it->second[i])), + and_exprt( + str.axiom_for_is_strictly_longer_than(i), + axiom_for_is_positive_index(i)))); + axioms.push_back(or_exprt(c1, or_exprt(c2, c3))); + } + return hash[str]; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_compare_to + + Inputs: function application with two string arguments + + Outputs: a integer expression + + Purpose: add axioms corresponding to the String.compareTo java function + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_compare_to( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]); + const typet &return_type=f.type(); + symbol_exprt res=string_exprt::fresh_symbol("compare_to", return_type); + + // In the lexicographic comparison, x is the first point where the two + // strings differ. + // We add axioms: + // a1 : res==0 => |s1|=|s2| + // a2 : forall i<|s1|. s1[i]==s2[i] + // a3 : exists x. + // res!=0 ==> x> 0 && + // ((|s1| <= |s2| &&x<|s1|) || (|s1| >= |s2| &&x<|s2|) + // &&res=s1[x]-s2[x] ) + // || cond2: + // (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|) + // a4 : forall i s1[i]=s2[i] + + assert(return_type.id()==ID_signedbv); + + equal_exprt res_null=equal_exprt(res, from_integer(0, return_type)); + implies_exprt a1(res_null, s1.axiom_for_has_same_length_as(s2)); + axioms.push_back(a1); + + symbol_exprt i=fresh_univ_index("QA_compare_to"); + string_constraintt a2(i, s1.length(), res_null, equal_exprt(s1[i], s2[i])); + axioms.push_back(a2); + + symbol_exprt x=fresh_exist_index("index_compare_to"); + equal_exprt ret_char_diff( + res, + minus_exprt( + typecast_exprt(s1[x], return_type), + typecast_exprt(s2[x], return_type))); + equal_exprt ret_length_diff( + res, + minus_exprt( + typecast_exprt(s1.length(), return_type), + typecast_exprt(s2.length(), return_type))); + or_exprt guard1( + and_exprt(s1.axiom_for_is_shorter_than(s2), + s1.axiom_for_is_strictly_longer_than(x)), + and_exprt(s1.axiom_for_is_longer_than(s2), + s2.axiom_for_is_strictly_longer_than(x))); + and_exprt cond1(ret_char_diff, guard1); + or_exprt guard2( + and_exprt(s2.axiom_for_is_strictly_longer_than(s1), + s1.axiom_for_has_length(x)), + and_exprt(s1.axiom_for_is_strictly_longer_than(s2), + s2.axiom_for_has_length(x))); + and_exprt cond2(ret_length_diff, guard2); + + implies_exprt a3( + not_exprt(res_null), + and_exprt( + binary_relation_exprt(x, ID_ge, from_integer(0, return_type)), + or_exprt(cond1, cond2))); + axioms.push_back(a3); + + string_constraintt a4(i, x, not_exprt(res_null), equal_exprt(s1[i], s2[i])); + axioms.push_back(a4); + + return res; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_intern + + Inputs: function application with one string argument + + Outputs: a string expression + + Purpose: add axioms stating that the return value for two equal string + should be the same + +\*******************************************************************/ + +symbol_exprt string_constraint_generatort::add_axioms_for_intern( + const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + const typet &return_type=f.type(); + + // initialisation of the missing pool variable + std::map::iterator it; + for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + if(pool.find(it->second)==pool.end()) + pool[it->second]=string_exprt::fresh_symbol("pool", return_type); + + // intern(str)=s_0 || s_1 || ... + // for each string s. + // intern(str)=intern(s) || |str|!=|s| + // || (|str|==|s| &&exists i<|s|. s[i]!=str[i]) + + // symbol_exprt intern=string_exprt::fresh_symbol("intern",return_type); + + exprt disj=false_exprt(); + for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + disj=or_exprt( + disj, equal_exprt(pool[str], symbol_exprt(it->first, return_type))); + + axioms.push_back(disj); + + + // WARNING: the specification may be incomplete or incorrect + for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++) + if(it->second!=str) + { + symbol_exprt i=fresh_exist_index("index_intern"); + axioms.push_back( + or_exprt( + equal_exprt(pool[it->second], pool[str]), + or_exprt( + not_exprt(str.axiom_for_has_same_length_as(it->second)), + and_exprt( + str.axiom_for_has_same_length_as(it->second), + and_exprt( + not_exprt(equal_exprt(str[i], it->second[i])), + and_exprt(str.axiom_for_is_strictly_longer_than(i), + axiom_for_is_positive_index(i))))))); + } + + return pool[str]; +} diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp new file mode 100644 index 00000000000..c6a1a7e52e7 --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -0,0 +1,222 @@ +/*******************************************************************\ + +Module: Generates string constraints for functions adding content + add the end of strings + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_concat + + Inputs: two string expressions + + Outputs: a new string expression + + Purpose: add axioms to say that the returned string expression is equal to + the concatenation of the two string expressions given as input + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_concat( + const string_exprt &s1, const string_exprt &s2) +{ + string_exprt res(get_char_type()); + + // We add axioms: + // a1 : |res|=|s1|+|s2| + // a2 : |s1| <= |res| (to avoid overflow with very long strings) + // a3 : |s2| <= |res| (to avoid overflow with very long strings) + // a4 : forall i<|s1|. res[i]=s1[i] + // a5 : forall i<|s2|. res[i+|s1|]=s2[i] + + exprt a1=res.axiom_for_has_length( + plus_exprt(s1.length(), s2.length())); + axioms.push_back(a1); + axioms.push_back(s1.axiom_for_is_shorter_than(res)); + axioms.push_back(s2.axiom_for_is_shorter_than(res)); + + symbol_exprt idx=fresh_univ_index("QA_index_concat"); + string_constraintt a4(idx, s1.length(), equal_exprt(s1[idx], res[idx])); + axioms.push_back(a4); + + symbol_exprt idx2=fresh_univ_index("QA_index_concat2"); + equal_exprt res_eq(s2[idx2], res[plus_exprt(idx2, s1.length())]); + string_constraintt a5(idx2, s2.length(), res_eq); + axioms.push_back(a5); + + return res; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_concat + + Inputs: function application with two arguments which are strings + + Outputs: a new string expression + + Purpose: add axioms to say that the returned string expression is equal to + the concatenation of the two string arguments of + the function application + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_concat( + const function_application_exprt &f) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(args.size()==2); + + string_exprt s1=add_axioms_for_string_expr(args[0]); + string_exprt s2=add_axioms_for_string_expr(args[1]); + + return add_axioms_for_concat(s1, s2); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_concat_int + + Inputs: function application with two arguments: a string and an integer + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.append(I) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_concat_int( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_INTEGER_LENGTH); + return add_axioms_for_concat(s1, s2); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_long + + Inputs: function application with two arguments: a string and a + integer of type long + + Outputs: a new string expression + + Purpose: Add axioms corresponding to the StringBuilder.append(J) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_concat_long( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_from_int(args(f, 2)[1], MAX_LONG_LENGTH); + return add_axioms_for_concat(s1, s2); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_concat_bool + + Inputs: function application two arguments: a string and a bool + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.append(Z) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_concat_bool( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_from_bool(args(f, 2)[1]); + return add_axioms_for_concat(s1, s2); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_concat_char + + Inputs: function application with two arguments: a string and a char + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.append(C) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_concat_char( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_from_char(args(f, 2)[1]); + return add_axioms_for_concat(s1, s2); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_concat_double + + Inputs: function application with two arguments: a string and a double + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.append(D) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_concat_double( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_from_float( + args(f, 2)[1], MAX_DOUBLE_LENGTH); + return add_axioms_for_concat(s1, s2); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_concat_float + + Inputs: function application with two arguments: a string and a float + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.append(F) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_concat_float( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_from_float(args(f, 2)[1], MAX_FLOAT_LENGTH); + return add_axioms_for_concat(s1, s2); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_concat_code_point + + Inputs: function application with two arguments: a string and a code point + + Outputs: a new string expression + + Purpose: Add axioms corresponding to the StringBuilder.appendCodePoint(I) + function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_concat_code_point( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s2=add_axioms_for_code_point(args(f, 2)[1]); + return add_axioms_for_concat(s1, s2); +} diff --git a/src/solvers/refinement/string_constraint_generator_constants.cpp b/src/solvers/refinement/string_constraint_generator_constants.cpp new file mode 100644 index 00000000000..aa3466a2e57 --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator_constants.cpp @@ -0,0 +1,143 @@ +/*******************************************************************\ + +Module: Generates string constraints for constant strings + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include +#include +#include +#include + +/*******************************************************************\ + +Function: string_constraint_generatort::extract_java_string + + Inputs: a symbol expression representing a java literal + + Outputs: a string constant + + Purpose: extract java string from symbol expression when they are encoded + inside the symbol name + +\*******************************************************************/ + +irep_idt string_constraint_generatort::extract_java_string( + const symbol_exprt &s) const +{ + std::string tmp=id2string(s.get_identifier()); + std::string prefix("java::java.lang.String.Literal."); + assert(has_prefix(tmp, prefix)); + std::string value=tmp.substr(prefix.length()); + return irep_idt(value); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_constant + + Inputs: a string constant + + Outputs: a string expression + + Purpose: add axioms saying the returned string expression should be equal + to the string constant + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_constant( + irep_idt sval) +{ + string_exprt res(get_char_type()); + std::string c_str=id2string(sval); + std::wstring str; + + // TODO: we should have a special treatment for java strings when the + // conversion function is available: +#if 0 + if(mode==ID_java) + str=utf8_to_utf16_little_endian(c_str); + else +#endif + str=widen(c_str); + + for(std::size_t i=0; i + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_index_of + + Inputs: a string expression, a character expression and an integer expression + + Outputs: a integer expression + + Purpose: add axioms that the returned value is either -1 or greater than + from_index and the character at that position equals to c + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_index_of( + const string_exprt &str, const exprt &c, const exprt &from_index) +{ + symbol_exprt index=fresh_exist_index("index_of"); + symbol_exprt contains=fresh_boolean("contains_in_index_of"); + + // We add axioms: + // a1 : -1 <= index<|str| + // a2 : !contains <=> index=-1 + // a3 : contains => from_index<=index&&str[index]=c + // a4 : forall n, from_index<=n str[n]!=c + // a5 : forall m, from_index<=n<|str|. !contains => str[m]!=c + + exprt minus1=from_integer(-1, get_index_type()); + and_exprt a1( + binary_relation_exprt(index, ID_ge, minus1), + binary_relation_exprt(index, ID_lt, str.length())); + axioms.push_back(a1); + + equal_exprt a2(not_exprt(contains), equal_exprt(index, minus1)); + axioms.push_back(a2); + + implies_exprt a3( + contains, + and_exprt( + binary_relation_exprt(from_index, ID_le, index), + equal_exprt(str[index], c))); + axioms.push_back(a3); + + symbol_exprt n=fresh_univ_index("QA_index_of"); + string_constraintt a4( + n, from_index, index, contains, not_exprt(equal_exprt(str[n], c))); + axioms.push_back(a4); + + symbol_exprt m=fresh_univ_index("QA_index_of"); + string_constraintt a5( + m, + from_index, + str.length(), + not_exprt(contains), + not_exprt(equal_exprt(str[m], c))); + axioms.push_back(a5); + + return index; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_index_of_string + + Inputs: two string expressions and an integer expression + + Outputs: a integer expression + + Purpose: add axioms stating that the returned value is either -1 or greater + than from_index and the string beggining there has prefix substring + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_index_of_string( + const string_exprt &str, + const string_exprt &substring, + const exprt &from_index) +{ + symbol_exprt offset=fresh_exist_index("index_of"); + symbol_exprt contains=fresh_boolean("contains_substring"); + + // We add axioms: + // a1 : contains => |substring|>=offset>=from_index + // a2 : !contains => offset=-1 + // a3 : forall 0 <= witness str[witness+offset]=substring[witness] + + implies_exprt a1( + contains, + and_exprt( + str.axiom_for_is_longer_than(plus_exprt(substring.length(), offset)), + binary_relation_exprt(offset, ID_ge, from_index))); + axioms.push_back(a1); + + implies_exprt a2( + not_exprt(contains), + equal_exprt(offset, from_integer(-1, get_index_type()))); + axioms.push_back(a2); + + symbol_exprt qvar=fresh_univ_index("QA_index_of_string"); + string_constraintt a3( + qvar, + substring.length(), + contains, + equal_exprt(str[plus_exprt(qvar, offset)], substring[qvar])); + axioms.push_back(a3); + + return offset; +} + +exprt string_constraint_generatort::add_axioms_for_last_index_of_string( + const string_exprt &str, + const string_exprt &substring, + const exprt &from_index) +{ + symbol_exprt offset=fresh_exist_index("index_of"); + symbol_exprt contains=fresh_boolean("contains_substring"); + + // We add axioms: + // a1 : contains => |substring| >= length &&offset <= from_index + // a2 : !contains => offset=-1 + // a3 : forall 0 <= witness str[witness+offset]=substring[witness] + + implies_exprt a1( + contains, + and_exprt( + str.axiom_for_is_longer_than(plus_exprt(substring.length(), offset)), + binary_relation_exprt(offset, ID_le, from_index))); + axioms.push_back(a1); + + implies_exprt a2( + not_exprt(contains), + equal_exprt(offset, from_integer(-1, get_index_type()))); + axioms.push_back(a2); + + symbol_exprt qvar=fresh_univ_index("QA_index_of_string"); + equal_exprt constr3(str[plus_exprt(qvar, offset)], substring[qvar]); + string_constraintt a3(qvar, substring.length(), contains, constr3); + axioms.push_back(a3); + + return offset; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_index_of + + Inputs: function application with 2 or 3 arguments + + Outputs: a integer expression + + Purpose: add axioms corresponding to the String.indexOf:(C), + String.indexOf:(CI), String.indexOf:(String), and + String.indexOf:(String,I) java functions + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_index_of( + const function_application_exprt &f) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(f.type()==get_index_type()); + string_exprt str=add_axioms_for_string_expr(args[0]); + const exprt &c=args[1]; + exprt from_index; + + if(args.size()==2) + from_index=from_integer(0, get_index_type()); + else if(args.size()==3) + from_index=args[2]; + else + assert(false); + + if(refined_string_typet::is_java_string_pointer_type(c.type())) + { + string_exprt sub=add_axioms_for_string_expr(c); + return add_axioms_for_index_of_string(str, sub, from_index); + } + else + return add_axioms_for_index_of( + str, typecast_exprt(c, get_char_type()), from_index); +} + +exprt string_constraint_generatort::add_axioms_for_last_index_of( + const string_exprt &str, const exprt &c, const exprt &from_index) +{ + symbol_exprt index=fresh_exist_index("last_index_of"); + symbol_exprt contains=fresh_boolean("contains_in_last_index_of"); + + // We add axioms: + // a1 : -1 <= i <= from_index + // a2 : (i=-1 <=> !contains) + // a3 : (contains => i <= from_index &&s[i]=c) + // a4 : forall n. i+1 <= n < from_index +1 &&contains => s[n]!=c + // a5 : forall m. 0 <= m < from_index +1 &&!contains => s[m]!=c + + exprt index1=from_integer(1, get_index_type()); + exprt minus1=from_integer(-1, get_index_type()); + exprt from_index_plus_one=plus_exprt(from_index, index1); + and_exprt a1( + binary_relation_exprt(index, ID_ge, minus1), + binary_relation_exprt(index, ID_lt, from_index_plus_one)); + axioms.push_back(a1); + + equal_exprt a2(not_exprt(contains), equal_exprt(index, minus1)); + axioms.push_back(a2); + + implies_exprt a3( + contains, + and_exprt( + binary_relation_exprt(from_index, ID_ge, index), + equal_exprt(str[index], c))); + axioms.push_back(a3); + + symbol_exprt n=fresh_univ_index("QA_last_index_of"); + string_constraintt a4( + n, + plus_exprt(index, index1), + from_index_plus_one, + contains, + not_exprt(equal_exprt(str[n], c))); + axioms.push_back(a4); + + symbol_exprt m=fresh_univ_index("QA_last_index_of"); + string_constraintt a5( + m, + from_index_plus_one, + not_exprt(contains), + not_exprt(equal_exprt(str[m], c))); + axioms.push_back(a5); + + return index; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_last_index_of + + Inputs: function application with 2 or 3 arguments + + Outputs: a integer expression + + Purpose: add axioms corresponding to the String.lastIndexOf:(C), + String.lastIndexOf:(CI), String.lastIndexOf:(String), and + String.lastIndexOf:(String,I) java functions + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_last_index_of( + const function_application_exprt &f) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(f.type()==get_index_type()); + string_exprt str=add_axioms_for_string_expr(args[0]); + exprt c=args[1]; + exprt from_index; + + if(args.size()==2) + from_index=minus_exprt(str.length(), from_integer(1, get_index_type())); + else if(args.size()==3) + from_index=args[2]; + else + assert(false); + + if(refined_string_typet::is_java_string_pointer_type(c.type())) + { + string_exprt sub=add_axioms_for_string_expr(c); + return add_axioms_for_last_index_of_string(str, sub, from_index); + } + else + return add_axioms_for_last_index_of( + str, typecast_exprt(c, get_char_type()), from_index); +} + diff --git a/src/solvers/refinement/string_constraint_generator_insert.cpp b/src/solvers/refinement/string_constraint_generator_insert.cpp new file mode 100644 index 00000000000..8073ab4cb1f --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator_insert.cpp @@ -0,0 +1,220 @@ +/*******************************************************************\ + +Module: Generates string constraints for the family of insert Java functions + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_insert + + Inputs: two string expression and an integer offset + + Outputs: a new string expression + + Purpose: add axioms stating that the result correspond to the first string + where we inserted the second one at possition offset + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_insert( + const string_exprt &s1, const string_exprt &s2, const exprt &offset) +{ + assert(offset.type()==get_index_type()); + string_exprt pref=add_axioms_for_substring( + s1, from_integer(0, get_index_type()), offset); + string_exprt suf=add_axioms_for_substring(s1, offset, s1.length()); + string_exprt concat1=add_axioms_for_concat(pref, s2); + return add_axioms_for_concat(concat1, suf); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_insert + + Inputs: function application with three arguments: two strings and an index + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.insert(String) java + function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_insert( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_for_string_expr(args(f, 3)[2]); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_insert_int + + Inputs: function application with three arguments: a string, an integer + offset, and an integer + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.insert(I) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_insert_int( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_INTEGER_LENGTH); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_insert_long + + Inputs: function application with three arguments: a string, an integer + offset and a long + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.insert(J) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_insert_long( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_from_int(args(f, 3)[2], MAX_LONG_LENGTH); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_insert_bool + + Inputs: function application with three arguments: a string, an integer + offset, and a Boolean + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.insert(Z) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_insert_bool( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_from_bool(args(f, 3)[2]); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_insert_char + + Inputs: function application with three arguments: a string, an integer + offset, and a character + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.insert(C) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_insert_char( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_from_char(args(f, 3)[2]); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_insert_double + + Inputs: function application with three arguments: a string, an integer + offset, and a double + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.insert(D) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_insert_double( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_insert_float + + Inputs: function application with three arguments: a string, an integer + offset, and a float + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.insert(F) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_insert_float( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 3)[0]); + string_exprt s2=add_axioms_from_float(args(f, 3)[2]); + return add_axioms_for_insert(s1, s2, args(f, 3)[1]); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_insert_char_array + + Inputs: function application with 4 arguments plus two optional arguments: + a string, an offset index, a length, data array, an offset and a + count + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.insert:(I[CII) + and StringBuilder.insert:(I[C) java functions + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_insert_char_array( + const function_application_exprt &f) +{ + exprt offset; + exprt count; + if(f.arguments().size()==6) + { + offset=f.arguments()[4]; + count=f.arguments()[5]; + } + else + { + assert(f.arguments().size()==4); + count=f.arguments()[2]; + offset=from_integer(0, get_index_type()); + } + + string_exprt str=add_axioms_for_string_expr(f.arguments()[0]); + const exprt &length=f.arguments()[2]; + const exprt &data=f.arguments()[3]; + string_exprt arr=add_axioms_from_char_array( + length, data, offset, count); + return add_axioms_for_insert(str, arr, f.arguments()[1]); +} diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp new file mode 100644 index 00000000000..e76ae3f1f3d --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -0,0 +1,700 @@ +/*******************************************************************\ + +Module: Generates string constraints to link results from string functions + with their arguments. This is inspired by the PASS paper at HVC'13: + "PASS: String Solving with Parameterized Array and Interval Automaton" + by Guodong Li and Indradeep Ghosh, which gives examples of constraints + for several functions. + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include +#include +#include +#include +#include +#include + +/*******************************************************************\ + +Function: string_constraint_generatort::constant_char + + Inputs: integer representing a character, we do not use char type here + because java characters use more than 8 bits. + + Outputs: constant expression corresponding to the character. + + Purpose: generate constant character expression with character type. + +\*******************************************************************/ + +constant_exprt string_constraint_generatort::constant_char(int i) const +{ + return from_integer(i, get_char_type()); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::get_char_type + + Outputs: a type for characters + + Purpose: returns the type of characters that is adapted to the current mode + +\*******************************************************************/ + +typet string_constraint_generatort::get_char_type() const +{ + if(mode==ID_C) + return char_type(); + else if(mode==ID_java) + return java_char_type(); + else + assert(false); // only C and java modes supported +} + +/*******************************************************************\ + +Function: string_constraint_generatort::fresh_univ_index + + Inputs: a prefix + + Outputs: a symbol of index type whose name starts with the prefix + + Purpose: generate an index symbol to be used as an universaly quantified + variable + +\*******************************************************************/ + +symbol_exprt string_constraint_generatort::fresh_univ_index( + const irep_idt &prefix) +{ + return string_exprt::fresh_symbol(prefix, get_index_type()); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::fresh_exist_index + + Inputs: a prefix + + Outputs: a symbol of index type whose name starts with the prefix + + Purpose: generate an index symbol which is existentially quantified + +\*******************************************************************/ + +symbol_exprt string_constraint_generatort::fresh_exist_index( + const irep_idt &prefix) +{ + symbol_exprt s=string_exprt::fresh_symbol(prefix, get_index_type()); + index_symbols.push_back(s); + return s; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::fresh_boolean + + Inputs: a prefix + + Outputs: a symbol of index type whose name starts with the prefix + + Purpose: generate a Boolean symbol which is existentially quantified + +\*******************************************************************/ + +symbol_exprt string_constraint_generatort::fresh_boolean( + const irep_idt &prefix) +{ + symbol_exprt b=string_exprt::fresh_symbol(prefix, bool_typet()); + boolean_symbols.push_back(b); + return b; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_string_expr + + Inputs: an expression of type string + + Outputs: a string expression that is linked to the argument through + axioms that are added to the list + + Purpose: obtain a refined string expression corresponding to string + variable of string function call + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_string_expr( + const exprt &unrefined_string) +{ + string_exprt s; + + if(unrefined_string.id()==ID_function_application) + { + exprt res=add_axioms_for_function_application( + to_function_application_expr(unrefined_string)); + assert(res.type()==refined_string_type); + s=to_string_expr(res); + } + else if(unrefined_string.id()==ID_symbol) + s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string)); + else if(unrefined_string.id()==ID_address_of) + { + assert(unrefined_string.op0().id()==ID_symbol); + s=find_or_add_string_of_symbol(to_symbol_expr(unrefined_string.op0())); + } + else if(unrefined_string.id()==ID_if) + s=add_axioms_for_if(to_if_expr(unrefined_string)); + else if(unrefined_string.id()==ID_nondet_symbol || + unrefined_string.id()==ID_struct) + { + // TODO: for now we ignore non deterministic symbols and struct + } + else if(unrefined_string.id()==ID_typecast) + { + exprt arg=to_typecast_expr(unrefined_string).op(); + exprt res=add_axioms_for_string_expr(arg); + assert(res.type()==refined_string_typet(get_char_type())); + s=to_string_expr(res); + } + else + { + throw "add_axioms_for_string_expr:\n"+unrefined_string.pretty()+ + "\nwhich is not a function application, "+ + "a symbol or an if expression"; + } + + axioms.push_back( + s.axiom_for_is_longer_than(from_integer(0, get_index_type()))); + return s; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_if + + Inputs: an if expression + + Outputs: a string expression + + Purpose: add axioms for an if expression which should return a string + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_if( + const if_exprt &expr) +{ + string_exprt res(get_char_type()); + assert( + refined_string_typet::is_unrefined_string_type(expr.true_case().type())); + string_exprt t=add_axioms_for_string_expr(expr.true_case()); + assert( + refined_string_typet::is_unrefined_string_type(expr.false_case().type())); + string_exprt f=add_axioms_for_string_expr(expr.false_case()); + + axioms.push_back( + implies_exprt(expr.cond(), res.axiom_for_has_same_length_as(t))); + symbol_exprt qvar=fresh_univ_index("QA_string_if_true"); + equal_exprt qequal(res[qvar], t[qvar]); + axioms.push_back(string_constraintt(qvar, t.length(), expr.cond(), qequal)); + axioms.push_back( + implies_exprt(not_exprt(expr.cond()), res.axiom_for_has_same_length_as(f))); + symbol_exprt qvar2=fresh_univ_index("QA_string_if_false"); + equal_exprt qequal2(res[qvar2], f[qvar2]); + string_constraintt sc2(qvar2, f.length(), not_exprt(expr.cond()), qequal2); + axioms.push_back(sc2); + return res; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::find_or_add_string_of_symbol + + Inputs: a symbol expression + + Outputs: a string expression + + Purpose: if a symbol represent a string is present in the symbol_to_string + table, returns the corresponding string, if the symbol is not yet + present, creates a new string with the correct type depending on + whether the mode is java or c, adds it to the table and returns it. + +\*******************************************************************/ + +string_exprt string_constraint_generatort::find_or_add_string_of_symbol( + const symbol_exprt &sym) +{ + irep_idt id=sym.get_identifier(); + auto entry=symbol_to_string.insert( + std::make_pair(id, string_exprt(get_char_type()))); + return entry.first->second; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_function_application + + Inputs: an expression containing a function application + + Outputs: expression corresponding to the result of the function application + + Purpose: strings contained in this call are converted to objects of type + `string_exprt`, through adding axioms. Axioms are then added to + enforce that the result corresponds to the function application. + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_function_application( + const function_application_exprt &expr) +{ + const exprt &name=expr.function(); + assert(name.id()==ID_symbol); + + const irep_idt &id=is_ssa_expr(name)?to_ssa_expr(name).get_object_name(): + to_symbol_expr(name).get_identifier(); + + // TODO: improve efficiency of this test by either ordering test by frequency + // or using a map + + if(id==ID_cprover_char_literal_func) + return add_axioms_for_char_literal(expr); + else if(id==ID_cprover_string_length_func) + return add_axioms_for_length(expr); + else if(id==ID_cprover_string_equal_func) + return add_axioms_for_equals(expr); + else if(id==ID_cprover_string_equals_ignore_case_func) + return add_axioms_for_equals_ignore_case(expr); + else if(id==ID_cprover_string_is_empty_func) + return add_axioms_for_is_empty(expr); + else if(id==ID_cprover_string_char_at_func) + return add_axioms_for_char_at(expr); + else if(id==ID_cprover_string_is_prefix_func) + return add_axioms_for_is_prefix(expr); + else if(id==ID_cprover_string_is_suffix_func) + return add_axioms_for_is_suffix(expr); + else if(id==ID_cprover_string_startswith_func) + return add_axioms_for_is_prefix(expr, true); + else if(id==ID_cprover_string_endswith_func) + return add_axioms_for_is_suffix(expr, true); + else if(id==ID_cprover_string_contains_func) + return add_axioms_for_contains(expr); + else if(id==ID_cprover_string_hash_code_func) + return add_axioms_for_hash_code(expr); + else if(id==ID_cprover_string_index_of_func) + return add_axioms_for_index_of(expr); + else if(id==ID_cprover_string_last_index_of_func) + return add_axioms_for_last_index_of(expr); + else if(id==ID_cprover_string_parse_int_func) + return add_axioms_for_parse_int(expr); + else if(id==ID_cprover_string_to_char_array_func) + return add_axioms_for_to_char_array(expr); + else if(id==ID_cprover_string_code_point_at_func) + return add_axioms_for_code_point_at(expr); + else if(id==ID_cprover_string_code_point_before_func) + return add_axioms_for_code_point_before(expr); + else if(id==ID_cprover_string_code_point_count_func) + return add_axioms_for_code_point_count(expr); + else if(id==ID_cprover_string_offset_by_code_point_func) + return add_axioms_for_offset_by_code_point(expr); + else if(id==ID_cprover_string_compare_to_func) + return add_axioms_for_compare_to(expr); + else if(id==ID_cprover_string_literal_func) + return add_axioms_from_literal(expr); + else if(id==ID_cprover_string_concat_func) + return add_axioms_for_concat(expr); + else if(id==ID_cprover_string_concat_int_func) + return add_axioms_for_concat_int(expr); + else if(id==ID_cprover_string_concat_long_func) + return add_axioms_for_concat_long(expr); + else if(id==ID_cprover_string_concat_bool_func) + return add_axioms_for_concat_bool(expr); + else if(id==ID_cprover_string_concat_char_func) + return add_axioms_for_concat_char(expr); + else if(id==ID_cprover_string_concat_double_func) + return add_axioms_for_concat_double(expr); + else if(id==ID_cprover_string_concat_float_func) + return add_axioms_for_concat_float(expr); + else if(id==ID_cprover_string_concat_code_point_func) + return add_axioms_for_concat_code_point(expr); + else if(id==ID_cprover_string_insert_func) + return add_axioms_for_insert(expr); + else if(id==ID_cprover_string_insert_int_func) + return add_axioms_for_insert_int(expr); + else if(id==ID_cprover_string_insert_long_func) + return add_axioms_for_insert_long(expr); + else if(id==ID_cprover_string_insert_bool_func) + return add_axioms_for_insert_bool(expr); + else if(id==ID_cprover_string_insert_char_func) + return add_axioms_for_insert_char(expr); + else if(id==ID_cprover_string_insert_double_func) + return add_axioms_for_insert_double(expr); + else if(id==ID_cprover_string_insert_float_func) + return add_axioms_for_insert_float(expr); + else if(id==ID_cprover_string_insert_char_array_func) + return add_axioms_for_insert_char_array(expr); + else if(id==ID_cprover_string_substring_func) + return add_axioms_for_substring(expr); + else if(id==ID_cprover_string_trim_func) + return add_axioms_for_trim(expr); + else if(id==ID_cprover_string_to_lower_case_func) + return add_axioms_for_to_lower_case(expr); + else if(id==ID_cprover_string_to_upper_case_func) + return add_axioms_for_to_upper_case(expr); + else if(id==ID_cprover_string_char_set_func) + return add_axioms_for_char_set(expr); + else if(id==ID_cprover_string_value_of_func) + return add_axioms_for_value_of(expr); + else if(id==ID_cprover_string_empty_string_func) + return add_axioms_for_empty_string(expr); + else if(id==ID_cprover_string_copy_func) + return add_axioms_for_copy(expr); + else if(id==ID_cprover_string_of_int_func) + return add_axioms_from_int(expr); + else if(id==ID_cprover_string_of_int_hex_func) + return add_axioms_from_int_hex(expr); + else if(id==ID_cprover_string_of_float_func) + return add_axioms_from_float(expr); + else if(id==ID_cprover_string_of_double_func) + return add_axioms_from_double(expr); + else if(id==ID_cprover_string_of_long_func) + return add_axioms_from_long(expr); + else if(id==ID_cprover_string_of_bool_func) + return add_axioms_from_bool(expr); + else if(id==ID_cprover_string_of_char_func) + return add_axioms_from_char(expr); + else if(id==ID_cprover_string_of_char_array_func) + return add_axioms_from_char_array(expr); + else if(id==ID_cprover_string_set_length_func) + return add_axioms_for_set_length(expr); + else if(id==ID_cprover_string_delete_func) + return add_axioms_for_delete(expr); + else if(id==ID_cprover_string_delete_char_at_func) + return add_axioms_for_delete_char_at(expr); + else if(id==ID_cprover_string_replace_func) + return add_axioms_for_replace(expr); + else if(id==ID_cprover_string_data_func) + return add_axioms_for_data(expr); + else + { + std::string msg("string_exprt::function_application: unknown symbol :"); + msg+=id2string(id); + throw msg; + } +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_copy + + Inputs: function application with one argument, which is a string + + Outputs: a new string expression + + Purpose: add axioms to say that the returned string expression is equal to + the argument of the function application + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_copy( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 1)[0]); + string_exprt res(get_char_type()); + + // We add axioms: + // a1 : |res|=|s1| + // a2 : forall i<|s1|. s1[i]=res[i] + + axioms.push_back(res.axiom_for_has_same_length_as(s1)); + + symbol_exprt idx=fresh_univ_index("QA_index_copy"); + string_constraintt a2(idx, s1.length(), equal_exprt(s1[idx], res[idx])); + axioms.push_back(a2); + return res; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_java_char_array + + Inputs: an expression corresponding to a java object of type char array + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.valueOf([C) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_java_char_array( + const exprt &char_array) +{ + string_exprt res(get_char_type()); + exprt arr=to_address_of_expr(char_array).object(); + exprt len=member_exprt(arr, "length", res.length().type()); + exprt cont=member_exprt(arr, "data", res.content().type()); + res.length()=len; + res.content()=cont; + return res; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_length + + Inputs: function application with one string argument + + Outputs: a string expression of index type + + Purpose: add axioms corresponding to the String.length java function + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_length( + const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + return str.length(); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_data + + Inputs: function application with three arguments: one string, a java + Array object and the corresponding data field + + Outputs: an expression of type void + + Purpose: add axioms stating that the content of the string argument is + equal to the content of the array argument + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_data( + const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + const exprt &tab_data=args(f, 3)[1]; + const exprt &data=args(f, 3)[2]; + symbol_exprt qvar=fresh_univ_index("QA_string_data"); + + // translating data[qvar] to the correct expression + // which is (signed int)byte_extract_little_endian + // (data, (2l*qvar) + POINTER_OFFSET(byte_extract_little_endian + // (tab.data, 0l, unsigned short int *)), unsigned short int) + mult_exprt qvar2( + from_integer(2, java_long_type()), + typecast_exprt(qvar, java_long_type())); + byte_extract_exprt extract( + ID_byte_extract_little_endian, + tab_data, + from_integer(0, java_long_type()), + pointer_typet(java_char_type())); + plus_exprt arg2(qvar2, pointer_offset(extract)); + + byte_extract_exprt extract2( + ID_byte_extract_little_endian, data, arg2, java_char_type()); + exprt char_in_tab=typecast_exprt(extract2, get_char_type()); + + string_constraintt eq( + qvar, str.length(), equal_exprt(str[qvar], char_in_tab)); + axioms.push_back(eq); + + exprt void_expr; + void_expr.type()=void_typet(); + return void_expr; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_char_array + + Inputs: a length expression, an array expression, a offset index, and a + count index + + Outputs: a new string expression + + Purpose: add axioms stating that the content of the returned string + equals to the content of the array argument, starting at offset and + with `count` characters + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_char_array( + const exprt &length, + const exprt &data, + const exprt &offset, + const exprt &count) +{ + string_exprt str(get_char_type()); + + // We add axioms: + // a1 : forall q < count. str[q] = data[q+offset] + // a2 : |str| = count + + symbol_exprt qvar=fresh_univ_index("QA_string_of_char_array"); + exprt char_in_tab=data; + assert(char_in_tab.id()==ID_index); + char_in_tab.op1()=plus_exprt(qvar, offset); + + string_constraintt a1(qvar, count, equal_exprt(str[qvar], char_in_tab)); + axioms.push_back(a1); + axioms.push_back(equal_exprt(str.length(), count)); + + return str; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_char_array + + Inputs: function application with 2 arguments and 2 additional optional + arguments: length, char array, offset and count + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.:(I[CII) + and String.:(I[C) java functions + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_char_array( + const function_application_exprt &f) +{ + exprt offset; + exprt count; + if(f.arguments().size()==4) + { + offset=f.arguments()[2]; + count=f.arguments()[3]; + } + else + { + assert(f.arguments().size()==2); + count=f.arguments()[0]; + offset=from_integer(0, get_index_type()); + } + const exprt &tab_length=f.arguments()[0]; + const exprt &data=f.arguments()[1]; + return add_axioms_from_char_array(tab_length, data, offset, count); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_is_positive_index + + Inputs: an index expression + + Outputs: a Boolean expression + + Purpose: expression true exactly when the index is positive + +\*******************************************************************/ + +exprt string_constraint_generatort::axiom_for_is_positive_index(const exprt &x) +{ + return binary_relation_exprt( + x, ID_ge, from_integer(0, get_index_type())); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_char_literal + + Inputs: function application with one character argument + + Outputs: a new character expression + + Purpose: add axioms stating that the returned value is equal to the argument + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_char_literal( + const function_application_exprt &f) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(args.size()==1); // there should be exactly 1 argument to char literal + + const exprt &arg=args[0]; + // for C programs argument to char literal should be one string constant + // of size 1. + if(arg.operands().size()==1 && + arg.op0().operands().size()==1 && + arg.op0().op0().operands().size()==2 && + arg.op0().op0().op0().id()==ID_string_constant) + { + const string_constantt s=to_string_constant(arg.op0().op0().op0()); + irep_idt sval=s.get_value(); + assert(sval.size()==1); + return from_integer(unsigned(sval[0]), get_char_type()); + } + else + { + throw "convert_char_literal unimplemented"; + } +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_char_at + + Inputs: function application with two arguments: a string and an integer + + Outputs: a character expression + + Purpose: add axioms stating that the character of the string at the given + position is equal to the returned value + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_char_at( + const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + symbol_exprt char_sym=string_exprt::fresh_symbol("char", get_char_type()); + axioms.push_back(equal_exprt(char_sym, str[args(f, 2)[1]])); + return char_sym; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_to_char_array + + Inputs: function application with one string argument + + Outputs: a char array expression + + Purpose: add axioms corresponding to the String.toCharArray java function + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_to_char_array( + const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]); + return str.content(); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::set_string_symbol_equal_to_expr + + Inputs: a symbol and a string + + Purpose: add a correspondence to make sure the symbol points to the + same string as the second argument + +\*******************************************************************/ + +void string_constraint_generatort::set_string_symbol_equal_to_expr( + const symbol_exprt &sym, const exprt &str) +{ + if(str.id()==ID_symbol) + assign_to_symbol(sym, find_or_add_string_of_symbol(to_symbol_expr(str))); + else + assign_to_symbol(sym, add_axioms_for_string_expr(str)); +} diff --git a/src/solvers/refinement/string_constraint_generator_testing.cpp b/src/solvers/refinement/string_constraint_generator_testing.cpp new file mode 100644 index 00000000000..6df308c533e --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator_testing.cpp @@ -0,0 +1,246 @@ +/*******************************************************************\ + +Module: Generates string constraints for string functions that return + Boolean values + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_is_prefix + + Inputs: a prefix string, a string and an integer offset + + Outputs: a Boolean expression + + Purpose: add axioms stating that the returned expression is true exactly + when the first string is a prefix of the second one, starting at + position offset + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_is_prefix( + const string_exprt &prefix, const string_exprt &str, const exprt &offset) +{ + symbol_exprt isprefix=fresh_boolean("isprefix"); + + // We add axioms: + // a1 : isprefix => |str| >= |prefix|+offset + // a2 : forall 0<=qvar + // s0[witness+offset]=s2[witness] + // a3 : !isprefix => |str| < |prefix|+offset + // || (|str| >= |prefix|+offset &&0<=witness<|prefix| + // &&str[witness+ofsset]!=prefix[witness]) + + implies_exprt a1( + isprefix, + str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))); + axioms.push_back(a1); + + symbol_exprt qvar=fresh_univ_index("QA_isprefix"); + string_constraintt a2( + qvar, + prefix.length(), + isprefix, + equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar])); + axioms.push_back(a2); + + symbol_exprt witness=fresh_exist_index("witness_not_isprefix"); + and_exprt witness_diff( + axiom_for_is_positive_index(witness), + and_exprt( + prefix.axiom_for_is_strictly_longer_than(witness), + notequal_exprt(str[plus_exprt(witness, offset)], prefix[witness]))); + or_exprt s0_notpref_s1( + not_exprt( + str.axiom_for_is_longer_than(plus_exprt(prefix.length(), offset))), + and_exprt( + witness_diff, + str.axiom_for_is_longer_than( + plus_exprt(prefix.length(), offset)))); + + implies_exprt a3(not_exprt(isprefix), s0_notpref_s1); + axioms.push_back(a3); + return isprefix; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_is_prefix + + Inputs: a function application with 2 or 3 arguments and a Boolean telling + whether the prefix is the second argument (when swap_arguments is + true) or the first argument + + Outputs: a Boolean expression + + Purpose: add axioms corresponding to the String.isPrefix java function + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_is_prefix( + const function_application_exprt &f, bool swap_arguments) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); + string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); + string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + exprt offset; + if(args.size()==2) + offset=from_integer(0, get_index_type()); + else if(args.size()==3) + offset=args[2]; + return typecast_exprt(add_axioms_for_is_prefix(s0, s1, offset), f.type()); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_is_empty + + Inputs: function application with a string argument + + Outputs: a Boolean expression + + Purpose: add axioms stating that the returned value is true exactly when + the argument string is empty + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_is_empty( + const function_application_exprt &f) +{ + assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); + + // We add axioms: + // a1 : is_empty => |s0| = 0 + // a2 : s0 => is_empty + + symbol_exprt is_empty=fresh_boolean("is_empty"); + string_exprt s0=add_axioms_for_string_expr(args(f, 1)[0]); + axioms.push_back(implies_exprt(is_empty, s0.axiom_for_has_length(0))); + axioms.push_back(implies_exprt(s0.axiom_for_has_length(0), is_empty)); + return typecast_exprt(is_empty, f.type()); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_is_suffix + + Inputs: a function application with 2 or 3 arguments and a Boolean telling + whether the suffix is the second argument (when swap_arguments is + true) or the first argument + + Outputs: a Boolean expression + + Purpose: add axioms corresponding to the String.isSuffix java function + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_is_suffix( + const function_application_exprt &f, bool swap_arguments) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(args.size()==2); // bad args to string issuffix? + assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); + + symbol_exprt issuffix=fresh_boolean("issuffix"); + typecast_exprt tc_issuffix(issuffix, f.type()); + string_exprt s0=add_axioms_for_string_expr(args[swap_arguments?1:0]); + string_exprt s1=add_axioms_for_string_expr(args[swap_arguments?0:1]); + + // We add axioms: + // a1 : issufix => s0.length >= s1.length + // a2 : forall witness s1[witness]=s0[witness + s0.length-s1.length] + // a3 : !issuffix => + // s1.length > s0.length + // || (s1.length > witness>=0 + // &&s1[witness]!=s0[witness + s0.length-s1.length] + + implies_exprt a1(issuffix, s1.axiom_for_is_longer_than(s0)); + axioms.push_back(a1); + + symbol_exprt qvar=fresh_univ_index("QA_suffix"); + exprt qvar_shifted=plus_exprt( + qvar, minus_exprt(s1.length(), s0.length())); + string_constraintt a2( + qvar, s0.length(), issuffix, equal_exprt(s0[qvar], s1[qvar_shifted])); + axioms.push_back(a2); + + symbol_exprt witness=fresh_exist_index("witness_not_suffix"); + exprt shifted=plus_exprt( + witness, minus_exprt(s1.length(), s0.length())); + or_exprt constr3( + s0.axiom_for_is_strictly_longer_than(s1), + and_exprt( + notequal_exprt(s0[witness], s1[shifted]), + and_exprt( + s0.axiom_for_is_strictly_longer_than(witness), + axiom_for_is_positive_index(witness)))); + implies_exprt a3(not_exprt(issuffix), constr3); + + axioms.push_back(a3); + return tc_issuffix; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_contains + + Inputs: function application with two string arguments + + Outputs: a Boolean expression + + Purpose: add axioms corresponding to the String.contains java function + +\*******************************************************************/ + +exprt string_constraint_generatort::add_axioms_for_contains( + const function_application_exprt &f) +{ + assert(f.type()==bool_typet() || f.type().id()==ID_c_bool); + symbol_exprt contains=fresh_boolean("contains"); + typecast_exprt tc_contains(contains, f.type()); + string_exprt s0=add_axioms_for_string_expr(args(f, 2)[0]); + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[1]); + + // We add axioms: + // a1 : contains => s0.length >= s1.length + // a2 : 0 <= startpos <= s0.length-s1.length + // a3 : forall qvar s1[qvar]=s0[startpos + qvar] + // a4 : !contains => s1.length > s0.length + // || (forall startpos <= s0.length-s1.length. + // exists witness= |s1| ) + // ==> exists witness<|s1|. s1[witness]!=s0[startpos+witness] + string_not_contains_constraintt a4( + from_integer(0, get_index_type()), + plus_exprt(from_integer(1, get_index_type()), length_diff), + and_exprt(not_exprt(contains), s0.axiom_for_is_longer_than(s1)), + from_integer(0, get_index_type()), s1.length(), s0, s1); + axioms.push_back(a4); + + return tc_contains; +} diff --git a/src/solvers/refinement/string_constraint_generator_transformation.cpp b/src/solvers/refinement/string_constraint_generator_transformation.cpp new file mode 100644 index 00000000000..c361c9d4e48 --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator_transformation.cpp @@ -0,0 +1,460 @@ +/*******************************************************************\ + +Module: Generates string constraints for string transformations, + that is, functions taking one string and returning another + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_set_length + + Inputs: function application with two arguments, the first of which is + a string and the second an integer which should have same type has + return by get_index_type() + + Outputs: a new string expression + + Purpose: add axioms to say that the returned string expression has length + given by the second argument and whose characters are equal to + those of the first argument for the positions which are defined in + both strings + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_set_length( + const function_application_exprt &f) +{ + string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]); + exprt k=args(f, 2)[1]; + string_exprt res(get_char_type()); + + // We add axioms: + // a1 : |res|=k + // a2 : forall i s[i]=s1[i]) &&(i >= k ==> s[i]=0) + + axioms.push_back(res.axiom_for_has_length(k)); + + symbol_exprt idx=fresh_univ_index("QA_index_set_length"); + string_constraintt a2( + idx, k, and_exprt( + implies_exprt( + s1.axiom_for_is_strictly_longer_than(idx), + equal_exprt(s1[idx], res[idx])), + implies_exprt( + s1.axiom_for_is_shorter_than(idx), + equal_exprt(s1[idx], constant_char(0))))); + axioms.push_back(a2); + + return res; +} + + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_substring + + Inputs: function application with one string argument, one start index + argument and an optional end index argument + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.substring java function + Warning: the specification may not be correct for the case where the + string is shorter than the end index + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_substring( + const function_application_exprt &f) +{ + const function_application_exprt::argumentst &args=f.arguments(); + assert(args.size()>=2); + string_exprt str=add_axioms_for_string_expr(args[0]); + exprt i(args[1]); + exprt j; + if(args.size()==3) + { + j=args[2]; + } + else + { + assert(args.size()==2); + j=str.length(); + } + return add_axioms_for_substring(str, i, j); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_substring + + Inputs: a string expression, an expression for the start index, and an + expression for the end index + + Outputs: a new string expression + + Purpose: add axioms stating that the returned string expression is equal + to the input one starting at `start` and ending before `end` + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_substring( + const string_exprt &str, const exprt &start, const exprt &end) +{ + symbol_exprt idx=fresh_exist_index("index_substring"); + assert(start.type()==get_index_type()); + assert(end.type()==get_index_type()); + string_exprt res(get_char_type()); + + // We add axioms: + // a1 : start < end => |res| = end - start + // a2 : start >= end => |res| = 0 + // a3 : |str| > end + // a4 : forall idx= 0 + // a3 : str >= idx + // a4 : |res|>= 0 + // a5 : |res|<=|str| + // (this is necessary to prevent exceeding the biggest integer) + // a6 : forall n' ' &&s[m+|s1|-1]>' ') || m=|s| + + exprt a1=str.axiom_for_is_longer_than(plus_exprt(idx, res.length())); + axioms.push_back(a1); + + binary_relation_exprt a2(idx, ID_ge, from_integer(0, get_index_type())); + axioms.push_back(a2); + + exprt a3=str.axiom_for_is_longer_than(idx); + axioms.push_back(a3); + + exprt a4=res.axiom_for_is_longer_than( + from_integer(0, get_index_type())); + axioms.push_back(a4); + + exprt a5=res.axiom_for_is_shorter_than(str); + axioms.push_back(a5); + + symbol_exprt n=fresh_univ_index("QA_index_trim"); + binary_relation_exprt non_print(str[n], ID_le, space_char); + string_constraintt a6(n, idx, non_print); + axioms.push_back(a6); + + symbol_exprt n2=fresh_univ_index("QA_index_trim2"); + minus_exprt bound(str.length(), plus_exprt(idx, res.length())); + binary_relation_exprt eqn2( + str[plus_exprt(idx, plus_exprt(res.length(), n2))], + ID_le, + space_char); + + string_constraintt a7(n2, bound, eqn2); + axioms.push_back(a7); + + symbol_exprt n3=fresh_univ_index("QA_index_trim3"); + equal_exprt eqn3(res[n3], str[plus_exprt(n3, idx)]); + string_constraintt a8(n3, res.length(), eqn3); + axioms.push_back(a8); + + minus_exprt index_before( + plus_exprt(idx, res.length()), from_integer(1, get_index_type())); + binary_relation_exprt no_space_before(str[index_before], ID_gt, space_char); + or_exprt a9( + equal_exprt(idx, str.length()), + and_exprt( + binary_relation_exprt(str[idx], ID_gt, space_char), + no_space_before)); + axioms.push_back(a9); + return res; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_to_lower_case + + Inputs: function application with one string argument + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.toLowerCase java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_to_lower_case( + const function_application_exprt &expr) +{ + string_exprt str=add_axioms_for_string_expr(args(expr, 1)[0]); + string_exprt res(get_char_type()); + exprt char_a=constant_char('a'); + exprt char_A=constant_char('A'); + exprt char_z=constant_char('z'); + exprt char_Z=constant_char('Z'); + + // TODO: add support for locales using case mapping information + // from the UnicodeData file. + + // We add axioms: + // a1 : |res| = |str| + // a2 : forall idx res[idx]=str[idx]+'a'-'A' + // a3 : forall idx res[idx]=str[idx] + // forall idx res[idx]=str[idx]+'A'-'a' + // a3 : forall idx res[idx]=str[idx] + + exprt a1=res.axiom_for_has_same_length_as(str); + axioms.push_back(a1); + + symbol_exprt idx=fresh_univ_index("QA_upper_case"); + exprt is_lower_case=and_exprt( + binary_relation_exprt(char_a, ID_le, str[idx]), + binary_relation_exprt(str[idx], ID_le, char_z)); + minus_exprt diff(char_A, char_a); + equal_exprt convert(res[idx], plus_exprt(str[idx], diff)); + string_constraintt a2(idx, res.length(), is_lower_case, convert); + axioms.push_back(a2); + + equal_exprt eq(res[idx], str[idx]); + string_constraintt a3(idx, res.length(), not_exprt(is_lower_case), eq); + axioms.push_back(a3); + return res; +} + + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_char_set + + Inputs: function application with three arguments, the first is a string + the second an index and the third a character + + Outputs: a new string expression + + Purpose: add axioms corresponding stating that the result is similar to + that of the StringBuilder.setCharAt java function + Warning: this may be underspecified in the case wher the index exceed + the length of the string + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_char_set( + const function_application_exprt &f) +{ + string_exprt res(get_char_type()); + string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + with_exprt sarrnew(str.content(), args(f, 3)[1], args(f, 3)[2]); + + // We add axiom: + // a1 : arg1 < |str| => res = str with [arg1]=arg2 + + implies_exprt a1( + binary_relation_exprt(args(f, 3)[1], ID_lt, str.length()), + and_exprt( + equal_exprt(res.content(), sarrnew), + res.axiom_for_has_same_length_as(str))); + axioms.push_back(a1); + return res; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_replace + + Inputs: function application with three arguments, the first is a string, + the second and the third are characters + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.replace java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_replace( + const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + const exprt &old_char=args(f, 3)[1]; + const exprt &new_char=args(f, 3)[2]; + string_exprt res(get_char_type()); + + // We add axioms: + // a1 : |res| = |str| + // a2 : forall qvar, 0<=qvar<|res|, + // str[qvar]=oldChar => res[qvar]=newChar + // !str[qvar]=oldChar => res[qvar]=str[qvar] + + axioms.push_back(res.axiom_for_has_same_length_as(str)); + + symbol_exprt qvar=fresh_univ_index("QA_replace"); + implies_exprt case1( + equal_exprt(str[qvar], old_char), + equal_exprt(res[qvar], new_char)); + implies_exprt case2( + not_exprt(equal_exprt(str[qvar], old_char)), + equal_exprt(res[qvar], str[qvar])); + string_constraintt a2(qvar, res.length(), and_exprt(case1, case2)); + axioms.push_back(a2); + return res; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_delete_char_at + + Inputs: function application with two arguments, the first is a string + and the second is an index + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.deleteCharAt java + function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_delete_char_at( + const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]); + exprt index_one=from_integer(1, get_index_type()); + return add_axioms_for_delete( + str, args(f, 2)[1], plus_exprt(args(f, 2)[1], index_one)); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_delete + + Inputs: a string expression, a start index and an end index + + Outputs: a new string expression + + Purpose: add axioms stating that the returned string corresponds to the input + one where we removed characters between the positions + start (included) and end (not included) + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_delete( + const string_exprt &str, const exprt &start, const exprt &end) +{ + assert(start.type()==get_index_type()); + assert(end.type()==get_index_type()); + string_exprt str1=add_axioms_for_substring( + str, from_integer(0, get_index_type()), start); + string_exprt str2=add_axioms_for_substring(str, end, str.length()); + return add_axioms_for_concat(str1, str2); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_delete + + Inputs: function application with three arguments: a string + expression, a start index and an end index + + Outputs: a new string expression + + Purpose: add axioms corresponding to the StringBuilder.delete java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_delete( + const function_application_exprt &f) +{ + string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]); + return add_axioms_for_delete(str, args(f, 3)[1], args(f, 3)[2]); +} diff --git a/src/solvers/refinement/string_constraint_generator_valueof.cpp b/src/solvers/refinement/string_constraint_generator_valueof.cpp new file mode 100644 index 00000000000..3712b297386 --- /dev/null +++ b/src/solvers/refinement/string_constraint_generator_valueof.cpp @@ -0,0 +1,646 @@ +/*******************************************************************\ + +Module: Generates string constraints for functions generating strings + from other types, in particular int, long, float, double, char, bool + +Author: Romain Brenguier, romain.brenguier@diffblue.com + +\*******************************************************************/ + +#include +#include + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_int + + Inputs: function application with one integer argument + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.valueOf(I) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_int( + const function_application_exprt &expr) +{ + return add_axioms_from_int(args(expr, 1)[0], MAX_INTEGER_LENGTH); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_long + + Inputs: function application with one long argument + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.valueOf(J) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_long( + const function_application_exprt &expr) +{ + return add_axioms_from_int(args(expr, 1)[0], MAX_LONG_LENGTH); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_float + + Inputs: function application with one float argument + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.valueOf(F) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_float( + const function_application_exprt &f) +{ + return add_axioms_from_float(args(f, 1)[0], false); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_double + + Inputs: function application with one double argument + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.valueOf(D) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_double( + const function_application_exprt &f) +{ + return add_axioms_from_float(args(f, 1)[0], true); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_float + + Inputs: float expression and Boolean signaling that the argument has + double precision + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.valueOf(F) java function + Warning: we currently only have partial specification + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_float( + const exprt &f, bool double_precision) +{ + typet char_type=get_char_type(); + string_exprt res(char_type); + const exprt &index24=from_integer(24, get_index_type()); + axioms.push_back(res.axiom_for_is_shorter_than(index24)); + + string_exprt magnitude(char_type); + string_exprt sign_string(char_type); + string_exprt nan_string=add_axioms_for_constant("NaN"); + + // We add the axioms: + // a1 : If the argument is NaN, the result length is that of "NaN". + // a2 : If the argument is NaN, the result content is the string "NaN". + // a3 : f<0 => |sign_string|=1 + // a4 : f>=0 => |sign_string|=0 + // a5 : f<0 => sign_string[0]='-' + // a6 : f infinite => |magnitude|=|"Infinity"| + // a7 : forall i<|"Infinity"|, f infinite => magnitude[i]="Infinity"[i] + // a8 : f=0 => |magnitude|=|"0.0"| + // a9 : forall i<|"0.0"|, f=0 => f[i]="0.0"[i] + + ieee_float_spect fspec= + double_precision?ieee_float_spect::double_precision() + :ieee_float_spect::single_precision(); + + exprt isnan=float_bvt().isnan(f, fspec); + implies_exprt a1(isnan, magnitude.axiom_for_has_same_length_as(nan_string)); + axioms.push_back(a1); + + symbol_exprt qvar=fresh_univ_index("QA_equal_nan"); + string_constraintt a2( + qvar, + nan_string.length(), + isnan, + equal_exprt(magnitude[qvar], nan_string[qvar])); + axioms.push_back(a2); + + // If the argument is not NaN, the result is a string that represents + // the sign and magnitude (absolute value) of the argument. + // If the sign is negative, the first character of the result is '-'; + // if the sign is positive, no sign character appears in the result. + + bitvector_typet bv_type=to_bitvector_type(f.type()); + unsigned width=bv_type.get_width(); + exprt isneg=extractbit_exprt(f, width-1); + + implies_exprt a3(isneg, sign_string.axiom_for_has_length(1)); + axioms.push_back(a3); + + implies_exprt a4(not_exprt(isneg), sign_string.axiom_for_has_length(0)); + axioms.push_back(a4); + + implies_exprt a5(isneg, equal_exprt(sign_string[0], constant_char('-'))); + axioms.push_back(a5); + + // If m is infinity, it is represented by the characters "Infinity"; + // thus, positive infinity produces the result "Infinity" and negative + // infinity produces the result "-Infinity". + + string_exprt infinity_string=add_axioms_for_constant("Infinity"); + exprt isinf=float_bvt().isinf(f, fspec); + implies_exprt a6( + isinf, magnitude.axiom_for_has_same_length_as(infinity_string)); + axioms.push_back(a6); + + symbol_exprt qvar_inf=fresh_univ_index("QA_equal_infinity"); + equal_exprt meq(magnitude[qvar_inf], infinity_string[qvar_inf]); + string_constraintt a7(qvar_inf, infinity_string.length(), isinf, meq); + axioms.push_back(a7); + + // If m is zero, it is represented by the characters "0.0"; thus, negative + // zero produces the result "-0.0" and positive zero produces "0.0". + + string_exprt zero_string=add_axioms_for_constant("0.0"); + exprt iszero=float_bvt().is_zero(f, fspec); + implies_exprt a8(iszero, magnitude.axiom_for_has_same_length_as(zero_string)); + axioms.push_back(a8); + + symbol_exprt qvar_zero=fresh_univ_index("QA_equal_zero"); + equal_exprt eq_zero(magnitude[qvar_zero], zero_string[qvar_zero]); + string_constraintt a9(qvar_zero, zero_string.length(), iszero, eq_zero); + axioms.push_back(a9); + + return add_axioms_for_concat(sign_string, magnitude); +} + + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_bool + + Inputs: function application with on Boolean argument + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.valueOf(Z) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_bool( + const function_application_exprt &f) +{ + return add_axioms_from_bool(args(f, 1)[0]); +} + + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_bool + + Inputs: Boolean expression + + Outputs: a new string expression + + Purpose: add axioms stating that the returned string equals "true" when + the Boolean expression is true and "false" when it is false + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_bool( + const exprt &b) +{ + typet char_type=get_char_type(); + string_exprt res(char_type); + + assert(b.type()==bool_typet() || b.type().id()==ID_c_bool); + + typecast_exprt eq(b, bool_typet()); + + string_exprt true_string=add_axioms_for_constant("true"); + string_exprt false_string=add_axioms_for_constant("false"); + + // We add axioms: + // a1 : eq => res = |"true"| + // a2 : forall i < |"true"|. eq => res[i]="true"[i] + // a3 : !eq => res = |"false"| + // a4 : forall i < |"false"|. !eq => res[i]="false"[i] + + implies_exprt a1(eq, res.axiom_for_has_same_length_as(true_string)); + axioms.push_back(a1); + symbol_exprt qvar=fresh_univ_index("QA_equal_true"); + string_constraintt a2( + qvar, + true_string.length(), + eq, + equal_exprt(res[qvar], true_string[qvar])); + axioms.push_back(a2); + + implies_exprt a3( + not_exprt(eq), res.axiom_for_has_same_length_as(false_string)); + axioms.push_back(a3); + + symbol_exprt qvar1=fresh_univ_index("QA_equal_false"); + string_constraintt a4( + qvar, + false_string.length(), + not_exprt(eq), + equal_exprt(res[qvar1], false_string[qvar1])); + axioms.push_back(a4); + + return res; +} + +/*******************************************************************\ + +Function: smallest_by_digit + + Inputs: number of digit + + Outputs: an integer with the right number of digit + + Purpose: gives the smallest integer with the specified number of digits + +\*******************************************************************/ + +static mp_integer smallest_by_digit(int nb) +{ + return power(10, nb-1); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_int + + Inputs: a signed integer expression, and a maximal size for the string + representation + + Outputs: a string expression + + Purpose: add axioms to say the string corresponds to the result of + String.valueOf(I) or String.valueOf(J) java functions applied on the + integer expression + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_int( + const exprt &i, size_t max_size) +{ + string_exprt res(get_char_type()); + const typet &type=i.type(); + assert(type.id()==ID_signedbv); + exprt ten=from_integer(10, type); + exprt zero_char=constant_char('0'); + exprt nine_char=constant_char('9'); + exprt minus_char=constant_char('-'); + exprt zero=from_integer(0, get_index_type()); + exprt max=from_integer(max_size, get_index_type()); + + // We add axioms: + // a1 : 0 <|res|<=max_size + // a2 : (res[0]='-')||('0'<=res[0]<='9') + + and_exprt a1(res.axiom_for_is_strictly_longer_than(zero), + res.axiom_for_is_shorter_than(max)); + axioms.push_back(a1); + + exprt chr=res[0]; + exprt starts_with_minus=equal_exprt(chr, minus_char); + exprt starts_with_digit=and_exprt( + binary_relation_exprt(chr, ID_ge, zero_char), + binary_relation_exprt(chr, ID_le, nine_char)); + or_exprt a2(starts_with_digit, starts_with_minus); + axioms.push_back(a2); + + for(size_t size=1; size<=max_size; size++) + { + // For each possible size, we add axioms: + // all_numbers: forall 1<=i<=size. '0'<=res[i]<='9' + // a3 : |res|=size&&'0'<=res[0]<='9' => + // i=sum+str[0]-'0' &&all_numbers + // a4 : |res|=size&&res[0]='-' => i=-sum + // a5 : size>1 => |res|=size&&'0'<=res[0]<='9' => res[0]!='0' + // a6 : size>1 => |res|=size&&res[0]'-' => res[1]!='0' + // a7 : size==max_size => i>1000000000 + exprt sum=from_integer(0, type); + exprt all_numbers=true_exprt(); + chr=res[0]; + exprt first_value=typecast_exprt(minus_exprt(chr, zero_char), type); + + for(size_t j=1; j1) + { + equal_exprt r0_zero(res[zero], zero_char); + implies_exprt a5( + and_exprt(premise, starts_with_digit), + not_exprt(r0_zero)); + axioms.push_back(a5); + + exprt one=from_integer(1, get_index_type()); + equal_exprt r1_zero(res[one], zero_char); + implies_exprt a6( + and_exprt(premise, starts_with_minus), + not_exprt(r1_zero)); + axioms.push_back(a6); + } + + // we have to be careful when exceeding the maximal size of integers + if(size==max_size) + { + exprt smallest_with_10_digits=from_integer( + smallest_by_digit(max_size), type); + binary_relation_exprt big(i, ID_ge, smallest_with_10_digits); + implies_exprt a7(premise, big); + axioms.push_back(a7); + } + } + return res; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::int_of_hex_char + + Inputs: a character expression in the following set: 0123456789abcdef + + Outputs: an integer expression + + Purpose: returns the value represented by the character + +\*******************************************************************/ + +exprt string_constraint_generatort::int_of_hex_char(exprt chr) const +{ + exprt zero_char=constant_char('0'); + exprt nine_char=constant_char('9'); + exprt a_char=constant_char('a'); + return if_exprt( + binary_relation_exprt(chr, ID_gt, nine_char), + plus_exprt(constant_char(10), minus_exprt(chr, a_char)), + minus_exprt(chr, zero_char)); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_int_hex + + Inputs: one integer argument + + Outputs: a new string expression + + Purpose: add axioms stating that the returned string corresponds to the + integer argument written in hexadecimal + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_int_hex( + const exprt &i) +{ + string_exprt res(get_char_type()); + const typet &type=i.type(); + assert(type.id()==ID_signedbv); + exprt sixteen=from_integer(16, type); + exprt minus_char=constant_char('-'); + exprt zero_char=constant_char('0'); + exprt nine_char=constant_char('9'); + exprt a_char=constant_char('a'); + exprt f_char=constant_char('f'); + + size_t max_size=8; + axioms.push_back( + and_exprt(res.axiom_for_is_strictly_longer_than(0), + res.axiom_for_is_shorter_than(max_size))); + + for(size_t size=1; size<=max_size; size++) + { + exprt sum=from_integer(0, type); + exprt all_numbers=true_exprt(); + exprt chr=res[0]; + + for(size_t j=0; j1) + axioms.push_back( + implies_exprt(premise, not_exprt(equal_exprt(res[0], zero_char)))); + } + return res; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_int_hex + + Inputs: function application with integer argument + + Outputs: a new string expression + + Purpose: add axioms corresponding to the Integer.toHexString(I) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_int_hex( + const function_application_exprt &f) +{ + return add_axioms_from_int_hex(args(f, 1)[0]); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_char + + Inputs: function application one char argument + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.valueOf(C) java function + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_char( + const function_application_exprt &f) +{ + return add_axioms_from_char(args(f, 1)[0]); +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_from_char + + Inputs: one expression of type char + + Outputs: a new string expression + + Purpose: add axioms stating that the returned string as length 1 and + the character it contains correspond to the input expression + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_from_char(const exprt &c) +{ + string_exprt res(get_char_type()); + and_exprt lemma(equal_exprt(res[0], c), res.axiom_for_has_length(1)); + axioms.push_back(lemma); + return res; +} + +/*******************************************************************\ + +Function: string_constraint_generatort::add_axioms_for_value_of + + Inputs: function application with one or three arguments + + Outputs: a new string expression + + Purpose: add axioms corresponding to the String.valueOf([C) and + String.valueOf([CII) functions + +\*******************************************************************/ + +string_exprt string_constraint_generatort::add_axioms_for_value_of( + const function_application_exprt &f) +{ + const function_application_exprt::argumentst &args=f.arguments(); + if(args.size()==3) + { + string_exprt res(get_char_type()); + exprt char_array=args[0]; + exprt offset=args[1]; + exprt count=args[2]; + + // We add axioms: + // a1 : |res| = count + // a2 : forall idx i=sum+first_value + // a2 : starts_with_plus => i=sum + // a3 : starts_with_minus => i=-sum + + equal_exprt premise=str.axiom_for_has_length(size); + implies_exprt a1( + and_exprt(premise, starts_with_digit), + equal_exprt(i, plus_exprt(sum, first_value))); + axioms.push_back(a1); + + implies_exprt a2(and_exprt(premise, starts_with_plus), equal_exprt(i, sum)); + axioms.push_back(a2); + + implies_exprt a3( + and_exprt(premise, starts_with_minus), + equal_exprt(i, unary_minus_exprt(sum))); + axioms.push_back(a3); + } + return i; +} From 20f497039e78da13f023647710eab598e479eac6 Mon Sep 17 00:00:00 2001 From: Romain Brenguier Date: Fri, 23 Dec 2016 12:44:12 +0000 Subject: [PATCH 8/8] String refinement class Inherits from `bv_refinementt`. It implements a string solver by creating string constraints via a `string_constraint_generatort` objects, and sove them by progressively instantiating universal constraints as needed. --- src/solvers/refinement/string_refinement.cpp | 1099 ++++++++++++++++++ src/solvers/refinement/string_refinement.h | 120 ++ 2 files changed, 1219 insertions(+) create mode 100644 src/solvers/refinement/string_refinement.cpp create mode 100644 src/solvers/refinement/string_refinement.h diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp new file mode 100644 index 00000000000..cb406cbacb2 --- /dev/null +++ b/src/solvers/refinement/string_refinement.cpp @@ -0,0 +1,1099 @@ +/*******************************************************************\ + +Module: String support via creating string constraints and progressively + instantiating the universal constraints as needed. + The procedure is described in the PASS paper at HVC'13: + "PASS: String Solving with Parameterized Array and Interval Automaton" + by Guodong Li and Indradeep Ghosh. + +Author: Alberto Griggio, alberto.griggio@gmail.com + +\*******************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +string_refinementt::string_refinementt( + const namespacet &_ns, propt &_prop, unsigned refinement_bound): + supert(_ns, _prop), + use_counter_example(false), + initial_loop_bound(refinement_bound) +{ } + +/*******************************************************************\ + +Function: string_refinementt::set_mode + + Purpose: determine which language should be used + +\*******************************************************************/ + +void string_refinementt::set_mode() +{ + debug() << "initializing mode" << eom; + symbolt init=ns.lookup(irep_idt(CPROVER_PREFIX"initialize")); + irep_idt mode=init.mode; + debug() << "mode detected as " << mode << eom; + generator.set_mode(mode); +} + +/*******************************************************************\ + +Function: string_refinementt::display_index_set + + Purpose: display the current index set, for debugging + +\*******************************************************************/ + +void string_refinementt::display_index_set() +{ + for(const auto &i : index_set) + { + const exprt &s=i.first; + debug() << "IS(" << from_expr(s) << ")=={"; + + for(auto j : i.second) + debug() << from_expr(j) << "; "; + debug() << "}" << eom; + } +} + +/*******************************************************************\ + +Function: string_refinementt::add_instantiations + + Purpose: compute the index set for all formulas, instantiate the formulas + with the found indexes, and add them as lemmas. + +\*******************************************************************/ + +void string_refinementt::add_instantiations() +{ + debug() << "string_constraint_generatort::add_instantiations: " + << "going through the current index set:" << eom; + for(const auto &i : current_index_set) + { + const exprt &s=i.first; + debug() << "IS(" << from_expr(s) << ")=={"; + + for(const auto &j : i.second) + debug() << from_expr(j) << "; "; + debug() << "}" << eom; + + for(const auto &j : i.second) + { + const exprt &val=j; + + for(const auto &ua : universal_axioms) + { + exprt lemma=instantiate(ua, s, val); + add_lemma(lemma); + } + } + } +} + +/*******************************************************************\ + +Function: string_refinementt::convert_rest + + Inputs: an expression + + Outputs: a literal + + Purpose: if the expression is a function application, we convert it + using our own convert_function_application method + +\*******************************************************************/ + +literalt string_refinementt::convert_rest(const exprt &expr) +{ + if(expr.id()==ID_function_application) + { + // can occur in __CPROVER_assume + bvt bv=convert_function_application(to_function_application_expr(expr)); + assert(bv.size()==1); + return bv[0]; + } + else + { + return supert::convert_rest(expr); + } +} + +/*******************************************************************\ + +Function: string_refinementt::convert_symbol + + Inputs: an expression + + Outputs: a bitvector + + Purpose: if the expression as string type, look up for the string in the + list of string symbols that we maintain, and convert it; + otherwise use the method of the parent class + +\*******************************************************************/ + +bvt string_refinementt::convert_symbol(const exprt &expr) +{ + const typet &type=expr.type(); + const irep_idt &identifier=expr.get(ID_identifier); + assert(!identifier.empty()); + + if(refined_string_typet::is_unrefined_string_type(type)) + { + string_exprt str= + generator.find_or_add_string_of_symbol(to_symbol_expr(expr)); + bvt bv=convert_bv(str); + return bv; + } + else + return supert::convert_symbol(expr); +} + +/*******************************************************************\ + +Function: string_refinementt::convert_function_application + + Inputs: a function_application + + Outputs: a bitvector + + Purpose: generate an expression, add lemmas stating that this expression + corresponds to the result of the given function call, and convert + this expression + +\*******************************************************************/ + +bvt string_refinementt::convert_function_application +(const function_application_exprt &expr) +{ + debug() << "string_refinementt::convert_function_application " + << from_expr(expr) << eom; + exprt f=generator.add_axioms_for_function_application(expr); + return convert_bv(f); +} + +/*******************************************************************\ + +Function: string_refinementt::boolbv_set_equality_to_true + + Inputs: an equality expression + + Outputs: a Boolean flag to signal a proble + + Purpose: add lemmas to the solver corresponding to the given equation + +\*******************************************************************/ + +bool string_refinementt::boolbv_set_equality_to_true(const equal_exprt &expr) +{ + if(!equality_propagation) + return true; + + // We should not do that everytime, but I cannot find + // another good entry point + if(generator.get_mode()==ID_unknown) + set_mode(); + + typet type=ns.follow(expr.lhs().type()); + + if(expr.lhs().id()==ID_symbol && + // We can have affectation of string from StringBuilder or CharSequence + // type==ns.follow(expr.rhs().type()) && + type.id()!=ID_bool) + { + debug() << "string_refinementt " << from_expr(expr.lhs()) << " <- " + << from_expr(expr.rhs()) << eom; + + + if(expr.rhs().id()==ID_typecast) + { + exprt uncast=to_typecast_expr(expr.rhs()).op(); + if(refined_string_typet::is_unrefined_string_type(uncast.type())) + { + debug() << "(sr) detected casted string" << eom; + symbol_exprt sym=to_symbol_expr(expr.lhs()); + generator.set_string_symbol_equal_to_expr(sym, uncast); + return false; + } + } + + if(refined_string_typet::is_unrefined_string_type(type)) + { + symbol_exprt sym=to_symbol_expr(expr.lhs()); + generator.set_string_symbol_equal_to_expr(sym, expr.rhs()); + return false; + } + else if(type==ns.follow(expr.rhs().type())) + { + if(is_unbounded_array(type)) + return true; + bvt bv1=convert_bv(expr.rhs()); + const irep_idt &identifier= + to_symbol_expr(expr.lhs()).get_identifier(); + map.set_literals(identifier, type, bv1); + if(freeze_all) + set_frozen(bv1); + return false; + } + } + + return true; +} + +/*******************************************************************\ + +Function: string_refinementt::dec_solve + + Outputs: result of the decision procedure + + Purpose: use a refinement loop to instantiate universal axioms, + call the sat solver, and instantiate more indexes if needed. + +\*******************************************************************/ + +decision_proceduret::resultt string_refinementt::dec_solve() +{ + for(const exprt &axiom : generator.axioms) + if(axiom.id()==ID_string_constraint) + { + string_constraintt c=to_string_constraint(axiom); + universal_axioms.push_back(c); + } + else if(axiom.id()==ID_string_not_contains_constraint) + { + string_not_contains_constraintt nc_axiom= + to_string_not_contains_constraint(axiom); + array_typet witness_type + (refined_string_typet::index_type(), + infinity_exprt(refined_string_typet::index_type())); + generator.witness[nc_axiom]= + string_exprt::fresh_symbol("not_contains_witness", witness_type); + not_contains_axioms.push_back(nc_axiom); + } + else + { + add_lemma(axiom); + } + + initial_index_set(universal_axioms); + update_index_set(cur); + cur.clear(); + add_instantiations(); + + while((initial_loop_bound--)>0) + { + decision_proceduret::resultt res=supert::dec_solve(); + + switch(res) + { + case D_SATISFIABLE: + if(!check_axioms()) + { + debug() << "check_SAT: got SAT but the model is not correct" << eom; + } + else + { + debug() << "check_SAT: the model is correct" << eom; + return D_SATISFIABLE; + } + + debug() << "refining..." << eom; + // Since the model is not correct although we got SAT, we need to refine + // the property we are checking by adding more indices to the index set, + // and instantiating universal formulas with this indices. + // We will then relaunch the solver with these added lemmas. + current_index_set.clear(); + update_index_set(cur); + cur.clear(); + add_instantiations(); + + if(current_index_set.empty()) + { + debug() << "current index set is empty" << eom; + return D_SATISFIABLE; + } + + display_index_set(); + debug()<< "instantiating NOT_CONTAINS constraints" << eom; + for(unsigned i=0; i lemmas; + instantiate_not_contains(not_contains_axioms[i], lemmas); + for(const exprt &lemma : lemmas) + add_lemma(lemma); + } + break; + default: + return res; + } + } + debug() << "string_refinementt::dec_solve reached the maximum number" + << "of steps allowed" << eom; + return D_ERROR; +} + +/*******************************************************************\ + +Function: string_refinementt::convert_bool_bv + + Inputs: a Boolean and a expression with the desired type + + Outputs: a bit vector + + Purpose: fills as many 0 as necessary in the bit vectors to have the + right width + +\*******************************************************************/ + +bvt string_refinementt::convert_bool_bv(const exprt &boole, const exprt &orig) +{ + bvt ret; + ret.push_back(convert(boole)); + size_t width=boolbv_width(orig.type()); + ret.resize(width, const_literal(false)); + return ret; +} + +/*******************************************************************\ + +Function: string_refinementt::add_lemma + + Inputs: a lemma + + Purpose: add the given lemma to the solver + +\*******************************************************************/ + +void string_refinementt::add_lemma(const exprt &lemma, bool add_to_index_set) +{ + if(!seen_instances.insert(lemma).second) + return; + + if(lemma.is_true()) + { + debug() << "string_refinementt::add_lemma : tautology" << eom; + return; + } + + debug() << "adding lemma " << from_expr(lemma) << eom; + + prop.l_set_to_true(convert(lemma)); + if(add_to_index_set) + cur.push_back(lemma); +} + +/*******************************************************************\ + +Function: string_refinementt::string_of_array + + Inputs: a constant array expression and a integer expression + + Outputs: a string + + Purpose: convert the content of a string to a more readable representation. + This should only be used for debbuging. + +\*******************************************************************/ + +std::string string_refinementt::string_of_array( + const exprt &arr, const exprt &size) const +{ + if(size.id()!=ID_constant) + return "string of unknown size"; + unsigned n; + if(to_unsigned_integer(to_constant_expr(size), n)) + n=0; + + if(n>MAX_CONCRETE_STRING_SIZE) + return "very long string"; + if(n==0) + return "\"\""; + + std::ostringstream buf; + buf << "\""; + exprt val=get(arr); + + if(val.id()=="array-list") + { + for(size_t i=0; i(c); + } + } + } + } + else + { + return "unable to get array-list"; + } + + buf << "\""; + return buf.str(); +} + +/*******************************************************************\ + +Function: string_refinementt::get_array + + Inputs: an array expression and a integer expression + + Outputs: a string expression + + Purpose: gets a model of an array and put it in a certain form + +\*******************************************************************/ + +exprt string_refinementt::get_array(const exprt &arr, const exprt &size) +{ + exprt val=get(arr); + typet chart; + if(arr.type().subtype()==generator.get_char_type()) + chart=generator.get_char_type(); + else + assert(false); + + if(val.id()=="array-list") + { + array_typet ret_type(chart, infinity_exprt(generator.get_index_type())); + exprt ret=array_of_exprt(generator.constant_char(0), ret_type); + + for(size_t i=0; i 2, y -> -1. + +\*******************************************************************/ + +std::map string_refinementt::map_representation_of_sum( + const exprt &f) const +{ + // number of time the leaf should be added (can be negative) + std::map elems; + + std::list > to_process; + to_process.push_back(std::make_pair(f, true)); + + while(!to_process.empty()) + { + exprt cur=to_process.back().first; + bool positive=to_process.back().second; + to_process.pop_back(); + if(cur.id()==ID_plus) + { + to_process.push_back(std::make_pair(cur.op1(), positive)); + to_process.push_back(std::make_pair(cur.op0(), positive)); + } + else if(cur.id()==ID_minus) + { + to_process.push_back(std::make_pair(cur.op1(), !positive)); + to_process.push_back(std::make_pair(cur.op0(), positive)); + } + else if(cur.id()==ID_unary_minus) + { + to_process.push_back(std::make_pair(cur.op0(), !positive)); + } + else + { + if(positive) + elems[cur]=elems[cur]+1; + else + elems[cur]=elems[cur]-1; + } + } + return elems; +} + +/*******************************************************************\ + +Function: string_refinementt::sum_over_map + + Inputs: a map from expressions to integers + + Outputs: a expression for the sum of each element in the map a number of + times given by the corresponding integer in the map. + For a map x -> 2, y -> -1 would give an expression $x + x - y$. + +\*******************************************************************/ + +exprt string_refinementt::sum_over_map( + std::map &m, bool negated) const +{ + exprt sum=from_integer(0, generator.get_index_type()); + mp_integer constants=0; + + for(const auto &term : m) + { + // We should group constants together... + const exprt &t=term.first; + int second=negated?(-term.second):term.second; + if(t.id()==ID_constant) + { + std::string value(to_constant_expr(t).get_value().c_str()); + constants+=binary2integer(value, true)*second; + } + else + { + switch(second) + { + case -1: + if(sum==from_integer(0, generator.get_index_type())) + sum=unary_minus_exprt(t); + else + sum=minus_exprt(sum, t); + break; + + case 1: + if(sum==from_integer(0, generator.get_index_type())) + sum=t; + else + sum=plus_exprt(sum, t); + break; + + default: + if(second>1) + { + for(int i=0; isecond; i--) + sum=minus_exprt(sum, t); + } + } + } + } + + exprt index_const=from_integer(constants, generator.get_index_type()); + return plus_exprt(sum, index_const); +} + +/*******************************************************************\ + +Function: string_refinementt::simplify_sum + + Inputs: an expression with only plus and minus expr + + Outputs: an equivalent expression in a cannonical form + +\*******************************************************************/ + +exprt string_refinementt::simplify_sum(const exprt &f) const +{ + std::map map=map_representation_of_sum(f); + return sum_over_map(map); +} + +/*******************************************************************\ + +Function: string_refinementt::compute_inverse_function + + Inputs: a symbol qvar, an expression val, an expression f containing + and − + operations in which qvar should appear exactly once. + + Outputs: an expression corresponding of $f^{−1}(val)$ where $f$ is seen as a + function of $qvar$, i.e. the value that is necessary for qvar for f + to be equal to val. For instance, if `f` corresponds to the expression + $q + x$, `compute_inverse_function(q,v,f)` returns an expression for + $v - x$. + +\*******************************************************************/ + +exprt string_refinementt::compute_inverse_function( + const exprt &qvar, const exprt &val, const exprt &f) +{ + exprt positive, negative; + // number of time the element should be added (can be negative) + // qvar has to be equal to val - f(0) if it appears positively in f + // (ie if f(qvar)=f(0) + qvar) and f(0) - val if it appears negatively + // in f. So we start by computing val - f(0). + std::map elems=map_representation_of_sum(minus_exprt(val, f)); + + // true if qvar appears negatively in f (positively in elems): + bool neg=false; + + auto it=elems.find(qvar); + assert(it!=elems.end()); + if(it->second==1 || it->second==-1) + { + neg=(it->second==1); + } + else + { + assert(it->second==0); + debug() << "in string_refinementt::compute_inverse_function:" + << " warning: occurrences of qvar canceled out " << eom; + } + + elems.erase(it); + return sum_over_map(elems, neg); +} + + + +class find_qvar_visitort: public const_expr_visitort +{ +private: + const exprt &qvar_; + +public: + bool found; + + explicit find_qvar_visitort(const exprt &qvar): qvar_(qvar), found(false) {} + + void operator()(const exprt &expr) + { + if(expr==qvar_) + found=true; + } +}; + +/*******************************************************************\ + +Function: find_qvar + + Inputs: an index expression and a symbol qvar + + Outputs: a Boolean + + Purpose: looks for the symbol and return true if it is found + +\*******************************************************************/ + +static bool find_qvar(const exprt index, const symbol_exprt &qvar) +{ + find_qvar_visitort v2(qvar); + index.visit(v2); + return v2.found; +} + +/*******************************************************************\ + +Function: string_refinementt::initial_index_set + + Inputs: a list of string constraints + + Purpose: add to the index set all the indices that appear in the formulas + and the upper bound minus one + +\*******************************************************************/ + +void string_refinementt::initial_index_set( + const std::vector &string_axioms) +{ + for(const auto &axiom : string_axioms) + initial_index_set(axiom); +} + +/*******************************************************************\ + +Function: string_refinementt::update_index_set + + Inputs: a list of string constraints + + Purpose: add to the index set all the indices that appear in the formulas + +\*******************************************************************/ + +void string_refinementt::update_index_set(const std::vector &cur) +{ + for(const auto &axiom : cur) + update_index_set(axiom); +} + +/*******************************************************************\ + +Function: string_refinementt::initial_index_set + + Inputs: a string constraint + + Purpose: add to the index set all the indices that appear in the formula + and the upper bound minus one + +\*******************************************************************/ + +void string_refinementt::initial_index_set(const string_constraintt &axiom) +{ + symbol_exprt qvar=axiom.univ_var(); + std::list to_process; + to_process.push_back(axiom.body()); + + while(!to_process.empty()) + { + exprt cur=to_process.back(); + to_process.pop_back(); + if(cur.id()==ID_index) + { + const exprt &s=cur.op0(); + const exprt &i=cur.op1(); + + bool has_quant_var=find_qvar(i, qvar); + + // if cur is of the form s[i] and no quantified variable appears in i + if(!has_quant_var) + { + current_index_set[s].insert(i); + index_set[s].insert(i); + } + else + { + // otherwise we add k-1 + exprt e(i); + minus_exprt kminus1( + axiom.upper_bound(), + from_integer(1, generator.get_index_type())); + replace_expr(qvar, kminus1, e); + current_index_set[s].insert(e); + index_set[s].insert(e); + } + } + else + forall_operands(it, cur) + to_process.push_back(*it); + } +} + +/*******************************************************************\ + +Function: string_refinementt::update_index_set + + Inputs: a string constraint + + Purpose: add to the index set all the indices that appear in the formula + +\*******************************************************************/ + +void string_refinementt::update_index_set(const exprt &formula) +{ + std::list to_process; + to_process.push_back(formula); + + while(!to_process.empty()) + { + exprt cur=to_process.back(); + to_process.pop_back(); + if(cur.id()==ID_index) + { + const exprt &s=cur.op0(); + const exprt &i=cur.op1(); + assert(s.type().id()==ID_array); + exprt simplified=simplify_sum(i); + if(index_set[s].insert(simplified).second) + { + debug() << "adding to index set of " << from_expr(s) + << ": " << from_expr(simplified) << eom; + current_index_set[s].insert(simplified); + } + } + else + { + forall_operands(it, cur) + to_process.push_back(*it); + } + } +} + + +// Will be used to visit an expression and return the index used +// with the given char array +class find_index_visitort: public const_expr_visitort +{ +private: + const exprt &str_; + +public: + explicit find_index_visitort(const exprt &str): str_(str) {} + + void operator()(const exprt &expr) + { + if(expr.id()==ID_index) + { + const index_exprt &i=to_index_expr(expr); + if(i.array()==str_) + throw i.index(); + } + } +}; + +/*******************************************************************\ + +Function: string_refinementt::update_index_set + + Inputs: a formula expr and a char array str + + Outputs: an index expression + + Purpose: find an index used in the expression for str, for instance + with arguments (str[k] == 'a') and str, the function should + return k + +\*******************************************************************/ + +exprt find_index(const exprt &expr, const exprt &str) +{ + find_index_visitort v1(str); + try + { + expr.visit(v1); + return nil_exprt(); + } + catch (exprt i) { return i; } +} + + +/*******************************************************************\ + +Function: string_refinementt::instantiate + + Inputs: an universaly quantified formula `axiom`, an array of char + variable `str`, and an index expression `val`. + + Outputs: substitute `qvar` the universaly quantified variable of `axiom`, by + an index `val`, in `axiom`, so that the index used for `str` equals + `val`. For instance, if `axiom` corresponds to + $\forall q. s[q+x]='a' && t[q]='b'$, `instantiate(axom,s,v)` would return + an expression for $s[v]='a' && t[v-x]='b'$. + +\*******************************************************************/ + +exprt string_refinementt::instantiate( + const string_constraintt &axiom, const exprt &str, const exprt &val) +{ + exprt idx=find_index(axiom.body(), str); + if(idx.is_nil()) + return true_exprt(); + if(!find_qvar(idx, axiom.univ_var())) + return true_exprt(); + + exprt r=compute_inverse_function(axiom.univ_var(), val, idx); + implies_exprt instance(axiom.premise(), axiom.body()); + replace_expr(axiom.univ_var(), r, instance); + // We are not sure the index set contains only positive numbers + exprt bounds=and_exprt( + axiom.univ_within_bounds(), + binary_relation_exprt( + from_integer(0, generator.get_index_type()), ID_le, val)); + replace_expr(axiom.univ_var(), r, bounds); + return implies_exprt(bounds, instance); +} + + +void string_refinementt::instantiate_not_contains( + const string_not_contains_constraintt &axiom, std::list &new_lemmas) +{ + exprt s0=axiom.s0(); + exprt s1=axiom.s1(); + + debug() << "instantiate not contains " << from_expr(s0) << " : " + << from_expr(s1) << eom; + expr_sett index_set0=index_set[to_string_expr(s0).content()]; + expr_sett index_set1=index_set[to_string_expr(s1).content()]; + + for(auto it0 : index_set0) + for(auto it1 : index_set1) + { + debug() << from_expr(it0) << " : " << from_expr(it1) << eom; + exprt val=minus_exprt(it0, it1); + exprt witness=generator.get_witness_of(axiom, val); + and_exprt prem_and_is_witness( + axiom.premise(), + equal_exprt(witness, it1)); + + not_exprt differ( + equal_exprt( + to_string_expr(s0)[it0], + to_string_expr(s1)[it1])); + exprt lemma=implies_exprt(prem_and_is_witness, differ); + + new_lemmas.push_back(lemma); + // we put bounds on the witnesses: + // 0 <= v <= |s0| - |s1| ==> 0 <= v+w[v] < |s0| && 0 <= w[v] < |s1| + exprt zero=from_integer(0, generator.get_index_type()); + binary_relation_exprt c1(zero, ID_le, plus_exprt(val, witness)); + binary_relation_exprt c2 + (to_string_expr(s0).length(), ID_gt, plus_exprt(val, witness)); + binary_relation_exprt c3(to_string_expr(s1).length(), ID_gt, witness); + binary_relation_exprt c4(zero, ID_le, witness); + + minus_exprt diff( + to_string_expr(s0).length(), + to_string_expr(s1).length()); + + and_exprt premise( + binary_relation_exprt(zero, ID_le, val), + binary_relation_exprt(diff, ID_ge, val)); + exprt witness_bounds=implies_exprt( + premise, + and_exprt(and_exprt(c1, c2), and_exprt(c3, c4))); + new_lemmas.push_back(witness_bounds); + } +} diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h new file mode 100644 index 00000000000..aac4eed6dab --- /dev/null +++ b/src/solvers/refinement/string_refinement.h @@ -0,0 +1,120 @@ +/*******************************************************************\ + +Module: String support via creating string constraints and progressively + instantiating the universal constraints as needed. + The procedure is described in the PASS paper at HVC'13: + "PASS: String Solving with Parameterized Array and Interval Automaton" + by Guodong Li and Indradeep Ghosh + +Author: Alberto Griggio, alberto.griggio@gmail.com + +\*******************************************************************/ + +#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H +#define CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_H + +#include + +#include +#include +#include +#include + +// Defines a limit on the string witnesses we will output. +// Longer strings are still concidered possible by the solver but +// it will not output them. +#define MAX_CONCRETE_STRING_SIZE 500 + +#define MAX_NB_REFINEMENT 100 + +class string_refinementt: public bv_refinementt +{ +public: + // refinement_bound is a bound on the number of refinement allowed + string_refinementt( + const namespacet &_ns, propt &_prop, unsigned refinement_bound); + + void set_mode(); + + // Should we use counter examples at each iteration? + bool use_counter_example; + + virtual std::string decision_procedure_text() const + { + return "string refinement loop with "+prop.solver_text(); + } + + static exprt is_positive(const exprt &x); + +protected: + typedef std::set expr_sett; + + virtual bvt convert_symbol(const exprt &expr); + virtual bvt convert_function_application( + const function_application_exprt &expr); + + decision_proceduret::resultt dec_solve(); + + bvt convert_bool_bv(const exprt &boole, const exprt &orig); + +private: + // Base class + typedef bv_refinementt supert; + + unsigned initial_loop_bound; + + string_constraint_generatort generator; + + // Simple constraints that have been given to the solver + expr_sett seen_instances; + + std::vector universal_axioms; + + std::vector not_contains_axioms; + + // Unquantified lemmas that have newly been added + std::vector cur; + + // See the definition in the PASS article + // Warning: this is indexed by array_expressions and not string expressions + std::map current_index_set; + std::map index_set; + + void display_index_set(); + + void add_lemma(const exprt &lemma, bool add_to_index_set=true); + + bool boolbv_set_equality_to_true(const equal_exprt &expr); + + literalt convert_rest(const exprt &expr); + + void add_instantiations(); + + bool check_axioms(); + + void update_index_set(const exprt &formula); + void update_index_set(const std::vector &cur); + void initial_index_set(const string_constraintt &axiom); + void initial_index_set(const std::vector &string_axioms); + + exprt instantiate( + const string_constraintt &axiom, const exprt &str, const exprt &val); + + void instantiate_not_contains( + const string_not_contains_constraintt &axiom, + std::list &new_lemmas); + + exprt compute_inverse_function( + const exprt &qvar, const exprt &val, const exprt &f); + + std::map map_representation_of_sum(const exprt &f) const; + exprt sum_over_map(std::map &m, bool negated=false) const; + + exprt simplify_sum(const exprt &f) const; + + exprt get_array(const exprt &arr, const exprt &size); + + std::string string_of_array(const exprt &arr, const exprt &size) const; +}; + +#endif