diff --git a/regression/strings-smoke-tests/max_input_length/MemberTest.class b/regression/strings-smoke-tests/max_input_length/MemberTest.class index cbb5dcfdbbd..1ffb198b86d 100644 Binary files a/regression/strings-smoke-tests/max_input_length/MemberTest.class and b/regression/strings-smoke-tests/max_input_length/MemberTest.class differ diff --git a/regression/strings-smoke-tests/max_input_length/MemberTest.desc b/regression/strings-smoke-tests/max_input_length/MemberTest.desc index 2a62caa2eda..4603a102472 100644 --- a/regression/strings-smoke-tests/max_input_length/MemberTest.desc +++ b/regression/strings-smoke-tests/max_input_length/MemberTest.desc @@ -1,8 +1,8 @@ CORE MemberTest.class ---refine-strings --verbosity 10 --string-max-length 29 --java-assume-inputs-non-null --function MemberTest.main -^EXIT=0$ +--refine-strings --verbosity 10 --string-max-length 45 --string-max-input-length 31 --function MemberTest.main +^EXIT=10$ ^SIGNAL=0$ -^VERIFICATION SUCCESSFUL$ +^VERIFICATION FAILED$ -- non equal types diff --git a/regression/strings-smoke-tests/max_input_length/MemberTest.java b/regression/strings-smoke-tests/max_input_length/MemberTest.java index cf88d8d8f83..9dd3e67a50b 100644 --- a/regression/strings-smoke-tests/max_input_length/MemberTest.java +++ b/regression/strings-smoke-tests/max_input_length/MemberTest.java @@ -1,9 +1,17 @@ public class MemberTest { String foo; + public void main() { - // Causes this function to be ignored if string-max-length is - // less than 40 + if (foo == null) + return; + + // This would prevent anything from happening if we were to add a + // constraints on strings being smaller than 40 String t = new String("0123456789012345678901234567890123456789"); - assert foo != null && foo.length() < 30; + + if (foo.length() >= 30) + // This should not happen when string-max-input length is smaller + // than 30 + assert false; } } diff --git a/regression/strings-smoke-tests/max_input_length/MemberTest2.desc b/regression/strings-smoke-tests/max_input_length/MemberTest2.desc new file mode 100644 index 00000000000..b4e7b2322e4 --- /dev/null +++ b/regression/strings-smoke-tests/max_input_length/MemberTest2.desc @@ -0,0 +1,8 @@ +CORE +MemberTest.class +--refine-strings --verbosity 10 --string-max-length 45 --string-max-input-length 20 --function MemberTest.main +^EXIT=0$ +^SIGNAL=0$ +^VERIFICATION SUCCESSFUL$ +-- +non equal types diff --git a/regression/strings-smoke-tests/max_input_length/Test.class b/regression/strings-smoke-tests/max_input_length/Test.class index d01720ffa38..37d68870ec9 100644 Binary files a/regression/strings-smoke-tests/max_input_length/Test.class and b/regression/strings-smoke-tests/max_input_length/Test.class differ diff --git a/regression/strings-smoke-tests/max_input_length/Test.java b/regression/strings-smoke-tests/max_input_length/Test.java index 0616daa4053..9c6e8839f58 100644 --- a/regression/strings-smoke-tests/max_input_length/Test.java +++ b/regression/strings-smoke-tests/max_input_length/Test.java @@ -1,8 +1,8 @@ public class Test { public static void main(String s) { - // This prevent anything from happening if string-max-length is smaller - // than 40 - String t = new String("0123456789012345678901234567890123456789"); + // This prevent anything from happening if we were to add a constraints on strings + // being smaller than 40 + String t = new String("0123456789012345678901234567890123456789"); if (s.length() >= 30) // This should not happen when string-max-input length is smaller // than 30 diff --git a/regression/strings-smoke-tests/max_input_length/test.desc b/regression/strings-smoke-tests/max_input_length/test.desc deleted file mode 100644 index fb57067e0d7..00000000000 --- a/regression/strings-smoke-tests/max_input_length/test.desc +++ /dev/null @@ -1,8 +0,0 @@ -CORE -Test.class ---refine-strings --verbosity 10 --string-max-length 30 -^EXIT=0$ -^SIGNAL=0$ -^VERIFICATION SUCCESSFUL$ --- -non equal types diff --git a/src/solvers/Makefile b/src/solvers/Makefile index 73c973836ce..8fbdd95aead 100644 --- a/src/solvers/Makefile +++ b/src/solvers/Makefile @@ -179,6 +179,7 @@ SRC = $(BOOLEFORCE_SRC) \ refinement/refine_arithmetic.cpp \ refinement/refine_arrays.cpp \ refinement/string_refinement.cpp \ + refinement/string_refinement_util.cpp \ refinement/string_constraint_generator_code_points.cpp \ refinement/string_constraint_generator_comparison.cpp \ refinement/string_constraint_generator_concat.cpp \ diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index 74d341a24c9..778ede29cff 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -27,6 +27,62 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include +/// Generation of fresh symbols of a given type +class symbol_generatort final +{ +public: + symbol_exprt + operator()(const irep_idt &prefix, const typet &type = bool_typet()); + +private: + unsigned symbol_count = 0; +}; + +/// Correspondance between arrays and pointers string representations +class array_poolt final +{ +public: + explicit array_poolt(symbol_generatort &symbol_generator) + : fresh_symbol(symbol_generator) + { + } + + const std::unordered_map & + get_arrays_of_pointers() const + { + return arrays_of_pointers; + } + + exprt get_length(const array_string_exprt &s) const; + + void insert(const exprt &pointer_expr, array_string_exprt &array); + + array_string_exprt find(const exprt &pointer, const exprt &length); + + array_string_exprt find(const refined_string_exprt &str); + + /// Converts a struct containing a length and pointer to an array. + /// This allows to get a string expression from arguments of a string + /// builtion function, because string arguments in these function calls + /// are given as a struct containing a length and pointer to an array. + array_string_exprt of_argument(const exprt &arg); + +private: + // associate arrays to char pointers + std::unordered_map arrays_of_pointers; + + // associate length to arrays of infinite size + std::unordered_map + length_of_array; + + // generates fresh symbols + symbol_generatort &fresh_symbol; + + array_string_exprt make_char_array_for_char_pointer( + const exprt &char_pointer, + const typet &char_array_type); +}; + class string_constraint_generatort final { public: @@ -69,22 +125,22 @@ class string_constraint_generatort final return index_exprt(witness.at(c), univ_val); } - symbol_exprt fresh_symbol( - const irep_idt &prefix, const typet &type=bool_typet()); - symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type); - - exprt add_axioms_for_function_application( const function_application_exprt &expr); + symbol_generatort fresh_symbol; + + symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type); + symbol_exprt fresh_exist_index(const irep_idt &prefix, const typet &type); - const std::map &get_arrays_of_pointers() const - { - return arrays_of_pointers_; - } + array_poolt array_pool; - exprt get_length_of_string_array(const array_string_exprt &s) const; + /// Associate array to pointer, and array to length + /// \return an expression if the given function application is one of + /// associate pointer and associate length + optionalt + make_array_pointer_association(const function_application_exprt &expr); // Type used by primitives to signal errors const signedbv_typet get_return_code_type() @@ -99,9 +155,6 @@ class string_constraint_generatort final array_string_exprt get_string_expr(const exprt &expr); plus_exprt plus_exprt_with_overflow_check(const exprt &op1, const exprt &op2); - array_string_exprt associate_char_array_to_char_pointer( - const exprt &char_pointer, - const typet &char_array_type); static constant_exprt constant_char(int i, const typet &char_type); @@ -349,7 +402,6 @@ class string_constraint_generatort final std::map witness; private: std::set created_strings; - unsigned symbol_count=0; const messaget message; std::vector lemmas; @@ -364,12 +416,6 @@ class string_constraint_generatort final // Pool used for the intern method std::map intern_of_string; - - // associate arrays to char pointers - std::map arrays_of_pointers_; - - // associate length to arrays of infinite size - std::map length_of_array_; }; exprt is_digit_with_radix( diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index 1d623280dec..1c42fb768f9 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -31,8 +31,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com string_constraint_generatort::string_constraint_generatort( const string_constraint_generatort::infot &info, const namespacet &ns) - : max_string_length(info.string_max_length), - ns(ns) + : array_pool(fresh_symbol), max_string_length(info.string_max_length), ns(ns) { } @@ -87,8 +86,8 @@ constant_exprt string_constraint_generatort::constant_char( /// \par parameters: a prefix and a type /// \return a symbol of type tp whose name starts with "string_refinement#" /// followed by prefix -symbol_exprt string_constraint_generatort::fresh_symbol( - const irep_idt &prefix, const typet &type) +symbol_exprt symbol_generatort:: +operator()(const irep_idt &prefix, const typet &type) { std::ostringstream buf; buf << "string_refinement#" << prefix << "#" << ++symbol_count; @@ -157,13 +156,12 @@ plus_exprt string_constraint_generatort::plus_exprt_with_overflow_check( /// Associate an actual finite length to infinite arrays /// \param s: array expression representing a string /// \return expression for the length of `s` -exprt string_constraint_generatort::get_length_of_string_array( - const array_string_exprt &s) const +exprt array_poolt::get_length(const array_string_exprt &s) const { if(s.length() == infinity_exprt(s.length().type())) { - auto it = length_of_array_.find(s); - if(it != length_of_array_.end()) + auto it = length_of_array.find(s); + if(it != length_of_array.end()) return it->second; } return s.length(); @@ -185,10 +183,9 @@ array_string_exprt string_constraint_generatort::fresh_string( return str; } -// Associate a char array to a char pointer. The size of the char array is a +// Make a new char array for a char pointer. The size of the char array is a // variable with no constraint. -array_string_exprt -string_constraint_generatort::associate_char_array_to_char_pointer( +array_string_exprt array_poolt::make_char_array_for_char_pointer( const exprt &char_pointer, const typet &char_array_type) { @@ -214,10 +211,10 @@ string_constraint_generatort::associate_char_array_to_char_pointer( else if(char_pointer.id() == ID_if) { const if_exprt &if_expr = to_if_expr(char_pointer); - const array_string_exprt t = associate_char_array_to_char_pointer( - if_expr.true_case(), char_array_type); - const array_string_exprt f = associate_char_array_to_char_pointer( - if_expr.false_case(), char_array_type); + const array_string_exprt t = + make_char_array_for_char_pointer(if_expr.true_case(), char_array_type); + const array_string_exprt f = + make_char_array_for_char_pointer(if_expr.false_case(), char_array_type); array_typet array_type( char_array_type.subtype(), if_exprt( @@ -247,16 +244,32 @@ string_constraint_generatort::associate_char_array_to_char_pointer( array_string_exprt array_sym = to_array_string_expr(fresh_symbol(symbol_name, char_array_type)); auto insert_result = - arrays_of_pointers_.insert(std::make_pair(char_pointer, array_sym)); + arrays_of_pointers.insert(std::make_pair(char_pointer, array_sym)); array_string_exprt result = to_array_string_expr(insert_result.first->second); - add_default_axioms(result); return result; } +void array_poolt::insert( + const exprt &pointer_expr, + array_string_exprt &array_expr) +{ + const exprt &length = array_expr.length(); + if(length == infinity_exprt(length.type())) + { + auto pair = length_of_array.insert( + std::make_pair(array_expr, fresh_symbol("string_length", length.type()))); + array_expr.length() = pair.first->second; + } + + const auto it_bool = + arrays_of_pointers.insert(std::make_pair(pointer_expr, array_expr)); + INVARIANT( + it_bool.second, "should not associate two arrays to the same pointer"); +} + /// Associate a char array to a char pointer. -/// Insert in `arrays_of_pointers_` a binding from `ptr` to `arr`. -/// If the length of `arr` is infinite, we create a new integer symbol and add -/// a binding from `arr` to this length in `length_of_array_`. +/// Insert in `array_pool` a binding from `ptr` to `arr`. If the length of `arr` +/// is infinite, a new integer symbol is created and stored in `array_pool`. /// This also adds the default axioms for `arr`. /// \param f: a function application with argument a character array `arr` and /// a character pointer `ptr`. @@ -272,21 +285,7 @@ exprt string_constraint_generatort::associate_array_to_pointer( : f.arguments()[0]); const exprt &pointer_expr = f.arguments()[1]; - - const auto &length = array_expr.length(); - if(length == infinity_exprt(length.type())) - { - auto pair = length_of_array_.insert( - std::make_pair(array_expr, fresh_symbol("string_length", length.type()))); - array_expr.length() = pair.first->second; - } - - /// \todo We should use a function for inserting the correspondance - /// between array and pointers. - const auto it_bool = - arrays_of_pointers_.insert(std::make_pair(pointer_expr, array_expr)); - INVARIANT( - it_bool.second, "should not associate two arrays to the same pointer"); + array_pool.insert(pointer_expr, array_expr); add_default_axioms(to_array_string_expr(array_expr)); return from_integer(0, f.type()); } @@ -303,7 +302,7 @@ exprt string_constraint_generatort::associate_length_to_array( array_string_exprt array_expr = to_array_string_expr(f.arguments()[0]); const exprt &new_length = f.arguments()[1]; - const auto &length = get_length_of_string_array(array_expr); + const auto &length = array_pool.get_length(array_expr); lemmas.push_back(equal_exprt(length, new_length)); return from_integer(0, f.type()); } @@ -401,18 +400,55 @@ exprt string_constraint_generatort::add_axioms_for_constrain_characters( return from_integer(0, get_return_code_type()); } +/// Creates a new array if the pointer is not pointing to an array +/// \todo This should be replaced by make_char_array_for_char_pointer +array_string_exprt array_poolt::find(const exprt &pointer, const exprt &length) +{ + const array_typet array_type(pointer.type().subtype(), length); + return make_char_array_for_char_pointer(pointer, array_type); +} + /// Adds creates a new array if it does not already exists /// \todo This should be replaced by associate_char_array_to_char_pointer array_string_exprt string_constraint_generatort::char_array_of_pointer( const exprt &pointer, const exprt &length) { - const array_typet array_type(pointer.type().subtype(), length); - const array_string_exprt array = - associate_char_array_to_char_pointer(pointer, array_type); + const array_string_exprt array = array_pool.find(pointer, length); + add_default_axioms(array); return array; } +array_string_exprt array_poolt::find(const refined_string_exprt &str) +{ + return find(str.content(), str.length()); +} + +array_string_exprt array_poolt::of_argument(const exprt &arg) +{ + const auto string_argument = expr_checked_cast(arg); + return find(string_argument.op1(), string_argument.op0()); +} + +static irep_idt get_function_name(const function_application_exprt &expr) +{ + const exprt &name = expr.function(); + PRECONDITION(name.id() == ID_symbol); + return is_ssa_expr(name) ? to_ssa_expr(name).get_object_name() + : to_symbol_expr(name).get_identifier(); +} + +optionalt string_constraint_generatort::make_array_pointer_association( + const function_application_exprt &expr) +{ + const irep_idt &id = get_function_name(expr); + if(id == ID_cprover_associate_array_to_pointer_func) + return associate_array_to_pointer(expr); + else if(id == ID_cprover_associate_length_to_array_func) + return associate_length_to_array(expr); + return {}; +} + /// strings contained in this call are converted to objects of type /// `string_exprt`, through adding axioms. Axioms are then added to enforce that /// the result corresponds to the function application. @@ -421,12 +457,7 @@ array_string_exprt string_constraint_generatort::char_array_of_pointer( exprt string_constraint_generatort::add_axioms_for_function_application( const function_application_exprt &expr) { - const exprt &name=expr.function(); - PRECONDITION(name.id()==ID_symbol); - - const irep_idt &id=is_ssa_expr(name)?to_ssa_expr(name).get_object_name(): - to_symbol_expr(name).get_identifier(); - + const irep_idt &id = get_function_name(expr); exprt res; if(id==ID_cprover_char_literal_func) @@ -533,10 +564,6 @@ exprt string_constraint_generatort::add_axioms_for_function_application( res=add_axioms_for_intern(expr); else if(id==ID_cprover_string_format_func) res=add_axioms_for_format(expr); - else if(id == ID_cprover_associate_array_to_pointer_func) - res = associate_array_to_pointer(expr); - else if(id == ID_cprover_associate_length_to_array_func) - res = associate_length_to_array(expr); else if(id == ID_cprover_string_constrain_characters_func) res = add_axioms_for_constrain_characters(expr); else diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index e5939a980c2..95df34885cc 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -287,106 +287,22 @@ static void substitute_function_applications_in_equations( substitute_function_applications(eq.rhs(), generator); } -/// For now, any unsigned bitvector type of width smaller or equal to 16 is -/// considered a character. -/// \note type that are not characters maybe detected as characters (for -/// instance unsigned char in C), this will make dec_solve do unnecessary -/// steps for these, but should not affect correctness. -/// \param type: a type -/// \return true if the given type represents characters -bool is_char_type(const typet &type) -{ - return type.id() == ID_unsignedbv && - to_unsignedbv_type(type).get_width() <= 16; -} - -/// Distinguish char array from other types. -/// For now, any unsigned bitvector type is considered a character. -/// \param type: a type -/// \param ns: namespace -/// \return true if the given type is an array of characters -bool is_char_array_type(const typet &type, const namespacet &ns) -{ - if(type.id()==ID_symbol) - return is_char_array_type(ns.follow(type), ns); - return type.id() == ID_array && is_char_type(type.subtype()); -} - -/// For now, any unsigned bitvector type is considered a character. -/// \param type: a type -/// \return true if the given type represents a pointer to characters -bool is_char_pointer_type(const typet &type) -{ - return type.id() == ID_pointer && is_char_type(type.subtype()); -} - -/// \param type: a type -/// \param pred: a predicate -/// \return true if one of the subtype of `type` satisfies predicate `pred`. -/// The meaning of "subtype" is in the algebraic datatype sense: -/// for example, the subtypes of a struct are the types of its -/// components, the subtype of a pointer is the type it points to, -/// etc... -/// For instance in the type `t` defined by -/// `{ int a; char[] b; double * c; { bool d} e}`, `int`, `char`, -/// `double` and `bool` are subtypes of `t`. -bool has_subtype( - const typet &type, - const std::function &pred) +/// Fill the array_pointer correspondence and replace the right hand sides of +/// the corresponding equations +static void make_char_array_pointer_associations( + string_constraint_generatort &generator, + std::vector &equations) { - if(pred(type)) - return true; - - if(type.id() == ID_struct || type.id() == ID_union) + for(equal_exprt &eq : equations) { - const struct_union_typet &struct_type = to_struct_union_type(type); - return std::any_of( - struct_type.components().begin(), - struct_type.components().end(), // NOLINTNEXTLINE - [&](const struct_union_typet::componentt &comp) { - return has_subtype(comp.type(), pred); - }); + if( + const auto fun_app = + expr_try_dynamic_cast(eq.rhs())) + { + if(const auto result = generator.make_array_pointer_association(*fun_app)) + eq.rhs() = *result; + } } - - return std::any_of( // NOLINTNEXTLINE - type.subtypes().begin(), type.subtypes().end(), [&](const typet &t) { - return has_subtype(t, pred); - }); -} - -/// \param type: a type -/// \return true if a subtype of `type` is an pointer of characters. -/// The meaning of "subtype" is in the algebraic datatype sense: -/// for example, the subtypes of a struct are the types of its -/// components, the subtype of a pointer is the type it points to, -/// etc... -static bool has_char_pointer_subtype(const typet &type) -{ - return has_subtype(type, is_char_pointer_type); -} - -/// \param type: a type -/// \return true if a subtype of `type` is string_typet. -/// The meaning of "subtype" is in the algebraic datatype sense: -/// for example, the subtypes of a struct are the types of its -/// components, the subtype of a pointer is the type it points to, -/// etc... -static bool has_string_subtype(const typet &type) -{ - // NOLINTNEXTLINE - return has_subtype( - type, [](const typet &subtype) { return subtype == string_typet(); }); -} - -/// \param expr: an expression -/// \param ns: namespace -/// \return true if a subexpression of `expr` is an array of characters -static bool has_char_array_subexpr(const exprt &expr, const namespacet &ns) -{ - for(auto it = expr.depth_begin(); it != expr.depth_end(); ++it) - if(is_char_array_type(it->type(), ns)) - return true; - return false; } void replace_symbols_in_equations( @@ -488,35 +404,6 @@ static union_find_replacet generate_symbol_resolution_from_equations( return solver; } -/// Maps equation to expressions contained in them and conversely expressions to -/// equations that contain them. This can be used on a subset of expressions -/// which interests us, in particular strings. Equations are identified by an -/// index of type `std::size_t` for more efficient insertion and lookup. -class equation_symbol_mappingt -{ -public: - // Record index of the equations that contain a given expression - std::map> equations_containing; - // Record expressions that are contained in the equation with the given index - std::unordered_map> strings_in_equation; - - void add(const std::size_t i, const exprt &expr) - { - equations_containing[expr].push_back(i); - strings_in_equation[i].push_back(expr); - } - - std::vector find_expressions(const std::size_t i) - { - return strings_in_equation[i]; - } - - std::vector find_equations(const exprt &expr) - { - return equations_containing[expr]; - } -}; - /// This is meant to be used on the lhs of an equation with string subtype. /// \param lhs: expression which is either of string type, or a symbol /// representing a struct with some string members @@ -788,8 +675,17 @@ decision_proceduret::resultt string_refinementt::dec_solve() string_id_symbol_resolve.replace_expr(eq.rhs()); } + make_char_array_pointer_associations(generator, equations); + #ifdef DEBUG output_equations(debug(), equations, ns); + + string_dependencest dependences; + for(const equal_exprt &eq : equations) + add_node(dependences, eq, generator.array_pool); + + debug() << "dec_solve: dependence graph:" << eom; + dependences.output_dot(debug()); #endif debug() << "dec_solve: Replace function applications" << eom; @@ -801,7 +697,7 @@ decision_proceduret::resultt string_refinementt::dec_solve() #ifdef DEBUG debug() << "dec_solve: arrays_of_pointers:" << eom; - for(auto pair : generator.get_arrays_of_pointers()) + for(auto pair : generator.array_pool.get_arrays_of_pointers()) { debug() << " * " << format(pair.first) << "\t--> " << format(pair.second) << " : " << format(pair.second.type()) << eom; @@ -1193,7 +1089,7 @@ void debug_model( static const std::string indent(" "); stream << "debug_model:" << '\n'; - for(const auto &pointer_array : generator.get_arrays_of_pointers()) + for(const auto &pointer_array : generator.array_pool.get_arrays_of_pointers()) { const auto arr = pointer_array.second; const exprt model = get_char_array_and_concretize( @@ -1218,68 +1114,6 @@ void debug_model( stream << messaget::eom; } -sparse_arrayt::sparse_arrayt(const with_exprt &expr) -{ - auto ref = std::ref(static_cast(expr)); - while(can_cast_expr(ref.get())) - { - const auto &with_expr = expr_dynamic_cast(ref.get()); - const auto current_index = numeric_cast_v(with_expr.where()); - entries.emplace_back(current_index, with_expr.new_value()); - ref = with_expr.old(); - } - - // This function only handles 'with' and 'array_of' expressions - PRECONDITION(ref.get().id() == ID_array_of); - default_value = expr_dynamic_cast(ref.get()).what(); -} - -exprt sparse_arrayt::to_if_expression(const exprt &index) const -{ - return std::accumulate( - entries.begin(), - entries.end(), - default_value, - [&]( - const exprt if_expr, - const std::pair &entry) { // NOLINT - const exprt entry_index = from_integer(entry.first, index.type()); - const exprt &then_expr = entry.second; - CHECK_RETURN(then_expr.type() == if_expr.type()); - const equal_exprt index_equal(index, entry_index); - return if_exprt(index_equal, then_expr, if_expr, if_expr.type()); - }); -} - -interval_sparse_arrayt::interval_sparse_arrayt(const with_exprt &expr) - : sparse_arrayt(expr) -{ - // Entries are sorted so that successive entries correspond to intervals - std::sort( - entries.begin(), - entries.end(), - []( - const std::pair &a, - const std::pair &b) { return a.first < b.first; }); -} - -exprt interval_sparse_arrayt::to_if_expression(const exprt &index) const -{ - return std::accumulate( - entries.rbegin(), - entries.rend(), - default_value, - [&]( - const exprt if_expr, - const std::pair &entry) { // NOLINT - const exprt entry_index = from_integer(entry.first, index.type()); - const exprt &then_expr = entry.second; - CHECK_RETURN(then_expr.type() == if_expr.type()); - const binary_relation_exprt index_small_eq(index, ID_le, entry_index); - return if_exprt(index_small_eq, then_expr, if_expr, if_expr.type()); - }); -} - /// Create a new expression where 'with' expressions on arrays are replaced by /// 'if' expressions. e.g. for an array access arr[index], where: `arr := /// array_of(12) with {0:=24} with {2:=42}` the constructed expression will be: @@ -2418,7 +2252,7 @@ exprt string_refinementt::get(const exprt &expr) const if(is_char_array_type(ecopy.type(), ns)) { array_string_exprt &arr = to_array_string_expr(ecopy); - arr.length() = generator.get_length_of_string_array(arr); + arr.length() = generator.array_pool.get_length(arr); const auto arr_model_opt = get_array(super_get, ns, generator.max_string_length, debug(), arr); // \todo Refactor with get array in model diff --git a/src/solvers/refinement/string_refinement.h b/src/solvers/refinement/string_refinement.h index e3fdf335820..7ee95f8d3a6 100644 --- a/src/solvers/refinement/string_refinement.h +++ b/src/solvers/refinement/string_refinement.h @@ -27,57 +27,11 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include #include +#include #define DEFAULT_MAX_NB_REFINEMENT std::numeric_limits::max() #define CHARACTER_FOR_UNKNOWN '?' -struct index_set_pairt -{ - std::map> cumulative; - std::map> current; -}; - -struct string_axiomst -{ - std::vector universal; - std::vector not_contains; -}; - -/// Represents arrays of the form `array_of(x) with {i:=a} with {j:=b} ...` -/// by a default value `x` and a list of entries `(i,a)`, `(j,b)` etc. -class sparse_arrayt -{ -public: - /// Initialize a sparse array from an expression of the form - /// `array_of(x) with {i:=a} with {j:=b} ...` - /// This is the form in which array valuations coming from the underlying - /// solver are given. - explicit sparse_arrayt(const with_exprt &expr); - - /// Creates an if_expr corresponding to the result of accessing the array - /// at the given index - virtual exprt to_if_expression(const exprt &index) const; - -protected: - exprt default_value; - std::vector> entries; -}; - -/// Represents arrays by the indexes up to which the value remains the same. -/// For instance `{'a', 'a', 'a', 'b', 'b', 'c'}` would be represented by -/// by ('a', 2) ; ('b', 4), ('c', 5). -/// This is particularly useful when the array is constant on intervals. -class interval_sparse_arrayt final : public sparse_arrayt -{ -public: - /// An expression of the form `array_of(x) with {i:=a} with {j:=b}` is - /// converted to an array `arr` where for all index `k` smaller than `i`, - /// `arr[k]` is `a`, for all index between `i` (exclusive) and `j` it is `b` - /// and for the others it is `x`. - explicit interval_sparse_arrayt(const with_exprt &expr); - exprt to_if_expression(const exprt &index) const override; -}; - class string_refinementt final: public bv_refinementt { private: @@ -139,12 +93,6 @@ exprt concretize_arrays_in_expression( std::size_t string_max_length, const namespacet &ns); -bool is_char_array_type(const typet &type, const namespacet &ns); - -bool has_subtype( - const typet &type, - const std::function &pred); - // Declaration required for unit-test: union_find_replacet string_identifiers_resolution_from_equations( std::vector &equations, diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp new file mode 100644 index 00000000000..bc54c8a0068 --- /dev/null +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -0,0 +1,430 @@ +/*******************************************************************\ + + Module: String solver + + Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "string_refinement_util.h" + +bool is_char_type(const typet &type) +{ + return type.id() == ID_unsignedbv && + to_unsignedbv_type(type).get_width() <= + STRING_REFINEMENT_MAX_CHAR_WIDTH; +} + +bool is_char_array_type(const typet &type, const namespacet &ns) +{ + if(type.id() == ID_symbol) + return is_char_array_type(ns.follow(type), ns); + return type.id() == ID_array && is_char_type(type.subtype()); +} + +bool is_char_pointer_type(const typet &type) +{ + return type.id() == ID_pointer && is_char_type(type.subtype()); +} + +bool has_subtype( + const typet &type, + const std::function &pred) +{ + if(pred(type)) + return true; + + if(type.id() == ID_struct || type.id() == ID_union) + { + const struct_union_typet &struct_type = to_struct_union_type(type); + return std::any_of( + struct_type.components().begin(), + struct_type.components().end(), // NOLINTNEXTLINE + [&](const struct_union_typet::componentt &comp) { + return has_subtype(comp.type(), pred); + }); + } + + return std::any_of( // NOLINTNEXTLINE + type.subtypes().begin(), type.subtypes().end(), [&](const typet &t) { + return has_subtype(t, pred); + }); +} + +bool has_char_pointer_subtype(const typet &type) +{ + return has_subtype(type, is_char_pointer_type); +} + +bool has_string_subtype(const typet &type) +{ + // NOLINTNEXTLINE + return has_subtype( + type, [](const typet &subtype) { return subtype == string_typet(); }); +} + +bool has_char_array_subexpr(const exprt &expr, const namespacet &ns) +{ + const auto it = std::find_if( + expr.depth_begin(), expr.depth_end(), [&](const exprt &e) { // NOLINT + return is_char_array_type(e.type(), ns); + }); + return it != expr.depth_end(); +} + +sparse_arrayt::sparse_arrayt(const with_exprt &expr) +{ + auto ref = std::ref(static_cast(expr)); + while(can_cast_expr(ref.get())) + { + const auto &with_expr = expr_dynamic_cast(ref.get()); + const auto current_index = numeric_cast_v(with_expr.where()); + entries.emplace_back(current_index, with_expr.new_value()); + ref = with_expr.old(); + } + + // This function only handles 'with' and 'array_of' expressions + PRECONDITION(ref.get().id() == ID_array_of); + default_value = expr_dynamic_cast(ref.get()).what(); +} + +exprt sparse_arrayt::to_if_expression(const exprt &index) const +{ + return std::accumulate( + entries.begin(), + entries.end(), + default_value, + [&]( + const exprt if_expr, + const std::pair &entry) { // NOLINT + const exprt entry_index = from_integer(entry.first, index.type()); + const exprt &then_expr = entry.second; + CHECK_RETURN(then_expr.type() == if_expr.type()); + const equal_exprt index_equal(index, entry_index); + return if_exprt(index_equal, then_expr, if_expr, if_expr.type()); + }); +} + +interval_sparse_arrayt::interval_sparse_arrayt(const with_exprt &expr) + : sparse_arrayt(expr) +{ + // Entries are sorted so that successive entries correspond to intervals + std::sort( + entries.begin(), + entries.end(), + []( + const std::pair &a, + const std::pair &b) { return a.first < b.first; }); +} + +exprt interval_sparse_arrayt::to_if_expression(const exprt &index) const +{ + return std::accumulate( + entries.rbegin(), + entries.rend(), + default_value, + [&]( + const exprt if_expr, + const std::pair &entry) { // NOLINT + const exprt entry_index = from_integer(entry.first, index.type()); + const exprt &then_expr = entry.second; + CHECK_RETURN(then_expr.type() == if_expr.type()); + const binary_relation_exprt index_small_eq(index, ID_le, entry_index); + return if_exprt(index_small_eq, then_expr, if_expr, if_expr.type()); + }); +} + +void equation_symbol_mappingt::add(const std::size_t i, const exprt &expr) +{ + equations_containing[expr].push_back(i); + strings_in_equation[i].push_back(expr); +} + +std::vector +equation_symbol_mappingt::find_expressions(const std::size_t i) +{ + return strings_in_equation[i]; +} + +std::vector +equation_symbol_mappingt::find_equations(const exprt &expr) +{ + return equations_containing[expr]; +} + +string_insertion_builtin_functiont::string_insertion_builtin_functiont( + const std::vector &fun_args, + array_poolt &array_pool) +{ + PRECONDITION(fun_args.size() > 3); + const auto arg1 = expr_checked_cast(fun_args[2]); + input1 = array_pool.find(arg1.op1(), arg1.op0()); + const auto arg2 = expr_checked_cast(fun_args[3]); + input2 = array_pool.find(arg2.op1(), arg2.op0()); + result = array_pool.find(fun_args[1], fun_args[0]); + args.insert(args.end(), fun_args.begin() + 4, fun_args.end()); +} + +/// Construct a string_builtin_functiont object from a function application +/// \return a unique pointer to the created object, this unique pointer is empty +/// if the function does not correspond to one of the supported +/// builtin_functions. +static std::unique_ptr to_string_builtin_function( + const function_application_exprt &fun_app, + array_poolt &array_pool) +{ + const auto name = expr_checked_cast(fun_app.function()); + const irep_idt &id = is_ssa_expr(name) ? to_ssa_expr(name).get_object_name() + : name.get_identifier(); + + if(id == ID_cprover_string_insert_func) + return std::unique_ptr( + new string_insertion_builtin_functiont(fun_app.arguments(), array_pool)); + + if(id == ID_cprover_string_concat_func) + return std::unique_ptr( + new string_insertion_builtin_functiont(fun_app.arguments(), array_pool)); + + return {}; +} + +string_dependencest::string_nodet & +string_dependencest::get_node(const array_string_exprt &e) +{ + auto entry_inserted = node_index_pool.emplace(e, string_nodes.size()); + if(!entry_inserted.second) + return string_nodes[entry_inserted.first->second]; + + string_nodes.emplace_back(); + return string_nodes.back(); +} + +string_dependencest::builtin_function_nodet string_dependencest::make_node( + std::unique_ptr &builtin_function) +{ + const builtin_function_nodet builtin_function_node( + builtin_function_nodes.size()); + builtin_function_nodes.emplace_back(); + builtin_function_nodes.back().swap(builtin_function); + return builtin_function_node; +} + +const string_builtin_functiont &string_dependencest::get_builtin_function( + const builtin_function_nodet &node) const +{ + PRECONDITION(node.index < builtin_function_nodes.size()); + return *(builtin_function_nodes[node.index]); +} + +const std::vector & +string_dependencest::dependencies( + const string_dependencest::string_nodet &node) const +{ + return node.dependencies; +} + +void string_dependencest::add_dependency( + const array_string_exprt &e, + const builtin_function_nodet &builtin_function_node) +{ + string_nodet &string_node = get_node(e); + string_node.dependencies.push_back(builtin_function_node); +} + +void string_dependencest::add_unknown_dependency(const array_string_exprt &e) +{ + string_nodet &string_node = get_node(e); + string_node.depends_on_unknown_builtin_function = true; +} + +static void add_unknown_dependency_to_string_subexprs( + string_dependencest &dependencies, + const function_application_exprt &fun_app, + array_poolt &array_pool) +{ + for(const auto &expr : fun_app.arguments()) + { + std::for_each( + expr.depth_begin(), expr.depth_end(), [&](const exprt &e) { // NOLINT + const auto &string_struct = expr_try_dynamic_cast(e); + if(string_struct && string_struct->operands().size() == 2) + { + const array_string_exprt string = + array_pool.find(string_struct->op1(), string_struct->op0()); + dependencies.add_unknown_dependency(string); + } + }); + } +} + +static void add_dependency_to_string_subexprs( + string_dependencest &dependencies, + const function_application_exprt &fun_app, + const string_dependencest::builtin_function_nodet &builtin_function_node, + array_poolt &array_pool) +{ + PRECONDITION(fun_app.arguments()[0].type().id() != ID_pointer); + if( + fun_app.arguments().size() > 1 && + fun_app.arguments()[1].type().id() == ID_pointer) + { + // Case where the result is given as a pointer argument + const array_string_exprt string = + array_pool.find(fun_app.arguments()[1], fun_app.arguments()[0]); + dependencies.add_dependency(string, builtin_function_node); + } + + for(const auto &expr : fun_app.arguments()) + { + std::for_each( + expr.depth_begin(), + expr.depth_end(), + [&](const exprt &e) { // NOLINT + if(const auto structure = expr_try_dynamic_cast(e)) + { + const array_string_exprt string = array_pool.of_argument(*structure); + dependencies.add_dependency(string, builtin_function_node); + } + }); + } +} + +string_dependencest::node_indext string_dependencest::size() const +{ + return builtin_function_nodes.size() + string_nodes.size(); +} + +/// Convert an index of a string node in `string_nodes` to the node_indext for +/// the same node +static std::size_t string_index_to_node_index(const std::size_t string_index) +{ + return 2 * string_index + 1; +} + +/// Convert an index of a builtin function node to the node_indext for +/// the same node +static std::size_t +builtin_function_index_to_node_index(const std::size_t builtin_index) +{ + return 2 * builtin_index; +} + +string_dependencest::node_indext +string_dependencest::node_index(const builtin_function_nodet &n) const +{ + return builtin_function_index_to_node_index(n.index); +} + +string_dependencest::node_indext +string_dependencest::node_index(const array_string_exprt &s) const +{ + return string_index_to_node_index(node_index_pool.at(s)); +} + +optionalt +string_dependencest::get_builtin_function_node(node_indext i) const +{ + if(i % 2 == 0) + return builtin_function_nodet(i / 2); + return {}; +} + +optionalt +string_dependencest::get_string_node(node_indext i) const +{ + if(i % 2 == 1 && i / 2 < string_nodes.size()) + return string_nodes[i / 2]; + return {}; +} + +bool add_node( + string_dependencest &dependencies, + const equal_exprt &equation, + array_poolt &array_pool) +{ + const auto fun_app = + expr_try_dynamic_cast(equation.rhs()); + if(!fun_app) + return false; + + auto builtin_function = to_string_builtin_function(*fun_app, array_pool); + + if(!builtin_function) + { + add_unknown_dependency_to_string_subexprs( + dependencies, *fun_app, array_pool); + return true; + } + + const auto &builtin_function_node = dependencies.make_node(builtin_function); + // Warning: `builtin_function` has been emptied and should not be used anymore + + if( + const auto &string_result = + dependencies.get_builtin_function(builtin_function_node).string_result()) + dependencies.add_dependency(*string_result, builtin_function_node); + else + add_dependency_to_string_subexprs( + dependencies, *fun_app, builtin_function_node, array_pool); + + return true; +} + +void string_dependencest::for_each_successor( + const std::size_t &i, + const std::function &f) const +{ + if(const auto &builtin_function_node = get_builtin_function_node(i)) + { + const string_builtin_functiont &p = + get_builtin_function(*builtin_function_node); + std::for_each( + p.string_arguments().begin(), + p.string_arguments().end(), + [&](const array_string_exprt &s) { f(node_index(s)); }); + } + else if(const auto &s = get_string_node(i)) + { + std::for_each( + s->dependencies.begin(), + s->dependencies.end(), + [&](const builtin_function_nodet &p) { f(node_index(p)); }); + } + else + UNREACHABLE; +} + +void string_dependencest::output_dot(std::ostream &stream) const +{ + const auto for_each_node = + [&](const std::function &f) { // NOLINT + for(std::size_t i = 0; i < string_nodes.size(); ++i) + f(string_index_to_node_index(i)); + for(std::size_t i = 0; i < builtin_function_nodes.size(); ++i) + f(builtin_function_index_to_node_index(i)); + }; + + const auto for_each_succ = [&]( + const std::size_t &i, + const std::function &f) { // NOLINT + for_each_successor(i, f); + }; + + const auto node_to_string = [&](const std::size_t &i) { // NOLINT + return std::to_string(i); + }; + stream << "digraph dependencies {\n"; + output_dot_generic( + stream, for_each_node, for_each_succ, node_to_string); + stream << '}' << std::endl; +} diff --git a/src/solvers/refinement/string_refinement_util.h b/src/solvers/refinement/string_refinement_util.h new file mode 100644 index 00000000000..4b67df35592 --- /dev/null +++ b/src/solvers/refinement/string_refinement_util.h @@ -0,0 +1,343 @@ +/*******************************************************************\ + + Module: String solver + + Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +#ifndef CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_UTIL_H +#define CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_UTIL_H + +#include "string_constraint.h" +#include "string_constraint_generator.h" + +/// For now, any unsigned bitvector type of width smaller or equal to 16 is +/// considered a character. +/// \note type that are not characters maybe detected as characters (for +/// instance unsigned char in C), this will make dec_solve do unnecessary +/// steps for these, but should not affect correctness. +/// \param type: a type +/// \return true if the given type represents characters +bool is_char_type(const typet &type); + +/// Distinguish char array from other types. +/// For now, any unsigned bitvector type is considered a character. +/// \param type: a type +/// \param ns: namespace +/// \return true if the given type is an array of characters +bool is_char_array_type(const typet &type, const namespacet &ns); + +/// For now, any unsigned bitvector type is considered a character. +/// \param type: a type +/// \return true if the given type represents a pointer to characters +bool is_char_pointer_type(const typet &type); + +/// \param type: a type +/// \param pred: a predicate +/// \return true if one of the subtype of `type` satisfies predicate `pred`. +/// The meaning of "subtype" is in the algebraic datatype sense: +/// for example, the subtypes of a struct are the types of its +/// components, the subtype of a pointer is the type it points to, +/// etc... +/// For instance in the type `t` defined by +/// `{ int a; char[] b; double * c; { bool d} e}`, `int`, `char`, +/// `double` and `bool` are subtypes of `t`. +bool has_subtype( + const typet &type, + const std::function &pred); + +/// \param type: a type +/// \return true if a subtype of `type` is an pointer of characters. +/// The meaning of "subtype" is in the algebraic datatype sense: +/// for example, the subtypes of a struct are the types of its +/// components, the subtype of a pointer is the type it points to, +/// etc... +bool has_char_pointer_subtype(const typet &type); + +/// \param type: a type +/// \return true if a subtype of `type` is string_typet. +/// The meaning of "subtype" is in the algebraic datatype sense: +/// for example, the subtypes of a struct are the types of its +/// components, the subtype of a pointer is the type it points to, +/// etc... +bool has_string_subtype(const typet &type); + +/// \param expr: an expression +/// \param ns: namespace +/// \return true if a subexpression of `expr` is an array of characters +bool has_char_array_subexpr(const exprt &expr, const namespacet &ns); + +struct index_set_pairt +{ + std::map> cumulative; + std::map> current; +}; + +struct string_axiomst +{ + std::vector universal; + std::vector not_contains; +}; + +/// Represents arrays of the form `array_of(x) with {i:=a} with {j:=b} ...` +/// by a default value `x` and a list of entries `(i,a)`, `(j,b)` etc. +class sparse_arrayt +{ +public: + /// Initialize a sparse array from an expression of the form + /// `array_of(x) with {i:=a} with {j:=b} ...` + /// This is the form in which array valuations coming from the underlying + /// solver are given. + explicit sparse_arrayt(const with_exprt &expr); + + /// Creates an if_expr corresponding to the result of accessing the array + /// at the given index + virtual exprt to_if_expression(const exprt &index) const; + +protected: + exprt default_value; + std::vector> entries; +}; + +/// Represents arrays by the indexes up to which the value remains the same. +/// For instance `{'a', 'a', 'a', 'b', 'b', 'c'}` would be represented by +/// by ('a', 2) ; ('b', 4), ('c', 5). +/// This is particularly useful when the array is constant on intervals. +class interval_sparse_arrayt final : public sparse_arrayt +{ +public: + /// An expression of the form `array_of(x) with {i:=a} with {j:=b}` is + /// converted to an array `arr` where for all index `k` smaller than `i`, + /// `arr[k]` is `a`, for all index between `i` (exclusive) and `j` it is `b` + /// and for the others it is `x`. + explicit interval_sparse_arrayt(const with_exprt &expr); + exprt to_if_expression(const exprt &index) const override; +}; + +/// Maps equation to expressions contained in them and conversely expressions to +/// equations that contain them. This can be used on a subset of expressions +/// which interests us, in particular strings. Equations are identified by an +/// index of type `std::size_t` for more efficient insertion and lookup. +class equation_symbol_mappingt +{ +public: + /// Record the fact that equation `i` contains expression `expr` + void add(const std::size_t i, const exprt &expr); + + /// \param i: index corresponding to an equation + /// \return vector of expressions contained in the equation with the given + /// index `i` + std::vector find_expressions(const std::size_t i); + + /// \param expr: an expression + /// \return vector of equations containing the given expression `expr` + std::vector find_equations(const exprt &expr); + +private: + /// Record index of the equations that contain a given expression + std::map> equations_containing; + /// Record expressions that are contained in the equation with the given index + std::unordered_map> strings_in_equation; +}; + +/// Base class for string functions that are built in the solver +class string_builtin_functiont +{ +public: + string_builtin_functiont() = default; + string_builtin_functiont(const string_builtin_functiont &) = delete; + + virtual optionalt string_result() const + { + return {}; + } + + virtual std::vector string_arguments() const + { + return {}; + } +}; + +/// String builtin_function transforming one string into another +class string_transformation_builtin_functiont : public string_builtin_functiont +{ +public: + array_string_exprt result; + array_string_exprt input; + std::vector args; + exprt return_code; + optionalt string_result() const override + { + return result; + } + std::vector string_arguments() const override + { + return {input}; + } +}; + +/// String inserting a string into another one +class string_insertion_builtin_functiont : public string_builtin_functiont +{ +public: + array_string_exprt result; + array_string_exprt input1; + array_string_exprt input2; + std::vector args; + exprt return_code; + + /// Constructor from arguments of a function application + string_insertion_builtin_functiont( + const std::vector &fun_args, + array_poolt &array_pool); + + optionalt string_result() const override + { + return result; + } + std::vector string_arguments() const override + { + return {input1, input2}; + } +}; + +/// String creation from other types +class string_creation_builtin_functiont : public string_builtin_functiont +{ +public: + array_string_exprt result; + std::vector args; + exprt return_code; + + optionalt string_result() const override + { + return result; + } +}; + +/// String test +class string_test_builtin_functiont : public string_builtin_functiont +{ +public: + exprt result; + std::vector under_test; + std::vector args; + std::vector string_arguments() const override + { + return under_test; + } +}; + +/// Keep track of dependencies between strings. +/// Each string points to builtin_function calls on which it depends, +/// each builtin_function points to the strings on which the result depend. +class string_dependencest +{ +public: + /// A builtin_function node is just an index in the `builtin_function_nodes` + /// vector. + class builtin_function_nodet + { + public: + std::size_t index; + explicit builtin_function_nodet(std::size_t i) : index(i) + { + } + }; + + /// A string node points to builtin_function on which it depends + class string_nodet + { + public: + std::vector dependencies; + + // In case it depends on a builtin_function we don't support yet + bool depends_on_unknown_builtin_function = false; + }; + + string_nodet &get_node(const array_string_exprt &e); + + /// `builtin_function` is reset to an empty pointer after the node is created + builtin_function_nodet + make_node(std::unique_ptr &builtin_function); + const std::vector & + dependencies(const string_nodet &node) const; + const string_builtin_functiont & + get_builtin_function(const builtin_function_nodet &node) const; + + /// Add edge from node for `e` to node for `builtin_function` + void add_dependency( + const array_string_exprt &e, + const builtin_function_nodet &builtin_function); + + /// Mark node for `e` as depending on unknown builtin_function + void add_unknown_dependency(const array_string_exprt &e); + + void output_dot(std::ostream &stream) const; + +private: + /// Set of nodes representing builtin_functions + std::vector> builtin_function_nodes; + + /// Set of nodes representing strings + std::vector string_nodes; + + /// Nodes describing dependencies of a string: values of the map correspond + /// to indexes in the vector `string_nodes`. + std::unordered_map + node_index_pool; + + /// Common index for all nodes (both strings and builtin_functions) so that we + /// can reuse generic algorithms of util/graph.h + /// Even indexes correspond to builtin_function nodes, odd indexes to string + /// nodes. + typedef std::size_t node_indext; + + /// \return total number of nodes + node_indext size() const; + + /// \param n: builtin function node + /// \return index corresponding to builtin function node `n` + node_indext node_index(const builtin_function_nodet &n) const; + + /// \param s: array expression representing a string + /// \return index corresponding to an string exprt s + node_indext node_index(const array_string_exprt &s) const; + + /// \param i: index of a node + /// \return corresponding node if the index corresponds to a builtin function + /// node, empty optional otherwise + optionalt + get_builtin_function_node(node_indext i) const; + + /// \param i: index of a node + /// \return corresponding node if the index corresponds to a string + /// node, empty optional otherwise + optionalt get_string_node(node_indext i) const; + + /// Applies `f` on all successors of the node with index `i` + void for_each_successor( + const node_indext &i, + const std::function &f) const; +}; + +/// When right hand side of equation is a builtin_function add +/// a "string_builtin_function" node to the graph and connect it to the strings +/// on which it depends and which depends on it. +/// If the string builtin_function is not a supported one, mark all the string +/// arguments as depending on an unknown builtin_function. +/// \param dependencies: graph to which we add the node +/// \param equation: an equation whose right hand side is possibly a call to a +/// string builtin_function. +/// \param array_pool: array pool containing arrays corresponding to the string +/// exprt arguments of the builtin_function call +/// \return true if a node was added, if false is returned it either means that +/// the right hand side is not a function application +/// \todo there should be a class with just the three functions we require here +bool add_node( + string_dependencest &dependencies, + const equal_exprt &equation, + array_poolt &array_pool); + +#endif // CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_UTIL_H diff --git a/src/util/graph.h b/src/util/graph.h index 122bb85dd18..60d56cf2600 100644 --- a/src/util/graph.h +++ b/src/util/graph.h @@ -20,6 +20,7 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include #include "invariant.h" @@ -264,8 +265,11 @@ class grapht std::list topsort() const; + std::vector get_successors(const node_indext &n) const; void output_dot(std::ostream &out) const; - void output_dot_node(std::ostream &out, node_indext n) const; + void for_each_successor( + const node_indext &n, + std::function f) const; protected: class tarjant @@ -668,23 +672,64 @@ std::list::node_indext> grapht::topsort() const return nodelist; } -template -void grapht::output_dot(std::ostream &out) const +template +void output_dot_generic( + std::ostream &out, + const std::function)> + &for_each_node, + const std::function< + void(const node_index_type &, std::function)> + &for_each_succ, + const std::function node_to_string) { - for(node_indext n=0; n " << node_to_string(n) << '\n'; + }); + }); } -template -void grapht::output_dot_node(std::ostream &out, node_indext n) const +template +std::vector::node_indext> +grapht::get_successors(const node_indext &n) const { - const nodet &node=nodes[n]; + std::vector result; + std::transform( + nodes[n].out.begin(), + nodes[n].out.end(), + std::back_inserter(result), + [&](const std::pair &edge) { return edge.first; }); + return result; +} - for(typename edgest::const_iterator - it=node.out.begin(); - it!=node.out.end(); - it++) - out << n << " -> " << it->first << '\n'; +template +void grapht::for_each_successor( + const node_indext &n, + std::function f) const +{ + std::for_each( + nodes[n].out.begin(), + nodes[n].out.end(), + [&](const std::pair &edge) { f(edge.first); }); +} + +template +void grapht::output_dot(std::ostream &out) const +{ + const auto for_each_node = + [&](const std::function &f) { // NOLINT + for(node_indext i = 0; i < nodes.size(); ++i) + f(i); + }; + + const auto for_each_succ = [&]( + const node_indext &i, + const std::function &f) { // NOLINT + for_each_successor(i, f); + }; + + const auto to_string = [](const node_indext &i) { return std::to_string(i); }; + output_dot_generic(out, for_each_node, for_each_succ, to_string); } #endif // CPROVER_UTIL_GRAPH_H diff --git a/src/util/magic.h b/src/util/magic.h index b3525c2f199..e3bfbdeeae3 100644 --- a/src/util/magic.h +++ b/src/util/magic.h @@ -8,5 +8,6 @@ #include const std::size_t MAX_FLATTENED_ARRAY_SIZE=1000; +const std::size_t STRING_REFINEMENT_MAX_CHAR_WIDTH = 16; #endif diff --git a/unit/Makefile b/unit/Makefile index e366389aff2..50cca33c851 100644 --- a/unit/Makefile +++ b/unit/Makefile @@ -33,6 +33,7 @@ SRC += unit_tests.cpp \ solvers/refinement/string_constraint_generator_valueof/is_digit_with_radix.cpp \ solvers/refinement/string_constraint_instantiation/instantiate_not_contains.cpp \ solvers/refinement/string_refinement/concretize_array.cpp \ + solvers/refinement/string_refinement/dependency_graph.cpp \ solvers/refinement/string_refinement/has_subtype.cpp \ solvers/refinement/string_refinement/substitute_array_list.cpp \ solvers/refinement/string_refinement/string_symbol_resolution.cpp \ diff --git a/unit/solvers/refinement/string_refinement/dependency_graph.cpp b/unit/solvers/refinement/string_refinement/dependency_graph.cpp new file mode 100644 index 00000000000..d4e9748badb --- /dev/null +++ b/unit/solvers/refinement/string_refinement/dependency_graph.cpp @@ -0,0 +1,194 @@ +/*******************************************************************\ + + Module: Unit tests for dependency graph + solvers/refinement/string_refinement.cpp + + Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +#include + +#include +#include +#include +#include +#include + +#ifdef DEBUG +#include +#include +#include +#include +#endif + +typet length_type() +{ + return signedbv_typet(32); +} + +/// Make a struct with a pointer content and an integer length +struct_exprt make_string_argument(std::string name) +{ + struct_typet type; + const array_typet char_array(char_type(), infinity_exprt(length_type())); + type.components().emplace_back("length", length_type()); + type.components().emplace_back("content", pointer_type(char_array)); + + const symbol_exprt length(name + "_length", length_type()); + const symbol_exprt content(name + "_content", pointer_type(java_char_type())); + struct_exprt expr(type); + expr.operands().push_back(length); + expr.operands().push_back(content); + return expr; +} + +SCENARIO("dependency_graph", "[core][solvers][refinement][string_refinement]") +{ + GIVEN("dependency graph") + { + string_dependencest dependences; + refined_string_typet string_type(java_char_type(), java_int_type()); + const exprt string1 = make_string_argument("string1"); + const exprt string2 = make_string_argument("string2"); + const exprt string3 = make_string_argument("string3"); + const exprt string4 = make_string_argument("string4"); + const exprt string5 = make_string_argument("string5"); + const exprt string6 = make_string_argument("string6"); + const symbol_exprt lhs("lhs", unsignedbv_typet(32)); + const symbol_exprt lhs2("lhs2", unsignedbv_typet(32)); + const symbol_exprt lhs3("lhs3", unsignedbv_typet(32)); + code_typet fun_type; + + // fun1 is s3 = s1.concat(s2) + function_application_exprt fun1(fun_type); + fun1.function() = symbol_exprt(ID_cprover_string_concat_func); + fun1.arguments().push_back(string3.op0()); + fun1.arguments().push_back(string3.op1()); + fun1.arguments().push_back(string1); + fun1.arguments().push_back(string2); + + // fun2 is s5 = s3.concat(s4) + function_application_exprt fun2(fun_type); + fun2.function() = symbol_exprt(ID_cprover_string_concat_func); + fun2.arguments().push_back(string5.op0()); + fun2.arguments().push_back(string5.op1()); + fun2.arguments().push_back(string3); + fun2.arguments().push_back(string4); + + // fun3 is s6 = s5.concat(s2) + function_application_exprt fun3(fun_type); + fun3.function() = symbol_exprt(ID_cprover_string_concat_func); + fun3.arguments().push_back(string6.op0()); + fun3.arguments().push_back(string6.op1()); + fun3.arguments().push_back(string5); + fun3.arguments().push_back(string2); + + const equal_exprt equation1(lhs, fun1); + const equal_exprt equation2(lhs2, fun2); + const equal_exprt equation3(lhs3, fun3); + + WHEN("We add dependencies") + { + symbol_generatort generator; + array_poolt array_pool(generator); + + bool success = add_node(dependences, equation1, array_pool); + REQUIRE(success); + success = add_node(dependences, equation2, array_pool); + REQUIRE(success); + success = add_node(dependences, equation3, array_pool); + REQUIRE(success); + +#ifdef DEBUG // useful output for visualizing the graph + { + register_language(new_java_bytecode_language); + symbol_tablet symbol_table; + namespacet ns(symbol_table); + dependencies.output_dot(std::cerr, [&](const exprt &expr) { // NOLINT + return from_expr(ns, "", expr); + }); + } +#endif + + // The dot output of the graph would look like: + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // digraph dependencies { + // "string_refinement#char_array_symbol#3" -> primitive0; + // "string_refinement#char_array_symbol#6" -> primitive1; + // "string_refinement#char_array_symbol#9" -> primitive2; + // primitive0 -> "string_refinement#char_array_symbol#1"; + // primitive0 -> "string_refinement#char_array_symbol#2"; + // primitive1 -> "string_refinement#char_array_symbol#3"; + // primitive1 -> "string_refinement#char_array_symbol#5"; + // primitive2 -> "string_refinement#char_array_symbol#6"; + // primitive2 -> "string_refinement#char_array_symbol#2"; + // } + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + const array_string_exprt char_array1 = + array_pool.find(string1.op1(), string1.op0()); + const array_string_exprt char_array2 = + array_pool.find(string2.op1(), string2.op0()); + const array_string_exprt char_array3 = + array_pool.find(string3.op1(), string3.op0()); + const array_string_exprt char_array4 = + array_pool.find(string4.op1(), string4.op0()); + const array_string_exprt char_array5 = + array_pool.find(string5.op1(), string5.op0()); + const array_string_exprt char_array6 = + array_pool.find(string6.op1(), string6.op0()); + + THEN("string3 depends on primitive0") + { + const auto &node = dependences.get_node(char_array3); + const std::vector + &depends = dependences.dependencies(node); + REQUIRE(depends.size() == 1); + const auto &primitive0 = dependences.get_builtin_function(depends[0]); + + THEN("primitive0 depends on string1 and string2") + { + const auto &depends2 = primitive0.string_arguments(); + REQUIRE(depends2.size() == 2); + REQUIRE(depends2[0] == char_array1); + REQUIRE(depends2[1] == char_array2); + } + } + + THEN("string5 depends on primitive1") + { + const auto &node = dependences.get_node(char_array5); + const std::vector + &depends = dependences.dependencies(node); + REQUIRE(depends.size() == 1); + const auto &primitive1 = dependences.get_builtin_function(depends[0]); + + THEN("primitive1 depends on string3 and string4") + { + const auto &depends2 = primitive1.string_arguments(); + REQUIRE(depends2.size() == 2); + REQUIRE(depends2[0] == char_array3); + REQUIRE(depends2[1] == char_array4); + } + } + + THEN("string6 depends on primitive2") + { + const auto &node = dependences.get_node(char_array6); + const std::vector + &depends = dependences.dependencies(node); + REQUIRE(depends.size() == 1); + const auto &primitive2 = dependences.get_builtin_function(depends[0]); + + THEN("primitive2 depends on string5 and string2") + { + const auto &depends2 = primitive2.string_arguments(); + REQUIRE(depends2.size() == 2); + REQUIRE(depends2[0] == char_array5); + REQUIRE(depends2[1] == char_array2); + } + } + } + } +}