diff --git a/regression/jbmc-strings/StringDependencies/Test.class b/regression/jbmc-strings/StringDependencies/Test.class new file mode 100644 index 00000000000..c06ccc8b8df Binary files /dev/null and b/regression/jbmc-strings/StringDependencies/Test.class differ diff --git a/regression/jbmc-strings/StringDependencies/Test.java b/regression/jbmc-strings/StringDependencies/Test.java new file mode 100644 index 00000000000..80fabe62f0b --- /dev/null +++ b/regression/jbmc-strings/StringDependencies/Test.java @@ -0,0 +1,22 @@ +class Test { + void test(String s) { + // Filter + if(s == null) + return; + + // Act + + // this matters for the final test + String t = s.concat("_foo"); + + // these should be ignored by the solver as they + // do not matter for the final test + String u = s.concat("_bar"); + String v = s.concat("_baz"); + String w = s.concat("_fiz"); + String x = s.concat("_buz"); + + // Assert + assert t.endsWith("foo"); + } +} diff --git a/regression/jbmc-strings/StringDependencies/test.desc b/regression/jbmc-strings/StringDependencies/test.desc new file mode 100644 index 00000000000..8368650a1cf --- /dev/null +++ b/regression/jbmc-strings/StringDependencies/test.desc @@ -0,0 +1,12 @@ +CORE +Test.class +--refine-strings --string-max-length 1000 --string-max-input-length 100 --verbosity 10 Test.class --function Test.test +^EXIT=0$ +^SIGNAL=0$ +assertion at file Test.java line 20 .*: SUCCESS +string_refinement::check_axioms: 3 universal axioms +-- +-- +We check there are exactly 3 universal formulas considered by the solver (2 for +`t = s.concat("_foo")` and 1 for `t.endsWith("foo")`). +The other concatenations should be ignored. diff --git a/regression/jbmc-strings/StringEquals/test.desc b/regression/jbmc-strings/StringEquals/test.desc index b71cf70c218..3e35d340082 100644 --- a/regression/jbmc-strings/StringEquals/test.desc +++ b/regression/jbmc-strings/StringEquals/test.desc @@ -1,6 +1,6 @@ CORE Test.class ---refine-strings --string-max-length 1000 --function Test.check +--refine-strings --string-max-length 40 --function Test.check ^EXIT=10$ ^SIGNAL=0$ assertion at file Test.java line 6 .* SUCCESS diff --git a/src/solvers/refinement/string_builtin_function.cpp b/src/solvers/refinement/string_builtin_function.cpp index 045a85a1c8e..6776bb026e9 100644 --- a/src/solvers/refinement/string_builtin_function.cpp +++ b/src/solvers/refinement/string_builtin_function.cpp @@ -18,8 +18,10 @@ static array_string_exprt make_string( string_transformation_builtin_functiont:: string_transformation_builtin_functiont( + const exprt &return_code, const std::vector &fun_args, array_poolt &array_pool) + : string_builtin_functiont(return_code) { PRECONDITION(fun_args.size() > 2); const auto arg1 = expr_checked_cast(fun_args[2]); @@ -55,8 +57,10 @@ optionalt string_transformation_builtin_functiont::eval( } string_insertion_builtin_functiont::string_insertion_builtin_functiont( + const exprt &return_code, const std::vector &fun_args, array_poolt &array_pool) + : string_builtin_functiont(return_code) { PRECONDITION(fun_args.size() > 4); const auto arg1 = expr_checked_cast(fun_args[2]); @@ -69,8 +73,10 @@ string_insertion_builtin_functiont::string_insertion_builtin_functiont( } string_concatenation_builtin_functiont::string_concatenation_builtin_functiont( + const exprt &return_code, const std::vector &fun_args, array_poolt &array_pool) + : string_insertion_builtin_functiont(return_code) { PRECONDITION(fun_args.size() >= 4 && fun_args.size() <= 6); const auto arg1 = expr_checked_cast(fun_args[2]); @@ -207,3 +213,33 @@ optionalt string_insertion_builtin_functiont::eval( const array_typet type(result.type().subtype(), length); return make_string(result_value, type); } + +string_builtin_function_with_no_evalt::string_builtin_function_with_no_evalt( + const exprt &return_code, + const function_application_exprt &f, + array_poolt &array_pool) + : string_builtin_functiont(return_code), function_application(f) +{ + const std::vector &fun_args = f.arguments(); + std::size_t i = 0; + if(fun_args.size() >= 2 && fun_args[1].type().id() == ID_pointer) + { + string_res = array_pool.find(fun_args[1], fun_args[0]); + i = 2; + } + + for(; i < fun_args.size(); ++i) + { + const auto arg = expr_try_dynamic_cast(fun_args[i]); + // TODO: use is_refined_string_type ? + if( + arg && arg->operands().size() == 2 && + arg->op1().type().id() == ID_pointer) + { + INVARIANT(is_refined_string_type(arg->type()), "should be a string"); + string_args.push_back(array_pool.find(arg->op1(), arg->op0())); + } + else + args.push_back(fun_args[i]); + } +} diff --git a/src/solvers/refinement/string_builtin_function.h b/src/solvers/refinement/string_builtin_function.h index 3ebdd6dc8f3..c6fad6b57da 100644 --- a/src/solvers/refinement/string_builtin_function.h +++ b/src/solvers/refinement/string_builtin_function.h @@ -7,6 +7,7 @@ #include #include #include +#include "string_constraint_generator.h" class array_poolt; @@ -32,8 +33,33 @@ class string_builtin_functiont virtual std::string name() const = 0; -protected: + /// Add constraints ensuring that the value of result expression of the + /// builtin function corresponds to the value of the function call. + virtual exprt + add_constraints(string_constraint_generatort &constraint_generator) const = 0; + + /// Constraint ensuring that the length of the strings are coherent with + /// the function call. + virtual exprt length_constraint() const = 0; + + exprt return_code; + + /// Tells whether the builtin function can be a testing function, that is a + /// function that does not return a string, for instance like `equals`, + /// `indexOf` or `compare`. + virtual bool maybe_testing_function() const + { + return true; + } + +private: string_builtin_functiont() = default; + +protected: + explicit string_builtin_functiont(const exprt &return_code) + : return_code(return_code) + { + } }; /// String builtin_function transforming one string into another @@ -43,7 +69,6 @@ class string_transformation_builtin_functiont : public string_builtin_functiont array_string_exprt result; array_string_exprt input; std::vector args; - exprt return_code; /// Constructor from arguments of a function application. /// The arguments in `fun_args` should be in order: @@ -51,6 +76,7 @@ class string_transformation_builtin_functiont : public string_builtin_functiont /// a string `arg1` of type refined_string_typet, and potentially some /// arguments of primitive types. string_transformation_builtin_functiont( + const exprt &return_code, const std::vector &fun_args, array_poolt &array_pool); @@ -70,6 +96,11 @@ class string_transformation_builtin_functiont : public string_builtin_functiont optionalt eval(const std::function &get_value) const override; + + bool maybe_testing_function() const override + { + return false; + } }; /// Adding a character at the end of a string @@ -82,9 +113,10 @@ class string_concat_char_builtin_functiont /// an integer `result.length`, a character pointer `&result[0]`, /// a string `arg1` of type refined_string_typet, and a character. string_concat_char_builtin_functiont( + const exprt &return_code, const std::vector &fun_args, array_poolt &array_pool) - : string_transformation_builtin_functiont(fun_args, array_pool) + : string_transformation_builtin_functiont(return_code, fun_args, array_pool) { } @@ -96,6 +128,16 @@ class string_concat_char_builtin_functiont { return "concat_char"; } + + exprt add_constraints(string_constraint_generatort &generator) const override + { + return generator.add_axioms_for_concat_char(result, input, args[0]); + } + + exprt length_constraint() const override + { + return length_constraint_for_concat_char(result, input); + } }; /// String inserting a string into another one @@ -106,7 +148,6 @@ class string_insertion_builtin_functiont : public string_builtin_functiont array_string_exprt input1; array_string_exprt input2; std::vector args; - exprt return_code; /// Constructor from arguments of a function application. /// The arguments in `fun_args` should be in order: @@ -115,6 +156,7 @@ class string_insertion_builtin_functiont : public string_builtin_functiont /// a string `arg2` of type refined_string_typet, /// and potentially some arguments of primitive types. string_insertion_builtin_functiont( + const exprt &return_code, const std::vector &fun_args, array_poolt &array_pool); @@ -141,8 +183,34 @@ class string_insertion_builtin_functiont : public string_builtin_functiont return "insert"; } + exprt add_constraints(string_constraint_generatort &generator) const override + { + if(args.size() == 1) + return generator.add_axioms_for_insert(result, input1, input2, args[0]); + if(args.size() == 3) + UNIMPLEMENTED; + UNREACHABLE; + }; + + exprt length_constraint() const override + { + if(args.size() == 1) + return length_constraint_for_insert(result, input1, input2, args[0]); + if(args.size() == 3) + UNIMPLEMENTED; + UNREACHABLE; + }; + + bool maybe_testing_function() const override + { + return false; + } + protected: - string_insertion_builtin_functiont() = default; + explicit string_insertion_builtin_functiont(const exprt &return_code) + : string_builtin_functiont(return_code) + { + } }; class string_concatenation_builtin_functiont final @@ -156,6 +224,7 @@ class string_concatenation_builtin_functiont final /// a string `arg2` of type refined_string_typet, /// optionally followed by an integer `start` and an integer `end`. string_concatenation_builtin_functiont( + const exprt &return_code, const std::vector &fun_args, array_poolt &array_pool); @@ -168,6 +237,26 @@ class string_concatenation_builtin_functiont final { return "concat"; } + + exprt add_constraints(string_constraint_generatort &generator) const override + { + if(args.size() == 0) + return generator.add_axioms_for_concat(result, input1, input2); + if(args.size() == 2) + return generator.add_axioms_for_concat_substr( + result, input1, input2, args[0], args[1]); + UNREACHABLE; + }; + + exprt length_constraint() const override + { + if(args.size() == 0) + return length_constraint_for_concat(result, input1, input2); + if(args.size() == 2) + return length_constraint_for_concat_substr( + result, input1, input2, args[0], args[1]); + UNREACHABLE; + } }; /// String creation from other types @@ -182,6 +271,11 @@ class string_creation_builtin_functiont : public string_builtin_functiont { return result; } + + bool maybe_testing_function() const override + { + return false; + } }; /// String test @@ -197,4 +291,53 @@ class string_test_builtin_functiont : public string_builtin_functiont } }; +/// Functions that are not yet supported in this class but are supported by +/// string_constraint_generatort. +/// \note Ultimately this should be disappear, once all builtin function have +/// a corresponding string_builtin_functiont class. +class string_builtin_function_with_no_evalt : public string_builtin_functiont +{ +public: + function_application_exprt function_application; + optionalt string_res; + std::vector string_args; + std::vector args; + + string_builtin_function_with_no_evalt( + const exprt &return_code, + const function_application_exprt &f, + array_poolt &array_pool); + + std::string name() const override + { + return id2string(function_application.function().get_identifier()); + } + std::vector string_arguments() const override + { + return std::vector(string_args); + } + optionalt string_result() const override + { + return string_res; + } + + optionalt + eval(const std::function &get_value) const override + { + return {}; + } + + exprt add_constraints(string_constraint_generatort &generator) const override + { + return generator.add_axioms_for_function_application(function_application); + }; + + exprt length_constraint() const override + { + // For now, there is no need for implementing that as `add_constraints` + // should always be called on these functions + UNIMPLEMENTED; + } +}; + #endif // CPROVER_SOLVERS_REFINEMENT_STRING_BUILTIN_FUNCTION_H diff --git a/src/solvers/refinement/string_constraint_generator.h b/src/solvers/refinement/string_constraint_generator.h index b7a3397acb9..7fcd2d76574 100644 --- a/src/solvers/refinement/string_constraint_generator.h +++ b/src/solvers/refinement/string_constraint_generator.h @@ -105,10 +105,14 @@ class string_constraint_generatort final /// Axioms are of three kinds: universally quantified string constraint, /// not contains string constraints and simple formulas. const std::vector &get_lemmas() const; + void add_lemma(const exprt &); const std::vector &get_constraints() const; const std::vector & get_not_contains_constraints() const; + /// Clear all constraints and lemmas + void clear_constraints(); + /// Boolean symbols for the results of some string functions const std::vector &get_boolean_symbols() const; @@ -148,6 +152,26 @@ class string_constraint_generatort final return signedbv_typet(32); } + exprt add_axioms_for_concat_char( + const array_string_exprt &res, + const array_string_exprt &s1, + const exprt &c); + exprt add_axioms_for_concat( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2); + exprt add_axioms_for_concat_substr( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2, + const exprt &start_index, + const exprt &end_index); + exprt add_axioms_for_insert( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2, + const exprt &offset); + private: symbol_exprt fresh_boolean(const irep_idt &prefix); array_string_exprt @@ -203,21 +227,7 @@ class string_constraint_generatort final exprt add_axioms_for_empty_string(const function_application_exprt &f); exprt add_axioms_for_char_set(const function_application_exprt &f); exprt add_axioms_for_copy(const function_application_exprt &f); - exprt add_axioms_for_concat( - const array_string_exprt &res, - const array_string_exprt &s1, - const array_string_exprt &s2); - exprt add_axioms_for_concat_char( - const array_string_exprt &res, - const array_string_exprt &s1, - const exprt &c); exprt add_axioms_for_concat_char(const function_application_exprt &f); - exprt add_axioms_for_concat_substr( - const array_string_exprt &res, - const array_string_exprt &s1, - const array_string_exprt &s2, - const exprt &start_index, - const exprt &end_index); exprt add_axioms_for_concat(const function_application_exprt &f); exprt add_axioms_for_concat_code_point(const function_application_exprt &f); exprt add_axioms_for_constant( @@ -244,11 +254,6 @@ class string_constraint_generatort final const typet &index_type, const typet &char_type); - exprt add_axioms_for_insert( - const array_string_exprt &res, - const array_string_exprt &s1, - const array_string_exprt &s2, - const exprt &offset); exprt add_axioms_for_insert(const function_application_exprt &f); exprt add_axioms_for_insert_int(const function_application_exprt &f); exprt add_axioms_for_insert_bool(const function_application_exprt &f); @@ -442,4 +447,23 @@ exprt minimum(const exprt &a, const exprt &b); /// \return expression representing the maximum of two expressions exprt maximum(const exprt &a, const exprt &b); +exprt length_constraint_for_concat_char( + const array_string_exprt &res, + const array_string_exprt &s1); +exprt length_constraint_for_concat( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2); +exprt length_constraint_for_concat_substr( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2, + const exprt &start_index, + const exprt &end_index); +exprt length_constraint_for_insert( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2, + const exprt &offset); + #endif diff --git a/src/solvers/refinement/string_constraint_generator_concat.cpp b/src/solvers/refinement/string_constraint_generator_concat.cpp index 6c258566351..d1cd743b32d 100644 --- a/src/solvers/refinement/string_constraint_generator_concat.cpp +++ b/src/solvers/refinement/string_constraint_generator_concat.cpp @@ -43,10 +43,8 @@ exprt string_constraint_generatort::add_axioms_for_concat_substr( const exprt end1 = maximum(minimum(end_index, s2.length()), start1); // Axiom 1. - lemmas.push_back([&] { // NOLINT - const plus_exprt res_length(s1.length(), minus_exprt(end1, start1)); - return equal_exprt(res.length(), res_length); - }()); + lemmas.push_back( + length_constraint_for_concat_substr(res, s1, s2, start_index, end_index)); // Axiom 2. constraints.push_back([&] { // NOLINT @@ -67,6 +65,34 @@ exprt string_constraint_generatort::add_axioms_for_concat_substr( return from_integer(0, get_return_code_type()); } +/// Add axioms enforcing that the length of `res` is that of the concatenation +/// of `s1` with the substring of `s2` starting at index `start'` +/// and ending at index `end'`. +/// Where start_index' is max(0, start) and end' is +/// max(min(end, s2.length), start') +exprt length_constraint_for_concat_substr( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2, + const exprt &start, + const exprt &end) +{ + const exprt start1 = maximum(start, from_integer(0, start.type())); + const exprt end1 = maximum(minimum(end, s2.length()), start1); + const plus_exprt res_length(s1.length(), minus_exprt(end1, start1)); + return equal_exprt(res.length(), res_length); +} + +/// Add axioms enforcing that the length of `res` is that of the concatenation +/// of `s1` with `s2` +exprt length_constraint_for_concat( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2) +{ + return equal_exprt(res.length(), plus_exprt(s1.length(), s2.length())); +} + /// Add axioms enforcing that `res` is the concatenation of `s1` with /// character `c`. /// These axioms are : @@ -84,9 +110,7 @@ exprt string_constraint_generatort::add_axioms_for_concat_char( const exprt &c) { const typet &index_type = res.length().type(); - const equal_exprt a1( - res.length(), plus_exprt(s1.length(), from_integer(1, index_type))); - lemmas.push_back(a1); + lemmas.push_back(length_constraint_for_concat_char(res, s1)); symbol_exprt idx = fresh_univ_index("QA_index_concat_char", index_type); string_constraintt a2(idx, s1.length(), equal_exprt(s1[idx], res[idx])); @@ -99,6 +123,16 @@ exprt string_constraint_generatort::add_axioms_for_concat_char( return from_integer(0, get_return_code_type()); } +/// Add axioms enforcing that the length of `res` is that of the concatenation +/// of `s1` with +exprt length_constraint_for_concat_char( + const array_string_exprt &res, + const array_string_exprt &s1) +{ + return equal_exprt( + res.length(), plus_exprt(s1.length(), from_integer(1, s1.length().type()))); +} + /// Add axioms enforcing that `res` is equal to the concatenation of `s1` and /// `s2`. /// diff --git a/src/solvers/refinement/string_constraint_generator_insert.cpp b/src/solvers/refinement/string_constraint_generator_insert.cpp index c5c975ffbff..37791fe2932 100644 --- a/src/solvers/refinement/string_constraint_generator_insert.cpp +++ b/src/solvers/refinement/string_constraint_generator_insert.cpp @@ -14,8 +14,14 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com /// Add axioms ensuring the result `res` corresponds to `s1` where we /// inserted `s2` at position `offset`. +/// We write offset' for `max(0, min(res.length, offset))`. +/// These axioms are: +/// 1. res.length = s1.length + s2.length +/// 2. forall i < offset' . res[i] = s1[i] +/// 3. forall i < s2.length. res[i + offset'] = s2[i] +/// 4. forall i in [offset', s1.length). res[i + s2.length] = s1[i] /// This is equivalent to -/// `res=concat(substring(s1, 0, offset), concat(s2, substring(s1, offset)))`. +/// `res=concat(substring(s1, 0, offset'), concat(s2, substring(s1, offset')))`. /// \param res: array of characters expression /// \param s1: array of characters expression /// \param s2: array of characters expression @@ -30,25 +36,47 @@ exprt string_constraint_generatort::add_axioms_for_insert( { PRECONDITION(offset.type()==s1.length().type()); const typet &index_type = s1.length().type(); - const typet &char_type = s1.content().type().subtype(); - array_string_exprt pref = fresh_string(index_type, char_type); - exprt return_code1 = - add_axioms_for_substring(pref, s1, from_integer(0, offset.type()), offset); - array_string_exprt suf = fresh_string(index_type, char_type); - exprt return_code2 = add_axioms_for_substring(suf, s1, offset, s1.length()); - array_string_exprt concat1 = fresh_string(index_type, char_type); - exprt return_code3 = add_axioms_for_concat(concat1, pref, s2); - exprt return_code4 = add_axioms_for_concat(res, concat1, suf); - return if_exprt( - equal_exprt(return_code1, from_integer(0, return_code1.type())), - if_exprt( - equal_exprt(return_code2, from_integer(0, return_code1.type())), - if_exprt( - equal_exprt(return_code3, from_integer(0, return_code1.type())), - return_code4, - return_code3), - return_code2), - return_code1); + const exprt offset1 = + maximum(from_integer(0, index_type), minimum(s1.length(), offset)); + + // Axiom 1. + lemmas.push_back(length_constraint_for_insert(res, s1, s2, offset)); + + // Axiom 2. + constraints.push_back([&] { // NOLINT + const symbol_exprt i = fresh_symbol("QA_insert1", index_type); + return string_constraintt(i, offset1, equal_exprt(res[i], s1[i])); + }()); + + // Axiom 3. + constraints.push_back([&] { // NOLINT + const symbol_exprt i = fresh_symbol("QA_insert2", index_type); + return string_constraintt( + i, s2.length(), equal_exprt(res[plus_exprt(i, offset1)], s2[i])); + }()); + + // Axiom 4. + constraints.push_back([&] { // NOLINT + const symbol_exprt i = fresh_symbol("QA_insert3", index_type); + return string_constraintt( + i, + offset1, + s1.length(), + equal_exprt(res[plus_exprt(i, s2.length())], s1[i])); + }()); + + return from_integer(0, get_return_code_type()); +} + +/// Add axioms ensuring the length of `res` corresponds that of `s1` where we +/// inserted `s2` at position `offset`. +exprt length_constraint_for_insert( + const array_string_exprt &res, + const array_string_exprt &s1, + const array_string_exprt &s2, + const exprt &offset) +{ + return equal_exprt(res.length(), plus_exprt(s1.length(), s2.length())); } /// Insertion of a string in another at a specific index diff --git a/src/solvers/refinement/string_constraint_generator_main.cpp b/src/solvers/refinement/string_constraint_generator_main.cpp index efd85c8165b..97c0287eb96 100644 --- a/src/solvers/refinement/string_constraint_generator_main.cpp +++ b/src/solvers/refinement/string_constraint_generator_main.cpp @@ -39,6 +39,11 @@ const std::vector &string_constraint_generatort::get_lemmas() const return lemmas; } +void string_constraint_generatort::add_lemma(const exprt &expr) +{ + lemmas.push_back(expr); +} + const std::vector & string_constraint_generatort::get_constraints() const { @@ -318,6 +323,13 @@ string_constraint_generatort::get_string_expr(const exprt &expr) return char_array_of_pointer(str.content(), str.length()); } +void string_constraint_generatort::clear_constraints() +{ + lemmas.clear(); + constraints.clear(); + not_contains_constraints.clear(); +} + /// adds standard axioms about the length of the string and its content: * its /// length should be positive * it should not exceed max_string_length * if /// force_printable_characters is true then all characters should belong to the diff --git a/src/solvers/refinement/string_refinement.cpp b/src/solvers/refinement/string_refinement.cpp index 53da9d1cbb6..8cec89c46fb 100644 --- a/src/solvers/refinement/string_refinement.cpp +++ b/src/solvers/refinement/string_refinement.cpp @@ -251,41 +251,6 @@ static std::vector generate_instantiations( return lemmas; } -/// Remove functions applications and create the necessary axioms. -/// \param expr: an expression possibly containing function applications -/// \param generator: generator for the string constraints -/// \return an expression containing no function application -static void substitute_function_applications( - exprt &expr, - string_constraint_generatort &generator) -{ - for(auto it = expr.depth_begin(), itend = expr.depth_end(); - it != itend;) // No ++it - { - if(it->id() == ID_function_application) - { - it.mutate() = - generator.add_axioms_for_function_application( - to_function_application_expr(expr)); - it.next_sibling_or_parent(); - } - else - ++it; - } -} - -/// Remove functions applications and create the necessary axioms. -/// \param equations: vector of equations -/// \param generator: generator for the string constraints -/// \return vector of equations where function application have been replaced -static void substitute_function_applications_in_equations( - std::vector &equations, - string_constraint_generatort &generator) -{ - for(auto &eq : equations) - substitute_function_applications(eq.rhs(), generator); -} - /// Fill the array_pointer correspondence and replace the right hand sides of /// the corresponding equations static void make_char_array_pointer_associations( @@ -674,23 +639,30 @@ decision_proceduret::resultt string_refinementt::dec_solve() string_id_symbol_resolve.replace_expr(eq.rhs()); } + // Generator is also used by get, so we have to use it as a class member + // but we make sure it is cleared at each `dec_solve` call. + generator.clear_constraints(); make_char_array_pointer_associations(generator, equations); #ifdef DEBUG output_equations(debug(), equations, ns); #endif - debug() << "dec_solve: compute dependency graph:" << eom; - for(const equal_exprt &eq : equations) - add_node(dependencies, eq, generator.array_pool); + debug() << "dec_solve: compute dependency graph and remove function " + << "applications captured by the dependencies:" << eom; + const auto new_equation_end = std::remove_if( + equations.begin(), equations.end(), [&](const equal_exprt &eq) { // NOLINT + return add_node(dependencies, eq, generator.array_pool); + }); + equations.erase(new_equation_end, equations.end()); #ifdef DEBUG dependencies.output_dot(debug()); #endif - debug() << "dec_solve: Replace function applications" << eom; - // Generator is also used by get, that's why we use a class member - substitute_function_applications_in_equations(equations, generator); + debug() << "dec_solve: add constraints" << eom; + dependencies.add_constraints(generator); + #ifdef DEBUG output_equations(debug(), equations, ns); #endif diff --git a/src/solvers/refinement/string_refinement_util.cpp b/src/solvers/refinement/string_refinement_util.cpp index 7da97d672c7..0175984f4b5 100644 --- a/src/solvers/refinement/string_refinement_util.cpp +++ b/src/solvers/refinement/string_refinement_util.cpp @@ -17,8 +17,15 @@ #include #include #include +#include #include "string_refinement_util.h" +/// Applies `f` on all strings contained in `e` that are not if-expressions. +/// For instance on input `cond1?s1:cond2?s2:s3` we apply `f` on s1, s2 and s3. +static void for_each_atomic_string( + const array_string_exprt &e, + const std::function f); + bool is_char_type(const typet &type) { return type.id() == ID_unsignedbv && @@ -164,11 +171,10 @@ equation_symbol_mappingt::find_equations(const exprt &expr) } /// Construct a string_builtin_functiont object from a function application -/// \return a unique pointer to the created object, this unique pointer is empty -/// if the function does not correspond to one of the supported -/// builtin_functions. +/// \return a unique pointer to the created object static std::unique_ptr to_string_builtin_function( const function_application_exprt &fun_app, + const exprt &return_code, array_poolt &array_pool) { const auto name = expr_checked_cast(fun_app.function()); @@ -176,18 +182,19 @@ static std::unique_ptr to_string_builtin_function( : name.get_identifier(); if(id == ID_cprover_string_insert_func) - return std::unique_ptr( - new string_insertion_builtin_functiont(fun_app.arguments(), array_pool)); + return util_make_unique( + return_code, fun_app.arguments(), array_pool); if(id == ID_cprover_string_concat_func) return util_make_unique( - fun_app.arguments(), array_pool); + return_code, fun_app.arguments(), array_pool); if(id == ID_cprover_string_concat_char_func) return util_make_unique( - fun_app.arguments(), array_pool); + return_code, fun_app.arguments(), array_pool); - return {}; + return util_make_unique( + return_code, fun_app, array_pool); } string_dependenciest::string_nodet & @@ -210,80 +217,40 @@ string_dependenciest::node_at(const array_string_exprt &e) const return {}; } -string_dependenciest::builtin_function_nodet string_dependenciest::make_node( +string_dependenciest::builtin_function_nodet &string_dependenciest::make_node( std::unique_ptr &builtin_function) { - const builtin_function_nodet builtin_function_node( - builtin_function_nodes.size()); - builtin_function_nodes.emplace_back(); - builtin_function_nodes.back().swap(builtin_function); - return builtin_function_node; + builtin_function_nodes.emplace_back( + std::move(builtin_function), builtin_function_nodes.size()); + return builtin_function_nodes.back(); } const string_builtin_functiont &string_dependenciest::get_builtin_function( const builtin_function_nodet &node) const { - PRECONDITION(node.index < builtin_function_nodes.size()); - return *(builtin_function_nodes[node.index]); + return *node.data; } -const std::vector & -string_dependenciest::dependencies( - const string_dependenciest::string_nodet &node) const +static void for_each_atomic_string( + const array_string_exprt &e, + const std::function f) { - return node.dependencies; + if(e.id() != ID_if) + return f(e); + + const auto if_expr = to_if_expr(e); + for_each_atomic_string(to_array_string_expr(if_expr.true_case()), f); + for_each_atomic_string(to_array_string_expr(if_expr.false_case()), f); } void string_dependenciest::add_dependency( const array_string_exprt &e, const builtin_function_nodet &builtin_function_node) { - if(e.id() == ID_if) - { - const auto if_expr = to_if_expr(e); - const auto &true_case = to_array_string_expr(if_expr.true_case()); - const auto &false_case = to_array_string_expr(if_expr.false_case()); - add_dependency(true_case, builtin_function_node); - add_dependency(false_case, builtin_function_node); - return; - } - string_nodet &string_node = get_node(e); - string_node.dependencies.push_back(builtin_function_node); -} - -void string_dependenciest::add_unknown_dependency(const array_string_exprt &e) -{ - if(e.id() == ID_if) - { - const auto if_expr = to_if_expr(e); - const auto &true_case = to_array_string_expr(if_expr.true_case()); - const auto &false_case = to_array_string_expr(if_expr.false_case()); - add_unknown_dependency(true_case); - add_unknown_dependency(false_case); - return; - } - string_nodet &string_node = get_node(e); - string_node.depends_on_unknown_builtin_function = true; -} - -static void add_unknown_dependency_to_string_subexprs( - string_dependenciest &dependencies, - const function_application_exprt &fun_app, - array_poolt &array_pool) -{ - for(const auto &expr : fun_app.arguments()) - { - std::for_each( - expr.depth_begin(), expr.depth_end(), [&](const exprt &e) { // NOLINT - if(is_refined_string_type(e.type())) - { - const auto &string_struct = expr_checked_cast(e); - const array_string_exprt string = - array_pool.find(string_struct.op1(), string_struct.op0()); - dependencies.add_unknown_dependency(string); - } - }); - } + for_each_atomic_string(e, [&](const array_string_exprt &s) { //NOLINT + string_nodet &string_node = get_node(s); + string_node.dependencies.push_back(builtin_function_node.index); + }); } static void add_dependency_to_string_subexprs( @@ -332,11 +299,9 @@ optionalt string_dependenciest::eval( const auto node = string_nodes[it->second]; const auto &f = node.result_from; - if( - f && node.dependencies.size() == 1 && - !node.depends_on_unknown_builtin_function) + if(f && node.dependencies.size() == 1) { - const auto value_opt = get_builtin_function(*f).eval(get_value); + const auto value_opt = builtin_function_nodes[*f].data->eval(get_value); eval_string_cache[it->second] = value_opt; return value_opt; } @@ -360,15 +325,10 @@ bool add_node( if(!fun_app) return false; - auto builtin_function = to_string_builtin_function(*fun_app, array_pool); - - if(!builtin_function) - { - add_unknown_dependency_to_string_subexprs( - dependencies, *fun_app, array_pool); - return true; - } + auto builtin_function = + to_string_builtin_function(*fun_app, equation.lhs(), array_pool); + CHECK_RETURN(builtin_function != nullptr); const auto &builtin_function_node = dependencies.make_node(builtin_function); // Warning: `builtin_function` has been emptied and should not be used anymore @@ -378,7 +338,16 @@ bool add_node( { dependencies.add_dependency(*string_result, builtin_function_node); auto &node = dependencies.get_node(*string_result); - node.result_from = builtin_function_node; + node.result_from = builtin_function_node.index; + + // Ensure all atomic strings in the argument have an associated node + for(const auto arg : builtin_function_node.data->string_arguments()) + { + for_each_atomic_string( + arg, [&](const array_string_exprt &atomic) { // NOLINT + (void)dependencies.get_node(atomic); + }); + } } else add_dependency_to_string_subexprs( @@ -387,42 +356,56 @@ bool add_node( return true; } -void string_dependenciest::for_each_successor( - const nodet &node, - const std::function &f) const +void string_dependenciest::for_each_dependency( + const builtin_function_nodet &node, + const std::function &f) const { - if(node.kind == nodet::BUILTIN) + for(const auto &s : node.data->string_arguments()) { - const auto &builtin = builtin_function_nodes[node.index]; - for(const auto &s : builtin->string_arguments()) + std::vector> stack({s}); + while(!stack.empty()) { - std::vector> stack({s}); - while(!stack.empty()) + const auto current = stack.back(); + stack.pop_back(); + if(const auto if_expr = expr_try_dynamic_cast(current.get())) { - const auto current = stack.back(); - stack.pop_back(); - if(const auto if_expr = expr_try_dynamic_cast(current.get())) - { - stack.emplace_back(if_expr->true_case()); - stack.emplace_back(if_expr->false_case()); - } - else if(const auto node = node_at(to_array_string_expr(current))) - f(nodet(*node)); - else - UNREACHABLE; + stack.emplace_back(if_expr->true_case()); + stack.emplace_back(if_expr->false_case()); } + else if(const auto string_node = node_at(to_array_string_expr(current))) + f(*string_node); + else + UNREACHABLE; } } - else if(node.kind == nodet::STRING) +} + +void string_dependenciest::for_each_dependency( + const string_nodet &node, + const std::function &f) const +{ + for(const std::size_t &index : node.dependencies) + f(builtin_function_nodes[index]); +} + +void string_dependenciest::for_each_successor( + const nodet &node, + const std::function &f) const +{ + switch(node.kind) { - const auto &s_node = string_nodes[node.index]; - std::for_each( - s_node.dependencies.begin(), - s_node.dependencies.end(), - [&](const builtin_function_nodet &p) { f(nodet(p)); }); + case nodet::BUILTIN: + for_each_dependency( + builtin_function_nodes[node.index], + [&](const string_nodet &n) { return f(nodet(n)); }); + break; + + case nodet::STRING: + for_each_dependency( + string_nodes[node.index], + [&](const builtin_function_nodet &n) { return f(nodet(n)); }); + break; } - else - UNREACHABLE; } void string_dependenciest::for_each_node( @@ -431,7 +414,7 @@ void string_dependenciest::for_each_node( for(const auto string_node : string_nodes) f(nodet(string_node)); for(std::size_t i = 0; i < builtin_function_nodes.size(); ++i) - f(nodet(builtin_function_nodet(i))); + f(nodet(builtin_function_nodes[i])); } void string_dependenciest::output_dot(std::ostream &stream) const @@ -447,7 +430,8 @@ void string_dependenciest::output_dot(std::ostream &stream) const const auto node_to_string = [&](const nodet &n) { // NOLINT std::stringstream ostream; if(n.kind == nodet::BUILTIN) - ostream << builtin_function_nodes[n.index]->name() << n.index; + ostream << '"' << builtin_function_nodes[n.index].data->name() << '_' + << n.index << '"'; else ostream << '"' << format(string_nodes[n.index].expr) << '"'; return ostream.str(); @@ -456,3 +440,34 @@ void string_dependenciest::output_dot(std::ostream &stream) const output_dot_generic(stream, for_each, for_each_succ, node_to_string); stream << '}' << std::endl; } + +void string_dependenciest::add_constraints( + string_constraint_generatort &generator) +{ + std::unordered_set test_dependencies; + for(const auto &builtin : builtin_function_nodes) + { + if(builtin.data->maybe_testing_function()) + test_dependencies.insert(nodet(builtin)); + } + + get_reachable( + test_dependencies, + [&]( + const nodet &n, + const std::function &f) { // NOLINT + for_each_successor(n, f); + }); + + for(const auto &node : builtin_function_nodes) + { + if(test_dependencies.count(nodet(node))) + { + const auto &builtin = builtin_function_nodes[node.index]; + const exprt return_value = builtin.data->add_constraints(generator); + generator.add_lemma(equal_exprt(return_value, builtin.data->return_code)); + } + else + generator.add_lemma(node.data->length_constraint()); + } +} diff --git a/src/solvers/refinement/string_refinement_util.h b/src/solvers/refinement/string_refinement_util.h index f4a0e219adb..e8f7b615e8f 100644 --- a/src/solvers/refinement/string_refinement_util.h +++ b/src/solvers/refinement/string_refinement_util.h @@ -149,15 +149,33 @@ class equation_symbol_mappingt class string_dependenciest { public: - /// A builtin_function node is just an index in the `builtin_function_nodes` - /// vector. + /// A builtin function node contains a builtin function call class builtin_function_nodet { public: + // index in the `builtin_function_nodes` vector std::size_t index; - explicit builtin_function_nodet(std::size_t i) : index(i) + // pointer to the builtin function + std::unique_ptr data; + + explicit builtin_function_nodet( + std::unique_ptr d, + std::size_t i) + : index(i), data(std::move(d)) + { + } + + builtin_function_nodet(builtin_function_nodet &&other) + : index(other.index), data(std::move(other.data)) { } + + builtin_function_nodet &operator=(builtin_function_nodet &&other) + { + index = other.index; + data = std::move(other.data); + return *this; + } }; /// A string node points to builtin_function on which it depends @@ -168,12 +186,12 @@ class string_dependenciest array_string_exprt expr; // index in the string_nodes vector std::size_t index; - // builtin functions on which it depends - std::vector dependencies; + // builtin functions on which it depends, refered by there index in + // builtin_function node vector. + // \todo should these we shared pointers? + std::vector dependencies; // builtin function of which it is the result - optionalt result_from; - // In case it depends on a builtin_function we don't support yet - bool depends_on_unknown_builtin_function = false; + optionalt result_from; explicit string_nodet(array_string_exprt e, const std::size_t index) : expr(std::move(e)), index(index) @@ -187,10 +205,8 @@ class string_dependenciest node_at(const array_string_exprt &e) const; /// `builtin_function` is reset to an empty pointer after the node is created - builtin_function_nodet + builtin_function_nodet & make_node(std::unique_ptr &builtin_function); - const std::vector & - dependencies(const string_nodet &node) const; const string_builtin_functiont & get_builtin_function(const builtin_function_nodet &node) const; @@ -201,8 +217,13 @@ class string_dependenciest const array_string_exprt &e, const builtin_function_nodet &builtin_function); - /// Mark node for `e` as depending on unknown builtin_function - void add_unknown_dependency(const array_string_exprt &e); + /// Applies `f` to each node on which `node` depends + void for_each_dependency( + const string_nodet &node, + const std::function &f) const; + void for_each_dependency( + const builtin_function_nodet &node, + const std::function &f) const; /// Attempt to evaluate the given string from the dependencies and valuation /// of strings on which it depends @@ -217,9 +238,14 @@ class string_dependenciest void output_dot(std::ostream &stream) const; + /// For all builtin call on which a test (or an unsupported buitin) + /// result depends, add the corresponding constraints. For the other builtin + /// only add constraints on the length. + void add_constraints(string_constraint_generatort &generatort); + private: /// Set of nodes representing builtin_functions - std::vector> builtin_function_nodes; + std::vector builtin_function_nodes; /// Set of nodes representing strings std::vector string_nodes; @@ -248,6 +274,23 @@ class string_dependenciest : kind(STRING), index(string_node.index) { } + + bool operator==(const nodet &n) const + { + return n.kind == kind && n.index == index; + } + }; + + /// Hash function for nodes + // NOLINTNEXTLINE(readability/identifiers) + struct node_hash + { + size_t + operator()(const string_dependenciest::nodet &node) const optional_noexcept + { + return 2 * node.index + + (node.kind == string_dependenciest::nodet::STRING ? 0 : 1); + } }; mutable std::vector> eval_string_cache; diff --git a/src/util/graph.h b/src/util/graph.h index ed67703cf9d..13c0db26bc0 100644 --- a/src/util/graph.h +++ b/src/util/graph.h @@ -461,6 +461,38 @@ void grapht::visit_reachable(node_indext src) nodes[index].visited = true; } +/// Add to `set`, nodes that are reachable from `set`. +/// +/// This implements a depth first search using a stack: at each step we pop a +/// node, and push on the stack all its successors that have not yet been +/// visited. +/// \param set: set of source nodes, must be a container with an +/// `insert(const value_type&)` method. +/// \param for_each_successor: function which given a node `n` and a function +/// `f`, applies `f` on all successors of `n`. +template +void get_reachable( + Container &set, + const std::function &)> + &for_each_successor) +{ + std::vector stack; + for(const auto &elt : set) + stack.push_back(elt); + + while(!stack.empty()) + { + auto n = stack.back(); + stack.pop_back(); + for_each_successor(n, [&](const nodet &node) { // NOLINT + if(set.insert(node).second) + stack.push_back(node); + }); + } +} + /// Run depth-first search on the graph, starting from a single source /// node. /// \param src The node to start the search from. diff --git a/unit/solvers/refinement/string_refinement/dependency_graph.cpp b/unit/solvers/refinement/string_refinement/dependency_graph.cpp index 09c4b075fb0..3a0b6e16b62 100644 --- a/unit/solvers/refinement/string_refinement/dependency_graph.cpp +++ b/unit/solvers/refinement/string_refinement/dependency_graph.cpp @@ -3,7 +3,7 @@ Module: Unit tests for dependency graph solvers/refinement/string_refinement.cpp - Author: DiffBlue Limited. All rights reserved. + Author: DiffBlue Ltd. \*******************************************************************/ @@ -47,7 +47,7 @@ SCENARIO("dependency_graph", "[core][solvers][refinement][string_refinement]") { GIVEN("dependency graph") { - string_dependenciest dependences; + string_dependenciest dependencies; refined_string_typet string_type(java_char_type(), java_int_type()); const exprt string1 = make_string_argument("string1"); const exprt string2 = make_string_argument("string2"); @@ -93,11 +93,11 @@ SCENARIO("dependency_graph", "[core][solvers][refinement][string_refinement]") symbol_generatort generator; array_poolt array_pool(generator); - bool success = add_node(dependences, equation1, array_pool); + bool success = add_node(dependencies, equation1, array_pool); REQUIRE(success); - success = add_node(dependences, equation2, array_pool); + success = add_node(dependencies, equation2, array_pool); REQUIRE(success); - success = add_node(dependences, equation3, array_pool); + success = add_node(dependencies, equation3, array_pool); REQUIRE(success); #ifdef DEBUG // useful output for visualizing the graph @@ -105,9 +105,7 @@ SCENARIO("dependency_graph", "[core][solvers][refinement][string_refinement]") register_language(new_java_bytecode_language); symbol_tablet symbol_table; namespacet ns(symbol_table); - dependencies.output_dot(std::cerr, [&](const exprt &expr) { // NOLINT - return from_expr(ns, "", expr); - }); + dependencies.output_dot(std::cerr); } #endif @@ -141,53 +139,59 @@ SCENARIO("dependency_graph", "[core][solvers][refinement][string_refinement]") THEN("string3 depends on primitive0") { - const auto &node = dependences.get_node(char_array3); - const std::vector - &depends = dependences.dependencies(node); - REQUIRE(depends.size() == 1); - const auto &primitive0 = dependences.get_builtin_function(depends[0]); - - THEN("primitive0 depends on string1 and string2") - { - const auto &depends2 = primitive0.string_arguments(); - REQUIRE(depends2.size() == 2); - REQUIRE(depends2[0] == char_array1); - REQUIRE(depends2[1] == char_array2); - } + const auto &node = dependencies.get_node(char_array3); + std::size_t nb_dependencies = 0; + dependencies.for_each_dependency( + node, + [&](const string_dependenciest::builtin_function_nodet &n) { // NOLINT + nb_dependencies++; + THEN("primitive0 depends on string1 and string2") + { + const auto &depends2 = n.data->string_arguments(); + REQUIRE(depends2.size() == 2); + REQUIRE(depends2[0] == char_array1); + REQUIRE(depends2[1] == char_array2); + } + }); + REQUIRE(nb_dependencies == 1); } THEN("string5 depends on primitive1") { - const auto &node = dependences.get_node(char_array5); - const std::vector - &depends = dependences.dependencies(node); - REQUIRE(depends.size() == 1); - const auto &primitive1 = dependences.get_builtin_function(depends[0]); - - THEN("primitive1 depends on string3 and string4") - { - const auto &depends2 = primitive1.string_arguments(); - REQUIRE(depends2.size() == 2); - REQUIRE(depends2[0] == char_array3); - REQUIRE(depends2[1] == char_array4); - } + const auto &node = dependencies.get_node(char_array5); + std::size_t nb_dependencies = 0; + dependencies.for_each_dependency( + node, + [&](const string_dependenciest::builtin_function_nodet &n) { // NOLINT + nb_dependencies++; + THEN("primitive1 depends on string3 and string4") + { + const auto &depends2 = n.data->string_arguments(); + REQUIRE(depends2.size() == 2); + REQUIRE(depends2[0] == char_array3); + REQUIRE(depends2[1] == char_array4); + } + }); + REQUIRE(nb_dependencies == 1); } THEN("string6 depends on primitive2") { - const auto &node = dependences.get_node(char_array6); - const std::vector - &depends = dependences.dependencies(node); - REQUIRE(depends.size() == 1); - const auto &primitive2 = dependences.get_builtin_function(depends[0]); - - THEN("primitive2 depends on string5 and string2") - { - const auto &depends2 = primitive2.string_arguments(); - REQUIRE(depends2.size() == 2); - REQUIRE(depends2[0] == char_array5); - REQUIRE(depends2[1] == char_array2); - } + const auto &node = dependencies.get_node(char_array6); + std::size_t nb_dependencies = 0; + dependencies.for_each_dependency( + node, + [&](const string_dependenciest::builtin_function_nodet &n) { // NOLINT + nb_dependencies++; + THEN("primitive2 depends on string5 and string2") + { + const auto &depends2 = n.data->string_arguments(); + REQUIRE(depends2.size() == 2); + REQUIRE(depends2[0] == char_array5); + REQUIRE(depends2[1] == char_array2); + } + }); + REQUIRE(nb_dependencies == 1); } } }