Skip to content

Commit cad7b3a

Browse files
Merge pull request diffblue#2297 from romainbrenguier/bugfix/starts-with
Fix bugs with String.startsWith in string refinement
2 parents 8ee1c16 + 7a6277d commit cad7b3a

File tree

8 files changed

+174
-76
lines changed

8 files changed

+174
-76
lines changed
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Must be compiled with CProverString:
2+
// javac Test.java ../cprover/CProverString.java
3+
public class Test {
4+
5+
// Reference implementation
6+
public static boolean referenceStartsWith(String s, String prefix, int offset) {
7+
if (offset < 0 || offset > s.length() - prefix.length()) {
8+
return false;
9+
}
10+
11+
for (int i = 0; i < prefix.length(); i++) {
12+
if (org.cprover.CProverString.charAt(s, offset + i)
13+
!= org.cprover.CProverString.charAt(prefix, i)) {
14+
return false;
15+
}
16+
}
17+
return true;
18+
}
19+
20+
public static boolean check(String s, String t, int offset) {
21+
// Filter out null strings
22+
if(s == null || t == null) {
23+
return false;
24+
}
25+
26+
// Act
27+
final boolean result = s.startsWith(t, offset);
28+
29+
// Assert
30+
final boolean referenceResult = referenceStartsWith(s, t, offset);
31+
assert(result == referenceResult);
32+
33+
// Check reachability
34+
assert(result == false);
35+
return result;
36+
}
37+
38+
public static boolean checkDet() {
39+
boolean result = false;
40+
result = "foo".startsWith("foo", 0);
41+
assert(result);
42+
result = "foo".startsWith("f", -1);
43+
assert(!result);
44+
result = "foo".startsWith("oo", 1);
45+
assert(result);
46+
result = "foo".startsWith("f", 1);
47+
assert(!result);
48+
result = "foo".startsWith("bar", 0);
49+
assert(!result);
50+
result = "foo".startsWith("oo", 2);
51+
assert(!result);
52+
assert(false);
53+
return result;
54+
}
55+
56+
public static boolean checkNonDet(String s) {
57+
// Filter
58+
if (s == null) {
59+
return false;
60+
}
61+
62+
// Act
63+
final boolean result = s.startsWith(s, 1);
64+
65+
// Assert
66+
assert(!result);
67+
68+
// Check reachability
69+
assert(false);
70+
return result;
71+
}
72+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CORE
2+
Test.class
3+
--refine-strings --string-max-input-length 10 --unwind 10 --function Test.check
4+
^EXIT=10$
5+
^SIGNAL=0$
6+
assertion at file Test.java line 31 .*: SUCCESS
7+
assertion at file Test.java line 34 .*: FAILURE
8+
--
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
CORE
2+
Test.class
3+
--refine-strings --string-max-input-length 100 --unwind 10 --function Test.checkDet
4+
^EXIT=10$
5+
^SIGNAL=0$
6+
assertion at file Test.java line 41 .*: SUCCESS
7+
assertion at file Test.java line 43 .*: SUCCESS
8+
assertion at file Test.java line 45 .*: SUCCESS
9+
assertion at file Test.java line 47 .*: SUCCESS
10+
assertion at file Test.java line 49 .*: SUCCESS
11+
assertion at file Test.java line 51 .*: SUCCESS
12+
assertion at file Test.java line 52 .*: FAILURE
13+
--
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CORE
2+
Test.class
3+
--refine-strings --string-max-input-length 100 --unwind 10 --function Test.checkNonDet
4+
^EXIT=10$
5+
^SIGNAL=0$
6+
assertion at file Test.java line 66 .*: SUCCESS
7+
assertion at file Test.java line 69 .*: FAILURE
8+
--

src/solvers/README.md

+2-3
Original file line numberDiff line numberDiff line change
@@ -231,9 +231,6 @@ allocates a new string before calling a primitive.
231231
* `cprover_string_is_prefix` :
232232
\copybrief string_constraint_generatort::add_axioms_for_is_prefix
233233
\link string_constraint_generatort::add_axioms_for_is_prefix More... \endlink
234-
* `cprover_string_is_suffix` :
235-
\copybrief string_constraint_generatort::add_axioms_for_is_suffix
236-
\link string_constraint_generatort::add_axioms_for_is_suffix More... \endlink
237234
* `cprover_string_index_of` :
238235
\copybrief string_constraint_generatort::add_axioms_for_index_of(const function_application_exprt &f)
239236
\link string_constraint_generatort::add_axioms_for_index_of(const function_application_exprt &f) More... \endlink
@@ -319,6 +316,8 @@ allocates a new string before calling a primitive.
319316
* `cprover_string_intern` : Never tested.
320317
* `cprover_string_is_empty` :
321318
Should use `cprover_string_length(s) == 0` instead.
319+
* `cprover_string_is_suffix` : Should use `cprover_string_is_prefix` with an
320+
offset argument.
322321
* `cprover_string_empty_string` : Can use literal of empty string instead.
323322
* `cprover_string_of_long` : Should be the same as `cprover_string_of_int`.
324323
* `cprover_string_delete_char_at` : A call to

src/solvers/refinement/string_constraint_generator_testing.cpp

+41-31
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,19 @@ Author: Romain Brenguier, [email protected]
1515
#include <util/deprecate.h>
1616

1717
/// Add axioms stating that the returned expression is true exactly when the
18-
/// first string is a prefix of the second one, starting at position offset.
18+
/// offset is greater or equal to 0 and the first string is a prefix of the
19+
/// second one, starting at position offset.
1920
///
2021
/// These axioms are:
21-
/// 1. \f$ {\tt isprefix} \Rightarrow |str| \ge |{\tt prefix}|+offset \f$
22-
/// 2. \f$ \forall 0 \le qvar<|{\tt prefix}|.\ {\tt isprefix}
23-
/// \Rightarrow s0[witness+{\tt offset}]=s2[witness] \f$
24-
/// 3. \f$ !{\tt isprefix} \Rightarrow |{\tt str}|<|{\tt prefix}|+{\tt offset}
25-
/// \lor (0 \le witness<|{\tt prefix}|
26-
/// \land {\tt str}[witness+{\tt offset}] \ne {\tt prefix}[witness])\f$
22+
/// 1. isprefix => offset_within_bounds
23+
/// 2. forall qvar in [0, |prefix|).
24+
/// isprefix => str[qvar + offset] = prefix[qvar]
25+
/// 3. !isprefix => !offset_within_bounds
26+
/// || 0 <= witness < |prefix|
27+
/// && str[witness+offset] != prefix[witness]
28+
///
29+
/// where offset_within_bounds is:
30+
/// offset >= 0 && offset <= |str| && |str| - offset >= |prefix|
2731
///
2832
/// \param prefix: an array of characters
2933
/// \param str: an array of characters
@@ -34,34 +38,39 @@ exprt string_constraint_generatort::add_axioms_for_is_prefix(
3438
const array_string_exprt &str,
3539
const exprt &offset)
3640
{
37-
symbol_exprt isprefix=fresh_boolean("isprefix");
38-
const typet &index_type=str.length().type();
41+
const symbol_exprt isprefix = fresh_boolean("isprefix");
42+
const typet &index_type = str.length().type();
43+
const exprt offset_within_bounds = and_exprt(
44+
binary_relation_exprt(offset, ID_ge, from_integer(0, offset.type())),
45+
binary_relation_exprt(offset, ID_le, str.length()),
46+
binary_relation_exprt(
47+
minus_exprt(str.length(), offset), ID_ge, prefix.length()));
3948

4049
// Axiom 1.
41-
lemmas.push_back(
42-
implies_exprt(
43-
isprefix, str.axiom_for_length_ge(plus_exprt(prefix.length(), offset))));
44-
45-
symbol_exprt qvar=fresh_univ_index("QA_isprefix", index_type);
46-
string_constraintt a2(
47-
qvar,
48-
prefix.length(),
49-
implies_exprt(
50-
isprefix, equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar])));
51-
constraints.push_back(a2);
52-
53-
symbol_exprt witness=fresh_exist_index("witness_not_isprefix", index_type);
54-
and_exprt witness_diff(
55-
axiom_for_is_positive_index(witness),
56-
and_exprt(
50+
lemmas.push_back(implies_exprt(isprefix, offset_within_bounds));
51+
52+
// Axiom 2.
53+
constraints.push_back([&] {
54+
const symbol_exprt qvar = fresh_univ_index("QA_isprefix", index_type);
55+
const exprt body = implies_exprt(
56+
isprefix, equal_exprt(str[plus_exprt(qvar, offset)], prefix[qvar]));
57+
return string_constraintt(qvar, prefix.length(), body);
58+
}());
59+
60+
// Axiom 3.
61+
lemmas.push_back([&] {
62+
const exprt witness = fresh_exist_index("witness_not_isprefix", index_type);
63+
const exprt strings_differ_at_witness = and_exprt(
64+
axiom_for_is_positive_index(witness),
5765
prefix.axiom_for_length_gt(witness),
58-
notequal_exprt(str[plus_exprt(witness, offset)], prefix[witness])));
59-
or_exprt s0_notpref_s1(
60-
not_exprt(str.axiom_for_length_ge(plus_exprt(prefix.length(), offset))),
61-
witness_diff);
66+
notequal_exprt(str[plus_exprt(witness, offset)], prefix[witness]));
67+
const exprt s1_does_not_start_with_s0 = or_exprt(
68+
not_exprt(offset_within_bounds),
69+
not_exprt(str.axiom_for_length_ge(plus_exprt(prefix.length(), offset))),
70+
strings_differ_at_witness);
71+
return implies_exprt(not_exprt(isprefix), s1_does_not_start_with_s0);
72+
}());
6273

63-
implies_exprt a3(not_exprt(isprefix), s0_notpref_s1);
64-
lemmas.push_back(a3);
6574
return isprefix;
6675
}
6776

@@ -135,6 +144,7 @@ exprt string_constraint_generatort::add_axioms_for_is_empty(
135144
/// \param swap_arguments: boolean flag telling whether the suffix is the second
136145
/// argument or the first argument
137146
/// \return Boolean expression `issuffix`
147+
DEPRECATED("should use `strings_startwith(s0, s1, s1.length - s0.length)`")
138148
exprt string_constraint_generatort::add_axioms_for_is_suffix(
139149
const function_application_exprt &f, bool swap_arguments)
140150
{

src/solvers/refinement/string_refinement.cpp

+30-42
Original file line numberDiff line numberDiff line change
@@ -1833,18 +1833,17 @@ static void update_index_set(
18331833
}
18341834
}
18351835

1836-
/// Finds an index on `str` used in `expr` that contains `qvar`, for instance
1837-
/// with arguments ``(str[k]=='a')``, `str`, and `k`, the function should
1838-
/// return `k`.
1839-
/// If two different such indexes exist, an invariant will fail.
1836+
/// Find indexes of `str` used in `expr` that contains `qvar`, for instance
1837+
/// with arguments ``(str[k+1]=='a')``, `str`, and `k`, the function should
1838+
/// return `k+1`.
18401839
/// \param [in] expr: the expression to search
18411840
/// \param [in] str: the string which must be indexed
18421841
/// \param [in] qvar: the universal variable that must be in the index
1843-
/// \return an index expression in `expr` on `str` containing `qvar`. Returns
1844-
/// an empty optional when `expr` does not contain `str`.
1845-
static optionalt<exprt>
1846-
find_index(const exprt &expr, const exprt &str, const symbol_exprt &qvar)
1842+
/// \return index expressions in `expr` on `str` containing `qvar`.
1843+
static std::unordered_set<exprt, irep_hash>
1844+
find_indexes(const exprt &expr, const exprt &str, const symbol_exprt &qvar)
18471845
{
1846+
decltype(find_indexes(expr, str, qvar)) result;
18481847
auto index_str_containing_qvar = [&](const exprt &e) {
18491848
if(auto index_expr = expr_try_dynamic_cast<index_exprt>(e))
18501849
{
@@ -1855,28 +1854,11 @@ find_index(const exprt &expr, const exprt &str, const symbol_exprt &qvar)
18551854
return false;
18561855
};
18571856

1858-
auto it = std::find_if(
1859-
expr.depth_begin(), expr.depth_end(), index_str_containing_qvar);
1860-
if(it == expr.depth_end())
1861-
return {};
1862-
const exprt &index = to_index_expr(*it).index();
1863-
1864-
// Check that there are no two such indexes
1865-
it.next_sibling_or_parent();
1866-
while(it != expr.depth_end())
1867-
{
1868-
if(index_str_containing_qvar(*it))
1869-
{
1870-
INVARIANT(
1871-
to_index_expr(*it).index() == index,
1872-
"string should always be indexed by same value in a given formula");
1873-
it.next_sibling_or_parent();
1874-
}
1875-
else
1876-
++it;
1877-
}
1878-
1879-
return index;
1857+
std::for_each(expr.depth_begin(), expr.depth_end(), [&](const exprt &e) {
1858+
if(index_str_containing_qvar(e))
1859+
result.insert(to_index_expr(e).index());
1860+
});
1861+
return result;
18801862
}
18811863

18821864
/// Instantiates a string constraint by substituting the quantifiers.
@@ -1897,18 +1879,24 @@ static exprt instantiate(
18971879
const exprt &str,
18981880
const exprt &val)
18991881
{
1900-
const optionalt<exprt> idx = find_index(axiom.body, str, axiom.univ_var);
1901-
if(!idx.has_value())
1902-
return true_exprt();
1903-
1904-
const exprt r = compute_inverse_function(stream, axiom.univ_var, val, *idx);
1905-
implies_exprt instance(
1906-
and_exprt(
1907-
binary_relation_exprt(axiom.univ_var, ID_ge, axiom.lower_bound),
1908-
binary_relation_exprt(axiom.univ_var, ID_lt, axiom.upper_bound)),
1909-
axiom.body);
1910-
replace_expr(axiom.univ_var, r, instance);
1911-
return instance;
1882+
const auto indexes = find_indexes(axiom.body, str, axiom.univ_var);
1883+
INVARIANT(
1884+
str.id() == ID_array || indexes.size() <= 1,
1885+
"non constant array should always be accessed at the same index");
1886+
exprt::operandst conjuncts;
1887+
for(const auto &index : indexes)
1888+
{
1889+
const exprt univ_var_value =
1890+
compute_inverse_function(stream, axiom.univ_var, val, index);
1891+
implies_exprt instance(
1892+
and_exprt(
1893+
binary_relation_exprt(axiom.univ_var, ID_ge, axiom.lower_bound),
1894+
binary_relation_exprt(axiom.univ_var, ID_lt, axiom.upper_bound)),
1895+
axiom.body);
1896+
replace_expr(axiom.univ_var, univ_var_value, instance);
1897+
conjuncts.push_back(instance);
1898+
}
1899+
return conjunction(conjuncts);
19121900
}
19131901

19141902
/// Instantiates a quantified formula representing `not_contains` by

0 commit comments

Comments
 (0)