-
Notifications
You must be signed in to change notification settings - Fork 273
Make String.indexOf and String.lastIndexOf precise #978
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 20 commits
f9199a0
bc59d0c
4f4df8e
9b11653
bc3eedc
666bec0
1eb3263
68cdc6f
2d16c2c
f3f5a19
a4ab7db
1eb7d74
069da51
7edfe0d
3e9668a
b06476c
af98378
80dff96
d4a9b2a
94f99b5
cf5b5d3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,12 +13,16 @@ Author: Romain Brenguier, [email protected] | |
|
||
Function: string_constraint_generatort::add_axioms_for_index_of | ||
|
||
Inputs: a string expression, a character expression and an integer expression | ||
Inputs: | ||
str - a string expression | ||
c - an expression representing a character | ||
from_index - an expression representing an index in the string | ||
|
||
Outputs: a integer expression | ||
|
||
Purpose: add axioms that the returned value is either -1 or greater than | ||
from_index and the character at that position equals to c | ||
Purpose: Add axioms stating that the returned value is the index within | ||
str of the first occurence of c starting the search at from_index, | ||
or -1 if no such character occurs at or after position from_index. | ||
|
||
\*******************************************************************/ | ||
|
||
|
@@ -30,11 +34,11 @@ exprt string_constraint_generatort::add_axioms_for_index_of( | |
symbol_exprt contains=fresh_boolean("contains_in_index_of"); | ||
|
||
// We add axioms: | ||
// a1 : -1 <= index<|str| | ||
// a1 : -1 <= index < |str| | ||
// a2 : !contains <=> index=-1 | ||
// a3 : contains => from_index<=index&&str[index]=c | ||
// a4 : forall n, from_index<=n<index. contains => str[n]!=c | ||
// a5 : forall m, from_index<=n<|str|. !contains => str[m]!=c | ||
// a3 : contains ==> from_index <= index && str[index] = c | ||
// a4 : forall n, n:[from_index,index[. contains ==> str[n] != c | ||
// a5 : forall m, n:[from_index,|str|[. !contains ==> str[m] != c | ||
|
||
exprt minus1=from_integer(-1, index_type); | ||
and_exprt a1( | ||
|
@@ -73,87 +77,238 @@ exprt string_constraint_generatort::add_axioms_for_index_of( | |
|
||
Function: string_constraint_generatort::add_axioms_for_index_of_string | ||
|
||
Inputs: two string expressions and an integer expression | ||
Inputs: | ||
haystack - a string expression | ||
needle - a string expression | ||
from_index - an expression representing an index in strings | ||
|
||
Outputs: a integer expression | ||
Outputs: an integer expression representing the first index of needle in | ||
haystack after from_index, or -1 if there is none | ||
|
||
Purpose: add axioms stating that the returned value is either -1 or greater | ||
than from_index and the string beggining there has prefix substring | ||
Purpose: Add axioms stating that the returned value is the index within | ||
haystack of the first occurence of needle starting the search at | ||
from_index, or -1 if needle does not occur at or after position | ||
from_index. | ||
|
||
\*******************************************************************/ | ||
|
||
exprt string_constraint_generatort::add_axioms_for_index_of_string( | ||
const string_exprt &str, | ||
const string_exprt &substring, | ||
const string_exprt &haystack, | ||
const string_exprt &needle, | ||
const exprt &from_index) | ||
{ | ||
const typet &index_type=str.length().type(); | ||
const typet &index_type=haystack.length().type(); | ||
symbol_exprt offset=fresh_exist_index("index_of", index_type); | ||
symbol_exprt contains=fresh_boolean("contains_substring"); | ||
|
||
// We add axioms: | ||
// a1 : contains => |str|-|substring|>=offset>=from_index | ||
// a2 : !contains => offset=-1 | ||
// a3 : forall 0<=witness<|substring|. | ||
// contains => str[witness+offset]=substring[witness] | ||
// a1 : contains ==> from_index <= offset <= |haystack|-|needle| | ||
// a2 : !contains <=> offset=-1 | ||
// a3 : forall n:[0,|needle|[. | ||
// contains ==> haystack[n+offset]=needle[n] | ||
// a4 : forall n:[from_index,offset[. | ||
// contains ==> (exists m:[0,|needle|[. haystack[m+n] != needle[m]]) | ||
// a5: forall n:[from_index,|haystack|-|needle|[. | ||
// !contains ==> (exists m:[0,|needle|[. haystack[m+n] != needle[m]) | ||
|
||
implies_exprt a1( | ||
contains, | ||
and_exprt( | ||
str.axiom_for_is_longer_than(plus_exprt_with_overflow_check( | ||
substring.length(), offset)), | ||
binary_relation_exprt(offset, ID_ge, from_index))); | ||
binary_relation_exprt(from_index, ID_le, offset), | ||
binary_relation_exprt( | ||
offset, ID_le, minus_exprt(haystack.length(), needle.length())))); | ||
axioms.push_back(a1); | ||
|
||
implies_exprt a2( | ||
equal_exprt a2( | ||
not_exprt(contains), | ||
equal_exprt(offset, from_integer(-1, index_type))); | ||
axioms.push_back(a2); | ||
|
||
symbol_exprt qvar=fresh_univ_index("QA_index_of_string", index_type); | ||
string_constraintt a3( | ||
qvar, | ||
substring.length(), | ||
needle.length(), | ||
contains, | ||
equal_exprt(str[plus_exprt(qvar, offset)], substring[qvar])); | ||
equal_exprt(haystack[plus_exprt(qvar, offset)], needle[qvar])); | ||
axioms.push_back(a3); | ||
|
||
if(!is_constant_string(needle)) | ||
{ | ||
// string_not contains_constraintt are formulas of the form: | ||
// forall x in [lb,ub[. p(x) => exists y in [lb,ub[. s1[x+y] != s2[y] | ||
string_not_contains_constraintt a4( | ||
from_index, | ||
offset, | ||
contains, | ||
from_integer(0, index_type), | ||
needle.length(), | ||
haystack, | ||
needle); | ||
axioms.push_back(a4); | ||
|
||
string_not_contains_constraintt a5( | ||
from_index, | ||
minus_exprt(haystack.length(), needle.length()), | ||
not_exprt(contains), | ||
from_integer(0, index_type), | ||
needle.length(), | ||
haystack, | ||
needle); | ||
axioms.push_back(a5); | ||
} | ||
else | ||
{ | ||
// Unfold the existential quantifier as a disjunction in case of a constant | ||
// a4 && a5 <=> a6: | ||
// forall n:[from_index,|haystack|-|needle|]. | ||
// !contains || n < offset ==> | ||
// haystack[n] != needle[0] || ... || | ||
// haystack[n+|needle|-1] != needle[|needle|-1] | ||
symbol_exprt qvar2=fresh_univ_index("QA_index_of_string_2", index_type); | ||
mp_integer sub_length; | ||
assert(!to_integer(needle.length(), sub_length)); | ||
exprt::operandst disjuncts; | ||
for(mp_integer offset=0; offset<sub_length; ++offset) | ||
{ | ||
exprt expr_offset=from_integer(offset, index_type); | ||
plus_exprt shifted(expr_offset, qvar2); | ||
disjuncts.push_back( | ||
not_exprt(equal_exprt(haystack[shifted], needle[expr_offset]))); | ||
} | ||
|
||
or_exprt premise( | ||
not_exprt(contains), binary_relation_exprt(qvar2, ID_lt, offset)); | ||
minus_exprt length_diff(haystack.length(), needle.length()); | ||
string_constraintt a6( | ||
qvar2, | ||
from_index, | ||
plus_exprt(from_integer(1, index_type), length_diff), | ||
premise, | ||
disjunction(disjuncts)); | ||
axioms.push_back(a6); | ||
} | ||
|
||
return offset; | ||
} | ||
|
||
/*******************************************************************\ | ||
|
||
Function: string_constraint_generatort::add_axioms_for_last_index_of_string | ||
|
||
Inputs: | ||
haystack - a string expression | ||
needle - a string expression | ||
from_index - an expression representing an index in strings | ||
|
||
Outputs: an integer expression representing the last index of needle in | ||
haystack before or at from_index, or -1 if there is none | ||
|
||
Purpose: Add axioms stating that the returned value is the index within | ||
haystack of the last occurence of needle starting the search | ||
backward at from_index (ie the index is smaller or equal to | ||
from_index), or -1 if needle does not occur before from_index. | ||
|
||
\*******************************************************************/ | ||
|
||
exprt string_constraint_generatort::add_axioms_for_last_index_of_string( | ||
const string_exprt &str, | ||
const string_exprt &substring, | ||
const string_exprt &haystack, | ||
const string_exprt &needle, | ||
const exprt &from_index) | ||
{ | ||
const typet &index_type=str.length().type(); | ||
const typet &index_type=haystack.length().type(); | ||
symbol_exprt offset=fresh_exist_index("index_of", index_type); | ||
symbol_exprt contains=fresh_boolean("contains_substring"); | ||
|
||
// We add axioms: | ||
// a1 : contains => |substring| >= length &&offset <= from_index | ||
// a2 : !contains => offset=-1 | ||
// a3 : forall 0 <= witness<substring.length, | ||
// contains => str[witness+offset]=substring[witness] | ||
// a1 : contains ==> offset <= from_index && offset <= |haystack| - |needle| | ||
// a2 : !contains <=> offset = -1 | ||
// a3 : forall n:[0, needle.length[, | ||
// contains ==> haystack[n+offset] = needle[n] | ||
// a4 : forall n:[offset+1, min(from_index, |haystack| - |needle|)]. | ||
// contains ==> | ||
// (exists m:[0,|substring|[. haystack[m+n] != needle[m]]) | ||
// a5: forall n:[0, min(from_index, |haystack| - |needle|)]. | ||
// !contains ==> | ||
// (exists m:[0,|substring|[. haystack[m+n] != needle[m]) | ||
|
||
implies_exprt a1( | ||
contains, | ||
and_exprt( | ||
str.axiom_for_is_longer_than( | ||
plus_exprt_with_overflow_check(substring.length(), offset)), | ||
binary_relation_exprt( | ||
offset, ID_le, minus_exprt(haystack.length(), needle.length())), | ||
binary_relation_exprt(offset, ID_le, from_index))); | ||
axioms.push_back(a1); | ||
|
||
implies_exprt a2( | ||
equal_exprt a2( | ||
not_exprt(contains), | ||
equal_exprt(offset, from_integer(-1, index_type))); | ||
axioms.push_back(a2); | ||
|
||
symbol_exprt qvar=fresh_univ_index("QA_index_of_string", index_type); | ||
equal_exprt constr3(str[plus_exprt(qvar, offset)], substring[qvar]); | ||
string_constraintt a3(qvar, substring.length(), contains, constr3); | ||
equal_exprt constr3(haystack[plus_exprt(qvar, offset)], needle[qvar]); | ||
string_constraintt a3(qvar, needle.length(), contains, constr3); | ||
axioms.push_back(a3); | ||
|
||
// end_index is min(from_index, |str| - |substring|) | ||
minus_exprt length_diff(haystack.length(), needle.length()); | ||
if_exprt end_index( | ||
binary_relation_exprt(from_index, ID_le, length_diff), | ||
from_index, | ||
length_diff); | ||
|
||
if(!is_constant_string(needle)) | ||
{ | ||
string_not_contains_constraintt a4( | ||
plus_exprt(offset, from_integer(1, index_type)), | ||
plus_exprt(end_index, from_integer(1, index_type)), | ||
contains, | ||
from_integer(0, index_type), | ||
needle.length(), | ||
haystack, | ||
needle); | ||
axioms.push_back(a4); | ||
|
||
string_not_contains_constraintt a5( | ||
from_integer(0, index_type), | ||
plus_exprt(end_index, from_integer(1, index_type)), | ||
not_exprt(contains), | ||
from_integer(0, index_type), | ||
needle.length(), | ||
haystack, | ||
needle); | ||
axioms.push_back(a5); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code under the |
||
else | ||
{ | ||
// Unfold the existential quantifier as a disjunction in case of a constant | ||
// a4 && a5 <=> a6: | ||
// forall n:[0, min(from_index,|haystack|-|needle|)]. | ||
// !contains || n > offset ==> | ||
// haystack[n] != needle[0] || ... || | ||
// haystack[n+|needle|-1] != needle[|needle|-1] | ||
symbol_exprt qvar2=fresh_univ_index("QA_index_of_string_2", index_type); | ||
mp_integer sub_length; | ||
assert(!to_integer(needle.length(), sub_length)); | ||
exprt::operandst disjuncts; | ||
for(mp_integer offset=0; offset<sub_length; ++offset) | ||
{ | ||
exprt expr_offset=from_integer(offset, index_type); | ||
plus_exprt shifted(expr_offset, qvar2); | ||
disjuncts.push_back( | ||
not_exprt(equal_exprt(haystack[shifted], needle[expr_offset]))); | ||
} | ||
|
||
or_exprt premise( | ||
not_exprt(contains), binary_relation_exprt(qvar2, ID_gt, offset)); | ||
string_constraintt a6( | ||
qvar2, | ||
from_integer(0, index_type), | ||
plus_exprt(from_integer(1, index_type), end_index), | ||
premise, | ||
disjunction(disjuncts)); | ||
axioms.push_back(a6); | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This implementation also seems to have a problem. When I run cbmc on this example:
I get that the assertion is falsified. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I found the mistake, line 304 the lower bound should be |
||
return offset; | ||
} | ||
|
||
|
@@ -201,6 +356,25 @@ exprt string_constraint_generatort::add_axioms_for_index_of( | |
} | ||
} | ||
|
||
/*******************************************************************\ | ||
|
||
Function: string_constraint_generatort::add_axioms_for_last_index_of | ||
|
||
Inputs: | ||
str - a string expression | ||
c - an expression representing a character | ||
from_index - an expression representing an index in the string | ||
|
||
Outputs: an integer expression representing the last index of c in | ||
str before or at from_index, or -1 if there is none | ||
|
||
Purpose: Add axioms stating that the returned value is the index within | ||
str of the last occurence of c starting the search backward at | ||
from_index, or -1 if no such character occurs at or before | ||
position from_index. | ||
|
||
\*******************************************************************/ | ||
|
||
exprt string_constraint_generatort::add_axioms_for_last_index_of( | ||
const string_exprt &str, const exprt &c, const exprt &from_index) | ||
{ | ||
|
@@ -211,10 +385,10 @@ exprt string_constraint_generatort::add_axioms_for_last_index_of( | |
|
||
// We add axioms: | ||
// a1 : -1 <= i <= from_index | ||
// a2 : (i=-1 <=> !contains) | ||
// a3 : (contains => i <= from_index &&s[i]=c) | ||
// a4 : forall n. i+1 <= n < from_index +1 &&contains => s[n]!=c | ||
// a5 : forall m. 0 <= m < from_index +1 &&!contains => s[m]!=c | ||
// a2 : i = -1 <=> !contains | ||
// a3 : contains ==> (i <= from_index && s[i] = c) | ||
// a4 : forall n:[i+1, from_index+1[ && contains ==> s[n] != c | ||
// a5 : forall m:[0, from_index+1[ && !contains ==> s[m] != c | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it works fine for me with commit 94f99b5 ... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK. I'd like to add/enable the corresponding regressions tests and add them to this PR. I'll do that after Chris' presentation, if that's alright. |
||
|
||
exprt index1=from_integer(1, index_type); | ||
exprt minus1=from_integer(-1, index_type); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not entirely sure this implementation is correct. Though I can't see anything wrong at first sight, the
not_contains
seems to have a problem.For instance, the verification of this java code:
seems to yield an infinite run of cbmc, and I don't think it's a complexity issue. It seems more like something is under-constrained.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
However when the assertions are valid, the verification is very quick (in the constant case).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I found the mistake https://github.com/diffblue/cbmc/pull/978/files/ffc7eb47a2588d26c26a70b36d27aa3d50ecab20#diff-5d51ff60a072ccbdb1bdaf7a1208c154R180 here
qvar
was used whereqvar2
was supposed to be, making the counter example finding part of the solver mistaken.