Skip to content

Specification for contains in the case of a constant argument #828

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 5, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/solvers/refinement/string_constraint_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,11 +305,14 @@ class string_constraint_generatort
return args;
}

private:
// Helper functions
exprt int_of_hex_char(const exprt &chr) const;
exprt is_high_surrogate(const exprt &chr) const;
exprt is_low_surrogate(const exprt &chr) const;
exprt character_equals_ignore_case(
exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z);
bool is_constant_string(const string_exprt &expr) const;
};

#endif
125 changes: 99 additions & 26 deletions src/solvers/refinement/string_constraint_generator_testing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,35 @@ exprt string_constraint_generatort::add_axioms_for_is_suffix(

/*******************************************************************\

Function: string_constraint_generatort::is_constant_string

Inputs:
expr - a string expression

Outputs: a Boolean

Purpose: tells whether the given string is a constant

\*******************************************************************/

bool string_constraint_generatort::is_constant_string(
const string_exprt &expr) const
{
if(expr.id()!=ID_struct ||
expr.operands().size()!=2 ||
expr.length().id()!=ID_constant ||
expr.content().id()!=ID_array)
return false;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What of this condition is related to expr being a well-formed string expression, and what to it being a constant string?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@cristina-david I believe the only condition related to "constantness" is the ...!=ID_constant. Why? Would you like them separated?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I was thinking that would make it clearer. Alternatively, you could swap the last two disjuncts to have some grouping of constraints.

for(const auto &element : expr.content().operands())
{
if(element.id()!=ID_constant)
return false;
}
return true;
}

/*******************************************************************\

Function: string_constraint_generatort::add_axioms_for_contains

Inputs: function application with two string arguments
Expand All @@ -206,46 +235,90 @@ exprt string_constraint_generatort::add_axioms_for_contains(
const function_application_exprt &f)
{
assert(f.type()==bool_typet() || f.type().id()==ID_c_bool);
symbol_exprt contains=fresh_boolean("contains");
typecast_exprt tc_contains(contains, f.type());
string_exprt s0=get_string_expr(args(f, 2)[0]);
string_exprt s1=get_string_expr(args(f, 2)[1]);
bool constant=is_constant_string(s1);

symbol_exprt contains=fresh_boolean("contains");
const refined_string_typet ref_type=to_refined_string_type(s0.type());
const typet &index_type=ref_type.get_index_type();

// We add axioms:
// a1 : contains => s0.length >= s1.length
// a2 : 0 <= startpos <= s0.length-s1.length
// a3 : forall qvar<s1.length. contains => s1[qvar]=s0[startpos + qvar]
// a4 : !contains => s1.length > s0.length
// || (forall startpos <= s0.length-s1.length.
// exists witness<s1.length &&s1[witness]!=s0[witness + startpos]
// a1 : contains ==> |s0| >= |s1|
// a2 : 0 <= startpos <= |s0|-|s1|
// a3 : forall qvar < |s1|. contains ==> s1[qvar] = s0[startpos + qvar]
// a4 : !contains ==> |s1| > |s0| ||
// (forall startpos <= |s0| - |s1|.
// exists witness < |s1|. s1[witness] != s0[witness + startpos])

implies_exprt a1(contains, s0.axiom_for_is_longer_than(s1));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this axiom be called something like "axiom_for_is_longer_equal_than" for clarity?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed. I have created an issue to correct these names. They do not convey the exact meaning of the generated axioms: diffblue/test-gen/issues/321.

axioms.push_back(a1);

symbol_exprt startpos=fresh_exist_index("startpos_contains", index_type);
minus_exprt length_diff(s0.length(), s1.length());
and_exprt a2(
and_exprt bounds(
axiom_for_is_positive_index(startpos),
binary_relation_exprt(startpos, ID_le, length_diff));
implies_exprt a2(contains, bounds);
axioms.push_back(a2);

symbol_exprt qvar=fresh_univ_index("QA_contains", index_type);
exprt qvar_shifted=plus_exprt(qvar, startpos);
string_constraintt a3(
qvar, s1.length(), contains, equal_exprt(s1[qvar], s0[qvar_shifted]));
axioms.push_back(a3);

// We rewrite the axiom for !contains as:
// forall startpos <= |s0|-|s1|. (!contains &&|s0| >= |s1| )
// ==> exists witness<|s1|. s1[witness]!=s0[startpos+witness]
string_not_contains_constraintt a4(
from_integer(0, index_type),
plus_exprt(from_integer(1, index_type), length_diff),
and_exprt(not_exprt(contains), s0.axiom_for_is_longer_than(s1)),
from_integer(0, index_type), s1.length(), s0, s1);
axioms.push_back(a4);

return tc_contains;
if(constant)
{
// If the string is constant, we can use a more efficient axiom for a3:
// contains ==> AND_{i < |s1|} s1[i] = s0[startpos + i]
mp_integer s1_length;
assert(!to_integer(s1.length(), s1_length));
exprt::operandst conjuncts;
for(mp_integer i=0; i<s1_length; ++i)
{
exprt expr_i=from_integer(i, index_type);
plus_exprt shifted_i(expr_i, startpos);
conjuncts.push_back(equal_exprt(s1[expr_i], s0[shifted_i]));
}
implies_exprt a3(contains, conjunction(conjuncts));
axioms.push_back(a3);

// The a4 constraint for constant strings translates to:
// !contains ==> |s1| > |s0| ||
// (forall qvar <= |s0| - |s1|.
// !(AND_{i < |s1|} s1[i] == s0[i + qvar])
//
// which we implement as:
// forall qvar <= |s0| - |s1|. (!contains && |s0| >= |s1|)
// ==> !(AND_{i < |s1|} (s1[i] == s0[qvar+i]))
symbol_exprt qvar=fresh_univ_index("QA_contains_constant", index_type);
exprt::operandst conjuncts1;
for(mp_integer i=0; i<s1_length; ++i)
{
exprt expr_i=from_integer(i, index_type);
plus_exprt shifted_i(expr_i, qvar);
conjuncts1.push_back(equal_exprt(s1[expr_i], s0[shifted_i]));
}

string_constraintt a4(
qvar,
plus_exprt(from_integer(1, index_type), length_diff),
and_exprt(not_exprt(contains), s0.axiom_for_is_longer_than(s1)),
not_exprt(conjunction(conjuncts1)));
axioms.push_back(a4);
}
else
{
symbol_exprt qvar=fresh_univ_index("QA_contains", index_type);
exprt qvar_shifted=plus_exprt(qvar, startpos);
string_constraintt a3(
qvar, s1.length(), contains, equal_exprt(s1[qvar], s0[qvar_shifted]));
axioms.push_back(a3);

// We rewrite axiom a4 as:
// forall startpos <= |s0|-|s1|. (!contains && |s0| >= |s1|)
// ==> exists witness < |s1|. s1[witness] != s0[startpos+witness]
string_not_contains_constraintt a4(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that for string_not_contains_constraintt you need to provide the lower bound for the universal quantifier (i.e. 0). Above, for string_constraintt, there is no need for this but you provide the actual index. Is there need for these to be so different to use?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Concerning the the lower bound, there exist two versions of string_constraintt, one with the lower bound, one where the lower bound is 0 by default. I believe it makes sense to keep both versions. We could imagine introducing a new version of string_not_contains_constraintt but I don't think it's really useful.

Specifying the actual quantified variable in string_not_contains_constraintt is not necessary as these constraints are built in a special way (see:

/*******************************************************************\
Function: string_refinementt::instantiate_not_contains
Inputs: a quantified formula representing `not_contains`, and a
list to which to add the created lemmas to
Purpose: instantiate a quantified formula representing `not_contains`
by substituting the quantifiers and generating axioms
\*******************************************************************/
void string_refinementt::instantiate_not_contains(
const string_not_contains_constraintt &axiom, std::list<exprt> &new_lemmas)
{
exprt s0=axiom.s0();
exprt s1=axiom.s1();
debug() << "instantiate not contains " << from_expr(s0) << " : "
<< from_expr(s1) << eom;
expr_sett index_set0=index_set[to_string_expr(s0).content()];
expr_sett index_set1=index_set[to_string_expr(s1).content()];
for(auto it0 : index_set0)
for(auto it1 : index_set1)
{
debug() << from_expr(it0) << " : " << from_expr(it1) << eom;
exprt val=minus_exprt(it0, it1);
exprt witness=generator.get_witness_of(axiom, val);
and_exprt prem_and_is_witness(
axiom.premise(),
equal_exprt(witness, it1));
not_exprt differ(
equal_exprt(
to_string_expr(s0)[it0],
to_string_expr(s1)[it1]));
exprt lemma=implies_exprt(prem_and_is_witness, differ);
new_lemmas.push_back(lemma);
// we put bounds on the witnesses:
// 0 <= v <= |s0| - |s1| ==> 0 <= v+w[v] < |s0| && 0 <= w[v] < |s1|
exprt zero=from_integer(0, val.type());
binary_relation_exprt c1(zero, ID_le, plus_exprt(val, witness));
binary_relation_exprt c2
(to_string_expr(s0).length(), ID_gt, plus_exprt(val, witness));
binary_relation_exprt c3(to_string_expr(s1).length(), ID_gt, witness);
binary_relation_exprt c4(zero, ID_le, witness);
minus_exprt diff(
to_string_expr(s0).length(),
to_string_expr(s1).length());
and_exprt premise(
binary_relation_exprt(zero, ID_le, val),
binary_relation_exprt(diff, ID_ge, val));
exprt witness_bounds=implies_exprt(
premise,
and_exprt(and_exprt(c1, c2), and_exprt(c3, c4)));
new_lemmas.push_back(witness_bounds);
}
}
).

from_integer(0, index_type),
plus_exprt(from_integer(1, index_type), length_diff),
and_exprt(not_exprt(contains), s0.axiom_for_is_longer_than(s1)),
from_integer(0, index_type), s1.length(), s0, s1);
axioms.push_back(a4);
}
return typecast_exprt(contains, f.type());
}