Skip to content

Corrections to the string solver #675

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/solvers/refinement/string_constraint.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ class string_constraintt: public exprt
return operands()[4];
}


private:
string_constraintt();

Expand Down
45 changes: 26 additions & 19 deletions src/solvers/refinement/string_constraint_generator.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Author: Romain Brenguier, [email protected]
#define CPROVER_SOLVERS_REFINEMENT_STRING_CONSTRAINT_GENERATOR_H

#include <util/string_expr.h>
#include <util/replace_expr.h>
#include <util/refined_string_type.h>
#include <solvers/refinement/string_constraint.h>

Expand Down Expand Up @@ -65,21 +66,20 @@ class string_constraint_generatort
symbol_exprt fresh_univ_index(const irep_idt &prefix, const typet &type);
symbol_exprt fresh_boolean(const irep_idt &prefix);
string_exprt fresh_string(const refined_string_typet &type);
string_exprt get_string_expr(const exprt &expr);
string_exprt convert_java_string_to_string_exprt(
const exprt &underlying);
plus_exprt plus_exprt_with_overflow_check(const exprt &op1, const exprt &op2);

// We maintain a map from symbols to strings.
std::map<irep_idt, string_exprt> symbol_to_string;
// Maps unresolved symbols to the string_exprt that was created for them
std::map<irep_idt, string_exprt> unresolved_symbols;

string_exprt find_or_add_string_of_symbol(const symbol_exprt &sym);

void assign_to_symbol(
const symbol_exprt &sym, const string_exprt &expr)
{
symbol_to_string[sym.get_identifier()]=expr;
}
string_exprt find_or_add_string_of_symbol(
const symbol_exprt &sym,
const refined_string_typet &ref_type);

string_exprt add_axioms_for_string_expr(const exprt &expr);
void set_string_symbol_equal_to_expr(
const symbol_exprt &sym, const exprt &str);
string_exprt add_axioms_for_refined_string(const exprt &expr);

exprt add_axioms_for_function_application(
const function_application_exprt &expr);
Expand All @@ -96,6 +96,8 @@ class string_constraint_generatort
const std::size_t MAX_FLOAT_LENGTH=15;
const std::size_t MAX_DOUBLE_LENGTH=30;

std::map<function_application_exprt, exprt> function_application_cache;

static irep_idt extract_java_string(const symbol_exprt &s);

exprt axiom_for_is_positive_index(const exprt &x);
Expand All @@ -117,6 +119,9 @@ class string_constraint_generatort
// The specification is partial: the actual value is not actually computed
// but we ensure that hash codes of equal strings are equal.
exprt add_axioms_for_hash_code(const function_application_exprt &f);
// To each string on which hash_code was called we associate a symbol
// representing the return value of the hash_code function.
std::map<string_exprt, exprt> hash_code_of_string;

exprt add_axioms_for_is_empty(const function_application_exprt &f);
exprt add_axioms_for_is_prefix(
Expand Down Expand Up @@ -224,7 +229,9 @@ class string_constraint_generatort
// the start for negative number
string_exprt add_axioms_from_float(const function_application_exprt &f);
string_exprt add_axioms_from_float(
const exprt &f, bool double_precision=false);
const exprt &f,
const refined_string_typet &ref_type,
bool double_precision);

// Add axioms corresponding to the String.valueOf(D) java function
// TODO: the specifications is only partial
Expand Down Expand Up @@ -253,6 +260,7 @@ class string_constraint_generatort
string_exprt add_axioms_for_code_point(
const exprt &code_point, const refined_string_typet &ref_type);
string_exprt add_axioms_for_java_char_array(const exprt &char_array);
exprt add_axioms_for_char_pointer(const function_application_exprt &fun);
string_exprt add_axioms_for_if(const if_exprt &expr);
exprt add_axioms_for_char_literal(const function_application_exprt &f);

Expand All @@ -270,6 +278,8 @@ class string_constraint_generatort
const function_application_exprt &f);

exprt add_axioms_for_parse_int(const function_application_exprt &f);
exprt add_axioms_for_correct_number_format(
const string_exprt &str, std::size_t max_size=10);
exprt add_axioms_for_to_char_array(const function_application_exprt &f);
exprt add_axioms_for_compare_to(const function_application_exprt &f);

Expand All @@ -278,6 +288,9 @@ class string_constraint_generatort
// string pointers
symbol_exprt add_axioms_for_intern(const function_application_exprt &f);

// Pool used for the intern method
std::map<string_exprt, symbol_exprt> intern_of_string;

// Tells which language is used. C and Java are supported
irep_idt mode;

Expand All @@ -293,14 +306,8 @@ class string_constraint_generatort
exprt int_of_hex_char(const exprt &chr) const;
exprt is_high_surrogate(const exprt &chr) const;
exprt is_low_surrogate(const exprt &chr) const;
static exprt character_equals_ignore_case(
exprt character_equals_ignore_case(
exprt char1, exprt char2, exprt char_a, exprt char_A, exprt char_Z);

// Pool used for the intern method
std::map<string_exprt, symbol_exprt> pool;

// Used to determine whether hashcode should be equal
std::map<string_exprt, symbol_exprt> hash;
};

#endif
18 changes: 10 additions & 8 deletions src/solvers/refinement/string_constraint_generator_code_points.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,17 +160,18 @@ exprt string_constraint_generatort::add_axioms_for_code_point_at(
{
typet return_type=f.type();
assert(return_type.id()==ID_signedbv);
string_exprt str=add_axioms_for_string_expr(args(f, 2)[0]);
string_exprt str=get_string_expr(args(f, 2)[0]);
const exprt &pos=args(f, 2)[1];

symbol_exprt result=fresh_symbol("char", return_type);
exprt index1=from_integer(1, str.length().type());
const exprt &char1=str[pos];
const exprt &char2=str[plus_exprt(pos, index1)];
const exprt &char2=str[plus_exprt_with_overflow_check(pos, index1)];
exprt char1_as_int=typecast_exprt(char1, return_type);
exprt char2_as_int=typecast_exprt(char2, return_type);
exprt pair=pair_value(char1_as_int, char2_as_int, return_type);
exprt is_low=is_low_surrogate(str[plus_exprt(pos, index1)]);
exprt is_low=is_low_surrogate(
str[plus_exprt_with_overflow_check(pos, index1)]);
exprt return_pair=and_exprt(is_high_surrogate(str[pos]), is_low);

axioms.push_back(implies_exprt(return_pair, equal_exprt(result, pair)));
Expand Down Expand Up @@ -199,7 +200,7 @@ exprt string_constraint_generatort::add_axioms_for_code_point_before(
typet return_type=f.type();
assert(return_type.id()==ID_signedbv);
symbol_exprt result=fresh_symbol("char", return_type);
string_exprt str=add_axioms_for_string_expr(args[0]);
string_exprt str=get_string_expr(args[0]);

const exprt &char1=
str[minus_exprt(args[1], from_integer(2, str.length().type()))];
Expand Down Expand Up @@ -234,7 +235,7 @@ Function: string_constraint_generatort::add_axioms_for_code_point_count
exprt string_constraint_generatort::add_axioms_for_code_point_count(
const function_application_exprt &f)
{
string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]);
string_exprt str=get_string_expr(args(f, 3)[0]);
const exprt &begin=args(f, 3)[1];
const exprt &end=args(f, 3)[2];
const typet &return_type=f.type();
Expand Down Expand Up @@ -265,14 +266,15 @@ Function: string_constraint_generatort::add_axioms_for_offset_by_code_point
exprt string_constraint_generatort::add_axioms_for_offset_by_code_point(
const function_application_exprt &f)
{
string_exprt str=add_axioms_for_string_expr(args(f, 3)[0]);
string_exprt str=get_string_expr(args(f, 3)[0]);
const exprt &index=args(f, 3)[1];
const exprt &offset=args(f, 3)[2];
const typet &return_type=f.type();
symbol_exprt result=fresh_symbol("offset_by_code_point", return_type);

exprt minimum=plus_exprt(index, offset);
exprt maximum=plus_exprt(index, plus_exprt(offset, offset));
exprt minimum=plus_exprt_with_overflow_check(index, offset);
exprt maximum=plus_exprt_with_overflow_check(
index, plus_exprt_with_overflow_check(offset, offset));
axioms.push_back(binary_relation_exprt(result, ID_le, maximum));
axioms.push_back(binary_relation_exprt(result, ID_ge, minimum));

Expand Down
102 changes: 55 additions & 47 deletions src/solvers/refinement/string_constraint_generator_comparison.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ Function: string_constraint_generatort::add_axioms_for_equals
Outputs: a expression of Boolean type

Purpose: add axioms stating that the result is true exactly when the strings
represented by the arguments are equal
represented by the arguments are equal.
the variable ending in `witness_unequal` is -1 if the length differs
or an index at which the strings are different

\*******************************************************************/

Expand All @@ -29,8 +31,8 @@ exprt string_constraint_generatort::add_axioms_for_equals(
symbol_exprt eq=fresh_boolean("equal");
typecast_exprt tc_eq(eq, f.type());

string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]);
string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]);
string_exprt s1=get_string_expr(args(f, 2)[0]);
string_exprt s2=get_string_expr(args(f, 2)[1]);
typet index_type=s1.length().type();

// We want to write:
Expand All @@ -54,9 +56,10 @@ exprt string_constraint_generatort::add_axioms_for_equals(
binary_relation_exprt(witness, ID_lt, s1.length()),
binary_relation_exprt(witness, ID_ge, zero));
and_exprt witnessing(bound_witness, notequal_exprt(s1[witness], s2[witness]));
implies_exprt a3(
not_exprt(eq),
or_exprt(notequal_exprt(s1.length(), s2.length()), witnessing));
and_exprt diff_length(
notequal_exprt(s1.length(), s2.length()),
equal_exprt(witness, from_integer(-1, index_type)));
implies_exprt a3(not_exprt(eq), or_exprt(diff_length, witnessing));
axioms.push_back(a3);

return tc_eq;
Expand Down Expand Up @@ -92,8 +95,13 @@ exprt string_constraint_generatort::character_equals_ignore_case(
// p3 : (is_up2&&'a'-'A'+char2=char1)
equal_exprt p1(char1, char2);
minus_exprt diff=minus_exprt(char_a, char_A);
and_exprt p2(is_upper_case_1, equal_exprt(plus_exprt(diff, char1), char2));
and_exprt p3(is_upper_case_2, equal_exprt(plus_exprt(diff, char2), char1));

// Overflow is not a problem here because is_upper_case conditions
// ensure that we are within a safe range.
and_exprt p2(is_upper_case_1,
equal_exprt(plus_exprt(diff, char1), char2));
and_exprt p3(is_upper_case_2,
equal_exprt(plus_exprt(diff, char2), char1));
return or_exprt(or_exprt(p1, p2), p3);
}

Expand All @@ -116,8 +124,8 @@ exprt string_constraint_generatort::add_axioms_for_equals_ignore_case(

symbol_exprt eq=fresh_boolean("equal_ignore_case");
typecast_exprt tc_eq(eq, f.type());
string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]);
string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]);
string_exprt s1=get_string_expr(args(f, 2)[0]);
string_exprt s2=get_string_expr(args(f, 2)[1]);
typet char_type=to_refined_string_type(s1.type()).get_char_type();
exprt char_a=constant_char('a', char_type);
exprt char_A=constant_char('A', char_type);
Expand Down Expand Up @@ -174,37 +182,35 @@ Function: string_constraint_generatort::add_axioms_for_hash_code
exprt string_constraint_generatort::add_axioms_for_hash_code(
const function_application_exprt &f)
{
string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]);
string_exprt str=get_string_expr(args(f, 1)[0]);
typet return_type=f.type();
typet index_type=str.length().type();

// initialisation of the missing pool variable
std::map<irep_idt, string_exprt>::iterator it;
for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++)
if(hash.find(it->second)==hash.end())
hash[it->second]=fresh_symbol("hash", return_type);
auto pair=hash_code_of_string.insert(
std::make_pair(str, fresh_symbol("hash", return_type)));
exprt hash=pair.first->second;

// for each string s. either:
// c1: hash(str)=hash(s)
// c2: |str|!=|s|
// c3: (|str|==|s| &&exists i<|s|. s[i]!=str[i])
// c3: (|str|==|s| && exists i<|s|. s[i]!=str[i])

// WARNING: the specification may be incomplete
for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++)
for(auto it : hash_code_of_string)
{
symbol_exprt i=fresh_exist_index("index_hash", index_type);
equal_exprt c1(hash[it->second], hash[str]);
not_exprt c2(equal_exprt(it->second.length(), str.length()));
equal_exprt c1(it.second, hash);
not_exprt c2(equal_exprt(it.first.length(), str.length()));
and_exprt c3(
equal_exprt(it->second.length(), str.length()),
equal_exprt(it.first.length(), str.length()),
and_exprt(
not_exprt(equal_exprt(str[i], it->second[i])),
not_exprt(equal_exprt(str[i], it.first[i])),
and_exprt(
str.axiom_for_is_strictly_longer_than(i),
axiom_for_is_positive_index(i))));
axioms.push_back(or_exprt(c1, or_exprt(c2, c3)));
}
return hash[str];
return hash;
}

/*******************************************************************\
Expand All @@ -222,8 +228,8 @@ Function: string_constraint_generatort::add_axioms_for_compare_to
exprt string_constraint_generatort::add_axioms_for_compare_to(
const function_application_exprt &f)
{
string_exprt s1=add_axioms_for_string_expr(args(f, 2)[0]);
string_exprt s2=add_axioms_for_string_expr(args(f, 2)[1]);
string_exprt s1=get_string_expr(args(f, 2)[0]);
string_exprt s2=get_string_expr(args(f, 2)[1]);
const typet &return_type=f.type();
symbol_exprt res=fresh_symbol("compare_to", return_type);
typet index_type=s1.length().type();
Expand All @@ -234,12 +240,12 @@ exprt string_constraint_generatort::add_axioms_for_compare_to(
// a1 : res==0 => |s1|=|s2|
// a2 : forall i<|s1|. s1[i]==s2[i]
// a3 : exists x.
// res!=0 ==> x> 0 &&
// ((|s1| <= |s2| &&x<|s1|) || (|s1| >= |s2| &&x<|s2|)
// &&res=s1[x]-s2[x] )
// || cond2:
// (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|)
// a4 : forall i<x. res!=0 => s1[i]=s2[i]
// res!=0 ==> x > 0
// && ((|s1| <= |s2| && x<|s1|) || (|s1| >= |s2| &&x<|s2|)
// && res=s1[x]-s2[x] )
// || cond2:
// (|s1|<|s2| &&x=|s1|) || (|s1| > |s2| &&x=|s2|) &&res=|s1|-|s2|)
// a4 : forall i'<x. res!=0 => s1[i]=s2[i]

assert(return_type.id()==ID_signedbv);

Expand Down Expand Up @@ -282,7 +288,9 @@ exprt string_constraint_generatort::add_axioms_for_compare_to(
or_exprt(cond1, cond2)));
axioms.push_back(a3);

string_constraintt a4(i, x, not_exprt(res_null), equal_exprt(s1[i], s2[i]));
symbol_exprt i2=fresh_univ_index("QA_compare_to", index_type);
string_constraintt a4(
i2, x, not_exprt(res_null), equal_exprt(s1[i2], s2[i2]));
axioms.push_back(a4);

return res;
Expand All @@ -304,46 +312,46 @@ Function: string_constraint_generatort::add_axioms_for_intern
symbol_exprt string_constraint_generatort::add_axioms_for_intern(
const function_application_exprt &f)
{
string_exprt str=add_axioms_for_string_expr(args(f, 1)[0]);
string_exprt str=get_string_expr(args(f, 1)[0]);
// For now we only enforce content equality and not pointer equality
const typet &return_type=f.type();

typet index_type=str.length().type();

// initialisation of the missing pool variable
std::map<irep_idt, string_exprt>::iterator it;
for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++)
if(pool.find(it->second)==pool.end())
pool[it->second]=fresh_symbol("pool", return_type);
auto pair=intern_of_string.insert(
std::make_pair(str, fresh_symbol("pool", return_type)));
symbol_exprt intern=pair.first->second;

// intern(str)=s_0 || s_1 || ...
// for each string s.
// intern(str)=intern(s) || |str|!=|s|
// || (|str|==|s| &&exists i<|s|. s[i]!=str[i])

exprt disj=false_exprt();
for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++)
for(auto it : intern_of_string)
disj=or_exprt(
disj, equal_exprt(pool[str], symbol_exprt(it->first, return_type)));
disj, equal_exprt(intern, it.second));

axioms.push_back(disj);


// WARNING: the specification may be incomplete or incorrect
for(it=symbol_to_string.begin(); it!=symbol_to_string.end(); it++)
if(it->second!=str)
for(auto it : intern_of_string)
if(it.second!=str)
{
symbol_exprt i=fresh_exist_index("index_intern", index_type);
axioms.push_back(
or_exprt(
equal_exprt(pool[it->second], pool[str]),
equal_exprt(it.second, intern),
or_exprt(
not_exprt(str.axiom_for_has_same_length_as(it->second)),
not_exprt(str.axiom_for_has_same_length_as(it.first)),
and_exprt(
str.axiom_for_has_same_length_as(it->second),
str.axiom_for_has_same_length_as(it.first),
and_exprt(
not_exprt(equal_exprt(str[i], it->second[i])),
not_exprt(equal_exprt(str[i], it.first[i])),
and_exprt(str.axiom_for_is_strictly_longer_than(i),
axiom_for_is_positive_index(i)))))));
}

return pool[str];
return intern;
}
Loading