Skip to content

[Don't Merge] TG-634 String refinement overhaul #1399

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/solvers/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ SRC = $(BOOLEFORCE_SRC) \
refinement/refine_arithmetic.cpp \
refinement/refine_arrays.cpp \
refinement/string_refinement.cpp \
refinement/string_constraint.cpp \
refinement/string_constraint_generator_code_points.cpp \
refinement/string_constraint_generator_comparison.cpp \
refinement/string_constraint_generator_concat.cpp \
Expand Down
78 changes: 78 additions & 0 deletions src/solvers/refinement/environment.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
class environmentt
{
public:
static environmentt java() {
environmentt env;
env.int_type_=java_int_type();
env.char_type_=java_int_type();
env.array_type_=array_typet(env.char_type_, infinity_exprt(env.int_type_));
env.string_type_=refined_string_typet(env.int_type_, env.char_type_);
return env;
}

template<
typename T,
typename std::enable_if<std::is_convertible<T, exprt>::value, int>::type=0>
T make_expr(T&& expr)
{ return std::move(expr); }

constant_exprt make_expr(char i)
{ return from_integer(i, char_type_); }

template<
typename T,
typename std::enable_if<std::is_integral<T>::value, int>::type=0>
constant_exprt make_expr(const T& i)
{ return from_integer(i, int_type_); }

template<
typename T,
typename std::enable_if<std::is_same<T, std::string>::value, int>::type=0>
string_exprt make_expr(const T &str)
{
const constant_exprt length=make_expr(str.length());
array_exprt content(array_type_);
for(const char ch : str)
content.copy_to_operands(make_expr(ch));
return string_exprt(length, content, string_type_);
}

template<
typename T,
typename std::enable_if<
std::is_convertible<T, std::string>::value &&
!std::is_same<T, std::string>::value, int>::type=0>
string_exprt make_expr(const T &str)
{ return make_expr(std::string(str)); }

template<typename...Args>
function_application_exprt make_expr(
const dstringt &id,
typet return_type,
Args&&... args)
{
function_application_exprt func(symbol_exprt(id), return_type);
func.arguments() = { make_expr(std::forward<Args>(args))... };
return func;
}

struct_exprt make_struct_expr(std::map<dstringt, exprt> members)
{
struct_typet type;
for (const auto& pair : members) {
type.components().push_back({ pair.first, pair.second.type() });
}
struct_exprt expr(type);
for (const auto& pair : members) {
expr.operands().push_back(pair.second);
}
return expr;
}
private:
environmentt()=default;

typet int_type_;
typet char_type_;
typet array_type_;
typet string_type_;
};
58 changes: 58 additions & 0 deletions src/solvers/refinement/expr_cast.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include <util/optional.h>

/// Convert exprt to a specific type. Throw bad_cast if conversion
/// cannot be performed
/// Generic case doesn't exist, specialize for different types accordingly
/// TODO: this should go to util

// Tag dispatching struct

template<typename T>
struct expr_cast_implt final { };

template<>
struct expr_cast_implt<mp_integer> final
{
optionalt<mp_integer> operator()(const exprt &expr) const
{
mp_integer out;
if(to_integer(expr, out))
return {};
return out;
}
};

template<>
struct expr_cast_implt<std::size_t> final
{
optionalt<std::size_t> operator()(const exprt &expr) const
{
if(const auto tmp=expr_cast_implt<mp_integer>()(expr))
if(tmp->is_long() && *tmp>=0)
return tmp->to_long();
return {};
}
};

template<>
struct expr_cast_implt<string_exprt> final
{
optionalt<string_exprt> operator()(const exprt &expr) const
{
if(is_refined_string_type(expr.type()))
return to_string_expr(expr);
return {};
}
};

template<typename T>
optionalt<T> expr_cast(const exprt& expr)
{ return expr_cast_implt<T>()(expr); }

template<typename T>
T expr_cast_v(const exprt &expr)
{
const auto maybe=expr_cast<T>(expr);
INVARIANT(maybe, "Bad conversion");
return *maybe;
}
229 changes: 229 additions & 0 deletions src/solvers/refinement/string_constraint.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
#include <stack>
#include "string_constraint.h"
#include <util/simplify_expr.h>

void replace(string_constraintt &axiom, const replace_mapt& symbol_resolve)
{
replace_expr(symbol_resolve, axiom.premise);
replace_expr(symbol_resolve, axiom.body);
replace_expr(symbol_resolve, axiom.univ_var);
replace_expr(symbol_resolve, axiom.upper_bound);
replace_expr(symbol_resolve, axiom.lower_bound);
}

exprt univ_within_bounds(const string_constraintt &axiom)
{
return and_exprt(
binary_relation_exprt(axiom.lower_bound, ID_le, axiom.univ_var),
binary_relation_exprt(axiom.upper_bound, ID_gt, axiom.univ_var));
}

std::string from_expr(
const namespacet &ns,
const irep_idt &identifier,
const string_constraintt &expr)
{
return "forall "+from_expr(ns, identifier, expr.univ_var)+" in ["+
from_expr(ns, identifier, expr.lower_bound)+", "+
from_expr(ns, identifier, expr.upper_bound)+"). "+
from_expr(ns, identifier, expr.premise)+" => "+
from_expr(ns, identifier, expr.body);
}

std::string from_expr(
const namespacet &ns,
const irep_idt &identifier,
const string_not_contains_constraintt &expr)
{
return "forall x in ["+
from_expr(ns, identifier, expr.univ_lower_bound())+", "+
from_expr(ns, identifier, expr.univ_upper_bound())+"). "+
from_expr(ns, identifier, expr.premise())+" => ("+
"exists y in ["+from_expr(ns, identifier, expr.exists_lower_bound())+", "+
from_expr(ns, identifier, expr.exists_upper_bound())+"). "+
from_expr(ns, identifier, expr.s0())+"[x+y] != "+
from_expr(ns, identifier, expr.s1())+"[y])";
}

const string_not_contains_constraintt
&to_string_not_contains_constraint(const exprt &expr)
{
PRECONDITION(expr.id()==ID_string_not_contains_constraint);
DATA_INVARIANT(
expr.operands().size()==7,
string_refinement_invariantt("string_not_contains_constraintt must have 7 "
"operands"));
return static_cast<const string_not_contains_constraintt &>(expr);
}

string_not_contains_constraintt
&to_string_not_contains_constraint(exprt &expr)
{
PRECONDITION(expr.id()==ID_string_not_contains_constraint);
DATA_INVARIANT(
expr.operands().size()==7,
string_refinement_invariantt("string_not_contains_constraintt must have 7 "
"operands"));
return static_cast<string_not_contains_constraintt &>(expr);
}

/// \related string_constraintt
typedef std::map<exprt, std::vector<exprt>> array_index_mapt;

/// \related string_constraintt
class gather_indices_visitort: public const_expr_visitort
{
public:
array_index_mapt indices;

gather_indices_visitort(): indices() {}

void operator()(const exprt &expr) override
{
if(expr.id()==ID_index)
{
const index_exprt &index=to_index_expr(expr);
const exprt &s(index.array());
const exprt &i(index.index());
indices[s].push_back(i);
}
}
};

/// \related string_constraintt
static array_index_mapt gather_indices(const exprt &expr)
{
gather_indices_visitort v;
expr.visit(v);
return v.indices;
}

/// \related string_constraintt
class is_linear_arithmetic_expr_visitort: public const_expr_visitort
{
public:
bool correct;

is_linear_arithmetic_expr_visitort(): correct(true) {}

void operator()(const exprt &expr) override
{
if(expr.id()!=ID_plus && expr.id()!=ID_minus && expr.id()!=ID_unary_minus)
{
// This represents that the expr is a valid leaf, may not be future proof
// or 100% enforced, but is correct prescriptively. All non-sum exprs must
// be leaves.
correct&=expr.operands().empty();
}
}
};

/// \related string_constraintt
static bool is_linear_arithmetic_expr(const exprt &expr)
{
is_linear_arithmetic_expr_visitort v;
expr.visit(v);
return v.correct;
}

/// The universally quantified variable is only allowed to occur in index
/// expressions in the body of a string constraint. This function returns true
/// if this is the case and false otherwise.
/// \related string_constraintt
/// \param [in] expr: The string constraint to check
/// \return true if the universal variable only occurs in index expressions,
/// false otherwise.
static bool universal_only_in_index(const string_constraintt &expr)
{
// For efficiency, we do a depth-first search of the
// body. The exprt visitors do a BFS and hide the stack/queue, so we would
// need to store a map from child to parent.

// The unsigned int represents index depth we are. For example, if we are
// considering the fragment `a[b[x]]` (not inside an index expression), then
// the stack would look something like `[..., (a, 0), (b, 1), (x, 2)]`.
typedef std::pair<exprt, unsigned> valuet;
std::stack<valuet> stack;
// We start at 0 since expr is not an index expression, so expr.body() is not
// in an index expression.
stack.push(valuet(expr.body, 0));
while(!stack.empty())
{
// Inspect current value
const valuet cur=stack.top();
stack.pop();
const exprt e=cur.first;
const unsigned index_depth=cur.second;
const unsigned child_index_depth=index_depth+(e.id()==ID_index?0:1);

// If we found the universal variable not in an index_exprt, fail
if(e==expr.univ_var && index_depth==0)
return false;
else
forall_operands(it, e)
stack.push(valuet(*it, child_index_depth));
}
return true;
}

bool is_valid_string_constraint(
messaget::mstreamt &stream,
const namespacet &ns,
const string_constraintt &expr)
{
const auto eom=messaget::eom;
// Condition 1: The premise cannot contain any string indices
const array_index_mapt premise_indices=gather_indices(expr.premise);
if(!premise_indices.empty())
{
stream << "Premise has indices: " << from_expr(ns, "", expr) << ", map: {";
for(const auto &pair : premise_indices)
{
stream << from_expr(ns, "", pair.first) << ": {";
for(const auto &i : pair.second)
stream << from_expr(ns, "", i) << ", ";
}
stream << "}}" << eom;
return false;
}

const array_index_mapt body_indices=gather_indices(expr.body);
// Must validate for each string. Note that we have an invariant that the
// second value in the pair is non-empty.
for(const auto &pair : body_indices)
{
// Condition 2: All indices of the same string must be the of the same form
const exprt rep=pair.second.back();
for(size_t j=0; j<pair.second.size()-1; j++)
{
const exprt i=pair.second[j];
const equal_exprt equals(rep, i);
const exprt result=simplify_expr(equals, ns);
if(result.is_false())
{
stream << "Indices not equal: " << from_expr(ns, "", expr) << ", str: "
<< from_expr(ns, "", pair.first) << eom;
return false;
}
}

// Condition 3: f must be linear
if(!is_linear_arithmetic_expr(rep))
{
stream << "f is not linear: " << from_expr(ns, "", expr) << ", str: "
<< from_expr(ns, "", pair.first) << eom;
return false;
}

// Condition 4: the quantified variable can only occur in indices in the
// body
if(!universal_only_in_index(expr))
{
stream << "Universal variable outside of index:"
<< from_expr(ns, "", expr) << eom;
return false;
}
}

return true;
}
Loading