Skip to content

Commit c13e602

Browse files
Adding only needed equations in symbol resolution
For cprover strings, adding all the equations to the union_find_replace structure could sometimes become counter performant. Instead here we look at what string expression are need for the function calls and in which equations they appear.
1 parent ae4deff commit c13e602

File tree

2 files changed

+173
-24
lines changed

2 files changed

+173
-24
lines changed

src/solvers/refinement/string_refinement.cpp

+167-24
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Author: Alberto Griggio, [email protected]
2727
#include <solvers/sat/satcheck.h>
2828
#include <solvers/refinement/string_constraint_instantiation.h>
2929
#include <java_bytecode/java_types.h>
30+
#include <unordered_set>
3031

3132
static exprt substitute_array_with_expr(const exprt &expr, const exprt &index);
3233

@@ -452,7 +453,8 @@ static union_find_replacet generate_symbol_resolution_from_equations(
452453
// function applications can be ignored because they will be replaced
453454
// in the convert_function_application step of dec_solve
454455
}
455-
else if(has_char_pointer_subtype(lhs.type()))
456+
else if(lhs.type().id() != ID_pointer &&
457+
has_char_pointer_subtype(lhs.type()))
456458
{
457459
if(rhs.type().id() == ID_struct)
458460
{
@@ -479,12 +481,125 @@ static union_find_replacet generate_symbol_resolution_from_equations(
479481
return solver;
480482
}
481483

482-
/// Symbol resolution for expressions of type string typet
484+
/// Maps equation to expressions contained in them and conversely expressions to
485+
/// equations that contain them. This can be used on a subset of expressions
486+
/// which interests us, in particular strings. Equations are identified by an
487+
/// index of type `std::size_t` for more efficient insertion and lookup.
488+
class equation_symbol_mappingt
489+
{
490+
public:
491+
// Record index of the equations that contain a given expression
492+
std::map<exprt, std::vector<std::size_t>> equations_containing;
493+
// Record expressions that are contained in the equation with the given index
494+
std::unordered_map<std::size_t, std::vector<exprt>> strings_in_equation;
495+
496+
void add(const std::size_t i, const exprt &expr)
497+
{
498+
equations_containing[expr].push_back(i);
499+
strings_in_equation[i].push_back(expr);
500+
}
501+
502+
std::vector<exprt> find_expressions(const std::size_t i)
503+
{
504+
return strings_in_equation[i];
505+
}
506+
507+
std::vector<std::size_t> find_equations(const exprt &expr)
508+
{
509+
return equations_containing[expr];
510+
}
511+
};
512+
513+
/// This is meant to be used on the lhs of an equation with string subtype.
514+
/// \param lhs: expression which is either of string type, or a symbol
515+
/// representing a struct with some string members
516+
/// \return if lhs is a string return this string, if it is a struct return the
517+
/// members of the expression that have string type.
518+
static std::vector<exprt> extract_strings_from_lhs(const exprt &lhs)
519+
{
520+
std::vector<exprt> result;
521+
if(lhs.type() == string_typet())
522+
result.push_back(lhs);
523+
else if(lhs.type().id() == ID_struct)
524+
{
525+
const struct_typet &struct_type = to_struct_type(lhs.type());
526+
for(const auto &comp : struct_type.components())
527+
{
528+
const std::vector<exprt> strings_in_comp = extract_strings_from_lhs(
529+
member_exprt(lhs, comp.get_name(), comp.type()));
530+
result.insert(
531+
result.end(), strings_in_comp.begin(), strings_in_comp.end());
532+
}
533+
}
534+
return result;
535+
}
536+
537+
/// \param expr: an expression
538+
/// \return all subexpressions of type string which are not if_exprt expressions
539+
/// this includes expressions of the form `e.x` if e is a symbol subexpression
540+
/// with a field `x` of type string
541+
static std::vector<exprt> extract_strings(const exprt &expr)
542+
{
543+
std::vector<exprt> result;
544+
for(auto it = expr.depth_begin(); it != expr.depth_end();)
545+
{
546+
if(it->type() == string_typet() && it->id() != ID_if)
547+
{
548+
result.push_back(*it);
549+
it.next_sibling_or_parent();
550+
}
551+
else if(it->id() == ID_symbol)
552+
{
553+
for(const exprt &e : extract_strings_from_lhs(*it))
554+
result.push_back(e);
555+
it.next_sibling_or_parent();
556+
}
557+
else
558+
++it;
559+
}
560+
return result;
561+
}
562+
563+
/// Given an equation on strings, mark these strings as belonging to the same
564+
/// set in the `symbol_resolve` structure. The lhs and rhs of the equation,
565+
/// should have string type or be struct with string members.
566+
/// \param eq: equation to add
567+
/// \param symbol_resolve: structure to which the equation will be added
568+
/// \param ns: namespace
569+
static void add_string_equation_to_symbol_resolution(
570+
const equal_exprt &eq,
571+
union_find_replacet &symbol_resolve,
572+
const namespacet &ns)
573+
{
574+
if(eq.rhs().type() == string_typet())
575+
{
576+
symbol_resolve.make_union(eq.lhs(), eq.rhs());
577+
}
578+
else if(has_string_subtype(eq.lhs().type()))
579+
{
580+
if(eq.rhs().type().id() == ID_struct)
581+
{
582+
const struct_typet &struct_type = to_struct_type(eq.rhs().type());
583+
for(const auto &comp : struct_type.components())
584+
{
585+
const member_exprt lhs_data(eq.lhs(), comp.get_name(), comp.type());
586+
const exprt rhs_data = simplify_expr(
587+
member_exprt(eq.rhs(), comp.get_name(), comp.type()), ns);
588+
add_string_equation_to_symbol_resolution(
589+
equal_exprt(lhs_data, rhs_data), symbol_resolve, ns);
590+
}
591+
}
592+
}
593+
}
594+
595+
/// Symbol resolution for expressions of type string typet.
596+
/// We record equality between these expressions in the output if one of the
597+
/// function calls depends on them.
483598
/// \param equations: list of equations
484599
/// \param ns: namespace
485600
/// \param stream: output stream
486601
/// \return union_find_replacet structure containing the correspondences.
487-
static union_find_replacet string_identifiers_resolution_from_equations(
602+
union_find_replacet string_identifiers_resolution_from_equations(
488603
std::vector<equal_exprt> &equations,
489604
const namespacet &ns,
490605
messaget::mstreamt &stream)
@@ -494,35 +609,63 @@ static union_find_replacet string_identifiers_resolution_from_equations(
494609
"WARNING string_refinement.cpp "
495610
"string_identifiers_resolution_from_equations:";
496611

497-
union_find_replacet result;
498-
for(const equal_exprt &eq : equations)
612+
equation_symbol_mappingt equation_map;
613+
614+
// Indexes of equations that need to be added to the result
615+
std::unordered_set<size_t> required_equations;
616+
std::stack<size_t> equations_to_treat;
617+
618+
for(std::size_t i = 0; i < equations.size(); ++i)
499619
{
500-
if(eq.rhs().type() == string_typet())
501-
result.make_union(eq.lhs(), eq.rhs());
502-
else if(has_string_subtype(eq.lhs().type()))
620+
const equal_exprt &eq = equations[i];
621+
if(eq.rhs().id() == ID_function_application)
622+
{
623+
if(required_equations.insert(i).second)
624+
equations_to_treat.push(i);
625+
626+
std::vector<exprt> rhs_strings = extract_strings(eq.rhs());
627+
for(const auto expr : rhs_strings)
628+
equation_map.add(i, expr);
629+
}
630+
else if(eq.lhs().type().id() != ID_pointer &&
631+
has_string_subtype(eq.lhs().type()))
503632
{
504-
if(eq.rhs().type().id() == ID_struct)
633+
std::vector<exprt> lhs_strings = extract_strings_from_lhs(eq.lhs());
634+
635+
for(const auto expr : lhs_strings)
636+
equation_map.add(i, expr);
637+
638+
if(lhs_strings.empty())
505639
{
506-
const struct_typet &struct_type = to_struct_type(eq.rhs().type());
507-
for(const auto &comp : struct_type.components())
508-
{
509-
if(comp.type() == string_typet())
510-
{
511-
const member_exprt lhs_data(eq.lhs(), comp.get_name(), comp.type());
512-
const exprt rhs_data = simplify_expr(
513-
member_exprt(eq.rhs(), comp.get_name(), comp.type()), ns);
514-
result.make_union(lhs_data, rhs_data);
515-
}
516-
}
640+
stream << log_message << "non struct with string subtype "
641+
<< from_expr(ns, "", eq.lhs()) << "\n * of type "
642+
<< from_type(ns, "", eq.lhs().type()) << eom;
517643
}
518-
else
644+
645+
for(const exprt &expr : extract_strings(eq.rhs()))
646+
equation_map.add(i, expr);
647+
}
648+
}
649+
650+
// transitively add all equations which depend on the equations to treat
651+
while(!equations_to_treat.empty())
652+
{
653+
const std::size_t i = equations_to_treat.top();
654+
equations_to_treat.pop();
655+
for(const exprt &string : equation_map.find_expressions(i))
656+
{
657+
for(const std::size_t j : equation_map.find_equations(string))
519658
{
520-
stream << log_message << "non struct with string subexpr "
521-
<< from_expr(ns, "", eq.rhs()) << "\n * of type "
522-
<< from_type(ns, "", eq.rhs().type()) << eom;
659+
if(required_equations.insert(j).second)
660+
equations_to_treat.push(j);
523661
}
524662
}
525663
}
664+
665+
union_find_replacet result;
666+
for(const std::size_t i : required_equations)
667+
add_string_equation_to_symbol_resolution(equations[i], result, ns);
668+
526669
return result;
527670
}
528671

src/solvers/refinement/string_refinement.h

+6
Original file line numberDiff line numberDiff line change
@@ -116,4 +116,10 @@ bool has_subtype(
116116
const typet &type,
117117
const std::function<bool(const typet &)> &pred);
118118

119+
// Declaration required for unit-test:
120+
union_find_replacet string_identifiers_resolution_from_equations(
121+
std::vector<equal_exprt> &equations,
122+
const namespacet &ns,
123+
messaget::mstreamt &stream);
124+
119125
#endif

0 commit comments

Comments
 (0)