Skip to content

Commit 072ed0f

Browse files
romainbrenguierJoel Allred
authored and
Joel Allred
committed
Overhaul of string concretization
This fixes diffblue/test-gen#158 Change the concretization process to avoid calling the solver We do this by looking at the indexes for which we have concrete value because they are in the index set and assigning to the others the value of the next concrete value. This is more efficient than calling the solver an extra step and avoid problems because lemmas gets added. Changing the way arrays are concretized in get_array This is to be coherent with the way this is done in the concretize function. Updating comments and assertion of concretize_string Refactor concretize_string Improve the loop in concretize_string and factor out into a pad_vector function. Modifications requested by Peter Schrammel
1 parent 21eeb29 commit 072ed0f

File tree

2 files changed

+121
-24
lines changed

2 files changed

+121
-24
lines changed

src/solvers/refinement/string_refinement.cpp

+69-21
Original file line numberDiff line numberDiff line change
@@ -340,7 +340,16 @@ Function: string_refinementt::concretize_string
340340
341341
Purpose: If the expression is of type string, then adds constants
342342
to the index set to force the solver to pick concrete values
343-
for each character, and fill the map `found_length`
343+
for each character, and fill the maps `found_length` and
344+
`found_content`.
345+
346+
The way this is done is by looking for the length of the string,
347+
then for each `i` in the index set, look at the value found by
348+
the solver and put it in the `result` table.
349+
For indexes that are not present in the index set, we put the
350+
same value as the next index that is present in the index set.
351+
We do so by traversing the array backward, remembering the
352+
last value that has been initialized.
344353
345354
\*******************************************************************/
346355

@@ -357,29 +366,55 @@ void string_refinementt::concretize_string(const exprt &expr)
357366
if(!to_integer(length, found_length))
358367
{
359368
assert(found_length.is_long());
360-
if(found_length<0)
361-
{
362-
// Lengths should not be negative.
363-
// TODO: Add constraints no the sign of string lengths.
364-
debug() << "concretize_results: WARNING found length is negative"
365-
<< eom;
366-
}
367-
else
369+
assert(found_length>=0);
370+
assert(found_length.to_long()<=generator.max_string_length);
371+
size_t concretize_limit=found_length.to_long();
372+
exprt content_expr=str.content();
373+
std::vector<exprt> result;
374+
375+
if(index_set[str.content()].empty())
376+
return;
377+
378+
// Use the last index as the default character value
379+
exprt last_concretized=simplify_expr(
380+
get(str[minus_exprt(length, from_integer(1, length.type()))]), ns);
381+
result.resize(concretize_limit, last_concretized);
382+
383+
// Keep track of the indexes for which we have actual values
384+
std::set<size_t> initialized;
385+
386+
for(const auto &i : index_set[str.content()])
368387
{
369-
size_t concretize_limit=found_length.to_long();
370-
assert(concretize_limit<=generator.max_string_length);
371-
concretize_limit=concretize_limit>generator.max_string_length?
372-
generator.max_string_length:concretize_limit;
373-
exprt content_expr=str.content();
374-
for(size_t i=0; i<concretize_limit; ++i)
388+
mp_integer mp_index;
389+
exprt simple_i=simplify_expr(get(i), ns);
390+
if(to_integer(simple_i, mp_index) ||
391+
mp_index<0 ||
392+
mp_index>=concretize_limit)
375393
{
376-
auto i_expr=from_integer(i, str.length().type());
377-
debug() << "Concretizing " << from_expr(content_expr)
378-
<< " / " << i << eom;
379-
current_index_set[str.content()].insert(i_expr);
380-
index_set[str.content()].insert(i_expr);
394+
debug() << "concretize_string: ignoring out of bound index: "
395+
<< from_expr(simple_i) << eom;
396+
}
397+
else
398+
{
399+
// Add an entry in the result vector
400+
size_t index=mp_index.to_long();
401+
exprt str_i=simplify_expr(str[simple_i], ns);
402+
exprt value=simplify_expr(get(str_i), ns);
403+
result[index]=value;
404+
initialized.insert(index);
381405
}
382406
}
407+
408+
// Pad the concretized values to the left to assign the uninitialized
409+
// values of result. The indices greater than concretize_limit are
410+
// already assigned to last_concretized.
411+
pad_vector(result, initialized, last_concretized);
412+
413+
array_exprt arr(to_array_type(content.type()));
414+
arr.operands()=result;
415+
debug() << "Concretized " << from_expr(content_expr)
416+
<< " = " << from_expr(arr) << eom;
417+
found_content[content]=arr;
383418
}
384419
}
385420
}
@@ -574,6 +609,9 @@ decision_proceduret::resultt string_refinementt::dec_solve()
574609
}
575610
}
576611

612+
found_length.clear();
613+
found_content.clear();
614+
577615
initial_index_set(universal_axioms);
578616
update_index_set(cur);
579617
cur.clear();
@@ -613,7 +651,7 @@ decision_proceduret::resultt string_refinementt::dec_solve()
613651
if(do_concretizing)
614652
{
615653
concretize_results();
616-
do_concretizing=false;
654+
return D_SATISFIABLE;
617655
}
618656
else
619657
{
@@ -769,6 +807,7 @@ exprt string_refinementt::get_array(const exprt &arr, const exprt &size) const
769807

770808
if(arr_val.id()=="array-list")
771809
{
810+
std::set<unsigned> initialized;
772811
for(size_t i=0; i<arr_val.operands().size()/2; i++)
773812
{
774813
exprt index=arr_val.operands()[i*2];
@@ -779,9 +818,14 @@ exprt string_refinementt::get_array(const exprt &arr, const exprt &size) const
779818
{
780819
exprt value=arr_val.operands()[i*2+1];
781820
to_unsigned_integer(to_constant_expr(value), concrete_array[idx]);
821+
initialized.insert(idx);
782822
}
783823
}
784824
}
825+
826+
// Pad the concretized values to the left to assign the uninitialized
827+
// values of result.
828+
pad_vector(concrete_array, initialized, concrete_array[n-1]);
785829
}
786830
else if(arr_val.id()==ID_array)
787831
{
@@ -1803,6 +1847,10 @@ exprt string_refinementt::get(const exprt &expr) const
18031847
replace_expr(symbol_resolve, ecopy);
18041848
if(is_char_array(ecopy.type()))
18051849
{
1850+
auto it_content=found_content.find(ecopy);
1851+
if(it_content!=found_content.end())
1852+
return it_content->second;
1853+
18061854
auto it=found_length.find(ecopy);
18071855
if(it!=found_length.end())
18081856
return get_array(ecopy, it->second);

src/solvers/refinement/string_refinement.h

+52-3
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,11 @@ class string_refinementt: public bv_refinementt
9090
// by the solver
9191
replace_mapt current_model;
9292

93+
// Length of char arrays found during concretization
94+
std::map<exprt, exprt> found_length;
95+
// Content of char arrays found during concretization
96+
std::map<exprt, array_exprt> found_content;
97+
9398
void add_equivalence(const irep_idt & lhs, const exprt & rhs);
9499

95100
void display_index_set();
@@ -138,18 +143,62 @@ class string_refinementt: public bv_refinementt
138143
std::map<exprt, int> &m, const typet &type, bool negated=false) const;
139144

140145
exprt simplify_sum(const exprt &f) const;
146+
template <typename T1, typename T2>
147+
void pad_vector(
148+
std::vector<T1> &result,
149+
std::set<T2> &initialized,
150+
T1 last_concretized) const;
141151

142152
void concretize_string(const exprt &expr);
143153
void concretize_results();
144154
void concretize_lengths();
145155

146-
// Length of char arrays found during concretization
147-
std::map<exprt, exprt> found_length;
148-
149156
exprt get_array(const exprt &arr, const exprt &size) const;
150157
exprt get_array(const exprt &arr) const;
151158

152159
std::string string_of_array(const array_exprt &arr);
153160
};
154161

162+
/*******************************************************************\
163+
164+
Function: string_refinementt::pad_vector
165+
166+
Inputs:
167+
concrete_array - the vector to populate
168+
initialized - the vector containing the indices of the
169+
concretized values
170+
last_concretized - initial value of the last concretized index
171+
172+
Purpose: Utility function for concretization of strings. Copies
173+
concretized values to the left to initialize the
174+
unconcretized indices of concrete_array.
175+
176+
\*******************************************************************/
177+
178+
template <typename T1, typename T2>
179+
void string_refinementt::pad_vector(
180+
std::vector<T1> &concrete_array,
181+
std::set<T2> &initialized,
182+
T1 last_concretized) const
183+
{
184+
// Pad the concretized values to the left to assign the uninitialized
185+
// values of result. The indices greater than concretize_limit are
186+
// already assigned to last_concretized.
187+
for(auto j=initialized.rbegin(); j!=initialized.rend();)
188+
{
189+
size_t i=*j;
190+
// The leftmost index to pad is the value + 1 of the next element in
191+
// 'initialized'. Since we cannot use the binary '+' operator on set
192+
// iterators, we must increment the iterator here instead of in the
193+
// for loop.
194+
j++;
195+
size_t leftmost_index_to_pad=(j!=initialized.rend()?*(j)+1:0);
196+
// pad until we reach the next initialized index (right to left)
197+
while(i>leftmost_index_to_pad)
198+
concrete_array[(i--)-1]=last_concretized;
199+
assert(i==leftmost_index_to_pad);
200+
if(i>0)
201+
last_concretized=concrete_array[i-1];
202+
}
203+
}
155204
#endif

0 commit comments

Comments
 (0)