Skip to content

Commit 195a296

Browse files
committed
Add quick filter into goto-synthesizer
The quick filter check if a candidate is consistent with the previous seen examples by evaluating the candidate on the examples. The idea is that quick filter is more efficient than CBMC for checking if a candidate is incorrect. So the synthesizer can use the quick filter to rule out incorrect candidates in an early stage before sending them to CBMC. In detail, we say a candidate is consistent with a positive (negative) example if the evaluation of the candidate on it is true (false). If a candidate is inconsistent with any example, it cannot be a valid loop invariant predicate. Examples are collected in the verification stage. In a CBMC trace, the valuations of variables upon the entry of the loop (loop_entry) must satisfy the loop invariants, so that they are positive examples. Oppositely, valuations of variables in the havoced iteration must not satisfy the loop invariants, so that they are negative examples. The quick filter sinificantly reduce the number of candidate sent to CBMC. As an example, in the benchmark `loop_contracts_synthesis_04`, 61 out of 67 bad candidates are ruled out be the quick filter.
1 parent 1f16a91 commit 195a296

File tree

8 files changed

+430
-9
lines changed

8 files changed

+430
-9
lines changed

regression/goto-synthesizer/loop_contracts_synthesis_03/main.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22

33
void main()
44
{
5-
unsigned long len;
6-
__CPROVER_assume(len <= SIZE);
5+
unsigned long long len;
76
__CPROVER_assume(len >= 8);
87
char *array = malloc(len);
98
const char *end = array + len;

regression/goto-synthesizer/loop_contracts_synthesis_04/test.desc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
CORE
22
main.c
3-
--pointer-check
3+
--pointer-check _ --verbosity 9
44
^EXIT=0$
55
^SIGNAL=0$
6+
Quick filter\: 6.* out of 67 candidates were filtered out\.
67
^\[main.pointer\_dereference.\d+\] .* SUCCESS$
78
^VERIFICATION SUCCESSFUL$
89
--

src/goto-synthesizer/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
SRC = cegis_verifier.cpp \
1+
SRC = cegis_evaluator.cpp \
2+
cegis_verifier.cpp \
23
dump_loop_contracts.cpp \
34
enumerative_loop_contracts_synthesizer.cpp \
45
expr_enumerator.cpp \
Lines changed: 343 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,343 @@
1+
/*******************************************************************\
2+
3+
Module: Evaluate if an expression is consistent with examples
4+
5+
Author: Qinheping Hu
6+
7+
\*******************************************************************/
8+
9+
/// \file
10+
/// Evaluate if an expression is consistent with examples
11+
12+
#include "cegis_evaluator.h"
13+
14+
#include <util/arith_tools.h>
15+
#include <util/format_expr.h>
16+
17+
bool cegis_evaluatort::evaluate()
18+
{
19+
auto is_inconsistent = false;
20+
// Check if checked_expr is consistent with all examples.
21+
for(const auto &cex : cexs)
22+
{
23+
// checked_expr is inconsistent with a positive example if its evaluation
24+
// false.
25+
is_inconsistent =
26+
is_inconsistent || !evaluate_rec_bool(checked_expr, cex, 1);
27+
// checked_expr is inconsistent with a negative example if its evaluation
28+
// false.
29+
is_inconsistent =
30+
is_inconsistent || evaluate_rec_bool(checked_expr, cex, 0);
31+
}
32+
return !is_inconsistent;
33+
}
34+
35+
bool cegis_evaluatort::evaluate_rec_bool(
36+
const exprt &expr,
37+
const cext &cex,
38+
const bool is_positive)
39+
{
40+
const auto id = expr.id();
41+
// eval(AND op1 op2) :=
42+
// eval(op1) && eval(op2)
43+
if(id == ID_and)
44+
{
45+
bool result = true;
46+
for(auto &op : expr.operands())
47+
{
48+
result = result && evaluate_rec_bool(op, cex, is_positive);
49+
}
50+
return result;
51+
}
52+
53+
// eval(OR op1 op2) :=
54+
// eval(op1) || eval(op2)
55+
if(id == ID_or)
56+
{
57+
bool result = false;
58+
for(auto &op : expr.operands())
59+
{
60+
result = result || evaluate_rec_bool(op, cex, is_positive);
61+
}
62+
return result;
63+
}
64+
65+
// eval(IMPLIES op1 op2) :=
66+
// eval(op1) => eval(op2)
67+
if(id == ID_implies)
68+
{
69+
return !evaluate_rec_bool(expr.operands()[0], cex, is_positive) ||
70+
evaluate_rec_bool(expr.operands()[1], cex, is_positive);
71+
}
72+
73+
// eval(NOT op) :=
74+
// !eval(op)
75+
if(id == ID_not)
76+
{
77+
return !evaluate_rec_bool(expr.operands()[0], cex, is_positive);
78+
}
79+
80+
// eval(EQUAL op1 op2) :=
81+
// eval(op1) == eval(op2)
82+
if(id == ID_equal)
83+
{
84+
return evaluate_rec_int(expr.operands()[0], cex, is_positive) ==
85+
evaluate_rec_int(expr.operands()[1], cex, is_positive);
86+
}
87+
88+
// eval(NOTEQUAL op1 op2) :=
89+
// eval(op1) != eval(op2)
90+
if(id == ID_notequal)
91+
{
92+
return evaluate_rec_int(expr.operands()[0], cex, is_positive) !=
93+
evaluate_rec_int(expr.operands()[1], cex, is_positive);
94+
}
95+
96+
// eval(LE op1 op2) :=
97+
// eval(op1) <= eval(op2)
98+
if(id == ID_le)
99+
{
100+
return evaluate_rec_int(expr.operands()[0], cex, is_positive) <=
101+
evaluate_rec_int(expr.operands()[1], cex, is_positive);
102+
}
103+
104+
// eval(LT op1 op2) :=
105+
// eval(op1) < eval(op2)
106+
if(id == ID_lt)
107+
{
108+
return evaluate_rec_int(expr.operands()[0], cex, is_positive) <
109+
evaluate_rec_int(expr.operands()[1], cex, is_positive);
110+
}
111+
112+
// eval(GE op1 op2) :=
113+
// eval(op1) >= eval(op2)
114+
if(id == ID_ge)
115+
{
116+
return evaluate_rec_int(expr.operands()[0], cex, is_positive) >=
117+
evaluate_rec_int(expr.operands()[1], cex, is_positive);
118+
}
119+
120+
// eval(GT op1 op2) :=
121+
// eval(op1) > eval(op2)
122+
if(id == ID_gt)
123+
{
124+
return evaluate_rec_int(expr.operands()[0], cex, is_positive) >
125+
evaluate_rec_int(expr.operands()[1], cex, is_positive);
126+
}
127+
128+
// eval(CONST op) :=
129+
// op
130+
if(id == ID_constant)
131+
{
132+
if(expr == true_exprt())
133+
{
134+
return true;
135+
}
136+
else if(expr == false_exprt())
137+
{
138+
return false;
139+
}
140+
UNREACHABLE_BECAUSE(
141+
"Boolean constant must be either true or false: " + expr.pretty());
142+
}
143+
144+
UNREACHABLE_BECAUSE(
145+
"Found a unsupported boolean operator during quick filtering: " +
146+
expr.id_string());
147+
}
148+
149+
mp_integer cegis_evaluatort::evaluate_rec_int(
150+
const exprt &expr,
151+
const cext &cex,
152+
const bool is_positive)
153+
{
154+
const auto id = expr.id();
155+
mp_integer result;
156+
157+
// For symbol expression, we look up their values from the example.
158+
// There are three cases:
159+
// 1. is_positive == true
160+
// We evaluate the expression on the positive examples, which is the
161+
// valuation of loop_entry(v). Note that the loop invariants must hold
162+
// for loop_entry valuations as the base case. So we look up the values
163+
// in the loop_entry set
164+
// 2. is_positive == false, expr in the havoced set
165+
// We evaluate the expression on the negative examples---the havoced set.
166+
// 3. is_positive == false, expr not in havoced set
167+
// The valuations of expr in the havoced iteration is the same as
168+
// in the loop entry. So we look up the values from loop_entry set.
169+
if(id == ID_symbol)
170+
{
171+
if(
172+
cex.havoced_values.find(expr) != cex.havoced_values.end() && !is_positive)
173+
{
174+
// Use havoc values if they exists and we are evaluating on
175+
// the negative example---is_positive is false.
176+
result = cex.havoced_values.at(expr);
177+
}
178+
else if(cex.loop_entry_values.find(expr) != cex.loop_entry_values.end())
179+
{
180+
result = cex.loop_entry_values.at(expr);
181+
}
182+
else
183+
{
184+
UNREACHABLE_BECAUSE(
185+
"Variable not found in the havoced set or the un-havoced set: " +
186+
expr.pretty());
187+
}
188+
189+
// Typecast `result` to the type of `expr`.
190+
auto tmp_expr = from_integer(result, expr.type());
191+
to_integer(tmp_expr, result);
192+
return result;
193+
}
194+
195+
// For loop_entry expression, we look up their values from loop_entry set.
196+
if(id == ID_loop_entry)
197+
{
198+
if(
199+
cex.loop_entry_values.find(expr.operands()[0]) !=
200+
cex.loop_entry_values.end())
201+
{
202+
result = cex.loop_entry_values.at(expr.operands()[0]);
203+
}
204+
else
205+
{
206+
UNREACHABLE_BECAUSE(
207+
"Variable not found in the havoced set or the un-havoced set: " +
208+
expr.pretty());
209+
}
210+
211+
// Typecast `result` to the type of `expr`.
212+
auto tmp_expr = from_integer(result, expr.type());
213+
to_integer(tmp_expr, result);
214+
return result;
215+
}
216+
217+
// Evaluate the underlying expression and then typecast the result.
218+
if(id == ID_typecast)
219+
{
220+
// Typecast `result` to the type of `expr`.
221+
result = evaluate_rec_int(expr.operands()[0], cex, is_positive);
222+
auto tmp_expr = from_integer(result, expr.type());
223+
to_integer(tmp_expr, result);
224+
return result;
225+
}
226+
227+
// For object_size expression, look up the size of the underlying pointer in
228+
// the example `cex`.
229+
if(id == ID_object_size)
230+
{
231+
if(cex.object_sizes.find(expr.operands()[0]) != cex.object_sizes.end())
232+
{
233+
result = cex.object_sizes.at(expr.operands()[0]);
234+
}
235+
else
236+
{
237+
UNREACHABLE_BECAUSE(
238+
"Variable not found in the object size set: " + expr.pretty());
239+
}
240+
241+
// Typecast `result` to the type of `expr`.
242+
auto tmp_expr = from_integer(result, expr.type());
243+
to_integer(tmp_expr, result);
244+
return result;
245+
}
246+
247+
// For pointer_offset expression, look up the offset of the underlying
248+
// pointer in the example `cex`.
249+
// A pointer offset expression can be of form
250+
// pointer_offset(p)
251+
// or
252+
// pointer_offset(loop_entry(p))
253+
// for some pointer p.
254+
// For the first case, we look up the offset in havoced
255+
// offset set. Note that if the pointer is not in the havoced set or
256+
// is_positive is set, we look up in loop_entry_offset set instead.
257+
// For the second case, we look up the offset in the loop_entry_offset set.
258+
if(id == ID_pointer_offset)
259+
{
260+
if(expr.operands()[0].id() != ID_loop_entry)
261+
{
262+
// If the expression is pointer_offset(p), look up the offset in the
263+
// havoced offset set.
264+
if(
265+
cex.havoced_pointer_offsets.find(expr.operands()[0]) !=
266+
cex.havoced_pointer_offsets.end() &&
267+
!is_positive)
268+
{
269+
// Use havoc values only if we are evaluating the expression against
270+
// the negative example---is_positive is false.
271+
result = cex.havoced_pointer_offsets.at(expr.operands()[0]);
272+
}
273+
else if(
274+
cex.loop_entry_offsets.find(expr.operands()[0]) !=
275+
cex.loop_entry_offsets.end())
276+
{
277+
// The pointer is not havoced. So look up the offset in the loop-entry
278+
// set instead.
279+
result = cex.loop_entry_offsets.at(expr.operands()[0]);
280+
}
281+
else
282+
{
283+
UNREACHABLE_BECAUSE(
284+
"Variable not found in the offset set: " + expr.pretty());
285+
}
286+
}
287+
else
288+
{
289+
// If the expression is pointer_offset(loop_entry(p)), look up the
290+
// offset in the offset-of-loop-entry-set.
291+
if(
292+
cex.loop_entry_offsets.find(expr.operands()[0].operands()[0]) !=
293+
cex.loop_entry_offsets.end())
294+
{
295+
result = cex.loop_entry_offsets.at(expr.operands()[0].operands()[0]);
296+
}
297+
else
298+
{
299+
UNREACHABLE_BECAUSE(
300+
"Variable not found in the offset set: " + expr.pretty());
301+
}
302+
}
303+
304+
// Typecast `result` to the type of `expr`.
305+
auto tmp_expr = from_integer(result, expr.type());
306+
to_integer(tmp_expr, result);
307+
return result;
308+
}
309+
310+
// For a constant expression, return its evaluation.
311+
if(id == ID_constant)
312+
{
313+
to_integer(to_constant_expr(expr), result);
314+
return result;
315+
}
316+
317+
// For a plus expression, return the sum of the evaluations of two operands.
318+
if(id == ID_plus)
319+
{
320+
result = evaluate_rec_int(expr.operands()[0], cex, is_positive) +
321+
evaluate_rec_int(expr.operands()[1], cex, is_positive);
322+
323+
// Typecast `result` to the type of `expr`.
324+
auto tmp_expr = from_integer(result, expr.type());
325+
to_integer(tmp_expr, result);
326+
return result;
327+
}
328+
329+
// For a minus expression, return difference of evaluations of two operands.
330+
if(id == ID_minus)
331+
{
332+
result = evaluate_rec_int(expr.operands()[0], cex, is_positive) -
333+
evaluate_rec_int(expr.operands()[1], cex, is_positive);
334+
335+
// Typecast `result` to the type of `expr`.
336+
auto tmp_expr = from_integer(result, expr.type());
337+
to_integer(tmp_expr, result);
338+
return result;
339+
}
340+
341+
UNREACHABLE_BECAUSE(
342+
"Found a unsupported operator during quick filtering: " + expr.id_string());
343+
}

0 commit comments

Comments
 (0)