Skip to content

Commit a9363da

Browse files
committed
Reachability slicer: split cfg walks into two phases
The first phase walks outwards towards __CPROVER__start, visiting all callers but recording callsites seen en route, then the second phase walks into callees not seen in the first phase, restricting the search to known reachable functions. This fixes the previous algorithm, which attempted the same logic but could visit too few functions when a particular functions was encountered as a callee before later being seen as a caller -- the latter results in more code being visited because the calling context is unknown, and so must be done first.
1 parent 0bfe213 commit a9363da

File tree

6 files changed

+265
-60
lines changed

6 files changed

+265
-60
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
void b();
2+
3+
void c()
4+
{
5+
__CPROVER_assert(0, "");
6+
b();
7+
}
8+
9+
void b()
10+
{
11+
a();
12+
c();
13+
}
14+
15+
void a()
16+
{
17+
c();
18+
}
19+
20+
int main()
21+
{
22+
a();
23+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CORE
2+
main.c
3+
--reachability-slice --show-goto-functions
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
main\(\)
7+
--
8+
^warning: ignoring
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
void intermediate();
2+
void root();
3+
4+
int main()
5+
{
6+
intermediate();
7+
return 1;
8+
}
9+
10+
void intermediate()
11+
{
12+
root();
13+
}
14+
15+
void root()
16+
{
17+
__CPROVER_assert(0, "");
18+
intermediate();
19+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
CORE
2+
main.c
3+
--reachability-slice-fb --show-goto-functions
4+
^EXIT=0$
5+
^SIGNAL=0$
6+
main#return_value = 1;
7+
--
8+
^warning: ignoring

src/goto-instrument/reachability_slicer.cpp

Lines changed: 192 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -56,25 +56,69 @@ static bool is_same_target(
5656
return it1->function == it2->function && it1 == it2;
5757
}
5858

59-
/// Perform backwards depth-first search of the control-flow graph of the
60-
/// goto program, starting from the nodes corresponding to the criterion and
61-
/// the instructions that might be executed concurrently. Set reaches_assertion
62-
/// to true for every instruction visited.
63-
/// \param is_threaded Instructions that might be executed concurrently
64-
/// \param criterion the criterion we are trying to hit
65-
void reachability_slicert::fixedpoint_to_assertions(
66-
const is_threadedt &is_threaded,
67-
slicing_criteriont &criterion)
59+
/// Perform backward depth-first search of the control-flow graph of the
60+
/// goto program, starting from a given set of nodes. At call sites this walks
61+
/// to all possible callers, and at return sites it remembers the site but
62+
/// doesn't walk in (this will be done in `backward_inwards_walk_from` below).
63+
/// \param stack: nodes to start from
64+
/// \return vector of return-site nodes encountered during the walk
65+
std::vector<reachability_slicert::cfgt::node_indext>
66+
reachability_slicert::backward_outwards_walk_from(
67+
std::vector<cfgt::node_indext> stack)
6868
{
69-
std::vector<search_stack_entryt> stack;
70-
std::vector<cfgt::node_indext> sources = get_sources(is_threaded, criterion);
71-
for(const auto source : sources)
72-
stack.emplace_back(source, false);
69+
std::vector<cfgt::node_indext> return_sites;
70+
71+
while(!stack.empty())
72+
{
73+
auto &node = cfg[stack.back()];
74+
stack.pop_back();
75+
76+
if(node.reaches_assertion)
77+
continue;
78+
node.reaches_assertion = true;
79+
80+
for(const auto &edge : node.in)
81+
{
82+
const auto &pred_node = cfg[edge.first];
83+
84+
if(pred_node.PC->is_end_function())
85+
{
86+
// This is an end-of-function -> successor-of-callsite edge.
87+
// Record the return site for later investigation and step over it:
88+
return_sites.push_back(edge.first);
7389

90+
INVARIANT(
91+
std::prev(node.PC)->is_function_call(),
92+
"all function return edges should point to the successor of a "
93+
"FUNCTION_CALL instruction");
94+
95+
stack.push_back(cfg.entry_map[std::prev(node.PC)]);
96+
}
97+
else
98+
{
99+
stack.push_back(edge.first);
100+
}
101+
}
102+
}
103+
104+
return return_sites;
105+
}
106+
107+
/// Perform backward depth-first search of the control-flow graph of the
108+
/// goto program, starting from a given set of nodes. This walks into return
109+
/// sites but *not* out of call sites; this is the opposite of
110+
/// `backward_outwards_walk_from` above. Note since the two functions use the
111+
/// same `reaches_assertion` flag to track where they have been, it is important
112+
/// the outwards walk is performed before the inwards walk, as encountering a
113+
/// function while walking outwards visits strictly more code than when walking
114+
/// inwards.
115+
/// \param stack: nodes to start from
116+
void reachability_slicert::backward_inwards_walk_from(
117+
std::vector<cfgt::node_indext> stack)
118+
{
74119
while(!stack.empty())
75120
{
76-
auto &node = cfg[stack.back().node_index];
77-
const auto caller_is_known = stack.back().caller_is_known;
121+
auto &node = cfg[stack.back()];
78122
stack.pop_back();
79123

80124
if(node.reaches_assertion)
@@ -88,48 +132,106 @@ void reachability_slicert::fixedpoint_to_assertions(
88132
if(pred_node.PC->is_end_function())
89133
{
90134
// This is an end-of-function -> successor-of-callsite edge.
91-
// Queue both the caller and the end of the callee.
135+
// Walk into the called function, and then walk from the callsite
136+
// backward:
137+
stack.push_back(edge.first);
138+
92139
INVARIANT(
93140
std::prev(node.PC)->is_function_call(),
94141
"all function return edges should point to the successor of a "
95142
"FUNCTION_CALL instruction");
96-
stack.emplace_back(edge.first, true);
97-
stack.emplace_back(cfg.entry_map[std::prev(node.PC)], caller_is_known);
143+
144+
stack.push_back(cfg.entry_map[std::prev(node.PC)]);
98145
}
99146
else if(pred_node.PC->is_function_call())
100147
{
101-
// Skip this predecessor, unless this is a bodyless function, or we
102-
// don't know who our callee was:
103-
if(!caller_is_known || is_same_target(std::next(pred_node.PC), node.PC))
104-
stack.emplace_back(edge.first, caller_is_known);
148+
// Skip -- the callsite relevant to this function was already queued
149+
// when we processed the return site.
105150
}
106151
else
107152
{
108-
stack.emplace_back(edge.first, caller_is_known);
153+
stack.push_back(edge.first);
109154
}
110155
}
111156
}
112157
}
113158

114-
/// Perform forwards depth-first search of the control-flow graph of the
159+
/// Perform backward depth-first search of the control-flow graph of the
115160
/// goto program, starting from the nodes corresponding to the criterion and
116161
/// the instructions that might be executed concurrently. Set reaches_assertion
117162
/// to true for every instruction visited.
118163
/// \param is_threaded Instructions that might be executed concurrently
119164
/// \param criterion the criterion we are trying to hit
120-
void reachability_slicert::fixedpoint_from_assertions(
165+
void reachability_slicert::fixedpoint_to_assertions(
121166
const is_threadedt &is_threaded,
122167
slicing_criteriont &criterion)
123168
{
124-
std::vector<search_stack_entryt> stack;
125169
std::vector<cfgt::node_indext> sources = get_sources(is_threaded, criterion);
126-
for(const auto source : sources)
127-
stack.emplace_back(source, false);
170+
171+
// First walk outwards towards __CPROVER_start, visiting all possible callers
172+
// and stepping over but recording callees as we go:
173+
std::vector<cfgt::node_indext> return_sites =
174+
backward_outwards_walk_from(sources);
175+
176+
// Now walk into those callees, restricting our walk to the known callsites:
177+
backward_inwards_walk_from(return_sites);
178+
}
179+
180+
/// Process a call instruction during a forwards reachability walk.
181+
/// \param call_node: function-call graph node. Its single successor will be
182+
/// the head of the callee if the callee body exists, or the call
183+
/// instruction's immediate successor otherwise.
184+
/// \param callsite_successor_stack: The index of the callsite's local successor
185+
/// node will be added to this vector if it is reachable.
186+
/// \param callee_head_stack: The index of the callee body head node will be
187+
/// added to this vector if the callee has a body.
188+
void reachability_slicert::forward_walk_call_instruction(
189+
const cfgt::nodet &call_node,
190+
std::vector<cfgt::node_indext> &callsite_successor_stack,
191+
std::vector<cfgt::node_indext> &callee_head_stack)
192+
{
193+
// Get the instruction's natural successor (function head, or next
194+
// instruction if the function is bodyless)
195+
INVARIANT(call_node.out.size() == 1, "Call sites should have one successor");
196+
const auto successor_index = call_node.out.begin()->first;
197+
198+
auto callsite_successor_pc = std::next(call_node.PC);
199+
200+
auto successor_pc = cfg[successor_index].PC;
201+
if(!is_same_target(successor_pc, callsite_successor_pc))
202+
{
203+
// Real call -- store the callee head node:
204+
callee_head_stack.push_back(successor_index);
205+
206+
// Check if it can return, and if so store the callsite's successor:
207+
while(!successor_pc->is_end_function())
208+
++successor_pc;
209+
210+
if(!cfg[cfg.entry_map[successor_pc]].out.empty())
211+
callsite_successor_stack.push_back(cfg.entry_map[callsite_successor_pc]);
212+
}
213+
else
214+
{
215+
// Bodyless function -- mark the successor instruction only.
216+
callsite_successor_stack.push_back(successor_index);
217+
}
218+
}
219+
220+
/// Perform forwards depth-first search of the control-flow graph of the
221+
/// goto program, starting from a given set of nodes. Steps over and records
222+
/// callsites for a later inwards walk; explores all possible callers at return
223+
/// sites, eventually walking out into __CPROVER__start.
224+
/// \param stack: nodes to start from
225+
/// \return vector of encounted callee head nodes
226+
std::vector<reachability_slicert::cfgt::node_indext>
227+
reachability_slicert::forward_outwards_walk_from(
228+
std::vector<cfgt::node_indext> stack)
229+
{
230+
std::vector<cfgt::node_indext> called_function_heads;
128231

129232
while(!stack.empty())
130233
{
131-
auto &node = cfg[stack.back().node_index];
132-
const auto caller_is_known = stack.back().caller_is_known;
234+
auto &node = cfg[stack.back()];
133235
stack.pop_back();
134236

135237
if(node.reachable_from_assertion)
@@ -138,51 +240,81 @@ void reachability_slicert::fixedpoint_from_assertions(
138240

139241
if(node.PC->is_function_call())
140242
{
141-
// Queue the instruction's natural successor (function head, or next
142-
// instruction if the function is bodyless)
143-
INVARIANT(node.out.size() == 1, "Call sites should have one successor");
144-
const auto successor_index = node.out.begin()->first;
145-
146-
// If the function has a body, mark the function head, but note that we
147-
// have already taken care of the return site.
148-
const auto &callee_head_node = cfg[successor_index];
149-
auto callee_it = callee_head_node.PC;
150-
if(!is_same_target(callee_it, std::next(node.PC)))
151-
{
152-
stack.emplace_back(successor_index, true);
243+
// Store the called function head for the later inwards walk;
244+
// visit the call instruction's local successor now.
245+
forward_walk_call_instruction(node, stack, called_function_heads);
246+
}
247+
else
248+
{
249+
// General case, including end of function: queue all successors.
250+
for(const auto &edge : node.out)
251+
stack.push_back(edge.first);
252+
}
253+
}
153254

154-
// Check if it can return, and if so mark the callsite's successor:
155-
while(!callee_it->is_end_function())
156-
++callee_it;
255+
return called_function_heads;
256+
}
157257

158-
if(!cfg[cfg.entry_map[callee_it]].out.empty())
159-
{
160-
stack.emplace_back(
161-
cfg.entry_map[std::next(node.PC)], caller_is_known);
162-
}
163-
}
164-
else
165-
{
166-
// Bodyless function -- mark the successor instruction only.
167-
stack.emplace_back(successor_index, caller_is_known);
168-
}
258+
/// Perform forwards depth-first search of the control-flow graph of the
259+
/// goto program, starting from a given set of nodes. Steps into callsites;
260+
/// ignores return sites, which have been taken care of by
261+
/// `forward_outwards_walk_from`. Note it is important this is done *after*
262+
/// the outwards walk, because the outwards walk visits strictly more functions
263+
/// as it visits all possible callers.
264+
/// \param stack: nodes to start from
265+
void reachability_slicert::forward_inwards_walk_from(
266+
std::vector<cfgt::node_indext> stack)
267+
{
268+
while(!stack.empty())
269+
{
270+
auto &node = cfg[stack.back()];
271+
stack.pop_back();
272+
273+
if(node.reachable_from_assertion)
274+
continue;
275+
node.reachable_from_assertion = true;
276+
277+
if(node.PC->is_function_call())
278+
{
279+
// Visit both the called function head and the callsite successor:
280+
forward_walk_call_instruction(node, stack, stack);
169281
}
170-
else if(node.PC->is_end_function() && caller_is_known)
282+
else if(node.PC->is_end_function())
171283
{
172284
// Special case -- the callsite successor was already queued when entering
173285
// this function, more precisely than we can see from the function return
174286
// edges (which lead to all possible callers), so nothing to do here.
175287
}
176288
else
177289
{
178-
// General case, including end-of-function where we don't know our caller.
179-
// Queue all successors.
290+
// General case: queue all successors.
180291
for(const auto &edge : node.out)
181-
stack.emplace_back(edge.first, caller_is_known);
292+
stack.push_back(edge.first);
182293
}
183294
}
184295
}
185296

297+
/// Perform forwards depth-first search of the control-flow graph of the
298+
/// goto program, starting from the nodes corresponding to the criterion and
299+
/// the instructions that might be executed concurrently. Set reaches_assertion
300+
/// to true for every instruction visited.
301+
/// \param is_threaded Instructions that might be executed concurrently
302+
/// \param criterion the criterion we are trying to hit
303+
void reachability_slicert::fixedpoint_from_assertions(
304+
const is_threadedt &is_threaded,
305+
slicing_criteriont &criterion)
306+
{
307+
std::vector<cfgt::node_indext> sources = get_sources(is_threaded, criterion);
308+
309+
// First walk outwards towards __CPROVER_start, visiting all possible callers
310+
// and stepping over but recording callees as we go:
311+
std::vector<cfgt::node_indext> return_sites =
312+
forward_outwards_walk_from(sources);
313+
314+
// Now walk into those callees, restricting our walk to the known callsites:
315+
forward_inwards_walk_from(return_sites);
316+
}
317+
186318
/// This function removes all instructions that have the flag
187319
/// reaches_assertion or reachable_from_assertion set to true;
188320
void reachability_slicert::slice(goto_functionst &goto_functions)

0 commit comments

Comments
 (0)