Skip to content

Commit a87ef74

Browse files
committed
Abstract interpretation framework: allow selective domain retention
This allows users of the `ait` abstract interpretation framework to specify that only certain states are needed after its fixpoint computation is finished. For example, perhaps a particular analysis, or a particular user of that analysis, only needs to know the results at END_FUNCTION instructions. In this case when a state does not need to be retained, and only has a single successor, that state will be altered in place and passed to its successor instruction using std::move, permitting an analysis to optimise away much (potentially all) state copying. In the best case (a long, straight-line function whose state grows with every instruction, such as a constant propagator reading "int x = 1; int y = 2; int z = 3; ...") this reduces analysis space and time complexity from quadratic to linear.
1 parent 7c4b5aa commit a87ef74

File tree

4 files changed

+529
-12
lines changed

4 files changed

+529
-12
lines changed

src/analyses/ai.cpp

+68-9
Original file line numberDiff line numberDiff line change
@@ -334,15 +334,33 @@ bool ai_baset::visit(
334334

335335
statet &current=get_state(l);
336336

337-
for(const auto &to_l : goto_program.get_successors(l))
337+
// Check if our user wants to be able to inspect this state after the
338+
// fixpoint algorithm terminates.
339+
// Default if the instantiating code hasn't supplied any specific advice is
340+
// to retain all states.
341+
bool must_retain_current_state = must_retain_state(l, goto_program);
342+
343+
std::list<locationt> successors = goto_program.get_successors(l);
344+
for(const auto &to_l : successors)
338345
{
339346
if(to_l==goto_program.instructions.end())
340347
continue;
341348

342-
std::unique_ptr<statet> tmp_state(
343-
make_temporary_state(current));
349+
// If permissible, alter the existing instruction's state in place;
350+
// otherwise use a temporary copy:
351+
std::unique_ptr<statet> tmp_state;
344352

345-
statet &new_values=*tmp_state;
353+
// Note this condition is stricter than `!must_retain_current_state` because
354+
// when we have multiple successors `transform` may do something different
355+
// for different successors, in which case we must use a clean copy of the
356+
// pre-existing state each time.
357+
bool may_work_in_place =
358+
successors.size() == 1 && !must_retain_current_state;
359+
360+
if(!may_work_in_place)
361+
tmp_state = make_temporary_state(current);
362+
363+
statet &new_values = may_work_in_place ? current : *tmp_state;
346364

347365
bool have_new_values=false;
348366

@@ -367,8 +385,7 @@ bool ai_baset::visit(
367385

368386
new_values.transform(l, to_l, *this, ns);
369387

370-
if(merge(new_values, l, to_l))
371-
have_new_values=true;
388+
have_new_values |= merge(std::move(new_values), l, to_l);
372389
}
373390

374391
if(have_new_values)
@@ -378,6 +395,12 @@ bool ai_baset::visit(
378395
}
379396
}
380397

398+
if(!must_retain_current_state)
399+
{
400+
// If this state isn't needed any longer, destroy it now:
401+
current.make_bottom();
402+
}
403+
381404
return new_data;
382405
}
383406

@@ -400,7 +423,7 @@ bool ai_baset::do_function_call(
400423
std::unique_ptr<statet> tmp_state(make_temporary_state(get_state(l_call)));
401424
tmp_state->transform(l_call, l_return, *this, ns);
402425

403-
return merge(*tmp_state, l_call, l_return);
426+
return merge(std::move(*tmp_state), l_call, l_return);
404427
}
405428

406429
assert(!goto_function.body.instructions.empty());
@@ -420,7 +443,7 @@ bool ai_baset::do_function_call(
420443
bool new_data=false;
421444

422445
// merge the new stuff
423-
if(merge(*tmp_state, l_call, l_begin))
446+
if(merge(std::move(*tmp_state), l_call, l_begin))
424447
new_data=true;
425448

426449
// do we need to do/re-do the fixedpoint of the body?
@@ -445,7 +468,7 @@ bool ai_baset::do_function_call(
445468
tmp_state->transform(l_end, l_return, *this, ns);
446469

447470
// Propagate those
448-
return merge(*tmp_state, l_end, l_return);
471+
return merge(std::move(*tmp_state), l_end, l_return);
449472
}
450473
}
451474

@@ -523,6 +546,42 @@ bool ai_baset::do_function_call_rec(
523546
return new_data;
524547
}
525548

549+
/// Determine whether we must retain the state for program point `l` even after
550+
/// fixpoint analysis completes (e.g. for our user to inspect), or if it solely
551+
/// for internal use and can be thrown away as and when suits us.
552+
/// \param l: program point
553+
/// \param l_program: program that l belongs to
554+
/// \return true if we must retain the state for that program point
555+
bool ai_baset::must_retain_state(
556+
locationt l, const goto_programt &l_program) const
557+
{
558+
// If the derived class doesn't specify otherwise, assume the old default
559+
// behaviour: always retain state.
560+
if(!must_retain_state_callback)
561+
return true;
562+
563+
// Regardless, always keep states with multiple predecessors, which is
564+
// required for termination when loops are present, and saves redundant
565+
// re-investigation of successors for other kinds of convergence.
566+
if(l->incoming_edges.size() > 1)
567+
return true;
568+
569+
// Similarly retain function head instructions, which may have multiple
570+
// incoming call-graph edges:
571+
if(l == l_program.instructions.begin())
572+
return true;
573+
574+
// ...and retain function end states, as they will be propagated to many
575+
// different callers (they have multiple successors, even though they appear
576+
// to have none intraprocedurally).
577+
if(l->is_end_function())
578+
return true;
579+
580+
// Finally, retain states where the particular subclass specifies they
581+
// should be kept:
582+
return must_retain_state_callback(l);
583+
}
584+
526585
void ai_baset::sequential_fixedpoint(
527586
const goto_functionst &goto_functions,
528587
const namespacet &ns)

src/analyses/ai.h

+26-3
Original file line numberDiff line numberDiff line change
@@ -96,14 +96,19 @@ class ai_domain_baset
9696
// also add
9797
//
9898
// bool merge(const T &b, locationt from, locationt to);
99+
// and optionally
100+
// bool merge(T &&b, location from, locationt to);
99101
//
100-
// This computes the join between "this" and "b".
102+
// These compute the join between "this" and "b".
101103
// Return true if "this" has changed.
102104
// In the usual case, "b" is the updated state after "from"
103105
// and "this" is the state before "to".
104106
//
105107
// PRECONDITION(from.is_dereferenceable(), "Must not be _::end()")
106108
// PRECONDITION(to.is_dereferenceable(), "Must not be _::end()")
109+
//
110+
// The T &&b overload, if any, must leave `b` in such a state that
111+
// `b.make_bottom()` or destruction of `b` could happen next without error.
107112

108113
// This method allows an expression to be simplified / evaluated using the
109114
// current state. It is used to evaluate assertions and in program
@@ -131,7 +136,9 @@ class ai_baset
131136
typedef ai_domain_baset statet;
132137
typedef goto_programt::const_targett locationt;
133138

134-
ai_baset()
139+
explicit ai_baset(
140+
std::function<bool(locationt)> must_retain_state_callback) :
141+
must_retain_state_callback(must_retain_state_callback)
135142
{
136143
}
137144

@@ -308,6 +315,11 @@ class ai_baset
308315
// the work-queue is sorted by location number
309316
typedef std::map<unsigned, locationt> working_sett;
310317

318+
// Callback that indicates if the user of this instance wants a particular
319+
// state (program point) to be retained for inspection once the fixpoint is
320+
// found. If no callback is supplied we by default retain all states.
321+
std::function<bool(locationt)> must_retain_state_callback;
322+
311323
locationt get_next(working_sett &working_set);
312324

313325
void put_in_working_set(
@@ -358,8 +370,11 @@ class ai_baset
358370
const exprt::operandst &arguments,
359371
const namespacet &ns);
360372

373+
bool must_retain_state(locationt, const goto_programt &) const;
374+
361375
// abstract methods
362376

377+
virtual bool merge(statet &&src, locationt from, locationt to)=0;
363378
virtual bool merge(const statet &src, locationt from, locationt to)=0;
364379
// for concurrent fixedpoint
365380
virtual bool merge_shared(
@@ -378,7 +393,8 @@ class ait:public ai_baset
378393
{
379394
public:
380395
// constructor
381-
ait():ai_baset()
396+
ait(std::function<bool(locationt)> must_retain_state_callback = nullptr) :
397+
ai_baset(must_retain_state_callback)
382398
{
383399
}
384400

@@ -436,6 +452,13 @@ class ait:public ai_baset
436452
return it->second;
437453
}
438454

455+
bool merge(statet &&src, locationt from, locationt to) override
456+
{
457+
statet &dest=get_state(to);
458+
return static_cast<domainT &>(dest).merge(
459+
static_cast<domainT &&>(src), from, to);
460+
}
461+
439462
bool merge(const statet &src, locationt from, locationt to) override
440463
{
441464
statet &dest=get_state(to);

unit/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ SRC = unit_tests.cpp \
88
# Test source files
99
SRC += unit_tests.cpp \
1010
analyses/ai/ai_simplify_lhs.cpp \
11+
analyses/ai/ai_selective_domains.cpp \
1112
analyses/call_graph.cpp \
1213
analyses/constant_propagator.cpp \
1314
analyses/disconnect_unreachable_nodes_in_graph.cpp \

0 commit comments

Comments
 (0)