Reachability slicer: mark reachable code more precisely

smowton · smowton · commit 96229105f1fd · 2018-05-17T17:37:36.000+01:00
The reachability slicer currently uses a very simple graph walk, and in particular walks out of a function
to all possible callers, regardless of whether we know the actual caller. This commit fixes that shortcoming
by adding the callsite successor *at the callsite*, and tracking the fact that the callee's successor has
already been taken care of in the graph search stack, thus allowing it to ignore the END_FUNCTION -> callsite
edges which are less precise.

Functions whose caller is genuinely unknown, such as the root function containing a reachability target (e.g.
an assert instruction), are treated as before, considering all possible callers. The backwards search is improved
in the same way as the forwards search.
diff --git a/src/goto-instrument/reachability_slicer.cpp b/src/goto-instrument/reachability_slicer.cpp
@@ -54,11 +54,49 @@ void reachability_slicert::fixedpoint_to_assertions(
   const is_threadedt &is_threaded,
   slicing_criteriont &criterion)
 {
-  std::vector<cfgt::node_indext> src = get_sources(is_threaded, criterion);
+  // Stack entries are pairs of node indices and a boolean flag that indicates
+  // whether the function's callsite is known, in which case it has already
+  // been placed on the stack and function heads are a no-op.
 
-  std::vector<cfgt::node_indext> reachable = cfg.get_reachable(src, false);
-  for(const auto index : reachable)
-    cfg[index].reaches_assertion = true;
+  std::vector<std::pair<cfgt::node_indext, bool>> stack;
+  std::vector<cfgt::node_indext> sources = get_sources(is_threaded, criterion);
+  for(const auto source : sources)
+    stack.emplace_back(source, false);
+
+  while(!stack.empty())
+  {
+    auto index = stack.back().first;
+    auto callsite_is_known = stack.back().second;
+    stack.pop_back();
+
+    auto &node = cfg[index];
+    if(node.reaches_assertion)
+      continue;
+    node.reaches_assertion = true;
+
+    for(const auto &edge : node.in)
+    {
+      const auto &pred_node = cfg[edge.first];
+
+      if(pred_node.PC->is_end_function())
+      {
+        stack.emplace_back(edge.first, true);
+        stack.emplace_back(
+          cfg.entry_map[std::prev(node.PC)], callsite_is_known);
+      }
+      else if(pred_node.PC->is_function_call())
+      {
+        // Skip this predecessor, unless this is a bodyless function, or we
+        // don't know who our caller was:
+        if(!callsite_is_known || pred_node.PC == std::prev(node.PC))
+          stack.emplace_back(edge.first, callsite_is_known);
+      }
+      else
+      {
+        stack.emplace_back(edge.first, callsite_is_known);
+      }
+    }
+  }
 }
 
 /// Perform forwards depth-first search of the control-flow graph of the
@@ -71,11 +109,71 @@ void reachability_slicert::fixedpoint_from_assertions(
   const is_threadedt &is_threaded,
   slicing_criteriont &criterion)
 {
-  std::vector<cfgt::node_indext> src = get_sources(is_threaded, criterion);
+  // Stack entries are pairs of node indices and a boolean flag that indicates
+  // whether the function's callsite is known, in which case it has already
+  // been placed on the stack and return sites are a no-op.
+
+  std::vector<std::pair<cfgt::node_indext, bool>> stack;
+  std::vector<cfgt::node_indext> sources = get_sources(is_threaded, criterion);
+  for(const auto source : sources)
+    stack.emplace_back(source, false);
+
+  while(!stack.empty())
+  {
+    auto index = stack.back().first;
+    auto callsite_is_known = stack.back().second;
+    stack.pop_back();
 
-  const std::vector<cfgt::node_indext> reachable = cfg.get_reachable(src, true);
-  for(const auto index : reachable)
-    cfg[index].reachable_from_assertion = true;
+    auto &node = cfg[index];
+    if(node.reachable_from_assertion)
+      continue;
+    node.reachable_from_assertion = true;
+
+    if(node.PC->is_function_call())
+    {
+      // Queue the instruction's natural successor (function head, or next
+      // instruction if the function is bodyless)
+      INVARIANT(node.out.size() == 1, "Call sites should have one successor");
+      auto successor_index = node.out.begin()->first;
+
+      // If the function has a body, mark the function head, but note that we
+      // have already taken care of the return site.
+      const auto &callee_head_node = cfg[successor_index];
+      auto callee_it = callee_head_node.PC;
+      if(callee_it != std::next(node.PC))
+      {
+        stack.emplace_back(successor_index, true);
+
+        // Check if it can return, and if so mark the callsite's successor:
+        while(!callee_it->is_end_function())
+          ++callee_it;
+
+        if(cfg[cfg.entry_map[callee_it]].out.size() != 0)
+        {
+          stack.emplace_back(
+            cfg.entry_map[std::next(node.PC)], callsite_is_known);
+        }
+      }
+      else
+      {
+        // Bodyless function -- mark the successor instruction only.
+        stack.emplace_back(successor_index, callsite_is_known);
+      }
+    }
+    else if(node.PC->is_end_function())
+    {
+      if(!callsite_is_known)
+      {
+        for(const auto &edge : node.out)
+          stack.emplace_back(edge.first, false);
+      }
+    }
+    else
+    {
+      for(const auto &edge : node.out)
+        stack.emplace_back(edge.first, callsite_is_known);
+    }
+  }
 }
 
 /// This function removes all instructions that have the flag