diff --git a/src/analyses/cfg_dominators.h b/src/analyses/cfg_dominators.h index fa7d23ccf0a..7cbe26b8f7b 100644 --- a/src/analyses/cfg_dominators.h +++ b/src/analyses/cfg_dominators.h @@ -16,17 +16,219 @@ Author: Georg Weissenbacher, georg@weissenbacher.name #include #include #include -#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +/// Dominator tree +/// Node indices are 0-based here (unlike in the internal data structures of +/// the algorithm). +template +struct dominators_datat +{ + explicit dominators_datat(std::size_t size) + : data(size), immediate_dominator(size) + { + } + dominators_datat( + std::vector data, + std::vector immediate_dominator) + : data(data), immediate_dominator(immediate_dominator) + { + } + + /// Maps node to T + std::vector data; + + /// Maps node to its immediate dominator + std::vector immediate_dominator; +}; + +/// An immutable set of dominators. Constant memory usage and creation time. +/// Immutability is necessary because the structure uses sharing. 
+template +class dominatorst +{ +public: + using datat = dominators_datat; + static const node_indext npos; + +private: + std::shared_ptr dominators_data; + node_indext node_index; + mutable std::size_t cached_distance; + +public: + /// Create an empty set + /// Note: Only unreachable nodes should be assigned empty sets after the + /// algorithm completes + dominatorst() : dominators_data(nullptr), node_index(npos), cached_distance(0) + { + } + + /// Create the dominators set of node_index + dominatorst(std::shared_ptr dominators_data, node_indext node_index) + : dominators_data(dominators_data), + node_index(node_index), + cached_distance(npos) + { + } + + dominatorst(const dominatorst &other) + : dominators_data(other.dominators_data), + node_index(other.node_index), + cached_distance(other.cached_distance) + { + } + + dominatorst &operator=(const dominatorst &rhs) + { + dominators_data = rhs.dominators_data; + node_index = rhs.node_index; + cached_distance = rhs.cached_distance; + return *this; + } + + class dominatorst_iteratort + : public std::iterator + { + public: + using parentt = const datat *; + using elemt = const T; + + private: + parentt data; + node_indext current_index; + + public: + dominatorst_iteratort(parentt cfg_dominators, node_indext current_index) + : data(cfg_dominators), current_index(current_index) + { + } + + dominatorst_iteratort &operator++() + { + INVARIANT( + current_index != npos, "Shouldn't try to increment end-iterator"); + current_index = data->immediate_dominator[current_index]; + return *this; + } + + dominatorst_iteratort operator++(int) + { + INVARIANT( + current_index != npos, "Shouldn't try to post-increment end-iterator"); + node_indext tmp = current_index; + ++(*this); + return dominatorst_iteratort(data, tmp); + } + + const elemt *get() const + { + INVARIANT( + current_index != npos, "Shouldn't try to dereference end-iterator"); + return &data->data[current_index]; + } + + const elemt &operator*() const + { + return 
*get(); + } + + const elemt *operator->() const + { + return get(); + } + + bool operator!=(const dominatorst_iteratort &other) const + { + INVARIANT( + data == other.data, "iterators from different sets are not comparable"); + return current_index != other.current_index; + } + + bool operator==(const dominatorst_iteratort &other) const + { + return !(*this != other); + } + }; + + using const_iterator = dominatorst_iteratort; + + const_iterator begin() const + { + return const_iterator(dominators_data.get(), node_index); + } + + const_iterator cbegin() const + { + return begin(); + } + + const_iterator end() const + { + return const_iterator(dominators_data.get(), npos); + } + + const_iterator cend() const + { + return end(); + } + + /// Return an iterator node if node is in this dominators set, end() otherwise + /// Note: O(n), when making many queries against the same set it is probably + /// worth copying into a std::set + const_iterator find(const T &node) const + { + std::less less; + // FIXME This works around a bug in other parts of the code in particular, + // dependence_graph.cpp, where iterators to different lists than those that + // are stored in this set are passed to find. The Debug libstdc++ will + // (correctly!) run into an assertion failure using std::find. std::less for + // some reason doesn't trigger this assertion failure, so we use this as an + // ugly workaround until that code is fixed. + + // NOLINTNEXTLINE + return std::find_if(cbegin(), cend(), [&](const T &other_node) { + return !less(node, other_node) && !less(other_node, node); + }); + } + + /// The size of the set. Linear time on the first call, constant after that. 
+ std::size_t size() const + { + if(cached_distance == npos) + { + cached_distance = std::distance(begin(), end()); + } + return cached_distance; + } + + bool empty() const + { + return begin() == end(); + } +}; + +template +const node_indext + dominatorst::npos = std::numeric_limits::max(); + +/// Dominators for each instruction in a goto program template class cfg_dominators_templatet { public: - typedef std::set target_sett; + using node_indext = graph_nodet<>::node_indext; + using target_sett = dominatorst; struct nodet { @@ -44,135 +246,342 @@ class cfg_dominators_templatet protected: void initialise(P &program); - void fixedpoint(P &program); -}; -/// Print the result of the dominator computation -template -std::ostream &operator << ( - std::ostream &out, - const cfg_dominators_templatet &cfg_dominators) -{ - cfg_dominators.output(out); - return out; -} + struct fixedpointt + { + explicit fixedpointt(cfg_dominators_templatet &cfg_dominators) + : cfg_dominators(cfg_dominators), + dfs_counter(0), + // Data structures have size cfg.size() + 1 as node indices are 1-based + // to match the paper of Lengauer/Tarjan. 
+ parent(cfg_dominators.cfg.size() + 1), + vertex(cfg_dominators.cfg.size() + 1), + dom(cfg_dominators.cfg.size() + 1), + semi(cfg_dominators.cfg.size() + 1), + bucket(cfg_dominators.cfg.size() + 1), + ancestor(cfg_dominators.cfg.size() + 1), + label(cfg_dominators.cfg.size() + 1), + size(cfg_dominators.cfg.size() + 1), + child(cfg_dominators.cfg.size() + 1) + { + } -/// Compute dominators -template -void cfg_dominators_templatet::operator()(P &program) -{ - initialise(program); - fixedpoint(program); -} + void fixedpoint(P &program); -/// Initialises the elements of the fixed point analysis -template -void cfg_dominators_templatet::initialise(P &program) -{ - cfg(program); -} + private: + cfg_dominators_templatet &cfg_dominators; + node_indext dfs_counter; -/// Computes the MOP for the dominator analysis -template -void cfg_dominators_templatet::fixedpoint(P &program) -{ - std::list worklist; + /// Maps node to its parent in the DFS-generated spanning tree + std::vector parent; - if(cfg.nodes_empty(program)) - return; + /// Maps number to node (according to the DFS numbering) + std::vector vertex; - if(post_dom) - entry_node=cfg.get_last_node(program); - else - entry_node=cfg.get_first_node(program); - typename cfgt::nodet &n=cfg[cfg.entry_map[entry_node]]; - n.dominators.insert(entry_node); + /// Maps node to its immediate dominator + std::vector dom; - for(typename cfgt::edgest::const_iterator - s_it=(post_dom?n.in:n.out).begin(); - s_it!=(post_dom?n.in:n.out).end(); - ++s_it) - worklist.push_back(cfg[s_it->first].PC); + /// Maps node to its semi-dominator (as defined by Lengauer/Tarjan) + /// A semidominator of a node w is the minimum node v (according to the DFS + /// numbering) for which there is a path from v to w such that all nodes + /// occurring on that path (other than v, w) have a larger number than w + /// (according to the DFS numbering) + std::vector semi; - while(!worklist.empty()) - { - // get node from worklist - T current=worklist.front(); - 
worklist.pop_front(); + /// Maps node to the set of nodes of which it is the semi-dominator + std::vector> bucket; + + // Used by link() and eval(), which are used to create and query + // an auxiliary data structure which is a forest that is contained + // in the DFS spanning tree. + std::vector ancestor; + std::vector label; + std::vector size; + std::vector child; - bool changed=false; - typename cfgt::nodet &node=cfg[cfg.entry_map[current]]; - if(node.dominators.empty()) + T get_entry_node(P &program) { - for(const auto &edge : (post_dom ? node.out : node.in)) - if(!cfg[edge.first].dominators.empty()) + if(post_dom) + { + return cfg_dominators.cfg.get_last_node(program); + } + else + { + return cfg_dominators.cfg.get_first_node(program); + } + }; + + /// DFS numbering + /// Number nodes in the order in which they are reached during a DFS, + /// initialize data structures + void dfs(node_indext root) + { + struct dfs_statet + { + node_indext parent; + node_indext current; + }; + std::stack work; + work.push({0, root}); + while(!work.empty()) + { + auto state = work.top(); + work.pop(); + node_indext v = state.current; + if(semi[v] == 0) { - node.dominators=cfg[edge.first].dominators; - node.dominators.insert(current); - changed=true; + // Initialize data structures + parent[v] = state.parent; + semi[v] = ++dfs_counter; + vertex[dfs_counter] = label[v] = v; + ancestor[v] = child[v] = 0; + size[v] = 1; + // Explore children + for_each_successor(v, [&](node_indext w) { work.push({v, w}); }); } + } } - // compute intersection of predecessors - for(const auto &edge : (post_dom ? 
node.out : node.in)) + /// Compress path from v to the root in the tree of the forest that contains + /// v, by directly attaching nodes to the root + void compress(node_indext v) { - const target_sett &other=cfg[edge.first].dominators; - if(other.empty()) - continue; + if(ancestor[ancestor[v]] != 0) + { + compress(ancestor[v]); + if(semi[label[ancestor[v]]] < semi[label[v]]) + { + label[v] = label[ancestor[v]]; + } + ancestor[v] = ancestor[ancestor[v]]; + } + } - typename target_sett::const_iterator n_it=node.dominators.begin(); - typename target_sett::const_iterator o_it=other.begin(); + /// Return node with minimum semidominator on the path from the root of the + /// tree in the forest containing v to v, and compress path + node_indext eval(node_indext v) + { + if(ancestor[v] == 0) + { + return label[v]; + } + compress(v); + if(semi[label[ancestor[v]]] >= semi[label[v]]) + { + return label[v]; + } + return label[ancestor[v]]; + } - // in-place intersection. not safe to use set_intersect - while(n_it!=node.dominators.end() && o_it!=other.end()) + /// Add an edge to the forest + /// \param v: source node of edge + /// \param w: target node of edge + void link(node_indext v, node_indext w) + { + node_indext s = w; + while(semi[label[w]] < semi[label[child[s]]]) { - if(*n_it==current) - ++n_it; - else if(*n_it<*o_it) + if(size[s] + size[child[child[s]]] >= 2 * size[child[s]]) { - changed=true; - node.dominators.erase(n_it++); + ancestor[child[s]] = s; + child[s] = child[child[s]]; } - else if(*o_it<*n_it) - ++o_it; else { - ++n_it; - ++o_it; + size[child[s]] = size[s]; + s = ancestor[s] = child[s]; } } + label[s] = label[w]; + size[v] = size[v] + size[w]; + if(size[v] < 2 * size[w]) + { + std::swap(s, child[v]); + } + while(s != 0) + { + ancestor[s] = v; + s = child[s]; + } + } - while(n_it!=node.dominators.end()) + /// Fill output data structures + void assign_dominators(node_indext root) + { + // Fill dominator tree output data structure + auto dominators_data = 
std::make_shared( + cfg_dominators.cfg.size()); + for(node_indext i = 0; i < cfg_dominators.cfg.size(); ++i) { - if(*n_it==current) - ++n_it; - else + dominators_data->immediate_dominator[i] = dom[i + 1] - 1; + dominators_data->data[i] = cfg_dominators.cfg[i].PC; + } + + // Assign immediate dominator to nodes in the cfg + std::stack work; + work.push(root); + while(!work.empty()) + { + node_indext v = work.top(); + work.pop(); + if(cfg_dominators.cfg[v - 1].dominators.empty()) { - changed=true; - node.dominators.erase(n_it++); + cfg_dominators.cfg[v - 1].dominators = + target_sett(dominators_data, v - 1); + for_each_successor(v, [&](node_indext w) { work.push(w); }); } } } - if(changed) // fixed point for node reached? + /// Perform action on each child node + template + void for_each_successor(node_indext node_index, Action action) + { + // The -1 / +1 adjusts indices from 1 based to 0 based and back + auto ix = node_index - 1; + for(auto const &next : + post_dom ? cfg_dominators.cfg.in(ix) : cfg_dominators.cfg.out(ix)) + { + action(next.first + 1); + } + } + + /// Perform action on each parent node + template + void for_each_predecessor(node_indext node_index, Action action) { - for(const auto &edge : (post_dom ? node.in : node.out)) + auto ix = node_index - 1; + for(auto const &prev : + post_dom ? cfg_dominators.cfg.out(ix) : cfg_dominators.cfg.in(ix)) { - worklist.push_back(cfg[edge.first].PC); + action(prev.first + 1); } } + }; +}; + +/// Dominator tree computation +/// Follows "A fast algorithm for finding dominators in a flow graph" of +/// Lengauer and Tarjan. Node indices are 1-based as in the paper, with the +/// first element (with index 0) of each data structure simply left empty. 
+template +void cfg_dominators_templatet::fixedpointt::fixedpoint( + P &program) +{ + // The nodes in the cfg data structure are represented by indices >= 0 and < + // cfg.size(), whereas the internal data structures of the algorithm use + // 1-based indices to represent nodes + if(cfg_dominators.cfg.nodes_empty(program)) + { + return; } + cfg_dominators.entry_node = get_entry_node(program); + node_indext root = + cfg_dominators.cfg.entry_map[cfg_dominators.entry_node] + 1; + + // The computation is carried out in four steps as given in the paper. + + // Step 1 + // Number nodes in the order in which they are reached during DFS, and + // initialize data structures + dfs_counter = 0; + dfs(root); + + for(node_indext i = dfs_counter; i >= 2; --i) + { + // Step 2 + // Compute semidominators + node_indext w = vertex[i]; + // NOLINTNEXTLINE + for_each_predecessor(w, [&](node_indext v) { + node_indext u = eval(v); + // Reachable nodes may have unreachable nodes as their parents + if(semi[u] != 0 && semi[u] < semi[w]) + { + semi[w] = semi[u]; + } + }); + + bucket[vertex[semi[w]]].insert(w); + link(parent[w], w); + + // Step 3 + // Implicitly define immediate dominator + auto &w_parent_bucket = bucket[parent[w]]; + for(auto v_it = begin(w_parent_bucket); v_it != end(w_parent_bucket);) + { + node_indext v = *v_it; + v_it = w_parent_bucket.erase(v_it); + node_indext u = eval(v); + if(semi[u] < semi[v]) + { + dom[v] = u; + } + else + { + dom[v] = parent[w]; + } + } + } + + // Step 4 + // Compute immediate dominator + for(node_indext i = 2; i <= dfs_counter; ++i) + { + node_indext w = vertex[i]; + if(dom[w] != vertex[semi[w]]) + { + dom[w] = dom[dom[w]]; + } + } + + // Fill output data structures + assign_dominators(root); } -/// Pretty-print a single node in the dominator tree. Supply a specialisation if -/// operator<< is not sufficient. 
-/// \par parameters: `node` to print and stream `out` to pretty-print it to +/// Print the result of the dominator computation +/// \param out: output stream +/// \param cfg_dominators: structure containing the result of the dominator +/// computation +template +std::ostream &operator<<( + std::ostream &out, + const cfg_dominators_templatet &cfg_dominators) +{ + cfg_dominators.output(out); + return out; +} + +/// Compute dominators +template +void cfg_dominators_templatet::operator()(P &program) +{ + initialise(program); + fixedpointt fixedpoint(*this); + fixedpoint.fixedpoint(program); +} + +/// Initialises the elements of the fixed point analysis +template +void cfg_dominators_templatet::initialise(P &program) +{ + cfg(program); +} + +/// Pretty-print a single node. Supply a specialisation if operator<< is not +/// sufficient. +/// \param node: node to print +/// \param out: output stream template void dominators_pretty_print_node(const T &node, std::ostream &out) { out << node; } +/// Pretty-print a single node. +/// \param target: node to print +/// \param out: output stream +template <> inline void dominators_pretty_print_node( const goto_programt::targett& target, std::ostream& out) @@ -180,7 +589,9 @@ inline void dominators_pretty_print_node( out << target->code.pretty(); } + /// Print the result of the dominator computation +/// \param out: output stream template void cfg_dominators_templatet::output(std::ostream &out) const { @@ -213,10 +624,13 @@ typedef cfg_dominators_templatet< const goto_programt, goto_programt::const_targett, true> cfg_post_dominatorst; -template<> +/// Pretty-print a single node. 
+/// \param node: node to print +/// \param out: output stream +template <> inline void dominators_pretty_print_node( - const goto_programt::const_targett &node, - std::ostream &out) + const goto_programt::const_targett& node, + std::ostream& out) { out << node->location_number; } diff --git a/unit/analyses/cfg_dominators.cpp b/unit/analyses/cfg_dominators.cpp new file mode 100644 index 00000000000..dd6e3944e85 --- /dev/null +++ b/unit/analyses/cfg_dominators.cpp @@ -0,0 +1,61 @@ +/*******************************************************************\ + +Author: DiffBlue Limited. All rights reserved. + +\*******************************************************************/ + +#include +#include + +#include +#include +#include + +// Graph: + +SCENARIO("Looking up dominators") +{ + // Graph: + // int x = rand(); + // if (x<0) { + // x = -x; + // } else { + // x = x - 1; + // log(x); + // } + // return x; + using domt = dominatorst; + auto dominators_data = + std::make_shared>( + std::vector{"int x = rand();", + "if (x < 0)", + "x = -x;", + "x = x - 1;", + "log(x);", + "return x;"}, + std::vector{domt::npos, 0, 1, 1, 3, 1}); + + WHEN("Looking at the dominators of the root") + { + THEN("They should only exactly the root") + { + domt root_doms(dominators_data, 0); + REQUIRE(root_doms.size() == 1); + REQUIRE(*root_doms.begin() == dominators_data->data[0]); + } + } + + WHEN( + "Looking at the dominators of a node that should have multiple dominators") + { + THEN("They should actually have multiple dominators") + { + domt log_doms(dominators_data, 4); + REQUIRE(log_doms.size() == 4); + REQUIRE(log_doms.find(dominators_data->data[0]) != log_doms.end()); + REQUIRE(log_doms.find(dominators_data->data[1]) != log_doms.end()); + REQUIRE(log_doms.find(dominators_data->data[3]) != log_doms.end()); + REQUIRE(log_doms.find(dominators_data->data[4]) != log_doms.end()); + } + } +}