From c9da503e3653b1196e617c64dfc98d4b43e231bd Mon Sep 17 00:00:00 2001 From: Hannes Steffenhagen Date: Mon, 29 Jan 2018 10:40:46 +0000 Subject: [PATCH 1/3] Use more efficient algorithm to calculate dominators Use Lengauer&Tarjan's algorithm to calculate the tree of immediate dominators instead of the previous algorithm, which gives us a significant performance improvement. Also, instead of storing the dominators in a std::set we now just store the immediate dominators and make lookups in there instead. This is a significant performance improvement as long as the dominators set are only iterated over or only queried infrequently, as is currently the case, and also saves a lot of memory in the case of large functions (previous memory usage was quadratic with function size, now it is linear). In cases where a lot of queries are made against the same set of dominators, they can still be copied to a local std::set prior to that. --- src/analyses/cfg_dominators.h | 502 +++++++++++++++++++++++++------ unit/analyses/cfg_dominators.cpp | 61 ++++ 2 files changed, 474 insertions(+), 89 deletions(-) create mode 100644 unit/analyses/cfg_dominators.cpp diff --git a/src/analyses/cfg_dominators.h b/src/analyses/cfg_dominators.h index fa7d23ccf0a..9547c7796a3 100644 --- a/src/analyses/cfg_dominators.h +++ b/src/analyses/cfg_dominators.h @@ -16,17 +16,203 @@ Author: Georg Weissenbacher, georg@weissenbacher.name #include #include #include -#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +template +struct dominators_datat +{ + explicit dominators_datat(std::size_t size) + : data(size), immediate_dominator(size) + { + } + dominators_datat( + std::vector data, + std::vector immediate_dominator) + : data(data), immediate_dominator(immediate_dominator) + { + } + std::vector data; + std::vector immediate_dominator; +}; + +/// An immutable set of dominators. 
Constant memory usage and creation time, +/// but linear lookup time +/// Immutability is necessary because the structure uses sharing +template +class dominatorst +{ +public: + using datat = dominators_datat; + static const node_indext npos; + +private: + std::shared_ptr dominators_data; + node_indext node_index; + mutable std::size_t cached_distance; + +public: + /// Create an empty set + /// Note: Only unreachable nodes should be assigned + /// empty sets after the algorithm completes + dominatorst() : dominators_data(nullptr), node_index(npos), cached_distance(0) + { + } + + /// Create the dominators set of node_index + dominatorst(std::shared_ptr dominators_data, node_indext node_index) + : dominators_data(dominators_data), + node_index(node_index), + cached_distance(npos) + { + } + + dominatorst(const dominatorst &other) + : dominators_data(other.dominators_data), + node_index(other.node_index), + cached_distance(other.cached_distance) + { + } + + dominatorst &operator=(const dominatorst &rhs) + { + dominators_data = rhs.dominators_data; + node_index = rhs.node_index; + cached_distance = rhs.cached_distance; + return *this; + } + + class dominatorst_iteratort + : public std::iterator + { + public: + using parentt = const datat *; + using elemt = const T; + + private: + parentt data; + node_indext current_index; + + public: + dominatorst_iteratort(parentt cfg_dominators, node_indext current_index) + : data(cfg_dominators), current_index(current_index) + { + } + + dominatorst_iteratort &operator++() + { + INVARIANT( + current_index != npos, "Shouldn't try to increment end-iterator"); + current_index = data->immediate_dominator[current_index]; + return *this; + } + + dominatorst_iteratort operator++(int) + { + INVARIANT( + current_index != npos, "Shouldn't try to post-increment end-iterator"); + node_indext tmp = current_index; + ++(*this); + return dominatorst_iteratort(data, tmp); + } + + const elemt *get() const + { + INVARIANT( + current_index != npos, 
"Shouldn't try to dereference end-iterator"); + return &data->data[current_index]; + } + + const elemt &operator*() const + { + return *get(); + } + + const elemt *operator->() const + { + return get(); + } + + bool operator!=(const dominatorst_iteratort &other) const + { + INVARIANT( + data == other.data, "iterators from different sets are not comparable"); + return current_index != other.current_index; + } + + bool operator==(const dominatorst_iteratort &other) const + { + return !(*this != other); + } + }; + + using const_iterator = dominatorst_iteratort; + + const_iterator begin() const + { + return const_iterator(dominators_data.get(), node_index); + } + + const_iterator cbegin() const + { + return begin(); + } + + const_iterator end() const + { + return const_iterator(dominators_data.get(), npos); + } + + const_iterator cend() const + { + return end(); + } + + /// Return an iterator node if node is in this dominators set, end() otherwise + /// Note: O(n), when making many queries against the same set it is probably + /// worth copying into a std::set + + const_iterator find(const T &node) const + { + return std::find(begin(), end(), node); + } + + /// The size of the set; Linear time on the first call, + /// constant after that + std::size_t size() const + { + if(cached_distance == npos) + { + cached_distance = std::distance(begin(), end()); + } + return cached_distance; + } + + bool empty() const + { + return begin() == end(); + } +}; + +template +const node_indext + dominatorst::npos = std::numeric_limits::max(); + template class cfg_dominators_templatet { public: - typedef std::set target_sett; + using node_indext = graph_nodet<>::node_indext; + using target_sett = dominatorst; struct nodet { @@ -44,124 +230,262 @@ class cfg_dominators_templatet protected: void initialise(P &program); - void fixedpoint(P &program); -}; -/// Print the result of the dominator computation -template -std::ostream &operator << ( - std::ostream &out, - const 
cfg_dominators_templatet &cfg_dominators) -{ - cfg_dominators.output(out); - return out; -} - -/// Compute dominators -template -void cfg_dominators_templatet::operator()(P &program) -{ - initialise(program); - fixedpoint(program); -} - -/// Initialises the elements of the fixed point analysis -template -void cfg_dominators_templatet::initialise(P &program) -{ - cfg(program); -} + struct fixedpointt + { + explicit fixedpointt(cfg_dominators_templatet &cfg_dominators) + : cfg_dominators(cfg_dominators), + dfs_counter(0), + parent(cfg_dominators.cfg.size() + 1), + ancestor(cfg_dominators.cfg.size() + 1), + child(cfg_dominators.cfg.size() + 1), + vertex(cfg_dominators.cfg.size() + 1), + dom(cfg_dominators.cfg.size() + 1), + label(cfg_dominators.cfg.size() + 1), + semi(cfg_dominators.cfg.size() + 1), + size(cfg_dominators.cfg.size() + 1), + bucket(cfg_dominators.cfg.size() + 1) + { + } -/// Computes the MOP for the dominator analysis -template -void cfg_dominators_templatet::fixedpoint(P &program) -{ - std::list worklist; + void fixedpoint(P &program); - if(cfg.nodes_empty(program)) - return; + private: + cfg_dominators_templatet &cfg_dominators; + node_indext dfs_counter; + std::vector parent, ancestor, child, vertex, dom; - if(post_dom) - entry_node=cfg.get_last_node(program); - else - entry_node=cfg.get_first_node(program); - typename cfgt::nodet &n=cfg[cfg.entry_map[entry_node]]; - n.dominators.insert(entry_node); + std::vector label, semi, size; - for(typename cfgt::edgest::const_iterator - s_it=(post_dom?n.in:n.out).begin(); - s_it!=(post_dom?n.in:n.out).end(); - ++s_it) - worklist.push_back(cfg[s_it->first].PC); + std::vector> bucket; - while(!worklist.empty()) - { - // get node from worklist - T current=worklist.front(); - worklist.pop_front(); + T get_entry_node(P &program) + { + if(post_dom) + { + return cfg_dominators.cfg.get_last_node(program); + } + else + { + return cfg_dominators.cfg.get_first_node(program); + } + }; - bool changed=false; - typename 
cfgt::nodet &node=cfg[cfg.entry_map[current]]; - if(node.dominators.empty()) + void dfs(node_indext root) { - for(const auto &edge : (post_dom ? node.out : node.in)) - if(!cfg[edge.first].dominators.empty()) + struct dfs_statet + { + node_indext parent; + node_indext current; + }; + std::stack work; + work.push({0, root}); + while(!work.empty()) + { + auto state = work.top(); + work.pop(); + node_indext v = state.current; + if(semi[v] == 0) { - node.dominators=cfg[edge.first].dominators; - node.dominators.insert(current); - changed=true; + parent[v] = state.parent; + semi[v] = ++dfs_counter; + vertex[dfs_counter] = label[v] = v; + ancestor[v] = child[v] = 0; + size[v] = 1; + for_each_successor(v, [&](node_indext w) { work.push({v, w}); }); } + } } - // compute intersection of predecessors - for(const auto &edge : (post_dom ? node.out : node.in)) + void compress(node_indext v) { - const target_sett &other=cfg[edge.first].dominators; - if(other.empty()) - continue; + if(ancestor[ancestor[v]] != 0) + { + compress(ancestor[v]); + if(semi[label[ancestor[v]]] < semi[label[v]]) + { + label[v] = label[ancestor[v]]; + } + ancestor[v] = ancestor[ancestor[v]]; + } + } - typename target_sett::const_iterator n_it=node.dominators.begin(); - typename target_sett::const_iterator o_it=other.begin(); + node_indext eval(node_indext v) + { + if(ancestor[v] == 0) + { + return label[v]; + } + compress(v); + if(semi[label[ancestor[v]]] >= semi[label[v]]) + { + return label[v]; + } + return label[ancestor[v]]; + } - // in-place intersection. 
not safe to use set_intersect - while(n_it!=node.dominators.end() && o_it!=other.end()) + void link(node_indext v, node_indext w) + { + node_indext s = w; + while(semi[label[w]] < semi[label[child[s]]]) { - if(*n_it==current) - ++n_it; - else if(*n_it<*o_it) + if(size[s] + size[child[child[s]]] >= 2 * size[child[s]]) { - changed=true; - node.dominators.erase(n_it++); + ancestor[child[s]] = s; + child[s] = child[child[s]]; } - else if(*o_it<*n_it) - ++o_it; else { - ++n_it; - ++o_it; + size[child[s]] = size[s]; + s = ancestor[s] = child[s]; } } + label[s] = label[w]; + size[v] = size[v] + size[w]; + if(size[v] < 2 * size[w]) + { + std::swap(s, child[v]); + } + while(s != 0) + { + ancestor[s] = v; + s = child[s]; + } + } - while(n_it!=node.dominators.end()) + void assign_dominators(node_indext root) + { + auto dominators_data = std::make_shared( + cfg_dominators.cfg.size()); + for(node_indext i = 0; i < cfg_dominators.cfg.size(); ++i) { - if(*n_it==current) - ++n_it; - else + dominators_data->immediate_dominator[i] = dom[i + 1] - 1; + dominators_data->data[i] = cfg_dominators.cfg[i].PC; + } + std::stack work; + work.push(root); + while(!work.empty()) + { + node_indext v = work.top(); + work.pop(); + if(cfg_dominators.cfg[v - 1].dominators.empty()) { - changed=true; - node.dominators.erase(n_it++); + cfg_dominators.cfg[v - 1].dominators = + target_sett(dominators_data, v - 1); + for_each_successor(v, [&](node_indext w) { work.push(w); }); } } } - if(changed) // fixed point for node reached? + template + void for_each_successor(node_indext node_index, Action action) + { + // the -1 / +1 adjusts indices from 1 based to 0 based and back + auto ix = node_index - 1; + for(auto const &next : + post_dom ? cfg_dominators.cfg.in(ix) : cfg_dominators.cfg.out(ix)) + { + action(next.first + 1); + } + } + + template + void for_each_predecessor(node_indext node_index, Action action) + { + auto ix = node_index - 1; + for(auto const &prev : + post_dom ? 
cfg_dominators.cfg.out(ix) : cfg_dominators.cfg.in(ix)) + { + action(prev.first + 1); + } + } + }; +}; + +template +void cfg_dominators_templatet::fixedpointt::fixedpoint( + P &program) +{ + // Dominator Tree according to Lengauer and Tarjan + // "A fast algorithm for finding dominators in a flow graph" + // This is ununderstandable without reading the paper! + // assumption: Vertex indices >= 0 and < cfg.size() + if(cfg_dominators.cfg.nodes_empty(program)) + { + return; + } + cfg_dominators.entry_node = get_entry_node(program); + node_indext root = + cfg_dominators.cfg.entry_map[cfg_dominators.entry_node] + 1; + dfs_counter = 0; + dfs(root); + for(node_indext i = dfs_counter; i >= 2; --i) + { + node_indext w = vertex[i]; + // NOLINTNEXTLINE + for_each_predecessor(w, [&](node_indext v) { + node_indext u = eval(v); + // reachable nodes may have unreachable + // nodes as their parents + if(semi[u] != 0 && semi[u] < semi[w]) + { + semi[w] = semi[u]; + } + }); + bucket[vertex[semi[w]]].insert(w); + link(parent[w], w); + auto &w_parent_bucket = bucket[parent[w]]; + for(auto v_it = begin(w_parent_bucket); v_it != end(w_parent_bucket);) { - for(const auto &edge : (post_dom ? 
node.in : node.out)) + node_indext v = *v_it; + v_it = w_parent_bucket.erase(v_it); + node_indext u = eval(v); + if(semi[u] < semi[v]) + { + dom[v] = u; + } + else { - worklist.push_back(cfg[edge.first].PC); + dom[v] = parent[w]; } } } + for(node_indext i = 2; i <= dfs_counter; ++i) + { + node_indext w = vertex[i]; + if(dom[w] != vertex[semi[w]]) + { + dom[w] = dom[dom[w]]; + } + } + + assign_dominators(root); +} + +/// Print the result of the dominator computation +template +std::ostream &operator<<( + std::ostream &out, + const cfg_dominators_templatet &cfg_dominators) +{ + cfg_dominators.output(out); + return out; +} + +/// Compute dominators +template +void cfg_dominators_templatet::operator()(P &program) +{ + initialise(program); + fixedpointt fixedpoint(*this); + fixedpoint.fixedpoint(program); +} + +/// Initialises the elements of the fixed point analysis +template +void cfg_dominators_templatet::initialise(P &program) +{ + cfg(program); } /// Pretty-print a single node in the dominator tree. Supply a specialisation if @@ -213,10 +537,10 @@ typedef cfg_dominators_templatet< const goto_programt, goto_programt::const_targett, true> cfg_post_dominatorst; -template<> +template <> inline void dominators_pretty_print_node( - const goto_programt::const_targett &node, - std::ostream &out) + const goto_programt::const_targett& node, + std::ostream& out) { out << node->location_number; } diff --git a/unit/analyses/cfg_dominators.cpp b/unit/analyses/cfg_dominators.cpp new file mode 100644 index 00000000000..dd6e3944e85 --- /dev/null +++ b/unit/analyses/cfg_dominators.cpp @@ -0,0 +1,61 @@ +/*******************************************************************\ + +Author: DiffBlue Limited. All rights reserved. 
+ +\*******************************************************************/ + +#include +#include + +#include +#include +#include + +// Graph: + +SCENARIO("Looking up dominators") +{ + // Graph: + // int x = rand(); + // if (x<0) { + // x = -x; + // } else { + // x = x - 1; + // log(x); + // } + // return x; + using domt = dominatorst; + auto dominators_data = + std::make_shared>( + std::vector{"int x = rand();", + "if (x < 0)", + "x = -x;", + "x = x - 1;", + "log(x);", + "return x;"}, + std::vector{domt::npos, 0, 1, 1, 3, 1}); + + WHEN("Looking at the dominators of the root") + { + THEN("They should only exactly the root") + { + domt root_doms(dominators_data, 0); + REQUIRE(root_doms.size() == 1); + REQUIRE(*root_doms.begin() == dominators_data->data[0]); + } + } + + WHEN( + "Looking at the dominators of a node that should have multiple dominators") + { + THEN("They should actually have multiple dominators") + { + domt log_doms(dominators_data, 4); + REQUIRE(log_doms.size() == 4); + REQUIRE(log_doms.find(dominators_data->data[0]) != log_doms.end()); + REQUIRE(log_doms.find(dominators_data->data[1]) != log_doms.end()); + REQUIRE(log_doms.find(dominators_data->data[3]) != log_doms.end()); + REQUIRE(log_doms.find(dominators_data->data[4]) != log_doms.end()); + } + } +} From a50ce24d213064e6a0e1b6f34732a43987428939 Mon Sep 17 00:00:00 2001 From: Hannes Steffenhagen Date: Fri, 9 Feb 2018 16:41:37 +0000 Subject: [PATCH 2/3] Apply workaround to iterator comparison bug This is a workaround to a bug that gets triggered in dependence_graph.cpp - in the function dep_graph_domaint::control_dependencies, find is called on cfg_dominators_templatet::target_sett, with an iterator parameter called "from". 
It is not ensured that this iterator is from the same list as the iterators within the dominators set; this is a problem according to C++ Draft Standard N3960: 24.2.1 An iterator j is called reachable from an iterator i if and only if there is a finite sequence of applications of the expression ++i that makes i == j. If j is reachable from i, they refer to elements of the same sequence. 24.2.5 The domain of == for forward iterators is that of iterators over the same underlying sequence. --- src/analyses/cfg_dominators.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/analyses/cfg_dominators.h b/src/analyses/cfg_dominators.h index 9547c7796a3..174511c19ea 100644 --- a/src/analyses/cfg_dominators.h +++ b/src/analyses/cfg_dominators.h @@ -183,7 +183,19 @@ class dominatorst const_iterator find(const T &node) const { - return std::find(begin(), end(), node); + std::less less; + // FIXME This works around a bug in other parts of the code + // in particular, dependence_graph.cpp, + // where iterators to different lists than those that are + // stored in this set are passed to find. + // The Debug libstdc++ will (correctly!) run into an assertion failure + // using std::find. std::less for some reason doesn't trigger this assertion + // failure, so we use this as an ugly workaround until that code is fixed.
+ + // NOLINTNEXTLINE + return std::find_if(cbegin(), cend(), [&](const T &other_node) { + return !less(node, other_node) && !less(other_node, node); + }); } /// The size of the set; Linear time on the first call, From 4f89a2bd7bfb8ff437cf5351e7e1e960ddae9923 Mon Sep 17 00:00:00 2001 From: Daniel Poetzl Date: Wed, 14 Feb 2018 18:15:50 +0000 Subject: [PATCH 3/3] Documentation for cfg dominator class --- src/analyses/cfg_dominators.h | 140 ++++++++++++++++++++++++++-------- 1 file changed, 109 insertions(+), 31 deletions(-) diff --git a/src/analyses/cfg_dominators.h b/src/analyses/cfg_dominators.h index 174511c19ea..7cbe26b8f7b 100644 --- a/src/analyses/cfg_dominators.h +++ b/src/analyses/cfg_dominators.h @@ -28,6 +28,9 @@ Author: Georg Weissenbacher, georg@weissenbacher.name #include #include +/// Dominator tree +/// Node indices are 0-based here (unlike in the internal data structures of +/// the algorithm). template struct dominators_datat { @@ -41,13 +44,16 @@ struct dominators_datat : data(data), immediate_dominator(immediate_dominator) { } + + /// Maps node to T std::vector data; + + /// Maps node to its immediate dominator std::vector immediate_dominator; }; -/// An immutable set of dominators. Constant memory usage and creation time, -/// but linear lookup time -/// Immutability is necessary because the structure uses sharing +/// An immutable set of dominators. Constant memory usage and creation time. +/// Immutability is necessary because the structure uses sharing. 
template class dominatorst { @@ -62,8 +68,8 @@ class dominatorst public: /// Create an empty set - /// Note: Only unreachable nodes should be assigned - /// empty sets after the algorithm completes + /// Note: Only unreachable nodes should be assigned empty sets after the + /// algorithm completes dominatorst() : dominators_data(nullptr), node_index(npos), cached_distance(0) { } @@ -180,17 +186,15 @@ class dominatorst /// Return an iterator node if node is in this dominators set, end() otherwise /// Note: O(n), when making many queries against the same set it is probably /// worth copying into a std::set - const_iterator find(const T &node) const { std::less less; - // FIXME This works around a bug in other parts of the code - // in particular, dependence_graph.cpp, - // where iterators to different lists than those that are - // stored in this set are passed to find. - // The Debug libstdc++ will (correctly!) run into an assertion failure - // using std::find. std::less for some reason doesn't trigger this assertion - // failure, so we use this as an ugly workaround until that code is fixed. + // FIXME This works around a bug in other parts of the code, in particular + // dependence_graph.cpp, where iterators to different lists than those that + // are stored in this set are passed to find. The Debug libstdc++ will + // (correctly!) run into an assertion failure using std::find. std::less for + // some reason doesn't trigger this assertion failure, so we use this as an + // ugly workaround until that code is fixed. // NOLINTNEXTLINE return std::find_if(cbegin(), cend(), [&](const T &other_node) { @@ -198,8 +202,7 @@ class dominatorst }); } - /// The size of the set; Linear time on the first call, - /// constant after that + /// The size of the set. Linear time on the first call, constant after that.
std::size_t size() const { if(cached_distance == npos) @@ -219,6 +222,7 @@ template const node_indext dominatorst::npos = std::numeric_limits::max(); +/// Dominators for each instruction in a goto program template class cfg_dominators_templatet { @@ -248,15 +252,17 @@ class cfg_dominators_templatet explicit fixedpointt(cfg_dominators_templatet &cfg_dominators) : cfg_dominators(cfg_dominators), dfs_counter(0), + // Data structures have size cfg.size() + 1 as node indices are 1-based + // to match the paper of Lengauer/Tarjan. parent(cfg_dominators.cfg.size() + 1), - ancestor(cfg_dominators.cfg.size() + 1), - child(cfg_dominators.cfg.size() + 1), vertex(cfg_dominators.cfg.size() + 1), dom(cfg_dominators.cfg.size() + 1), - label(cfg_dominators.cfg.size() + 1), semi(cfg_dominators.cfg.size() + 1), + bucket(cfg_dominators.cfg.size() + 1), + ancestor(cfg_dominators.cfg.size() + 1), + label(cfg_dominators.cfg.size() + 1), size(cfg_dominators.cfg.size() + 1), - bucket(cfg_dominators.cfg.size() + 1) + child(cfg_dominators.cfg.size() + 1) { } @@ -265,12 +271,34 @@ class cfg_dominators_templatet private: cfg_dominators_templatet &cfg_dominators; node_indext dfs_counter; - std::vector parent, ancestor, child, vertex, dom; - std::vector label, semi, size; + /// Maps node to its parent in the DFS-generated spanning tree + std::vector parent; + + /// Maps number to node (according to the DFS numbering) + std::vector vertex; + + /// Maps node to its immediate dominator + std::vector dom; + + /// Maps node to its semi-dominator (as defined by Lengauer/Tarjan) + /// A semidominator of a node w is the minimum node v (according to the DFS + /// numbering) for which there is a path from v to w such that all nodes + /// occurring on that path (other than v, w) have a larger number than w + /// (according to the DFS numbering) + std::vector semi; + /// Maps node to the set of nodes of which it is the semi-dominator std::vector> bucket; + // Used by link() and eval(), which are used to
create and query + // an auxiliary data structure which is a forest that is contained + // in the DFS spanning tree. + std::vector ancestor; + std::vector label; + std::vector size; + std::vector child; + T get_entry_node(P &program) { if(post_dom) @@ -283,6 +311,9 @@ class cfg_dominators_templatet } }; + /// DFS numbering + /// Number nodes in the order in which they are reached during a DFS, + /// and initialize data structures void dfs(node_indext root) { struct dfs_statet @@ -299,16 +330,20 @@ class cfg_dominators_templatet node_indext v = state.current; if(semi[v] == 0) { + // Initialize data structures parent[v] = state.parent; semi[v] = ++dfs_counter; vertex[dfs_counter] = label[v] = v; ancestor[v] = child[v] = 0; size[v] = 1; + // Explore children for_each_successor(v, [&](node_indext w) { work.push({v, w}); }); } } } + /// Compress path from v to the root in the tree of the forest that contains + /// v, by directly attaching nodes to the root void compress(node_indext v) { if(ancestor[ancestor[v]] != 0) @@ -322,6 +357,8 @@ class cfg_dominators_templatet } } + /// Return node with minimum semidominator on the path from the root of the + /// tree in the forest containing v to v, and compress path node_indext eval(node_indext v) { if(ancestor[v] == 0) @@ -336,6 +373,9 @@ class cfg_dominators_templatet return label[ancestor[v]]; } + /// Add an edge to the forest + /// \param v: source node of edge + /// \param w: target node of edge void link(node_indext v, node_indext w) { node_indext s = w; @@ -365,8 +405,10 @@ class cfg_dominators_templatet } } + /// Fill output data structures void assign_dominators(node_indext root) { + // Fill dominator tree output data structure auto dominators_data = std::make_shared( cfg_dominators.cfg.size()); for(node_indext i = 0; i < cfg_dominators.cfg.size(); ++i) @@ -374,6 +416,8 @@ class cfg_dominators_templatet dominators_data->immediate_dominator[i] = dom[i + 1] - 1; dominators_data->data[i] = cfg_dominators.cfg[i].PC; } + + //
Assign immediate dominator to nodes in the cfg std::stack work; work.push(root); while(!work.empty()) @@ -389,10 +433,11 @@ class cfg_dominators_templatet } } + /// Perform action on each child node template void for_each_successor(node_indext node_index, Action action) { - // the -1 / +1 adjusts indices from 1 based to 0 based and back + // The -1 / +1 adjusts indices from 1 based to 0 based and back auto ix = node_index - 1; for(auto const &next : post_dom ? cfg_dominators.cfg.in(ix) : cfg_dominators.cfg.out(ix)) @@ -401,6 +446,7 @@ class cfg_dominators_templatet } } + /// Perform action on each parent node template void for_each_predecessor(node_indext node_index, Action action) { @@ -414,14 +460,17 @@ class cfg_dominators_templatet }; }; +/// Dominator tree computation +/// Follows "A fast algorithm for finding dominators in a flow graph" of +/// Lengauer and Tarjan. Node indices are 1-based as in the paper, with the +/// first element (with index 0) of each data structure simply left empty. template void cfg_dominators_templatet::fixedpointt::fixedpoint( P &program) { - // Dominator Tree according to Lengauer and Tarjan - // "A fast algorithm for finding dominators in a flow graph" - // This is ununderstandable without reading the paper! - // assumption: Vertex indices >= 0 and < cfg.size() + // The nodes in the cfg data structure are represented by indices >= 0 and < + // cfg.size(), whereas the internal data structures of the algorithm use + // 1-based indices to represent nodes if(cfg_dominators.cfg.nodes_empty(program)) { return; @@ -429,23 +478,35 @@ void cfg_dominators_templatet::fixedpointt::fixedpoint( cfg_dominators.entry_node = get_entry_node(program); node_indext root = cfg_dominators.cfg.entry_map[cfg_dominators.entry_node] + 1; + + // The computation is carried out in four steps as given in the paper. 
+ + // Step 1 + // Number nodes in the order in which they are reached during DFS, and + // initialize data structures dfs_counter = 0; dfs(root); + for(node_indext i = dfs_counter; i >= 2; --i) { + // Step 2 + // Compute semidominators node_indext w = vertex[i]; // NOLINTNEXTLINE for_each_predecessor(w, [&](node_indext v) { node_indext u = eval(v); - // reachable nodes may have unreachable - // nodes as their parents + // Reachable nodes may have unreachable nodes as their parents if(semi[u] != 0 && semi[u] < semi[w]) { semi[w] = semi[u]; } }); + bucket[vertex[semi[w]]].insert(w); link(parent[w], w); + + // Step 3 + // Implicitly define immediate dominator auto &w_parent_bucket = bucket[parent[w]]; for(auto v_it = begin(w_parent_bucket); v_it != end(w_parent_bucket);) { @@ -462,6 +523,9 @@ void cfg_dominators_templatet::fixedpointt::fixedpoint( } } } + + // Step 4 + // Compute immediate dominator for(node_indext i = 2; i <= dfs_counter; ++i) { node_indext w = vertex[i]; @@ -471,10 +535,14 @@ void cfg_dominators_templatet::fixedpointt::fixedpoint( } } + // Fill output data structures assign_dominators(root); } /// Print the result of the dominator computation +/// \param out: output stream +/// \param cfg_dominators: structure containing the result of the dominator +/// computation template std::ostream &operator<<( std::ostream &out, @@ -500,15 +568,20 @@ void cfg_dominators_templatet::initialise(P &program) cfg(program); } -/// Pretty-print a single node in the dominator tree. Supply a specialisation if -/// operator<< is not sufficient. -/// \par parameters: `node` to print and stream `out` to pretty-print it to +/// Pretty-print a single node. Supply a specialisation if operator<< is not +/// sufficient. +/// \param node: node to print +/// \param out: output stream template void dominators_pretty_print_node(const T &node, std::ostream &out) { out << node; } +/// Pretty-print a single node.
+/// \param target: node to print +/// \param out: output stream +template <> inline void dominators_pretty_print_node( const goto_programt::targett& target, std::ostream& out) @@ -516,7 +589,9 @@ inline void dominators_pretty_print_node( out << target->code.pretty(); } + /// Print the result of the dominator computation +/// \param out: output stream template void cfg_dominators_templatet::output(std::ostream &out) const { @@ -549,6 +624,9 @@ typedef cfg_dominators_templatet< const goto_programt, goto_programt::const_targett, true> cfg_post_dominatorst; +/// Pretty-print a single node. +/// \param node: node to print +/// \param out: output stream template <> inline void dominators_pretty_print_node( const goto_programt::const_targett& node,