diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 79af2c0401d..85a0a323b65 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -163,7 +163,7 @@ struct DeviceContext : public Context { /* A writeable view of routing resource graph to be the ONLY database * for routing resource graph builder functions. */ - RRGraphBuilder rr_graph_builder{&rr_nodes}; + RRGraphBuilder rr_graph_builder{&rr_nodes, &rr_node_metadata, &rr_edge_metadata}; /* A read-only view of routing resource graph to be the ONLY database * for client functions: GUI, placer, router, timing analyzer etc. diff --git a/vpr/src/device/rr_graph_builder.cpp b/vpr/src/device/rr_graph_builder.cpp index 6cf28ca276c..38ca752a620 100644 --- a/vpr/src/device/rr_graph_builder.cpp +++ b/vpr/src/device/rr_graph_builder.cpp @@ -1,8 +1,18 @@ #include "vtr_log.h" #include "rr_graph_builder.h" +#include "vtr_time.h" +#include +#include +//#include -RRGraphBuilder::RRGraphBuilder(t_rr_graph_storage* node_storage) - : node_storage_(*node_storage) { +//#include "globals.h" + +RRGraphBuilder::RRGraphBuilder(t_rr_graph_storage* node_storage, + MetadataStorage* rr_node_metadata, + MetadataStorage>* rr_edge_metadata) + : node_storage_(*node_storage) + , rr_node_metadata_(*rr_node_metadata) + , rr_edge_metadata_(*rr_edge_metadata) { } t_rr_graph_storage& RRGraphBuilder::node_storage() { @@ -50,3 +60,71 @@ void RRGraphBuilder::add_node_to_all_locs(RRNodeId node) { void RRGraphBuilder::clear() { node_lookup_.clear(); } + +void RRGraphBuilder::reorder_nodes(e_rr_node_reorder_algorithm reorder_rr_graph_nodes_algorithm, + int reorder_rr_graph_nodes_threshold, + int reorder_rr_graph_nodes_seed) { + size_t v_num = node_storage_.size(); + if (reorder_rr_graph_nodes_threshold < 0 || v_num < (size_t)reorder_rr_graph_nodes_threshold) return; + vtr::ScopedStartFinishTimer timer("Reordering rr_graph nodes"); + vtr::vector src_order(v_num); // new id -> old id + size_t cur_idx = 0; + for 
(RRNodeId& n : src_order) { // Initialize to [0, 1, 2 ...] + n = RRNodeId(cur_idx++); + } + + // This method works well. The intuition is that highly connected nodes are enumerated first (together), + // and since there will be a lot of nodes with the same degree, they are then ordered based on some + // distance from the starting node. + if (reorder_rr_graph_nodes_algorithm == DEGREE_BFS) { + vtr::vector bfs_idx(v_num); + vtr::vector degree(v_num); + std::queue que; + + // Compute both degree (in + out) and an index based on the BFS traversal + cur_idx = 1; // bfs_idx == 0 doubles as the "not visited yet" marker, so real BFS indices must start at 1 + for (size_t i = 0; i < v_num; ++i) { + if (bfs_idx[RRNodeId(i)]) continue; + que.push(RRNodeId(i)); + bfs_idx[RRNodeId(i)] = cur_idx++; + while (!que.empty()) { + RRNodeId u = que.front(); + que.pop(); + degree[u] += node_storage_.num_edges(u); + for (RREdgeId edge = node_storage_.first_edge(u); edge < node_storage_.last_edge(u); edge = RREdgeId(size_t(edge) + 1)) { + RRNodeId v = node_storage_.edge_sink_node(edge); + degree[v]++; + if (bfs_idx[v]) continue; + bfs_idx[v] = cur_idx++; + que.push(v); + } + } + } + + // Sort by degree primarily, and BFS order secondarily + std::sort(src_order.begin(), src_order.end(), + [&](auto a, auto b) -> bool { + auto deg_a = degree[a]; + auto deg_b = degree[b]; + return deg_a > deg_b || (deg_a == deg_b && bfs_idx[a] < bfs_idx[b]); + }); + } else if (reorder_rr_graph_nodes_algorithm == RANDOM_SHUFFLE) { + std::mt19937 g(reorder_rr_graph_nodes_seed); + std::shuffle(src_order.begin(), src_order.end(), g); + } + vtr::vector dest_order(v_num); + cur_idx = 0; + for (auto u : src_order) + dest_order[u] = RRNodeId(cur_idx++); + + node_storage_.reorder(dest_order, src_order); + + node_lookup().reorder(dest_order); + + rr_node_metadata_.remap_keys([&](int node) { return size_t(dest_order[RRNodeId(node)]); }); + rr_edge_metadata_.remap_keys([&](std::tuple edge) { + return std::make_tuple(size_t(dest_order[RRNodeId(std::get<0>(edge))]), + size_t(dest_order[RRNodeId(std::get<1>(edge))]), + 
std::get<2>(edge)); + }); +} diff --git a/vpr/src/device/rr_graph_builder.h b/vpr/src/device/rr_graph_builder.h index 66fa3e94c28..53df08d1ae2 100644 --- a/vpr/src/device/rr_graph_builder.h +++ b/vpr/src/device/rr_graph_builder.h @@ -15,12 +15,15 @@ */ #include "rr_graph_storage.h" #include "rr_spatial_lookup.h" +#include "metadata_storage.h" class RRGraphBuilder { /* -- Constructors -- */ public: /* See detailed comments about the data structures in the internal data storage section of this file */ - RRGraphBuilder(t_rr_graph_storage* node_storage); + RRGraphBuilder(t_rr_graph_storage* node_storage, + MetadataStorage* rr_node_metadata, + MetadataStorage>* rr_edge_metadata); /* Disable copy constructors and copy assignment operator * This is to avoid accidental copy because it could be an expensive operation considering that the @@ -58,6 +61,29 @@ class RRGraphBuilder { /** @brief Clear all the underlying data storage */ void clear(); + /** @brief reorder all the nodes + * Reordering the rr-graph nodes may be helpful in + * - Increasing cache locality during routing + * - Improving compile time + * Reorder RRNodeId's using one of these algorithms: + * - DEGREE_BFS: Order by degree primarily, and BFS traversal order secondarily. + * - RANDOM_SHUFFLE: Shuffle using the specified seed. Great for testing. + * The DEGREE_BFS algorithm was selected because it had the best performance of seven + * existing algorithms here: https://github.com/SymbiFlow/vtr-rrgraph-reordering-tool + * It might be worth further research, as the DEGREE_BFS algorithm is simple and + * makes some arbitrary choices, such as the starting node. + * The re-ordering algorithm (DEGREE_BFS) does not speed up the router on most architectures + * vs. using the node ordering created by the rr-graph builder in VPR, so it is off by default. 
+ * The other use of this algorithm is for some unit tests; by changing the order of the nodes + * in the rr-graph before routing we check that no code depends on the rr-graph node order. + * Nonetheless, DEGREE_BFS does improve performance by ~7% for the SymbiFlow Xilinx Artix 7 graph. + * + * NOTE: Re-ordering will invalidate any references to rr_graph nodes, so this + * should generally be called before creating such references. + */ + void reorder_nodes(e_rr_node_reorder_algorithm reorder_rr_graph_nodes_algorithm, + int reorder_rr_graph_nodes_threshold, + int reorder_rr_graph_nodes_seed); /** @brief Set capacity of this node (number of routes that can use it). */ inline void set_node_capacity(RRNodeId id, short new_capacity) { @@ -208,6 +234,14 @@ class RRGraphBuilder { t_rr_graph_storage& node_storage_; /* Fast look-up for rr nodes */ RRSpatialLookup node_lookup_; + + /* Metadata is extra data on rr-nodes and edges, respectively, that is not used by vpr + * but simply passed through the flow so that it can be used by downstream tools. 
+ * The main (perhaps only) current use of this metadata is the fasm tool of symbiflow, + * which needs extra metadata on which programming bits control which switch in order to produce a bitstream.*/ + + MetadataStorage& rr_node_metadata_; + MetadataStorage>& rr_edge_metadata_; }; #endif diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index d63c447207a..5242385d9d6 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -34,6 +34,7 @@ #include "router_lookahead_map.h" #include "rr_graph_clock.h" #include "edge_groups.h" +#include "rr_graph_builder.h" #include "rr_types.h" @@ -313,7 +314,7 @@ void create_rr_graph(const t_graph_type graph_type, const int num_directs, int* Warnings) { const auto& device_ctx = g_vpr_ctx.device(); - + auto& mutable_device_ctx = g_vpr_ctx.mutable_device(); if (!det_routing_arch->read_rr_graph_filename.empty()) { if (device_ctx.read_rr_graph_filename != det_routing_arch->read_rr_graph_filename) { free_rr_graph(); @@ -326,8 +327,11 @@ void create_rr_graph(const t_graph_type graph_type, det_routing_arch->read_rr_graph_filename.c_str(), router_opts.read_rr_edge_metadata, router_opts.do_check_rr_graph); - - reorder_rr_graph_nodes(router_opts); + if (router_opts.reorder_rr_graph_nodes_algorithm != DONT_REORDER) { + mutable_device_ctx.rr_graph_builder.reorder_nodes(router_opts.reorder_rr_graph_nodes_algorithm, + router_opts.reorder_rr_graph_nodes_threshold, + router_opts.reorder_rr_graph_nodes_seed); + } } } else { if (channel_widths_unchanged(device_ctx.chan_width, nodes_per_chan) && !device_ctx.rr_nodes.empty()) { @@ -357,7 +361,11 @@ void create_rr_graph(const t_graph_type graph_type, directs, num_directs, &det_routing_arch->wire_to_rr_ipin_switch, Warnings); - reorder_rr_graph_nodes(router_opts); + if (router_opts.reorder_rr_graph_nodes_algorithm != DONT_REORDER) { + mutable_device_ctx.rr_graph_builder.reorder_nodes(router_opts.reorder_rr_graph_nodes_algorithm, + 
router_opts.reorder_rr_graph_nodes_threshold, + router_opts.reorder_rr_graph_nodes_seed); + } } process_non_config_sets(); diff --git a/vpr/src/route/rr_graph_util.cpp b/vpr/src/route/rr_graph_util.cpp index 84b89c6e47c..b1b23194353 100644 --- a/vpr/src/route/rr_graph_util.cpp +++ b/vpr/src/route/rr_graph_util.cpp @@ -83,91 +83,6 @@ int seg_index_of_sblock(int from_node, int to_node) { } } -// Reorder RRNodeId's using one of these algorithms: -// - DONT_REORDER: The identity reordering (does nothing.) -// - DEGREE_BFS: Order by degree primarily, and BFS traversal order secondarily. -// - RANDOM_SHUFFLE: Shuffle using the specified seed. Great for testing. -// The DEGREE_BFS algorithm was selected because it had the best performance of seven -// existing algorithms here: https://github.com/SymbiFlow/vtr-rrgraph-reordering-tool -// It might be worth further research, as the DEGREE_BFS algorithm is simple and -// makes some arbitrary choices, such as the starting node. -// Nonetheless, it does improve performance ~7% for the SymbiFlow Xilinx Artix 7 graph. -// -// NOTE: Re-ordering will invalidate any references to rr_graph nodes, so this -// should generally be called before creating such references. -void reorder_rr_graph_nodes(const t_router_opts& router_opts) { - auto& device_ctx = g_vpr_ctx.mutable_device(); - auto& graph = device_ctx.rr_nodes; - auto& rr_graph = device_ctx.rr_graph; - size_t v_num = graph.size(); - - if (router_opts.reorder_rr_graph_nodes_algorithm == DONT_REORDER) return; - if (router_opts.reorder_rr_graph_nodes_threshold < 0 || v_num < (size_t)router_opts.reorder_rr_graph_nodes_threshold) return; - - vtr::ScopedStartFinishTimer timer("Reordering rr_graph nodes"); - - vtr::vector src_order(v_num); // new id -> old id - size_t cur_idx = 0; - for (RRNodeId& n : src_order) { // Initialize to [0, 1, 2 ...] - n = RRNodeId(cur_idx++); - } - - // This method works well. 
The intution is that highly connected nodes are enumerated first (together), - // and since there will be a lot of nodes with the same degree, they are then ordered based on some - // distance from the starting node. - if (router_opts.reorder_rr_graph_nodes_algorithm == DEGREE_BFS) { - vtr::vector bfs_idx(v_num); - vtr::vector degree(v_num); - std::queue que; - - // Compute both degree (in + out) and an index based on the BFS traversal - cur_idx = 0; - for (size_t i = 0; i < v_num; ++i) { - if (bfs_idx[RRNodeId(i)]) continue; - que.push(RRNodeId(i)); - bfs_idx[RRNodeId(i)] = cur_idx++; - while (!que.empty()) { - RRNodeId u = que.front(); - que.pop(); - degree[u] += graph.num_edges(u); - for (RREdgeId edge = rr_graph.node_first_edge(u); edge < rr_graph.node_last_edge(u); edge = RREdgeId(size_t(edge) + 1)) { - RRNodeId v = graph.edge_sink_node(edge); - degree[v]++; - if (bfs_idx[v]) continue; - bfs_idx[v] = cur_idx++; - que.push(v); - } - } - } - - // Sort by degree primarily, and BFS order secondarily - sort(src_order.begin(), src_order.end(), - [&](auto a, auto b) -> bool { - auto deg_a = degree[a]; - auto deg_b = degree[b]; - return deg_a > deg_b || (deg_a == deg_b && bfs_idx[a] < bfs_idx[b]); - }); - } else if (router_opts.reorder_rr_graph_nodes_algorithm == RANDOM_SHUFFLE) { - std::mt19937 g(router_opts.reorder_rr_graph_nodes_seed); - std::shuffle(src_order.begin(), src_order.end(), g); - } - vtr::vector dest_order(v_num); - cur_idx = 0; - for (auto u : src_order) - dest_order[u] = RRNodeId(cur_idx++); - - graph.reorder(dest_order, src_order); - - device_ctx.rr_graph_builder.node_lookup().reorder(dest_order); - - device_ctx.rr_node_metadata.remap_keys([&](int node) { return size_t(dest_order[RRNodeId(node)]); }); - device_ctx.rr_edge_metadata.remap_keys([&](std::tuple edge) { - return std::make_tuple(size_t(dest_order[RRNodeId(std::get<0>(edge))]), - size_t(dest_order[RRNodeId(std::get<1>(edge))]), - std::get<2>(edge)); - }); -} - vtr::vector> get_fan_in_list() 
{ auto& rr_nodes = g_vpr_ctx.device().rr_nodes; diff --git a/vpr/src/route/rr_graph_util.h b/vpr/src/route/rr_graph_util.h index 514f396f0d2..3b61263c566 100644 --- a/vpr/src/route/rr_graph_util.h +++ b/vpr/src/route/rr_graph_util.h @@ -7,8 +7,6 @@ int seg_index_of_cblock(t_rr_type from_rr_type, int to_node); int seg_index_of_sblock(int from_node, int to_node); -void reorder_rr_graph_nodes(const t_router_opts& router_opts); - // This function generates and returns a vector indexed by RRNodeId // containing a list of fan-in edges for each node. vtr::vector> get_fan_in_list();