Skip to content

API reorder_rr_graph_node() created in rr_graph_util.cpp #1939

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jan 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion vpr/src/base/vpr_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ struct DeviceContext : public Context {
/* A writeable view of routing resource graph to be the ONLY database
* for routing resource graph builder functions.
*/
RRGraphBuilder rr_graph_builder{&rr_nodes};
RRGraphBuilder rr_graph_builder{&rr_nodes, &rr_node_metadata, &rr_edge_metadata};

/* A read-only view of routing resource graph to be the ONLY database
* for client functions: GUI, placer, router, timing analyzer etc.
Expand Down
82 changes: 80 additions & 2 deletions vpr/src/device/rr_graph_builder.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,18 @@
#include "vtr_log.h"
#include "rr_graph_builder.h"
#include "vtr_time.h"
#include <queue>
#include <random>
//#include <algorithm>

RRGraphBuilder::RRGraphBuilder(t_rr_graph_storage* node_storage)
: node_storage_(*node_storage) {
//#include "globals.h"

// Bind this builder to the node storage and the node/edge metadata stores supplied by the caller.
// Each pointer is dereferenced here to initialize a reference member, so none of them may be null.
RRGraphBuilder::RRGraphBuilder(t_rr_graph_storage* node_storage,
MetadataStorage<int>* rr_node_metadata,
MetadataStorage<std::tuple<int, int, short>>* rr_edge_metadata)
: node_storage_(*node_storage)
, rr_node_metadata_(*rr_node_metadata)
, rr_edge_metadata_(*rr_edge_metadata) {
}

t_rr_graph_storage& RRGraphBuilder::node_storage() {
Expand Down Expand Up @@ -50,3 +60,71 @@ void RRGraphBuilder::add_node_to_all_locs(RRNodeId node) {
// Clear the spatial node lookup (node_lookup_).
// Only node_lookup_ is touched here; node_storage_ and the metadata stores are not modified by this function.
void RRGraphBuilder::clear() {
node_lookup_.clear();
}

/**
 * @brief Renumber the rr-graph nodes and propagate the new numbering to the node
 *        storage, the spatial lookup and the node/edge metadata.
 *
 * @param reorder_rr_graph_nodes_algorithm  DEGREE_BFS orders nodes by descending degree
 *        (in + out) and breaks ties by BFS discovery order; RANDOM_SHUFFLE shuffles the
 *        nodes with a std::mt19937 seeded by reorder_rr_graph_nodes_seed. Any other value
 *        leaves the identity permutation in place (it is still applied to the storage).
 * @param reorder_rr_graph_nodes_threshold  Nothing is done when this is negative or when
 *        the graph has fewer nodes than this value.
 * @param reorder_rr_graph_nodes_seed       Seed used by RANDOM_SHUFFLE only.
 */
void RRGraphBuilder::reorder_nodes(e_rr_node_reorder_algorithm reorder_rr_graph_nodes_algorithm,
                                   int reorder_rr_graph_nodes_threshold,
                                   int reorder_rr_graph_nodes_seed) {
    const size_t v_num = node_storage_.size();
    if (reorder_rr_graph_nodes_threshold < 0 || v_num < static_cast<size_t>(reorder_rr_graph_nodes_threshold)) return;
    vtr::ScopedStartFinishTimer timer("Reordering rr_graph nodes");

    vtr::vector<RRNodeId, RRNodeId> src_order(v_num); // new id -> old id
    {
        size_t next_id = 0;
        for (RRNodeId& n : src_order) { // Initialize to [0, 1, 2 ...]
            n = RRNodeId(next_id++);
        }
    }

    // This method works well. The intuition is that highly connected nodes are enumerated first (together),
    // and since there will be a lot of nodes with the same degree, they are then ordered based on some
    // distance from the starting node.
    if (reorder_rr_graph_nodes_algorithm == DEGREE_BFS) {
        vtr::vector<RRNodeId, size_t> bfs_idx(v_num); // 0 == not discovered yet
        vtr::vector<RRNodeId, size_t> degree(v_num);
        std::queue<RRNodeId> que;

        // Compute both degree (in + out) and an index based on the BFS traversal.
        // BFS indices start at 1 so that 0 can unambiguously mean "not discovered".
        // Starting at 0 would make the very first root look undiscovered, so any edge
        // leading back to it would enqueue it again and double-count its edges.
        size_t next_bfs_idx = 1;
        for (size_t i = 0; i < v_num; ++i) {
            const RRNodeId root(i);
            if (bfs_idx[root] != 0) continue;
            bfs_idx[root] = next_bfs_idx++;
            que.push(root);
            while (!que.empty()) {
                const RRNodeId u = que.front();
                que.pop();
                degree[u] += node_storage_.num_edges(u); // out-degree of u
                const auto edges_end = node_storage_.last_edge(u);
                for (RREdgeId edge = node_storage_.first_edge(u); edge < edges_end; edge = RREdgeId(size_t(edge) + 1)) {
                    const RRNodeId v = node_storage_.edge_sink_node(edge);
                    degree[v]++; // in-degree contribution from u
                    if (bfs_idx[v] != 0) continue;
                    bfs_idx[v] = next_bfs_idx++;
                    que.push(v);
                }
            }
        }

        // Sort by degree primarily, and BFS order secondarily
        std::sort(src_order.begin(), src_order.end(),
                  [&](auto a, auto b) -> bool {
                      const auto deg_a = degree[a];
                      const auto deg_b = degree[b];
                      return deg_a > deg_b || (deg_a == deg_b && bfs_idx[a] < bfs_idx[b]);
                  });
    } else if (reorder_rr_graph_nodes_algorithm == RANDOM_SHUFFLE) {
        std::mt19937 g(reorder_rr_graph_nodes_seed);
        std::shuffle(src_order.begin(), src_order.end(), g);
    }

    // Invert src_order: dest_order maps old id -> new id
    vtr::vector<RRNodeId, RRNodeId> dest_order(v_num);
    {
        size_t new_id = 0;
        for (auto old_id : src_order) {
            dest_order[old_id] = RRNodeId(new_id++);
        }
    }

    node_storage_.reorder(dest_order, src_order);

    node_lookup().reorder(dest_order);

    // Metadata is keyed by raw node indices, so the keys have to follow the renumbering too
    rr_node_metadata_.remap_keys([&](int node) { return size_t(dest_order[RRNodeId(node)]); });
    rr_edge_metadata_.remap_keys([&](std::tuple<int, int, short> edge) {
        return std::make_tuple(size_t(dest_order[RRNodeId(std::get<0>(edge))]),
                               size_t(dest_order[RRNodeId(std::get<1>(edge))]),
                               std::get<2>(edge));
    });
}
36 changes: 35 additions & 1 deletion vpr/src/device/rr_graph_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@
*/
#include "rr_graph_storage.h"
#include "rr_spatial_lookup.h"
#include "metadata_storage.h"

class RRGraphBuilder {
/* -- Constructors -- */
public:
/* See detailed comments about the data structures in the internal data storage section of this file */
RRGraphBuilder(t_rr_graph_storage* node_storage);
RRGraphBuilder(t_rr_graph_storage* node_storage,
MetadataStorage<int>* rr_node_metadata,
MetadataStorage<std::tuple<int, int, short>>* rr_edge_metadata);

/* Disable copy constructors and copy assignment operator
* This is to avoid accidental copy because it could be an expensive operation considering that the
Expand Down Expand Up @@ -58,6 +61,29 @@ class RRGraphBuilder {

/** @brief Clear all the underlying data storage */
void clear();
/** @brief reorder all the nodes
* Reordering the rr-graph nodes may be helpful in
* - Increasing cache locality during routing
* - Improving compile time
* Reorder RRNodeId's using one of these algorithms:
* - DEGREE_BFS: Order by degree primarily, and BFS traversal order secondarily.
* - RANDOM_SHUFFLE: Shuffle using the specified seed. Great for testing.
* The DEGREE_BFS algorithm was selected because it had the best performance of seven
* existing algorithms here: https://github.com/SymbiFlow/vtr-rrgraph-reordering-tool
* It might be worth further research, as the DEGREE_BFS algorithm is simple and
* makes some arbitrary choices, such as the starting node.
* The re-ordering algorithm (DEGREE_BFS) does not speed up the router on most architectures
* vs. using the node ordering created by the rr-graph builder in VPR, so it is off by default.
* The other use of this algorithm is for some unit tests; by changing the order of the nodes
* in the rr-graph before routing, we check that no code depends on the rr-graph node order.
* Nonetheless, it does improve performance ~7% for the SymbiFlow Xilinx Artix 7 graph.
*
* NOTE: Re-ordering will invalidate any references to rr_graph nodes, so this
* should generally be called before creating such references.
*/
void reorder_nodes(e_rr_node_reorder_algorithm reorder_rr_graph_nodes_algorithm,
int reorder_rr_graph_nodes_threshold,
int reorder_rr_graph_nodes_seed);

/** @brief Set capacity of this node (number of routes that can use it). */
inline void set_node_capacity(RRNodeId id, short new_capacity) {
Expand Down Expand Up @@ -208,6 +234,14 @@ class RRGraphBuilder {
t_rr_graph_storage& node_storage_;
/* Fast look-up for rr nodes */
RRSpatialLookup node_lookup_;

/* Metadata is extra data on rr-nodes and edges, respectively, that is not used by vpr
* but simply passed through the flow so that it can be used by downstream tools.
* The main (perhaps only) current use of this metadata is the fasm tool of symbiflow,
* which needs extra metadata on which programming bits control which switch in order to produce a bitstream.*/

MetadataStorage<int>& rr_node_metadata_;
MetadataStorage<std::tuple<int, int, short>>& rr_edge_metadata_;
};

#endif
16 changes: 12 additions & 4 deletions vpr/src/route/rr_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "router_lookahead_map.h"
#include "rr_graph_clock.h"
#include "edge_groups.h"
#include "rr_graph_builder.h"

#include "rr_types.h"

Expand Down Expand Up @@ -313,7 +314,7 @@ void create_rr_graph(const t_graph_type graph_type,
const int num_directs,
int* Warnings) {
const auto& device_ctx = g_vpr_ctx.device();

auto& mutable_device_ctx = g_vpr_ctx.mutable_device();
if (!det_routing_arch->read_rr_graph_filename.empty()) {
if (device_ctx.read_rr_graph_filename != det_routing_arch->read_rr_graph_filename) {
free_rr_graph();
Expand All @@ -326,8 +327,11 @@ void create_rr_graph(const t_graph_type graph_type,
det_routing_arch->read_rr_graph_filename.c_str(),
router_opts.read_rr_edge_metadata,
router_opts.do_check_rr_graph);

reorder_rr_graph_nodes(router_opts);
if (router_opts.reorder_rr_graph_nodes_algorithm != DONT_REORDER) {
mutable_device_ctx.rr_graph_builder.reorder_nodes(router_opts.reorder_rr_graph_nodes_algorithm,
router_opts.reorder_rr_graph_nodes_threshold,
router_opts.reorder_rr_graph_nodes_seed);
}
}
} else {
if (channel_widths_unchanged(device_ctx.chan_width, nodes_per_chan) && !device_ctx.rr_nodes.empty()) {
Expand Down Expand Up @@ -357,7 +361,11 @@ void create_rr_graph(const t_graph_type graph_type,
directs, num_directs,
&det_routing_arch->wire_to_rr_ipin_switch,
Warnings);
reorder_rr_graph_nodes(router_opts);
if (router_opts.reorder_rr_graph_nodes_algorithm != DONT_REORDER) {
mutable_device_ctx.rr_graph_builder.reorder_nodes(router_opts.reorder_rr_graph_nodes_algorithm,
router_opts.reorder_rr_graph_nodes_threshold,
router_opts.reorder_rr_graph_nodes_seed);
}
}

process_non_config_sets();
Expand Down
85 changes: 0 additions & 85 deletions vpr/src/route/rr_graph_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,91 +83,6 @@ int seg_index_of_sblock(int from_node, int to_node) {
}
}

// Reorder RRNodeId's using one of these algorithms:
// - DONT_REORDER: The identity reordering (does nothing.)
// - DEGREE_BFS: Order by degree primarily, and BFS traversal order secondarily.
// - RANDOM_SHUFFLE: Shuffle using the specified seed. Great for testing.
// The DEGREE_BFS algorithm was selected because it had the best performance of seven
// existing algorithms here: https://github.com/SymbiFlow/vtr-rrgraph-reordering-tool
// It might be worth further research, as the DEGREE_BFS algorithm is simple and
// makes some arbitrary choices, such as the starting node.
// Nonetheless, it does improve performance ~7% for the SymbiFlow Xilinx Artix 7 graph.
//
// NOTE: Re-ordering will invalidate any references to rr_graph nodes, so this
// should generally be called before creating such references.
void reorder_rr_graph_nodes(const t_router_opts& router_opts) {
auto& device_ctx = g_vpr_ctx.mutable_device();
auto& graph = device_ctx.rr_nodes;
auto& rr_graph = device_ctx.rr_graph;
size_t v_num = graph.size();

if (router_opts.reorder_rr_graph_nodes_algorithm == DONT_REORDER) return;
if (router_opts.reorder_rr_graph_nodes_threshold < 0 || v_num < (size_t)router_opts.reorder_rr_graph_nodes_threshold) return;

vtr::ScopedStartFinishTimer timer("Reordering rr_graph nodes");

vtr::vector<RRNodeId, RRNodeId> src_order(v_num); // new id -> old id
size_t cur_idx = 0;
for (RRNodeId& n : src_order) { // Initialize to [0, 1, 2 ...]
n = RRNodeId(cur_idx++);
}

// This method works well. The intution is that highly connected nodes are enumerated first (together),
// and since there will be a lot of nodes with the same degree, they are then ordered based on some
// distance from the starting node.
if (router_opts.reorder_rr_graph_nodes_algorithm == DEGREE_BFS) {
vtr::vector<RRNodeId, size_t> bfs_idx(v_num);
vtr::vector<RRNodeId, size_t> degree(v_num);
std::queue<RRNodeId> que;

// Compute both degree (in + out) and an index based on the BFS traversal
cur_idx = 0;
for (size_t i = 0; i < v_num; ++i) {
if (bfs_idx[RRNodeId(i)]) continue;
que.push(RRNodeId(i));
bfs_idx[RRNodeId(i)] = cur_idx++;
while (!que.empty()) {
RRNodeId u = que.front();
que.pop();
degree[u] += graph.num_edges(u);
for (RREdgeId edge = rr_graph.node_first_edge(u); edge < rr_graph.node_last_edge(u); edge = RREdgeId(size_t(edge) + 1)) {
RRNodeId v = graph.edge_sink_node(edge);
degree[v]++;
if (bfs_idx[v]) continue;
bfs_idx[v] = cur_idx++;
que.push(v);
}
}
}

// Sort by degree primarily, and BFS order secondarily
sort(src_order.begin(), src_order.end(),
[&](auto a, auto b) -> bool {
auto deg_a = degree[a];
auto deg_b = degree[b];
return deg_a > deg_b || (deg_a == deg_b && bfs_idx[a] < bfs_idx[b]);
});
} else if (router_opts.reorder_rr_graph_nodes_algorithm == RANDOM_SHUFFLE) {
std::mt19937 g(router_opts.reorder_rr_graph_nodes_seed);
std::shuffle(src_order.begin(), src_order.end(), g);
}
vtr::vector<RRNodeId, RRNodeId> dest_order(v_num);
cur_idx = 0;
for (auto u : src_order)
dest_order[u] = RRNodeId(cur_idx++);

graph.reorder(dest_order, src_order);

device_ctx.rr_graph_builder.node_lookup().reorder(dest_order);

device_ctx.rr_node_metadata.remap_keys([&](int node) { return size_t(dest_order[RRNodeId(node)]); });
device_ctx.rr_edge_metadata.remap_keys([&](std::tuple<int, int, short> edge) {
return std::make_tuple(size_t(dest_order[RRNodeId(std::get<0>(edge))]),
size_t(dest_order[RRNodeId(std::get<1>(edge))]),
std::get<2>(edge));
});
}

vtr::vector<RRNodeId, std::vector<RREdgeId>> get_fan_in_list() {
auto& rr_nodes = g_vpr_ctx.device().rr_nodes;

Expand Down
2 changes: 0 additions & 2 deletions vpr/src/route/rr_graph_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ int seg_index_of_cblock(t_rr_type from_rr_type, int to_node);

int seg_index_of_sblock(int from_node, int to_node);

void reorder_rr_graph_nodes(const t_router_opts& router_opts);

// This function generates and returns a vector indexed by RRNodeId
// containing a list of fan-in edges for each node.
vtr::vector<RRNodeId, std::vector<RREdgeId>> get_fan_in_list();
Expand Down