Skip to content

Commit b382e74

Browse files
committed
address review comments
1 parent a40c0df commit b382e74

11 files changed

+71
-49
lines changed

vpr/src/route/DecompNetlistRouter.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
/** @file Parallel and net-decomposing case for NetlistRouter. Works like
44
* \see ParallelNetlistRouter, but tries to "decompose" nets and assign them to
5-
* the next level of the partition tree where possible. */
5+
* the next level of the partition tree where possible.
6+
* See "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */
67
#include "netlist_routers.h"
78

89
#include <tbb/task_group.h>
@@ -57,6 +58,7 @@ class DecompNetlistRouter : public NetlistRouter {
5758
* \ref route_net for each net, which will handle other global updates.
5859
* \return RouteIterResults for this iteration. */
5960
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
61+
/** Inform the PartitionTree of the nets with updated bounding boxes */
6062
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
6163
/** Set RCV enable flag for all routers managed by this netlist router.
6264
* Net decomposition does not work with RCV, so calling this fn with x=true is a fatal error. */
@@ -66,9 +68,13 @@ class DecompNetlistRouter : public NetlistRouter {
6668
private:
6769
/** Should we decompose this net? */
6870
bool should_decompose_net(ParentNetId net_id, const PartitionTreeNode& node);
69-
/** Get a bitset with sinks to route before net decomposition */
71+
/** Get a bitset of sinks to route before net decomposition. Output bitset is
72+
* [1..num_sinks] where the corresponding index is set to 1 if the sink needs to
73+
* be routed */
7074
vtr::dynamic_bitset<> get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node);
71-
/** Get a bitset with sinks to route before virtual net decomposition */
75+
/** Get a bitset of sinks to route before virtual net decomposition. Output bitset is
76+
* [1..num_sinks] where the corresponding index is set to 1 if the sink needs to
77+
* be routed */
7278
vtr::dynamic_bitset<> get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node);
7379
/** Decompose and route a regular net. Output the resulting vnets to \p left and \p right.
7480
* \return Success status: true if routing is successful and left and right now contain valid virtual nets; false otherwise. */
@@ -116,7 +122,7 @@ class DecompNetlistRouter : public NetlistRouter {
116122
float _pres_fac;
117123
float _worst_neg_slack;
118124

119-
/** The partition tree */
125+
/** The partition tree. Holds the groups of nets for each partition */
120126
vtr::optional<PartitionTree> _tree;
121127

122128
/** Sinks to be always sampled for decomposition for each net: [0.._net_list.size()-1]

vpr/src/route/DecompNetlistRouter.tpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -22,23 +22,25 @@ inline RouteIterResults DecompNetlistRouter<HeapType>::route_netlist(int itry, f
2222
_pres_fac = pres_fac;
2323
_worst_neg_slack = worst_neg_slack;
2424

25-
vtr::Timer t;
25+
vtr::Timer timer;
2626

2727
/* Organize netlist into a PartitionTree.
2828
* Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */
2929
if(!_tree){
3030
_tree = PartitionTree(_net_list);
31-
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(t.elapsed_sec()) + " s");
31+
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s");
3232
}
3333

34-
/* Remove all virtual nets: we will create them for each iteration */
34+
/* Remove all virtual nets: we will create them for each iteration.
35+
* This needs to be done because the partition tree can change between iterations
36+
* due to bounding box updates, which invalidates virtual nets */
3537
_tree->clear_vnets();
3638

3739
/* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */
38-
tbb::task_group g;
39-
route_partition_tree_node(g, _tree->root());
40-
g.wait();
41-
PartitionTreeDebug::log("Routing all nets took " + std::to_string(t.elapsed_sec()) + " s");
40+
tbb::task_group group;
41+
route_partition_tree_node(group, _tree->root());
42+
group.wait();
43+
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");
4244

4345
/* Combine results from threads */
4446
RouteIterResults out;
@@ -52,7 +54,6 @@ inline RouteIterResults DecompNetlistRouter<HeapType>::route_netlist(int itry, f
5254
return out;
5355
}
5456

55-
/* TODO: Handle this in route_netlist */
5657
template<typename HeapType>
5758
void DecompNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& nets) {
5859
VTR_ASSERT(_tree);
@@ -139,8 +140,9 @@ inline bool should_decompose_vnet(const VirtualNet& vnet, const PartitionTreeNod
139140
template<typename HeapType>
140141
void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) {
141142
auto& route_ctx = g_vpr_ctx.mutable_routing();
142-
vtr::Timer t;
143+
vtr::Timer timer;
143144

145+
/* node.nets is an unordered set, copy into vector to sort */
144146
std::vector<ParentNetId> nets(node.nets.begin(), node.nets.end());
145147

146148
/* Sort so that nets with the most sinks are routed first.
@@ -256,7 +258,7 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g
256258

257259
PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size())
258260
+ " nets and " + std::to_string(node.vnets.size())
259-
+ " virtual nets routed in " + std::to_string(t.elapsed_sec())
261+
+ " virtual nets routed in " + std::to_string(timer.elapsed_sec())
260262
+ " s");
261263

262264
/* This node is finished: add left & right branches to the task queue */
@@ -674,7 +676,7 @@ vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_decomposition_mask_vnet
674676
* sinks in the small side and unblock. Add convex hull since we are in a vnet which
675677
* may not have a source at all */
676678
if (inside_bb(tree.root().inode, vnet.clipped_bb)) { /* We have source, no need to sample after reduction in most cases */
677-
bool is_reduced = get_reduction_mask_vnet_with_source(vnet, node.cutline_axis, node.cutline_pos, out);
679+
bool is_reduced = get_reduction_mask_vnet_with_source(vnet, node.cutline_axis, node.cutline_pos, out);
678680
bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1);
679681
if (!is_reduced || source_on_cutline){
680682
convex_hull_downsample(vnet.net_id, vnet.clipped_bb, out);

vpr/src/route/ParallelNetlistRouter.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
* Note that the parallel router does not support graphical router breakpoints.
1010
*
11-
* [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */
11+
* [0]: "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */
1212
#include "netlist_routers.h"
1313
#include "vtr_optional.h"
1414

@@ -53,6 +53,7 @@ class ParallelNetlistRouter : public NetlistRouter {
5353
* \ref route_net for each net, which will handle other global updates.
5454
* \return RouteIterResults for this iteration. */
5555
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
56+
/** Inform the PartitionTree of the nets with updated bounding boxes */
5657
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
5758
void set_rcv_enabled(bool x);
5859
void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);
@@ -98,7 +99,7 @@ class ParallelNetlistRouter : public NetlistRouter {
9899
float _pres_fac;
99100
float _worst_neg_slack;
100101

101-
/** The partition tree */
102+
/** The partition tree. Holds the groups of nets for each partition */
102103
vtr::optional<PartitionTree> _tree;
103104
};
104105

vpr/src/route/ParallelNetlistRouter.tpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,17 @@ inline RouteIterResults ParallelNetlistRouter<HeapType>::route_netlist(int itry,
2121

2222
/* Organize netlist into a PartitionTree.
2323
* Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */
24-
vtr::Timer t;
24+
vtr::Timer timer;
2525
if(!_tree){
2626
_tree = PartitionTree(_net_list);
27-
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(t.elapsed_sec()) + " s");
27+
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s");
2828
}
2929

3030
/* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */
31-
tbb::task_group g;
32-
route_partition_tree_node(g, _tree->root());
33-
g.wait();
34-
PartitionTreeDebug::log("Routing all nets took " + std::to_string(t.elapsed_sec()) + " s");
31+
tbb::task_group group;
32+
route_partition_tree_node(group, _tree->root());
33+
group.wait();
34+
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");
3535

3636
/* Combine results from threads */
3737
RouteIterResults out;
@@ -48,14 +48,15 @@ template<typename HeapType>
4848
void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) {
4949
auto& route_ctx = g_vpr_ctx.mutable_routing();
5050

51+
/* node.nets is an unordered set, copy into vector to sort */
5152
std::vector<ParentNetId> nets(node.nets.begin(), node.nets.end());
5253

5354
/* Sort so net with most sinks is routed first. */
5455
std::stable_sort(nets.begin(), nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
5556
return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size();
5657
});
5758

58-
vtr::Timer t;
59+
vtr::Timer timer;
5960
for (auto net_id : nets) {
6061
auto flags = route_net(
6162
_routers_th.local(),
@@ -95,7 +96,7 @@ void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group&
9596

9697
PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size())
9798
+ " nets and " + std::to_string(node.vnets.size())
98-
+ " virtual nets routed in " + std::to_string(t.elapsed_sec())
99+
+ " virtual nets routed in " + std::to_string(timer.elapsed_sec())
99100
+ " s");
100101

101102
/* This node is finished: add left & right branches to the task queue */
@@ -111,7 +112,6 @@ void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group&
111112
}
112113
}
113114

114-
/* TODO: Handle this in route_netlist */
115115
template<typename HeapType>
116116
void ParallelNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& nets) {
117117
VTR_ASSERT(_tree);

vpr/src/route/SerialNetlistRouter.tpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
1111
auto& route_ctx = g_vpr_ctx.mutable_routing();
1212
RouteIterResults out;
1313

14-
vtr::Timer t;
14+
vtr::Timer timer;
1515

1616
/* Sort so net with most sinks is routed first */
1717
auto sorted_nets = std::vector<ParentNetId>(_net_list.nets().begin(), _net_list.nets().end());
@@ -48,7 +48,9 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
4848
}
4949

5050
if (flags.retry_with_full_bb) {
51-
/* Grow the BB and retry this net right away. We don't populate out.bb_updated_nets */
51+
/* Grow the BB and retry this net right away.
52+
* We don't populate out.bb_updated_nets for the serial router, since
53+
* there is no partition tree to update. */
5254
route_ctx.route_bb[net_id] = full_device_bb();
5355
inet--;
5456
continue;
@@ -62,11 +64,10 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
6264
}
6365
}
6466

65-
PartitionTreeDebug::log("Routing all nets took " + std::to_string(t.elapsed_sec()) + " s");
67+
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");
6668
return out;
6769
}
6870

69-
/* TODO: Handle this in route_netlist */
7071
template<typename HeapType>
7172
void SerialNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& /* nets */) {
7273
}

vpr/src/route/connection_router.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
#include "rr_graph.h"
55
#include "rr_graph_fwd.h"
66

7+
/** Used for the flat router. The node isn't relevant to the target if
8+
* it is an intra-block node outside of our target block */
79
static bool relevant_node_to_target(const RRGraphView* rr_graph,
810
RRNodeId node_to_add,
911
RRNodeId target_node);
@@ -997,12 +999,7 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
997999
continue;
9981000
RRNodeId rr_node_to_add = rt_node.inode;
9991001

1000-
bool is_inside_bb = inside_bb(rr_node_to_add, net_bounding_box);
1001-
1002-
if(!is_inside_bb)
1003-
continue;
1004-
1005-
/* TODO: Why are we doing this? */
1002+
/* Flat router: don't go into clusters other than the target one */
10061003
if (is_flat_) {
10071004
if (!relevant_node_to_target(rr_graph_, rr_node_to_add, target_node))
10081005
continue;
@@ -1041,7 +1038,8 @@ t_bb ConnectionRouter<Heap>::add_high_fanout_route_tree_to_heap(
10411038
}
10421039
if (done) break;
10431040
}
1044-
//If the target bin, and it's surrounding bins were empty, just add the full route tree
1041+
/* If we didn't find enough nodes to branch off near the target
1042+
* or they are on the wrong grid layer, just add the full route tree */
10451043
if (chan_nodes_added <= SINGLE_BIN_MIN_NODES || !found_node_on_same_layer) {
10461044
add_route_tree_to_heap(rt_root, target_node, cost_params, net_bounding_box);
10471045
return net_bounding_box;

vpr/src/route/netlist_routers.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ struct RouteIterResults {
3838
bool is_routable = true;
3939
/** Net IDs with changed routing */
4040
std::vector<ParentNetId> rerouted_nets;
41-
/** Net IDs with changed bounding box */
41+
/** Net IDs with changed bounding box for this iteration.
42+
* Used by the parallel router to update the \ref PartitionTree */
4243
std::vector<ParentNetId> bb_updated_nets;
4344
/** RouterStats for this iteration */
4445
RouterStats stats;
@@ -56,7 +57,8 @@ class NetlistRouter {
5657
* \return RouteIterResults for this iteration. */
5758
virtual RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack) = 0;
5859

59-
/** Handle net bounding box updates. No-op for the serial router */
60+
/** Handle net bounding box updates by passing them to the PartitionTree.
61+
* No-op for the serial router */
6062
virtual void handle_bb_updated_nets(const std::vector<ParentNetId>& nets) = 0;
6163

6264
/** Enable RCV for each of the ConnectionRouters this NetlistRouter manages.*/

vpr/src/route/partition_tree.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,23 @@ PartitionTree::PartitionTree(const Netlist<>& netlist) {
1515
_root = build_helper(netlist, all_nets, 0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1);
1616
}
1717

18+
/** Build a branch of the PartitionTree given a set of \p nets and a bounding box.
19+
* Calls itself recursively with smaller and smaller bounding boxes until there are fewer
20+
* nets than \ref MIN_NETS_TO_PARTITION. */
1821
std::unique_ptr<PartitionTreeNode> PartitionTree::build_helper(const Netlist<>& netlist, const std::unordered_set<ParentNetId>& nets, int x1, int y1, int x2, int y2) {
1922
if (nets.empty())
2023
return nullptr;
2124

2225
const auto& route_ctx = g_vpr_ctx.routing();
26+
27+
/* Only build this for 2 dimensions. Ignore the layers for now */
28+
const auto& device_ctx = g_vpr_ctx.device();
29+
int layer_max = device_ctx.grid.get_num_layers() - 1;
30+
2331
auto out = std::make_unique<PartitionTreeNode>();
2432

2533
if (nets.size() < MIN_NETS_TO_PARTITION) {
26-
out->bb = {x1, x2, y1, y2, 0, 0};
34+
out->bb = {x1, x2, y1, y2, 0, layer_max};
2735
out->nets = nets;
2836
/* Build net to ptree node lookup */
2937
for(auto net_id: nets){
@@ -119,7 +127,7 @@ std::unique_ptr<PartitionTreeNode> PartitionTree::build_helper(const Netlist<>&
119127

120128
/* Couldn't find a cutline: all cutlines result in a one-way cut */
121129
if (std::isnan(best_pos)) {
122-
out->bb = {x1, x2, y1, y2, 0, 0};
130+
out->bb = {x1, x2, y1, y2, 0, layer_max};
123131
out->nets = nets;
124132
/* Build net to ptree node lookup */
125133
for(auto net_id: nets){
@@ -184,7 +192,6 @@ inline bool net_in_ptree_node(ParentNetId net_id, const PartitionTreeNode* node)
184192
return bb.xmin >= node->bb.xmin && bb.xmax <= node->bb.xmax && bb.ymin >= node->bb.ymin && bb.ymax <= node->bb.ymax;
185193
}
186194

187-
/** These nets had a bounding box update. Find new partition tree nodes for them */
188195
void PartitionTree::update_nets(const std::vector<ParentNetId>& nets) {
189196
for(auto net_id: nets){
190197
PartitionTreeNode* old_ptree_node = _net_to_ptree_node[net_id];

vpr/src/route/partition_tree.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,14 @@ class PartitionTree {
8686
/** Access root. Shouldn't cause a segfault, because PartitionTree constructor always makes a _root */
8787
inline PartitionTreeNode& root(void) { return *_root; }
8888

89+
/** Handle nets which had a bounding box update.
90+
* Bounding boxes can only grow, so we should find a new partition tree node for
91+
* these nets by moving them up until they fit in a node's bounds */
8992
void update_nets(const std::vector<ParentNetId>& nets);
9093

94+
/** Delete all virtual nets in the tree. Used for the net decomposing router.
95+
* Virtual nets are invalidated between iterations due to changing bounding
96+
* boxes. */
9197
void clear_vnets(void);
9298

9399
private:

vpr/src/route/sink_sampling.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -116,20 +116,16 @@ inline std::vector<SinkPoint> quickhull(const std::vector<SinkPoint>& points) {
116116
} // namespace sink_sampling
117117

118118
/** Which side of the cutline is this RRNode on?
119-
* Cutlines are always assumed to be at cutline_axis = (cutline_pos + 0.5).
120-
* In the context of the parallel router, a RR node is considered to be inside a bounding
121-
* box if its drive point is inside it (xlow, ylow if the node doesn't have a direction) */
119+
* Cutlines are always assumed to be at cutline_axis = (cutline_pos + 0.5). */
122120
inline Side which_side(RRNodeId inode, Axis cutline_axis, int cutline_pos) {
123121
auto& device_ctx = g_vpr_ctx.device();
124122
const auto& rr_graph = device_ctx.rr_graph;
125123

126-
Direction dir = rr_graph.node_direction(inode);
127-
128124
if (cutline_axis == Axis::X) {
129-
int x = dir == Direction::DEC ? rr_graph.node_xhigh(inode) : rr_graph.node_xlow(inode);
125+
int x = rr_graph.node_xlow(inode);
130126
return Side(x > cutline_pos); /* 1 is RIGHT */
131127
} else {
132-
int y = dir == Direction::DEC ? rr_graph.node_yhigh(inode) : rr_graph.node_ylow(inode);
128+
int y = rr_graph.node_ylow(inode);
133129
return Side(y > cutline_pos);
134130
}
135131
}

vpr/src/route/spatial_route_tree_lookup.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ SpatialRouteTreeLookup build_route_tree_spatial_lookup(const Netlist<>& net_list
1717
float bb_area_per_sink = bb_area / fanout;
1818
float bin_area = BIN_AREA_PER_SINK_FACTOR * bb_area_per_sink;
1919

20+
/* Set a minimum bin dimension so that we don't get minuscule bin sizes
21+
* when flat routing is enabled and every LUT input becomes a sink.
22+
* (P.S. This took some time to debug.) */
2023
constexpr float MIN_BIN_DIM = 3;
2124
float bin_dim = std::max(MIN_BIN_DIM, std::ceil(std::sqrt(bin_area)));
2225

0 commit comments

Comments
 (0)