Skip to content

Commit 40814f8

Browse files
committed
Build PartitionTree incrementally, tune net-decomposing router
1 parent 920e8ab commit 40814f8

15 files changed

+240
-72
lines changed

vpr/src/route/DecompNetlistRouter.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
/** @file Parallel and net-decomposing case for NetlistRouter. Works like
44
* \see ParallelNetlistRouter, but tries to "decompose" nets and assign them to
5-
* the next level of the partition tree where possible. */
5+
* the next level of the partition tree where possible.
6+
* See "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */
67
#include "netlist_routers.h"
78

89
#include <tbb/task_group.h>
@@ -57,6 +58,8 @@ class DecompNetlistRouter : public NetlistRouter {
5758
* \ref route_net for each net, which will handle other global updates.
5859
* \return RouteIterResults for this iteration. */
5960
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
61+
/** Inform the PartitionTree of the nets with updated bounding boxes */
62+
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
6063
/** Set RCV enable flag for all routers managed by this netlist router.
6164
* Net decomposition does not work with RCV, so calling this fn with x=true is a fatal error. */
6265
void set_rcv_enabled(bool x);
@@ -65,10 +68,14 @@ class DecompNetlistRouter : public NetlistRouter {
6568
private:
6669
/** Should we decompose this net? */
6770
bool should_decompose_net(ParentNetId net_id, const PartitionTreeNode& node);
68-
/** Get a bitset with sinks to route before net decomposition */
71+
/** Get a bitset of sinks to route before net decomposition. Output bitset is
72+
* [1..num_sinks] where the corresponding index is set to 1 if the sink needs to
73+
* be routed */
6974
vtr::dynamic_bitset<> get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node);
70-
/** Get a bitset with sinks to route before virtual net decomposition */
71-
vtr::dynamic_bitset<> get_vnet_decomposition_mask(const VirtualNet& vnet, const PartitionTreeNode& node);
75+
/** Get a bitset of sinks to route before virtual net decomposition. Output bitset is
76+
* [1..num_sinks] where the corresponding index is set to 1 if the sink needs to
77+
* be routed */
78+
vtr::dynamic_bitset<> get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node);
7279
/** Decompose and route a regular net. Output the resulting vnets to \p left and \p right.
7380
* \return Success status: true if routing is successful and left and right now contain valid virtual nets: false otherwise. */
7481
bool decompose_and_route_net(ParentNetId net_id, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right);
@@ -115,6 +122,9 @@ class DecompNetlistRouter : public NetlistRouter {
115122
float _pres_fac;
116123
float _worst_neg_slack;
117124

125+
/** The partition tree. Holds the groups of nets for each partition */
126+
vtr::optional<PartitionTree> _tree;
127+
118128
/** Sinks to be always sampled for decomposition for each net: [0.._net_list.size()-1]
119129
* (i.e. when routing fails after decomposition for a sink, sample it on next iteration) */
120130
vtr::vector<ParentNetId, vtr::dynamic_bitset<>> _net_known_samples;

vpr/src/route/DecompNetlistRouter.tpp

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
/** @file Impls for DecompNetlistRouter */
44

55
#include "DecompNetlistRouter.h"
6+
#include "globals.h"
67
#include "netlist_routers.h"
78
#include "route_net.h"
89
#include "sink_sampling.h"
@@ -21,25 +22,44 @@ inline RouteIterResults DecompNetlistRouter<HeapType>::route_netlist(int itry, f
2122
_pres_fac = pres_fac;
2223
_worst_neg_slack = worst_neg_slack;
2324

25+
vtr::Timer timer;
26+
2427
/* Organize netlist into a PartitionTree.
2528
* Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */
26-
PartitionTree tree(_net_list);
29+
if(!_tree){
30+
_tree = PartitionTree(_net_list);
31+
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s");
32+
}
33+
34+
/* Remove all virtual nets: they are re-created from scratch on every iteration.
35+
* This needs to be done because the partition tree can change between iterations
36+
* due to bounding box updates, which invalidates virtual nets */
37+
_tree->clear_vnets();
2738

2839
/* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */
29-
tbb::task_group g;
30-
route_partition_tree_node(g, tree.root());
31-
g.wait();
40+
tbb::task_group group;
41+
route_partition_tree_node(group, _tree->root());
42+
group.wait();
43+
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");
3244

3345
/* Combine results from threads */
3446
RouteIterResults out;
3547
for (auto& results : _results_th) {
3648
out.stats.combine(results.stats);
3749
out.rerouted_nets.insert(out.rerouted_nets.end(), results.rerouted_nets.begin(), results.rerouted_nets.end());
50+
out.bb_updated_nets.insert(out.bb_updated_nets.end(), results.bb_updated_nets.begin(), results.bb_updated_nets.end());
3851
out.is_routable &= results.is_routable;
3952
}
53+
4054
return out;
4155
}
4256

57+
template<typename HeapType>
58+
void DecompNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& nets) {
59+
VTR_ASSERT(_tree);
60+
_tree->update_nets(nets);
61+
}
62+
4363
template<typename HeapType>
4464
void DecompNetlistRouter<HeapType>::set_rcv_enabled(bool x) {
4565
if (x)
@@ -120,6 +140,10 @@ inline bool should_decompose_vnet(const VirtualNet& vnet, const PartitionTreeNod
120140
template<typename HeapType>
121141
void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) {
122142
auto& route_ctx = g_vpr_ctx.mutable_routing();
143+
vtr::Timer timer;
144+
145+
/* node.nets is an unordered set: copy it into a vector so it can be sorted */
146+
std::vector<ParentNetId> nets(node.nets.begin(), node.nets.end());
123147

124148
/* Sort so that nets with the most sinks are routed first.
125149
* We want to interleave virtual nets with regular ones, so sort an "index vector"
@@ -129,15 +153,14 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g
129153
std::vector<size_t> order(node.nets.size() + node.vnets.size());
130154
std::iota(order.begin(), order.end(), 0);
131155
std::stable_sort(order.begin(), order.end(), [&](size_t i, size_t j) -> bool {
132-
ParentNetId id1 = i < node.nets.size() ? node.nets[i] : node.vnets[i - node.nets.size()].net_id;
133-
ParentNetId id2 = j < node.nets.size() ? node.nets[j] : node.vnets[j - node.nets.size()].net_id;
156+
ParentNetId id1 = i < node.nets.size() ? nets[i] : node.vnets[i - nets.size()].net_id;
157+
ParentNetId id2 = j < node.nets.size() ? nets[j] : node.vnets[j - nets.size()].net_id;
134158
return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size();
135159
});
136160

137-
vtr::Timer t;
138161
for (size_t i : order) {
139-
if (i < node.nets.size()) { /* Regular net (not decomposed) */
140-
ParentNetId net_id = node.nets[i];
162+
if (i < nets.size()) { /* Regular net (not decomposed) */
163+
ParentNetId net_id = nets[i];
141164
if (!should_route_net(_net_list, net_id, _connections_inf, _budgeting_inf, _worst_neg_slack, true))
142165
continue;
143166
/* Setup the net (reset or prune) only once here in the flow. Then all calls to route_net turn off auto-setup */
@@ -188,6 +211,7 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g
188211
if (flags.retry_with_full_bb) {
189212
/* ConnectionRouter thinks we should grow the BB. Do that and leave this net unrouted for now */
190213
route_ctx.route_bb[net_id] = full_device_bb();
214+
_results_th.local().bb_updated_nets.push_back(net_id);
191215
/* Disable decomposition for nets like this: they're already problematic */
192216
_is_decomp_disabled[net_id] = true;
193217
continue;
@@ -206,7 +230,7 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g
206230
continue;
207231
}
208232
}
209-
/* Route the full vnet. Again we don't care about the flags, they should be handled by the regular path */
233+
/* Route the full vnet. We don't care about the flags here: they should be handled by the regular path */
210234
auto sink_mask = get_vnet_sink_mask(vnet);
211235
route_net(
212236
_routers_th.local(),
@@ -234,7 +258,7 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g
234258

235259
PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size())
236260
+ " nets and " + std::to_string(node.vnets.size())
237-
+ " virtual nets routed in " + std::to_string(t.elapsed_sec())
261+
+ " virtual nets routed in " + std::to_string(timer.elapsed_sec())
238262
+ " s");
239263

240264
/* This node is finished: add left & right branches to the task queue */
@@ -277,7 +301,7 @@ inline void make_vnet_pair(ParentNetId net_id, const t_bb& bb, Axis cutline_axis
277301

278302
template<typename HeapType>
279303
bool DecompNetlistRouter<HeapType>::decompose_and_route_net(ParentNetId net_id, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right) {
280-
auto& route_ctx = g_vpr_ctx.routing();
304+
auto& route_ctx = g_vpr_ctx.mutable_routing();
281305
auto& net_bb = route_ctx.route_bb[net_id];
282306

283307
/* Sample enough sinks to provide branch-off points to the virtual nets we create */
@@ -382,7 +406,7 @@ inline std::string describe_vnet(const VirtualNet& vnet) {
382406
template<typename HeapType>
383407
bool DecompNetlistRouter<HeapType>::decompose_and_route_vnet(VirtualNet& vnet, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right) {
384408
/* Sample enough sinks to provide branch-off points to the virtual nets we create */
385-
auto sink_mask = get_vnet_decomposition_mask(vnet, node);
409+
auto sink_mask = get_decomposition_mask_vnet(vnet, node);
386410

387411
/* Route the *parent* net with the given mask: only the sinks we ask for will be routed */
388412
auto flags = route_net(
@@ -499,6 +523,7 @@ inline bool get_reduction_mask(ParentNetId net_id, Axis cutline_axis, int cutlin
499523
template<typename HeapType>
500524
vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node) {
501525
const auto& route_ctx = g_vpr_ctx.routing();
526+
502527
const RouteTree& tree = route_ctx.route_trees[net_id].value();
503528
size_t num_sinks = tree.num_sinks();
504529

@@ -512,6 +537,7 @@ vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_decomposition_mask(Pare
512537
bool is_reduced = get_reduction_mask(net_id, node.cutline_axis, node.cutline_pos, out);
513538

514539
bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1);
540+
515541
if (!is_reduced || source_on_cutline)
516542
convex_hull_downsample(net_id, route_ctx.route_bb[net_id], out);
517543

@@ -638,7 +664,7 @@ inline bool get_reduction_mask_vnet_with_source(const VirtualNet& vnet, Axis cut
638664
}
639665

640666
template<typename HeapType>
641-
vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_vnet_decomposition_mask(const VirtualNet& vnet, const PartitionTreeNode& node) {
667+
vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node) {
642668
const auto& route_ctx = g_vpr_ctx.routing();
643669
const RouteTree& tree = route_ctx.route_trees[vnet.net_id].value();
644670
int num_sinks = tree.num_sinks();
@@ -652,8 +678,9 @@ vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_vnet_decomposition_mask
652678
if (inside_bb(tree.root().inode, vnet.clipped_bb)) { /* We have source, no need to sample after reduction in most cases */
653679
bool is_reduced = get_reduction_mask_vnet_with_source(vnet, node.cutline_axis, node.cutline_pos, out);
654680
bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1);
655-
if (!is_reduced || source_on_cutline)
681+
if (!is_reduced || source_on_cutline){
656682
convex_hull_downsample(vnet.net_id, vnet.clipped_bb, out);
683+
}
657684
} else {
658685
int reduced_sides = get_reduction_mask_vnet_no_source(vnet, node.cutline_axis, node.cutline_pos, out);
659686
if (reduced_sides < 2) {
@@ -666,9 +693,11 @@ vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_vnet_decomposition_mask
666693
/* Sample if a sink is too close to the cutline (and unreached).
667694
* Those sinks are likely to fail routing */
668695
for (size_t isink : isinks) {
696+
RRNodeId rr_sink = route_ctx.net_rr_terminals[vnet.net_id][isink];
697+
if (!inside_bb(rr_sink, vnet.clipped_bb))
698+
continue;
669699
if (is_isink_reached.get(isink))
670700
continue;
671-
RRNodeId rr_sink = route_ctx.net_rr_terminals[vnet.net_id][isink];
672701
if (is_close_to_cutline(rr_sink, node.cutline_axis, node.cutline_pos, 1)) {
673702
out.set(isink, true);
674703
continue;

vpr/src/route/ParallelNetlistRouter.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@
88
*
99
* Note that the parallel router does not support graphical router breakpoints.
1010
*
11-
* [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */
11+
* [0]: "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */
1212
#include "netlist_routers.h"
13+
#include "vtr_optional.h"
1314

1415
#include <tbb/task_group.h>
1516

@@ -52,6 +53,8 @@ class ParallelNetlistRouter : public NetlistRouter {
5253
* \ref route_net for each net, which will handle other global updates.
5354
* \return RouteIterResults for this iteration. */
5455
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
56+
/** Inform the PartitionTree of the nets with updated bounding boxes */
57+
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
5558
void set_rcv_enabled(bool x);
5659
void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);
5760

@@ -95,6 +98,9 @@ class ParallelNetlistRouter : public NetlistRouter {
9598
int _itry;
9699
float _pres_fac;
97100
float _worst_neg_slack;
101+
102+
/** The partition tree. Holds the groups of nets for each partition */
103+
vtr::optional<PartitionTree> _tree;
98104
};
99105

100106
#include "ParallelNetlistRouter.tpp"

vpr/src/route/ParallelNetlistRouter.tpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
/** @file Impls for ParallelNetlistRouter */
44

5+
#include <string>
56
#include "netlist_routers.h"
67
#include "route_net.h"
78
#include "vtr_time.h"
@@ -20,18 +21,24 @@ inline RouteIterResults ParallelNetlistRouter<HeapType>::route_netlist(int itry,
2021

2122
/* Organize netlist into a PartitionTree.
2223
* Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */
23-
PartitionTree tree(_net_list);
24+
vtr::Timer timer;
25+
if(!_tree){
26+
_tree = PartitionTree(_net_list);
27+
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s");
28+
}
2429

2530
/* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */
26-
tbb::task_group g;
27-
route_partition_tree_node(g, tree.root());
28-
g.wait();
31+
tbb::task_group group;
32+
route_partition_tree_node(group, _tree->root());
33+
group.wait();
34+
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");
2935

3036
/* Combine results from threads */
3137
RouteIterResults out;
3238
for (auto& results : _results_th) {
3339
out.stats.combine(results.stats);
3440
out.rerouted_nets.insert(out.rerouted_nets.end(), results.rerouted_nets.begin(), results.rerouted_nets.end());
41+
out.bb_updated_nets.insert(out.bb_updated_nets.end(), results.bb_updated_nets.begin(), results.bb_updated_nets.end());
3542
out.is_routable &= results.is_routable;
3643
}
3744
return out;
@@ -41,13 +48,16 @@ template<typename HeapType>
4148
void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) {
4249
auto& route_ctx = g_vpr_ctx.mutable_routing();
4350

51+
/* node.nets is an unordered set: copy it into a vector so it can be sorted */
52+
std::vector<ParentNetId> nets(node.nets.begin(), node.nets.end());
53+
4454
/* Sort so that nets with the most sinks are routed first. */
45-
std::stable_sort(node.nets.begin(), node.nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
55+
std::stable_sort(nets.begin(), nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
4656
return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size();
4757
});
4858

49-
vtr::Timer t;
50-
for (auto net_id : node.nets) {
59+
vtr::Timer timer;
60+
for (auto net_id : nets) {
5161
auto flags = route_net(
5262
_routers_th.local(),
5363
_net_list,
@@ -76,13 +86,18 @@ void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group&
7686
if (flags.retry_with_full_bb) {
7787
/* ConnectionRouter thinks we should grow the BB. Do that and leave this net unrouted for now */
7888
route_ctx.route_bb[net_id] = full_device_bb();
89+
_results_th.local().bb_updated_nets.push_back(net_id);
7990
continue;
8091
}
8192
if (flags.was_rerouted) {
8293
_results_th.local().rerouted_nets.push_back(net_id);
8394
}
8495
}
85-
PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets routed in " + std::to_string(t.elapsed_sec()) + " s");
96+
97+
PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size())
98+
+ " nets and " + std::to_string(node.vnets.size())
99+
+ " virtual nets routed in " + std::to_string(timer.elapsed_sec())
100+
+ " s");
86101

87102
/* This node is finished: add left & right branches to the task queue */
88103
if (node.left && node.right) {
@@ -97,6 +112,12 @@ void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group&
97112
}
98113
}
99114

115+
template<typename HeapType>
116+
void ParallelNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& nets) {
117+
VTR_ASSERT(_tree);
118+
_tree->update_nets(nets);
119+
}
120+
100121
template<typename HeapType>
101122
void ParallelNetlistRouter<HeapType>::set_rcv_enabled(bool x) {
102123
for (auto& router : _routers_th) {

vpr/src/route/SerialNetlistRouter.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class SerialNetlistRouter : public NetlistRouter {
3535
~SerialNetlistRouter() {}
3636

3737
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
38+
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
3839
void set_rcv_enabled(bool x);
3940
void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);
4041

vpr/src/route/SerialNetlistRouter.tpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44

55
#include "SerialNetlistRouter.h"
66
#include "route_net.h"
7+
#include "vtr_time.h"
78

89
template<typename HeapType>
910
inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, float pres_fac, float worst_neg_slack) {
1011
auto& route_ctx = g_vpr_ctx.mutable_routing();
1112
RouteIterResults out;
1213

14+
vtr::Timer timer;
15+
1316
/* Sort so that nets with the most sinks are routed first */
1417
auto sorted_nets = std::vector<ParentNetId>(_net_list.nets().begin(), _net_list.nets().end());
1518
std::stable_sort(sorted_nets.begin(), sorted_nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
@@ -45,7 +48,9 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
4548
}
4649

4750
if (flags.retry_with_full_bb) {
48-
/* Grow the BB and retry this net right away. */
51+
/* Grow the BB and retry this net right away.
52+
* We don't populate out.bb_updated_nets for the serial router, since
53+
* there is no partition tree to update. */
4954
route_ctx.route_bb[net_id] = full_device_bb();
5055
inet--;
5156
continue;
@@ -59,9 +64,14 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
5964
}
6065
}
6166

67+
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");
6268
return out;
6369
}
6470

71+
template<typename HeapType>
72+
void SerialNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& /* nets */) {
73+
}
74+
6575
template<typename HeapType>
6676
void SerialNetlistRouter<HeapType>::set_rcv_enabled(bool x) {
6777
_router.set_rcv_enabled(x);

0 commit comments

Comments
 (0)