Skip to content

Net decomposition: tuning and polishing #2516

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions vpr/src/route/DecompNetlistRouter.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

/** @file Parallel and net-decomposing case for NetlistRouter. Works like
* \see ParallelNetlistRouter, but tries to "decompose" nets and assign them to
* the next level of the partition tree where possible. */
* the next level of the partition tree where possible.
* See "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */
#include "netlist_routers.h"

#include <tbb/task_group.h>
Expand Down Expand Up @@ -57,6 +58,8 @@ class DecompNetlistRouter : public NetlistRouter {
* \ref route_net for each net, which will handle other global updates.
* \return RouteIterResults for this iteration. */
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
/** Inform the PartitionTree of the nets with updated bounding boxes */
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
/** Set RCV enable flag for all routers managed by this netlist router.
* Net decomposition does not work with RCV, so calling this fn with x=true is a fatal error. */
void set_rcv_enabled(bool x);
Expand All @@ -65,10 +68,14 @@ class DecompNetlistRouter : public NetlistRouter {
private:
/** Should we decompose this net? */
bool should_decompose_net(ParentNetId net_id, const PartitionTreeNode& node);
/** Get a bitset with sinks to route before net decomposition */
/** Get a bitset of sinks to route before net decomposition. Output bitset is
* [1..num_sinks] where the corresponding index is set to 1 if the sink needs to
* be routed */
vtr::dynamic_bitset<> get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node);
/** Get a bitset with sinks to route before virtual net decomposition */
vtr::dynamic_bitset<> get_vnet_decomposition_mask(const VirtualNet& vnet, const PartitionTreeNode& node);
/** Get a bitset of sinks to route before virtual net decomposition. Output bitset is
* [1..num_sinks] where the corresponding index is set to 1 if the sink needs to
* be routed */
vtr::dynamic_bitset<> get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node);
/** Decompose and route a regular net. Output the resulting vnets to \p left and \p right.
* \return Success status: true if routing is successful and left and right now contain valid virtual nets: false otherwise. */
bool decompose_and_route_net(ParentNetId net_id, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right);
Expand Down Expand Up @@ -115,6 +122,9 @@ class DecompNetlistRouter : public NetlistRouter {
float _pres_fac;
float _worst_neg_slack;

/** The partition tree. Holds the groups of nets for each partition */
vtr::optional<PartitionTree> _tree;

/** Sinks to be always sampled for decomposition for each net: [0.._net_list.size()-1]
* (i.e. when routing fails after decomposition for a sink, sample it on next iteration) */
vtr::vector<ParentNetId, vtr::dynamic_bitset<>> _net_known_samples;
Expand Down
61 changes: 45 additions & 16 deletions vpr/src/route/DecompNetlistRouter.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
/** @file Impls for DecompNetlistRouter */

#include "DecompNetlistRouter.h"
#include "globals.h"
#include "netlist_routers.h"
#include "route_net.h"
#include "sink_sampling.h"
Expand All @@ -21,25 +22,44 @@ inline RouteIterResults DecompNetlistRouter<HeapType>::route_netlist(int itry, f
_pres_fac = pres_fac;
_worst_neg_slack = worst_neg_slack;

vtr::Timer timer;

/* Organize netlist into a PartitionTree.
* Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */
PartitionTree tree(_net_list);
if(!_tree){
_tree = PartitionTree(_net_list);
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s");
}

/* Remove all virtual nets: we will create them for each iteration.
* This needs to be done because the partition tree can change between iterations
* due to bounding box updates, which invalidates virtual nets */
_tree->clear_vnets();

/* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */
tbb::task_group g;
route_partition_tree_node(g, tree.root());
g.wait();
tbb::task_group group;
route_partition_tree_node(group, _tree->root());
group.wait();
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");

/* Combine results from threads */
RouteIterResults out;
for (auto& results : _results_th) {
out.stats.combine(results.stats);
out.rerouted_nets.insert(out.rerouted_nets.end(), results.rerouted_nets.begin(), results.rerouted_nets.end());
out.bb_updated_nets.insert(out.bb_updated_nets.end(), results.bb_updated_nets.begin(), results.bb_updated_nets.end());
out.is_routable &= results.is_routable;
}

return out;
}

/** Pass the nets whose bounding boxes were updated on to the partition tree.
 * The tree must already exist when this is called; that is asserted below.
 * \param nets Nets with updated bounding boxes. */
template<typename HeapType>
void DecompNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& nets) {
    /* _tree is an optional: it has to hold a tree before we can dereference it */
    VTR_ASSERT(_tree);

    auto& partition_tree = *_tree;
    partition_tree.update_nets(nets);
}

template<typename HeapType>
void DecompNetlistRouter<HeapType>::set_rcv_enabled(bool x) {
if (x)
Expand Down Expand Up @@ -120,6 +140,10 @@ inline bool should_decompose_vnet(const VirtualNet& vnet, const PartitionTreeNod
template<typename HeapType>
void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) {
auto& route_ctx = g_vpr_ctx.mutable_routing();
vtr::Timer timer;

/* node.nets is an unordered set, copy into vector to sort */
std::vector<ParentNetId> nets(node.nets.begin(), node.nets.end());

/* Sort so that nets with the most sinks are routed first.
* We want to interleave virtual nets with regular ones, so sort an "index vector"
Expand All @@ -129,15 +153,14 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g
std::vector<size_t> order(node.nets.size() + node.vnets.size());
std::iota(order.begin(), order.end(), 0);
std::stable_sort(order.begin(), order.end(), [&](size_t i, size_t j) -> bool {
ParentNetId id1 = i < node.nets.size() ? node.nets[i] : node.vnets[i - node.nets.size()].net_id;
ParentNetId id2 = j < node.nets.size() ? node.nets[j] : node.vnets[j - node.nets.size()].net_id;
ParentNetId id1 = i < node.nets.size() ? nets[i] : node.vnets[i - nets.size()].net_id;
ParentNetId id2 = j < node.nets.size() ? nets[j] : node.vnets[j - nets.size()].net_id;
return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size();
});

vtr::Timer t;
for (size_t i : order) {
if (i < node.nets.size()) { /* Regular net (not decomposed) */
ParentNetId net_id = node.nets[i];
if (i < nets.size()) { /* Regular net (not decomposed) */
ParentNetId net_id = nets[i];
if (!should_route_net(_net_list, net_id, _connections_inf, _budgeting_inf, _worst_neg_slack, true))
continue;
/* Setup the net (reset or prune) only once here in the flow. Then all calls to route_net turn off auto-setup */
Expand Down Expand Up @@ -188,6 +211,7 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g
if (flags.retry_with_full_bb) {
/* ConnectionRouter thinks we should grow the BB. Do that and leave this net unrouted for now */
route_ctx.route_bb[net_id] = full_device_bb();
_results_th.local().bb_updated_nets.push_back(net_id);
/* Disable decomposition for nets like this: they're already problematic */
_is_decomp_disabled[net_id] = true;
continue;
Expand All @@ -206,7 +230,7 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g
continue;
}
}
/* Route the full vnet. Again we don't care about the flags, they should be handled by the regular path */
/* Route the full vnet. We don't care about the flags, they should be handled by the regular path */
auto sink_mask = get_vnet_sink_mask(vnet);
route_net(
_routers_th.local(),
Expand Down Expand Up @@ -234,7 +258,7 @@ void DecompNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g

PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size())
+ " nets and " + std::to_string(node.vnets.size())
+ " virtual nets routed in " + std::to_string(t.elapsed_sec())
+ " virtual nets routed in " + std::to_string(timer.elapsed_sec())
+ " s");

/* This node is finished: add left & right branches to the task queue */
Expand Down Expand Up @@ -277,7 +301,7 @@ inline void make_vnet_pair(ParentNetId net_id, const t_bb& bb, Axis cutline_axis

template<typename HeapType>
bool DecompNetlistRouter<HeapType>::decompose_and_route_net(ParentNetId net_id, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right) {
auto& route_ctx = g_vpr_ctx.routing();
auto& route_ctx = g_vpr_ctx.mutable_routing();
auto& net_bb = route_ctx.route_bb[net_id];

/* Sample enough sinks to provide branch-off points to the virtual nets we create */
Expand Down Expand Up @@ -382,7 +406,7 @@ inline std::string describe_vnet(const VirtualNet& vnet) {
template<typename HeapType>
bool DecompNetlistRouter<HeapType>::decompose_and_route_vnet(VirtualNet& vnet, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right) {
/* Sample enough sinks to provide branch-off points to the virtual nets we create */
auto sink_mask = get_vnet_decomposition_mask(vnet, node);
auto sink_mask = get_decomposition_mask_vnet(vnet, node);

/* Route the *parent* net with the given mask: only the sinks we ask for will be routed */
auto flags = route_net(
Expand Down Expand Up @@ -499,6 +523,7 @@ inline bool get_reduction_mask(ParentNetId net_id, Axis cutline_axis, int cutlin
template<typename HeapType>
vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node) {
const auto& route_ctx = g_vpr_ctx.routing();

const RouteTree& tree = route_ctx.route_trees[net_id].value();
size_t num_sinks = tree.num_sinks();

Expand All @@ -512,6 +537,7 @@ vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_decomposition_mask(Pare
bool is_reduced = get_reduction_mask(net_id, node.cutline_axis, node.cutline_pos, out);

bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1);

if (!is_reduced || source_on_cutline)
convex_hull_downsample(net_id, route_ctx.route_bb[net_id], out);

Expand Down Expand Up @@ -638,7 +664,7 @@ inline bool get_reduction_mask_vnet_with_source(const VirtualNet& vnet, Axis cut
}

template<typename HeapType>
vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_vnet_decomposition_mask(const VirtualNet& vnet, const PartitionTreeNode& node) {
vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_decomposition_mask_vnet(const VirtualNet& vnet, const PartitionTreeNode& node) {
const auto& route_ctx = g_vpr_ctx.routing();
const RouteTree& tree = route_ctx.route_trees[vnet.net_id].value();
int num_sinks = tree.num_sinks();
Expand All @@ -652,8 +678,9 @@ vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_vnet_decomposition_mask
if (inside_bb(tree.root().inode, vnet.clipped_bb)) { /* We have source, no need to sample after reduction in most cases */
bool is_reduced = get_reduction_mask_vnet_with_source(vnet, node.cutline_axis, node.cutline_pos, out);
bool source_on_cutline = is_close_to_cutline(tree.root().inode, node.cutline_axis, node.cutline_pos, 1);
if (!is_reduced || source_on_cutline)
if (!is_reduced || source_on_cutline){
convex_hull_downsample(vnet.net_id, vnet.clipped_bb, out);
}
} else {
int reduced_sides = get_reduction_mask_vnet_no_source(vnet, node.cutline_axis, node.cutline_pos, out);
if (reduced_sides < 2) {
Expand All @@ -666,9 +693,11 @@ vtr::dynamic_bitset<> DecompNetlistRouter<HeapType>::get_vnet_decomposition_mask
/* Sample if a sink is too close to the cutline (and unreached).
* Those sinks are likely to fail routing */
for (size_t isink : isinks) {
RRNodeId rr_sink = route_ctx.net_rr_terminals[vnet.net_id][isink];
if (!inside_bb(rr_sink, vnet.clipped_bb))
continue;
if (is_isink_reached.get(isink))
continue;
RRNodeId rr_sink = route_ctx.net_rr_terminals[vnet.net_id][isink];
if (is_close_to_cutline(rr_sink, node.cutline_axis, node.cutline_pos, 1)) {
out.set(isink, true);
continue;
Expand Down
8 changes: 7 additions & 1 deletion vpr/src/route/ParallelNetlistRouter.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
*
* Note that the parallel router does not support graphical router breakpoints.
*
* [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */
* [0]: "Parallel FPGA Routing with On-the-Fly Net Decomposition", FPT'24 */
#include "netlist_routers.h"
#include "vtr_optional.h"

#include <tbb/task_group.h>

Expand Down Expand Up @@ -52,6 +53,8 @@ class ParallelNetlistRouter : public NetlistRouter {
* \ref route_net for each net, which will handle other global updates.
* \return RouteIterResults for this iteration. */
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
/** Inform the PartitionTree of the nets with updated bounding boxes */
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
void set_rcv_enabled(bool x);
void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);

Expand Down Expand Up @@ -95,6 +98,9 @@ class ParallelNetlistRouter : public NetlistRouter {
int _itry;
float _pres_fac;
float _worst_neg_slack;

/** The partition tree. Holds the groups of nets for each partition */
vtr::optional<PartitionTree> _tree;
};

#include "ParallelNetlistRouter.tpp"
37 changes: 29 additions & 8 deletions vpr/src/route/ParallelNetlistRouter.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

/** @file Impls for ParallelNetlistRouter */

#include <string>
#include "netlist_routers.h"
#include "route_net.h"
#include "vtr_time.h"
Expand All @@ -20,18 +21,24 @@ inline RouteIterResults ParallelNetlistRouter<HeapType>::route_netlist(int itry,

/* Organize netlist into a PartitionTree.
* Nets in a given level of nodes are guaranteed to not have any overlapping bounding boxes, so they can be routed in parallel. */
PartitionTree tree(_net_list);
vtr::Timer timer;
if(!_tree){
_tree = PartitionTree(_net_list);
PartitionTreeDebug::log("Iteration " + std::to_string(itry) + ": built partition tree in " + std::to_string(timer.elapsed_sec()) + " s");
}

/* Put the root node on the task queue, which will add its child nodes when it's finished. Wait until the entire tree gets routed. */
tbb::task_group g;
route_partition_tree_node(g, tree.root());
g.wait();
tbb::task_group group;
route_partition_tree_node(group, _tree->root());
group.wait();
PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");

/* Combine results from threads */
RouteIterResults out;
for (auto& results : _results_th) {
out.stats.combine(results.stats);
out.rerouted_nets.insert(out.rerouted_nets.end(), results.rerouted_nets.begin(), results.rerouted_nets.end());
out.bb_updated_nets.insert(out.bb_updated_nets.end(), results.bb_updated_nets.begin(), results.bb_updated_nets.end());
out.is_routable &= results.is_routable;
}
return out;
Expand All @@ -41,13 +48,16 @@ template<typename HeapType>
void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node) {
auto& route_ctx = g_vpr_ctx.mutable_routing();

/* node.nets is an unordered set, copy into vector to sort */
std::vector<ParentNetId> nets(node.nets.begin(), node.nets.end());

/* Sort so net with most sinks is routed first. */
std::stable_sort(node.nets.begin(), node.nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
std::stable_sort(nets.begin(), nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
return _net_list.net_sinks(id1).size() > _net_list.net_sinks(id2).size();
});

vtr::Timer t;
for (auto net_id : node.nets) {
vtr::Timer timer;
for (auto net_id : nets) {
auto flags = route_net(
_routers_th.local(),
_net_list,
Expand Down Expand Up @@ -76,13 +86,18 @@ void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group&
if (flags.retry_with_full_bb) {
/* ConnectionRouter thinks we should grow the BB. Do that and leave this net unrouted for now */
route_ctx.route_bb[net_id] = full_device_bb();
_results_th.local().bb_updated_nets.push_back(net_id);
continue;
}
if (flags.was_rerouted) {
_results_th.local().rerouted_nets.push_back(net_id);
}
}
PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size()) + " nets routed in " + std::to_string(t.elapsed_sec()) + " s");

PartitionTreeDebug::log("Node with " + std::to_string(node.nets.size())
+ " nets and " + std::to_string(node.vnets.size())
+ " virtual nets routed in " + std::to_string(timer.elapsed_sec())
+ " s");

/* This node is finished: add left & right branches to the task queue */
if (node.left && node.right) {
Expand All @@ -97,6 +112,12 @@ void ParallelNetlistRouter<HeapType>::route_partition_tree_node(tbb::task_group&
}
}

/** Pass the nets whose bounding boxes were updated on to the partition tree.
 * The tree must already exist when this is called; that is asserted below.
 * \param nets Nets with updated bounding boxes. */
template<typename HeapType>
void ParallelNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& nets) {
    /* _tree is an optional: it has to hold a tree before we can dereference it */
    VTR_ASSERT(_tree);

    auto& partition_tree = *_tree;
    partition_tree.update_nets(nets);
}

template<typename HeapType>
void ParallelNetlistRouter<HeapType>::set_rcv_enabled(bool x) {
for (auto& router : _routers_th) {
Expand Down
1 change: 1 addition & 0 deletions vpr/src/route/SerialNetlistRouter.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class SerialNetlistRouter : public NetlistRouter {
~SerialNetlistRouter() {}

RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
void handle_bb_updated_nets(const std::vector<ParentNetId>& nets);
void set_rcv_enabled(bool x);
void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);

Expand Down
12 changes: 11 additions & 1 deletion vpr/src/route/SerialNetlistRouter.tpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@

#include "SerialNetlistRouter.h"
#include "route_net.h"
#include "vtr_time.h"

template<typename HeapType>
inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, float pres_fac, float worst_neg_slack) {
auto& route_ctx = g_vpr_ctx.mutable_routing();
RouteIterResults out;

vtr::Timer timer;

/* Sort so net with most sinks is routed first */
auto sorted_nets = std::vector<ParentNetId>(_net_list.nets().begin(), _net_list.nets().end());
std::stable_sort(sorted_nets.begin(), sorted_nets.end(), [&](ParentNetId id1, ParentNetId id2) -> bool {
Expand Down Expand Up @@ -45,7 +48,9 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
}

if (flags.retry_with_full_bb) {
/* Grow the BB and retry this net right away. */
/* Grow the BB and retry this net right away.
* We don't populate out.bb_updated_nets for the serial router, since
* there is no partition tree to update. */
route_ctx.route_bb[net_id] = full_device_bb();
inet--;
continue;
Expand All @@ -59,9 +64,14 @@ inline RouteIterResults SerialNetlistRouter<HeapType>::route_netlist(int itry, f
}
}

PartitionTreeDebug::log("Routing all nets took " + std::to_string(timer.elapsed_sec()) + " s");
return out;
}

/** Bounding-box update hook for the serial router. Intentionally a no-op:
 * the argument is left unnamed and unused (the comment in route_netlist notes
 * that the serial router has no partition tree to update). */
template<typename HeapType>
void SerialNetlistRouter<HeapType>::handle_bb_updated_nets(const std::vector<ParentNetId>& /* nets */) {
}

template<typename HeapType>
void SerialNetlistRouter<HeapType>::set_rcv_enabled(bool x) {
_router.set_rcv_enabled(x);
Expand Down
Loading