Skip to content

Commit 4dc3756

Browse files
committed
add DecompNetlistRouter
1 parent fe9089c commit 4dc3756

27 files changed

+1341
-201
lines changed

libs/EXTERNAL/libargparse/argparse_test.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -399,10 +399,11 @@ int main(
399399
.show_in(argparse::ShowIn::HELP_ONLY);
400400
route_grp.add_argument(args.router_algorithm, "--router_algorithm")
401401
.help("Specifies the router algorithm to use.\n"
402-
" * parallel: timing_driven with tricks to run on multiple cores (may be worse)\n"
403-
" * timing driven: focuses on routability and circuit speed\n")
402+
" * timing driven: focuses on routability and circuit speed [default]\n"
403+
" * parallel: timing_driven with nets in different regions of the chip routed in parallel\n"
404+
" * parallel_decomp: timing_driven with additional parallelism obtained by decomposing high-fanout nets, possibly reducing quality\n")
404405
.default_value("timing_driven")
405-
.choices({"parallel", "timing_driven"})
406+
.choices({"parallel", "parallel_decomp", "timing_driven"})
406407
.show_in(argparse::ShowIn::HELP_ONLY);
407408
route_grp.add_argument(args.min_incremental_reroute_fanout, "--min_incremental_reroute_fanout")
408409
.help("The net fanout thershold above which nets will be re-routed incrementally.")

libs/librrgraph/src/base/rr_graph_storage.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -667,11 +667,6 @@ class t_rr_graph_storage {
667667
/// Return the `direction` recorded for RR node `id` in `node_storage`.
/// NOTE(review): the hunk lines marked `-` below remove the CHANX/CHANY type check and its
/// VTR_LOG_ERROR, so after this change `dir_side_.direction` is returned for any node type
/// with no diagnostic -- confirm that no caller relied on that error for non-channel nodes.
static inline Direction get_node_direction(
668668
vtr::array_view_id<RRNodeId, const t_rr_node_data> node_storage,
669669
RRNodeId id) {
670-
auto& node_data = node_storage[id];
671-
if (node_data.type_ != CHANX && node_data.type_ != CHANY) {
672-
VTR_LOG_ERROR("Attempted to access RR node 'direction' for non-channel type '%s'",
673-
rr_node_typename[node_data.type_]);
674-
}
675670
return node_storage[id].dir_side_.direction;
676671
}
677672

libs/libvtrutil/src/vtr_dynamic_bitset.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@ class dynamic_bitset {
2020
static_assert(std::numeric_limits<Storage>::is_integer,
2121
"dynamic_bitset storage must be integer!");
2222

23+
///@brief Construct an empty bitset (the backing word array starts out empty).
constexpr dynamic_bitset() = default;
24+
///@brief Construct a bitset and size it by calling resize(size).
/// NOTE(review): this single-argument constructor is not `explicit`, so an Index value
/// converts implicitly to a dynamic_bitset -- confirm that is intended.
/// NOTE(review): resize() is not declared constexpr, so this constructor cannot actually
/// be evaluated at compile time despite the `constexpr` specifier.
constexpr dynamic_bitset(Index size) {
25+
resize(size);
26+
}
27+
2328
///@brief Resize to the determined size
2429
void resize(size_t size) {
2530
array_.resize((size + kWidth - 1) / kWidth);
@@ -63,6 +68,39 @@ class dynamic_bitset {
6368
return (array_[index_value / kWidth] & (1u << (index_value % kWidth))) != 0;
6469
}
6570

71+
///@brief Return count of set bits.
/// Sums the population count of every storage word in array_.
/// __builtin_popcount() takes an unsigned int, so a Storage type wider than unsigned int
/// would be truncated and undercounted; such words go through __builtin_popcountll() instead.
/// Storage types no wider than unsigned int keep using __builtin_popcount() unchanged.
72+
constexpr size_t count(void) const {
73+
size_t out = 0;
74+
for (auto x : array_)
75+
out += (sizeof(Storage) > sizeof(unsigned int)) ? __builtin_popcountll(x) : __builtin_popcount(x);
76+
return out;
77+
}
78+
79+
///@brief Bitwise OR with rhs. Truncate the operation if one operand is smaller.
/// Only the first min(array_.size(), x.array_.size()) storage words are combined and
/// *this is never resized: words of x past the end of *this are ignored, and words of
/// *this past the end of x are left as they are.
///@return Reference to *this.
80+
constexpr dynamic_bitset<Index, Storage>& operator|=(const dynamic_bitset<Index, Storage>& x) {
81+
size_t n = std::min(array_.size(), x.array_.size());
82+
for (size_t i = 0; i < n; i++)
83+
array_[i] |= x.array_[i];
84+
return *this;
85+
}
86+
87+
///@brief Bitwise AND with rhs. Truncate the operation if one operand is smaller.
/// Only the first min(array_.size(), x.array_.size()) storage words are combined and
/// *this is never resized. Note that when x is shorter, the remaining words of *this are
/// left unchanged: they are NOT cleared, as they would be if x were treated as zero-extended.
///@return Reference to *this.
88+
constexpr dynamic_bitset<Index, Storage>& operator&=(const dynamic_bitset<Index, Storage>& x) {
89+
size_t n = std::min(array_.size(), x.array_.size());
90+
for (size_t i = 0; i < n; i++)
91+
array_[i] &= x.array_[i];
92+
return *this;
93+
}
94+
95+
///@brief Return inverted bitset.
/// Builds a new bitset constructed with size() and stores the bitwise complement of each
/// storage word of *this into it; *this is not modified.
/// NOTE(review): size() is not visible in this hunk -- this assumes a bitset constructed with
/// size() holds at least array_.size() words; confirm.
/// NOTE(review): every bit of every word is flipped, so if the logical size is not a multiple
/// of the word width the padding bits of the last word become set and would be included by
/// count() on the result -- confirm whether that matters to callers.
96+
inline dynamic_bitset<Index, Storage> operator~(void) const {
97+
dynamic_bitset<Index, Storage> out(size());
98+
size_t n = array_.size();
99+
for (size_t i = 0; i < n; i++)
100+
out.array_[i] = ~array_[i];
101+
return out;
102+
}
103+
66104
private:
67105
std::vector<Storage> array_;
68106
};

vpr/src/base/ShowSetup.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,9 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
263263
case PARALLEL:
264264
VTR_LOG("PARALLEL\n");
265265
break;
266+
case PARALLEL_DECOMP:
267+
VTR_LOG("PARALLEL_DECOMP\n");
268+
break;
266269
case TIMING_DRIVEN:
267270
VTR_LOG("TIMING_DRIVEN\n");
268271
break;

vpr/src/base/read_options.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ struct ParseRouterAlgorithm {
174174
ConvertedValue<e_router_algorithm> conv_value;
175175
if (str == "parallel")
176176
conv_value.set_value(PARALLEL);
177+
else if (str == "parallel_decomp")
178+
conv_value.set_value(PARALLEL_DECOMP);
177179
else if (str == "timing_driven")
178180
conv_value.set_value(TIMING_DRIVEN);
179181
else {
@@ -2403,10 +2405,11 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
24032405
route_grp.add_argument<e_router_algorithm, ParseRouterAlgorithm>(args.RouterAlgorithm, "--router_algorithm")
24042406
.help(
24052407
"Specifies the router algorithm to use.\n"
2406-
" * parallel: [experimental] timing_driven but multithreaded\n"
2407-
" * timing_driven: focuses on routability and circuit speed\n")
2408+
" * timing driven: focuses on routability and circuit speed [default]\n"
2409+
" * parallel: timing_driven with nets in different regions of the chip routed in parallel\n"
2410+
" * parallel_decomp: timing_driven with additional parallelism obtained by decomposing high-fanout nets, possibly reducing quality\n")
24082411
.default_value("timing_driven")
2409-
.choices({"parallel", "timing_driven"})
2412+
.choices({"parallel", "parallel_decomp", "timing_driven"})
24102413
.show_in(argparse::ShowIn::HELP_ONLY);
24112414

24122415
route_grp.add_argument(args.min_incremental_reroute_fanout, "--min_incremental_reroute_fanout")

vpr/src/base/vpr_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1345,6 +1345,7 @@ struct t_placer_opts {
13451345

13461346
/// Router algorithm selected with the --router_algorithm command-line option.
/// NOTE(review): the enumerators have no explicit values, so inserting PARALLEL_DECOMP before
/// TIMING_DRIVEN changes TIMING_DRIVEN's underlying value from 1 to 2 -- confirm nothing
/// stores or compares the raw integer value.
enum e_router_algorithm {
13471347
/// "parallel": timing_driven with nets in different regions of the chip routed in parallel.
PARALLEL,
1348+
/// "parallel_decomp": timing_driven with additional parallelism obtained by decomposing
/// high-fanout nets, possibly reducing quality.
PARALLEL_DECOMP,
13481349
/// "timing_driven": focuses on routability and circuit speed (the option's default value).
TIMING_DRIVEN,
13491350
};
13501351

vpr/src/route/DecompNetlistRouter.h

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
#pragma once
2+
3+
/** @file Parallel and net-decomposing case for NetlistRouter. Works like
4+
* \see ParallelNetlistRouter, but tries to "decompose" nets and assign them to
5+
* the next level of the partition tree where possible. */
6+
#include "netlist_routers.h"
7+
8+
#include <tbb/task_group.h>
9+
10+
/** Maximum number of iterations for net decomposition.
11+
 * 5 is found experimentally: higher values get more speedup on the initial iterations, but the number of iterations increases. */
12+
const int MAX_DECOMP_ITER = 5;
13+
14+
/** Maximum # of decomposition for a net: 2 means one net gets divided down to <4 virtual nets.
15+
 * Higher values are more aggressive: better thread utilization but worse congestion resolving.
 * NOTE(review): two levels of splitting in two would give up to 4 virtual nets, so "<4" above
 * may be meant as "<=4" -- confirm against the implementation. */
16+
const int MAX_DECOMP_DEPTH = 2;
17+
18+
/** Minimum # of fanouts (sinks) of a net to consider decomposition. */
19+
const int MIN_DECOMP_SINKS = 8;
20+
21+
/** Minimum # of fanouts (sinks) of a virtual net to consider decomposition. */
22+
const int MIN_DECOMP_SINKS_VNET = 8;
23+
24+
/** NetlistRouter that works like ParallelNetlistRouter but additionally tries to decompose
 * nets into virtual nets and hand them to the next level of the partition tree.
 * \tparam HeapType Heap implementation forwarded to ConnectionRouter<HeapType>. */
template<typename HeapType>
25+
class DecompNetlistRouter : public NetlistRouter {
26+
public:
27+
/** Capture the routing context for later iterations.
 * Arguments taken by reference are stored as reference members, so the referenced objects
 * must outlive this router. \p router_lookahead is dereferenced immediately (see _make_router)
 * and therefore must not be null. The per-net decomposition state (_net_known_samples and
 * _is_decomp_disabled) is sized to net_list.nets().size(). */
DecompNetlistRouter(
28+
const Netlist<>& net_list,
29+
const RouterLookahead* router_lookahead,
30+
const t_router_opts& router_opts,
31+
CBRR& connections_inf,
32+
NetPinsMatrix<float>& net_delay,
33+
const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
34+
std::shared_ptr<SetupHoldTimingInfo> timing_info,
35+
NetPinTimingInvalidator* pin_timing_invalidator,
36+
route_budgets& budgeting_inf,
37+
const RoutingPredictor& routing_predictor,
38+
const vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>>& choking_spots,
39+
bool is_flat)
40+
: _routers_th(_make_router(router_lookahead, is_flat))
41+
, _net_list(net_list)
42+
, _router_opts(router_opts)
43+
, _connections_inf(connections_inf)
44+
, _net_delay(net_delay)
45+
, _netlist_pin_lookup(netlist_pin_lookup)
46+
, _timing_info(timing_info)
47+
, _pin_timing_invalidator(pin_timing_invalidator)
48+
, _budgeting_inf(budgeting_inf)
49+
, _routing_predictor(routing_predictor)
50+
, _choking_spots(choking_spots)
51+
, _is_flat(is_flat)
52+
, _net_known_samples(net_list.nets().size())
53+
, _is_decomp_disabled(net_list.nets().size()) {}
54+
~DecompNetlistRouter() {}
55+
56+
/** Run a single iteration of netlist routing for this->_net_list. This usually means calling
57+
* \ref route_net for each net, which will handle other global updates.
58+
* \return RouteIterResults for this iteration. */
59+
RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);
60+
/** Set RCV enable flag for all routers managed by this netlist router.
61+
* Net decomposition does not work with RCV, so calling this fn with x=true is a fatal error. */
62+
void set_rcv_enabled(bool x);
63+
/** Set the timing info object (defined in DecompNetlistRouter.tpp; presumably replaces _timing_info -- confirm). */
void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);
64+
65+
private:
66+
/** Should we decompose this net? */
67+
bool should_decompose_net(ParentNetId net_id, const PartitionTreeNode& node);
68+
/** Get a bitset with sinks to route before net decomposition */
69+
vtr::dynamic_bitset<> get_decomposition_mask(ParentNetId net_id, const PartitionTreeNode& node);
70+
/** Get a bitset with sinks to route before virtual net decomposition */
71+
vtr::dynamic_bitset<> get_vnet_decomposition_mask(const VirtualNet& vnet, const PartitionTreeNode& node);
72+
/** Decompose and route a regular net. Output the resulting vnets to \p left and \p right.
73+
* \return Success status: true if routing is successful and left and right now contain valid virtual nets: false otherwise. */
74+
bool decompose_and_route_net(ParentNetId net_id, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right);
75+
/** Decompose and route a virtual net. Output the resulting vnets to \p left and \p right.
76+
* \return Success status: true if routing is successful and left and right now contain valid virtual nets: false otherwise. */
77+
bool decompose_and_route_vnet(VirtualNet& vnet, const PartitionTreeNode& node, VirtualNet& left, VirtualNet& right);
78+
/** A single task to route nets inside a PartitionTree node and add tasks for its child nodes to task group \p g. */
79+
void route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node);
80+
81+
/** Build a ConnectionRouter<HeapType> from the global device context (grid, RR graph, RC data,
 * switches) and the mutable routing context's rr_node_route_inf. The constructor passes the
 * result to _routers_th as its initial value. \p router_lookahead is dereferenced here. */
ConnectionRouter<HeapType> _make_router(const RouterLookahead* router_lookahead, bool is_flat) {
82+
auto& device_ctx = g_vpr_ctx.device();
83+
auto& route_ctx = g_vpr_ctx.mutable_routing();
84+
85+
return ConnectionRouter<HeapType>(
86+
device_ctx.grid,
87+
*router_lookahead,
88+
device_ctx.rr_graph.rr_nodes(),
89+
&device_ctx.rr_graph,
90+
device_ctx.rr_rc_data,
91+
device_ctx.rr_graph.rr_switch(),
92+
route_ctx.rr_node_route_inf,
93+
is_flat);
94+
}
95+
96+
/* Context fields. Most of them will be forwarded to route_net (see route_net.tpp) */
97+
/** Per-thread storage for ConnectionRouters. */
98+
tbb::enumerable_thread_specific<ConnectionRouter<HeapType>> _routers_th;
99+
const Netlist<>& _net_list;
100+
const t_router_opts& _router_opts;
101+
CBRR& _connections_inf;
102+
/** Per-thread storage for RouteIterResults. */
103+
tbb::enumerable_thread_specific<RouteIterResults> _results_th;
104+
NetPinsMatrix<float>& _net_delay;
105+
const ClusteredPinAtomPinsLookup& _netlist_pin_lookup;
106+
std::shared_ptr<SetupHoldTimingInfo> _timing_info;
107+
NetPinTimingInvalidator* _pin_timing_invalidator;
108+
route_budgets& _budgeting_inf;
109+
const RoutingPredictor& _routing_predictor;
110+
const vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>>& _choking_spots;
111+
bool _is_flat;
112+
113+
/** Cached routing parameters for current iteration (inputs to \see route_netlist())
 * The constructor does not set these, so they are zero-initialized here to avoid holding
 * indeterminate values before the first route_netlist() call. */
114+
int _itry = 0;
115+
float _pres_fac = 0.f;
116+
float _worst_neg_slack = 0.f;
117+
118+
/** Sinks to be always sampled for decomposition for each net: [0.._net_list.size()-1]
119+
* (i.e. when routing fails after decomposition for a sink, sample it on next iteration) */
120+
vtr::vector<ParentNetId, vtr::dynamic_bitset<>> _net_known_samples;
121+
122+
/** Is decomposition disabled for this net? [0.._net_list.size()-1]
 * NOTE(review): if vtr::vector is backed by std::vector<bool>, elements are bit-packed and
 * writes to different nets from concurrent tasks would race on the shared word -- confirm
 * how this is written in DecompNetlistRouter.tpp. */
123+
vtr::vector<ParentNetId, bool> _is_decomp_disabled;
124+
};
125+
126+
#include "DecompNetlistRouter.tpp"

0 commit comments

Comments
 (0)