Skip to content

Commit b11b2eb

Browse files
committed
[Router] Enhanced the Connection Router by Optimizing the Heap Structure
- The heap node structure in the connection router is simplified by only maintaining the node index (int32) and node total cost (fp32).
- Eliminated dynamic memory allocation for the heap node structure.
- The above optimization leads to an algorithmic change for path search; specifically, we need to update node states right before pushing and modify the prune functions.

The RCV feature also works when using `--routing_budgets_algorithm yoyo` for the VPR launch.
1 parent a342b67 commit b11b2eb

File tree

7 files changed

+503
-234
lines changed

7 files changed

+503
-234
lines changed

.gitignore

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,4 +153,10 @@ tags
153153
.idea
154154
cmake-build-debug
155155
cmake-build-release
156-
/.metadata/
156+
/.metadata/
157+
158+
#
159+
# Clangd
160+
#
161+
.cache
162+
compile_commands.json

vpr/src/base/vpr_types.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1684,6 +1684,13 @@ struct t_rr_node_route_inf {
16841684
float acc_cost;
16851685
float path_cost;
16861686
float backward_path_cost;
1687+
float R_upstream; // TODO: Investigate the effect of adding the R_upstream field in
1688+
// this struct. It is put in for the fine-grained parallel
1689+
// router's benefits. It is increasing the working set, which
1690+
// can have some performance implications. This could affect
1691+
// the performance of the serial connection router, which will
1692+
// make the Hybrid Connection Router less efficient (but that
1693+
// needs to be investigated).
16871694

16881695
public: //Accessors
16891696
short occ() const { return occ_; }

vpr/src/route/connection_router.cpp

Lines changed: 160 additions & 169 deletions
Large diffs are not rendered by default.

vpr/src/route/connection_router.h

Lines changed: 45 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,20 @@
1010
#include "router_stats.h"
1111
#include "spatial_route_tree_lookup.h"
1212

13+
#include "d_ary_heap.h"
14+
15+
// `node_t` is a simplified version of `t_heap`, and is used as a bundle of node
16+
// information in the functions inside the routing loop.
17+
struct node_t {
18+
float total_cost;
19+
float backward_path_cost;
20+
float R_upstream;
21+
RREdgeId prev_edge;
22+
t_heap_path* path_data;
23+
};
24+
1325
// Prune the heap when it contains 4x the number of nodes in the RR graph.
14-
constexpr size_t kHeapPruneFactor = 4;
26+
// constexpr size_t kHeapPruneFactor = 4;
1527

1628
// This class encapsulates the timing driven connection router. This class
1729
// routes from some initial set of sources (via the input rt tree) to a
@@ -46,8 +58,13 @@ class ConnectionRouter : public ConnectionRouterInterface {
4658
, router_stats_(nullptr)
4759
, router_debug_(false) {
4860
heap_.init_heap(grid);
49-
heap_.set_prune_limit(rr_nodes_.size(), kHeapPruneFactor * rr_nodes_.size());
61+
// heap_.set_prune_limit(rr_nodes_.size(), kHeapPruneFactor * rr_nodes_.size());
5062
only_opin_inter_layer = (grid.get_num_layers() > 1) && inter_layer_connections_limited_to_opin(*rr_graph);
63+
rcv_path_data.resize(rr_node_route_inf.size());
64+
}
65+
66+
~ConnectionRouter() {
67+
VTR_LOG("Serial Connection Router is being destroyed. Time spent computing SSSP: %.3f seconds\n.", this->sssp_total_time.count() / 1000000.0);
5168
}
5269

5370
// Clears the modified list. Should be called after reset_path_costs
@@ -58,7 +75,12 @@ class ConnectionRouter : public ConnectionRouterInterface {
5875

5976
// Reset modified data in rr_node_route_inf based on modified_rr_node_inf.
6077
void reset_path_costs() final {
78+
// Reset the node info stored in rr_node_route_inf variable
6179
::reset_path_costs(modified_rr_node_inf_);
80+
// Reset the node info stored inside the connection router
81+
for (const auto& node : modified_rr_node_inf_) {
82+
rcv_path_data[node] = nullptr;
83+
}
6284
}
6385

6486
/** Finds a path from the route tree rooted at rt_root to sink_node.
@@ -137,17 +159,17 @@ class ConnectionRouter : public ConnectionRouterInterface {
137159
}
138160

139161
// Update the route path to the node pointed to by cheapest.
140-
inline void update_cheapest(t_heap* cheapest) {
141-
update_cheapest(cheapest, &rr_node_route_inf_[cheapest->index]);
162+
inline void update_cheapest(const node_t& cheapest, RRNodeId inode) {
163+
update_cheapest(cheapest, inode, &rr_node_route_inf_[inode]);
142164
}
143165

144-
inline void update_cheapest(t_heap* cheapest, t_rr_node_route_inf* route_inf) {
166+
inline void update_cheapest(const node_t& cheapest, RRNodeId inode, t_rr_node_route_inf* route_inf) {
145167
//Record final link to target
146-
add_to_mod_list(cheapest->index);
168+
add_to_mod_list(inode);
147169

148-
route_inf->prev_edge = cheapest->prev_edge();
149-
route_inf->path_cost = cheapest->cost;
150-
route_inf->backward_path_cost = cheapest->backward_path_cost;
170+
route_inf->prev_edge = cheapest.prev_edge;
171+
route_inf->path_cost = cheapest.total_cost;
172+
route_inf->backward_path_cost = cheapest.backward_path_cost;
151173
}
152174

153175
/** Common logic from timing_driven_route_connection_from_route_tree and
@@ -159,7 +181,7 @@ class ConnectionRouter : public ConnectionRouterInterface {
159181
* @param[in] bounding_box Keep search confined to this bounding box
160182
* @return bool Signal to retry this connection with a full-device bounding box,
161183
* @return t_heap* Heap element describing the path found. */
162-
std::tuple<bool, t_heap*> timing_driven_route_connection_common_setup(
184+
bool timing_driven_route_connection_common_setup(
163185
const RouteTreeNode& rt_root,
164186
RRNodeId sink_node,
165187
const t_conn_cost_params& cost_params,
@@ -174,24 +196,26 @@ class ConnectionRouter : public ConnectionRouterInterface {
174196
//
175197
// Returns either the last element of the path, or nullptr if no path is
176198
// found
177-
t_heap* timing_driven_route_connection_from_heap(
199+
void timing_driven_route_connection_from_heap(
178200
RRNodeId sink_node,
179201
const t_conn_cost_params& cost_params,
180202
const t_bb& bounding_box);
181203

182204
// Expand this current node if it is a cheaper path.
183205
void timing_driven_expand_cheapest(
184-
t_heap* cheapest,
206+
RRNodeId from_node,
207+
float new_total_cost,
185208
RRNodeId target_node,
186209
const t_conn_cost_params& cost_params,
187210
const t_bb& bounding_box,
188211
const t_bb& target_bb);
189212

190213
// Expand each neighbor of the current node.
191214
void timing_driven_expand_neighbours(
192-
t_heap* current,
215+
const node_t& current,
193216
const t_conn_cost_params& cost_params,
194217
const t_bb& bounding_box,
218+
RRNodeId from_node,
195219
RRNodeId target_node,
196220
const t_bb& target_bb);
197221

@@ -201,7 +225,7 @@ class ConnectionRouter : public ConnectionRouterInterface {
201225
// RR nodes outside bounding box specified in bounding_box are not added
202226
// to the heap.
203227
void timing_driven_expand_neighbour(
204-
t_heap* current,
228+
const node_t& current,
205229
RRNodeId from_node,
206230
RREdgeId from_edge,
207231
RRNodeId to_node,
@@ -214,15 +238,15 @@ class ConnectionRouter : public ConnectionRouterInterface {
214238
// non-configurable edges
215239
void timing_driven_add_to_heap(
216240
const t_conn_cost_params& cost_params,
217-
const t_heap* current,
241+
const node_t& current,
218242
RRNodeId from_node,
219243
RRNodeId to_node,
220244
RREdgeId from_edge,
221245
RRNodeId target_node);
222246

223247
// Calculates the cost of reaching to_node
224248
void evaluate_timing_driven_node_costs(
225-
t_heap* to,
249+
node_t* to,
226250
const t_conn_cost_params& cost_params,
227251
RRNodeId from_node,
228252
RRNodeId to_node,
@@ -234,8 +258,6 @@ class ConnectionRouter : public ConnectionRouterInterface {
234258
const t_conn_cost_params& cost_params,
235259
const t_bb& bounding_box);
236260

237-
void empty_heap_annotating_node_route_inf();
238-
239261
//Adds the route tree rooted at rt_node to the heap, preparing it to be
240262
//used as branch-points for further routing.
241263
void add_route_tree_to_heap(const RouteTreeNode& rt_node,
@@ -281,13 +303,17 @@ class ConnectionRouter : public ConnectionRouterInterface {
281303
std::vector<RRNodeId> modified_rr_node_inf_;
282304
RouterStats* router_stats_;
283305
const ConnectionParameters* conn_params_;
284-
HeapImplementation heap_;
306+
DAryHeap heap_;
285307
bool router_debug_;
286308

287309
bool only_opin_inter_layer;
288310

311+
// Timing
312+
std::chrono::microseconds sssp_total_time{0};
313+
289314
// The path manager for RCV: keeps track of the route tree as a set and also manages the allocation of the heap types
290315
PathManager rcv_path_manager;
316+
vtr::vector<RRNodeId, t_heap_path*> rcv_path_data;
291317
};
292318

293319
/** Construct a connection router that uses the specified heap type.

vpr/src/route/d_ary_heap.h

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#ifndef _VTR_D_ARY_HEAP_H
2+
#define _VTR_D_ARY_HEAP_H
3+
4+
#include <cstdint>
5+
#include <tuple>
6+
#include <vector>
7+
8+
#include "device_grid.h"
9+
#include "rr_graph_fwd.h"
10+
#include "d_ary_heap.tpp"
11+
12+
using pq_prio_t = float;
13+
using pq_index_t = uint32_t;
14+
15+
inline pq_index_t cast_RRNodeId_to_pq_index_t(RRNodeId node) {
16+
static_assert(sizeof(RRNodeId) == sizeof(pq_index_t));
17+
return static_cast<pq_index_t>(std::size_t(node));
18+
}
19+
20+
class DAryHeap {
21+
public:
22+
using pq_pair_t = std::tuple<pq_prio_t /*priority*/, pq_index_t>;
23+
struct pq_compare {
24+
bool operator()(const pq_pair_t& u, const pq_pair_t& v) {
25+
return std::get<0>(u) > std::get<0>(v);
26+
}
27+
};
28+
using pq_io_t = customized_d_ary_priority_queue<4, pq_pair_t, std::vector<pq_pair_t>, pq_compare>;
29+
30+
DAryHeap() {
31+
pq_ = new pq_io_t();
32+
}
33+
34+
~DAryHeap(){
35+
delete pq_;
36+
}
37+
38+
void init_heap(const DeviceGrid& grid) {
39+
size_t target_heap_size = (grid.width() - 1) * (grid.height() - 1);
40+
pq_->reserve(target_heap_size);
41+
}
42+
43+
bool try_pop(pq_prio_t &prio, RRNodeId &node) {
44+
if (pq_->empty()) {
45+
return false;
46+
} else {
47+
pq_index_t node_id;
48+
std::tie(prio, node_id) = pq_->top();
49+
static_assert(sizeof(RRNodeId) == sizeof(pq_index_t));
50+
node = RRNodeId(node_id);
51+
pq_->pop();
52+
return true;
53+
}
54+
}
55+
56+
void add_to_heap(const pq_prio_t& prio, const RRNodeId& node) {
57+
pq_->push({prio, cast_RRNodeId_to_pq_index_t(node)});
58+
}
59+
60+
void push_back(const pq_prio_t& prio, const RRNodeId& node) {
61+
pq_->push({prio, cast_RRNodeId_to_pq_index_t(node)});
62+
}
63+
64+
bool is_empty_heap() const {
65+
return (bool)(pq_->empty());
66+
}
67+
68+
bool is_valid() const {
69+
return true;
70+
}
71+
72+
void empty_heap() {
73+
pq_->clear();
74+
}
75+
76+
void build_heap() {}
77+
78+
private:
79+
pq_io_t* pq_;
80+
};
81+
82+
#endif /* _VTR_D_ARY_HEAP_H */

0 commit comments

Comments
 (0)