Skip to content

Commit 3a4f53c

Browse files
committed
Updates to lookahead map.
- New sampling method for lookahead map. - New spiral hole filling algorithm - Use a lightweight version of PQ_Entry (PQ_Entry_Lite) - Use maps in run_dijkstra() - Move context of router_lookahead_map_utils inside of util namespace. - Parallelization of lookahead generation Signed-off-by: Dusty DeWeese <[email protected]> Signed-off-by: Keith Rothman <[email protected]> Signed-off-by: Alessandro Comodi <[email protected]>
1 parent 3807432 commit 3a4f53c

9 files changed

+723
-1
lines changed

vpr/src/base/SetupVPR.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ void SetupVPR(const t_options* Options,
160160
}
161161

162162
Segments = Arch->Segments;
163+
device_ctx.segment_inf = Arch->Segments;
163164

164165
SetupSwitches(*Arch, RoutingArch, Arch->Switches, Arch->num_switches);
165166
SetupRoutingArch(*Arch, RoutingArch);

vpr/src/base/echo_files.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ void alloc_and_load_echo_file_info() {
114114
setEchoFileName(E_ECHO_CHAN_DETAILS, "chan_details.txt");
115115
setEchoFileName(E_ECHO_SBLOCK_PATTERN, "sblock_pattern.txt");
116116
setEchoFileName(E_ECHO_ENDPOINT_TIMING, "endpoint_timing.echo.json");
117+
setEchoFileName(E_ECHO_LOOKAHEAD_MAP, "lookahead_map.echo");
117118
}
118119

119120
void free_echo_file_info() {

vpr/src/base/echo_files.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ enum e_echo_files {
4646
E_ECHO_CHAN_DETAILS,
4747
E_ECHO_SBLOCK_PATTERN,
4848
E_ECHO_ENDPOINT_TIMING,
49+
E_ECHO_LOOKAHEAD_MAP,
4950

5051
//Timing Graphs
5152
E_ECHO_PRE_PACKING_TIMING_GRAPH,

vpr/src/base/vpr_context.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,8 @@ struct DeviceContext : public Context {
161161
///@brief The indicies of rr nodes of a given type at a specific x,y grid location
162162
t_rr_node_indices rr_node_indices; // [0..NUM_RR_TYPES-1][0..grid.width()-1][0..grid.width()-1][0..size-1]
163163

164-
std::vector<t_rr_switch_inf> rr_switch_inf; // autogenerated in build_rr_graph based on switch fan-in. [0..(num_rr_switches-1)]
164+
std::vector<t_rr_switch_inf> rr_switch_inf; /* autogenerated in build_rr_graph based on switch fan-in. [0..(num_rr_switches-1)] */
165+
std::vector<t_segment_inf> segment_inf;
165166

166167
///@brief Wire segment types in RR graph
167168
std::vector<t_segment_inf> rr_segments;

vpr/src/route/route_timing.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
#include "connection_router_interface.h"
1414
#include "heap_type.h"
1515

16+
extern bool f_router_debug;
17+
1618
int get_max_pins_per_net();
1719

1820
bool try_timing_driven_route(const t_router_opts& router_opts,
Lines changed: 246 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,246 @@
1+
#include "router_lookahead_map_utils.h"
2+
3+
#include "globals.h"
4+
#include "vpr_context.h"
5+
#include "vtr_math.h"
6+
#include "route_common.h"
7+
8+
namespace util {
9+
10+
PQ_Entry::PQ_Entry(
11+
int set_rr_node_ind,
12+
int switch_ind,
13+
float parent_delay,
14+
float parent_R_upstream,
15+
float parent_congestion_upstream,
16+
bool starting_node,
17+
float Tsw_adjust) {
18+
this->rr_node_ind = set_rr_node_ind;
19+
20+
auto& device_ctx = g_vpr_ctx.device();
21+
this->delay = parent_delay;
22+
this->congestion_upstream = parent_congestion_upstream;
23+
this->R_upstream = parent_R_upstream;
24+
if (!starting_node) {
25+
float Tsw = device_ctx.rr_switch_inf[switch_ind].Tdel;
26+
Tsw += Tsw_adjust;
27+
VTR_ASSERT(Tsw >= 0.f);
28+
float Rsw = device_ctx.rr_switch_inf[switch_ind].R;
29+
float Cnode = device_ctx.rr_nodes[set_rr_node_ind].C();
30+
float Rnode = device_ctx.rr_nodes[set_rr_node_ind].R();
31+
32+
float T_linear = 0.f;
33+
if (device_ctx.rr_switch_inf[switch_ind].buffered()) {
34+
T_linear = Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode;
35+
} else { /* Pass transistor */
36+
T_linear = Tsw + 0.5 * Rsw * Cnode;
37+
}
38+
39+
float base_cost = 0.f;
40+
if (device_ctx.rr_switch_inf[switch_ind].configurable()) {
41+
base_cost = get_single_rr_cong_base_cost(set_rr_node_ind);
42+
}
43+
44+
VTR_ASSERT(T_linear >= 0.);
45+
VTR_ASSERT(base_cost >= 0.);
46+
this->delay += T_linear;
47+
48+
this->congestion_upstream += base_cost;
49+
}
50+
51+
/* set the cost of this node */
52+
this->cost = this->delay;
53+
}
54+
55+
util::PQ_Entry_Delay::PQ_Entry_Delay(
56+
int set_rr_node_ind,
57+
int switch_ind,
58+
const util::PQ_Entry_Delay* parent) {
59+
this->rr_node_ind = set_rr_node_ind;
60+
61+
if (parent != nullptr) {
62+
auto& device_ctx = g_vpr_ctx.device();
63+
float Tsw = device_ctx.rr_switch_inf[switch_ind].Tdel;
64+
float Rsw = device_ctx.rr_switch_inf[switch_ind].R;
65+
float Cnode = device_ctx.rr_nodes[set_rr_node_ind].C();
66+
float Rnode = device_ctx.rr_nodes[set_rr_node_ind].R();
67+
68+
float T_linear = 0.f;
69+
if (device_ctx.rr_switch_inf[switch_ind].buffered()) {
70+
T_linear = Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode;
71+
} else { /* Pass transistor */
72+
T_linear = Tsw + 0.5 * Rsw * Cnode;
73+
}
74+
75+
VTR_ASSERT(T_linear >= 0.);
76+
this->delay_cost = parent->delay_cost + T_linear;
77+
} else {
78+
this->delay_cost = 0.f;
79+
}
80+
}
81+
82+
util::PQ_Entry_Base_Cost::PQ_Entry_Base_Cost(
83+
int set_rr_node_ind,
84+
int switch_ind,
85+
const util::PQ_Entry_Base_Cost* parent) {
86+
this->rr_node_ind = set_rr_node_ind;
87+
88+
if (parent != nullptr) {
89+
auto& device_ctx = g_vpr_ctx.device();
90+
if (device_ctx.rr_switch_inf[switch_ind].configurable()) {
91+
this->base_cost = parent->base_cost + get_single_rr_cong_base_cost(set_rr_node_ind);
92+
} else {
93+
this->base_cost = parent->base_cost;
94+
}
95+
} else {
96+
this->base_cost = 0.f;
97+
}
98+
}
99+
100+
/* returns cost entry with the smallest delay */
101+
util::Cost_Entry util::Expansion_Cost_Entry::get_smallest_entry() const {
102+
util::Cost_Entry smallest_entry;
103+
104+
for (auto entry : this->cost_vector) {
105+
if (!smallest_entry.valid() || entry.delay < smallest_entry.delay) {
106+
smallest_entry = entry;
107+
}
108+
}
109+
110+
return smallest_entry;
111+
}
112+
113+
/* returns a cost entry that represents the average of all the recorded entries */
114+
util::Cost_Entry util::Expansion_Cost_Entry::get_average_entry() const {
115+
float avg_delay = 0;
116+
float avg_congestion = 0;
117+
118+
for (auto cost_entry : this->cost_vector) {
119+
avg_delay += cost_entry.delay;
120+
avg_congestion += cost_entry.congestion;
121+
}
122+
123+
avg_delay /= (float)this->cost_vector.size();
124+
avg_congestion /= (float)this->cost_vector.size();
125+
126+
return util::Cost_Entry(avg_delay, avg_congestion);
127+
}
128+
129+
/* returns a cost entry that represents the geomean of all the recorded entries */
130+
util::Cost_Entry util::Expansion_Cost_Entry::get_geomean_entry() const {
131+
float geomean_delay = 0;
132+
float geomean_cong = 0;
133+
for (auto cost_entry : this->cost_vector) {
134+
geomean_delay += log(cost_entry.delay);
135+
geomean_cong += log(cost_entry.congestion);
136+
}
137+
138+
geomean_delay = exp(geomean_delay / (float)this->cost_vector.size());
139+
geomean_cong = exp(geomean_cong / (float)this->cost_vector.size());
140+
141+
return util::Cost_Entry(geomean_delay, geomean_cong);
142+
}
143+
144+
/* returns a cost entry that represents the medial of all recorded entries */
145+
util::Cost_Entry util::Expansion_Cost_Entry::get_median_entry() const {
146+
/* find median by binning the delays of all entries and then chosing the bin
147+
* with the largest number of entries */
148+
149+
int num_bins = 10;
150+
151+
/* find entries with smallest and largest delays */
152+
util::Cost_Entry min_del_entry;
153+
util::Cost_Entry max_del_entry;
154+
for (auto entry : this->cost_vector) {
155+
if (!min_del_entry.valid() || entry.delay < min_del_entry.delay) {
156+
min_del_entry = entry;
157+
}
158+
if (!max_del_entry.valid() || entry.delay > max_del_entry.delay) {
159+
max_del_entry = entry;
160+
}
161+
}
162+
163+
/* get the bin size */
164+
float delay_diff = max_del_entry.delay - min_del_entry.delay;
165+
float bin_size = delay_diff / (float)num_bins;
166+
167+
/* sort the cost entries into bins */
168+
std::vector<std::vector<util::Cost_Entry>> entry_bins(num_bins, std::vector<util::Cost_Entry>());
169+
for (auto entry : this->cost_vector) {
170+
float bin_num = floor((entry.delay - min_del_entry.delay) / bin_size);
171+
172+
VTR_ASSERT(vtr::nint(bin_num) >= 0 && vtr::nint(bin_num) <= num_bins);
173+
if (vtr::nint(bin_num) == num_bins) {
174+
/* largest entry will otherwise have an out-of-bounds bin number */
175+
bin_num -= 1;
176+
}
177+
entry_bins[vtr::nint(bin_num)].push_back(entry);
178+
}
179+
180+
/* find the bin with the largest number of elements */
181+
int largest_bin = 0;
182+
int largest_size = 0;
183+
for (int ibin = 0; ibin < num_bins; ibin++) {
184+
if (entry_bins[ibin].size() > (unsigned)largest_size) {
185+
largest_bin = ibin;
186+
largest_size = (unsigned)entry_bins[ibin].size();
187+
}
188+
}
189+
190+
/* get the representative delay of the largest bin */
191+
util::Cost_Entry representative_entry = entry_bins[largest_bin][0];
192+
193+
return representative_entry;
194+
}
195+
196+
template<typename Entry>
197+
void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes,
198+
const Entry& parent_entry,
199+
std::vector<util::Search_Path>* paths,
200+
std::vector<bool>* node_expanded,
201+
std::priority_queue<Entry,
202+
std::vector<Entry>,
203+
std::greater<Entry>>* pq) {
204+
int parent_ind = size_t(parent_entry.rr_node_ind);
205+
206+
auto& parent_node = rr_nodes[parent_ind];
207+
208+
for (int iedge = 0; iedge < parent_node.num_edges(); iedge++) {
209+
int child_node_ind = parent_node.edge_sink_node(iedge);
210+
int switch_ind = parent_node.edge_switch(iedge);
211+
212+
/* skip this child if it has already been expanded from */
213+
if ((*node_expanded)[child_node_ind]) {
214+
continue;
215+
}
216+
217+
Entry child_entry(child_node_ind, switch_ind, &parent_entry);
218+
VTR_ASSERT(child_entry.cost() >= 0);
219+
220+
/* Create (if it doesn't exist) or update (if the new cost is lower)
221+
* to specified node */
222+
Search_Path path_entry = {child_entry.cost(), parent_ind, iedge};
223+
auto& path = (*paths)[child_node_ind];
224+
if (path_entry.cost < path.cost) {
225+
pq->push(child_entry);
226+
path = path_entry;
227+
}
228+
}
229+
}
230+
231+
template void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes,
232+
const PQ_Entry_Delay& parent_entry,
233+
std::vector<Search_Path>* paths,
234+
std::vector<bool>* node_expanded,
235+
std::priority_queue<PQ_Entry_Delay,
236+
std::vector<PQ_Entry_Delay>,
237+
std::greater<PQ_Entry_Delay>>* pq);
238+
template void expand_dijkstra_neighbours(const t_rr_graph_storage& rr_nodes,
239+
const PQ_Entry_Base_Cost& parent_entry,
240+
std::vector<Search_Path>* paths,
241+
std::vector<bool>* node_expanded,
242+
std::priority_queue<PQ_Entry_Base_Cost,
243+
std::vector<PQ_Entry_Base_Cost>,
244+
std::greater<PQ_Entry_Base_Cost>>* pq);
245+
246+
} // namespace util

0 commit comments

Comments
 (0)