Skip to content

Commit 2ada670

Browse files
authored
Merge pull request #2778 from verilog-to-routing/add_z_cost_placement
Add Z Cost to Placement Cost Function [WIP]
2 parents 8dd84f0 + 45abf25 commit 2ada670

File tree

2 files changed

+90
-4
lines changed

2 files changed

+90
-4
lines changed

vpr/src/place/net_cost_handler.cpp

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@
3131
#include "placer_state.h"
3232
#include "move_utils.h"
3333
#include "place_timing_update.h"
34-
#include "noc_place_utils.h"
3534
#include "vtr_math.h"
35+
#include "vtr_ndmatrix.h"
36+
#include "vtr_ndoffsetmatrix.h"
3637

3738
#include <array>
3839

@@ -53,9 +54,6 @@ constexpr std::array<float, MAX_FANOUT_CROSSING_COUNT> cross_count = {1.0000, 1.
5354
2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410,
5455
2.7671, 2.7933};
5556

56-
57-
58-
5957
/**
6058
* @brief If the moving pin is of type SINK, update bb_pin_sink_count_new which stores the number of sink pins on each layer of "net_id"
6159
* @param pin_old_loc Old location of the moving pin
@@ -229,6 +227,70 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_c
229227
chany_place_cost_fac_[high][low] = pow((double)chany_place_cost_fac_[high][low], (double)place_cost_exp);
230228
}
231229
}
230+
231+
if (device_ctx.grid.get_num_layers() > 1) {
232+
alloc_and_load_for_fast_vertical_cost_update_(place_cost_exp);
233+
}
234+
}
235+
236+
void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_(float place_cost_exp) {
237+
const auto& device_ctx = g_vpr_ctx.device();
238+
const auto& rr_graph = device_ctx.rr_graph;
239+
240+
const size_t grid_height = device_ctx.grid.height();
241+
const size_t grid_width = device_ctx.grid.width();
242+
243+
244+
chanz_place_cost_fac_ = vtr::NdMatrix<float, 4>({grid_width, grid_height, grid_width, grid_height}, 0.);
245+
246+
vtr::NdMatrix<float, 2> tile_num_inter_die_conn({grid_width, grid_height}, 0.);
247+
248+
for (const auto& src_rr_node : rr_graph.nodes()) {
249+
for (const auto& rr_edge_idx : rr_graph.configurable_edges(src_rr_node)) {
250+
const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx);
251+
if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) {
252+
// We assume that the nodes driving the inter-layer connection or being driven by it
253+
// are not streched across multiple tiles
254+
int src_x = rr_graph.node_xhigh(src_rr_node);
255+
int src_y = rr_graph.node_yhigh(src_rr_node);
256+
VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_ylow(src_rr_node) == src_y);
257+
258+
tile_num_inter_die_conn[src_x][src_y]++;
259+
}
260+
}
261+
262+
for (const auto& rr_edge_idx : rr_graph.non_configurable_edges(src_rr_node)) {
263+
const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx);
264+
if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) {
265+
int src_x = rr_graph.node_xhigh(src_rr_node);
266+
VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_xlow(src_rr_node) == src_x);
267+
int src_y = rr_graph.node_yhigh(src_rr_node);
268+
VTR_ASSERT(rr_graph.node_ylow(src_rr_node) == src_y && rr_graph.node_ylow(src_rr_node) == src_y);
269+
tile_num_inter_die_conn[src_x][src_y]++;
270+
}
271+
}
272+
}
273+
274+
for (int x_high = 0; x_high < (int)device_ctx.grid.width(); x_high++) {
275+
for (int y_high = 0; y_high < (int)device_ctx.grid.height(); y_high++) {
276+
for (int x_low = 0; x_low <= x_high; x_low++) {
277+
for (int y_low = 0; y_low <= y_high; y_low++) {
278+
int num_inter_die_conn = 0;
279+
for (int x = x_low; x <= x_high; x++) {
280+
for (int y = y_low; y <= y_high; y++) {
281+
num_inter_die_conn += tile_num_inter_die_conn[x][y];
282+
}
283+
}
284+
int seen_num_tiles = (x_high - x_low + 1) * (y_high - y_low + 1);
285+
chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = seen_num_tiles / static_cast<float>(num_inter_die_conn);
286+
287+
chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = pow(
288+
(double)chanz_place_cost_fac_[x_high][y_high][x_low][y_low],
289+
(double)place_cost_exp);
290+
}
291+
}
292+
}
293+
}
232294
}
233295

234296
double NetCostHandler::comp_bb_cost(e_cost_methods method) {
@@ -1395,6 +1457,8 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
13951457

13961458
const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : placer_state_.move().bb_coords[net_id];
13971459

1460+
const bool is_multi_layer = (g_vpr_ctx.device().grid.get_num_layers() > 1);
1461+
13981462
double crossing = wirelength_crossing_count(cluster_ctx.clb_nlist.net_pins(net_id).size());
13991463

14001464
/* Could insert a check for xmin == xmax. In that case, assume *
@@ -1413,6 +1477,9 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
14131477
double ncost;
14141478
ncost = (bb.xmax - bb.xmin + 1) * crossing * chanx_place_cost_fac_[bb.ymax][bb.ymin - 1];
14151479
ncost += (bb.ymax - bb.ymin + 1) * crossing * chany_place_cost_fac_[bb.xmax][bb.xmin - 1];
1480+
if (is_multi_layer) {
1481+
ncost += (bb.layer_max - bb.layer_min) * crossing * chanz_place_cost_fac_[bb.xmax][bb.ymax][bb.xmin][bb.ymin];
1482+
}
14161483

14171484
return ncost;
14181485
}

vpr/src/place/net_cost_handler.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,13 @@ class NetCostHandler {
195195
*/
196196
vtr::NdOffsetMatrix<float, 2> chanx_place_cost_fac_; // [-1...device_ctx.grid.width()-1]
197197
vtr::NdOffsetMatrix<float, 2> chany_place_cost_fac_; // [-1...device_ctx.grid.height()-1]
198+
/**
199+
@brief This data structure functions similarly to the matrices described above
200+
but is applied to 3D connections linking different FPGA layers. It is used in the
201+
placement cost function calculation, where the height of the bounding box is divided
202+
by the average number of inter-die connections within the bounding box.
203+
*/
204+
vtr::NdMatrix<float, 4> chanz_place_cost_fac_; // [0...device_ctx.grid.width()-1][0...device_ctx.grid.height()-1][0...device_ctx.grid.width()-1][0...device_ctx.grid.height()-1]
198205

199206

200207
private:
@@ -249,6 +256,18 @@ class NetCostHandler {
249256
*/
250257
void alloc_and_load_chan_w_factors_for_place_cost_(float place_cost_exp);
251258

259+
/**
260+
* @brief Allocates and loads the chanz_place_cost_fac array with the inverse of
261+
* the average number of inter-die connections between [subhigh] and [sublow].
262+
*
263+
* @details This is only useful for multi-die FPGAs. The place_cost_exp factor specifies to
264+
* what power the average number of inter-die connections should be taken -- larger numbers make narrower channels more expensive.
265+
*
266+
* @param place_cost_exp It is an exponent to which you take the average number of inter-die connections;
267+
* a higher value would favour areas with more inter-die connections over areas with fewer of them during placement (usually we use 1).
268+
*/
269+
void alloc_and_load_for_fast_vertical_cost_update_(float place_cost_exp);
270+
252271
/**
253272
* @brief Calculate the new connection delay and timing cost of all the
254273
* sink pins affected by moving a specific pin to a new location. Also

0 commit comments

Comments
 (0)