@@ -151,79 +151,48 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
 }
 
 void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() {
-    const double place_cost_exp = static_cast<double>(placer_opts_.place_cost_exp);
-    auto& device_ctx = g_vpr_ctx.device();
-
-    const int grid_height = device_ctx.grid.height();
-    const int grid_width = device_ctx.grid.width();
-
-    /* Access arrays below as chan?_place_cost_fac_(subhigh, sublow). Since subhigh must be greater than or
-     * equal to sublow, we will only access the lower half of a matrix, but we allocate the whole matrix anyway
-     * for simplicity, so we can use the vtr utility matrix functions. */
-    chanx_place_cost_fac_ = vtr::NdOffsetMatrix<float, 2>({{{-1, grid_height}, {-1, grid_height}}});
-    chany_place_cost_fac_ = vtr::NdOffsetMatrix<float, 2>({{{-1, grid_width}, {-1, grid_width}}});
-
-    // First compute the number of tracks between channel high and channel low, inclusive.
-    chanx_place_cost_fac_[-1][-1] = 0;
-
-    for (int high = 0; high < grid_height; high++) {
-        chanx_place_cost_fac_[high][high] = (float)device_ctx.chan_width.x_list[high];
-        for (int low = -1; low < high; low++) {
-            chanx_place_cost_fac_[high][low] = chanx_place_cost_fac_[high - 1][low] + (float)device_ctx.chan_width.x_list[high];
-        }
-    }
-
-    /* Now compute the inverse of the average number of tracks per channel *
-     * between high and low. The cost function divides by the average      *
-     * number of tracks per channel, so by storing the inverse I convert   *
-     * this to a faster multiplication. Take this final number to the      *
-     * place_cost_exp power -- numbers other than one mean this is no      *
-     * longer a simple "average number of tracks"; it is some power of     *
-     * that, allowing greater penalization of narrow channels.             */
-    for (int high = -1; high < grid_height; high++) {
-        for (int low = -1; low <= high; low++) {
-            /* Since we will divide the wiring cost by the average channel *
-             * capacity between high and low, having only 0 width channels *
-             * will result in infinite wiring capacity normalization       *
-             * factor, and extremely bad placer behaviour. Hence we change *
-             * this to a small (1 track) channel capacity instead.         */
-            if (chanx_place_cost_fac_[high][low] == 0.0f) {
-                VTR_LOG_WARN("CHANX place cost fac is 0 at %d %d\n", high, low);
-                chanx_place_cost_fac_[high][low] = 1.0f;
-            }
+    const auto& device_ctx = g_vpr_ctx.device();
 
-            chanx_place_cost_fac_[high][low] = (high - low + 1.) / chanx_place_cost_fac_[high][low];
-            chanx_place_cost_fac_[high][low] = pow((double)chanx_place_cost_fac_[high][low], place_cost_exp);
-        }
-    }
+    const int grid_height = (int)device_ctx.grid.height();
+    const int grid_width = (int)device_ctx.grid.width();
+
+    /* These arrays contain the accumulated channel width between channel zero and
+     * the channel specified by the given index. The accumulated channel width
+     * is inclusive, meaning that it includes both channel zero and channel `idx`.
+     * To compute the total channel width between channels 'low' and 'high', use the
+     * following formula:
+     *      acc_chan?_width_[high] - acc_chan?_width_[low - 1]
+     * This returns the total number of tracks between channels 'low' and 'high',
+     * including tracks in these channels.
+     *
+     * Channel -1 doesn't exist, so we can say it has zero tracks. We need to be able
+     * to access these arrays with index -1 to handle cases where the lower channel is 0.
+     */
+    acc_chanx_width_ = vtr::NdOffsetMatrix<int, 1>({{{-1, grid_height}}});
+    acc_chany_width_ = vtr::NdOffsetMatrix<int, 1>({{{-1, grid_width}}});
 
-    /* Now do the same thing for the y-directed channels. First get the
-     * number of tracks between channel high and channel low, inclusive. */
-    chany_place_cost_fac_[-1][-1] = 0;
+    // Initialize the first element (index -1) to zero
+    acc_chanx_width_[-1] = 0;
+    for (int y = 0; y < grid_height; y++) {
+        acc_chanx_width_[y] = acc_chanx_width_[y - 1] + device_ctx.chan_width.x_list[y];
 
-    for (int high = 0; high < grid_width; high++) {
-        chany_place_cost_fac_[high][high] = device_ctx.chan_width.y_list[high];
-        for (int low = -1; low < high; low++) {
-            chany_place_cost_fac_[high][low] = chany_place_cost_fac_[high - 1][low] + device_ctx.chan_width.y_list[high];
+        /* If the number of tracks in a channel is zero, two consecutive elements take the same
+         * value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this
+         * potential issue, we assume that the channel width is at least 1.
+         */
+        if (acc_chanx_width_[y] == acc_chanx_width_[y - 1]) {
+            acc_chanx_width_[y]++;
         }
     }
 
-    /* Now compute the inverse of the average number of tracks per channel
-     * between high and low. Take to specified power. */
-    for (int high = -1; high < grid_width; high++) {
-        for (int low = -1; low <= high; low++) {
-            /* Since we will divide the wiring cost by the average channel *
-             * capacity between high and low, having only 0 width channels *
-             * will result in infinite wiring capacity normalization       *
-             * factor, and extremely bad placer behaviour. Hence we change *
-             * this to a small (1 track) channel capacity instead.         */
-            if (chany_place_cost_fac_[high][low] == 0.0f) {
-                VTR_LOG_WARN("CHANY place cost fac is 0 at %d %d\n", high, low);
-                chany_place_cost_fac_[high][low] = 1.0f;
-            }
+    // Initialize the first element (index -1) to zero
+    acc_chany_width_[-1] = 0;
+    for (int x = 0; x < grid_width; x++) {
+        acc_chany_width_[x] = acc_chany_width_[x - 1] + device_ctx.chan_width.y_list[x];
 
-            chany_place_cost_fac_[high][low] = (high - low + 1.) / chany_place_cost_fac_[high][low];
-            chany_place_cost_fac_[high][low] = pow((double)chany_place_cost_fac_[high][low], place_cost_exp);
+        // To avoid a division by zero
+        if (acc_chany_width_[x] == acc_chany_width_[x - 1]) {
+            acc_chany_width_[x]++;
         }
     }
 }
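For context, the comment above points at get_chanxy_cost_fac_(), whose body is not part of this hunk. Below is a minimal sketch of how such a helper could consume the accumulated widths, assuming it returns the inverse of the average number of tracks per channel spanned by a bounding box (what the removed chan?_place_cost_fac_ matrices used to store); the helper name and exact signature are illustrative only, not the PR's code:

```cpp
#include <utility>  // std::pair

// Hypothetical sketch only -- not the actual get_chanxy_cost_fac_().
// Assumes the acc_chanx_width_ / acc_chany_width_ members allocated above are
// passed in, and that t_bb carries inclusive xmin/xmax/ymin/ymax bounds.
std::pair<float, float> chanxy_cost_fac_sketch(const vtr::NdOffsetMatrix<int, 1>& acc_chanx_width,
                                               const vtr::NdOffsetMatrix<int, 1>& acc_chany_width,
                                               const t_bb& bb) {
    // x-directed channels run along rows, so they are indexed by y (and vice versa).
    const int chanx_tracks = acc_chanx_width[bb.ymax] - acc_chanx_width[bb.ymin - 1];
    const int chany_tracks = acc_chany_width[bb.xmax] - acc_chany_width[bb.xmin - 1];

    // Inverse of the average number of tracks per channel spanned by the bounding box.
    // The "at least 1 track" clamp applied during construction keeps both denominators > 0.
    const float chanx_fac = (bb.ymax - bb.ymin + 1.0f) / chanx_tracks;
    const float chany_fac = (bb.xmax - bb.xmin + 1.0f) / chany_tracks;

    return {chanx_fac, chany_fac};
}
```

Returning inverses keeps the hot path a multiplication, matching the rationale in the removed comment block above.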
@@ -239,33 +208,32 @@ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() {
     const size_t grid_height = device_ctx.grid.height();
     const size_t grid_width = device_ctx.grid.width();
 
-
-    acc_tile_num_inter_die_conn_ = vtr::NdMatrix<int, 2>({grid_width, grid_height}, 0.);
+    acc_tile_num_inter_die_conn_ = vtr::NdMatrix<int, 2>({grid_width, grid_height}, 0);
 
     vtr::NdMatrix<float, 2> tile_num_inter_die_conn({grid_width, grid_height}, 0.);
 
     /*
-     * Step 1: iterate over the rr-graph, recording how many edges go between layers at each (x,y) location
-     * in the device. We count all these edges, regardless of which layers they connect. Then we divide by
-     * the number of layers - 1 to get the average cross-layer edge count per (x,y) location -- this mirrors
-     * what we do for the horizontal and vertical channels where we assume the channel width doesn't change
-     * along the length of the channel. It lets us be more memory-efficient for 3D devices, and could be revisited
+     * Step 1: iterate over the rr-graph, recording how many edges go between layers at each (x,y) location
+     * in the device. We count all these edges, regardless of which layers they connect. Then we divide by
+     * the number of layers - 1 to get the average cross-layer edge count per (x,y) location -- this mirrors
+     * what we do for the horizontal and vertical channels where we assume the channel width doesn't change
+     * along the length of the channel. It lets us be more memory-efficient for 3D devices, and could be revisited
      * if someday we have architectures with widely varying connectivity between different layers in a stack.
-     */
+     */
 
     /*
-     * To calculate the accumulative number of inter-die connections we first need to get the number of
-     * inter-die connection per location. To be able to work for the cases that RR Graph is read instead
-     * of being made from the architecture file, we calculate this number by iterating over the RR graph. Once
-     * tile_num_inter_die_conn is populated, we can start populating acc_tile_num_inter_die_conn_. First,
-     * we populate the first row and column. Then, we iterate over the rest of blocks and get the number of
-     * inter-die connections by adding up the number of inter-die block at that location + the accumulation
-     * for the block below and left to it. Then, since the accumulated number of inter-die connection to
-     * the block on the lower left connection of the block is added twice, that part needs to be removed.
-     */
-    for (const auto& src_rr_node : rr_graph.nodes()) {
-        for (const auto& rr_edge_idx : rr_graph.edges(src_rr_node)) {
-            const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx);
+     * To calculate the accumulated number of inter-die connections, we first need the number of
+     * inter-die connections per location. So that this also works when the RR graph is read from a file
+     * instead of being generated from the architecture file, we compute this number by iterating over the RR graph. Once
+     * tile_num_inter_die_conn is populated, we can start populating acc_tile_num_inter_die_conn_. First,
+     * we populate the first row and column. Then, we iterate over the remaining blocks and compute the number of
+     * inter-die connections by adding the count at that location to the accumulated counts
+     * for the blocks below and to the left of it. Since the accumulated count for the block to the
+     * lower left is added twice this way, that contribution needs to be subtracted once.
+     */
+    for (const RRNodeId src_rr_node : rr_graph.nodes()) {
+        for (const t_edge_size rr_edge_idx : rr_graph.edges(src_rr_node)) {
+            const RRNodeId sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx);
             if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) {
                 // We assume that the nodes driving the inter-layer connection or being driven by it
                 // are not stretched across multiple tiles
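The Step 1 comment above says the per-tile counts are divided by the number of layers minus one to get an average; that loop is not shown in these hunks. A hedged sketch of that normalization, assuming num_layers comes from device_ctx.grid.get_num_layers():

```cpp
// Sketch of the averaging described in the Step 1 comment (the actual loop lives in
// the elided part of alloc_and_load_for_fast_vertical_cost_update_()).
const int num_layers = device_ctx.grid.get_num_layers();
for (size_t x = 0; x < grid_width; x++) {
    for (size_t y = 0; y < grid_height; y++) {
        // Average cross-layer edge count per (x, y) location across all adjacent layer pairs.
        tile_num_inter_die_conn[x][y] /= (num_layers - 1);
    }
}
```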
@@ -290,20 +258,20 @@ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() {
     // Initialize the first row and column
     for (size_t x = 1; x < device_ctx.grid.width(); x++) {
         acc_tile_num_inter_die_conn_[x][0] = acc_tile_num_inter_die_conn_[x-1][0] +
-            tile_num_inter_die_conn[x][0];
+                                             tile_num_inter_die_conn[x][0];
     }
 
     for (size_t y = 1; y < device_ctx.grid.height(); y++) {
         acc_tile_num_inter_die_conn_[0][y] = acc_tile_num_inter_die_conn_[0][y-1] +
-            tile_num_inter_die_conn[0][y];
+                                             tile_num_inter_die_conn[0][y];
     }
 
     for (size_t x_high = 1; x_high < device_ctx.grid.width(); x_high++) {
         for (size_t y_high = 1; y_high < device_ctx.grid.height(); y_high++) {
             acc_tile_num_inter_die_conn_[x_high][y_high] = acc_tile_num_inter_die_conn_[x_high-1][y_high] +
-                acc_tile_num_inter_die_conn_[x_high][y_high-1] +
-                tile_num_inter_die_conn[x_high][y_high] -
-                acc_tile_num_inter_die_conn_[x_high-1][y_high-1];
+                                                           acc_tile_num_inter_die_conn_[x_high][y_high-1] +
+                                                           tile_num_inter_die_conn[x_high][y_high] -
+                                                           acc_tile_num_inter_die_conn_[x_high-1][y_high-1];
         }
     }
 }
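With acc_tile_num_inter_die_conn_ built as an inclusive 2D prefix sum (a summed-area table), the number of inter-die connections inside a bounding box can be recovered with the usual inclusion-exclusion lookup. The real query lives in get_chanz_cost_factor_(), which is only partially visible in the hunks below; the helper name here is illustrative:

```cpp
// Hedged sketch: querying the summed-area table built above. The xmin == 0 / ymin == 0
// branches mirror the special case visible in get_chanz_cost_factor_() further down.
int count_inter_die_conn_in_bb(const vtr::NdMatrix<int, 2>& acc, const t_bb& bb) {
    int num_conn = acc[bb.xmax][bb.ymax];
    if (bb.xmin > 0) {
        num_conn -= acc[bb.xmin - 1][bb.ymax];      // remove columns left of the box
    }
    if (bb.ymin > 0) {
        num_conn -= acc[bb.xmax][bb.ymin - 1];      // remove rows below the box
    }
    if (bb.xmin > 0 && bb.ymin > 0) {
        num_conn += acc[bb.xmin - 1][bb.ymin - 1];  // add back the doubly removed corner
    }
    return num_conn;
}
```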
@@ -1421,7 +1389,7 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
 
     const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : placer_state_.move().bb_coords[net_id];
 
-    double crossing = wirelength_crossing_count(cluster_ctx.clb_nlist.net_pins(net_id).size());
+    const double crossing = wirelength_crossing_count(cluster_ctx.clb_nlist.net_pins(net_id).size());
 
     /* Could insert a check for xmin == xmax. In that case, assume *
      * connection will be made with no bends and hence no x-cost.  *
@@ -1437,8 +1405,9 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
      */
 
     double ncost;
-    ncost = (bb.xmax - bb.xmin + 1) * chanx_place_cost_fac_[bb.ymax][bb.ymin - 1];
-    ncost += (bb.ymax - bb.ymin + 1) * chany_place_cost_fac_[bb.xmax][bb.xmin - 1];
+    const auto [chanx_cost_fac, chany_cost_fac] = get_chanxy_cost_fac_(bb);
+    ncost = (bb.xmax - bb.xmin + 1) * chanx_cost_fac;
+    ncost += (bb.ymax - bb.ymin + 1) * chany_cost_fac;
 
     if (is_multi_layer_) {
         ncost += (bb.layer_max - bb.layer_min) * get_chanz_cost_factor_(bb);
     }
@@ -1448,6 +1417,7 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
     return ncost;
 }
 
+
 double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ts) {
     const auto& move_ctx = placer_state_.move();
@@ -1469,7 +1439,7 @@ double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use
         /* Adjust the bounding box half perimeter by the wirelength correction
          * factor based on terminal count, which is 1 for the source + the number
          * of sinks on this layer. */
-        double crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1);
+        const double crossing = wirelength_crossing_count(layer_pin_sink_count[layer_num] + 1);
 
         /* Could insert a check for xmin == xmax. In that case, assume *
          * connection will be made with no bends and hence no x-cost.  *
@@ -1484,11 +1454,10 @@ double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use
          * chan?_place_cost_fac_ objects can handle -1 indices internally.
          */
 
-        ncost += (bb[layer_num].xmax - bb[layer_num].xmin + 1) * crossing
-                 * chanx_place_cost_fac_[bb[layer_num].ymax][bb[layer_num].ymin - 1];
-
-        ncost += (bb[layer_num].ymax - bb[layer_num].ymin + 1) * crossing
-                 * chany_place_cost_fac_[bb[layer_num].xmax][bb[layer_num].xmin - 1];
+        const auto [chanx_cost_fac, chany_cost_fac] = get_chanxy_cost_fac_(bb[layer_num]);
+        ncost += (bb[layer_num].xmax - bb[layer_num].xmin + 1) * chanx_cost_fac;
+        ncost += (bb[layer_num].ymax - bb[layer_num].ymin + 1) * chany_cost_fac;
+        ncost *= crossing;
     }
 
     return ncost;
@@ -1546,8 +1515,6 @@ double NetCostHandler::get_net_wirelength_from_layer_bb_(ClusterNetId net_id) {
 }
 
 float NetCostHandler::get_chanz_cost_factor_(const t_bb& bb) {
-    float place_cost_exp = placer_opts_.place_cost_exp;
-
     int num_inter_dir_conn;
 
     if (bb.xmin == 0 && bb.ymin == 0) {
@@ -1571,7 +1538,6 @@ float NetCostHandler::get_chanz_cost_factor_(const t_bb& bb) {
     } else {
         int bb_num_tiles = (bb.xmax - bb.xmin + 1) * (bb.ymax - bb.ymin + 1);
         z_cost_factor = bb_num_tiles / static_cast<float>(num_inter_dir_conn);
-        z_cost_factor = pow((double)z_cost_factor, (double)place_cost_exp);
     }
 
     return z_cost_factor;