Skip to content

Commit d7ea38a

Browse files
authored
Merge branch 'master' into ueqri-enhanced-heap-for-connection-router
2 parents 93d051d + 387f187 commit d7ea38a

File tree

18 files changed

+127
-42
lines changed

18 files changed

+127
-42
lines changed

doc/src/vpr/command_line_usage.rst

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -200,12 +200,14 @@ General Options
200200

201201
.. option:: --device <string>
202202

203-
Specifies which device layout/floorplan to use from the architecture file.
203+
Specifies which device layout/floorplan to use from the architecture file. Valid values are:
204204

205-
``auto`` uses the smallest device satisfying the circuit's resource requirements.
206-
Other values are assumed to be the names of device layouts defined in the :ref:`arch_grid_layout` section of the architecture file.
205+
* ``auto`` VPR uses the smallest device satisfying the circuit's resource requirements. This option will use the ``<auto_layout>`` tag if it is present in the architecture file in order to construct the smallest FPGA that has sufficient resources to fit the design. If the ``<auto_layout>`` tag is not present, the ``auto`` option chooses the smallest device amongst all the architecture file's ``<fixed_layout>`` specifications into which the design can be packed.
206+
* Any string matching the ``name`` attribute of a device layout defined with a ``<fixed_layout>`` tag in the :ref:`arch_grid_layout` section of the architecture file.
207207

208-
.. note:: If the architecture contains both ``<auto_layout>`` and ``<fixed_layout>`` specifications, specifying an ``auto`` device will use the ``<auto_layout>``.
208+
If the value specified is neither ``auto`` nor matches the ``name`` attribute value of a ``<fixed_layout>`` tag, VPR issues an error.
209+
210+
.. note:: If the only layout in the architecture file is a single device specified using ``<fixed_layout>``, it is recommended to always specify the ``--device`` option; this prevents the value ``--device auto`` from interfering with operations supported only for ``<fixed_layout>`` grids.
209211

210212
**Default:** ``auto``
211213

libs/libarchfpga/src/physical_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,7 @@ class t_pb_graph_pin {
13821382
float tco_max = std::numeric_limits<float>::quiet_NaN(); /* For sequential logic elements the maximum clock to output time */
13831383
t_pb_graph_pin* associated_clock_pin = nullptr; /* For sequential elements, the associated clock */
13841384

1385-
/* This member is used when flat-routing and has_choking_spot are enabled.
1385+
/* This member is used when flat-routing and router_opt_choke_points are enabled.
13861386
* It is used to identify choke points.
13871387
* This is only valid for IPINs, and it only contains the pins that are reachable to the pin by a forwarding path.
13881388
* It doesn't take into account feed-back connection.

vpr/src/base/SetupVPR.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ void SetupVPR(const t_options* options,
318318
vtr::ScopedStartFinishTimer timer("Allocate intra-cluster resources");
319319
// The following two functions should be called when the data structures related to t_pb_graph_node, t_pb_type,
320320
// and t_pb_graph_edge are initialized
321-
alloc_and_load_intra_cluster_resources(routerOpts->has_choking_spot);
321+
alloc_and_load_intra_cluster_resources(routerOpts->has_choke_point);
322322
add_intra_tile_switches();
323323
}
324324

@@ -510,7 +510,7 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts)
510510
RouterOpts->max_logged_overused_rr_nodes = Options.max_logged_overused_rr_nodes;
511511
RouterOpts->generate_rr_node_overuse_report = Options.generate_rr_node_overuse_report;
512512
RouterOpts->flat_routing = Options.flat_routing;
513-
RouterOpts->has_choking_spot = Options.has_choking_spot;
513+
RouterOpts->has_choke_point = Options.router_opt_choke_points;
514514
RouterOpts->custom_3d_sb_fanin_fanout = Options.custom_3d_sb_fanin_fanout;
515515
RouterOpts->with_timing_analysis = Options.timing_analysis;
516516
}

vpr/src/base/ShowSetup.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -255,11 +255,11 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
255255
VTR_LOG("false\n");
256256
}
257257

258-
VTR_LOG("RouterOpts.has_choking_spot: ");
259-
if (RouterOpts.has_choking_spot) {
260-
VTR_LOG("true\n");
258+
VTR_LOG("RouterOpts.choke_points: ");
259+
if (RouterOpts.has_choke_point) {
260+
VTR_LOG("on\n");
261261
} else {
262-
VTR_LOG("false\n");
262+
VTR_LOG("off\n");
263263
}
264264

265265
VTR_ASSERT(GLOBAL == RouterOpts.route_type || DETAILED == RouterOpts.route_type);

vpr/src/base/place_and_route.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
398398

399399
init_route_structs(router_net_list,
400400
router_opts.bb_factor,
401-
router_opts.has_choking_spot,
401+
router_opts.has_choke_point,
402402
is_flat);
403403

404404
restore_routing(best_routing,

vpr/src/base/read_options.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2487,13 +2487,13 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
24872487
.default_value("off")
24882488
.show_in(argparse::ShowIn::HELP_ONLY);
24892489

2490-
route_grp.add_argument(args.has_choking_spot, "--has_choking_spot")
2490+
route_grp.add_argument<bool, ParseOnOff>(args.router_opt_choke_points, "--router_opt_choke_points")
24912491
.help(
24922492
""
2493-
"Some FPGA architectures, due to the lack of full connectivity inside the cluster, may have"
2494-
" a choking spot inside the cluster. Thus, if routing doesn't converge, enabling this option may"
2495-
" help it.")
2496-
.default_value("false")
2493+
"Some FPGA architectures with limited fan-out options within a cluster (e.g. fracturable LUTs with shared pins) do"
2494+
" not converge well in routing unless these fan-out choke points are discovered and optimized for during net routing."
2495+
" This option helps router convergence for such architectures.")
2496+
.default_value("on")
24972497
.show_in(argparse::ShowIn::HELP_ONLY);
24982498

24992499

vpr/src/base/read_options.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ struct t_options {
218218
argparse::ArgValue<int> reorder_rr_graph_nodes_threshold;
219219
argparse::ArgValue<int> reorder_rr_graph_nodes_seed;
220220
argparse::ArgValue<bool> flat_routing;
221-
argparse::ArgValue<bool> has_choking_spot;
221+
argparse::ArgValue<bool> router_opt_choke_points;
222222
argparse::ArgValue<int> route_verbosity;
223223
argparse::ArgValue<int> custom_3d_sb_fanin_fanout;
224224

vpr/src/base/read_route.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ bool read_route(const char* route_file, const t_router_opts& router_opts, bool v
107107
const Netlist<>& router_net_list = (flat_router) ? (const Netlist<>&)g_vpr_ctx.atom().nlist : (const Netlist<>&)g_vpr_ctx.clustering().clb_nlist;
108108
init_route_structs(router_net_list,
109109
router_opts.bb_factor,
110-
router_opts.has_choking_spot,
110+
router_opts.has_choke_point,
111111
flat_router);
112112

113113
/*Check dimensions*/

vpr/src/base/vpr_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1339,7 +1339,7 @@ struct t_router_opts {
13391339
bool generate_rr_node_overuse_report;
13401340

13411341
bool flat_routing;
1342-
bool has_choking_spot;
1342+
bool has_choke_point;
13431343

13441344
int custom_3d_sb_fanin_fanout = 1;
13451345

vpr/src/place/net_cost_handler.cpp

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@
3131
#include "placer_state.h"
3232
#include "move_utils.h"
3333
#include "place_timing_update.h"
34-
#include "noc_place_utils.h"
3534
#include "vtr_math.h"
35+
#include "vtr_ndmatrix.h"
36+
#include "vtr_ndoffsetmatrix.h"
3637

3738
#include <array>
3839

@@ -53,9 +54,6 @@ constexpr std::array<float, MAX_FANOUT_CROSSING_COUNT> cross_count = {1.0000, 1.
5354
2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410,
5455
2.7671, 2.7933};
5556

56-
57-
58-
5957
/**
6058
* @brief If the moving pin is of type SINK, update bb_pin_sink_count_new which stores the number of sink pins on each layer of "net_id"
6159
* @param pin_old_loc Old location of the moving pin
@@ -229,6 +227,70 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_c
229227
chany_place_cost_fac_[high][low] = pow((double)chany_place_cost_fac_[high][low], (double)place_cost_exp);
230228
}
231229
}
230+
231+
if (device_ctx.grid.get_num_layers() > 1) {
232+
alloc_and_load_for_fast_vertical_cost_update_(place_cost_exp);
233+
}
234+
}
235+
236+
void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_(float place_cost_exp) {
237+
const auto& device_ctx = g_vpr_ctx.device();
238+
const auto& rr_graph = device_ctx.rr_graph;
239+
240+
const size_t grid_height = device_ctx.grid.height();
241+
const size_t grid_width = device_ctx.grid.width();
242+
243+
244+
chanz_place_cost_fac_ = vtr::NdMatrix<float, 4>({grid_width, grid_height, grid_width, grid_height}, 0.);
245+
246+
vtr::NdMatrix<float, 2> tile_num_inter_die_conn({grid_width, grid_height}, 0.);
247+
248+
for (const auto& src_rr_node : rr_graph.nodes()) {
249+
for (const auto& rr_edge_idx : rr_graph.configurable_edges(src_rr_node)) {
250+
const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx);
251+
if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) {
252+
// We assume that the nodes driving the inter-layer connection or being driven by it
253+
// are not streched across multiple tiles
254+
int src_x = rr_graph.node_xhigh(src_rr_node);
255+
int src_y = rr_graph.node_yhigh(src_rr_node);
256+
VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_ylow(src_rr_node) == src_y);
257+
258+
tile_num_inter_die_conn[src_x][src_y]++;
259+
}
260+
}
261+
262+
for (const auto& rr_edge_idx : rr_graph.non_configurable_edges(src_rr_node)) {
263+
const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx);
264+
if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) {
265+
int src_x = rr_graph.node_xhigh(src_rr_node);
266+
VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_xlow(src_rr_node) == src_x);
267+
int src_y = rr_graph.node_yhigh(src_rr_node);
268+
VTR_ASSERT(rr_graph.node_ylow(src_rr_node) == src_y && rr_graph.node_ylow(src_rr_node) == src_y);
269+
tile_num_inter_die_conn[src_x][src_y]++;
270+
}
271+
}
272+
}
273+
274+
for (int x_high = 0; x_high < (int)device_ctx.grid.width(); x_high++) {
275+
for (int y_high = 0; y_high < (int)device_ctx.grid.height(); y_high++) {
276+
for (int x_low = 0; x_low <= x_high; x_low++) {
277+
for (int y_low = 0; y_low <= y_high; y_low++) {
278+
int num_inter_die_conn = 0;
279+
for (int x = x_low; x <= x_high; x++) {
280+
for (int y = y_low; y <= y_high; y++) {
281+
num_inter_die_conn += tile_num_inter_die_conn[x][y];
282+
}
283+
}
284+
int seen_num_tiles = (x_high - x_low + 1) * (y_high - y_low + 1);
285+
chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = seen_num_tiles / static_cast<float>(num_inter_die_conn);
286+
287+
chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = pow(
288+
(double)chanz_place_cost_fac_[x_high][y_high][x_low][y_low],
289+
(double)place_cost_exp);
290+
}
291+
}
292+
}
293+
}
232294
}
233295

234296
double NetCostHandler::comp_bb_cost(e_cost_methods method) {
@@ -1395,6 +1457,8 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
13951457

13961458
const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : placer_state_.move().bb_coords[net_id];
13971459

1460+
const bool is_multi_layer = (g_vpr_ctx.device().grid.get_num_layers() > 1);
1461+
13981462
double crossing = wirelength_crossing_count(cluster_ctx.clb_nlist.net_pins(net_id).size());
13991463

14001464
/* Could insert a check for xmin == xmax. In that case, assume *
@@ -1413,6 +1477,9 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
14131477
double ncost;
14141478
ncost = (bb.xmax - bb.xmin + 1) * crossing * chanx_place_cost_fac_[bb.ymax][bb.ymin - 1];
14151479
ncost += (bb.ymax - bb.ymin + 1) * crossing * chany_place_cost_fac_[bb.xmax][bb.xmin - 1];
1480+
if (is_multi_layer) {
1481+
ncost += (bb.layer_max - bb.layer_min) * crossing * chanz_place_cost_fac_[bb.xmax][bb.ymax][bb.xmin][bb.ymin];
1482+
}
14161483

14171484
return ncost;
14181485
}

vpr/src/place/net_cost_handler.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,13 @@ class NetCostHandler {
195195
*/
196196
vtr::NdOffsetMatrix<float, 2> chanx_place_cost_fac_; // [-1...device_ctx.grid.width()-1]
197197
vtr::NdOffsetMatrix<float, 2> chany_place_cost_fac_; // [-1...device_ctx.grid.height()-1]
198+
/**
199+
@brief This data structure functions similarly to the matrices described above
200+
but is applied to 3D connections linking different FPGA layers. It is used in the
201+
placement cost function calculation, where the height of the bounding box is divided
202+
by the average number of inter-die connections within the bounding box.
203+
*/
204+
vtr::NdMatrix<float, 4> chanz_place_cost_fac_; // [0...device_ctx.grid.width()-1][0...device_ctx.grid.height()-1][0...device_ctx.grid.width()-1][0...device_ctx.grid.height()-1]
198205

199206

200207
private:
@@ -249,6 +256,18 @@ class NetCostHandler {
249256
*/
250257
void alloc_and_load_chan_w_factors_for_place_cost_(float place_cost_exp);
251258

259+
/**
260+
* @brief Allocates and loads the chanz_place_cost_fac array with the inverse of
261+
* the average number of inter-die connections per tile within the bounding box spanning (x_low, y_low) to (x_high, y_high), indexed as [x_high][y_high][x_low][y_low].
262+
*
263+
* @details This is only useful for multi-die FPGAs. The place_cost_exp factor specifies to
264+
* what power the average number of inter-die connections should be taken -- larger numbers make regions with fewer inter-die connections more expensive.
265+
*
266+
* @param place_cost_exp It is an exponent to which you take the average number of inter-die connections;
267+
* a higher value would favour areas with more inter-die connections over areas with fewer of those during placement (usually we use 1).
268+
*/
269+
void alloc_and_load_for_fast_vertical_cost_update_(float place_cost_exp);
270+
252271
/**
253272
* @brief Calculate the new connection delay and timing cost of all the
254273
* sink pins affected by moving a specific pin to a new location. Also

vpr/src/route/connection_router.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -764,7 +764,7 @@ void ConnectionRouter<Heap>::evaluate_timing_driven_node_costs(RTExploredNode* t
764764
//cost.
765765
cong_cost = 0.;
766766
}
767-
if (conn_params_->has_choking_spot_ && is_flat_ && rr_graph_->node_type(to->index) == IPIN) {
767+
if (conn_params_->router_opt_choke_points_ && is_flat_ && rr_graph_->node_type(to->index) == IPIN) {
768768
auto find_res = conn_params_->connection_choking_spots_.find(to->index);
769769
if (find_res != conn_params_->connection_choking_spots_.end()) {
770770
cong_cost = cong_cost / pow(2, (float)find_res->second);

vpr/src/route/route.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ bool route(const Netlist<>& net_list,
7171

7272
init_route_structs(net_list,
7373
router_opts.bb_factor,
74-
router_opts.has_choking_spot,
74+
router_opts.has_choke_point,
7575
is_flat);
7676

7777
IntraLbPbPinLookup intra_lb_pb_pin_lookup(device_ctx.logical_block_types);
@@ -80,7 +80,7 @@ bool route(const Netlist<>& net_list,
8080
auto choking_spots = set_nets_choking_spots(net_list,
8181
route_ctx.net_terminal_groups,
8282
route_ctx.net_terminal_group_num,
83-
router_opts.has_choking_spot,
83+
router_opts.has_choke_point,
8484
is_flat);
8585

8686
//Initially, the router runs normally trying to reduce congestion while

vpr/src/route/route_net.tpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -436,8 +436,8 @@ inline NetResultFlags route_sink(ConnectionRouter& router,
436436
bool sink_critical = (cost_params.criticality > HIGH_FANOUT_CRITICALITY_THRESHOLD);
437437
bool net_is_clock = route_ctx.is_clock_net[net_id] != 0;
438438

439-
bool has_choking_spot = ((int)choking_spots[target_pin].size() != 0) && router_opts.has_choking_spot;
440-
ConnectionParameters conn_params(net_id, target_pin, has_choking_spot, choking_spots[target_pin]);
439+
bool router_opt_choke_points = ((int)choking_spots[target_pin].size() != 0) && router_opts.has_choke_point;
440+
ConnectionParameters conn_params(net_id, target_pin, router_opt_choke_points, choking_spots[target_pin]);
441441

442442
//We normally route high fanout nets by only adding spatially close-by routing to the heap (reduces run-time).
443443
//However, if the current sink is 'critical' from a timing perspective, we put the entire route tree back onto

vpr/src/route/route_utils.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -391,21 +391,18 @@ vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>> set_net
391391
std::vector<std::vector<int>>>& net_terminal_groups,
392392
const vtr::vector<ParentNetId,
393393
std::vector<int>>& net_terminal_group_num,
394-
bool has_choking_spot,
394+
bool router_opt_choke_points,
395395
bool is_flat) {
396396
vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>> choking_spots(net_list.nets().size());
397397
for (const auto& net_id : net_list.nets()) {
398398
choking_spots[net_id].resize(net_list.net_pins(net_id).size());
399399
}
400400

401-
// Return if the architecture doesn't have any potential choke points
402-
if (!has_choking_spot) {
401+
// Return if the architecture doesn't have any potential choke points or flat router is not enabled
402+
if (!router_opt_choke_points || !is_flat) {
403403
return choking_spots;
404404
}
405405

406-
// We only identify choke points if flat_routing is enabled.
407-
VTR_ASSERT(is_flat);
408-
409406
const auto& device_ctx = g_vpr_ctx.device();
410407
const auto& rr_graph = device_ctx.rr_graph;
411408
const auto& route_ctx = g_vpr_ctx.routing();

0 commit comments

Comments
 (0)