Skip to content

Commit 6102f58

Browse files
committed
2 parents 1281b1d + f520c17 commit 6102f58

File tree

83 files changed

+4985
-2376
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

83 files changed

+4985
-2376
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,3 +155,8 @@ tags
155155
cmake-build-debug
156156
cmake-build-release
157157
/.metadata/
158+
159+
#
160+
# Clangd
161+
#
162+
compile_commands.json

doc/src/api/vpr/route_tree.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,10 @@ RouteTreeNode
2020
.. doxygenclass:: RouteTreeNode
2121
:project: vpr
2222
:members:
23+
24+
RTExploredNode
25+
-------------
26+
27+
.. doxygenclass:: RTExploredNode
28+
:project: vpr
29+
:members:

doc/src/api/vprinternals/router_heap.rst

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2,30 +2,13 @@
22
Router Heap
33
==============
44

5-
t_heap
6-
----------
7-
.. doxygenstruct:: t_heap
8-
:project: vpr
9-
:members:
10-
115
HeapInterface
126
----------
137
.. doxygenclass:: HeapInterface
148
:project: vpr
159
:members:
1610

17-
HeapStorage
18-
----------
19-
.. doxygenclass:: HeapStorage
20-
:project: vpr
21-
:members:
22-
23-
KAryHeap
11+
DAryHeap
2412
----------
25-
.. doxygenclass:: KAryHeap
13+
.. doxygenclass:: DAryHeap
2614
:project: vpr
27-
28-
FourAryHeap
29-
----------
30-
.. doxygenclass:: FourAryHeap
31-
:project: vpr

doc/src/vpr/command_line_usage.rst

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -200,12 +200,14 @@ General Options
200200

201201
.. option:: --device <string>
202202

203-
Specifies which device layout/floorplan to use from the architecture file.
203+
Specifies which device layout/floorplan to use from the architecture file. Valid values are:
204204

205-
``auto`` uses the smallest device satisfying the circuit's resource requirements.
206-
Other values are assumed to be the names of device layouts defined in the :ref:`arch_grid_layout` section of the architecture file.
205+
* ``auto`` VPR uses the smallest device satisfying the circuit's resource requirements. This option will use the ``<auto_layout>`` tag if it is present in the architecture file in order to construct the smallest FPGA that has sufficient resources to fit the design. If the ``<auto_layout>`` tag is not present, the ``auto`` option chooses the smallest device amongst all the architecture file's ``<fixed_layout>`` specifications into which the design can be packed.
206+
* Any string matching the ``name`` attribute of a device layout defined with a ``<fixed_layout>`` tag in the :ref:`arch_grid_layout` section of the architecture file.
207207

208-
.. note:: If the architecture contains both ``<auto_layout>`` and ``<fixed_layout>`` specifications, specifying an ``auto`` device will use the ``<auto_layout>``.
208+
If the value specified is neither ``auto`` nor matches the ``name`` attribute value of a ``<fixed_layout>`` tag, VPR issues an error.
209+
210+
.. note:: If the only layout in the architecture file is a single device specified using ``<fixed_layout>``, it is recommended to always specify the ``--device`` option; this prevents the value ``--device auto`` from interfering with operations supported only for ``<fixed_layout>`` grids.
209211

210212
**Default:** ``auto``
211213

@@ -1394,15 +1396,17 @@ The following options are only valid when the router is in timing-driven mode (t
13941396

13951397
**Default:** ``safe``
13961398

1397-
.. option:: --routing_budgets_algorithm { disable | minimax | scale_delay }
1399+
.. option:: --routing_budgets_algorithm { disable | minimax | yoyo | scale_delay }
13981400

13991401
.. warning:: Experimental
14001402

14011403
Controls how the routing budgets are created. Routing budgets are used to guide VPR's routing algorithm to consider both short path and long path timing constraints :cite:`RCV_algorithm`.
14021404

14031405
``disable`` is used to disable the budget feature. This uses the default VPR and ignores hold time constraints.
14041406

1405-
``minimax`` sets the minimum and maximum budgets by distributing the long path and short path slacks depending on the the current delay values. This uses the routing cost valleys and Minimax-PERT algorithm :cite:`minimax_pert,RCV_algorithm`.
1407+
``minimax`` sets the minimum and maximum budgets by distributing the long path and short path slacks depending on the current delay values. This uses the Minimax-PERT algorithm :cite:`minimax_pert`.
1408+
1409+
``yoyo`` allocates budgets using the minimax algorithm (as above), and enables hold slack resolution in the router using the Routing Cost Valleys (RCV) algorithm :cite:`RCV_algorithm`.
14061410

14071411
``scale_delay`` sets the minimum budgets to 0 and the maximum budgets to the delay of a net scaled by the pin criticality (net delay/pin criticality).
14081412

libs/libarchfpga/src/physical_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1382,7 +1382,7 @@ class t_pb_graph_pin {
13821382
float tco_max = std::numeric_limits<float>::quiet_NaN(); /* For sequential logic elements the maximum clock to output time */
13831383
t_pb_graph_pin* associated_clock_pin = nullptr; /* For sequentail elements, the associated clock */
13841384

1385-
/* This member is used when flat-routing and has_choking_spot are enabled.
1385+
/* This member is used when flat-routing and router_opt_choke_points are enabled.
13861386
* It is used to identify choke points.
13871387
* This is only valid for IPINs, and it only contains the pins that are reachable to the pin by a forwarding path.
13881388
* It doesn't take into account feed-back connection.

utils/route_diag/src/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ static void do_one_route(const Netlist<>& net_list,
114114
is_flat);
115115
enable_router_debug(router_opts, ParentNetId(), sink_node, 1, &router);
116116
bool found_path;
117-
t_heap cheapest;
117+
RTExploredNode cheapest;
118118
ConnectionParameters conn_params(ParentNetId::INVALID(),
119119
-1,
120120
false,

vpr/src/base/SetupVPR.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ void SetupVPR(const t_options* options,
318318
vtr::ScopedStartFinishTimer timer("Allocate intra-cluster resources");
319319
// The following two functions should be called when the data structured related to t_pb_graph_node, t_pb_type,
320320
// and t_pb_graph_edge are initialized
321-
alloc_and_load_intra_cluster_resources(routerOpts->has_choking_spot);
321+
alloc_and_load_intra_cluster_resources(routerOpts->has_choke_point);
322322
add_intra_tile_switches();
323323
}
324324

@@ -510,7 +510,7 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts)
510510
RouterOpts->max_logged_overused_rr_nodes = Options.max_logged_overused_rr_nodes;
511511
RouterOpts->generate_rr_node_overuse_report = Options.generate_rr_node_overuse_report;
512512
RouterOpts->flat_routing = Options.flat_routing;
513-
RouterOpts->has_choking_spot = Options.has_choking_spot;
513+
RouterOpts->has_choke_point = Options.router_opt_choke_points;
514514
RouterOpts->custom_3d_sb_fanin_fanout = Options.custom_3d_sb_fanin_fanout;
515515
RouterOpts->with_timing_analysis = Options.timing_analysis;
516516
}

vpr/src/base/ShowSetup.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -255,11 +255,11 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
255255
VTR_LOG("false\n");
256256
}
257257

258-
VTR_LOG("RouterOpts.has_choking_spot: ");
259-
if (RouterOpts.has_choking_spot) {
260-
VTR_LOG("true\n");
258+
VTR_LOG("RouterOpts.choke_points: ");
259+
if (RouterOpts.has_choke_point) {
260+
VTR_LOG("on\n");
261261
} else {
262-
VTR_LOG("false\n");
262+
VTR_LOG("off\n");
263263
}
264264

265265
VTR_ASSERT(GLOBAL == RouterOpts.route_type || DETAILED == RouterOpts.route_type);
@@ -468,9 +468,6 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
468468
case e_heap_type::FOUR_ARY_HEAP:
469469
VTR_LOG("FOUR_ARY_HEAP\n");
470470
break;
471-
case e_heap_type::BUCKET_HEAP_APPROXIMATION:
472-
VTR_LOG("BUCKET_HEAP_APPROXIMATION\n");
473-
break;
474471
default:
475472
VPR_FATAL_ERROR(VPR_ERROR_UNKNOWN, "Unknown router_heap\n");
476473
}

vpr/src/base/place_and_route.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
398398

399399
init_route_structs(router_net_list,
400400
router_opts.bb_factor,
401-
router_opts.has_choking_spot,
401+
router_opts.has_choke_point,
402402
is_flat);
403403

404404
restore_routing(best_routing,

vpr/src/base/read_options.cpp

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ struct RouteBudgetsAlgorithm {
272272
}
273273

274274
std::vector<std::string> default_choices() {
275-
return {"minimax", "scale_delay", "disable"};
275+
return {"minimax", "yoyo", "scale_delay", "disable"};
276276
}
277277
};
278278

@@ -1063,8 +1063,6 @@ struct ParseRouterHeap {
10631063
conv_value.set_value(e_heap_type::BINARY_HEAP);
10641064
else if (str == "four_ary")
10651065
conv_value.set_value(e_heap_type::FOUR_ARY_HEAP);
1066-
else if (str == "bucket")
1067-
conv_value.set_value(e_heap_type::BUCKET_HEAP_APPROXIMATION);
10681066
else {
10691067
std::stringstream msg;
10701068
msg << "Invalid conversion from '" << str << "' to e_heap_type (expected one of: " << argparse::join(default_choices(), ", ") << ")";
@@ -1077,11 +1075,9 @@ struct ParseRouterHeap {
10771075
ConvertedValue<std::string> conv_value;
10781076
if (val == e_heap_type::BINARY_HEAP)
10791077
conv_value.set_value("binary");
1080-
else if (val == e_heap_type::FOUR_ARY_HEAP)
1081-
conv_value.set_value("four_ary");
10821078
else {
1083-
VTR_ASSERT(val == e_heap_type::BUCKET_HEAP_APPROXIMATION);
1084-
conv_value.set_value("bucket");
1079+
VTR_ASSERT(val == e_heap_type::FOUR_ARY_HEAP);
1080+
conv_value.set_value("four_ary");
10851081
}
10861082
return conv_value;
10871083
}
@@ -2491,13 +2487,13 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
24912487
.default_value("off")
24922488
.show_in(argparse::ShowIn::HELP_ONLY);
24932489

2494-
route_grp.add_argument(args.has_choking_spot, "--has_choking_spot")
2490+
route_grp.add_argument<bool, ParseOnOff>(args.router_opt_choke_points, "--router_opt_choke_points")
24952491
.help(
24962492
""
2497-
"Some FPGA architectures, due to the lack of full connectivity inside the cluster, may have"
2498-
" a choking spot inside the cluster. Thus, if routing doesn't converge, enabling this option may"
2499-
" help it.")
2500-
.default_value("false")
2493+
"Some FPGA architectures with limited fan-out options within a cluster (e.g. fracturable LUTs with shared pins) do"
2494+
" not converge well in routing unless these fan-out choke points are discovered and optimized for during net routing."
2495+
" This option helps router convergence for such architectures.")
2496+
.default_value("on")
25012497
.show_in(argparse::ShowIn::HELP_ONLY);
25022498

25032499

vpr/src/base/read_options.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ struct t_options {
218218
argparse::ArgValue<int> reorder_rr_graph_nodes_threshold;
219219
argparse::ArgValue<int> reorder_rr_graph_nodes_seed;
220220
argparse::ArgValue<bool> flat_routing;
221-
argparse::ArgValue<bool> has_choking_spot;
221+
argparse::ArgValue<bool> router_opt_choke_points;
222222
argparse::ArgValue<int> route_verbosity;
223223
argparse::ArgValue<int> custom_3d_sb_fanin_fanout;
224224

vpr/src/base/read_route.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
#include <ctime>
2121
#include <sstream>
2222
#include <string>
23-
#include <unordered_set>
2423

2524
#include "atom_netlist.h"
2625
#include "atom_netlist_utils.h"
@@ -46,7 +45,6 @@
4645
#include "route_common.h"
4746
#include "route_tree.h"
4847
#include "read_route.h"
49-
#include "four_ary_heap.h"
5048

5149
#include "old_traceback.h"
5250

@@ -109,7 +107,7 @@ bool read_route(const char* route_file, const t_router_opts& router_opts, bool v
109107
const Netlist<>& router_net_list = (flat_router) ? (const Netlist<>&)g_vpr_ctx.atom().nlist : (const Netlist<>&)g_vpr_ctx.clustering().clb_nlist;
110108
init_route_structs(router_net_list,
111109
router_opts.bb_factor,
112-
router_opts.has_choking_spot,
110+
router_opts.has_choke_point,
113111
flat_router);
114112

115113
/*Check dimensions*/

vpr/src/base/vpr_types.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1339,7 +1339,7 @@ struct t_router_opts {
13391339
bool generate_rr_node_overuse_report;
13401340

13411341
bool flat_routing;
1342-
bool has_choking_spot;
1342+
bool has_choke_point;
13431343

13441344
int custom_3d_sb_fanin_fanout = 1;
13451345

@@ -1607,14 +1607,18 @@ constexpr bool is_src_sink(e_rr_type type) { return (type == SOURCE || type == S
16071607
* is being used.
16081608
* @param backward_path_cost Total cost of the path up to and including this
16091609
* node.
1610-
* @param occ The current occupancy of the associated rr node
1610+
* @param R_upstream Upstream resistance to ground from this node in the current
1611+
* path search (connection routing), including the resistance
1612+
* of the node itself (device_ctx.rr_nodes[index].R).
1613+
* @param occ The current occupancy of the associated rr node.
16111614
*/
16121615
struct t_rr_node_route_inf {
16131616
RREdgeId prev_edge;
16141617

16151618
float acc_cost;
16161619
float path_cost;
16171620
float backward_path_cost;
1621+
float R_upstream;
16181622

16191623
public: //Accessors
16201624
short occ() const { return occ_; }

vpr/src/place/net_cost_handler.cpp

Lines changed: 71 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@
3131
#include "placer_state.h"
3232
#include "move_utils.h"
3333
#include "place_timing_update.h"
34-
#include "noc_place_utils.h"
3534
#include "vtr_math.h"
35+
#include "vtr_ndmatrix.h"
36+
#include "vtr_ndoffsetmatrix.h"
3637

3738
#include <array>
3839

@@ -53,9 +54,6 @@ constexpr std::array<float, MAX_FANOUT_CROSSING_COUNT> cross_count = {1.0000, 1.
5354
2.5610, 2.5864, 2.6117, 2.6371, 2.6625, 2.6887, 2.7148, 2.7410,
5455
2.7671, 2.7933};
5556

56-
57-
58-
5957
/**
6058
* @brief If the moving pin is of type SINK, update bb_pin_sink_count_new which stores the number of sink pins on each layer of "net_id"
6159
* @param pin_old_loc Old location of the moving pin
@@ -229,6 +227,70 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_(float place_c
229227
chany_place_cost_fac_[high][low] = pow((double)chany_place_cost_fac_[high][low], (double)place_cost_exp);
230228
}
231229
}
230+
231+
if (device_ctx.grid.get_num_layers() > 1) {
232+
alloc_and_load_for_fast_vertical_cost_update_(place_cost_exp);
233+
}
234+
}
235+
236+
void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_(float place_cost_exp) {
237+
const auto& device_ctx = g_vpr_ctx.device();
238+
const auto& rr_graph = device_ctx.rr_graph;
239+
240+
const size_t grid_height = device_ctx.grid.height();
241+
const size_t grid_width = device_ctx.grid.width();
242+
243+
244+
chanz_place_cost_fac_ = vtr::NdMatrix<float, 4>({grid_width, grid_height, grid_width, grid_height}, 0.);
245+
246+
vtr::NdMatrix<float, 2> tile_num_inter_die_conn({grid_width, grid_height}, 0.);
247+
248+
for (const auto& src_rr_node : rr_graph.nodes()) {
249+
for (const auto& rr_edge_idx : rr_graph.configurable_edges(src_rr_node)) {
250+
const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx);
251+
if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) {
252+
// We assume that the nodes driving the inter-layer connection or being driven by it
253+
// are not stretched across multiple tiles
254+
int src_x = rr_graph.node_xhigh(src_rr_node);
255+
int src_y = rr_graph.node_yhigh(src_rr_node);
256+
VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_ylow(src_rr_node) == src_y);
257+
258+
tile_num_inter_die_conn[src_x][src_y]++;
259+
}
260+
}
261+
262+
for (const auto& rr_edge_idx : rr_graph.non_configurable_edges(src_rr_node)) {
263+
const auto& sink_rr_node = rr_graph.edge_sink_node(src_rr_node, rr_edge_idx);
264+
if (rr_graph.node_layer(src_rr_node) != rr_graph.node_layer(sink_rr_node)) {
265+
int src_x = rr_graph.node_xhigh(src_rr_node);
266+
VTR_ASSERT(rr_graph.node_xlow(src_rr_node) == src_x && rr_graph.node_xlow(src_rr_node) == src_x);
267+
int src_y = rr_graph.node_yhigh(src_rr_node);
268+
VTR_ASSERT(rr_graph.node_ylow(src_rr_node) == src_y && rr_graph.node_ylow(src_rr_node) == src_y);
269+
tile_num_inter_die_conn[src_x][src_y]++;
270+
}
271+
}
272+
}
273+
274+
for (int x_high = 0; x_high < (int)device_ctx.grid.width(); x_high++) {
275+
for (int y_high = 0; y_high < (int)device_ctx.grid.height(); y_high++) {
276+
for (int x_low = 0; x_low <= x_high; x_low++) {
277+
for (int y_low = 0; y_low <= y_high; y_low++) {
278+
int num_inter_die_conn = 0;
279+
for (int x = x_low; x <= x_high; x++) {
280+
for (int y = y_low; y <= y_high; y++) {
281+
num_inter_die_conn += tile_num_inter_die_conn[x][y];
282+
}
283+
}
284+
int seen_num_tiles = (x_high - x_low + 1) * (y_high - y_low + 1);
285+
chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = seen_num_tiles / static_cast<float>(num_inter_die_conn);
286+
287+
chanz_place_cost_fac_[x_high][y_high][x_low][y_low] = pow(
288+
(double)chanz_place_cost_fac_[x_high][y_high][x_low][y_low],
289+
(double)place_cost_exp);
290+
}
291+
}
292+
}
293+
}
232294
}
233295

234296
double NetCostHandler::comp_bb_cost(e_cost_methods method) {
@@ -1395,6 +1457,8 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
13951457

13961458
const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : placer_state_.move().bb_coords[net_id];
13971459

1460+
const bool is_multi_layer = (g_vpr_ctx.device().grid.get_num_layers() > 1);
1461+
13981462
double crossing = wirelength_crossing_count(cluster_ctx.clb_nlist.net_pins(net_id).size());
13991463

14001464
/* Could insert a check for xmin == xmax. In that case, assume *
@@ -1413,6 +1477,9 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
14131477
double ncost;
14141478
ncost = (bb.xmax - bb.xmin + 1) * crossing * chanx_place_cost_fac_[bb.ymax][bb.ymin - 1];
14151479
ncost += (bb.ymax - bb.ymin + 1) * crossing * chany_place_cost_fac_[bb.xmax][bb.xmin - 1];
1480+
if (is_multi_layer) {
1481+
ncost += (bb.layer_max - bb.layer_min) * crossing * chanz_place_cost_fac_[bb.xmax][bb.ymax][bb.xmin][bb.ymin];
1482+
}
14161483

14171484
return ncost;
14181485
}

0 commit comments

Comments
 (0)