Skip to content

Improved parallel router: add NetlistRouter #2411

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions utils/route_diag/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include "router_delay_profiling.h"
#include "route_tree.h"
#include "route_common.h"
#include "route_timing.h"
#include "route_net.h"
#include "route_export.h"
#include "rr_graph.h"
#include "rr_graph2.h"
Expand Down Expand Up @@ -124,8 +124,7 @@ static void do_one_route(const Netlist<>& net_list,
cost_params,
bounding_box,
router_stats,
conn_params,
true);
conn_params);

if (found_path) {
VTR_ASSERT(cheapest.index == sink_node);
Expand Down
1 change: 1 addition & 0 deletions vpr/src/base/SetupVPR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,6 +481,7 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts)
RouterOpts->generate_rr_node_overuse_report = Options.generate_rr_node_overuse_report;
RouterOpts->flat_routing = Options.flat_routing;
RouterOpts->has_choking_spot = Options.has_choking_spot;
RouterOpts->with_timing_analysis = Options.timing_analysis;
}

static void SetupAnnealSched(const t_options& Options,
Expand Down
54 changes: 28 additions & 26 deletions vpr/src/base/place_and_route.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "place.h"
#include "read_place.h"
#include "read_route.h"
#include "route.h"
#include "route_export.h"
#include "draw.h"
#include "stats.h"
Expand Down Expand Up @@ -191,19 +192,19 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
arch->num_directs,
false);
}
success = try_route(router_net_list,
current,
router_opts,
analysis_opts,
det_routing_arch, segment_inf,
net_delay,
timing_info,
delay_calc,
arch->Chans,
arch->Directs,
arch->num_directs,
(attempt_count == 0) ? ScreenUpdatePriority::MAJOR : ScreenUpdatePriority::MINOR,
is_flat);
success = route(router_net_list,
current,
router_opts,
analysis_opts,
det_routing_arch, segment_inf,
net_delay,
timing_info,
delay_calc,
arch->Chans,
arch->Directs,
arch->num_directs,
(attempt_count == 0) ? ScreenUpdatePriority::MAJOR : ScreenUpdatePriority::MINOR,
is_flat);

attempt_count++;
fflush(stdout);
Expand Down Expand Up @@ -331,19 +332,20 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
false);
}

success = try_route(router_net_list,
current,
router_opts,
analysis_opts,
det_routing_arch, segment_inf,
net_delay,
timing_info,
delay_calc,
arch->Chans,
arch->Directs,
arch->num_directs,
ScreenUpdatePriority::MINOR,
is_flat);
success = route(router_net_list,
current,
router_opts,
analysis_opts,
det_routing_arch,
segment_inf,
net_delay,
timing_info,
delay_calc,
arch->Chans,
arch->Directs,
arch->num_directs,
ScreenUpdatePriority::MINOR,
is_flat);

if (success && Fc_clipped == false) {
final = current;
Expand Down
2 changes: 0 additions & 2 deletions vpr/src/base/read_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2986,8 +2986,6 @@ void set_conditional_defaults(t_options& args) {
*/
//Base cost type
if (args.base_cost_type.provenance() != Provenance::SPECIFIED) {
VTR_ASSERT(args.RouterAlgorithm == TIMING_DRIVEN || args.RouterAlgorithm == PARALLEL);

if (args.RouteType == DETAILED) {
if (args.timing_analysis) {
args.base_cost_type.set(DELAY_NORMALIZED_LENGTH, Provenance::INFERRED);
Expand Down
37 changes: 19 additions & 18 deletions vpr/src/base/vpr_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#include "pb_type_graph.h"
#include "route_common.h"
#include "timing_place_lookup.h"
#include "route.h"
#include "route_export.h"
#include "vpr_api.h"
#include "read_sdc.h"
Expand All @@ -61,9 +62,9 @@
#include "lb_type_rr_graph.h"
#include "read_activity.h"
#include "net_delay.h"
#include "AnalysisDelayCalculator.h"
#include "concrete_timing_info.h"
#include "netlist_writer.h"
#include "AnalysisDelayCalculator.h"
#include "RoutingDelayCalculator.h"
#include "check_route.h"
#include "constant_nets.h"
Expand Down Expand Up @@ -367,7 +368,6 @@ bool vpr_flow(t_vpr_setup& vpr_setup, t_arch& arch) {
}

#ifdef VPR_USE_TBB

/* Set this here, because tbb::global_control doesn't control anything once it's out of scope
* (contrary to the name). */
tbb::global_control c(tbb::global_control::max_allowed_parallelism, vpr_setup.num_workers);
Expand Down Expand Up @@ -805,10 +805,11 @@ RouteStatus vpr_route_flow(const Netlist<>& net_list,
std::shared_ptr<RoutingDelayCalculator> routing_delay_calc = nullptr;
if (vpr_setup.Timing.timing_analysis_enabled) {
auto& atom_ctx = g_vpr_ctx.atom();

routing_delay_calc = std::make_shared<RoutingDelayCalculator>(atom_ctx.nlist, atom_ctx.lookup, net_delay, is_flat);

timing_info = make_setup_hold_timing_info(routing_delay_calc, router_opts.timing_update_type);
} else {
/* No delay calculator (segfault if the code calls into it) and wirelength driven routing */
timing_info = make_constant_timing_info(0);
}

if (router_opts.doRouting == STAGE_DO) {
Expand Down Expand Up @@ -922,20 +923,20 @@ RouteStatus vpr_route_fixed_W(const Netlist<>& net_list,
VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Fixed channel width must be specified when routing at fixed channel width (was %d)", fixed_channel_width);
}
bool status = false;
status = try_route(net_list,
fixed_channel_width,
vpr_setup.RouterOpts,
vpr_setup.AnalysisOpts,
&vpr_setup.RoutingArch,
vpr_setup.Segments,
net_delay,
timing_info,
delay_calc,
arch.Chans,
arch.Directs,
arch.num_directs,
ScreenUpdatePriority::MAJOR,
is_flat);
status = route(net_list,
fixed_channel_width,
vpr_setup.RouterOpts,
vpr_setup.AnalysisOpts,
&vpr_setup.RoutingArch,
vpr_setup.Segments,
net_delay,
timing_info,
delay_calc,
arch.Chans,
arch.Directs,
arch.num_directs,
ScreenUpdatePriority::MAJOR,
is_flat);

return RouteStatus(status, fixed_channel_width);
}
Expand Down
2 changes: 2 additions & 0 deletions vpr/src/base/vpr_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -1449,6 +1449,8 @@ struct t_router_opts {
bool flat_routing;
bool has_choking_spot;

bool with_timing_analysis;

// Options related to rr_node reordering, for testing and possible cache optimization
e_rr_node_reorder_algorithm reorder_rr_graph_nodes_algorithm = DONT_REORDER;
int reorder_rr_graph_nodes_threshold = 0;
Expand Down
2 changes: 1 addition & 1 deletion vpr/src/draw/draw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
# endif

# include "rr_graph.h"
# include "route_util.h"
# include "route_utilization.h"
# include "place_macro.h"
# include "buttons.h"
# include "draw_rr.h"
Expand Down
2 changes: 1 addition & 1 deletion vpr/src/draw/draw_basic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
# endif

# include "rr_graph.h"
# include "route_util.h"
# include "route_utilization.h"
# include "place_macro.h"
# include "buttons.h"

Expand Down
2 changes: 1 addition & 1 deletion vpr/src/draw/draw_rr.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
# include "manual_moves.h"

# include "rr_graph.h"
# include "route_util.h"
# include "route_utilization.h"
# include "place_macro.h"
# include "buttons.h"

Expand Down
2 changes: 1 addition & 1 deletion vpr/src/draw/draw_rr_edges.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
# include "manual_moves.h"

# include "rr_graph.h"
# include "route_util.h"
# include "route_utilization.h"
# include "place_macro.h"
# include "buttons.h"

Expand Down
2 changes: 1 addition & 1 deletion vpr/src/draw/draw_searchbar.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
# include "manual_moves.h"

# include "rr_graph.h"
# include "route_util.h"
# include "route_utilization.h"
# include "place_macro.h"
# include "buttons.h"

Expand Down
2 changes: 1 addition & 1 deletion vpr/src/draw/draw_toggle_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
# include "manual_moves.h"

# include "rr_graph.h"
# include "route_util.h"
# include "route_utilization.h"
# include "place_macro.h"
# include "buttons.h"

Expand Down
2 changes: 1 addition & 1 deletion vpr/src/draw/draw_triangle.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
# include "manual_moves.h"

# include "rr_graph.h"
# include "route_util.h"
# include "route_utilization.h"
# include "place_macro.h"
# include "buttons.h"

Expand Down
2 changes: 1 addition & 1 deletion vpr/src/draw/search_bar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
# endif

# include "rr_graph.h"
# include "route_util.h"
# include "route_utilization.h"
# include "place_macro.h"

extern std::string rr_highlight_message;
Expand Down
2 changes: 2 additions & 0 deletions vpr/src/place/place_timing_update.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
#include "timing_place.h"
#include "place_util.h"

#include "NetPinTimingInvalidator.h"

///@brief Initialize the timing information and structures in the placer.
void initialize_timing_info(const PlaceCritParams& crit_params,
const PlaceDelayModel* delay_model,
Expand Down
2 changes: 1 addition & 1 deletion vpr/src/place/timing_place_lookup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include "globals.h"
#include "place_and_route.h"
#include "route_common.h"
#include "route_timing.h"
#include "route_net.h"
#include "route_export.h"
#include "rr_graph.h"
#include "timing_place_lookup.h"
Expand Down
93 changes: 93 additions & 0 deletions vpr/src/route/ParallelNetlistRouter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#pragma once

/** @file
 * @brief Parallel case for NetlistRouter.
 *
 * Builds a \ref PartitionTree from the netlist according to net bounding boxes.
 * Tree nodes are then routed in parallel using tbb::task_group. Each task routes
 * the nets inside a node serially and then adds its child nodes to the task queue.
 * This approach is serially equivalent and deterministic, but it can reduce QoR
 * in congested cases [0].
 *
 * Note that the parallel router does not support graphical router breakpoints.
 *
 * [0]: F. Koşar, "A net-decomposing parallel FPGA router", MS thesis, UofT ECE, 2023 */
#include "netlist_routers.h"

#include <tbb/task_group.h>

#include <utility>

/** Parallel implementation of NetlistRouter.
 *
 * Bundles the context needed to drive a ConnectionRouter together with the net
 * routing functions (see \ref route_net). State that is written while routing is
 * kept per thread: one ConnectionRouter and one RouteIterResults for each worker
 * thread, held in tbb::enumerable_thread_specific containers. Everything else is
 * held by reference (or pointer) to objects owned by the caller, which therefore
 * have to outlive this router.
 *
 * @tparam HeapType Heap implementation forwarded to ConnectionRouter. */
template<typename HeapType>
class ParallelNetlistRouter : public NetlistRouter {
  public:
    /** Store the routing context and build the exemplar ConnectionRouter.
     *
     * @param router_lookahead Must not be null: it is dereferenced while building
     *                         the exemplar router.
     * @param timing_info      Shared ownership is taken over by this object (the
     *                         argument is moved into the member).
     *
     * All reference parameters are stored as references and all raw pointers are
     * stored as-is; no copies of the pointed-to objects are made here. */
    ParallelNetlistRouter(
        const Netlist<>& net_list,
        const RouterLookahead* router_lookahead,
        const t_router_opts& router_opts,
        CBRR& connections_inf,
        NetPinsMatrix<float>& net_delay,
        const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
        std::shared_ptr<SetupHoldTimingInfo> timing_info,
        NetPinTimingInvalidator* pin_timing_invalidator,
        route_budgets& budgeting_inf,
        const RoutingPredictor& routing_predictor,
        const vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>>& choking_spots,
        bool is_flat)
        : _routers_th(_make_router(router_lookahead, is_flat))
        , _net_list(net_list)
        , _router_opts(router_opts)
        , _connections_inf(connections_inf)
        , _net_delay(net_delay)
        , _netlist_pin_lookup(netlist_pin_lookup)
        /* timing_info is a by-value sink parameter: move it instead of paying
         * for a second atomic refcount increment/decrement. */
        , _timing_info(std::move(timing_info))
        , _pin_timing_invalidator(pin_timing_invalidator)
        , _budgeting_inf(budgeting_inf)
        , _routing_predictor(routing_predictor)
        , _choking_spots(choking_spots)
        , _is_flat(is_flat) {}

    ~ParallelNetlistRouter() = default;

    /** Run a single iteration of netlist routing for this->_net_list. This usually
     * means calling \ref route_net for each net, which will handle other global updates.
     * \return RouteIterResults for this iteration. */
    RouteIterResults route_netlist(int itry, float pres_fac, float worst_neg_slack);

    /** Defined out of line (see the .tpp included at the end of this header).
     * NOTE(review): presumably enables/disables RCV on the per-thread routers -- confirm there. */
    void set_rcv_enabled(bool x);

    /** Defined out of line (see the .tpp included at the end of this header).
     * NOTE(review): presumably replaces \p _timing_info -- confirm there. */
    void set_timing_info(std::shared_ptr<SetupHoldTimingInfo> timing_info);

  private:
    /** A single task to route nets inside a PartitionTree node and add tasks for
     * its child nodes to task group \p g. */
    void route_partition_tree_node(tbb::task_group& g, PartitionTreeNode& node, int itry, float pres_fac, float worst_neg_slack);

    /** Build a ConnectionRouter wired to the global device and routing contexts.
     *
     * Declared static on purpose: the constructor calls it from the mem-initializer
     * list to seed \p _routers_th, i.e. before any data member of this object has
     * been constructed, so it must not (and now cannot) touch instance state.
     *
     * @param router_lookahead Dereferenced unconditionally; must not be null.
     * @param is_flat          Forwarded unchanged to the ConnectionRouter. */
    static ConnectionRouter<HeapType> _make_router(const RouterLookahead* router_lookahead, bool is_flat) {
        const auto& device = g_vpr_ctx.device();
        auto& routing = g_vpr_ctx.mutable_routing();

        return ConnectionRouter<HeapType>(
            device.grid,
            *router_lookahead,
            device.rr_graph.rr_nodes(),
            &device.rr_graph,
            device.rr_rc_data,
            device.rr_graph.rr_switch(),
            routing.rr_node_route_inf,
            is_flat);
    }

    /* Context fields.
     * Keep the declaration order in sync with the constructor's initializer list:
     * members are initialized in declaration order. */

    /** Per-thread connection routers, seeded from the router built by _make_router. */
    tbb::enumerable_thread_specific<ConnectionRouter<HeapType>> _routers_th;
    const Netlist<>& _net_list;
    const t_router_opts& _router_opts;
    CBRR& _connections_inf;
    /** Per-thread iteration results; default-constructed (not set by the constructor). */
    tbb::enumerable_thread_specific<RouteIterResults> _results_th;
    NetPinsMatrix<float>& _net_delay;
    const ClusteredPinAtomPinsLookup& _netlist_pin_lookup;
    std::shared_ptr<SetupHoldTimingInfo> _timing_info;
    NetPinTimingInvalidator* _pin_timing_invalidator;
    route_budgets& _budgeting_inf;
    const RoutingPredictor& _routing_predictor;
    const vtr::vector<ParentNetId, std::vector<std::unordered_map<RRNodeId, int>>>& _choking_spots;
    bool _is_flat;
};

#include "ParallelNetlistRouter.tpp"
Loading