diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index 2bd43848964..12d6638ce0b 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -342,6 +342,7 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts)
     RouterOpts->congested_routing_iteration_threshold_frac = Options.congested_routing_iteration_threshold_frac;
     RouterOpts->route_bb_update = Options.route_bb_update;
     RouterOpts->clock_modeling = Options.clock_modeling;
+    RouterOpts->two_stage_clock_routing = Options.two_stage_clock_routing;
     RouterOpts->high_fanout_threshold = Options.router_high_fanout_threshold;
     RouterOpts->router_debug_net = Options.router_debug_net;
     RouterOpts->router_debug_sink_rr = Options.router_debug_sink_rr;
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 95f85fb605a..5f654445efd 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -907,9 +907,7 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
             "Specifies how constant nets (i.e. those driven to a constant\n"
             "value) are handled:\n"
             " * global: Treat constant nets as globals (not routed)\n"
-            " * route : Treat constant nets as normal nets (routed)\n"
-            " * dedicated_network : Build a dedicated clock network based on the\n"
-            "                       clock network specified in the architecture file\n")
+            " * route : Treat constant nets as normal nets (routed)\n")
         .default_value("global")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
@@ -919,10 +917,21 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg
             " * ideal: Treat clock pins as ideal\n"
             "          (i.e. no routing delays on clocks)\n"
             " * route: Treat the clock pins as normal nets\n"
-            "          (i.e. routed using inter-block routing)\n")
+            "          (i.e. routed using inter-block routing)\n"
+            " * dedicated_network : Build a dedicated clock network based on the\n"
+            "                       clock network specified in the architecture file\n")
         .default_value("ideal")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
+    gen_grp.add_argument<bool, ParseOnOff>(args.two_stage_clock_routing, "--two_stage_clock_routing")
+        .help(
+            "Routes clock nets in two stages if using a dedicated clock network.\n"
+            " * First stage: From the net source to a dedicated clock network source\n"
+            " * Second stage: From the clock network source to net sinks\n")
+        .default_value("off")
+        .action(argparse::Action::STORE_TRUE)
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
     gen_grp.add_argument<bool, ParseOnOff>(args.exit_before_pack, "--exit_before_pack")
         .help("Causes VPR to exit before packing starts (useful for statistics collection)")
         .default_value("off")
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index d1f45e50aad..97d320abbe3 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -56,6 +56,7 @@ struct t_options {
     argparse::ArgValue<float> target_device_utilization;
     argparse::ArgValue<e_constant_net_method> constant_net_method;
     argparse::ArgValue<e_clock_modeling> clock_modeling;
+    argparse::ArgValue<bool> two_stage_clock_routing;
     argparse::ArgValue<bool> exit_before_pack;
     argparse::ArgValue<bool> strict_checks;
     argparse::ArgValue<std::string> disable_errors;
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index 348545f58ea..0854b644f8b 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -216,6 +216,7 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a
     vpr_setup->device_layout = options->device_layout;
     vpr_setup->constant_net_method = options->constant_net_method;
     vpr_setup->clock_modeling = options->clock_modeling;
+    vpr_setup->two_stage_clock_routing = options->two_stage_clock_routing;
     vpr_setup->exit_before_pack = options->exit_before_pack;
 
     VTR_LOG("\n");
diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h
index 40a81bb41b5..7a72c7b2c97 100644
--- a/vpr/src/base/vpr_context.h
+++ b/vpr/src/base/vpr_context.h
@@ -162,6 +162,12 @@ struct DeviceContext : public Context {
     std::vector<std::unique_ptr<ClockNetwork>> clock_networks;
     std::vector<std::unique_ptr<ClockConnection>> clock_connections;
 
+    // rr_node idx that connects to the input of all clock network wires
+    // Useful for two-stage clock routing
+    // XXX: currently only one place to source the clock networks so only storing
+    //      a single value
+    int virtual_clock_network_root_idx;
+
     /** Attributes for each rr_node.
      * key: rr_node index
      * value: map of <attribute_name, attribute_value>
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index f3acaea19cd..f1c74839f64 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -939,6 +939,7 @@ struct t_router_opts {
     float congested_routing_iteration_threshold_frac;
     e_route_bb_update route_bb_update;
     enum e_clock_modeling clock_modeling; //How clock pins and nets should be handled
+    bool two_stage_clock_routing;         //Whether clock nets on dedicated networks should be routed in two stages
     int high_fanout_threshold;
     int router_debug_net;
     int router_debug_sink_rr;
@@ -1292,6 +1293,7 @@ struct t_vpr_setup {
     std::string device_layout;
     e_constant_net_method constant_net_method; //How constant nets should be handled
     e_clock_modeling clock_modeling;           //How clocks should be handled
+    bool two_stage_clock_routing;              //Whether clocks should be routed in two stages when a dedicated clock network is present
     bool exit_before_pack;                     //Exits early before starting packing (useful for collecting statistics without running/loading any stages)
 };
diff --git a/vpr/src/route/check_route.cpp b/vpr/src/route/check_route.cpp
index e52e2ce97e6..67a6782d83f 100644
--- a/vpr/src/route/check_route.cpp
+++ b/vpr/src/route/check_route.cpp
@@ -442,7 +442,8 @@ static bool check_adjacent(int from_node, int to_node) {
             } else if (to_type == CHANY) {
                 num_adj += chanx_chany_adjacent(from_node, to_node);
             } else {
-                VTR_ASSERT(0);
+                VPR_FATAL_ERROR(VPR_ERROR_ROUTE,
+                                "in check_adjacent: %d and %d are not adjacent", from_node, to_node);
             }
             break;
 
@@ -473,7 +474,8 @@ static bool check_adjacent(int from_node, int to_node) {
             } else if (to_type == CHANX) {
                 num_adj += chanx_chany_adjacent(to_node, from_node);
             } else {
-                VTR_ASSERT(0);
+                VPR_FATAL_ERROR(VPR_ERROR_ROUTE,
+                                "in check_adjacent: %d and %d are not adjacent", from_node, to_node);
             }
             break;
 
diff --git a/vpr/src/route/clock_connection_builders.cpp b/vpr/src/route/clock_connection_builders.cpp
index b32a2004cce..68adb5a4f62 100644
--- a/vpr/src/route/clock_connection_builders.cpp
+++ b/vpr/src/route/clock_connection_builders.cpp
@@ -49,6 +49,9 @@ void RoutingToClockConnection::create_switches(const ClockRRGraphBuilder& clock_
     auto& rr_nodes = device_ctx.rr_nodes;
     auto& rr_node_indices = device_ctx.rr_node_indices;
 
+    int virtual_clock_network_root_idx = create_virtual_clock_network_sink_node(switch_location.x, switch_location.y);
+    device_ctx.virtual_clock_network_root_idx = virtual_clock_network_root_idx;
+
     // rr_node indices for x and y channel routing wires and clock wires to connect to
     auto x_wire_indices = get_rr_node_chan_wires_at_location(
         rr_node_indices, CHANX, switch_location.x, switch_location.y);
@@ -73,9 +76,32 @@ void RoutingToClockConnection::create_switches(const ClockRRGraphBuilder& clock_
         for (size_t i = 0; i < num_wires_y; i++) {
             rr_nodes[y_wire_indices[i]].add_edge(clock_index, rr_switch_idx);
         }
+
+        // Connect to virtual clock sink node
+        // used by the two-stage router
+        rr_nodes[clock_index].add_edge(virtual_clock_network_root_idx, rr_switch_idx);
     }
 }
 
+int RoutingToClockConnection::create_virtual_clock_network_sink_node(
+    int x,
+    int y) {
+    auto& device_ctx = g_vpr_ctx.mutable_device();
+    auto& rr_nodes = device_ctx.rr_nodes;
+    rr_nodes.emplace_back();
+    auto node_index = rr_nodes.size() - 1;
+
+    rr_nodes[node_index].set_coordinates(x, y, x, y);
+    rr_nodes[node_index].set_capacity(1);
+    rr_nodes[node_index].set_cost_index(SINK_COST_INDEX);
+    rr_nodes[node_index].set_type(SINK);
+    float R = 0.;
+    float C = 0.;
+    rr_nodes[node_index].set_rc_index(find_create_rr_rc_data(R, C));
+
+    return node_index;
+}
+
 /*
  * ClockToClockConneciton (setters)
  */
diff --git a/vpr/src/route/clock_connection_builders.h b/vpr/src/route/clock_connection_builders.h
index 8e39b8873f3..8076907b656 100644
--- a/vpr/src/route/clock_connection_builders.h
+++ b/vpr/src/route/clock_connection_builders.h
@@ -54,6 +54,7 @@ class RoutingToClockConnection : public ClockConnection {
      */
     /* Connects the inter-block routing to the clock source at the specified coordinates */
     void create_switches(const ClockRRGraphBuilder& clock_graph);
+    int create_virtual_clock_network_sink_node(int x, int y);
 };
 
 class ClockToClockConneciton : public ClockConnection {
diff --git a/vpr/src/route/clock_network_builders.cpp b/vpr/src/route/clock_network_builders.cpp
index 0311e784a2b..ea367bdbb6b 100644
--- a/vpr/src/route/clock_network_builders.cpp
+++ b/vpr/src/route/clock_network_builders.cpp
@@ -274,8 +274,8 @@ int ClockRib::create_chanx_wire(int x_start,
     rr_nodes[node_index].set_type(CHANX);
     rr_nodes[node_index].set_capacity(1);
     rr_nodes[node_index].set_track_num(ptc_num);
-    auto rc_index = find_create_rr_rc_data(x_chan_wire.layer.r_metal, x_chan_wire.layer.c_metal);
-    rr_nodes[node_index].set_rc_index(rc_index);
+    rr_nodes[node_index].set_rc_index(find_create_rr_rc_data(
+        x_chan_wire.layer.r_metal, x_chan_wire.layer.c_metal));
     rr_nodes[node_index].set_direction(direction);
 
     short seg_index = 0;
@@ -523,8 +523,8 @@ int ClockSpine::create_chany_wire(int y_start,
     rr_nodes[node_index].set_type(CHANY);
     rr_nodes[node_index].set_capacity(1);
     rr_nodes[node_index].set_track_num(ptc_num);
-    auto rc_index = find_create_rr_rc_data(y_chan_wire.layer.r_metal, y_chan_wire.layer.c_metal);
-    rr_nodes[node_index].set_rc_index(rc_index);
+    rr_nodes[node_index].set_rc_index(find_create_rr_rc_data(
+        y_chan_wire.layer.r_metal, y_chan_wire.layer.c_metal));
     rr_nodes[node_index].set_direction(direction);
 
     short seg_index = 0;
diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp
index 350a3406bb4..396517ed1ba 100644
--- a/vpr/src/route/route_common.cpp
+++ b/vpr/src/route/route_common.cpp
@@ -799,6 +799,31 @@ void node_to_heap(int inode, float total_cost, int prev_node, int prev_edge, flo
     add_to_heap(hptr);
 }
 
+void drop_traceback_tail(ClusterNetId net_id) {
+    /* Removes the tail node from the routing traceback and makes
+     * its predecessor the new tail.
+     * This function is primarily called to remove the virtual clock
+     * sink from the routing traceback and replace it with the clock
+     * network root. */
+    auto& route_ctx = g_vpr_ctx.mutable_routing();
+
+    auto* tail_ptr = route_ctx.trace[net_id].tail;
+    auto node = tail_ptr->index;
+    route_ctx.trace_nodes[net_id].erase(node);
+    auto* trace_ptr = route_ctx.trace[net_id].head;
+    while (trace_ptr != nullptr) {
+        t_trace* next_ptr = trace_ptr->next;
+        if (next_ptr == tail_ptr) {
+            trace_ptr->iswitch = tail_ptr->iswitch;
+            trace_ptr->next = nullptr;
+            route_ctx.trace[net_id].tail = trace_ptr;
+            break;
+        }
+        trace_ptr = next_ptr;
+    }
+    free_trace_data(tail_ptr);
+}
+
 void free_traceback(ClusterNetId net_id) {
     /* Puts the entire traceback (old routing) for this net on the free list *
      * and sets the route_ctx.trace_head pointers etc. for the net to NULL.  */
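
The tail-drop in drop_traceback_tail() above has to scan from the head of the traceback because t_trace is singly linked: the predecessor of the tail inherits the tail's iswitch and becomes the new tail, and the old tail is then freed. A minimal, self-contained sketch of that relinking pattern follows; trace_stub and drop_tail are simplified stand-ins for illustration only, not names from VPR.

    struct trace_stub {      // simplified stand-in for VPR's t_trace
        int index;           // rr_node index
        short iswitch;       // switch used to reach the next traceback element
        trace_stub* next;
    };

    // Unlink the tail and make its predecessor the new tail (mirrors drop_traceback_tail()).
    // Assumes the list has at least two elements, as in the pre-route case where the
    // virtual clock sink is never the traceback head.
    trace_stub* drop_tail(trace_stub* head, trace_stub*& tail) {
        trace_stub* old_tail = tail;
        for (trace_stub* t = head; t != nullptr; t = t->next) {
            if (t->next == old_tail) {
                t->iswitch = old_tail->iswitch; // new tail takes over the dropped tail's switch
                t->next = nullptr;
                tail = t;
                break;
            }
        }
        return old_tail; // caller releases it (free_trace_data() in the patch)
    }
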
diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h
index ea9ed19f12f..147ff9ddc23 100644
--- a/vpr/src/route/route_common.h
+++ b/vpr/src/route/route_common.h
@@ -79,6 +79,7 @@ void node_to_heap(int inode, float cost, int prev_node, int prev_edge, float bac
 bool is_empty_heap();
 
 void free_traceback(ClusterNetId net_id);
+void drop_traceback_tail(ClusterNetId net_id);
 void free_traceback(t_trace* tptr);
 
 void add_to_mod_list(int inode, std::vector<int>& modified_rr_node_inf);
diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp
index 580fee3e6f4..b1ac06a1eca 100644
--- a/vpr/src/route/route_timing.cpp
+++ b/vpr/src/route/route_timing.cpp
@@ -38,7 +38,6 @@
 #include "tatum/TimingReporter.hpp"
 
 #define CONGESTED_SLOPE_VAL -0.04
-//#define ROUTER_DEBUG
 
 enum class RouterCongestionMode {
     NORMAL,
@@ -147,6 +146,17 @@ static bool timing_driven_route_sink(ClusterNetId net_id,
                                      SpatialRouteTreeLookup& spatial_rt_lookup,
                                      RouterStats& router_stats);
 
+static bool timing_driven_pre_route_to_clock_root(
+    ClusterNetId net_id,
+    int sink_node,
+    const t_conn_cost_params cost_params,
+    float pres_fac,
+    int high_fanout_threshold,
+    t_rt_node* rt_root,
+    const RouterLookahead& router_lookahead,
+    SpatialRouteTreeLookup& spatial_rt_lookup,
+    RouterStats& router_stats);
+
 static t_heap* timing_driven_route_connection_from_route_tree_high_fanout(t_rt_node* rt_root,
                                                                           int sink_node,
                                                                           const t_conn_cost_params cost_params,
@@ -168,6 +178,7 @@ static std::vector timing_driven_find_all_shortest_paths_from_heap(const
                                                                    std::vector<int>& modified_rr_node_inf,
                                                                    RouterStats& router_stats);
 
+void disable_expansion_and_remove_sink_from_route_tree_nodes(t_rt_node* node);
 static void timing_driven_expand_cheapest(t_heap* cheapest,
                                           int target_node,
                                           const t_conn_cost_params cost_params,
@@ -457,7 +468,6 @@ bool try_timing_driven_route(const t_router_opts& router_opts,
                                               route_timing_info,
                                               budgeting_inf,
                                               was_rerouted);
-
         if (!is_routable) {
             return (false); //Impossible to route
         }
@@ -995,6 +1005,27 @@ bool timing_driven_route_net(ClusterNetId net_id,
     cost_params.bend_cost = router_opts.bend_cost;
     cost_params.delay_budget = ((budgeting_inf.if_set()) ? &conn_delay_budget : nullptr);
 
+    // Pre-route to clock source for clock nets (marked as global nets)
+    if (cluster_ctx.clb_nlist.net_is_global(net_id) && router_opts.two_stage_clock_routing) {
+        VTR_ASSERT(router_opts.clock_modeling == DEDICATED_NETWORK);
+        int sink_node = device_ctx.virtual_clock_network_root_idx;
+        enable_router_debug(router_opts, net_id, sink_node);
+        // Set to the max timing criticality, which should in turn minimize clock insertion
+        // delay by selecting a direct route from the clock source to the virtual sink
+        cost_params.criticality = router_opts.max_criticality;
+        if (!timing_driven_pre_route_to_clock_root(
+                net_id,
+                sink_node,
+                cost_params,
+                pres_fac,
+                router_opts.high_fanout_threshold,
+                rt_root,
+                router_lookahead,
+                spatial_route_tree_lookup,
+                router_stats)) {
+            return false;
+        }
+    }
 
     // explore in order of decreasing criticality (no longer need sink_order array)
     for (unsigned itarget = 0; itarget < remaining_targets.size(); ++itarget) {
         int target_pin = remaining_targets[itarget];
@@ -1051,6 +1082,104 @@ bool timing_driven_route_net(ClusterNetId net_id,
     return (true);
 }
 
+static bool timing_driven_pre_route_to_clock_root(
+    ClusterNetId net_id,
+    int sink_node,
+    const t_conn_cost_params cost_params,
+    float pres_fac,
+    int high_fanout_threshold,
+    t_rt_node* rt_root,
+    const RouterLookahead& router_lookahead,
+    SpatialRouteTreeLookup& spatial_rt_lookup,
+    RouterStats& router_stats) {
+    auto& route_ctx = g_vpr_ctx.mutable_routing();
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& m_route_ctx = g_vpr_ctx.mutable_routing();
+
+    bool high_fanout = is_high_fanout(cluster_ctx.clb_nlist.net_sinks(net_id).size(), high_fanout_threshold);
+
+    VTR_LOGV_DEBUG(f_router_debug, "Net %zu pre-route to (%s)\n", size_t(net_id), describe_rr_node(sink_node).c_str());
+
+    std::vector<int> modified_rr_node_inf;
+
+    profiling::sink_criticality_start();
+
+    VTR_ASSERT_DEBUG(verify_traceback_route_tree_equivalent(route_ctx.trace[net_id].head, rt_root));
+
+    t_heap* cheapest = nullptr;
+    t_bb bounding_box = route_ctx.route_bb[net_id];
+
+    cheapest = timing_driven_route_connection_from_route_tree(rt_root,
+                                                              sink_node,
+                                                              cost_params,
+                                                              bounding_box,
+                                                              router_lookahead,
+                                                              modified_rr_node_inf,
+                                                              router_stats);
+
+    // TODO: Parts of the rest of this function duplicate code in timing_driven_route_sink. Should refactor.
+    if (cheapest == nullptr) {
+        ClusterBlockId src_block = cluster_ctx.clb_nlist.net_driver_block(net_id);
+        VTR_LOG("Failed to route connection from '%s' to '%s' for net '%s' (#%zu)\n",
+                cluster_ctx.clb_nlist.block_name(src_block).c_str(),
+                describe_rr_node(sink_node).c_str(),
+                cluster_ctx.clb_nlist.net_name(net_id).c_str(),
+                size_t(net_id));
+        if (f_router_debug) {
+            update_screen(ScreenUpdatePriority::MAJOR, "Unable to route connection.", ROUTING, nullptr);
+        }
+        return false;
+    } else {
+        //Record final link to target
+        add_to_mod_list(cheapest->index, modified_rr_node_inf);
+
+        route_ctx.rr_node_route_inf[cheapest->index].prev_node = cheapest->u.prev.node;
+        route_ctx.rr_node_route_inf[cheapest->index].prev_edge = cheapest->u.prev.edge;
+        route_ctx.rr_node_route_inf[cheapest->index].path_cost = cheapest->cost;
+        route_ctx.rr_node_route_inf[cheapest->index].backward_path_cost = cheapest->backward_path_cost;
+    }
+
+    profiling::sink_criticality_end(cost_params.criticality);
+
+    /* NB: In the code below I keep two records of the partial routing: the      *
+     * traceback and the route_tree. The route_tree enables fast recomputation   *
+     * of the Elmore delay to each node in the partial routing. The traceback    *
+     * lets me reuse all the routines written for breadth-first routing, which   *
+     * all take a traceback structure as input.                                  */
+
+    t_trace* new_route_start_tptr = update_traceback(cheapest, net_id);
+    VTR_ASSERT_DEBUG(validate_traceback(route_ctx.trace[net_id].head));
+    update_route_tree(cheapest, ((high_fanout) ? &spatial_rt_lookup : nullptr));
+    VTR_ASSERT_DEBUG(verify_route_tree(rt_root));
+    VTR_ASSERT_DEBUG(verify_traceback_route_tree_equivalent(route_ctx.trace[net_id].head, rt_root));
+    VTR_ASSERT_DEBUG(!high_fanout || validate_route_tree_spatial_lookup(rt_root, spatial_rt_lookup));
+    if (f_router_debug) {
+        update_screen(ScreenUpdatePriority::MAJOR, "Routed connection successfully", ROUTING, nullptr);
+    }
+    free_heap_data(cheapest);
+    pathfinder_update_path_cost(new_route_start_tptr, 1, pres_fac);
+    empty_heap();
+
+    // need to guarantee ALL nodes' path costs are HUGE_POSITIVE_FLOAT at the start of routing to a sink
+    // do this by resetting all the path_costs that have been touched while routing to the current sink
+    reset_path_costs(modified_rr_node_inf);
+
+    // Post-route traceback and route tree clean-up:
+    // - remove the sink from the traceback and route tree
+    // - fix routing for all nodes leading to the sink
+    // - free up the virtual sink's occupancy
+    disable_expansion_and_remove_sink_from_route_tree_nodes(rt_root);
+    VTR_LOGV_DEBUG(f_router_debug, "Traceback tail before update %d \n",
+                   route_ctx.trace[net_id].tail->index);
+    drop_traceback_tail(net_id);
+    VTR_LOGV_DEBUG(f_router_debug, "Updated traceback ptrs: %d %d \n",
+                   route_ctx.trace[net_id].head->index, route_ctx.trace[net_id].tail->index);
+    m_route_ctx.rr_node_route_inf[sink_node].set_occ(0);
+
+    // routed to a sink successfully
+    return true;
+}
+
 static bool timing_driven_route_sink(ClusterNetId net_id,
                                      unsigned itarget,
                                      int target_pin,
@@ -1635,6 +1764,34 @@ static t_rt_node* setup_routing_resources(int itry,
     return rt_root;
 }
 
+void disable_expansion_and_remove_sink_from_route_tree_nodes(t_rt_node* rt_node) {
+    /* Removes the sink from the route tree and marks all nodes
+     * leading to the sink as not re-expandable.
+     */
+    auto& device_ctx = g_vpr_ctx.device();
+    t_rt_node* child_node;
+    t_linked_rt_edge* linked_rt_edge;
+    linked_rt_edge = rt_node->u.child_list;
+
+    while (linked_rt_edge != nullptr) {
+        child_node = linked_rt_edge->child;
+        if (device_ctx.rr_nodes[child_node->inode].type() == SINK) {
+            VTR_LOGV_DEBUG(f_router_debug,
+                           "Removing sink %d from route tree\n", child_node->inode);
+            rt_node->u.child_list = nullptr;
+            rt_node->u.next = nullptr;
+            free(child_node);
+            break;
+        } else {
+            rt_node->re_expand = false;
+            VTR_LOGV_DEBUG(f_router_debug,
+                           "Disabling expansion of node %d in route tree\n", rt_node->inode);
+        }
+        disable_expansion_and_remove_sink_from_route_tree_nodes(child_node);
+        linked_rt_edge = linked_rt_edge->next;
+    }
+}
+
 static void add_route_tree_to_heap(t_rt_node* rt_node,
                                    int target_node,
                                    const t_conn_cost_params cost_params,
diff --git a/vpr/src/route/rr_graph_area.cpp b/vpr/src/route/rr_graph_area.cpp
index 7c2ab986641..963bba43e90 100644
--- a/vpr/src/route/rr_graph_area.cpp
+++ b/vpr/src/route/rr_graph_area.cpp
@@ -428,6 +428,9 @@ void count_unidir_routing_transistors(std::vector& /*segment_inf*
                 }
                 break;
 
+            case SINK:
+                break; //ignore virtual sinks
+
             default:
                 VPR_ERROR(VPR_ERROR_ROUTE,
                           "in count_routing_transistors:\n"
diff --git a/vpr/src/route/rr_node.cpp b/vpr/src/route/rr_node.cpp
index 13e3bd160cf..97aa653d450 100644
--- a/vpr/src/route/rr_node.cpp
+++ b/vpr/src/route/rr_node.cpp
@@ -118,6 +118,7 @@ float t_rr_node::R() const {
 
 float t_rr_node::C() const {
     auto& device_ctx = g_vpr_ctx.device();
+    VTR_ASSERT(rc_index() < (short)device_ctx.rr_rc_data.size());
     return device_ctx.rr_rc_data[rc_index()].C;
 }
 
diff --git a/vtr_flow/arch/timing/k6_frac_N10_frac_chain_mem32K_htree0_40nm.xml b/vtr_flow/arch/timing/k6_frac_N10_frac_chain_mem32K_htree0_40nm.xml
index 29091d1c253..f7adb5ed34d 100644
--- a/vtr_flow/arch/timing/k6_frac_N10_frac_chain_mem32K_htree0_40nm.xml
+++ b/vtr_flow/arch/timing/k6_frac_N10_frac_chain_mem32K_htree0_40nm.xml
@@ -1528,8 +1528,8 @@
+         to all clock pins. TODO: switch to specify a port name. In this case the clock
+         port names are clk for the clb clock pins and clock for the io clock pins -->
diff --git a/vtr_flow/arch/timing/k6_frac_N10_frac_chain_mem32K_htree0_routedCLK_40nm.xml b/vtr_flow/arch/timing/k6_frac_N10_frac_chain_mem32K_htree0_routedCLK_40nm.xml
index 52f6a0f4145..b4cc4086b65 100644
--- a/vtr_flow/arch/timing/k6_frac_N10_frac_chain_mem32K_htree0_routedCLK_40nm.xml
+++ b/vtr_flow/arch/timing/k6_frac_N10_frac_chain_mem32K_htree0_routedCLK_40nm.xml
@@ -1520,8 +1520,8 @@
+         to all clock pins. TODO: switch to specify a port name. In this case the clock
+         port names are clk for the clb clock pins and clock for the io clock pins -->
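
With these changes in place, the two-stage behavior would presumably be enabled by selecting the dedicated clock network (--clock_modeling dedicated_network) together with an architecture that describes one, such as the htree0 architecture files above, and passing the new --two_stage_clock_routing flag (a STORE_TRUE option that defaults to off). The gating added to timing_driven_route_net() can be restated in isolation as the sketch below; should_pre_route_to_clock_root is a hypothetical helper used only for illustration and is not added by this patch.

    // Sketch of the pre-route gating condition; not a function in the patch.
    // Note the patch asserts, rather than tests, that the dedicated network is in use.
    bool should_pre_route_to_clock_root(bool net_is_global, const t_router_opts& router_opts) {
        if (!net_is_global || !router_opts.two_stage_clock_routing) return false;
        VTR_ASSERT(router_opts.clock_modeling == DEDICATED_NETWORK);
        // Stage 1 routes the net source to device_ctx.virtual_clock_network_root_idx;
        // stage 2 then routes from the clock network to the real sinks as usual.
        return true;
    }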