diff --git a/abc/CMakeLists.txt b/abc/CMakeLists.txt index 7cf6f19af1e..300ce808cbb 100644 --- a/abc/CMakeLists.txt +++ b/abc/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.3.0) +cmake_minimum_required(VERSION 3.3.0...3.13) # ! This line is edited to get rid of a CMake deprecation error include(CMakeParseArguments) include(CheckCCompilerFlag) diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst index 933d4748eff..41ace87adb2 100644 --- a/doc/src/vpr/command_line_usage.rst +++ b/doc/src/vpr/command_line_usage.rst @@ -1127,7 +1127,7 @@ VPR uses a negotiated congestion algorithm (based on Pathfinder) to perform rout * ``delay_normalized_length_frequency`` like ``delay_normalized``, but scaled by routing resource length and scaled inversely by routing resource frequency. - **Default:** ``delay_normalized_length`` for the timing-driven router and ``demand_only`` for the breadth-first router + **Default:** ``delay_normalized_length`` .. option:: --bend_cost @@ -1172,22 +1172,13 @@ VPR uses a negotiated congestion algorithm (based on Pathfinder) to perform rout This option attempts to verify the minimum by routing at successively lower channel widths until two consecutive routing failures are observed. -.. option:: --router_algorithm {breadth_first | timing_driven} +.. option:: --router_algorithm {parallel | timing_driven} Selects which router algorithm to use. .. warning:: - The ``breadth_first`` router **should NOT be used to compare the run-time/quality** of alternate routing algorithms. - - It is inferrior to the ``timing_driven`` router from a circuit speed (2x - 10x slower) and run-time perspective (takes 10-100x longer on the large benchmarks). - The ``breadth_first`` router is deprecated and may be removed in a future release. 
- - The ``breadth_first`` router :cite:`betz_arch_cad` focuses solely on routing a design successfully, while the ``timing_driven`` router :cite:`betz_arch_cad,murray_air` focuses both on achieving a successful route and achieving good circuit speed. - - The breadth-first router is capable of routing a design using slightly fewer tracks than the timing-driving router (typically 5% if the timing-driven router uses its default parameters. - This can be reduced to about 2% if the router parameters are set so the timing-driven router pays more attention to routability and less to area). - The designs produced by the timing-driven router are much faster, however, (2x - 10x) and it uses less CPU time to route. + The ``parallel`` router is experimental. (TODO: more explanation) **Default:** ``timing_driven`` diff --git a/libs/EXTERNAL/capnproto/CMakeLists.txt b/libs/EXTERNAL/capnproto/CMakeLists.txt index eb40764019a..17f12819f94 100644 --- a/libs/EXTERNAL/capnproto/CMakeLists.txt +++ b/libs/EXTERNAL/capnproto/CMakeLists.txt @@ -1,3 +1,3 @@ -cmake_minimum_required(VERSION 3.4) +cmake_minimum_required(VERSION 3.4...3.13) # ! This line is edited to get rid of a CMake deprecation error project("Cap'n Proto Root" CXX) add_subdirectory(c++) diff --git a/libs/EXTERNAL/capnproto/c++/CMakeLists.txt b/libs/EXTERNAL/capnproto/c++/CMakeLists.txt index 548dfd1fe7a..2acc7811582 100644 --- a/libs/EXTERNAL/capnproto/c++/CMakeLists.txt +++ b/libs/EXTERNAL/capnproto/c++/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.4) +cmake_minimum_required(VERSION 3.4...3.13) # ! 
This line is edited to get rid of a CMake deprecation error project("Cap'n Proto" CXX) set(VERSION 0.9.1) diff --git a/libs/EXTERNAL/libargparse/argparse_test.cpp b/libs/EXTERNAL/libargparse/argparse_test.cpp index eeb1d4a3276..4d7b63dce4a 100644 --- a/libs/EXTERNAL/libargparse/argparse_test.cpp +++ b/libs/EXTERNAL/libargparse/argparse_test.cpp @@ -399,10 +399,10 @@ int main( .show_in(argparse::ShowIn::HELP_ONLY); route_grp.add_argument(args.router_algorithm, "--router_algorithm") .help("Specifies the router algorithm to use.\n" - " * breadth_first: focuses solely on routability\n" + " * parallel: timing_driven with tricks to run on multiple cores (may be worse)\n" " * timing driven: focuses on routability and circuit speed\n") .default_value("timing_driven") - .choices({"breadth_first", "timing_driven"}) + .choices({"parallel", "timing_driven"}) .show_in(argparse::ShowIn::HELP_ONLY); route_grp.add_argument(args.min_incremental_reroute_fanout, "--min_incremental_reroute_fanout") .help("The net fanout thershold above which nets will be re-routed incrementally.") diff --git a/libs/EXTERNAL/libtatum/CMakeLists.txt b/libs/EXTERNAL/libtatum/CMakeLists.txt index 29d78daf0e0..664d5fef6f5 100644 --- a/libs/EXTERNAL/libtatum/CMakeLists.txt +++ b/libs/EXTERNAL/libtatum/CMakeLists.txt @@ -2,8 +2,7 @@ cmake_minimum_required(VERSION 3.16) project("tatum") -# Parallel tatum gives non-deterministic results. 
Use serial by default until resolved -set(TATUM_EXECUTION_ENGINE "serial" CACHE STRING "Specify the framework for (potential) parallel execution") +set(TATUM_EXECUTION_ENGINE "auto" CACHE STRING "Specify the framework for (potential) parallel execution") set_property(CACHE TATUM_EXECUTION_ENGINE PROPERTY STRINGS auto serial tbb) list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules") diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/SerialIncrWalker.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/SerialIncrWalker.hpp index 79ee6b05383..8ece8e44f9a 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/SerialIncrWalker.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/graph_walkers/SerialIncrWalker.hpp @@ -1,5 +1,10 @@ #pragma once #include + +#ifdef TATUM_USE_TBB +#include +#endif + #include "tatum/graph_walkers/TimingGraphWalker.hpp" #include "tatum/TimingGraph.hpp" #include "tatum/delay_calc/DelayCalculator.hpp" @@ -431,9 +436,16 @@ class SerialIncrWalker : public TimingGraphWalker { t_incr_traversal_update incr_arr_update_; t_incr_traversal_update incr_req_update_; - //Set of invalidated edges, and bitset for membership + /** Set of invalidated edges, and bitset for membership. 
+ * Use thread safe alternatives when TBB is on, since invalidate_edge_impl + * may be called concurrently */ +#ifdef TATUM_USE_TBB + tbb::concurrent_vector invalidated_edges_; + tatum::util::linear_map edge_invalidated_; +#else std::vector invalidated_edges_; - tatum::util::linear_map edge_invalidated_; + tatum::util::linear_map edge_invalidated_; +#endif //Nodes which have been modified during timing update, and bitset for membership std::vector nodes_modified_; diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/util/tatum_strong_id.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/util/tatum_strong_id.hpp index 69f020f854d..71f5b7a874e 100644 --- a/libs/EXTERNAL/libtatum/libtatum/tatum/util/tatum_strong_id.hpp +++ b/libs/EXTERNAL/libtatum/libtatum/tatum/util/tatum_strong_id.hpp @@ -176,22 +176,24 @@ class StrongId { //Default to the sentinel value constexpr StrongId() : id_(sentinel) {} - //Only allow explict constructions from a raw Id (no automatic conversions) - explicit StrongId(T id) noexcept : id_(id) {} + //Only allow explicit constructions from a raw Id (no automatic conversions) + explicit constexpr StrongId(T id) noexcept : id_(id) {} //Allow some explicit conversion to useful types //Allow explicit conversion to bool (e.g. if(id)) - explicit operator bool() const { return *this != INVALID(); } + explicit operator bool() const { return id_ != sentinel; } + + /// @brief Another name for the bool cast + constexpr bool is_valid() const { return id_ != sentinel; } //Allow explicit conversion to size_t (e.g. my_vector[size_t(strong_id)]) explicit operator std::size_t() const { return static_cast(id_); } - - //To enable hasing Ids + //To enable hashing Ids friend std::hash>; - //To enable comparisions between Ids + //To enable comparisons between Ids // Note that since these are templated functions we provide an empty set of template parameters // after the function name (i.e. 
<>) friend bool operator== <>(const StrongId& lhs, const StrongId& rhs); diff --git a/libs/librrgraph/src/base/check_rr_graph.cpp b/libs/librrgraph/src/base/check_rr_graph.cpp index 8ed76ed2f58..104e069af65 100644 --- a/libs/librrgraph/src/base/check_rr_graph.cpp +++ b/libs/librrgraph/src/base/check_rr_graph.cpp @@ -608,7 +608,7 @@ static void check_rr_edge(const RRGraphView& rr_graph, std::string msg = "Non-configurable BUFFER type switch must have only one driver. "; msg += vtr::string_fmt(" Actual fan-in was %d (expected 1).\n", to_fanin); msg += " Possible cause is complex block output pins connecting to:\n"; - msg += " " + describe_rr_node(rr_graph, grid, rr_indexed_data, to_node, is_flat); + msg += " " + describe_rr_node(rr_graph, grid, rr_indexed_data, RRNodeId(to_node), is_flat); VPR_FATAL_ERROR(VPR_ERROR_ROUTE, msg.c_str()); } break; diff --git a/libs/librrgraph/src/base/rr_graph_fwd.h b/libs/librrgraph/src/base/rr_graph_fwd.h index fee69b34cd2..41d0b8f3d58 100644 --- a/libs/librrgraph/src/base/rr_graph_fwd.h +++ b/libs/librrgraph/src/base/rr_graph_fwd.h @@ -1,5 +1,8 @@ #ifndef RR_GRAPH_OBJ_FWD_H #define RR_GRAPH_OBJ_FWD_H + +#include + #include "vtr_strong_id.h" /*************************************************************** @@ -20,11 +23,11 @@ struct rr_switch_id_tag; struct rr_segment_id_tag; struct rc_index_tag; -typedef vtr::StrongId RRNodeId; -typedef vtr::StrongId RREdgeId; -typedef vtr::StrongId RRIndexedDataId; -typedef vtr::StrongId RRSwitchId; -typedef vtr::StrongId RRSegmentId; -typedef vtr::StrongId NodeRCIndex; +typedef vtr::StrongId RRNodeId; +typedef vtr::StrongId RREdgeId; +typedef vtr::StrongId RRIndexedDataId; +typedef vtr::StrongId RRSwitchId; +typedef vtr::StrongId RRSegmentId; +typedef vtr::StrongId NodeRCIndex; #endif diff --git a/libs/librrgraph/src/utils/describe_rr_node.cpp b/libs/librrgraph/src/utils/describe_rr_node.cpp index ee74b482686..a4982cce8f7 100644 --- a/libs/librrgraph/src/utils/describe_rr_node.cpp +++ 
b/libs/librrgraph/src/utils/describe_rr_node.cpp @@ -4,51 +4,50 @@ #include "physical_types_util.h" #include "vtr_util.h" -/* TODO: This function should adapt RRNodeId */ std::string describe_rr_node(const RRGraphView& rr_graph, const DeviceGrid& grid, const vtr::vector& rr_indexed_data, - int inode, + RRNodeId inode, bool is_flat) { std::string msg = vtr::string_fmt("RR node: %d", inode); - if (rr_graph.node_type(RRNodeId(inode)) == CHANX || rr_graph.node_type(RRNodeId(inode)) == CHANY) { - auto cost_index = rr_graph.node_cost_index(RRNodeId(inode)); + if (rr_graph.node_type(inode) == CHANX || rr_graph.node_type(inode) == CHANY) { + auto cost_index = rr_graph.node_cost_index(inode); int seg_index = rr_indexed_data[cost_index].seg_index; - std::string rr_node_direction_string = rr_graph.node_direction_string(RRNodeId(inode)); + std::string rr_node_direction_string = rr_graph.node_direction_string(inode); if (seg_index < (int)rr_graph.num_rr_segments()) { msg += vtr::string_fmt(" track: %d longline: %d", - rr_graph.node_track_num(RRNodeId(inode)), + rr_graph.node_track_num(inode), rr_graph.rr_segments(RRSegmentId(seg_index)).longline); } else { msg += vtr::string_fmt(" track: %d seg_type: ILLEGAL_SEG_INDEX %d", - rr_graph.node_track_num(RRNodeId(inode)), + rr_graph.node_track_num(inode), seg_index); } - } else if (rr_graph.node_type(RRNodeId(inode)) == IPIN || rr_graph.node_type(RRNodeId(inode)) == OPIN) { - auto type = grid.get_physical_type({rr_graph.node_xlow(RRNodeId(inode)), - rr_graph.node_ylow(RRNodeId(inode)), - rr_graph.node_layer(RRNodeId(inode))}); + } else if (rr_graph.node_type(inode) == IPIN || rr_graph.node_type(inode) == OPIN) { + auto type = grid.get_physical_type({rr_graph.node_xlow(inode), + rr_graph.node_ylow(inode), + rr_graph.node_layer(inode)}); - std::string pin_name = block_type_pin_index_to_name(type, rr_graph.node_pin_num(RRNodeId(inode)), is_flat); + std::string pin_name = block_type_pin_index_to_name(type, rr_graph.node_pin_num(inode), 
is_flat); msg += vtr::string_fmt(" pin: %d pin_name: %s", - rr_graph.node_pin_num(RRNodeId(inode)), + rr_graph.node_pin_num(inode), pin_name.c_str()); } else { - VTR_ASSERT(rr_graph.node_type(RRNodeId(inode)) == SOURCE || rr_graph.node_type(RRNodeId(inode)) == SINK); + VTR_ASSERT(rr_graph.node_type(inode) == SOURCE || rr_graph.node_type(inode) == SINK); - msg += vtr::string_fmt(" class: %d", rr_graph.node_class_num(RRNodeId(inode))); + msg += vtr::string_fmt(" class: %d", rr_graph.node_class_num(inode)); } - msg += vtr::string_fmt(" capacity: %d", rr_graph.node_capacity(RRNodeId(inode))); - msg += vtr::string_fmt(" fan-in: %d", rr_graph.node_fan_in(RRNodeId(inode))); - msg += vtr::string_fmt(" fan-out: %d", rr_graph.num_edges(RRNodeId(inode))); + msg += vtr::string_fmt(" capacity: %d", rr_graph.node_capacity(inode)); + msg += vtr::string_fmt(" fan-in: %d", rr_graph.node_fan_in(inode)); + msg += vtr::string_fmt(" fan-out: %d", rr_graph.num_edges(inode)); - msg += " " + rr_graph.node_coordinate_to_string(RRNodeId(inode)); + msg += " " + rr_graph.node_coordinate_to_string(inode); return msg; } \ No newline at end of file diff --git a/libs/librrgraph/src/utils/describe_rr_node.h b/libs/librrgraph/src/utils/describe_rr_node.h index fe2f6228f71..6b3e9355620 100644 --- a/libs/librrgraph/src/utils/describe_rr_node.h +++ b/libs/librrgraph/src/utils/describe_rr_node.h @@ -9,7 +9,7 @@ std::string describe_rr_node(const RRGraphView& rr_graph, const DeviceGrid& grid, const vtr::vector& rr_indexed_data, - int inode, + RRNodeId inode, bool is_flat); #endif \ No newline at end of file diff --git a/libs/libvtrutil/src/vtr_range.h b/libs/libvtrutil/src/vtr_range.h index 493a379fbd9..42375b24771 100644 --- a/libs/libvtrutil/src/vtr_range.h +++ b/libs/libvtrutil/src/vtr_range.h @@ -41,21 +41,21 @@ template class Range { public: ///@brief constructor - Range(T b, T e) + constexpr Range(T b, T e) : begin_(b) , end_(e) {} ///@brief Return an iterator to the start of the range - T begin() 
{ return begin_; } + constexpr T begin() { return begin_; } ///@brief Return an iterator to the end of the range - T end() { return end_; } + constexpr T end() { return end_; } ///@brief Return an iterator to the start of the range (immutable) - const T begin() const { return begin_; } + constexpr const T begin() const { return begin_; } ///@brief Return an iterator to the end of the range (immutable) - const T end() const { return end_; } + constexpr const T end() const { return end_; } ///@brief Return true if empty - bool empty() { return begin_ == end_; } + constexpr bool empty() { return begin_ == end_; } ///@brief Return the range size - size_t size() { return std::distance(begin_, end_); } + constexpr size_t size() { return std::distance(begin_, end_); } private: T begin_; @@ -72,13 +72,13 @@ class Range { * auto my_range = vtr::make_range(my_vec.begin(), my_vec.end()); */ template -auto make_range(T b, T e) { return Range(b, e); } +constexpr auto make_range(T b, T e) { return Range(b, e); } /** * @brief Creates a vtr::Range from a container */ template -auto make_range(const Container& c) { return make_range(std::begin(c), std::end(c)); } +inline auto make_range(const Container& c) { return make_range(std::begin(c), std::end(c)); } } // namespace vtr diff --git a/libs/libvtrutil/src/vtr_strong_id.h b/libs/libvtrutil/src/vtr_strong_id.h index 1ce922ab5da..4f3849df094 100644 --- a/libs/libvtrutil/src/vtr_strong_id.h +++ b/libs/libvtrutil/src/vtr_strong_id.h @@ -4,7 +4,7 @@ * @file * @brief This header provides the StrongId class. * - * It is template which can be used to create strong Id's + * It is template which can be used to create strong Id's * which avoid accidental type conversions (generating compiler errors when they occur). 
* * Motivation @@ -146,6 +146,7 @@ #include //for std::is_integral #include //for std::size_t #include //for std::hash +#include //for std::ostream namespace vtr { @@ -160,13 +161,16 @@ class StrongId; * friend them */ template -bool operator==(const StrongId& lhs, const StrongId& rhs); +constexpr bool operator==(const StrongId& lhs, const StrongId& rhs); template -bool operator!=(const StrongId& lhs, const StrongId& rhs); +constexpr bool operator!=(const StrongId& lhs, const StrongId& rhs); template -bool operator<(const StrongId& lhs, const StrongId& rhs); +constexpr bool operator<(const StrongId& lhs, const StrongId& rhs); + +template +std::ostream& operator<<(std::ostream& out, const StrongId& rhs); ///@brief Class template definition with default template parameters template @@ -175,38 +179,46 @@ class StrongId { public: ///@brief Gets the invalid Id - static constexpr StrongId INVALID() { return StrongId(); } + static constexpr StrongId INVALID() noexcept { return StrongId(); } ///@brief Default to the sentinel value constexpr StrongId() : id_(sentinel) {} - ///@brief Only allow explict constructions from a raw Id (no automatic conversions) + ///@brief Only allow explicit constructions from a raw Id (no automatic conversions) explicit constexpr StrongId(T id) : id_(id) {} // Allow some explicit conversion to useful types: - ///@brief Allow explicit conversion to bool (e.g. if(id)) - explicit operator bool() const { return *this != INVALID(); } + /// @brief Allow explicit conversion to bool (e.g. if(id)) + explicit constexpr operator bool() const { return id_ != sentinel; } - ///@brief Allow explicit conversion to size_t (e.g. my_vector[size_t(strong_id)]) - explicit operator std::size_t() const { return static_cast(id_); } + /// @brief Another name for the bool cast + constexpr bool is_valid() const { return id_ != sentinel; } - ///@brief To enable hasing Ids + /// @brief Allow explicit conversion to size_t (e.g. 
my_vector[size_t(strong_id)]) + explicit constexpr operator std::size_t() const { return static_cast(id_); } + + /// @brief To enable hashing Ids friend std::hash>; /** - * @brief To enable comparisions between Ids + * @brief To enable comparisons between Ids * * Note that since these are templated functions we provide an empty set of template parameters * after the function name (i.e. <>) */ - friend bool operator== <>(const StrongId& lhs, const StrongId& rhs); + friend constexpr bool operator== <>(const StrongId& lhs, const StrongId& rhs); ///@brief != operator - friend bool operator!= <>(const StrongId& lhs, const StrongId& rhs); + friend constexpr bool operator!= <>(const StrongId& lhs, const StrongId& rhs); ///@brief < operator - friend bool operator< <>(const StrongId& lhs, const StrongId& rhs); + friend constexpr bool operator< <>(const StrongId& lhs, const StrongId& rhs); + + /** + * @brief to be able to print them out + */ + friend std::ostream& operator<< <>(std::ostream& out, const StrongId& rhs); private: T id_; @@ -214,22 +226,28 @@ class StrongId { ///@brief == operator template -bool operator==(const StrongId& lhs, const StrongId& rhs) { +constexpr bool operator==(const StrongId& lhs, const StrongId& rhs) { return lhs.id_ == rhs.id_; } ///@brief != operator template -bool operator!=(const StrongId& lhs, const StrongId& rhs) { +constexpr bool operator!=(const StrongId& lhs, const StrongId& rhs) { return !(lhs == rhs); } ///@brief operator < Needed for std::map-like containers template -bool operator<(const StrongId& lhs, const StrongId& rhs) { +constexpr bool operator<(const StrongId& lhs, const StrongId& rhs) { return lhs.id_ < rhs.id_; } +///@brief operator << Needed for print-debugging +template +std::ostream& operator<<(std::ostream& out, const StrongId& rhs) { + out << rhs.id_; + return out; +} } //namespace vtr ///@brief Specialize std::hash for StrongId's (needed for std::unordered_map-like containers) diff --git a/utils/route_diag/src/main.cpp 
b/utils/route_diag/src/main.cpp index 892674cc43b..571c17c30e6 100644 --- a/utils/route_diag/src/main.cpp +++ b/utils/route_diag/src/main.cpp @@ -60,8 +60,8 @@ constexpr int INTERRUPTED_EXIT_CODE = 3; //VPR was interrupted by the user (e.g. static void do_one_route(const Netlist<>& net_list, const t_det_routing_arch& det_routing_arch, - int source_node, - int sink_node, + RRNodeId source_node, + RRNodeId sink_node, const t_router_opts& router_opts, const std::vector& segment_inf, bool is_flat) { @@ -117,12 +117,13 @@ static void do_one_route(const Netlist<>& net_list, -1, false, std::unordered_map()); - std::tie(found_path, cheapest) = router.timing_driven_route_connection_from_route_tree(tree.root(), + std::tie(found_path, std::ignore, cheapest) = router.timing_driven_route_connection_from_route_tree(tree.root(), sink_node, cost_params, bounding_box, router_stats, - conn_params); + conn_params, + true); if (found_path) { VTR_ASSERT(cheapest.index == sink_node); @@ -137,7 +138,7 @@ static void do_one_route(const Netlist<>& net_list, tree.print(); VTR_LOG("\n"); - VTR_ASSERT_MSG(route_ctx.rr_node_route_inf[size_t(tree.root().inode)].occ() <= rr_graph.node_capacity(tree.root().inode), "SOURCE should never be congested"); + VTR_ASSERT_MSG(route_ctx.rr_node_route_inf[tree.root().inode].occ() <= rr_graph.node_capacity(tree.root().inode), "SOURCE should never be congested"); } else { VTR_LOG("Routing failed"); } @@ -148,7 +149,7 @@ static void do_one_route(const Netlist<>& net_list, static void profile_source(const Netlist<>& net_list, const t_det_routing_arch& det_routing_arch, - int source_rr_node, + RRNodeId source_rr_node, const t_router_opts& router_opts, const std::vector& segment_inf, bool is_flat) { @@ -187,27 +188,28 @@ static void profile_source(const Netlist<>& net_list, bool successfully_routed; for (int sink_ptc : best_sink_ptcs) { VTR_ASSERT(sink_ptc != OPEN); + //TODO: should pass layer_num instead of 0 to node_lookup once the multi-die FPGAs support is 
completed - int sink_rr_node = size_t(device_ctx.rr_graph.node_lookup().find_node(0,sink_x, sink_y, SINK, sink_ptc)); + RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(0, sink_x, sink_y, SINK, sink_ptc); if (directconnect_exists(source_rr_node, sink_rr_node)) { //Skip if we shouldn't measure direct connects and a direct connect exists continue; } - VTR_ASSERT(sink_rr_node != OPEN); + VTR_ASSERT(sink_rr_node); { vtr::ScopedStartFinishTimer delay_timer(vtr::string_fmt( "Routing Src: %d Sink: %d", source_rr_node, sink_rr_node)); - successfully_routed = profiler.calculate_delay(source_rr_node, sink_rr_node, + successfully_routed = profiler.calculate_delay(RRNodeId(source_rr_node), RRNodeId(sink_rr_node), router_opts, &delays[sink_x][sink_y]); } if (successfully_routed) { - sink_nodes[sink_x][sink_y] = sink_rr_node; + sink_nodes[sink_x][sink_y] = size_t(sink_rr_node); break; } } @@ -334,15 +336,15 @@ int main(int argc, const char **argv) { if(route_options.profile_source) { profile_source(net_list, vpr_setup.RoutingArch, - route_options.source_rr_node, + RRNodeId(route_options.source_rr_node), vpr_setup.RouterOpts, vpr_setup.Segments, is_flat); } else { do_one_route(net_list, vpr_setup.RoutingArch, - route_options.source_rr_node, - route_options.sink_rr_node, + RRNodeId(route_options.source_rr_node), + RRNodeId(route_options.sink_rr_node), vpr_setup.RouterOpts, vpr_setup.Segments, is_flat); diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index ba8e9d74cc2..61f1bf772c3 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -260,8 +260,8 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) { if (DETAILED == RouterOpts.route_type) { VTR_LOG("RouterOpts.router_algorithm: "); switch (RouterOpts.router_algorithm) { - case BREADTH_FIRST: - VTR_LOG("BREADTH_FIRST\n"); + case PARALLEL: + VTR_LOG("PARALLEL\n"); break; case TIMING_DRIVEN: VTR_LOG("TIMING_DRIVEN\n"); @@ -432,8 +432,8 @@ static void 
ShowRouterOpts(const t_router_opts& RouterOpts) { VTR_LOG("RouterOpts.router_algorithm: "); switch (RouterOpts.router_algorithm) { - case BREADTH_FIRST: - VTR_LOG("BREADTH_FIRST\n"); + case PARALLEL: + VTR_LOG("PARALLEL\n"); break; case TIMING_DRIVEN: VTR_LOG("TIMING_DRIVEN\n"); diff --git a/vpr/src/base/old_traceback.cpp b/vpr/src/base/old_traceback.cpp index 317a4fe27a0..a1bb21be232 100644 --- a/vpr/src/base/old_traceback.cpp +++ b/vpr/src/base/old_traceback.cpp @@ -10,9 +10,7 @@ std::pair traceback_from_route_tree_recurr(t_trace* head, t_ bool validate_traceback_recurr(t_trace* trace, std::set& seen_rr_nodes); void free_trace_data(t_trace* tptr); -/* Builds a skeleton route tree from a traceback - * does not calculate R_upstream, C_downstream, or Tdel (left uninitialized) - * returns the root of the converted route tree */ +/** Build a route tree from a traceback */ vtr::optional TracebackCompat::traceback_to_route_tree(t_trace* head) { if (head == nullptr) return vtr::nullopt; @@ -121,8 +119,8 @@ void print_traceback(const t_trace* trace) { auto& route_ctx = g_vpr_ctx.routing(); const t_trace* prev = nullptr; while (trace) { - int inode = trace->index; - VTR_LOG("%d (%s)", inode, rr_node_typename[rr_graph.node_type(RRNodeId(inode))]); + RRNodeId inode(trace->index); + VTR_LOG("%d (%s)", inode, rr_node_typename[rr_graph.node_type(inode)]); if (trace->iswitch == OPEN) { VTR_LOG(" !"); //End of branch @@ -132,7 +130,7 @@ void print_traceback(const t_trace* trace) { VTR_LOG("*"); //Reached non-configurably } - if (route_ctx.rr_node_route_inf[inode].occ() > rr_graph.node_capacity(RRNodeId(inode))) { + if (route_ctx.rr_node_route_inf[inode].occ() > rr_graph.node_capacity(inode)) { VTR_LOG(" x"); //Overused } VTR_LOG("\n"); diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 74a5159da96..35fd191c488 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -172,8 +172,8 @@ struct ParseRoutePredictor { struct 
ParseRouterAlgorithm { ConvertedValue from_str(std::string str) { ConvertedValue conv_value; - if (str == "breadth_first") - conv_value.set_value(BREADTH_FIRST); + if (str == "parallel") + conv_value.set_value(PARALLEL); else if (str == "timing_driven") conv_value.set_value(TIMING_DRIVEN); else { @@ -186,8 +186,8 @@ struct ParseRouterAlgorithm { ConvertedValue to_str(e_router_algorithm val) { ConvertedValue conv_value; - if (val == BREADTH_FIRST) - conv_value.set_value("breadth_first"); + if (val == PARALLEL) + conv_value.set_value("parallel"); else { VTR_ASSERT(val == TIMING_DRIVEN); conv_value.set_value("timing_driven"); @@ -196,7 +196,7 @@ struct ParseRouterAlgorithm { } std::vector default_choices() { - return {"breadth_first", "timing_driven"}; + return {"parallel", "timing_driven"}; } }; @@ -2279,8 +2279,7 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg " * delay_normalized_length_frequency: like delay_normalized\n" " but scaled by routing resource length, and inversely\n" " by segment type frequency\n" - "(Default: demand_only for breadth-first router,\n" - " delay_normalized_length for timing-driven router)") + "(Default: delay_normalized_length)") .show_in(argparse::ShowIn::HELP_ONLY); route_grp.add_argument(args.bend_cost, "--bend_cost") @@ -2316,10 +2315,10 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg route_grp.add_argument(args.RouterAlgorithm, "--router_algorithm") .help( "Specifies the router algorithm to use.\n" - " * breadth_first: focuses solely on routability [DEPRECATED, inferior quality & run-time]\n" + " * parallel: [experimental] timing_driven but multithreaded\n" " * timing_driven: focuses on routability and circuit speed\n") .default_value("timing_driven") - .choices({"breadth_first", "timing_driven"}) + .choices({"parallel", "timing_driven"}) .show_in(argparse::ShowIn::HELP_ONLY); route_grp.add_argument(args.min_incremental_reroute_fanout, 
"--min_incremental_reroute_fanout") @@ -2909,22 +2908,18 @@ void set_conditional_defaults(t_options& args) { */ //Base cost type if (args.base_cost_type.provenance() != Provenance::SPECIFIED) { - if (args.RouterAlgorithm == BREADTH_FIRST) { - args.base_cost_type.set(DEMAND_ONLY, Provenance::INFERRED); - } else { - VTR_ASSERT(args.RouterAlgorithm == TIMING_DRIVEN); - - if (args.RouteType == DETAILED) { - if (args.timing_analysis) { - args.base_cost_type.set(DELAY_NORMALIZED_LENGTH, Provenance::INFERRED); - } else { - args.base_cost_type.set(DEMAND_ONLY_NORMALIZED_LENGTH, Provenance::INFERRED); - } + VTR_ASSERT(args.RouterAlgorithm == TIMING_DRIVEN || args.RouterAlgorithm == PARALLEL); + + if (args.RouteType == DETAILED) { + if (args.timing_analysis) { + args.base_cost_type.set(DELAY_NORMALIZED_LENGTH, Provenance::INFERRED); } else { - VTR_ASSERT(args.RouteType == GLOBAL); - //Global RR graphs don't have valid timing, so use demand base cost args.base_cost_type.set(DEMAND_ONLY_NORMALIZED_LENGTH, Provenance::INFERRED); } + } else { + VTR_ASSERT(args.RouteType == GLOBAL); + //Global RR graphs don't have valid timing, so use demand base cost + args.base_cost_type.set(DEMAND_ONLY_NORMALIZED_LENGTH, Provenance::INFERRED); } } diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index cd1722cb80c..c87d2bec7fc 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -62,7 +62,7 @@ #include "read_activity.h" #include "net_delay.h" #include "AnalysisDelayCalculator.h" -#include "timing_info.h" +#include "concrete_timing_info.h" #include "netlist_writer.h" #include "RoutingDelayCalculator.h" #include "check_route.h" @@ -96,7 +96,7 @@ #include "iostream" #ifdef VPR_USE_TBB -# define TBB_PREVIEW_GLOBAL_CONTROL 1 +# define TBB_PREVIEW_GLOBAL_CONTROL 1 /* Needed for compatibility with old TBB versions */ # include # include #endif @@ -899,17 +899,13 @@ RouteStatus vpr_route_fixed_W(const Netlist<>& net_list, std::shared_ptr delay_calc, NetPinsMatrix& 
net_delay, bool is_flat) { - if (router_needs_lookahead(vpr_setup.RouterOpts.router_algorithm)) { - // Prime lookahead cache to avoid adding lookahead computation cost to - // the routing timer. - get_cached_router_lookahead( - vpr_setup.RoutingArch, - vpr_setup.RouterOpts.lookahead_type, - vpr_setup.RouterOpts.write_router_lookahead, - vpr_setup.RouterOpts.read_router_lookahead, - vpr_setup.Segments, - is_flat); - } + get_cached_router_lookahead( + vpr_setup.RoutingArch, + vpr_setup.RouterOpts.lookahead_type, + vpr_setup.RouterOpts.write_router_lookahead, + vpr_setup.RouterOpts.read_router_lookahead, + vpr_setup.Segments, + is_flat); vtr::ScopedStartFinishTimer timer("Routing"); diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index a5d086bbbbd..9e1c9241346 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -195,10 +195,10 @@ struct DeviceContext : public Context { std::vector rr_rc_data; ///@brief Sets of non-configurably connected nodes - std::vector> rr_non_config_node_sets; + std::vector> rr_non_config_node_sets; - ///@brief Reverse look-up from RR node to non-configurably connected node set (index into rr_nonconf_node_sets) - std::unordered_map rr_node_to_non_config_node_set; + ///@brief Reverse look-up from RR node to non-configurably connected node set (index into rr_non_config_node_sets) + std::unordered_map rr_node_to_non_config_node_set; /* A writeable view of routing resource graph to be the ONLY database * for routing resource graph builder functions. 
@@ -423,13 +423,13 @@ struct RoutingContext : public Context { vtr::vector> trace_nodes; - vtr::vector> net_rr_terminals; /* [0..num_nets-1][0..num_pins-1] */ + vtr::vector> net_rr_terminals; /* [0..num_nets-1][0..num_pins-1] */ vtr::vector is_clock_net; /* [0..num_nets-1] */ - vtr::vector> rr_blk_source; /* [0..num_blocks-1][0..num_class-1] */ + vtr::vector> rr_blk_source; /* [0..num_blocks-1][0..num_class-1] */ - std::vector rr_node_route_inf; /* [0..device_ctx.num_rr_nodes-1] */ + vtr::vector rr_node_route_inf; /* [0..device_ctx.num_rr_nodes-1] */ vtr::vector>> net_terminal_groups; @@ -444,7 +444,7 @@ struct RoutingContext : public Context { * bit value 1: node is part of a non-configurable set * Initialized once when RoutingContext is initialized, static throughout invocation of router */ - vtr::dynamic_bitset<> non_configurable_bitset; /*[0...device_ctx.num_rr_nodes] */ + vtr::dynamic_bitset non_configurable_bitset; /*[0...device_ctx.num_rr_nodes] */ ///@brief Information about current routing status of each net t_net_routing_status net_status; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 1f47c1030d5..5d03e194f8a 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1241,8 +1241,8 @@ struct t_placer_opts { * channel width given. If this variable is * * == NO_FIXED_CHANNEL_WIDTH, do a binary search * * on channel width. * - * router_algorithm: BREADTH_FIRST or TIMING_DRIVEN. Selects the desired * - * routing algorithm. * + * router_algorithm: TIMING_DRIVEN or PARALLEL. Selects the desired * + * routing algorithm. * * base_cost_type: Specifies how to compute the base cost of each type of * * rr_node. DELAY_NORMALIZED -> base_cost = "demand" * * x average delay to route past 1 CLB. 
DEMAND_ONLY -> * @@ -1267,7 +1267,7 @@ struct t_placer_opts { * read_rr_graph_name: stores the file name of the rr graph to be read by vpr */ enum e_router_algorithm { - BREADTH_FIRST, + PARALLEL, TIMING_DRIVEN, }; @@ -1646,13 +1646,13 @@ constexpr bool is_src_sink(e_rr_type type) { return (type == SOURCE || type == S * the expected cost to the target if the timing_driven router * is being used. * @param backward_path_cost Total cost of the path up to and including this - * node. Not used by breadth-first router. + * node. * @param target_flag Is this node a target (sink) for the current routing? * Number of times this node must be reached to fully route. * @param occ The current occupancy of the associated rr node */ struct t_rr_node_route_inf { - int prev_node; + RRNodeId prev_node; RREdgeId prev_edge; float acc_cost; @@ -1675,84 +1675,53 @@ struct t_rr_node_route_inf { * @brief Information about the current status of a particular * net as pertains to routing */ -class t_net_routing_status { +template +class t_routing_status { public: void clear() { - is_routed_.clear(); - is_fixed_.clear(); + is_routed_.assign(is_routed_.size(), 0); + is_fixed_.assign(is_fixed_.size(), 0); } - void resize(size_t number_nets) { is_routed_.resize(number_nets); - is_routed_.fill(false); + is_routed_.assign(is_routed_.size(), 0); is_fixed_.resize(number_nets); - is_fixed_.fill(false); + is_fixed_.assign(is_fixed_.size(), 0); } - void set_is_routed(ParentNetId net, bool is_routed) { - is_routed_.set(index(net), is_routed); + void set_is_routed(NetIdType net, bool is_routed) { - is_routed_[index(net)] = is_routed; + is_routed_[index(net)] = is_routed; } - bool is_routed(ParentNetId net) const { - return is_routed_.get(index(net)); + bool is_routed(NetIdType net) const { + return is_routed_[index(net)]; } - void set_is_fixed(ParentNetId net, bool is_fixed) { - is_fixed_.set(index(net), is_fixed); + void set_is_fixed(NetIdType net, bool is_fixed) { + is_fixed_[index(net)] = is_fixed; } - bool is_fixed(ParentNetId net) 
const { - return is_fixed_.get(index(net)); + bool is_fixed(NetIdType net) const { + return is_fixed_[index(net)]; } private: - ParentNetId index(ParentNetId net) const { - VTR_ASSERT_SAFE(net != ParentNetId::INVALID()); + NetIdType index(NetIdType net) const { + VTR_ASSERT_SAFE(net != NetIdType::INVALID()); return net; } - vtr::dynamic_bitset is_routed_; /// is_fixed_; /// instead of bitset for thread safety */ + vtr::vector is_routed_; /// is_fixed_; /// is_routed_; /// is_fixed_; /// t_net_routing_status; +typedef t_routing_status t_atom_net_routing_status; +/** Edge between two RRNodes */ struct t_node_edge { - t_node_edge(int fnode, int tnode) { - from_node = fnode; - to_node = tnode; - } + t_node_edge(RRNodeId fnode, RRNodeId tnode) + : from_node(fnode) + , to_node(tnode) {} - int from_node; - int to_node; + RRNodeId from_node; + RRNodeId to_node; //For std::set friend bool operator<(const t_node_edge& lhs, const t_node_edge& rhs) { @@ -1762,7 +1731,7 @@ struct t_node_edge { ///@brief Non-configurably connected nodes and edges in the RR graph struct t_non_configurable_rr_sets { - std::set> node_sets; + std::set> node_sets; std::set> edge_sets; }; @@ -1842,7 +1811,7 @@ class RouteStatus { int chan_width_ = -1; }; -typedef vtr::vector>> t_clb_opins_used; //[0..num_blocks-1][0..class-1][0..used_pins-1] +typedef vtr::vector>> t_clb_opins_used; //[0..num_blocks-1][0..class-1][0..used_pins-1] typedef std::vector> t_arch_switch_fanin; diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index edb52d2ac5c..888373990a9 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -568,14 +568,14 @@ void init_draw_coords(float width_val) { if (!draw_state->show_graphics && !draw_state->save_graphics && draw_state->graphics_commands.empty()) return; //do not initialize only if --disp off and --save_graphics off + /* Each time routing is on screen, need to reallocate the color of each * * rr_node, as the number of rr_nodes may change. 
*/ if (rr_graph.num_nodes() != 0) { draw_state->draw_rr_node.resize(rr_graph.num_nodes()); - /*FIXME: the type cast should be eliminated by making draw_rr_node adapt RRNodeId */ - for (const RRNodeId& rr_id : rr_graph.nodes()) { - draw_state->draw_rr_node[(size_t)rr_id].color = DEFAULT_RR_NODE_COLOR; - draw_state->draw_rr_node[(size_t)rr_id].node_highlighted = false; + for (RRNodeId inode : rr_graph.nodes()) { + draw_state->draw_rr_node[inode].color = DEFAULT_RR_NODE_COLOR; + draw_state->draw_rr_node[inode].node_highlighted = false; } } draw_coords->tile_width = width_val; @@ -790,9 +790,9 @@ void act_on_mouse_move(ezgl::application* app, GdkEventButton* /* event */, doub t_draw_state* draw_state = get_draw_state_vars(); if (draw_state->draw_rr_toggle != DRAW_NO_RR) { - int hit_node = draw_check_rr_node_hit(x, y); + RRNodeId hit_node = draw_check_rr_node_hit(x, y); - if (hit_node != OPEN) { + if (hit_node) { //Update message const auto& device_ctx = g_vpr_ctx.device(); @@ -837,20 +837,20 @@ ezgl::point2d atom_pin_draw_coord(AtomPinId pin) { } //Returns the set of rr nodes which connect driver to sink -std::vector trace_routed_connection_rr_nodes( - const ClusterNetId net_id, - const int driver_pin, - const int sink_pin) { +std::vector trace_routed_connection_rr_nodes( + ClusterNetId net_id, + int driver_pin, + int sink_pin) { auto& route_ctx = g_vpr_ctx.routing(); VTR_ASSERT(route_ctx.route_trees[net_id]); const RouteTree& tree = route_ctx.route_trees[net_id].value(); - VTR_ASSERT(tree.root().inode == RRNodeId(route_ctx.net_rr_terminals[net_id][driver_pin])); + VTR_ASSERT(tree.root().inode == route_ctx.net_rr_terminals[net_id][driver_pin]); - int sink_rr_node = route_ctx.net_rr_terminals[ParentNetId(size_t(net_id))][sink_pin]; + RRNodeId sink_rr_node = route_ctx.net_rr_terminals[ParentNetId(size_t(net_id))][sink_pin]; - std::vector rr_nodes_on_path; + std::vector rr_nodes_on_path; //Collect the rr nodes trace_routed_connection_rr_nodes_recurr(tree.root(), @@ -867,8 
+867,8 @@ std::vector trace_routed_connection_rr_nodes( //Adds the rr nodes linking rt_node to sink_rr_node to rr_nodes_on_path //Returns true if rt_node is on the path bool trace_routed_connection_rr_nodes_recurr(const RouteTreeNode& rt_node, - int sink_rr_node, - std::vector& rr_nodes_on_path) { + RRNodeId sink_rr_node, + std::vector& rr_nodes_on_path) { //DFS from the current rt_node to the sink_rr_node, when the sink is found trace back the used rr nodes if (rt_node.inode == RRNodeId(sink_rr_node)) { @@ -880,7 +880,7 @@ bool trace_routed_connection_rr_nodes_recurr(const RouteTreeNode& rt_node, bool on_path_to_sink = trace_routed_connection_rr_nodes_recurr( child_rt_node, sink_rr_node, rr_nodes_on_path); if (on_path_to_sink) { - rr_nodes_on_path.push_back(size_t(rt_node.inode)); + rr_nodes_on_path.push_back(rt_node.inode); return true; } } @@ -889,12 +889,12 @@ bool trace_routed_connection_rr_nodes_recurr(const RouteTreeNode& rt_node, } //Find the edge between two rr nodes -t_edge_size find_edge(int prev_inode, int inode) { +t_edge_size find_edge(RRNodeId prev_inode, RRNodeId inode) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; for (t_edge_size iedge = 0; - iedge < rr_graph.num_edges(RRNodeId(prev_inode)); ++iedge) { - if (size_t(rr_graph.edge_sink_node(RRNodeId(prev_inode), iedge)) == size_t(inode)) { + iedge < rr_graph.num_edges(prev_inode); ++iedge) { + if (rr_graph.edge_sink_node(prev_inode, iedge) == inode) { return iedge; } } @@ -935,19 +935,19 @@ static void draw_router_expansion_costs(ezgl::renderer* g) { auto& device_ctx = g_vpr_ctx.device(); auto& routing_ctx = g_vpr_ctx.routing(); - std::vector rr_costs(device_ctx.rr_graph.num_nodes()); + vtr::vector rr_costs(device_ctx.rr_graph.num_nodes()); - for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { + for (RRNodeId inode : device_ctx.rr_graph.nodes()) { float cost = get_router_expansion_cost( - routing_ctx.rr_node_route_inf[(size_t)rr_id], + 
routing_ctx.rr_node_route_inf[inode], draw_state->show_router_expansion_cost); - rr_costs[(size_t)rr_id] = cost; + rr_costs[inode] = cost; } bool all_nan = true; - for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { - if (std::isinf(rr_costs[(size_t)rr_id])) { - rr_costs[(size_t)rr_id] = NAN; + for (RRNodeId inode : device_ctx.rr_graph.nodes()) { + if (std::isinf(rr_costs[inode])) { + rr_costs[inode] = NAN; } else { all_nan = false; } diff --git a/vpr/src/draw/draw.h b/vpr/src/draw/draw.h index 8a4c15077ed..145f05800aa 100644 --- a/vpr/src/draw/draw.h +++ b/vpr/src/draw/draw.h @@ -73,7 +73,7 @@ const ezgl::color DRIVEN_BY_IT_COLOR = ezgl::LIGHT_MEDIUM_BLUE; const float WIRE_DRAWING_WIDTH = 0.5; /* Find the edge between two rr nodes */ -t_edge_size find_edge(int prev_inode, int inode); +t_edge_size find_edge(RRNodeId prev_inode, RRNodeId inode); /* Returns the track number of this routing resource node inode. */ int get_track_num(int inode, const vtr::OffsetMatrix& chanx_track, const vtr::OffsetMatrix& chany_track); @@ -91,17 +91,17 @@ ezgl::color to_ezgl_color(vtr::Color color); * could be caused by the user clicking on a routing resource, toggled, or * fan-in/fan-out of a highlighted node. */ bool draw_if_net_highlighted(ClusterNetId inet); -std::vector trace_routed_connection_rr_nodes( - const ClusterNetId net_id, - const int driver_pin, - const int sink_pin); +std::vector trace_routed_connection_rr_nodes( + ClusterNetId net_id, + int driver_pin, + int sink_pin); /* Helper function for trace_routed_connection_rr_nodes * Adds the rr nodes linking rt_node to sink_rr_node to rr_nodes_on_path * Returns true if rt_node is on the path. 
*/ bool trace_routed_connection_rr_nodes_recurr(const RouteTreeNode& rt_node, - int sink_rr_node, - std::vector& rr_nodes_on_path); + RRNodeId sink_rr_node, + std::vector& rr_nodes_on_path); /* This routine highlights the blocks affected in the latest move * * It highlights the old and new locations of the moved blocks * diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp index 75802edf8c5..0eb49a02034 100644 --- a/vpr/src/draw/draw_basic.cpp +++ b/vpr/src/draw/draw_basic.cpp @@ -267,10 +267,10 @@ void draw_congestion(ezgl::renderer* g) { //Record min/max congestion float min_congestion_ratio = 1.; float max_congestion_ratio = min_congestion_ratio; - std::vector congested_rr_nodes = collect_congested_rr_nodes(); - for (int inode : congested_rr_nodes) { + auto congested_rr_nodes = collect_congested_rr_nodes(); + for (RRNodeId inode : congested_rr_nodes) { short occ = route_ctx.rr_node_route_inf[inode].occ(); - short capacity = rr_graph.node_capacity(RRNodeId(inode)); + short capacity = rr_graph.node_capacity(inode); float congestion_ratio = float(occ) / capacity; @@ -290,12 +290,12 @@ void draw_congestion(ezgl::renderer* g) { //Sort the nodes in ascending order of value for drawing, this ensures high //valued nodes are not overdrawn by lower value ones (e.g-> when zoomed-out far) - auto cmp_ascending_acc_cost = [&](int lhs_node, int rhs_node) { + auto cmp_ascending_acc_cost = [&](RRNodeId lhs_node, RRNodeId rhs_node) { short lhs_occ = route_ctx.rr_node_route_inf[lhs_node].occ(); - short lhs_capacity = rr_graph.node_capacity(RRNodeId(lhs_node)); + short lhs_capacity = rr_graph.node_capacity(lhs_node); short rhs_occ = route_ctx.rr_node_route_inf[rhs_node].occ(); - short rhs_capacity = rr_graph.node_capacity(RRNodeId(rhs_node)); + short rhs_capacity = rr_graph.node_capacity(rhs_node); float lhs_cong_ratio = float(lhs_occ) / lhs_capacity; float rhs_cong_ratio = float(rhs_occ) / rhs_capacity; @@ -307,7 +307,7 @@ void draw_congestion(ezgl::renderer* g) 
{ if (draw_state->show_congestion == DRAW_CONGESTED_WITH_NETS) { auto rr_node_nets = collect_rr_node_nets(); - for (int inode : congested_rr_nodes) { + for (RRNodeId inode : congested_rr_nodes) { for (ClusterNetId net : rr_node_nets[inode]) { ezgl::color color = kelly_max_contrast_colors[size_t(net) % kelly_max_contrast_colors.size()]; draw_state->net_color[net] = color; @@ -317,7 +317,7 @@ void draw_congestion(ezgl::renderer* g) { drawroute(HIGHLIGHTED, g); //Reset colors - for (int inode : congested_rr_nodes) { + for (RRNodeId inode : congested_rr_nodes) { for (ClusterNetId net : rr_node_nets[inode]) { draw_state->net_color[net] = DEFAULT_RR_NODE_COLOR; } @@ -327,9 +327,9 @@ void draw_congestion(ezgl::renderer* g) { } //Draw each congested node - for (int inode : congested_rr_nodes) { + for (RRNodeId inode : congested_rr_nodes) { short occ = route_ctx.rr_node_route_inf[inode].occ(); - short capacity = rr_graph.node_capacity(RRNodeId(inode)); + short capacity = rr_graph.node_capacity(inode); float congestion_ratio = float(occ) / capacity; @@ -338,7 +338,7 @@ void draw_congestion(ezgl::renderer* g) { ezgl::color color = to_ezgl_color(cmap->color(congestion_ratio)); - switch (rr_graph.node_type(RRNodeId(inode))) { + switch (rr_graph.node_type(inode)) { case CHANX: //fallthrough case CHANY: draw_rr_chan(inode, color, g); @@ -376,30 +376,30 @@ void draw_routing_costs(ezgl::renderer* g) { float min_cost = std::numeric_limits::infinity(); float max_cost = -min_cost; - std::vector rr_node_costs(device_ctx.rr_graph.num_nodes(), 0.); + vtr::vector rr_node_costs(device_ctx.rr_graph.num_nodes(), 0.); - for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { + for (const RRNodeId inode : device_ctx.rr_graph.nodes()) { float cost = 0.; if (draw_state->show_routing_costs == DRAW_TOTAL_ROUTING_COSTS || draw_state->show_routing_costs == DRAW_LOG_TOTAL_ROUTING_COSTS) { - cost = get_single_rr_cong_cost((size_t)rr_id, + cost = get_single_rr_cong_cost(inode, get_draw_state_vars()->pres_fac); } else if
(draw_state->show_routing_costs == DRAW_BASE_ROUTING_COSTS) { - cost = get_single_rr_cong_base_cost((size_t)rr_id); + cost = get_single_rr_cong_base_cost(inode); } else if (draw_state->show_routing_costs == DRAW_ACC_ROUTING_COSTS || draw_state->show_routing_costs == DRAW_LOG_ACC_ROUTING_COSTS) { - cost = get_single_rr_cong_acc_cost((size_t)rr_id); + cost = get_single_rr_cong_acc_cost(inode); } else { VTR_ASSERT( draw_state->show_routing_costs == DRAW_PRES_ROUTING_COSTS || draw_state->show_routing_costs == DRAW_LOG_PRES_ROUTING_COSTS); - cost = get_single_rr_cong_pres_cost((size_t)rr_id, + cost = get_single_rr_cong_pres_cost(inode, get_draw_state_vars()->pres_fac); } @@ -409,17 +409,18 @@ void draw_routing_costs(ezgl::renderer* g) { == DRAW_LOG_PRES_ROUTING_COSTS) { cost = std::log(cost); } - rr_node_costs[(size_t)rr_id] = cost; + rr_node_costs[inode] = cost; min_cost = std::min(min_cost, cost); max_cost = std::max(max_cost, cost); } //Hide min value, draw_rr_costs() ignores NaN's - for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { - if (rr_node_costs[(size_t)rr_id] == min_cost) { - rr_node_costs[(size_t)rr_id] = NAN; + for (RRNodeId inode : device_ctx.rr_graph.nodes()) { + if (rr_node_costs[inode] == min_cost) { + rr_node_costs[inode] = NAN; } } + char msg[vtr::bufsize]; if (draw_state->show_routing_costs == DRAW_TOTAL_ROUTING_COSTS) { sprintf(msg, "Total Congestion Cost Range [%g, %g]", min_cost, @@ -555,9 +556,9 @@ void draw_routed_net(ParentNetId net_id, ezgl::renderer* g) { if (!route_ctx.route_trees[net_id]) // No routing -> Skip. 
(Allows me to draw partially complete routes) return; - std::vector rr_nodes_to_draw; + std::vector rr_nodes_to_draw; for (auto& rt_node : route_ctx.route_trees[net_id].value().all_nodes()) { - int inode = size_t(rt_node.inode); + RRNodeId inode = rt_node.inode; if (draw_if_net_highlighted(convert_to_cluster_net_id(net_id))) { /* If a net has been highlighted, highlight the whole net in * @@ -582,7 +583,7 @@ void draw_routed_net(ParentNetId net_id, ezgl::renderer* g) { } //Draws the set of rr_nodes specified, using the colors set in draw_state -void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::renderer* g) { +void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::renderer* g) { t_draw_state* draw_state = get_draw_state_vars(); auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; @@ -611,11 +612,10 @@ void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::renderer } for (size_t i = 1; i < rr_nodes_to_draw.size(); ++i) { - int inode = rr_nodes_to_draw[i]; - RRNodeId rr_node = RRNodeId(inode); - auto rr_type = rr_graph.node_type(rr_node); + RRNodeId inode = rr_nodes_to_draw[i]; + auto rr_type = rr_graph.node_type(inode); - int prev_node = rr_nodes_to_draw[i - 1]; + RRNodeId prev_node = rr_nodes_to_draw[i - 1]; auto prev_type = rr_graph.node_type(RRNodeId(prev_node)); auto iedge = find_edge(prev_node, inode); @@ -628,7 +628,7 @@ void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::renderer } case IPIN: { draw_rr_pin(inode, draw_state->draw_rr_node[inode].color, g); - if (rr_graph.node_type(RRNodeId(prev_node)) == OPIN) { + if (rr_graph.node_type(prev_node) == OPIN) { draw_pin_to_pin(prev_node, inode, g); } else { draw_pin_to_chan_edge(inode, prev_node, g); @@ -637,20 +637,17 @@ void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::renderer } case CHANX: { if (draw_state->draw_route_type == GLOBAL) - chanx_track[rr_graph.node_xlow(rr_node)][rr_graph.node_ylow(rr_node)]++; + 
chanx_track[rr_graph.node_xlow(inode)][rr_graph.node_ylow(inode)]++; draw_rr_chan(inode, draw_state->draw_rr_node[inode].color, g); switch (prev_type) { case CHANX: { - draw_chanx_to_chanx_edge(RRNodeId(prev_node), RRNodeId(inode), - switch_type, g); + draw_chanx_to_chanx_edge(prev_node, inode, switch_type, g); break; } case CHANY: { - draw_chanx_to_chany_edge(inode, prev_node, - - FROM_Y_TO_X, switch_type, g); + draw_chanx_to_chany_edge(inode, prev_node, FROM_Y_TO_X, switch_type, g); break; } case OPIN: { @@ -668,7 +665,7 @@ void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::renderer } case CHANY: { if (draw_state->draw_route_type == GLOBAL) - chany_track[rr_graph.node_xlow(rr_node)][rr_graph.node_ylow(rr_node)]++; + chany_track[rr_graph.node_xlow(inode)][rr_graph.node_ylow(inode)]++; draw_rr_chan(inode, draw_state->draw_rr_node[inode].color, g); @@ -1146,16 +1143,15 @@ void draw_routed_timing_edge_connection(tatum::NodeId src_tnode, t_draw_state* draw_state = get_draw_state_vars(); - std::vector routed_rr_nodes = trace_routed_connection_rr_nodes(net_id, 0, sink_net_pin_index); + std::vector routed_rr_nodes = trace_routed_connection_rr_nodes(net_id, 0, sink_net_pin_index); //Mark all the nodes highlighted - for (int inode : routed_rr_nodes) { + for (RRNodeId inode : routed_rr_nodes) { draw_state->draw_rr_node[inode].color = color; } - draw_partial_route((std::vector)routed_rr_nodes, - (ezgl::renderer*)g); + draw_partial_route(routed_rr_nodes, (ezgl::renderer*)g); } else { //Connection entirely within the CLB, we don't draw the internal routing so treat it as a fly-line VTR_ASSERT(clb_src_block == clb_sink_block); diff --git a/vpr/src/draw/draw_basic.h b/vpr/src/draw/draw_basic.h index 3973f55a4be..4b2fad749c2 100644 --- a/vpr/src/draw/draw_basic.h +++ b/vpr/src/draw/draw_basic.h @@ -69,7 +69,7 @@ void drawroute(enum e_draw_net_type draw_net_type, ezgl::renderer* g); void draw_routed_net(ParentNetId net, ezgl::renderer* g); //Draws the set of 
rr_nodes specified, using the colors set in draw_state -void draw_partial_route(const std::vector& rr_nodes_to_draw, +void draw_partial_route(const std::vector& rr_nodes_to_draw, ezgl::renderer* g); /* Draws a heat map of routing wire utilization (i.e. fraction of wires used in each channel) diff --git a/vpr/src/draw/draw_rr.cpp b/vpr/src/draw/draw_rr.cpp index c4a4cde6278..116942d9612 100644 --- a/vpr/src/draw/draw_rr.cpp +++ b/vpr/src/draw/draw_rr.cpp @@ -8,6 +8,7 @@ #include #include +#include "rr_graph_fwd.h" #include "vtr_assert.h" #include "vtr_ndoffsetmatrix.h" #include "vtr_memory.h" @@ -61,7 +62,6 @@ //The arrow head position for turning/straight-thru connections in a switch box constexpr float SB_EDGE_TURN_ARROW_POSITION = 0.2; constexpr float SB_EDGE_STRAIGHT_ARROW_POSITION = 0.95; -constexpr float EMPTY_BLOCK_LIGHTEN_FACTOR = 0.20; /* Draws the routing resources that exist in the FPGA, if the user wants * them drawn. @@ -80,11 +80,10 @@ void draw_rr(ezgl::renderer* g) { g->set_line_dash(ezgl::line_dash::none); - for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { - size_t inode = (size_t)rr_id; + for (const RRNodeId inode : device_ctx.rr_graph.nodes()) { if (!draw_state->draw_rr_node[inode].node_highlighted) { /* If not highlighted node, assign color based on type. */ - switch (rr_graph.node_type(rr_id)) { + switch (rr_graph.node_type(inode)) { case CHANX: case CHANY: draw_state->draw_rr_node[inode].color = DEFAULT_RR_NODE_COLOR; @@ -107,7 +106,7 @@ void draw_rr(ezgl::renderer* g) { } /* Now call drawing routines to draw the node. 
*/ - switch (rr_graph.node_type(rr_id)) { + switch (rr_graph.node_type(inode)) { case SINK: draw_rr_src_sink(inode, draw_state->draw_rr_node[inode].color, g); break; @@ -138,24 +137,23 @@ void draw_rr(ezgl::renderer* g) { default: vpr_throw(VPR_ERROR_OTHER, __FILE__, __LINE__, - "in draw_rr: Unexpected rr_node type: %d.\n", rr_graph.node_type(rr_id)); + "in draw_rr: Unexpected rr_node type: %d.\n", rr_graph.node_type(inode)); } } drawroute(HIGHLIGHTED, g); } -void draw_rr_chan(int inode, const ezgl::color color, ezgl::renderer* g) { +void draw_rr_chan(RRNodeId inode, const ezgl::color color, ezgl::renderer* g) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - auto rr_node = RRNodeId(inode); - t_rr_type type = rr_graph.node_type(rr_node); + t_rr_type type = rr_graph.node_type(inode); VTR_ASSERT(type == CHANX || type == CHANY); ezgl::rectangle bound_box = draw_get_rr_chan_bbox(inode); - Direction dir = rr_graph.node_direction(rr_node); + Direction dir = rr_graph.node_direction(inode); //We assume increasing direction, and swap if needed ezgl::point2d start = bound_box.bottom_left(); @@ -181,8 +179,8 @@ void draw_rr_chan(int inode, const ezgl::color color, ezgl::renderer* g) { int coord_min = -1; int coord_max = -1; if (type == CHANX) { - coord_min = rr_graph.node_xlow(rr_node); - coord_max = rr_graph.node_xhigh(rr_node); + coord_min = rr_graph.node_xlow(inode); + coord_max = rr_graph.node_xhigh(inode); if (dir == Direction::INC) { mux_dir = RIGHT; } else { @@ -190,8 +188,8 @@ void draw_rr_chan(int inode, const ezgl::color color, ezgl::renderer* g) { } } else { VTR_ASSERT(type == CHANY); - coord_min = rr_graph.node_ylow(rr_node); - coord_max = rr_graph.node_yhigh(rr_node); + coord_min = rr_graph.node_ylow(inode); + coord_max = rr_graph.node_yhigh(inode); if (dir == Direction::INC) { mux_dir = TOP; } else { @@ -236,7 +234,7 @@ void draw_rr_chan(int inode, const ezgl::color color, ezgl::renderer* g) { if (switchpoint_min == 0) { if 
(dir != Direction::BIDIR) { //Draw a mux at the start of each wire, labelled with it's size (#inputs) - draw_mux_with_size(start, mux_dir, WIRE_DRAWING_WIDTH, rr_graph.node_fan_in(rr_node), g); + draw_mux_with_size(start, mux_dir, WIRE_DRAWING_WIDTH, rr_graph.node_fan_in(inode), g); } } else { //Draw arrows and label with switch point @@ -262,7 +260,7 @@ void draw_rr_chan(int inode, const ezgl::color color, ezgl::renderer* g) { if (switchpoint_max == 0) { if (dir != Direction::BIDIR) { //Draw a mux at the start of each wire, labelled with it's size (#inputs) - draw_mux_with_size(start, mux_dir, WIRE_DRAWING_WIDTH, rr_graph.node_fan_in(rr_node), g); + draw_mux_with_size(start, mux_dir, WIRE_DRAWING_WIDTH, rr_graph.node_fan_in(inode), g); } } else { //Draw arrows and label with switch point @@ -290,14 +288,13 @@ void draw_rr_chan(int inode, const ezgl::color color, ezgl::renderer* g) { /* Draws all the edges that the user wants shown between inode and what it * connects to. inode is assumed to be a CHANX, CHANY, or IPIN. */ -void draw_rr_edges(int inode, ezgl::renderer* g) { +void draw_rr_edges(RRNodeId inode, ezgl::renderer* g) { t_draw_state* draw_state = get_draw_state_vars(); auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; auto rr_node = RRNodeId(inode); t_rr_type from_type, to_type; - int to_node; short switch_type; from_type = rr_graph.node_type(rr_node); @@ -308,10 +305,10 @@ void draw_rr_edges(int inode, ezgl::renderer* g) { return; /* Nothing to draw. 
*/ } - for (t_edge_size iedge = 0, l = rr_graph.num_edges(RRNodeId(inode)); iedge < l; iedge++) { - to_node = size_t(rr_graph.edge_sink_node(rr_node, iedge)); - to_type = rr_graph.node_type(RRNodeId(to_node)); - bool edge_configurable = rr_graph.edge_is_configurable(RRNodeId(inode), iedge); + for (t_edge_size iedge = 0, l = rr_graph.num_edges(inode); iedge < l; iedge++) { + RRNodeId to_node = rr_graph.edge_sink_node(rr_node, iedge); + to_type = rr_graph.node_type(to_node); + bool edge_configurable = rr_graph.edge_is_configurable(inode, iedge); switch (from_type) { case OPIN: @@ -531,7 +528,7 @@ void draw_rr_edges(int inode, ezgl::renderer* g) { /* Draws an IPIN or OPIN rr_node. Note that the pin can appear on more * * than one side of a clb. Also note that this routine can change the * * current color to BLACK. */ -void draw_rr_pin(int inode, const ezgl::color& color, ezgl::renderer* g) { +void draw_rr_pin(RRNodeId inode, const ezgl::color& color, ezgl::renderer* g) { t_draw_coords* draw_coords = get_draw_coords_vars(); float xcen, ycen; @@ -563,14 +560,14 @@ void draw_rr_pin(int inode, const ezgl::color& color, ezgl::renderer* g) { } } -void draw_rr_src_sink(int inode, ezgl::color color, ezgl::renderer* g) { +void draw_rr_src_sink(RRNodeId inode, ezgl::color color, ezgl::renderer* g) { t_draw_coords* draw_coords = get_draw_coords_vars(); auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; float xcen, ycen; - draw_get_rr_src_sink_coords(rr_graph.rr_nodes()[inode], &xcen, &ycen); + draw_get_rr_src_sink_coords(rr_graph.rr_nodes()[size_t(inode)], &xcen, &ycen); g->set_color(color); @@ -579,7 +576,7 @@ void draw_rr_src_sink(int inode, ezgl::color color, ezgl::renderer* g) { {xcen + draw_coords->pin_size, ycen + draw_coords->pin_size}); std::string str = vtr::string_fmt("%d", - rr_graph.node_class_num(RRNodeId(inode))); + rr_graph.node_class_num(inode)); g->set_color(ezgl::BLACK); g->draw_text({xcen, ycen}, str.c_str(), 2 * 
draw_coords->pin_size, 2 * draw_coords->pin_size); @@ -648,16 +645,15 @@ void draw_rr_switch(float from_x, float from_y, float to_x, float to_y, bool buf } } -void draw_expand_non_configurable_rr_nodes_recurr(int from_node, - std::set& expanded_nodes) { +void draw_expand_non_configurable_rr_nodes_recurr(RRNodeId from_node, + std::set& expanded_nodes) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; expanded_nodes.insert(from_node); - for (t_edge_size iedge = 0; - iedge < rr_graph.num_edges(RRNodeId(from_node)); ++iedge) { - bool edge_configurable = rr_graph.edge_is_configurable(RRNodeId(from_node), iedge); - int to_node = size_t(rr_graph.edge_sink_node(RRNodeId(from_node), iedge)); + for (t_edge_size iedge = 0; iedge < rr_graph.num_edges(from_node); ++iedge) { + bool edge_configurable = rr_graph.edge_is_configurable(from_node, iedge); + RRNodeId to_node = rr_graph.edge_sink_node(from_node, iedge); if (!edge_configurable && !expanded_nodes.count(to_node)) { draw_expand_non_configurable_rr_nodes_recurr(to_node, @@ -672,8 +668,8 @@ void draw_expand_non_configurable_rr_nodes_recurr(int from_node, * * It returns the hit RR node's ID (or OPEN if no hit) */ -int draw_check_rr_node_hit(float click_x, float click_y) { - int hit_node = OPEN; +RRNodeId draw_check_rr_node_hit(float click_x, float click_y) { + RRNodeId hit_node = RRNodeId::INVALID(); ezgl::rectangle bound_box; t_draw_coords* draw_coords = get_draw_coords_vars(); @@ -684,17 +680,16 @@ int draw_check_rr_node_hit(float click_x, float click_y) { VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); int layer_num = 0; - for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { - size_t inode = (size_t)rr_id; - switch (rr_graph.node_type(rr_id)) { + for (const RRNodeId inode : device_ctx.rr_graph.nodes()) { + switch (rr_graph.node_type(inode)) { case IPIN: case OPIN: { - int i = rr_graph.node_xlow(rr_id); - int j = rr_graph.node_ylow(rr_id); - t_physical_tile_type_ptr type = 
device_ctx.grid.get_physical_type({i, j, layer_num}); - int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); - int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); - int ipin = rr_graph.node_pin_num(rr_id); + t_physical_tile_loc tile_loc = {rr_graph.node_xlow(inode), rr_graph.node_ylow(inode), layer_num}; + t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type(tile_loc); + int width_offset = device_ctx.grid.get_width_offset(tile_loc); + int height_offset = device_ctx.grid.get_height_offset(tile_loc); + int ipin = rr_graph.node_pin_num(inode); + float xcen, ycen; for (const e_side& iside : SIDES) { // If pin exists on this side of the block, then get pin coordinates @@ -713,7 +708,7 @@ int draw_check_rr_node_hit(float click_x, float click_y) { case SOURCE: case SINK: { float xcen, ycen; - draw_get_rr_src_sink_coords(rr_graph.rr_nodes()[inode], &xcen, &ycen); + draw_get_rr_src_sink_coords(rr_graph.rr_nodes()[size_t(inode)], &xcen, &ycen); // Now check if we clicked on this pin if (click_x >= xcen - draw_coords->pin_size && click_x <= xcen + draw_coords->pin_size && click_y >= ycen - draw_coords->pin_size && click_y <= ycen + draw_coords->pin_size) { @@ -757,12 +752,12 @@ bool highlight_rr_nodes(float x, float y) { } // Check which rr_node (if any) was clicked on. 
- int hit_node = draw_check_rr_node_hit(x, y); + RRNodeId hit_node = draw_check_rr_node_hit(x, y); return highlight_rr_nodes(hit_node); } -void draw_rr_costs(ezgl::renderer* g, const std::vector& rr_costs, bool lowest_cost_first) { +void draw_rr_costs(ezgl::renderer* g, const vtr::vector& rr_costs, bool lowest_cost_first) { t_draw_state* draw_state = get_draw_state_vars(); /* Draws routing costs */ @@ -780,11 +775,11 @@ void draw_rr_costs(ezgl::renderer* g, const std::vector& rr_costs, bool l float min_cost = std::numeric_limits::infinity(); float max_cost = -min_cost; - for (const RRNodeId& rr_id : rr_graph.nodes()) { - if (std::isnan(rr_costs[(size_t)rr_id])) continue; + for (RRNodeId inode : rr_graph.nodes()) { + if (std::isnan(rr_costs[inode])) continue; - min_cost = std::min(min_cost, rr_costs[(size_t)rr_id]); - max_cost = std::max(max_cost, rr_costs[(size_t)rr_id]); + min_cost = std::min(min_cost, rr_costs[inode]); + max_cost = std::max(max_cost, rr_costs[inode]); } if (min_cost == std::numeric_limits::infinity()) min_cost = 0; if (max_cost == -std::numeric_limits::infinity()) max_cost = 0; @@ -792,9 +787,8 @@ void draw_rr_costs(ezgl::renderer* g, const std::vector& rr_costs, bool l //Draw the nodes in ascending order of value, this ensures high valued nodes //are not overdrawn by lower value ones (e.g-> when zoomed-out far) - std::vector nodes(rr_graph.num_nodes()); - std::iota(nodes.begin(), nodes.end(), 0); - auto cmp_ascending_cost = [&](int lhs_node, int rhs_node) { + std::vector nodes(device_ctx.rr_graph.nodes().begin(), device_ctx.rr_graph.nodes().end()); + auto cmp_ascending_cost = [&](RRNodeId lhs_node, RRNodeId rhs_node) { if (lowest_cost_first) { return rr_costs[lhs_node] > rr_costs[rhs_node]; } @@ -802,14 +796,13 @@ void draw_rr_costs(ezgl::renderer* g, const std::vector& rr_costs, bool l }; std::sort(nodes.begin(), nodes.end(), cmp_ascending_cost); - for (int inode : nodes) { + for (RRNodeId inode : nodes) { float cost = rr_costs[inode]; - 
RRNodeId rr_node = RRNodeId(inode); if (std::isnan(cost)) continue; ezgl::color color = to_ezgl_color(cmap->color(cost)); - switch (rr_graph.node_type(rr_node)) { + switch (rr_graph.node_type(inode)) { case CHANX: //fallthrough case CHANY: draw_rr_chan(inode, color, g); @@ -841,9 +834,9 @@ void draw_rr_costs(ezgl::renderer* g, const std::vector& rr_costs, bool l /* Returns the coordinates at which the center of this pin should be drawn. * * inode gives the node number, and iside gives the side of the clb or pad * * the physical pin is on. */ -void draw_get_rr_pin_coords(int inode, float* xcen, float* ycen, const e_side& pin_side) { +void draw_get_rr_pin_coords(RRNodeId inode, float* xcen, float* ycen, const e_side& pin_side) { auto& device_ctx = g_vpr_ctx.device(); - draw_get_rr_pin_coords(device_ctx.rr_graph.rr_nodes()[inode], xcen, ycen, pin_side); + draw_get_rr_pin_coords(device_ctx.rr_graph.rr_nodes()[size_t(inode)], xcen, ycen, pin_side); } void draw_get_rr_pin_coords(const t_rr_node& node, float* xcen, float* ycen, const e_side& pin_side) { diff --git a/vpr/src/draw/draw_rr.h b/vpr/src/draw/draw_rr.h index 129a994694d..63e12398792 100644 --- a/vpr/src/draw/draw_rr.h +++ b/vpr/src/draw/draw_rr.h @@ -53,30 +53,30 @@ void draw_rr(ezgl::renderer* g); /* Draws all the edges that the user wants shown between inode and what it * connects to. inode is assumed to be a CHANX, CHANY, or IPIN. */ -void draw_rr_edges(int from_node, ezgl::renderer* g); +void draw_rr_edges(RRNodeId from_node, ezgl::renderer* g); -void draw_rr_chan(int inode, const ezgl::color color, ezgl::renderer* g); +void draw_rr_chan(RRNodeId inode, const ezgl::color color, ezgl::renderer* g); /* Draws an IPIN or OPIN rr_node. Note that the pin can appear on more * than one side of a clb. Also note that this routine can change the * current color to BLACK. 
*/ -void draw_rr_pin(int inode, const ezgl::color& color, ezgl::renderer* g); +void draw_rr_pin(RRNodeId inode, const ezgl::color& color, ezgl::renderer* g); -void draw_rr_src_sink(int inode, ezgl::color color, ezgl::renderer* g); +void draw_rr_src_sink(RRNodeId inode, ezgl::color color, ezgl::renderer* g); void draw_get_rr_src_sink_coords(const t_rr_node& node, float* xcen, float* ycen); /* Draws a buffer (triangle) or pass transistor (circle) on the edge * connecting from to to, depending on the status of buffered. The drawing * is closest to the from_node, since it reflects the switch type of from. */ void draw_rr_switch(float from_x, float from_y, float to_x, float to_y, bool buffered, bool switch_configurable, ezgl::renderer* g); -void draw_expand_non_configurable_rr_nodes_recurr(int from_node, - std::set& expanded_nodes); +void draw_expand_non_configurable_rr_nodes_recurr(RRNodeId from_node, + std::set& expanded_nodes); /* This is a helper function for highlight_rr_nodes(). It determines whether * a routing resource has been clicked on by computing a bounding box for that * and checking if the mouse click hit inside its bounding box. - * It returns the hit RR node's ID (or OPEN if no hit) */ -int draw_check_rr_node_hit(float click_x, float click_y); + * It returns the hit RR node's ID (or INVALID if no hit) */ +RRNodeId draw_check_rr_node_hit(float click_x, float click_y); /* This routine is called when the routing resource graph is shown, and someone * clicks outside a block. That click might represent a click on a wire -- we call @@ -85,12 +85,12 @@ int draw_check_rr_node_hit(float click_x, float click_y); bool highlight_rr_nodes(float x, float y); /* Draws routing costs */ -void draw_rr_costs(ezgl::renderer* g, const std::vector& rr_costs, bool lowest_cost_first = true); +void draw_rr_costs(ezgl::renderer* g, const vtr::vector& rr_costs, bool lowest_cost_first = true); /* Returns the coordinates at which the center of this pin should be drawn. 
* * inode gives the node number, and iside gives the side of the clb or pad * * the physical pin is on. */ -void draw_get_rr_pin_coords(int inode, float* xcen, float* ycen, const e_side& pin_side); +void draw_get_rr_pin_coords(RRNodeId inode, float* xcen, float* ycen, const e_side& pin_side); /* Returns the coordinates at which the center of this pin should be drawn. * * node gives the node object, and iside gives the side of the clb or pad * diff --git a/vpr/src/draw/draw_rr_edges.cpp b/vpr/src/draw/draw_rr_edges.cpp index 6ff00263676..d8aee31c89b 100644 --- a/vpr/src/draw/draw_rr_edges.cpp +++ b/vpr/src/draw/draw_rr_edges.cpp @@ -70,8 +70,8 @@ void draw_chany_to_chany_edge(RRNodeId from_node, RRNodeId to_node, short switch int from_ylow, to_ylow, from_yhigh, to_yhigh; //, from_x, to_x; // Get the coordinates of the channel wires. - from_chan = draw_get_rr_chan_bbox(size_t(from_node)); - to_chan = draw_get_rr_chan_bbox(size_t(to_node)); + from_chan = draw_get_rr_chan_bbox(from_node); + to_chan = draw_get_rr_chan_bbox(to_node); // from_x = rr_graph.node_xlow(RRNodeId(from_node)); // to_x = rr_graph.node_xlow(RRNodeId(to_node)); @@ -137,7 +137,7 @@ void draw_chany_to_chany_edge(RRNodeId from_node, RRNodeId to_node, short switch g->draw_line({x1, y1}, {x2, y2}); if (draw_state->draw_rr_toggle == DRAW_ALL_RR - || draw_state->draw_rr_node[size_t(from_node)].node_highlighted) { + || draw_state->draw_rr_node[from_node].node_highlighted) { draw_rr_switch(x1, y1, x2, y2, rr_graph.rr_switch_inf(RRSwitchId(switch_type)).buffered(), rr_graph.rr_switch_inf(RRSwitchId(switch_type)).configurable(), g); @@ -160,8 +160,8 @@ void draw_chanx_to_chanx_edge(RRNodeId from_node, RRNodeId to_node, short switch int from_xlow, to_xlow, from_xhigh, to_xhigh; // Get the coordinates of the channel wires. 
- from_chan = draw_get_rr_chan_bbox(size_t(from_node)); - to_chan = draw_get_rr_chan_bbox(size_t(to_node)); + from_chan = draw_get_rr_chan_bbox(from_node); + to_chan = draw_get_rr_chan_bbox(to_node); /* (x1, y1) point on from_node, (x2, y2) point on to_node. */ @@ -229,14 +229,14 @@ void draw_chanx_to_chanx_edge(RRNodeId from_node, RRNodeId to_node, short switch g->draw_line({x1, y1}, {x2, y2}); if (draw_state->draw_rr_toggle == DRAW_ALL_RR - || draw_state->draw_rr_node[size_t(from_node)].node_highlighted) { + || draw_state->draw_rr_node[from_node].node_highlighted) { draw_rr_switch(x1, y1, x2, y2, rr_graph.rr_switch_inf(RRSwitchId(switch_type)).buffered(), rr_graph.rr_switch_inf(RRSwitchId(switch_type)).configurable(), g); } } -void draw_chanx_to_chany_edge(int chanx_node, int chany_node, enum e_edge_dir edge_dir, short switch_type, ezgl::renderer* g) { +void draw_chanx_to_chany_edge(RRNodeId chanx_node, RRNodeId chany_node, enum e_edge_dir edge_dir, short switch_type, ezgl::renderer* g) { t_draw_state* draw_state = get_draw_state_vars(); t_draw_coords* draw_coords = get_draw_coords_vars(); auto& device_ctx = g_vpr_ctx.device(); @@ -260,17 +260,17 @@ void draw_chanx_to_chany_edge(int chanx_node, int chany_node, enum e_edge_dir ed x2 = chany_bbox.left(); // these values xhigh/low yhigh/low mark the cordinates for the begining and ends of the wire. - chanx_xlow = rr_graph.node_xlow(RRNodeId(chanx_node)); - chanx_y = rr_graph.node_ylow(RRNodeId(chanx_node)); - chany_x = rr_graph.node_xlow(RRNodeId(chany_node)); - chany_ylow = rr_graph.node_ylow(RRNodeId(chany_node)); + chanx_xlow = rr_graph.node_xlow(chanx_node); + chanx_y = rr_graph.node_ylow(chanx_node); + chany_x = rr_graph.node_xlow(chany_node); + chany_ylow = rr_graph.node_ylow(chany_node); if (chanx_xlow <= chany_x) { /* Can draw connection going right */ /* Connection not at end of the CHANX segment. 
*/ x1 = draw_coords->tile_x[chany_x] + draw_coords->get_tile_width(); - if (rr_graph.node_direction(RRNodeId(chanx_node)) != Direction::BIDIR && (SwitchType)switch_type != SwitchType::SHORT) { + if (rr_graph.node_direction(chanx_node) != Direction::BIDIR && (SwitchType)switch_type != SwitchType::SHORT) { if (edge_dir == FROM_X_TO_Y) { - if (rr_graph.node_direction(RRNodeId(chanx_node)) == Direction::DEC) { /* If dec wire, then going left */ + if (rr_graph.node_direction(chanx_node) == Direction::DEC) { /* If dec wire, then going left */ x1 = draw_coords->tile_x[chany_x + 1]; } } @@ -282,9 +282,9 @@ void draw_chanx_to_chany_edge(int chanx_node, int chany_node, enum e_edge_dir ed /* Connection not at end of the CHANY segment. */ y2 = draw_coords->tile_y[chanx_y] + draw_coords->get_tile_width(); - if (rr_graph.node_direction(RRNodeId(chany_node)) != Direction::BIDIR && (SwitchType)switch_type != SwitchType::SHORT) { + if (rr_graph.node_direction(chany_node) != Direction::BIDIR && (SwitchType)switch_type != SwitchType::SHORT) { if (edge_dir == FROM_Y_TO_X) { - if (rr_graph.node_direction(RRNodeId(chany_node)) == Direction::DEC) { /* If dec wire, then going down */ + if (rr_graph.node_direction(chany_node) == Direction::DEC) { /* If dec wire, then going down */ y2 = draw_coords->tile_y[chanx_y + 1]; } } @@ -310,12 +310,12 @@ void draw_chanx_to_chany_edge(int chanx_node, int chany_node, enum e_edge_dir ed } } -void draw_pin_to_pin(int opin_node, int ipin_node, ezgl::renderer* g) { +void draw_pin_to_pin(RRNodeId opin_node, RRNodeId ipin_node, ezgl::renderer* g) { /* This routine draws an edge from the opin rr node to the ipin rr node */ auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - VTR_ASSERT(rr_graph.node_type(RRNodeId(opin_node)) == OPIN); - VTR_ASSERT(rr_graph.node_type(RRNodeId(ipin_node)) == IPIN); + VTR_ASSERT(rr_graph.node_type(opin_node) == OPIN); + VTR_ASSERT(rr_graph.node_type(ipin_node) == IPIN); /* FIXME: May use a 
smarter strategy * Currently, we use the last side found for both OPIN and IPIN @@ -325,7 +325,7 @@ void draw_pin_to_pin(int opin_node, int ipin_node, ezgl::renderer* g) { float x1 = 0, y1 = 0; std::vector opin_candidate_sides; for (const e_side& opin_candidate_side : SIDES) { - if (rr_graph.is_node_on_specific_side(RRNodeId(opin_node), opin_candidate_side)) { + if (rr_graph.is_node_on_specific_side(opin_node, opin_candidate_side)) { opin_candidate_sides.push_back(opin_candidate_side); } } @@ -335,7 +335,7 @@ void draw_pin_to_pin(int opin_node, int ipin_node, ezgl::renderer* g) { float x2 = 0, y2 = 0; std::vector ipin_candidate_sides; for (const e_side& ipin_candidate_side : SIDES) { - if (rr_graph.is_node_on_specific_side(RRNodeId(ipin_node), ipin_candidate_side)) { + if (rr_graph.is_node_on_specific_side(ipin_node, ipin_candidate_side)) { ipin_candidate_sides.push_back(ipin_candidate_side); } } @@ -349,21 +349,21 @@ void draw_pin_to_pin(int opin_node, int ipin_node, ezgl::renderer* g) { draw_triangle_along_line(g, xend, yend, x1, x2, y1, y2); } -void draw_pin_to_sink(int ipin_node, int sink_node, ezgl::renderer* g) { +void draw_pin_to_sink(RRNodeId ipin_node, RRNodeId sink_node, ezgl::renderer* g) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; float x1 = 0, y1 = 0; /* Draw the line for each ipin on different sides */ for (const e_side& pin_side : SIDES) { - if (!rr_graph.is_node_on_specific_side(RRNodeId(ipin_node), pin_side)) { + if (!rr_graph.is_node_on_specific_side(ipin_node, pin_side)) { continue; } draw_get_rr_pin_coords(ipin_node, &x1, &y1, pin_side); float x2 = 0, y2 = 0; - draw_get_rr_src_sink_coords(rr_graph.rr_nodes()[sink_node], &x2, &y2); + draw_get_rr_src_sink_coords(rr_graph.rr_nodes()[size_t(sink_node)], &x2, &y2); g->draw_line({x1, y1}, {x2, y2}); @@ -373,16 +373,16 @@ void draw_pin_to_sink(int ipin_node, int sink_node, ezgl::renderer* g) { } } -void draw_source_to_pin(int source_node, int opin_node, 
ezgl::renderer* g) { +void draw_source_to_pin(RRNodeId source_node, RRNodeId opin_node, ezgl::renderer* g) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; float x1 = 0, y1 = 0; - draw_get_rr_src_sink_coords(rr_graph.rr_nodes()[source_node], &x1, &y1); + draw_get_rr_src_sink_coords(rr_graph.rr_nodes()[size_t(source_node)], &x1, &y1); /* Draw the line for each ipin on different sides */ for (const e_side& pin_side : SIDES) { - if (!rr_graph.is_node_on_specific_side(RRNodeId(opin_node), pin_side)) { + if (!rr_graph.is_node_on_specific_side(opin_node, pin_side)) { continue; } @@ -397,7 +397,7 @@ void draw_source_to_pin(int source_node, int opin_node, ezgl::renderer* g) { } } -void draw_pin_to_chan_edge(int pin_node, int chan_node, ezgl::renderer* g) { +void draw_pin_to_chan_edge(RRNodeId pin_node, RRNodeId chan_node, ezgl::renderer* g) { /* This routine draws an edge from the pin_node to the chan_node (CHANX or * * CHANY). The connection is made to the nearest end of the track instead * * of perpendicular to the track to symbolize a single-drive connection. 
*/ @@ -408,19 +408,14 @@ void draw_pin_to_chan_edge(int pin_node, int chan_node, ezgl::renderer* g) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - //const t_rr_node& pin_rr = device_ctx.rr_nodes[pin_node]; - auto pin_rr = RRNodeId(pin_node); - auto chan_rr = RRNodeId(chan_node); + t_physical_tile_loc tile_loc = { + rr_graph.node_xlow(pin_node), + rr_graph.node_ylow(pin_node), + rr_graph.node_layer(pin_node)}; - const auto& grid_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(pin_rr), - rr_graph.node_ylow(pin_rr), - rr_graph.node_layer(pin_rr)}); - int width_offset = device_ctx.grid.get_width_offset({rr_graph.node_xlow(pin_rr), - rr_graph.node_ylow(pin_rr), - rr_graph.node_layer(pin_rr)}); - int height_offset = device_ctx.grid.get_height_offset({rr_graph.node_xlow(pin_rr), - rr_graph.node_ylow(pin_rr), - rr_graph.node_layer(pin_rr)}); + const auto& grid_type = device_ctx.grid.get_physical_type(tile_loc); + int width_offset = device_ctx.grid.get_width_offset(tile_loc); + int height_offset = device_ctx.grid.get_height_offset(tile_loc); float x1 = 0, y1 = 0; /* If there is only one side, no need for the following inference!!! @@ -461,8 +456,8 @@ void draw_pin_to_chan_edge(int pin_node, int chan_node, ezgl::renderer* g) { */ std::vector pin_candidate_sides; for (const e_side& pin_candidate_side : SIDES) { - if ((rr_graph.is_node_on_specific_side(pin_rr, pin_candidate_side)) - && (grid_type->pinloc[width_offset][height_offset][pin_candidate_side][rr_graph.node_pin_num(pin_rr)])) { + if ((rr_graph.is_node_on_specific_side(pin_node, pin_candidate_side)) + && (grid_type->pinloc[width_offset][height_offset][pin_candidate_side][rr_graph.node_pin_num(pin_node)])) { pin_candidate_sides.push_back(pin_candidate_side); } } @@ -470,18 +465,18 @@ void draw_pin_to_chan_edge(int pin_node, int chan_node, ezgl::renderer* g) { * Any rr_node of a grid should have at least 1 side!!! 
*/ e_side pin_side = NUM_SIDES; - const t_rr_type channel_type = rr_graph.node_type(RRNodeId(chan_node)); + const t_rr_type channel_type = rr_graph.node_type(chan_node); if (1 == pin_candidate_sides.size()) { pin_side = pin_candidate_sides[0]; } else { VTR_ASSERT(1 < pin_candidate_sides.size()); - if (CHANX == channel_type && rr_graph.node_ylow(pin_rr) <= rr_graph.node_ylow(chan_rr)) { + if (CHANX == channel_type && rr_graph.node_ylow(pin_node) <= rr_graph.node_ylow(chan_node)) { pin_side = TOP; - } else if (CHANX == channel_type && rr_graph.node_ylow(pin_rr) - 1 >= rr_graph.node_ylow(chan_rr)) { + } else if (CHANX == channel_type && rr_graph.node_ylow(pin_node) - 1 >= rr_graph.node_ylow(chan_node)) { pin_side = BOTTOM; - } else if (CHANY == channel_type && rr_graph.node_xlow(pin_rr) <= rr_graph.node_xlow(chan_rr)) { + } else if (CHANY == channel_type && rr_graph.node_xlow(pin_node) <= rr_graph.node_xlow(chan_node)) { pin_side = RIGHT; - } else if (CHANY == channel_type && rr_graph.node_xlow(pin_rr) - 1 >= rr_graph.node_xlow(chan_rr)) { + } else if (CHANY == channel_type && rr_graph.node_xlow(pin_node) - 1 >= rr_graph.node_xlow(chan_node)) { pin_side = LEFT; } /* The inferred side must be in the list of sides of the pin rr_node!!! 
*/ @@ -528,13 +523,13 @@ void draw_pin_to_chan_edge(int pin_node, int chan_node, ezgl::renderer* g) { ezgl::rectangle chan_bbox = draw_get_rr_chan_bbox(chan_node); float x2 = 0, y2 = 0; - const Direction chan_rr_direction = rr_graph.node_direction(RRNodeId(chan_node)); + const Direction chan_rr_direction = rr_graph.node_direction(chan_node); switch (channel_type) { case CHANX: { y1 += draw_pin_offset; y2 = chan_bbox.bottom(); x2 = x1; - if (is_opin(rr_graph.node_pin_num(pin_rr), grid_type)) { + if (is_opin(rr_graph.node_pin_num(pin_node), grid_type)) { if (chan_rr_direction == Direction::INC) { x2 = chan_bbox.left(); } else if (chan_rr_direction == Direction::DEC) { @@ -547,7 +542,7 @@ void draw_pin_to_chan_edge(int pin_node, int chan_node, ezgl::renderer* g) { x1 += draw_pin_offset; x2 = chan_bbox.left(); y2 = y1; - if (is_opin(rr_graph.node_pin_num(pin_rr), grid_type)) { + if (is_opin(rr_graph.node_pin_num(pin_node), grid_type)) { if (chan_rr_direction == Direction::INC) { y2 = chan_bbox.bottom(); } else if (chan_rr_direction == Direction::DEC) { @@ -564,7 +559,7 @@ void draw_pin_to_chan_edge(int pin_node, int chan_node, ezgl::renderer* g) { g->draw_line({x1, y1}, {x2, y2}); //don't draw the ex, or triangle unless zoomed in really far - if (chan_rr_direction == Direction::BIDIR || !is_opin(rr_graph.node_pin_num(pin_rr), grid_type)) { + if (chan_rr_direction == Direction::BIDIR || !is_opin(rr_graph.node_pin_num(pin_node), grid_type)) { draw_x(x2, y2, 0.7 * draw_coords->pin_size, g); } else { float xend = x2 + (x1 - x2) / 10.; diff --git a/vpr/src/draw/draw_rr_edges.h b/vpr/src/draw/draw_rr_edges.h index 5e569135add..81077e22d01 100644 --- a/vpr/src/draw/draw_rr_edges.h +++ b/vpr/src/draw/draw_rr_edges.h @@ -49,11 +49,11 @@ void draw_chany_to_chany_edge(RRNodeId from_node, RRNodeId to_node, short switch_type, ezgl::renderer* g); void draw_chanx_to_chanx_edge(RRNodeId from_node, RRNodeId to_node, short switch_type, ezgl::renderer* g); -void 
draw_chanx_to_chany_edge(int chanx_node, int chany_node, enum e_edge_dir edge_dir, short switch_type, ezgl::renderer* g); -void draw_pin_to_pin(int opin, int ipin, ezgl::renderer* g); -void draw_pin_to_sink(int ipin_node, int sink_node, ezgl::renderer* g); -void draw_source_to_pin(int source_node, int opin_node, ezgl::renderer* g); -void draw_pin_to_chan_edge(int pin_node, int chan_node, ezgl::renderer* g); +void draw_chanx_to_chany_edge(RRNodeId chanx_node, RRNodeId chany_node, enum e_edge_dir edge_dir, short switch_type, ezgl::renderer* g); +void draw_pin_to_pin(RRNodeId opin, RRNodeId ipin, ezgl::renderer* g); +void draw_pin_to_sink(RRNodeId ipin_node, RRNodeId sink_node, ezgl::renderer* g); +void draw_source_to_pin(RRNodeId source_node, RRNodeId opin_node, ezgl::renderer* g); +void draw_pin_to_chan_edge(RRNodeId pin_node, RRNodeId chan_node, ezgl::renderer* g); #endif /* NO_GRAPHICS */ #endif /* DRAW_X_TO_Y_H */ diff --git a/vpr/src/draw/draw_searchbar.cpp b/vpr/src/draw/draw_searchbar.cpp index efdf22eae7c..fd11b0a2f43 100644 --- a/vpr/src/draw/draw_searchbar.cpp +++ b/vpr/src/draw/draw_searchbar.cpp @@ -65,34 +65,33 @@ * wire has been clicked on by the user. * TODO: Fix this for global routing, currently for detailed only. 
*/ -ezgl::rectangle draw_get_rr_chan_bbox(int inode) { +ezgl::rectangle draw_get_rr_chan_bbox(RRNodeId inode) { double left = 0, right = 0, top = 0, bottom = 0; t_draw_coords* draw_coords = get_draw_coords_vars(); auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - auto rr_node = RRNodeId(inode); - switch (rr_graph.node_type(rr_node)) { + switch (rr_graph.node_type(inode)) { case CHANX: - left = draw_coords->tile_x[rr_graph.node_xlow(rr_node)]; - right = draw_coords->tile_x[rr_graph.node_xhigh(rr_node)] + left = draw_coords->tile_x[rr_graph.node_xlow(inode)]; + right = draw_coords->tile_x[rr_graph.node_xhigh(inode)] + draw_coords->get_tile_width(); - bottom = draw_coords->tile_y[rr_graph.node_ylow(rr_node)] + bottom = draw_coords->tile_y[rr_graph.node_ylow(inode)] + draw_coords->get_tile_width() - + (1. + rr_graph.node_track_num(rr_node)); - top = draw_coords->tile_y[rr_graph.node_ylow(rr_node)] + + (1. + rr_graph.node_track_num(inode)); + top = draw_coords->tile_y[rr_graph.node_ylow(inode)] + draw_coords->get_tile_width() - + (1. + rr_graph.node_track_num(rr_node)); + + (1. + rr_graph.node_track_num(inode)); break; case CHANY: - left = draw_coords->tile_x[rr_graph.node_xlow(rr_node)] + left = draw_coords->tile_x[rr_graph.node_xlow(inode)] + draw_coords->get_tile_width() - + (1. + rr_graph.node_track_num(rr_node)); - right = draw_coords->tile_x[rr_graph.node_xlow(rr_node)] + + (1. + rr_graph.node_track_num(inode)); + right = draw_coords->tile_x[rr_graph.node_xlow(inode)] + draw_coords->get_tile_width() - + (1. + rr_graph.node_track_num(rr_node)); - bottom = draw_coords->tile_y[rr_graph.node_ylow(rr_node)]; - top = draw_coords->tile_y[rr_graph.node_yhigh(rr_node)] + + (1. 
+ rr_graph.node_track_num(inode)); + bottom = draw_coords->tile_y[rr_graph.node_ylow(inode)]; + top = draw_coords->tile_y[rr_graph.node_yhigh(inode)] + draw_coords->get_tile_width(); break; default: @@ -166,34 +165,36 @@ void draw_highlight_blocks_color(t_logical_block_type_ptr type, /* If an rr_node has been clicked on, it will be highlighted in MAGENTA. * If so, and toggle nets is selected, highlight the whole net in that colour. */ -void highlight_nets(char* message, int hit_node, bool is_flat) { +void highlight_nets(char* message, RRNodeId hit_node, bool is_flat) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& route_ctx = g_vpr_ctx.routing(); + /* Don't crash if there's no routing */ + if (route_ctx.route_trees.empty()) + return; + t_draw_state* draw_state = get_draw_state_vars(); for (auto net_id : cluster_ctx.clb_nlist.nets()) { - if (!route_ctx.route_trees.empty()) { - if (!route_ctx.route_trees[net_id]) - continue; - ParentNetId parent_id = get_cluster_net_parent_id(g_vpr_ctx.atom().lookup, net_id, is_flat); - - for (auto& rt_node : route_ctx.route_trees[parent_id].value().all_nodes()) { - int inode = size_t(rt_node.inode); - if (draw_state->draw_rr_node[inode].color == ezgl::MAGENTA) { - draw_state->net_color[net_id] = draw_state->draw_rr_node[inode].color; - if (inode == hit_node) { - std::string orig_msg(message); - sprintf(message, "%s || Net: %zu (%s)", orig_msg.c_str(), - size_t(net_id), - cluster_ctx.clb_nlist.net_name(net_id).c_str()); - } - } else if (draw_state->draw_rr_node[inode].color - == ezgl::WHITE) { - // If node is de-selected. 
- draw_state->net_color[net_id] = ezgl::BLACK; - break; + ParentNetId parent_id = get_cluster_net_parent_id(g_vpr_ctx.atom().lookup, net_id, is_flat); + if (!route_ctx.route_trees[parent_id]) + continue; + + for (auto& rt_node : route_ctx.route_trees[parent_id].value().all_nodes()) { + RRNodeId inode = rt_node.inode; + if (draw_state->draw_rr_node[inode].color == ezgl::MAGENTA) { + draw_state->net_color[net_id] = draw_state->draw_rr_node[inode].color; + if (inode == hit_node) { + std::string orig_msg(message); + sprintf(message, "%s || Net: %zu (%s)", orig_msg.c_str(), + size_t(net_id), + cluster_ctx.clb_nlist.net_name(net_id).c_str()); } + } else if (draw_state->draw_rr_node[inode].color + == ezgl::WHITE) { + // If node is de-selected. + draw_state->net_color[net_id] = ezgl::BLACK; + break; } } } @@ -205,15 +206,16 @@ void highlight_nets(char* message, int hit_node, bool is_flat) { * fan_in into the node in blue and fan_out from the node in red. If de-highlighted, * de-highlight its fan_in and fan_out. */ -void draw_highlight_fan_in_fan_out(const std::set& nodes) { +void draw_highlight_fan_in_fan_out(const std::set& nodes) { t_draw_state* draw_state = get_draw_state_vars(); auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; + for (auto node : nodes) { /* Highlight the fanout nodes in red. */ - for (t_edge_size iedge = 0, l = rr_graph.num_edges(RRNodeId(node)); + for (t_edge_size iedge = 0, l = rr_graph.num_edges(node); iedge < l; iedge++) { - int fanout_node = size_t(rr_graph.edge_sink_node(RRNodeId(node), iedge)); + RRNodeId fanout_node = rr_graph.edge_sink_node(node, iedge); if (draw_state->draw_rr_node[node].color == ezgl::MAGENTA && draw_state->draw_rr_node[fanout_node].color @@ -229,22 +231,21 @@ void draw_highlight_fan_in_fan_out(const std::set& nodes) { } /* Highlight the nodes that can fanin to this node in blue. 
*/ - for (const RRNodeId& inode : rr_graph.nodes()) { - for (t_edge_size iedge = 0, l = rr_graph.num_edges(inode); iedge < l; - iedge++) { - int fanout_node = size_t(rr_graph.edge_sink_node(inode, iedge)); + for (RRNodeId inode : rr_graph.nodes()) { + for (t_edge_size iedge = 0, l = rr_graph.num_edges(inode); iedge < l; iedge++) { + RRNodeId fanout_node = rr_graph.edge_sink_node(inode, iedge); if (fanout_node == node) { if (draw_state->draw_rr_node[node].color == ezgl::MAGENTA - && draw_state->draw_rr_node[size_t(inode)].color + && draw_state->draw_rr_node[inode].color != ezgl::MAGENTA) { // If node is highlighted, highlight its fanin - draw_state->draw_rr_node[size_t(inode)].color = ezgl::BLUE; - draw_state->draw_rr_node[size_t(inode)].node_highlighted = true; + draw_state->draw_rr_node[inode].color = ezgl::BLUE; + draw_state->draw_rr_node[inode].node_highlighted = true; } else if (draw_state->draw_rr_node[node].color == ezgl::WHITE) { // If node is de-highlighted, de-highlight its fanin - draw_state->draw_rr_node[size_t(inode)].color = DEFAULT_RR_NODE_COLOR; - draw_state->draw_rr_node[size_t(inode)].node_highlighted = false; + draw_state->draw_rr_node[inode].color = DEFAULT_RR_NODE_COLOR; + draw_state->draw_rr_node[inode].node_highlighted = false; } } } @@ -252,8 +253,8 @@ void draw_highlight_fan_in_fan_out(const std::set& nodes) { } } -std::set draw_expand_non_configurable_rr_nodes(int from_node) { - std::set expanded_nodes; +std::set draw_expand_non_configurable_rr_nodes(RRNodeId from_node) { + std::set expanded_nodes; draw_expand_non_configurable_rr_nodes_recurr(from_node, expanded_nodes); return expanded_nodes; } @@ -275,10 +276,9 @@ void deselect_all() { for (auto net_id : cluster_ctx.clb_nlist.nets()) draw_state->net_color[net_id] = ezgl::BLACK; - for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { - size_t i = (size_t)rr_id; - draw_state->draw_rr_node[i].color = DEFAULT_RR_NODE_COLOR; - draw_state->draw_rr_node[i].node_highlighted = false; + for 
(RRNodeId inode : device_ctx.rr_graph.nodes()) { + draw_state->draw_rr_node[inode].color = DEFAULT_RR_NODE_COLOR; + draw_state->draw_rr_node[inode].node_highlighted = false; } get_selected_sub_block_info().clear(); } diff --git a/vpr/src/draw/draw_searchbar.h b/vpr/src/draw/draw_searchbar.h index fcb8a69b0ea..6fc1092afd3 100644 --- a/vpr/src/draw/draw_searchbar.h +++ b/vpr/src/draw/draw_searchbar.h @@ -51,24 +51,24 @@ /* This function computes and returns the boundary coordinates of a channel * wire segment. This can be used for drawing a wire or determining if a * wire has been clicked on by the user.*/ -ezgl::rectangle draw_get_rr_chan_bbox(int inode); +ezgl::rectangle draw_get_rr_chan_bbox(RRNodeId inode); /* Highlights a block and its fanout/fanin. */ void draw_highlight_blocks_color(t_logical_block_type_ptr type, ClusterBlockId blk_id); /* If an rr_node has been clicked on, it will be highlighted in MAGENTA. * If so, and toggle nets is selected, highlight the whole net in that colour.*/ -void highlight_nets(char* message, int hit_node, bool is_flat); +void highlight_nets(char* message, RRNodeId hit_node, bool is_flat); /* If an rr_node has been clicked on, it will be either highlighted in MAGENTA, * or de-highlighted in WHITE. If highlighted, and toggle_rr is selected, highlight * fan_in into the node in blue and fan_out from the node in red. If de-highlighted, * de-highlight its fan_in and fan_out. */ -void draw_highlight_fan_in_fan_out(const std::set& nodes); +void draw_highlight_fan_in_fan_out(const std::set& nodes); /* Calls draw_expand_non_configurable_rr_nodes_recurr with hit_node as from_node * and an empty set of expanded_nodes. */ -std::set draw_expand_non_configurable_rr_nodes(int hit_node); +std::set draw_expand_non_configurable_rr_nodes(RRNodeId hit_node); /* Sets the color of all clbs, nets and rr_nodes to the default. 
* as well as clearing the highlighed sub-block */ diff --git a/vpr/src/draw/draw_types.h b/vpr/src/draw/draw_types.h index 857519ba25f..c014740e374 100644 --- a/vpr/src/draw/draw_types.h +++ b/vpr/src/draw/draw_types.h @@ -233,7 +233,7 @@ struct t_draw_state { * ROUTING is on screen. * [0..device_ctx.rr_nodes.size()-1] */ - std::vector draw_rr_node; + vtr::vector draw_rr_node; std::shared_ptr setup_timing_info; diff --git a/vpr/src/draw/search_bar.cpp b/vpr/src/draw/search_bar.cpp index 361728a904c..590e4981d61 100644 --- a/vpr/src/draw/search_bar.cpp +++ b/vpr/src/draw/search_bar.cpp @@ -13,6 +13,7 @@ * */ +#include "physical_types.h" #ifndef NO_GRAPHICS # include # include @@ -92,8 +93,8 @@ void search_and_highlight(GtkWidget* /*widget*/, ezgl::application* app) { return; } - highlight_rr_nodes(rr_node_id); - auto_zoom_rr_node(rr_node_id); + highlight_rr_nodes(RRNodeId(rr_node_id)); + auto_zoom_rr_node(RRNodeId(rr_node_id)); } else if (search_type == "Block ID") { @@ -175,13 +176,13 @@ void search_and_highlight(GtkWidget* /*widget*/, ezgl::application* app) { app->refresh_drawing(); } -bool highlight_rr_nodes(int hit_node) { +bool highlight_rr_nodes(RRNodeId hit_node) { t_draw_state* draw_state = get_draw_state_vars(); //TODO: fixed sized char array may cause overflow. 
char message[250] = ""; - if (hit_node != OPEN) { + if (hit_node) { const auto& device_ctx = g_vpr_ctx.device(); auto nodes = draw_expand_non_configurable_rr_nodes(hit_node); for (auto node : nodes) { @@ -229,23 +230,25 @@ bool highlight_rr_nodes(int hit_node) { return true; } -void auto_zoom_rr_node(int rr_node_id) { +void auto_zoom_rr_node(RRNodeId rr_node_id) { t_draw_coords* draw_coords = get_draw_coords_vars(); auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; ezgl::rectangle rr_node; // find the location of the node - switch (rr_graph.node_type(RRNodeId(rr_node_id))) { + switch (rr_graph.node_type(rr_node_id)) { case IPIN: case OPIN: { - int i = rr_graph.node_xlow(RRNodeId(rr_node_id)); - int j = rr_graph.node_ylow(RRNodeId(rr_node_id)); - int layer_num = rr_graph.node_layer(RRNodeId(rr_node_id)); - t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type({i, j, layer_num}); - int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); - int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); - int ipin = rr_graph.node_ptc_num(RRNodeId(rr_node_id)); + t_physical_tile_loc tile_loc = { + rr_graph.node_xlow(rr_node_id), + rr_graph.node_ylow(rr_node_id), + rr_graph.node_layer(rr_node_id)}; + t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type(tile_loc); + int width_offset = device_ctx.grid.get_width_offset(tile_loc); + int height_offset = device_ctx.grid.get_height_offset(tile_loc); + + int ipin = rr_graph.node_ptc_num(rr_node_id); float xcen, ycen; for (const e_side& iside : SIDES) { diff --git a/vpr/src/draw/search_bar.h b/vpr/src/draw/search_bar.h index 0251f4dc4f6..8108bfa8c4a 100644 --- a/vpr/src/draw/search_bar.h +++ b/vpr/src/draw/search_bar.h @@ -20,8 +20,8 @@ # include "draw_color.h" void search_and_highlight(GtkWidget* /*widget*/, ezgl::application* app); -bool highlight_rr_nodes(int hit_node); -void auto_zoom_rr_node(int rr_node_id); +bool highlight_rr_nodes(RRNodeId 
hit_node); +void auto_zoom_rr_node(RRNodeId rr_node_id); void highlight_cluster_block(ClusterBlockId clb_index); void highlight_nets(ClusterNetId net_id); void highlight_nets(std::string net_name); diff --git a/vpr/src/pack/cluster_util.h b/vpr/src/pack/cluster_util.h index 1316229abc5..9a91e47ea7a 100644 --- a/vpr/src/pack/cluster_util.h +++ b/vpr/src/pack/cluster_util.h @@ -8,7 +8,7 @@ #include "vpr_utils.h" #include "constraints_report.h" -#include "timing_info.h" +#include "concrete_timing_info.h" #include "PreClusterDelayCalculator.h" #include "PreClusterTimingGraphResolver.h" #include "tatum/echo_writer.hpp" diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 700c661483d..1b9a6508010 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -7,6 +7,7 @@ #include #include +#include "NetPinTimingInvalidator.h" #include "vtr_assert.h" #include "vtr_log.h" #include "vtr_util.h" @@ -608,11 +609,15 @@ void try_place(const Netlist<>& net_list, placer_criticalities = std::make_unique( cluster_ctx.clb_nlist, netlist_pin_lookup); - pin_timing_invalidator = std::make_unique( - net_list, netlist_pin_lookup, - atom_ctx.nlist, atom_ctx.lookup, + pin_timing_invalidator = make_net_pin_timing_invalidator( + placer_opts.timing_update_type, + net_list, + netlist_pin_lookup, + atom_ctx.nlist, + atom_ctx.lookup, *timing_info->timing_graph(), is_flat); + //First time compute timing and costs, compute from scratch PlaceCritParams crit_params; crit_params.crit_exponent = first_crit_exponent; diff --git a/vpr/src/place/place_timing_update.cpp b/vpr/src/place/place_timing_update.cpp index 783937ec77d..619ea0ad068 100644 --- a/vpr/src/place/place_timing_update.cpp +++ b/vpr/src/place/place_timing_update.cpp @@ -149,7 +149,7 @@ void update_timing_classes(const PlaceCritParams& crit_params, * by iterating over the set of clustered netlist connections/pins * returned by PlacerCriticalities::pins_with_modified_criticality(). 
* - * Hence, this routine should always be called when PlacerCriticalites + * Hence, this routine should always be called when PlacerCriticalities * is enabled to be updated in update_timing_classes(). Otherwise, the * incremental method will no longer be correct. */ diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index 74682d220f3..5476e24cafe 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -4,6 +4,7 @@ #include #include +#include "rr_graph_fwd.h" #include "vtr_assert.h" #include "vtr_ndmatrix.h" #include "vtr_log.h" @@ -149,8 +150,8 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, t_physical_tile_type_ptr to_type, int to_pin, int to_pin_class, - int* src_rr, - int* sink_rr); + RRNodeId& out_src_node, + RRNodeId& out_sink_node); static bool verify_delta_delays(const vtr::NdMatrix& delta_delays); @@ -379,14 +380,14 @@ static float route_connection_delay( VTR_ASSERT(sink_rr_node != RRNodeId::INVALID()); - if (!measure_directconnect && directconnect_exists(size_t(source_rr_node), size_t(sink_rr_node))) { + if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) { //Skip if we shouldn't measure direct connects and a direct connect exists continue; } { successfully_routed = route_profiler.calculate_delay( - size_t(source_rr_node), size_t(sink_rr_node), + source_rr_node, sink_rr_node, router_opts, &net_delay_value); } @@ -467,9 +468,7 @@ static void generic_compute_matrix_dijkstra_expansion( RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, source_x, source_y, SOURCE, driver_ptc); VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); - auto delays = calculate_all_path_delays_from_rr_node(size_t(source_rr_node), - router_opts, - is_flat); + auto delays = calculate_all_path_delays_from_rr_node(source_rr_node, router_opts, is_flat); bool path_to_all_sinks = true; for (int sink_x = start_x; sink_x 
<= end_x; sink_x++) { @@ -504,12 +503,12 @@ static void generic_compute_matrix_dijkstra_expansion( VTR_ASSERT(sink_rr_node != RRNodeId::INVALID()); - if (!measure_directconnect && directconnect_exists(size_t(source_rr_node), size_t(sink_rr_node))) { + if (!measure_directconnect && directconnect_exists(source_rr_node, sink_rr_node)) { //Skip if we shouldn't measure direct connects and a direct connect exists continue; } - if (std::isnan(delays[size_t(sink_rr_node)])) { + if (std::isnan(delays[sink_rr_node])) { // This sink was not found continue; } @@ -523,7 +522,7 @@ static void generic_compute_matrix_dijkstra_expansion( #endif found_matrix[delta_x][delta_y] = true; - add_delay_to_matrix(&matrix, delta_x, delta_y, delays[size_t(sink_rr_node)]); + add_delay_to_matrix(&matrix, delta_x, delta_y, delays[sink_rr_node]); found_a_sink = true; break; @@ -1008,8 +1007,8 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, t_physical_tile_type_ptr to_type, int to_pin, int to_pin_class, - int* src_rr, - int* sink_rr) { + RRNodeId& out_src_node, + RRNodeId& out_sink_node) { VTR_ASSERT(from_type != nullptr); VTR_ASSERT(to_type != nullptr); @@ -1039,7 +1038,7 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, bool from_pin_found = false; if (direct->from_side != NUM_SIDES) { RRNodeId from_pin_rr = node_lookup.find_node(layer_num, x, y, OPIN, from_pin, direct->from_side); - from_pin_found = (from_pin_rr != RRNodeId::INVALID()); + from_pin_found = from_pin_rr.is_valid(); } else { from_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, x, y, OPIN, from_pin).empty()); } @@ -1100,13 +1099,13 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, { RRNodeId src_rr_candidate = node_lookup.find_node(found_layer_num, from_x, from_y, SOURCE, from_pin_class); VTR_ASSERT(src_rr_candidate); - *src_rr = size_t(src_rr_candidate); + out_src_node = src_rr_candidate; } { RRNodeId sink_rr_candidate = 
node_lookup.find_node(found_layer_num, to_x, to_y, SINK, to_pin_class); VTR_ASSERT(sink_rr_candidate); - *sink_rr = size_t(sink_rr_candidate); + out_sink_node = sink_rr_candidate; } return true; @@ -1164,7 +1163,7 @@ void OverrideDelayModel::compute_override_delay_model( //sampled_rr_pairs and skipping them if they occur multiple times. int missing_instances = 0; int missing_paths = 0; - std::set<std::pair<int, int>> sampled_rr_pairs; + std::set<std::pair<RRNodeId, RRNodeId>> sampled_rr_pairs; for (int iconn = 0; iconn < num_conns; ++iconn) { //Find the associated pins int from_pin = find_pin(from_type, from_port.port_name(), from_port.port_low_index() + iconn); @@ -1179,9 +1178,9 @@ void OverrideDelayModel::compute_override_delay_model( int to_pin_class = find_pin_class(to_type, to_port.port_name(), to_port.port_low_index() + iconn, RECEIVER); VTR_ASSERT(to_pin_class != OPEN); - int src_rr = OPEN; - int sink_rr = OPEN; - bool found_sample_points = find_direct_connect_sample_locations(direct, from_type, from_pin, from_pin_class, to_type, to_pin, to_pin_class, &src_rr, &sink_rr); + bool found_sample_points; + RRNodeId src_rr, sink_rr; + found_sample_points = find_direct_connect_sample_locations(direct, from_type, from_pin, from_pin_class, to_type, to_pin, to_pin_class, src_rr, sink_rr); if (!found_sample_points) { ++missing_instances; @@ -1192,9 +1191,6 @@ void OverrideDelayModel::compute_override_delay_model( //sampled the associated source/sink pair and don't need to do so again if (sampled_rr_pairs.count({src_rr, sink_rr})) continue; - VTR_ASSERT(src_rr != OPEN); - VTR_ASSERT(sink_rr != OPEN); - float direct_connect_delay = std::numeric_limits<float>::quiet_NaN(); bool found_routing_path = route_profiler.calculate_delay(src_rr, sink_rr, router_opts2, &direct_connect_delay); @@ -1213,7 +1209,7 @@ void OverrideDelayModel::compute_override_delay_model( } } -bool directconnect_exists(int src_rr_node, int sink_rr_node) { +bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node) { //Returns true if there is a
directconnect between the two RR nodes // //This is checked by looking for a SOURCE -> OPIN -> IPIN -> SINK path @@ -1221,20 +1217,20 @@ bool directconnect_exists(int src_rr_node, int sink_rr_node) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - VTR_ASSERT(rr_graph.node_type(RRNodeId(src_rr_node)) == SOURCE && rr_graph.node_type(RRNodeId(sink_rr_node)) == SINK); + VTR_ASSERT(rr_graph.node_type(src_rr_node) == SOURCE && rr_graph.node_type(sink_rr_node) == SINK); //TODO: This is a constant depth search, but still may be too slow - for (t_edge_size i_src_edge = 0; i_src_edge < rr_graph.num_edges(RRNodeId(src_rr_node)); ++i_src_edge) { - int opin_rr_node = size_t(rr_graph.edge_sink_node(RRNodeId(src_rr_node), i_src_edge)); + for (t_edge_size i_src_edge = 0; i_src_edge < rr_graph.num_edges(src_rr_node); ++i_src_edge) { + RRNodeId opin_rr_node = rr_graph.edge_sink_node(src_rr_node, i_src_edge); - if (rr_graph.node_type(RRNodeId(opin_rr_node)) != OPIN) continue; + if (rr_graph.node_type(opin_rr_node) != OPIN) continue; - for (t_edge_size i_opin_edge = 0; i_opin_edge < rr_graph.num_edges(RRNodeId(opin_rr_node)); ++i_opin_edge) { - int ipin_rr_node = size_t(rr_graph.edge_sink_node(RRNodeId(opin_rr_node), i_opin_edge)); - if (rr_graph.node_type(RRNodeId(ipin_rr_node)) != IPIN) continue; + for (t_edge_size i_opin_edge = 0; i_opin_edge < rr_graph.num_edges(opin_rr_node); ++i_opin_edge) { + RRNodeId ipin_rr_node = rr_graph.edge_sink_node(opin_rr_node, i_opin_edge); + if (rr_graph.node_type(ipin_rr_node) != IPIN) continue; - for (t_edge_size i_ipin_edge = 0; i_ipin_edge < rr_graph.num_edges(RRNodeId(ipin_rr_node)); ++i_ipin_edge) { - if (size_t(sink_rr_node) == size_t(rr_graph.edge_sink_node(RRNodeId(ipin_rr_node), i_ipin_edge))) { + for (t_edge_size i_ipin_edge = 0; i_ipin_edge < rr_graph.num_edges(ipin_rr_node); ++i_ipin_edge) { + if (sink_rr_node == rr_graph.edge_sink_node(ipin_rr_node, i_ipin_edge)) { return true; } } diff --git 
a/vpr/src/place/timing_place_lookup.h b/vpr/src/place/timing_place_lookup.h index ac3e7f4df1e..30e1a8ae01a 100644 --- a/vpr/src/place/timing_place_lookup.h +++ b/vpr/src/place/timing_place_lookup.h @@ -13,6 +13,6 @@ std::unique_ptr compute_place_delay_model(const t_placer_opts& bool is_flat); std::vector get_best_classes(enum e_pin_type pintype, t_physical_tile_type_ptr type); -bool directconnect_exists(int src_rr_node, int sink_rr_node); +bool directconnect_exists(RRNodeId src_rr_node, RRNodeId sink_rr_node); #endif diff --git a/vpr/src/route/annotate_routing.cpp b/vpr/src/route/annotate_routing.cpp index 46762dd0f9b..f290b7ccbf2 100644 --- a/vpr/src/route/annotate_routing.cpp +++ b/vpr/src/route/annotate_routing.cpp @@ -62,7 +62,7 @@ vtr::vector annotate_rr_node_nets(const Netlist<>& net_li describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, - (int)size_t(rr_node), + rr_node, is_flat) .c_str()); } else { diff --git a/vpr/src/route/binary_heap.cpp b/vpr/src/route/binary_heap.cpp index 0a7d1447060..30ad19a02c7 100644 --- a/vpr/src/route/binary_heap.cpp +++ b/vpr/src/route/binary_heap.cpp @@ -1,4 +1,5 @@ #include "binary_heap.h" +#include "rr_graph_fwd.h" #include "vtr_log.h" static size_t parent(size_t i) { return i >> 1; } @@ -81,7 +82,7 @@ t_heap* BinaryHeap::get_heap_head() { } sift_up(hole, heap_[heap_tail_]); - } while (cheapest->index == OPEN); /* Get another one if invalid entry. */ + } while (!cheapest->index.is_valid()); /* Get another one if invalid entry. */ return (cheapest); } @@ -167,26 +168,6 @@ bool BinaryHeap::is_valid() const { return true; } -void BinaryHeap::invalidate_heap_entries(int sink_node, int ipin_node) { - /* marks all the heap entries consisting of sink_node, where it was reached - * via ipin_node, as invalid (open). used only by the breadth_first router - * and even then only in rare circumstances. 
- * - * This function enables forcing the breadth-first router to route to a - * sink more than once, using multiple ipins, which is useful in some - * architectures. - * */ - - for (size_t i = 1; i < heap_tail_; i++) { - if (heap_[i]->index == sink_node) { - if (heap_[i]->prev_node() == ipin_node) { - heap_[i]->index = OPEN; /* Invalid. */ - break; - } - } - } -} - void BinaryHeap::free_all_memory() { if (!heap_.empty()) { empty_heap(); @@ -220,16 +201,18 @@ void BinaryHeap::prune_heap() { continue; } - if (heap_[i]->index == OPEN) { + if (!heap_[i]->index.is_valid()) { free(heap_[i]); heap_[i] = nullptr; continue; } - VTR_ASSERT(static_cast(heap_[i]->index) < max_index_); + auto idx = size_t(heap_[i]->index); - if (best_heap_item[heap_[i]->index] == nullptr || best_heap_item[heap_[i]->index]->cost > heap_[i]->cost) { - best_heap_item[heap_[i]->index] = heap_[i]; + VTR_ASSERT(idx < max_index_); + + if (best_heap_item[idx] == nullptr || best_heap_item[idx]->cost > heap_[i]->cost) { + best_heap_item[idx] = heap_[i]; } } @@ -239,7 +222,9 @@ void BinaryHeap::prune_heap() { continue; } - if (best_heap_item[heap_[i]->index] != heap_[i]) { + auto idx = size_t(heap_[i]->index); + + if (best_heap_item[idx] != heap_[i]) { free(heap_[i]); heap_[i] = nullptr; } diff --git a/vpr/src/route/binary_heap.h b/vpr/src/route/binary_heap.h index bec798767e7..f1d017051d7 100644 --- a/vpr/src/route/binary_heap.h +++ b/vpr/src/route/binary_heap.h @@ -22,8 +22,6 @@ class BinaryHeap : public HeapInterface { void build_heap() final; void set_prune_limit(size_t max_index, size_t prune_limit) final; - void invalidate_heap_entries(int sink_node, int ipin_node) final; - void free_all_memory() final; private: diff --git a/vpr/src/route/bucket.cpp b/vpr/src/route/bucket.cpp index 79e0b220620..1804a74e4ca 100644 --- a/vpr/src/route/bucket.cpp +++ b/vpr/src/route/bucket.cpp @@ -1,6 +1,7 @@ #include "bucket.h" #include +#include "rr_graph_fwd.h" #include "vtr_log.h" #include "vpr_error.h" @@ -274,13 
+275,13 @@ void Bucket::push_back(t_heap* hptr) { } if (!min_push_cost_.empty()) { - if (hptr->cost > min_push_cost_[hptr->index]) { + if (hptr->cost > min_push_cost_[size_t(hptr->index)]) { BucketItem* item = reinterpret_cast(hptr); items_.free_item(item); return; } - min_push_cost_[hptr->index] = hptr->cost; + min_push_cost_[size_t(hptr->index)] = hptr->cost; } // Check to see if the range of costs observed by the heap has changed. @@ -442,10 +443,6 @@ t_heap* Bucket::get_heap_head() { return &item->item; } -void Bucket::invalidate_heap_entries(int /*sink_node*/, int /*ipin_node*/) { - throw std::runtime_error("invalidate_heap_entries not implemented for Bucket"); -} - void Bucket::print() { for (size_t i = heap_head_; i < heap_tail_; ++i) { if (heap_[heap_head_] != nullptr) { @@ -471,9 +468,11 @@ void Bucket::prune_heap() { for (size_t bucket = heap_head_; bucket <= heap_tail_; ++bucket) { for (BucketItem* item = heap_[bucket]; item != nullptr; item = item->next_bucket) { - VTR_ASSERT(static_cast(item->item.index) < max_index_); - if (best_heap_item[item->item.index] == nullptr || best_heap_item[item->item.index]->item.cost > item->item.cost) { - best_heap_item[item->item.index] = item; + auto idx = size_t(item->item.index); + VTR_ASSERT(idx < max_index_); + if (best_heap_item[idx] == nullptr + || best_heap_item[idx]->item.cost > item->item.cost) { + best_heap_item[idx] = item; } } } @@ -484,8 +483,9 @@ void Bucket::prune_heap() { BucketItem* item = heap_[bucket]; while (item != nullptr) { BucketItem* next_item = item->next_bucket; + auto idx = size_t(item->item.index); - if (best_heap_item[item->item.index] != item) { + if (best_heap_item[idx] != item) { // This item isn't the cheapest, return it to the free list. 
items_.free_item(item); } else { diff --git a/vpr/src/route/bucket.h b/vpr/src/route/bucket.h index 526b69958c5..b712d54eb7b 100644 --- a/vpr/src/route/bucket.h +++ b/vpr/src/route/bucket.h @@ -220,8 +220,6 @@ class Bucket : public HeapInterface { // Print items contained in buckets. void print(); - void invalidate_heap_entries(int sink_node, int ipin_node) final; - private: // Factor used to convert cost from float to int. Should be scaled to // enable sufficent precision in bucketting. diff --git a/vpr/src/route/check_route.cpp b/vpr/src/route/check_route.cpp index 3cf5c5c20f2..cf18d894efa 100644 --- a/vpr/src/route/check_route.cpp +++ b/vpr/src/route/check_route.cpp @@ -136,8 +136,8 @@ void check_route(const Netlist<>& net_list, " %s\n" " %s\n", size_t(net_id), - describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, size_t(rt_node.parent()->inode), is_flat).c_str(), - describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, size_t(inode), is_flat).c_str()); + describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, rt_node.parent()->inode, is_flat).c_str(), + describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, inode, is_flat).c_str()); } } @@ -479,9 +479,8 @@ void recompute_occupancy_from_scratch(const Netlist<>& net_list, bool is_flat) { auto& device_ctx = g_vpr_ctx.device(); /* First set the occupancy of everything to zero. 
*/ - /*FIXME: the type cast should be eliminated by making rr_node_route_inf adapt RRNodeId */ - for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) - route_ctx.rr_node_route_inf[(size_t)rr_id].set_occ(0); + for (RRNodeId inode : device_ctx.rr_graph.nodes()) + route_ctx.rr_node_route_inf[inode].set_occ(0); /* Now go through each net and count the tracks and pins used everywhere */ @@ -493,7 +492,7 @@ void recompute_occupancy_from_scratch(const Netlist<>& net_list, bool is_flat) { continue; for (auto& rt_node : route_ctx.route_trees[net_id].value().all_nodes()) { - size_t inode = size_t(rt_node.inode); + RRNodeId inode = rt_node.inode; route_ctx.rr_node_route_inf[inode].set_occ(route_ctx.rr_node_route_inf[inode].occ() + 1); } } @@ -509,8 +508,8 @@ void recompute_occupancy_from_scratch(const Netlist<>& net_list, bool is_flat) { int num_local_opins = route_ctx.clb_opins_used_locally[cluster_blk_id][iclass].size(); /* Will always be 0 for pads or SINK classes. */ for (int ipin = 0; ipin < num_local_opins; ipin++) { - int inode = route_ctx.clb_opins_used_locally[cluster_blk_id][iclass][ipin]; - VTR_ASSERT(inode >= 0 && inode < (ssize_t)device_ctx.rr_graph.num_nodes()); + RRNodeId inode = route_ctx.clb_opins_used_locally[cluster_blk_id][iclass][ipin]; + VTR_ASSERT(inode && size_t(inode) < device_ctx.rr_graph.num_nodes()); route_ctx.rr_node_route_inf[inode].set_occ(route_ctx.rr_node_route_inf[inode].occ() + 1); } } @@ -524,7 +523,7 @@ static void check_locally_used_clb_opins(const t_clb_opins_used& clb_opins_used_ /* Checks that enough OPINs on CLBs have been set aside (used up) to make a * * legal routing if subblocks connect to OPINs directly. 
 */ - int iclass, num_local_opins, inode, ipin; + int iclass, num_local_opins, ipin; t_rr_type rr_type; auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -537,7 +536,7 @@ static void check_locally_used_clb_opins(const t_clb_opins_used& clb_opins_used_ /* Always 0 for pads and for SINK classes */ for (ipin = 0; ipin < num_local_opins; ipin++) { - inode = clb_opins_used_locally[blk_id][iclass][ipin]; + RRNodeId inode = clb_opins_used_locally[blk_id][iclass][ipin]; check_node_and_range(RRNodeId(inode), route_type, is_flat); /* Node makes sense? */ /* Now check that node is an OPIN of the right type. */ @@ -613,16 +612,12 @@ static bool check_non_configurable_edges(const Netlist<>& net_list, // Collect all the edges used by this net's routing std::set<t_node_edge> routing_edges; - std::set<int> routing_nodes; + std::set<RRNodeId> routing_nodes; for (auto& rt_node : route_ctx.route_trees[net].value().all_nodes()) { - routing_nodes.insert(size_t(rt_node.inode)); + routing_nodes.insert(rt_node.inode); if (!rt_node.parent()) continue; - /* a lot of casts to silence the warnings * to clean this up: fix the type in vpr_types.h to use RRNodeIDs instead */ - int parent_inode = size_t(rt_node.parent()->inode); - int this_inode = size_t(rt_node.inode); - t_node_edge edge = {parent_inode, this_inode}; + t_node_edge edge = {rt_node.parent()->inode, rt_node.inode}; routing_edges.insert(edge); } @@ -640,7 +635,7 @@ static bool check_non_configurable_edges(const Netlist<>& net_list, //within a set is used by the routing for (const auto& rr_nodes : non_configurable_rr_sets.node_sets) { //Compute the intersection of the routing and current non-configurable nodes set - std::vector<int> intersection; + std::vector<RRNodeId> intersection; std::set_intersection(routing_nodes.begin(), routing_nodes.end(), rr_nodes.begin(), rr_nodes.end(), std::back_inserter(intersection)); @@ -655,7 +650,7 @@ static bool check_non_configurable_edges(const Netlist<>& net_list, //Compute the difference to identify the missing nodes //for detailed error
reporting -- the nodes //which are in rr_nodes but not in routing_nodes. - std::vector difference; + std::vector difference; std::set_difference(rr_nodes.begin(), rr_nodes.end(), routing_nodes.begin(), routing_nodes.end(), std::back_inserter(difference)); @@ -738,13 +733,13 @@ static bool check_non_configurable_edges(const Netlist<>& net_list, msg += vtr::string_fmt(" %s\n", describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, - missing_edge.from_node, + RRNodeId(missing_edge.from_node), is_flat) .c_str()); msg += vtr::string_fmt(" %s\n", describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, - missing_edge.to_node, + RRNodeId(missing_edge.to_node), is_flat) .c_str()); } @@ -803,7 +798,8 @@ void check_net_for_stubs(const Netlist<>& net_list, msg += vtr::string_fmt(" %s\n", describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, - inode, is_flat) + RRNodeId(inode), + is_flat) .c_str()); } diff --git a/vpr/src/route/connection_based_routing.cpp b/vpr/src/route/connection_based_routing.cpp index da88b945d7e..972e5bf31e7 100644 --- a/vpr/src/route/connection_based_routing.cpp +++ b/vpr/src/route/connection_based_routing.cpp @@ -5,14 +5,12 @@ // incremental rerouting resources class definitions Connection_based_routing_resources::Connection_based_routing_resources(const Netlist<>& net_list, - const vtr::vector>& net_terminals, + const vtr::vector>& net_terminals, bool is_flat) : net_list_(net_list) , net_terminals_(net_terminals) , is_flat_(is_flat) - , current_inet(NO_PREVIOUS) - , // not routing to a specific net yet (note that NO_PREVIOUS is not unsigned, so will be largest unsigned) - last_stable_critical_path_delay{0.0f} + , last_stable_critical_path_delay{0.0f} , critical_path_growth_tolerance{1.001f} , connection_criticality_tolerance{0.9f} , connection_delay_optimality_tolerance{1.1f} { @@ -31,6 +29,8 @@ 
Connection_based_routing_resources::Connection_based_routing_resources(const Net reached_rt_sinks.reserve(max_sink_pins_per_net); size_t routing_num_nets = net_list_.nets().size(); + remaining_targets.resize(routing_num_nets); + reached_rt_sinks.resize(routing_num_nets); lower_bound_connection_delay.resize(routing_num_nets); forcible_reroute_connection_flag.resize(routing_num_nets); @@ -125,15 +125,13 @@ bool Connection_based_routing_resources::forcibly_reroute_connections(float max_ return !any_connection_rerouted; } -void Connection_based_routing_resources::clear_force_reroute_for_connection(int rr_sink_node) { - forcible_reroute_connection_flag[current_inet][rr_sink_node] = false; +void Connection_based_routing_resources::clear_force_reroute_for_connection(ParentNetId net_id, RRNodeId rr_sink_node) { + forcible_reroute_connection_flag[net_id][rr_sink_node] = false; profiling::perform_forced_reroute(); } -void Connection_based_routing_resources::clear_force_reroute_for_net() { - VTR_ASSERT(current_inet != ParentNetId::INVALID()); - - auto& net_flags = forcible_reroute_connection_flag[current_inet]; +void Connection_based_routing_resources::clear_force_reroute_for_net(ParentNetId net_id) { + auto& net_flags = forcible_reroute_connection_flag[net_id]; for (auto& force_reroute_flag : net_flags) { if (force_reroute_flag.second) { force_reroute_flag.second = false; diff --git a/vpr/src/route/connection_based_routing.h b/vpr/src/route/connection_based_routing.h index 9e24998ac95..59f0edf2936 100644 --- a/vpr/src/route/connection_based_routing.h +++ b/vpr/src/route/connection_based_routing.h @@ -14,31 +14,35 @@ // pruning the route tree of large fanouts. 
Instead of rerouting to each sink of a congested net, // reroute only the connections to the ones that did not have a legal connection the previous time class Connection_based_routing_resources { - // Incremental reroute resources -------------- + /** Holds remaining target pin indices (if this net has a RouteTree from the previous + * iteration, its prune() call will update this) (should be moved into RouteTree) */ + vtr::vector> remaining_targets; - // a property of each net, but only valid after pruning the previous route tree - // the "targets" in question can be either rr_node indices or pin indices, the - // conversion from node to pin being performed by this class - std::vector remaining_targets; - - // contains rt_nodes representing sinks reached legally while pruning the route tree - // used to populate rt_node_of_sink after building route tree from traceback - // order does not matter - std::vector reached_rt_sinks; + /** Holds RRNodeIds of legally reached sinks. Used to build the external rt_node_to_sink + * lookup. 
(should be moved into RouteTree)*/ + vtr::vector> reached_rt_sinks; public: Connection_based_routing_resources(const Netlist<>& net_list, - const vtr::vector>& net_terminals, + const vtr::vector>& net_terminals, bool is_flat); // adding to the resources when they are reached during pruning // mark rr sink node as something that still needs to be reached - void toreach_rr_sink(int rr_sink_node) { remaining_targets.push_back(rr_sink_node); } + void toreach_rr_sink(ParentNetId net_id, int rr_sink_node) { + remaining_targets[net_id].push_back(rr_sink_node); + } // mark rt sink node as something that has been legally reached - void reached_rt_sink(RRNodeId rt_sink) { reached_rt_sinks.push_back(rt_sink); } + void reached_rt_sink(ParentNetId net_id, RRNodeId rt_sink) { + reached_rt_sinks[net_id].push_back(rt_sink); + } // get a handle on the resources - std::vector& get_remaining_targets() { return remaining_targets; } - std::vector& get_reached_rt_sinks() { return reached_rt_sinks; } + std::vector& get_remaining_targets(ParentNetId net_id) { + return remaining_targets[net_id]; + } + std::vector& get_reached_rt_sinks(ParentNetId net_id) { + return reached_rt_sinks[net_id]; + } bool sanity_check_lookup() const; @@ -48,7 +52,7 @@ class Connection_based_routing_resources { // Targeted reroute resources -------------- private: const Netlist<>& net_list_; - const vtr::vector>& net_terminals_; + const vtr::vector>& net_terminals_; bool is_flat_; // whether or not a connection should be forcibly rerouted the next iteration // takes [inet][sink_rr_node_index] and returns whether that connection should be rerouted or not @@ -57,15 +61,12 @@ class Connection_based_routing_resources { * 2. the connection is critical enough * 3. 
the connection is suboptimal, in comparison to lower_bound_connection_delay */ - vtr::vector> forcible_reroute_connection_flag; + vtr::vector> forcible_reroute_connection_flag; // the optimal delay for a connection [inet][ipin] ([0...num_net][1...num_pin]) // determined after the first routing iteration when only optimizing for timing delay vtr::vector> lower_bound_connection_delay; - // the current net that's being routed - ParentNetId current_inet; - // the most recent stable critical path delay // compared against the current iteration's critical path delay // if the growth is too high, some connections will be forcibly ripped up @@ -86,16 +87,12 @@ class Connection_based_routing_resources { //Updates the connection delay lower bound (if less than current best found) void update_lower_bound_connection_delay(ParentNetId net, int ipin, float delay); - // initialize routing resources at the start of routing to a new net - void prepare_routing_for_net(ParentNetId inet) { - current_inet = inet; - // fresh net with fresh targets - remaining_targets.clear(); - reached_rt_sinks.clear(); + void prepare_routing_for_net(ParentNetId net_id) { + remaining_targets[net_id].clear(); + reached_rt_sinks[net_id].clear(); } // get a handle on the resources - ParentNetId get_current_inet() const { return current_inet; } float get_stable_critical_path_delay() const { return last_stable_critical_path_delay; } bool critical_path_delay_grew_significantly(float new_critical_path_delay) const { @@ -105,17 +102,17 @@ class Connection_based_routing_resources { // for updating the last stable path delay void set_stable_critical_path_delay(float stable_critical_path_delay) { last_stable_critical_path_delay = stable_critical_path_delay; } - // get whether the connection to rr_sink_node of current_inet should be forcibly rerouted (can either assign or just read) - bool should_force_reroute_connection(int rr_sink_node) const { - auto itr = 
forcible_reroute_connection_flag[current_inet].find(rr_sink_node); + // get whether the connection to rr_sink_node of net_id should be forcibly rerouted (can either assign or just read) + bool should_force_reroute_connection(ParentNetId net_id, RRNodeId rr_sink_node) const { + auto itr = forcible_reroute_connection_flag[net_id].find(rr_sink_node); - if (itr == forcible_reroute_connection_flag[current_inet].end()) { + if (itr == forcible_reroute_connection_flag[net_id].end()) { return false; //A non-SINK end of a branch } return itr->second; } - void clear_force_reroute_for_connection(int rr_sink_node); - void clear_force_reroute_for_net(); + void clear_force_reroute_for_connection(ParentNetId net_id, RRNodeId rr_sink_node); + void clear_force_reroute_for_net(ParentNetId net_id); // check each connection of each net to see if any satisfy the criteria described above (for the forcible_reroute_connection_flag data structure) // and if so, mark them to be rerouted diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index 4d0c0f96f05..62db70ed31f 100644 --- a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -3,8 +3,9 @@ #include "binary_heap.h" #include "bucket.h" +#include "rr_graph_fwd.h" -inline static bool relevant_node_to_target(const RRGraphView* rr_graph, +static inline bool relevant_node_to_target(const RRGraphView* rr_graph, RRNodeId node_to_add, RRNodeId target_node) { VTR_ASSERT(rr_graph->node_type(target_node) == t_rr_type::SINK); @@ -57,17 +58,22 @@ inline void update_router_stats(const DeviceContext& device_ctx, } } +/** return tuple */ template -std::pair ConnectionRouter::timing_driven_route_connection_from_route_tree( +std::tuple ConnectionRouter::timing_driven_route_connection_from_route_tree( const RouteTreeNode& rt_root, - int sink_node, + RRNodeId sink_node, const t_conn_cost_params cost_params, t_bb bounding_box, RouterStats& router_stats, - const ConnectionParameters& conn_params) 
{ + const ConnectionParameters& conn_params, + bool can_grow_bb) { router_stats_ = &router_stats; conn_params_ = &conn_params; - t_heap* cheapest = timing_driven_route_connection_common_setup(rt_root, sink_node, cost_params, bounding_box); + + bool retry = false; + t_heap* cheapest; + std::tie(retry, cheapest) = timing_driven_route_connection_common_setup(rt_root, sink_node, cost_params, bounding_box, can_grow_bb); if (cheapest != nullptr) { rcv_path_manager.update_route_tree_set(cheapest->path_data); @@ -76,30 +82,34 @@ std::pair ConnectionRouter::timing_driven_route_connection_f heap_.free(cheapest); heap_.empty_heap(); rcv_path_manager.empty_heap(); - return std::make_pair(true, out); + return std::make_tuple(true, /*retry=*/false, out); } else { + reset_path_costs(); + modified_rr_node_inf_.clear(); heap_.empty_heap(); - return std::make_pair(false, t_heap()); + return std::make_tuple(false, retry, t_heap()); } } +/** Return */ template -t_heap* ConnectionRouter::timing_driven_route_connection_common_setup( +std::tuple ConnectionRouter::timing_driven_route_connection_common_setup( const RouteTreeNode& rt_root, - int sink_node, + RRNodeId sink_node, const t_conn_cost_params cost_params, - t_bb bounding_box) { + t_bb bounding_box, + bool can_grow_bb) { //Re-add route nodes from the existing route tree to the heap. //They need to be repushed onto the heap since each node's cost is target specific. 
add_route_tree_to_heap(rt_root, sink_node, cost_params, false); heap_.build_heap(); // via sifting down everything - int source_node = size_t(rt_root.inode); + RRNodeId source_node = rt_root.inode; if (heap_.is_empty_heap()) { VTR_LOG("No source in route tree: %s\n", describe_unrouteable_connection(source_node, sink_node, is_flat_).c_str()); - return nullptr; + return std::make_tuple(false, nullptr); } VTR_LOGV_DEBUG(router_debug_, " Routing to %d as normal net (BB: %d,%d x %d,%d)\n", sink_node, @@ -112,7 +122,25 @@ t_heap* ConnectionRouter::timing_driven_route_connection_common_setup( if (cheapest == nullptr) { // No path found within the current bounding box. - // Try again with no bounding box (i.e. a full device grid bounding box). + // + // If the bounding box is already max size, just fail + if (bounding_box.xmin == 0 + && bounding_box.ymin == 0 + && bounding_box.xmax == (int)(grid_.width() - 1) + && bounding_box.ymax == (int)(grid_.height() - 1)) { + VTR_LOG("%s\n", describe_unrouteable_connection(source_node, sink_node, is_flat_).c_str()); + return std::make_tuple(false, nullptr); + } + + // If we cannot grow the bounding box, leave unrouted and bubble up a signal + // to retry this net with a full-device bounding box. If we are already at full device extents, + // just fail + if (!can_grow_bb) { + VTR_LOG_WARN("No routing path for connection to sink_rr %d, leaving unrouted to retry on next iteration\n", sink_node); + return std::make_tuple(true, nullptr); + } + + // Otherwise, try again with full-device bounding box. 
// // Note that the additional run-time overhead of re-trying only occurs // when we were otherwise going to give up -- the typical case (route @@ -155,25 +183,27 @@ t_heap* ConnectionRouter::timing_driven_route_connection_common_setup( if (cheapest == nullptr) { VTR_LOG("%s\n", describe_unrouteable_connection(source_node, sink_node, is_flat_).c_str()); - return nullptr; + return std::make_tuple(false, nullptr); } - return cheapest; + return std::make_tuple(false, cheapest); } -//Finds a path from the route tree rooted at rt_root to sink_node for a high fanout net. +// Finds a path from the route tree rooted at rt_root to sink_node for a high fanout net. // -//Unlike timing_driven_route_connection_from_route_tree(), only part of the route tree -//which is spatially close to the sink is added to the heap. +// Unlike timing_driven_route_connection_from_route_tree(), only part of the route tree +// which is spatially close to the sink is added to the heap. +// Returns a tuple of */ template -std::pair ConnectionRouter::timing_driven_route_connection_from_route_tree_high_fanout( +std::tuple ConnectionRouter::timing_driven_route_connection_from_route_tree_high_fanout( const RouteTreeNode& rt_root, - int sink_node, + RRNodeId sink_node, const t_conn_cost_params cost_params, t_bb net_bounding_box, const SpatialRouteTreeLookup& spatial_rt_lookup, RouterStats& router_stats, - const ConnectionParameters& conn_params) { + const ConnectionParameters& conn_params, + bool can_grow_bb) { router_stats_ = &router_stats; conn_params_ = &conn_params; @@ -183,20 +213,22 @@ std::pair ConnectionRouter::timing_driven_route_connection_f t_bb high_fanout_bb = add_high_fanout_route_tree_to_heap(rt_root, sink_node, cost_params, spatial_rt_lookup, net_bounding_box); heap_.build_heap(); - int source_node = size_t(rt_root.inode); + RRNodeId source_node = rt_root.inode; if (heap_.is_empty_heap()) { VTR_LOG("No source in route tree: %s\n", describe_unrouteable_connection(source_node, sink_node, 
is_flat_).c_str()); - return std::make_pair(false, t_heap()); + return std::make_tuple(false, false, t_heap()); } VTR_LOGV_DEBUG(router_debug_, " Routing to %d as high fanout net (BB: %d,%d x %d,%d)\n", sink_node, high_fanout_bb.xmin, high_fanout_bb.ymin, high_fanout_bb.xmax, high_fanout_bb.ymax); - t_heap* cheapest = timing_driven_route_connection_from_heap(sink_node, - cost_params, - high_fanout_bb); + bool retry_with_full_bb = false; + t_heap* cheapest; + cheapest = timing_driven_route_connection_from_heap(sink_node, + cost_params, + high_fanout_bb); if (cheapest == nullptr) { //Found no path, that may be due to an unlucky choice of existing route tree sub-set, @@ -208,10 +240,11 @@ std::pair ConnectionRouter::timing_driven_route_connection_f reset_path_costs(); modified_rr_node_inf_.clear(); - cheapest = timing_driven_route_connection_common_setup(rt_root, - sink_node, - cost_params, - net_bounding_box); + std::tie(retry_with_full_bb, cheapest) = timing_driven_route_connection_common_setup(rt_root, + sink_node, + cost_params, + net_bounding_box, + can_grow_bb); } if (cheapest == nullptr) { @@ -219,7 +252,7 @@ std::pair ConnectionRouter::timing_driven_route_connection_f heap_.empty_heap(); rcv_path_manager.empty_heap(); - return std::make_pair(false, t_heap()); + return std::make_tuple(false, retry_with_full_bb, t_heap()); } rcv_path_manager.update_route_tree_set(cheapest->path_data); @@ -230,19 +263,21 @@ std::pair ConnectionRouter::timing_driven_route_connection_f heap_.empty_heap(); rcv_path_manager.empty_heap(); - return std::make_pair(true, out); + return std::make_tuple(true, retry_with_full_bb, out); } -// Finds a path to sink_node, starting from the elements currently in the heap. +//Finds a path to sink_node, starting from the elements currently in the heap. // // This is the core maze routing routine. 
// // Returns either the last element of the path, or nullptr if no path is found template -t_heap* ConnectionRouter::timing_driven_route_connection_from_heap(int sink_node, +t_heap* ConnectionRouter::timing_driven_route_connection_from_heap(RRNodeId sink_node, const t_conn_cost_params cost_params, t_bb bounding_box) { VTR_ASSERT_SAFE(heap_.is_valid()); + //std::cout << "using this: " << (void *)this << "\n"; + //std::cout << "using heap: " << heap_.get_ptr() << "\n"; if (heap_.is_empty_heap()) { //No source VTR_LOGV_DEBUG(router_debug_, " Initial heap empty (no source)\n"); @@ -258,10 +293,10 @@ t_heap* ConnectionRouter::timing_driven_route_connection_from_heap(int sin update_router_stats(device_ctx, rr_graph_, router_stats_, - RRNodeId(cheapest->index), + cheapest->index, false); - int inode = cheapest->index; + RRNodeId inode = cheapest->index; VTR_LOGV_DEBUG(router_debug_, " Popping node %d (cost: %g)\n", inode, cheapest->cost); @@ -303,7 +338,7 @@ t_heap* ConnectionRouter::timing_driven_route_connection_from_heap(int sin // Find shortest paths from specified route tree to all nodes in the RR graph template -std::vector ConnectionRouter::timing_driven_find_all_shortest_paths_from_route_tree( +vtr::vector ConnectionRouter::timing_driven_find_all_shortest_paths_from_route_tree( const RouteTreeNode& rt_root, const t_conn_cost_params cost_params, t_bb bounding_box, @@ -313,7 +348,7 @@ std::vector ConnectionRouter::timing_driven_find_all_shortest_path conn_params_ = &conn_params; // Add the route tree to the heap with no specific target node - int target_node = OPEN; + RRNodeId target_node = RRNodeId::INVALID(); add_route_tree_to_heap(rt_root, target_node, cost_params, false); heap_.build_heap(); // via sifting down everything @@ -331,10 +366,10 @@ std::vector ConnectionRouter::timing_driven_find_all_shortest_path // Note that to re-use code used for the regular A*-based router we use a // no-operation lookahead which always returns zero. 
template -std::vector ConnectionRouter::timing_driven_find_all_shortest_paths_from_heap( +vtr::vector ConnectionRouter::timing_driven_find_all_shortest_paths_from_heap( const t_conn_cost_params cost_params, t_bb bounding_box) { - std::vector cheapest_paths(rr_nodes_.size()); + vtr::vector cheapest_paths(rr_nodes_.size()); VTR_ASSERT_SAFE(heap_.is_valid()); @@ -348,26 +383,26 @@ std::vector ConnectionRouter::timing_driven_find_all_shortest_path update_router_stats(g_vpr_ctx.device(), rr_graph_, router_stats_, - RRNodeId(cheapest->index), + cheapest->index, false); - int inode = cheapest->index; + RRNodeId inode = cheapest->index; VTR_LOGV_DEBUG(router_debug_, " Popping node %d (cost: %g)\n", inode, cheapest->cost); // Since we want to find shortest paths to all nodes in the graph // we do not specify a target node. // - // By setting the target_node to OPEN in combination with the NoOp router + // By setting the target_node to INVALID in combination with the NoOp router // lookahead we can re-use the node exploration code from the regular router - int target_node = OPEN; + RRNodeId target_node = RRNodeId::INVALID(); timing_driven_expand_cheapest(cheapest, target_node, cost_params, bounding_box); - if (cheapest_paths[inode].index == OPEN || cheapest_paths[inode].cost >= cheapest->cost) { + if (cheapest_paths[inode].index == RRNodeId::INVALID() || cheapest_paths[inode].cost >= cheapest->cost) { VTR_LOGV_DEBUG(router_debug_, " Better cost to node %d: %g (was %g)\n", inode, cheapest->cost, cheapest_paths[inode].cost); cheapest_paths[inode] = *cheapest; } else { @@ -383,10 +418,10 @@ std::vector ConnectionRouter::timing_driven_find_all_shortest_path template void ConnectionRouter::timing_driven_expand_cheapest(t_heap* cheapest, - int target_node, + RRNodeId target_node, const t_conn_cost_params cost_params, t_bb bounding_box) { - int inode = cheapest->index; + RRNodeId inode = cheapest->index; t_rr_node_route_inf* route_inf = &rr_node_route_inf_[inode]; float 
best_total_cost = route_inf->path_cost; @@ -433,21 +468,19 @@ template void ConnectionRouter::timing_driven_expand_neighbours(t_heap* current, const t_conn_cost_params cost_params, t_bb bounding_box, - int target_node) { - /* Puts all the rr_nodes adjacent to current on the heap. - */ + RRNodeId target_node) { + /* Puts all the rr_nodes adjacent to current on the heap. */ t_bb target_bb; - if (target_node != OPEN) { - target_bb.xmin = rr_graph_->node_xlow(RRNodeId(target_node)); - target_bb.ymin = rr_graph_->node_ylow(RRNodeId(target_node)); - target_bb.xmax = rr_graph_->node_xhigh(RRNodeId(target_node)); - target_bb.ymax = rr_graph_->node_yhigh(RRNodeId(target_node)); + if (target_node != RRNodeId::INVALID()) { + target_bb.xmin = rr_graph_->node_xlow(target_node); + target_bb.ymin = rr_graph_->node_ylow(target_node); + target_bb.xmax = rr_graph_->node_xhigh(target_node); + target_bb.ymax = rr_graph_->node_yhigh(target_node); } // For each node associated with the current heap element, expand all of it's neighbors - int from_node_int = current->index; - RRNodeId from_node(from_node_int); + RRNodeId from_node = current->index; auto edges = rr_nodes_.edge_range(from_node); // This is a simple prefetch that prefetches: @@ -478,9 +511,9 @@ void ConnectionRouter::timing_driven_expand_neighbours(t_heap* current, for (RREdgeId from_edge : edges) { RRNodeId to_node = rr_nodes_.edge_sink_node(from_edge); timing_driven_expand_neighbour(current, - from_node_int, + from_node, from_edge, - size_t(to_node), + to_node, cost_params, bounding_box, target_node, @@ -493,14 +526,13 @@ void ConnectionRouter::timing_driven_expand_neighbours(t_heap* current, // to the heap. 
template void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, - const int from_node, - const RREdgeId from_edge, - const int to_node_int, + RRNodeId from_node, + RREdgeId from_edge, + RRNodeId to_node, const t_conn_cost_params cost_params, const t_bb bounding_box, - int target_node, + RRNodeId target_node, const t_bb target_bb) { - RRNodeId to_node(to_node_int); int to_xlow = rr_graph_->node_xlow(to_node); int to_ylow = rr_graph_->node_ylow(to_node); int to_xhigh = rr_graph_->node_xhigh(to_node); @@ -518,7 +550,7 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, " Pruned expansion of node %d edge %zu -> %d" " (to node location %d,%dx%d,%d outside of expanded" " net bounding box %d,%dx%d,%d)\n", - from_node, size_t(from_edge), to_node_int, + from_node, size_t(from_edge), size_t(to_node), to_xlow, to_ylow, to_xhigh, to_yhigh, bounding_box.xmin, bounding_box.ymin, bounding_box.xmax, bounding_box.ymax); return; /* Node is outside (expanded) bounding box. */ @@ -528,7 +560,7 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, * the issue of how to cost them properly so they don't get expanded before * * more promising routes, but makes route-through (via CLBs) impossible. * * Change this if you want to investigate route-throughs. 
*/ - if (target_node != OPEN) { + if (target_node != RRNodeId::INVALID()) { t_rr_type to_type = rr_graph_->node_type(to_node); if (to_type == IPIN) { // Check if this IPIN leads to the target block @@ -541,7 +573,7 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, " Pruned expansion of node %d edge %zu -> %d" " (to node is IPIN at %d,%dx%d,%d which does not" " lead to target block %d,%dx%d,%d)\n", - from_node, size_t(from_edge), to_node_int, + from_node, size_t(from_edge), size_t(to_node), to_xlow, to_ylow, to_xhigh, to_yhigh, target_bb.xmin, target_bb.ymin, target_bb.xmax, target_bb.ymax); return; @@ -550,7 +582,7 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, } VTR_LOGV_DEBUG(router_debug_, " Expanding node %d edge %zu -> %d\n", - from_node, size_t(from_edge), to_node_int); + from_node, size_t(from_edge), size_t(to_node)); // Check if the node exists in the route tree when RCV is enabled // Other pruning methods have been disabled when RCV is on, so this method is required to prevent "loops" from being created @@ -564,7 +596,7 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, timing_driven_add_to_heap(cost_params, current, from_node, - to_node_int, + to_node, from_edge, target_node); } @@ -574,10 +606,10 @@ void ConnectionRouter::timing_driven_expand_neighbour(t_heap* current, template void ConnectionRouter::timing_driven_add_to_heap(const t_conn_cost_params cost_params, const t_heap* current, - const int from_node, - const int to_node, + RRNodeId from_node, + RRNodeId to_node, const RREdgeId from_edge, - const int target_node) { + RRNodeId target_node) { const auto& device_ctx = g_vpr_ctx.device(); t_heap next; @@ -639,7 +671,7 @@ void ConnectionRouter::timing_driven_add_to_heap(const t_conn_cost_params if (rcv_path_manager.is_enabled() && current->path_data) { next_ptr->path_data->path_rr = current->path_data->path_rr; next_ptr->path_data->edge = current->path_data->edge; - 
next_ptr->path_data->path_rr.emplace_back(RRNodeId(from_node)); + next_ptr->path_data->path_rr.emplace_back(from_node); next_ptr->path_data->edge.emplace_back(from_edge); } @@ -647,7 +679,7 @@ void ConnectionRouter::timing_driven_add_to_heap(const t_conn_cost_params update_router_stats(device_ctx, rr_graph_, router_stats_, - RRNodeId(to_node), + to_node, true); } else { @@ -664,7 +696,7 @@ void ConnectionRouter::timing_driven_add_to_heap(const t_conn_cost_params #ifdef VTR_ASSERT_SAFE_ENABLED //Returns true if both nodes are part of the same non-configurable edge set -static bool same_non_config_node_set(int from_node, int to_node) { +static bool same_non_config_node_set(RRNodeId from_node, RRNodeId to_node) { auto& device_ctx = g_vpr_ctx.device(); auto from_itr = device_ctx.rr_node_to_non_config_node_set.find(from_node); @@ -682,18 +714,18 @@ static bool same_non_config_node_set(int from_node, int to_node) { template float ConnectionRouter::compute_node_cost_using_rcv(const t_conn_cost_params cost_params, - const int to_node, - const int target_node, - const float backwards_delay, - const float backwards_cong, - const float R_upstream) { + RRNodeId to_node, + RRNodeId target_node, + float backwards_delay, + float backwards_cong, + float R_upstream) { float expected_delay; float expected_cong; const t_conn_delay_budget* delay_budget = cost_params.delay_budget; // TODO: This function is not tested for is_flat == true VTR_ASSERT(is_flat_ != true); - std::tie(expected_delay, expected_cong) = router_lookahead_.get_expected_delay_and_cong(RRNodeId(to_node), RRNodeId(target_node), cost_params, R_upstream); + std::tie(expected_delay, expected_cong) = router_lookahead_.get_expected_delay_and_cong(to_node, target_node, cost_params, R_upstream); float expected_total_delay_cost; float expected_total_cong_cost; @@ -736,10 +768,10 @@ void ConnectionRouter::set_rcv_enabled(bool enable) { template void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, const 
t_conn_cost_params cost_params, - const int from_node, - const int to_node, - const RREdgeId from_edge, - const int target_node) { + RRNodeId from_node, + RRNodeId to_node, + RREdgeId from_edge, + RRNodeId target_node) { /* new_costs.backward_cost: is the "known" part of the cost to this node -- the * congestion cost of all the routing resources back to the existing route * plus the known delay of the total path back to the source. @@ -758,12 +790,12 @@ void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, float switch_Cinternal = rr_switch_inf_[iswitch].Cinternal; //To node info - auto rc_index = rr_graph_->node_rc_index(RRNodeId(to_node)); + auto rc_index = rr_graph_->node_rc_index(to_node); float node_C = rr_rc_data_[rc_index].C; float node_R = rr_rc_data_[rc_index].R; //From node info - float from_node_R = rr_rc_data_[rr_graph_->node_rc_index(RRNodeId(from_node))].R; + float from_node_R = rr_rc_data_[rr_graph_->node_rc_index(from_node)].R; //Update R_upstream if (switch_buffered) { @@ -811,8 +843,8 @@ void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, //cost. cong_cost = 0.; } - if (conn_params_->has_choking_spot_ && is_flat_ && rr_graph_->node_type(RRNodeId(to_node)) == IPIN) { - auto find_res = conn_params_->connection_choking_spots_.find(RRNodeId(to_node)); + if (conn_params_->has_choking_spot_ && is_flat_ && rr_graph_->node_type(to_node) == IPIN) { + auto find_res = conn_params_->connection_choking_spots_.find(to_node); if (find_res != conn_params_->connection_choking_spots_.end()) { cong_cost = cong_cost / pow(2, (float)find_res->second); } @@ -823,8 +855,8 @@ void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, to->backward_path_cost += cost_params.criticality * Tdel; //Delay cost if (cost_params.bend_cost != 0.) 
{ - t_rr_type from_type = rr_graph_->node_type(RRNodeId(from_node)); - t_rr_type to_type = rr_graph_->node_type(RRNodeId(to_node)); + t_rr_type from_type = rr_graph_->node_type(from_node); + t_rr_type to_type = rr_graph_->node_type(to_node); if ((from_type == CHANX && to_type == CHANY) || (from_type == CHANY && to_type == CHANX)) { to->backward_path_cost += cost_params.bend_cost; //Bend cost } @@ -840,8 +872,8 @@ void ConnectionRouter::evaluate_timing_driven_node_costs(t_heap* to, } else { const auto& device_ctx = g_vpr_ctx.device(); //Update total cost - float expected_cost = router_lookahead_.get_expected_cost(RRNodeId(to_node), - RRNodeId(target_node), + float expected_cost = router_lookahead_.get_expected_cost(to_node, + target_node, cost_params, to->R_upstream); VTR_LOGV_DEBUG(router_debug_ && !std::isfinite(expected_cost), @@ -881,7 +913,7 @@ void ConnectionRouter::empty_heap_annotating_node_route_inf() { template void ConnectionRouter::add_route_tree_to_heap( const RouteTreeNode& rt_node, - int target_node, + RRNodeId target_node, const t_conn_cost_params cost_params, bool from_high_fanout) { /* Puts the entire partial routing below and including rt_node onto the heap * @@ -907,7 +939,7 @@ void ConnectionRouter::add_route_tree_to_heap( if (is_flat_) { if (relevant_node_to_target(rr_graph_, child_node.inode, - RRNodeId(target_node))) { + target_node)) { add_route_tree_to_heap(child_node, target_node, cost_params, @@ -929,7 +961,7 @@ void ConnectionRouter::add_route_tree_to_heap( template void ConnectionRouter::add_route_tree_node_to_heap( const RouteTreeNode& rt_node, - int target_node, + RRNodeId target_node, const t_conn_cost_params cost_params, bool is_high_fanout) { const auto& device_ctx = g_vpr_ctx.device(); @@ -948,35 +980,35 @@ void ConnectionRouter::add_route_tree_node_to_heap( float tot_cost = backward_path_cost + cost_params.astar_fac * router_lookahead_.get_expected_cost(inode, - RRNodeId(target_node), + target_node, cost_params, R_upstream); 
VTR_LOGV_DEBUG(router_debug_, " Adding node %8d to heap from init route tree with cost %g (%s)\n", inode, tot_cost, - describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, size_t(inode), is_flat_).c_str()); + describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, inode, is_flat_).c_str()); push_back_node(&heap_, rr_node_route_inf_, - size_t(inode), tot_cost, NO_PREVIOUS, RREdgeId::INVALID(), + inode, tot_cost, RRNodeId::INVALID(), RREdgeId::INVALID(), backward_path_cost, R_upstream); } else { - float expected_total_cost = compute_node_cost_using_rcv(cost_params, size_t(inode), target_node, rt_node.Tdel, 0, R_upstream); + float expected_total_cost = compute_node_cost_using_rcv(cost_params, inode, target_node, rt_node.Tdel, 0, R_upstream); - push_back_node_with_info(&heap_, size_t(inode), expected_total_cost, + push_back_node_with_info(&heap_, inode, expected_total_cost, backward_path_cost, R_upstream, rt_node.Tdel, &rcv_path_manager); } update_router_stats(device_ctx, rr_graph_, router_stats_, - RRNodeId(inode), + inode, true); - router_stats_->rt_node_pushes[rr_graph_->node_type(RRNodeId(inode))]++; + router_stats_->rt_node_pushes[rr_graph_->node_type(inode)]++; if (is_high_fanout) { - router_stats_->rt_node_high_fanout_pushes[rr_graph_->node_type(RRNodeId(inode))]++; + router_stats_->rt_node_high_fanout_pushes[rr_graph_->node_type(inode)]++; } else { - router_stats_->rt_node_entire_tree_pushes[rr_graph_->node_type(RRNodeId(inode))]++; + router_stats_->rt_node_entire_tree_pushes[rr_graph_->node_type(inode)]++; } } @@ -995,7 +1027,7 @@ static t_bb adjust_highfanout_bounding_box(t_bb highfanout_bb) { template t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( const RouteTreeNode& rt_root, - int target_node, + RRNodeId target_node, const t_conn_cost_params cost_params, const SpatialRouteTreeLookup& spatial_rt_lookup, t_bb net_bounding_box) { @@ -1010,19 +1042,18 @@ t_bb 
ConnectionRouter::add_high_fanout_route_tree_to_heap( //the entire route tree (which is likely large for a high fanout net). //Determine which bin the target node is located in - RRNodeId target_node_id(target_node); - int target_bin_x = grid_to_bin_x(rr_graph_->node_xlow(target_node_id), spatial_rt_lookup); - int target_bin_y = grid_to_bin_y(rr_graph_->node_ylow(target_node_id), spatial_rt_lookup); + int target_bin_x = grid_to_bin_x(rr_graph_->node_xlow(target_node), spatial_rt_lookup); + int target_bin_y = grid_to_bin_y(rr_graph_->node_ylow(target_node), spatial_rt_lookup); int nodes_added = 0; int chan_nodes_added = 0; t_bb highfanout_bb; - highfanout_bb.xmin = rr_graph_->node_xlow(target_node_id); - highfanout_bb.xmax = rr_graph_->node_xhigh(target_node_id); - highfanout_bb.ymin = rr_graph_->node_ylow(target_node_id); - highfanout_bb.ymax = rr_graph_->node_yhigh(target_node_id); + highfanout_bb.xmin = rr_graph_->node_xlow(target_node); + highfanout_bb.xmax = rr_graph_->node_xhigh(target_node); + highfanout_bb.ymin = rr_graph_->node_ylow(target_node); + highfanout_bb.ymax = rr_graph_->node_yhigh(target_node); //Add existing routing starting from the target bin. 
//If the target's bin has insufficient existing routing add from the surrounding bins @@ -1042,7 +1073,7 @@ t_bb ConnectionRouter::add_high_fanout_route_tree_to_heap( RRNodeId rr_node_to_add = rt_node.inode; if (is_flat_) { - if (!relevant_node_to_target(rr_graph_, rr_node_to_add, target_node_id)) + if (!relevant_node_to_target(rr_graph_, rr_node_to_add, target_node)) continue; } @@ -1097,7 +1128,7 @@ std::unique_ptr make_connection_router(e_heap_type he const RRGraphView* rr_graph, const std::vector& rr_rc_data, const vtr::vector& rr_switch_inf, - std::vector& rr_node_route_inf, + vtr::vector& rr_node_route_inf, bool is_flat) { switch (heap_type) { case e_heap_type::BINARY_HEAP: @@ -1124,4 +1155,4 @@ std::unique_ptr make_connection_router(e_heap_type he VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown heap_type %d", heap_type); } -} +} \ No newline at end of file diff --git a/vpr/src/route/connection_router.h b/vpr/src/route/connection_router.h index 5956d5b0ad9..5834e852409 100644 --- a/vpr/src/route/connection_router.h +++ b/vpr/src/route/connection_router.h @@ -31,7 +31,7 @@ class ConnectionRouter : public ConnectionRouterInterface { const RRGraphView* rr_graph, const std::vector& rr_rc_data, const vtr::vector& rr_switch_inf, - std::vector& rr_node_route_inf, + vtr::vector& rr_node_route_inf, bool is_flat) : grid_(grid) , router_lookahead_(router_lookahead) @@ -41,7 +41,7 @@ class ConnectionRouter : public ConnectionRouterInterface { , rr_switch_inf_(rr_switch_inf.data(), rr_switch_inf.size()) , net_terminal_groups(g_vpr_ctx.routing().net_terminal_groups) , net_terminal_group_num(g_vpr_ctx.routing().net_terminal_group_num) - , rr_node_route_inf_(rr_node_route_inf.data(), rr_node_route_inf.size()) + , rr_node_route_inf_(rr_node_route_inf) , is_flat_(is_flat) , router_stats_(nullptr) , router_debug_(false) { @@ -60,34 +60,42 @@ class ConnectionRouter : public ConnectionRouterInterface { ::reset_path_costs(modified_rr_node_inf_); } - // Finds a path from the route tree 
rooted at rt_root to sink_node - // - // This is used when you want to allow previous routing of the same net to - // serve as valid start locations for the current connection. - // - // Returns either the last element of the path, or nullptr if no path is - // found - std::pair timing_driven_route_connection_from_route_tree( + /** Finds a path from the route tree rooted at rt_root to sink_node. + * This is used when you want to allow previous routing of the same net to + * serve as valid start locations for the current connection. + * + * Returns a tuple of: + * bool: path exists? (hard failure, rr graph disconnected) + * bool: should retry with full bounding box? (only used in parallel routing) + * t_heap: heap element of cheapest path */ + std::tuple timing_driven_route_connection_from_route_tree( const RouteTreeNode& rt_root, - int sink_node, + RRNodeId sink_node, const t_conn_cost_params cost_params, t_bb bounding_box, RouterStats& router_stats, - const ConnectionParameters& conn_params) final; - - // Finds a path from the route tree rooted at rt_root to sink_node for a - // high fanout net. - // - // Unlike timing_driven_route_connection_from_route_tree(), only part of - // the route tree which is spatially close to the sink is added to the heap. - std::pair timing_driven_route_connection_from_route_tree_high_fanout( + const ConnectionParameters& conn_params, + bool can_grow_bb) final; + + /** Finds a path from the route tree rooted at rt_root to sink_node for a + * high fanout net. + * + * Unlike timing_driven_route_connection_from_route_tree(), only part of + * the route tree which is spatially close to the sink is added to the heap. + * + * Returns a tuple of: + * bool: path exists? (hard failure, rr graph disconnected) + * bool: should retry with full bounding box? 
(only used in parallel routing) + * t_heap: heap element of cheapest path */ + std::tuple timing_driven_route_connection_from_route_tree_high_fanout( const RouteTreeNode& rt_root, - int sink_node, + RRNodeId sink_node, const t_conn_cost_params cost_params, - t_bb bounding_box, + t_bb net_bounding_box, const SpatialRouteTreeLookup& spatial_rt_lookup, RouterStats& router_stats, - const ConnectionParameters& conn_params) final; + const ConnectionParameters& conn_params, + bool can_grow_bb) final; // Finds a path from the route tree rooted at rt_root to all sinks // available. @@ -98,7 +106,7 @@ class ConnectionRouter : public ConnectionRouterInterface { // Dijkstra's algorithm with a modified exit condition (runs until heap is // empty). When using cost_params.astar_fac = 0, for efficiency the // RouterLookahead used should be the NoOpLookahead. - std::vector timing_driven_find_all_shortest_paths_from_route_tree( + vtr::vector timing_driven_find_all_shortest_paths_from_route_tree( const RouteTreeNode& rt_root, const t_conn_cost_params cost_params, t_bb bounding_box, @@ -123,7 +131,7 @@ class ConnectionRouter : public ConnectionRouterInterface { private: // Mark that data associated with rr_node "inode" has been modified, and // needs to be reset in reset_path_costs. - void add_to_mod_list(int inode) { + void add_to_mod_list(RRNodeId inode) { if (std::isinf(rr_node_route_inf_[inode].path_cost)) { modified_rr_node_inf_.push_back(inode); } @@ -144,14 +152,22 @@ class ConnectionRouter : public ConnectionRouterInterface { route_inf->backward_path_cost = cheapest->backward_path_cost; } - // Common logic from timing_driven_route_connection_from_route_tree and - // timing_driven_route_connection_from_route_tree_high_fanout for running - // connection router. 
- t_heap* timing_driven_route_connection_common_setup( + /** Common logic from timing_driven_route_connection_from_route_tree and + * timing_driven_route_connection_from_route_tree_high_fanout for running + * the connection router. + * @param[in] rt_root RouteTreeNode describing the current routing state + * @param[in] sink_node Sink node ID to route to + * @param[in] cost_params + * @param[in] bounding_box Keep search confined to this bounding box + * @param[in] can_grow_bb Can this fn grow the given bounding box? + * @return bool Signal to retry this connection with a full-device bounding box, + * @return t_heap* Heap element describing the path found. */ + std::tuple timing_driven_route_connection_common_setup( const RouteTreeNode& rt_root, - int sink_node, + RRNodeId sink_node, const t_conn_cost_params cost_params, - t_bb bounding_box); + t_bb bounding_box, + bool can_grow_bb); // Finds a path to sink_node, starting from the elements currently in the // heap. @@ -163,14 +179,14 @@ class ConnectionRouter : public ConnectionRouterInterface { // Returns either the last element of the path, or nullptr if no path is // found t_heap* timing_driven_route_connection_from_heap( - int sink_node, + RRNodeId sink_node, const t_conn_cost_params cost_params, t_bb bounding_box); // Expand this current node if it is a cheaper path. void timing_driven_expand_cheapest( t_heap* cheapest, - int target_node, + RRNodeId target_node, const t_conn_cost_params cost_params, t_bb bounding_box); @@ -179,7 +195,7 @@ class ConnectionRouter : public ConnectionRouterInterface { t_heap* current, const t_conn_cost_params cost_params, t_bb bounding_box, - int target_node); + RRNodeId target_node); // Conditionally adds to_node to the router heap (via path from from_node // via from_edge). @@ -188,12 +204,12 @@ class ConnectionRouter : public ConnectionRouterInterface { // to the heap. 
void timing_driven_expand_neighbour( t_heap* current, - const int from_node, - const RREdgeId from_edge, - const int to_node, + RRNodeId from_node, + RREdgeId from_edge, + RRNodeId to_node, const t_conn_cost_params cost_params, const t_bb bounding_box, - int target_node, + RRNodeId target_node, const t_bb target_bb); // Add to_node to the heap, and also add any nodes which are connected by @@ -201,22 +217,22 @@ class ConnectionRouter : public ConnectionRouterInterface { void timing_driven_add_to_heap( const t_conn_cost_params cost_params, const t_heap* current, - const int from_node, - const int to_node, - const RREdgeId from_edge, - const int target_node); + RRNodeId from_node, + RRNodeId to_node, + RREdgeId from_edge, + RRNodeId target_node); // Calculates the cost of reaching to_node void evaluate_timing_driven_node_costs( t_heap* to, const t_conn_cost_params cost_params, - const int from_node, - const int to_node, - const RREdgeId from_edge, - const int target_node); + RRNodeId from_node, + RRNodeId to_node, + RREdgeId from_edge, + RRNodeId target_node); // Find paths from current heap to all nodes in the RR graph - std::vector timing_driven_find_all_shortest_paths_from_heap( + vtr::vector timing_driven_find_all_shortest_paths_from_heap( const t_conn_cost_params cost_params, t_bb bounding_box); @@ -225,17 +241,17 @@ class ConnectionRouter : public ConnectionRouterInterface { //Adds the route tree rooted at rt_node to the heap, preparing it to be //used as branch-points for further routing. 
void add_route_tree_to_heap(const RouteTreeNode& rt_node, - int target_node, + RRNodeId target_node, const t_conn_cost_params cost_params, bool from_high_fanout); // Evaluate node costs using the RCV algorith float compute_node_cost_using_rcv(const t_conn_cost_params cost_params, - const int to_node, - const int target_node, - const float backwards_delay, - const float backwards_cong, - const float R_upstream); + RRNodeId to_node, + RRNodeId target_node, + float backwards_delay, + float backwards_cong, + float R_upstream); //Unconditionally adds rt_node to the heap // @@ -243,13 +259,13 @@ class ConnectionRouter : public ConnectionRouterInterface { //responsibility. void add_route_tree_node_to_heap( const RouteTreeNode& rt_node, - int target_node, + RRNodeId target_node, const t_conn_cost_params cost_params, bool is_high_fanout); t_bb add_high_fanout_route_tree_to_heap( const RouteTreeNode& rt_root, - int target_node, + RRNodeId target_node, const t_conn_cost_params cost_params, const SpatialRouteTreeLookup& spatial_route_tree_lookup, t_bb net_bounding_box); @@ -262,9 +278,9 @@ class ConnectionRouter : public ConnectionRouterInterface { vtr::array_view rr_switch_inf_; const vtr::vector>>& net_terminal_groups; const vtr::vector>& net_terminal_group_num; - vtr::array_view rr_node_route_inf_; + vtr::vector& rr_node_route_inf_; bool is_flat_; - std::vector modified_rr_node_inf_; + std::vector modified_rr_node_inf_; RouterStats* router_stats_; const ConnectionParameters* conn_params_; HeapImplementation heap_; @@ -274,7 +290,11 @@ class ConnectionRouter : public ConnectionRouterInterface { PathManager rcv_path_manager; }; -// Construct a connection router that uses the specified heap type. +/** Construct a connection router that uses the specified heap type. + * This function is not used, but removing it will result in "undefined reference" + * errors since heap type specializations won't get emitted from connection_router.cpp + * without it. 
+ * The alternative is moving all ConnectionRouter fn implementations into the header. */ std::unique_ptr make_connection_router( e_heap_type heap_type, const DeviceGrid& grid, @@ -283,7 +303,7 @@ std::unique_ptr make_connection_router( const RRGraphView* rr_graph, const std::vector& rr_rc_data, const vtr::vector& rr_switch_inf, - std::vector& rr_node_route_inf, + vtr::vector& rr_node_route_inf, bool is_flat); #endif /* _CONNECTION_ROUTER_H */ diff --git a/vpr/src/route/connection_router_interface.h b/vpr/src/route/connection_router_interface.h index c81d081b19a..2180dbe76f3 100644 --- a/vpr/src/route/connection_router_interface.h +++ b/vpr/src/route/connection_router_interface.h @@ -5,6 +5,7 @@ #include "heap_type.h" #include "route_tree_fwd.h" +#include "rr_graph_fwd.h" #include "vpr_types.h" #include "router_stats.h" #include "spatial_route_tree_lookup.h" @@ -43,35 +44,43 @@ class ConnectionRouterInterface { // Reset modified data in rr_node_route_inf based on modified_rr_node_inf. virtual void reset_path_costs() = 0; - // Finds a path from the route tree rooted at rt_root to sink_node - // - // This is used when you want to allow previous routing of the same net to - // serve as valid start locations for the current connection. - // - // Returns either the last element of the path, or nullptr if no path is - // found - virtual std::pair timing_driven_route_connection_from_route_tree( + /** Finds a path from the route tree rooted at rt_root to sink_node. + * This is used when you want to allow previous routing of the same net to + * serve as valid start locations for the current connection. + * + * Returns a tuple of: + * bool: path exists? (hard failure, rr graph disconnected) + * bool: should retry with full bounding box? 
(only used in parallel routing) + * t_heap: heap element of cheapest path */ + virtual std::tuple timing_driven_route_connection_from_route_tree( const RouteTreeNode& rt_root, - int sink_node, + RRNodeId sink_node, const t_conn_cost_params cost_params, t_bb bounding_box, RouterStats& router_stats, - const ConnectionParameters& conn_params) + const ConnectionParameters& conn_params, + bool can_grow_bb) = 0; - // Finds a path from the route tree rooted at rt_root to sink_node for a - // high fanout net. - // - // Unlike timing_driven_route_connection_from_route_tree(), only part of - // the route tree which is spatially close to the sink is added to the heap. - virtual std::pair timing_driven_route_connection_from_route_tree_high_fanout( + /** Finds a path from the route tree rooted at rt_root to sink_node for a + * high fanout net. + * + * Unlike timing_driven_route_connection_from_route_tree(), only part of + * the route tree which is spatially close to the sink is added to the heap. + * + * Returns a tuple of: + * bool: path exists? (hard failure, rr graph disconnected) + * bool: should retry with full bounding box? (only used in parallel routing) + * t_heap: heap element of cheapest path */ + virtual std::tuple timing_driven_route_connection_from_route_tree_high_fanout( const RouteTreeNode& rt_root, - int sink_node, + RRNodeId sink_node, const t_conn_cost_params cost_params, t_bb bounding_box, const SpatialRouteTreeLookup& spatial_rt_lookup, RouterStats& router_stats, - const ConnectionParameters& conn_params) + const ConnectionParameters& conn_params, + bool can_grow_bb) = 0; // Finds a path from the route tree rooted at rt_root to all sinks @@ -83,7 +92,7 @@ class ConnectionRouterInterface { // Dijkstra's algorithm with a modified exit condition (runs until heap is // empty). When using cost_params.astar_fac = 0, for efficiency the // RouterLookahead used should be the NoOpLookahead. 
- virtual std::vector timing_driven_find_all_shortest_paths_from_route_tree( + virtual vtr::vector timing_driven_find_all_shortest_paths_from_route_tree( const RouteTreeNode& rt_root, const t_conn_cost_params cost_params, t_bb bounding_box, diff --git a/vpr/src/route/edge_groups.cpp b/vpr/src/route/edge_groups.cpp index 1f78deaf0f9..6b63f7d8823 100644 --- a/vpr/src/route/edge_groups.cpp +++ b/vpr/src/route/edge_groups.cpp @@ -1,11 +1,12 @@ #include "edge_groups.h" #include +#include "rr_graph_fwd.h" // Adds non-configurable (undirected) edge to be grouped. // // Returns true if this is a new edge. -bool EdgeGroups::add_non_config_edge(int from_node, int to_node) { +bool EdgeGroups::add_non_config_edge(RRNodeId from_node, RRNodeId to_node) { return graph_[from_node].edges.insert(to_node).second && graph_[to_node].edges.insert(from_node).second; } @@ -49,7 +50,7 @@ t_non_configurable_rr_sets EdgeGroups::output_sets() { t_non_configurable_rr_sets sets; for (const auto& nodes : rr_non_config_node_sets_) { std::set edge_set; - std::set node_set(nodes.begin(), nodes.end()); + std::set node_set(nodes.begin(), nodes.end()); for (const auto& src : node_set) { for (const auto& dest : graph_[src].edges) { @@ -66,12 +67,12 @@ t_non_configurable_rr_sets EdgeGroups::output_sets() { // Set device context structures for non-configurable node sets. 
void EdgeGroups::set_device_context(DeviceContext& device_ctx) { - std::vector> rr_non_config_node_sets; + std::vector> rr_non_config_node_sets; for (const auto& item : rr_non_config_node_sets_) { rr_non_config_node_sets.emplace_back(std::move(item)); } - std::unordered_map rr_node_to_non_config_node_set; + std::unordered_map rr_node_to_non_config_node_set; for (size_t set = 0; set < rr_non_config_node_sets.size(); ++set) { for (const auto inode : rr_non_config_node_sets[set]) { rr_node_to_non_config_node_set.insert( diff --git a/vpr/src/route/edge_groups.h b/vpr/src/route/edge_groups.h index 9ff3876d691..90236ce3d65 100644 --- a/vpr/src/route/edge_groups.h +++ b/vpr/src/route/edge_groups.h @@ -26,7 +26,7 @@ class EdgeGroups { // Adds non-configurable (undirected) edge to be grouped. // // Returns true if this is a new edge. - bool add_non_config_edge(int from_node, int to_node); + bool add_non_config_edge(RRNodeId from_node, RRNodeId to_node); // After add_non_config_edge has been called for all edges, create_sets // will form groups of nodes that are connected via non-configurable @@ -42,19 +42,19 @@ class EdgeGroups { private: struct node_data { - std::unordered_set edges; // Set of indices into graph_ - int set = OPEN; // Index into rr_non_config_node_sets_ + std::unordered_set edges; // Set of indices into graph_ + int set = OPEN; // Index into rr_non_config_node_sets_ }; // Perform a DFS traversal marking everything reachable with the same set id size_t add_connected_group(const node_data& node); // Set of non-configurable edges. - std::unordered_map graph_; + std::unordered_map graph_; // Connected components, representing nodes connected by non-configurable edges. // Order is arbitrary. 
- std::vector> rr_non_config_node_sets_; + std::vector> rr_non_config_node_sets_; }; #endif diff --git a/vpr/src/route/heap_type.cpp b/vpr/src/route/heap_type.cpp index 9ae6c073a03..3cf46eb74b5 100644 --- a/vpr/src/route/heap_type.cpp +++ b/vpr/src/route/heap_type.cpp @@ -2,6 +2,7 @@ #include "binary_heap.h" #include "bucket.h" +#include "rr_graph_fwd.h" #include "vpr_error.h" #include "vpr_types.h" @@ -26,9 +27,9 @@ HeapStorage::alloc() { temp_ptr->cost = 0.; temp_ptr->backward_path_cost = 0.; temp_ptr->R_upstream = 0.; - temp_ptr->index = OPEN; + temp_ptr->index = RRNodeId::INVALID(); temp_ptr->path_data = nullptr; - temp_ptr->set_prev_node(NO_PREVIOUS); + temp_ptr->set_prev_node(RRNodeId::INVALID()); temp_ptr->set_prev_edge(RREdgeId::INVALID()); return (temp_ptr); } diff --git a/vpr/src/route/heap_type.h b/vpr/src/route/heap_type.h index b1125696de1..e0e759703a5 100644 --- a/vpr/src/route/heap_type.h +++ b/vpr/src/route/heap_type.h @@ -14,8 +14,6 @@ * cost: The cost used to sort heap. * For the timing-driven router this is the backward_path_cost + * expected cost to the target. - * For the breadth-first router it is the node cost to reach this - * point. * * backward_path_cost: Used only by the timing-driven router. The "known" * cost of the path up to and including this node. @@ -41,15 +39,19 @@ struct t_heap { float backward_path_cost = 0.; float R_upstream = 0.; - int index = OPEN; + RRNodeId index = RRNodeId::INVALID(); // Structure to handle extra RCV structures // Managed by PathManager class t_heap_path* path_data; + /** Previous node and edge IDs. 
These are not StrongIds for performance & brevity + * reasons: StrongIds can't be trivially placed into an anonymous union (see below) */ struct t_prev { - int node; - unsigned int edge; + uint32_t node; + uint32_t edge; + static_assert(sizeof(uint32_t) == sizeof(RRNodeId)); + static_assert(sizeof(uint32_t) == sizeof(RREdgeId)); }; t_heap* next_heap_item() const { @@ -60,20 +62,24 @@ struct t_heap { u.next = next; } - int prev_node() const { - return u.prev.node; + /** Get prev_node. + * Be careful: will return 0 (a valid id!) if uninitialized. */ + constexpr RRNodeId prev_node() const { + return RRNodeId(u.prev.node); } - void set_prev_node(int prev_node) { - u.prev.node = prev_node; + inline void set_prev_node(RRNodeId node) { + u.prev.node = size_t(node); } - RREdgeId prev_edge() const { + /** Get prev_edge. + * Be careful: will return 0 (a valid id!) if uninitialized. */ + constexpr RREdgeId prev_edge() const { return RREdgeId(u.prev.edge); } - void set_prev_edge(RREdgeId edge) { - u.prev.edge = (size_t)edge; + inline void set_prev_edge(RREdgeId edge) { + u.prev.edge = size_t(edge); } private: @@ -179,15 +185,6 @@ class HeapInterface { // Empty all items from the heap. virtual void empty_heap() = 0; - // marks all the heap entries consisting of sink_node, where it was - // reached via ipin_node, as invalid (open). used only by the - // breadth_first router and even then only in rare circumstances. - // - // This function enables forcing the breadth-first router to route to a - // sink more than once, using multiple ipins, which is useful in some - // architectures. - virtual void invalidate_heap_entries(int sink_node, int ipin_node) = 0; - // Free all storage used by the heap. // // This returns all memory allocated by the HeapInterface instance. 
Only diff --git a/vpr/src/route/overuse_report.cpp b/vpr/src/route/overuse_report.cpp index f2e0864ec25..077401d7289 100644 --- a/vpr/src/route/overuse_report.cpp +++ b/vpr/src/route/overuse_report.cpp @@ -52,11 +52,11 @@ void log_overused_nodes_status(int max_logged_overused_rr_nodes) { //Print overuse info body int overuse_index = 0; - for (const RRNodeId& rr_id : rr_graph.nodes()) { - int overuse = route_ctx.rr_node_route_inf[(size_t)rr_id].occ() - rr_graph.node_capacity(rr_id); + for (RRNodeId inode : rr_graph.nodes()) { + int overuse = route_ctx.rr_node_route_inf[inode].occ() - rr_graph.node_capacity(inode); if (overuse > 0) { - log_single_overused_node_status(overuse_index, rr_id); + log_single_overused_node_status(overuse_index, inode); ++overuse_index; //Reached the logging limit @@ -103,7 +103,7 @@ void report_overused_nodes(const Netlist<>& net_list, /* Report basic rr node info */ os << "Overused RR node #" << inode << '\n'; os << "Node id = " << size_t(node_id) << '\n'; - os << "Occupancy = " << route_ctx.rr_node_route_inf[size_t(node_id)].occ() << '\n'; + os << "Occupancy = " << route_ctx.rr_node_route_inf[node_id].occ() << '\n'; os << "Capacity = " << rr_graph.node_capacity(node_id) << "\n\n"; /* Report selective info based on the rr node type */ @@ -179,7 +179,7 @@ void generate_overused_nodes_to_congested_net_lookup(const Netlist<>& net_list, for (auto& rt_node : route_ctx.route_trees[net_id].value().all_nodes()) { RRNodeId inode = rt_node.inode; - int overuse = route_ctx.rr_node_route_inf[size_t(inode)].occ() - rr_graph.node_capacity(inode); + int overuse = route_ctx.rr_node_route_inf[inode].occ() - rr_graph.node_capacity(inode); if (overuse > 0) { nodes_to_nets_lookup[inode].insert(net_id); } @@ -394,7 +394,7 @@ static void log_single_overused_node_status(int overuse_index, RRNodeId node_id) VTR_LOG(" %7d", size_t(node_id)); //Occupancy - VTR_LOG(" %10d", route_ctx.rr_node_route_inf[size_t(node_id)].occ()); + VTR_LOG(" %10d", 
route_ctx.rr_node_route_inf[node_id].occ()); //Capacity VTR_LOG(" %9d", rr_graph.node_capacity(node_id)); diff --git a/vpr/src/route/partition_tree.cpp b/vpr/src/route/partition_tree.cpp new file mode 100644 index 00000000000..f896d93bc94 --- /dev/null +++ b/vpr/src/route/partition_tree.cpp @@ -0,0 +1,131 @@ +#include "partition_tree.h" +#include + +PartitionTree::PartitionTree(const Netlist<>& netlist) { + const auto& device_ctx = g_vpr_ctx.device(); + + auto all_nets = std::vector(netlist.nets().begin(), netlist.nets().end()); + _root = build_helper(netlist, all_nets, 0, 0, device_ctx.grid.width(), device_ctx.grid.height()); +} + +std::unique_ptr PartitionTree::build_helper(const Netlist<>& netlist, const std::vector& nets, int x1, int y1, int x2, int y2) { + if (nets.empty()) + return nullptr; + + const auto& route_ctx = g_vpr_ctx.routing(); + auto out = std::make_unique(); + + /* Find best cutline. In ParaDRo this is done using prefix sums, but + * life is too short to implement them, therefore I'm just doing a linear search, + * and the complexity is O((fpga width + height) * #nets * log2(w+h * #nets)). + * What we are searching for is the cutline with the most balanced workload (# of fanouts) + * on the sides. */ + int left, right, mine; + int score; + /* TODO: maybe put all of this into a tuple or struct? 
*/ + int best_score = std::numeric_limits::max(); + int best_pos = -1, best_left = -1, best_right = -1; + enum { X, + Y } best_axis + = X; + + for (int x = x1 + 1; x < x2; x++) { + left = right = mine = 0; + for (auto net_id : nets) { + t_bb bb = route_ctx.route_bb[net_id]; + size_t fanout = netlist.net_sinks(net_id).size(); + if (bb.xmin < x && bb.xmax < x) { + left += fanout; + } else if (bb.xmin > x && bb.xmax > x) { + right += fanout; + } else if (bb.xmin <= x && bb.xmax >= x) { + mine += fanout; + } else { + VTR_ASSERT(false); /* unreachable */ + } + } + score = abs(left - right); + if (score < best_score) { + best_score = score; + best_left = left; + best_right = right; + best_pos = x; + best_axis = X; + } + } + for (int y = y1 + 1; y < y2; y++) { + left = right = mine = 0; + for (auto net_id : nets) { + t_bb bb = route_ctx.route_bb[net_id]; + size_t fanout = netlist.net_sinks(net_id).size(); + if (bb.ymin < y && bb.ymax < y) { + left += fanout; + } else if (bb.ymin > y && bb.ymax > y) { + right += fanout; + } else if (bb.ymin <= y && bb.ymax >= y) { + mine += fanout; + } else { + VTR_ASSERT(false); /* unreachable */ + } + } + score = abs(left - right); + if (score < best_score) { + best_score = score; + best_left = left; + best_right = right; + best_pos = y; + best_axis = Y; + } + } + + /* If one of the sides has 0 nets in the best arrangement, + * there's no use in partitioning this: no parallelism comes out of it. */ + if (best_left == 0 || best_right == 0) { + out->nets = std::move(nets); + return out; + } + + /* Populate net IDs on each side + * and call next level of build_partition_trees. 
*/ + std::vector left_nets, right_nets, my_nets; + + if (best_axis == X) { + for (auto net_id : nets) { + t_bb bb = route_ctx.route_bb[net_id]; + if (bb.xmin < best_pos && bb.xmax < best_pos) { + left_nets.push_back(net_id); + } else if (bb.xmin > best_pos && bb.xmax > best_pos) { + right_nets.push_back(net_id); + } else if (bb.xmin <= best_pos && bb.xmax >= best_pos) { + my_nets.push_back(net_id); + } else { + VTR_ASSERT(false); /* unreachable */ + } + } + + out->left = build_helper(netlist, left_nets, x1, y1, best_pos, y2); + out->right = build_helper(netlist, right_nets, best_pos, y2, x2, y2); + } else { + VTR_ASSERT(best_axis == Y); + for (auto net_id : nets) { + t_bb bb = route_ctx.route_bb[net_id]; + if (bb.ymin < best_pos && bb.ymax < best_pos) { + left_nets.push_back(net_id); + } else if (bb.ymin > best_pos && bb.ymax > best_pos) { + right_nets.push_back(net_id); + } else if (bb.ymin <= best_pos && bb.ymax >= best_pos) { + my_nets.push_back(net_id); + } else { + VTR_ASSERT(false); /* unreachable */ + } + } + + out->left = build_helper(netlist, left_nets, x1, best_pos, x2, y2); + out->right = build_helper(netlist, right_nets, x1, y1, x2, best_pos); + } + + out->nets = std::move(my_nets); + out->cutline_axis = best_axis; + out->cutline_pos = best_pos; + return out; +} diff --git a/vpr/src/route/partition_tree.h b/vpr/src/route/partition_tree.h new file mode 100644 index 00000000000..97988d5fdbb --- /dev/null +++ b/vpr/src/route/partition_tree.h @@ -0,0 +1,66 @@ +#pragma once + +#include "connection_router.h" +#include "router_stats.h" + +/** Routing iteration results per thread. (for a subset of the input netlist) */ +struct RouteIterResults { + /** Are there any connections impossible to route due to a disconnected rr_graph? 
*/ + bool is_routable = true; + /** Net IDs for which timing_driven_route_net() actually got called */ + std::vector rerouted_nets; + /** RouterStats collected from my subset of nets */ + RouterStats stats; +}; + +/** Spatial partition tree for routing. + * + * This divides the netlist into a tree of regions, so that nets with non-overlapping + * bounding boxes can be routed in parallel. + * + * Branch nodes represent a cutline and their nets vector includes only the nets intersected + * by the cutline. Leaf nodes represent a final set of nets reached by partitioning. + * + * To route this in parallel, we first route the nets in the root node, then add + * its left and right to a task queue, and repeat this for the whole tree. + * + * The tree stores some routing results to be later combined, such as is_routable and + * rerouted_nets. (TODO: do this per thread instead of per node) */ +class PartitionTreeNode { + public: + /** Nets claimed by this node (intersected by cutline if branch, nets in final region if leaf) */ + std::vector nets; + /** Left subtree. */ + std::unique_ptr left = nullptr; + /** Right subtree. */ + std::unique_ptr right = nullptr; + /** Are there any connections impossible to route due to a disconnected rr_graph? */ + bool is_routable = false; + /** Net IDs for which timing_driven_route_net() actually got called */ + std::vector rerouted_nets; + + /* debug stuff */ + int cutline_axis = -1; + int cutline_pos = -1; + std::vector exec_times; +}; + +/** Holds the root PartitionTreeNode and exposes top level operations. */ +class PartitionTree { + public: + PartitionTree() = delete; + PartitionTree(const PartitionTree&) = delete; + PartitionTree(PartitionTree&&) = default; + PartitionTree& operator=(const PartitionTree&) = delete; + PartitionTree& operator=(PartitionTree&&) = default; + + /** Can only be built from a netlist */ + PartitionTree(const Netlist<>& netlist); + + /** Access root. 
Shouldn't cause a segfault, because PartitionTree constructor always makes a _root */ + inline PartitionTreeNode& root(void) { return *_root; } + + private: + std::unique_ptr _root; + std::unique_ptr build_helper(const Netlist<>& netlist, const std::vector& nets, int x1, int y1, int x2, int y2); +}; diff --git a/vpr/src/route/route_budgets.cpp b/vpr/src/route/route_budgets.cpp index 9f8fc689a66..ff14ec752ac 100644 --- a/vpr/src/route/route_budgets.cpp +++ b/vpr/src/route/route_budgets.cpp @@ -47,7 +47,7 @@ #include "route_timing.h" #include "tatum/report/TimingPathFwd.hpp" #include "tatum/base/TimingType.hpp" -#include "timing_info.h" +#include "concrete_timing_info.h" #include "tatum/echo_writer.hpp" #include "net_delay.h" #include "route_budgets.h" diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index 466608319fb..881dbfd46aa 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -20,6 +20,7 @@ #include "globals.h" #include "route_export.h" #include "route_common.h" +#include "route_parallel.h" #include "route_timing.h" #include "place_and_route.h" #include "rr_graph.h" @@ -69,23 +70,24 @@ * */ /******************** Subroutines local to route_common.c *******************/ -static vtr::vector> load_net_rr_terminals(const RRGraphView& rr_graph, - const Netlist<>& net_list, - bool is_flat); +static vtr::vector> load_net_rr_terminals(const RRGraphView& rr_graph, + const Netlist<>& net_list, + bool is_flat); static std::tuple>>, vtr::vector>> load_net_terminal_groups(const RRGraphView& rr_graph, const Netlist<>& net_list, - const vtr::vector>& net_rr_terminals, + const vtr::vector>& net_rr_terminals, bool is_flat); -static vtr::vector> load_rr_clb_sources(const RRGraphView& rr_graph, - const Netlist<>& net_list, - bool is_flat); +static vtr::vector> load_rr_clb_sources(const RRGraphView& rr_graph, + const Netlist<>& net_list, + bool is_flat); static t_clb_opins_used alloc_and_load_clb_opins_used_locally(); 
-static void adjust_one_rr_occ_and_acc_cost(int inode, int add_or_sub, float acc_fac); + +static void adjust_one_rr_occ_and_acc_cost(RRNodeId inode, int add_or_sub, float acc_fac); static vtr::vector load_is_clock_net(const Netlist<>& net_list, bool is_flat); @@ -257,10 +259,32 @@ bool try_route(const Netlist<>& net_list, VTR_LOG_WARN("No nets to route\n"); } - if (router_opts.router_algorithm == BREADTH_FIRST) { - VTR_LOG("Confirming router algorithm: BREADTH_FIRST (deleted, doesn't do anything).\n"); - //success = try_breadth_first_route(router_opts); - success = 0; + if (router_opts.router_algorithm == PARALLEL) { + VTR_LOG("Confirming router algorithm: PARALLEL.\n"); + +#ifdef VPR_USE_TBB + auto& atom_ctx = g_vpr_ctx.atom(); + + IntraLbPbPinLookup intra_lb_pb_pin_lookup(device_ctx.logical_block_types); + ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, atom_ctx.nlist, intra_lb_pb_pin_lookup); + + success = try_parallel_route(net_list, + *det_routing_arch, + router_opts, + analysis_opts, + segment_inf, + net_delay, + netlist_pin_lookup, + timing_info, + delay_calc, + first_iteration_priority, + is_flat); + + profiling::time_on_fanout_analysis(); +#else + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "VPR was not compiled with TBB support required for parallel routing\n"); +#endif + } else { /* TIMING_DRIVEN route */ VTR_LOG("Confirming router algorithm: TIMING_DRIVEN.\n"); auto& atom_ctx = g_vpr_ctx.atom(); @@ -295,7 +319,7 @@ bool feasible_routing() { auto& route_ctx = g_vpr_ctx.routing(); for (const RRNodeId& rr_id : rr_graph.nodes()) { - if (route_ctx.rr_node_route_inf[(size_t)rr_id].occ() > rr_graph.node_capacity(rr_id)) { + if (route_ctx.rr_node_route_inf[rr_id].occ() > rr_graph.node_capacity(rr_id)) { return (false); } } @@ -304,18 +328,18 @@ bool feasible_routing() { } //Returns all RR nodes in the current routing which are congested -std::vector collect_congested_rr_nodes() { +std::vector collect_congested_rr_nodes() { auto& device_ctx = 
g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; auto& route_ctx = g_vpr_ctx.routing(); - std::vector congested_rr_nodes; - for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { - short occ = route_ctx.rr_node_route_inf[(size_t)rr_id].occ(); - short capacity = rr_graph.node_capacity(rr_id); + std::vector congested_rr_nodes; + for (const RRNodeId inode : device_ctx.rr_graph.nodes()) { + short occ = route_ctx.rr_node_route_inf[inode].occ(); + short capacity = rr_graph.node_capacity(inode); if (occ > capacity) { - congested_rr_nodes.push_back((size_t)rr_id); + congested_rr_nodes.push_back(inode); } } @@ -324,23 +348,23 @@ std::vector collect_congested_rr_nodes() { /* Returns a vector from [0..device_ctx.rr_nodes.size()-1] containing the set * of nets using each RR node */ -std::vector> collect_rr_node_nets() { +vtr::vector> collect_rr_node_nets() { auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.routing(); auto& cluster_ctx = g_vpr_ctx.clustering(); - std::vector> rr_node_nets(device_ctx.rr_graph.num_nodes()); + vtr::vector> rr_node_nets(device_ctx.rr_graph.num_nodes()); for (ClusterNetId inet : cluster_ctx.clb_nlist.nets()) { if (!route_ctx.route_trees[inet]) continue; for (auto& rt_node : route_ctx.route_trees[inet].value().all_nodes()) { - rr_node_nets[size_t(rt_node.inode)].insert(inet); + rr_node_nets[rt_node.inode].insert(inet); } } return rr_node_nets; } -void pathfinder_update_single_node_occupancy(int inode, int add_or_sub) { +void pathfinder_update_single_node_occupancy(RRNodeId inode, int add_or_sub) { /* Updates pathfinder's occupancy by either adding or removing the * usage of a resource node. 
*/ @@ -366,12 +390,12 @@ void pathfinder_update_acc_cost_and_overuse_info(float acc_fac, OveruseInfo& ove size_t overused_nodes = 0, total_overuse = 0, worst_overuse = 0; for (const RRNodeId& rr_id : rr_graph.nodes()) { - int overuse = route_ctx.rr_node_route_inf[(size_t)rr_id].occ() - rr_graph.node_capacity(rr_id); + int overuse = route_ctx.rr_node_route_inf[rr_id].occ() - rr_graph.node_capacity(rr_id); // If overused, update the acc_cost and add this node to the overuse info // If not, do nothing if (overuse > 0) { - route_ctx.rr_node_route_inf[(size_t)rr_id].acc_cost += overuse * acc_fac; + route_ctx.rr_node_route_inf[rr_id].acc_cost += overuse * acc_fac; ++overused_nodes; total_overuse += overuse; @@ -387,9 +411,9 @@ void pathfinder_update_acc_cost_and_overuse_info(float acc_fac, OveruseInfo& ove /** Update pathfinder cost of all nodes rooted at rt_node, including rt_node itself */ void pathfinder_update_cost_from_route_tree(const RouteTreeNode& root, int add_or_sub) { - pathfinder_update_single_node_occupancy(size_t(root.inode), add_or_sub); + pathfinder_update_single_node_occupancy(root.inode, add_or_sub); for (auto& node : root.all_nodes()) { - pathfinder_update_single_node_occupancy(size_t(node.inode), add_or_sub); + pathfinder_update_single_node_occupancy(node.inode, add_or_sub); } } @@ -444,20 +468,20 @@ void init_route_structs(const Netlist<>& net_list, /* The routine sets the path_cost to HUGE_POSITIVE_FLOAT for * * all channel segments touched by previous routing phases. 
*/ -void reset_path_costs(const std::vector& visited_rr_nodes) { +void reset_path_costs(const std::vector& visited_rr_nodes) { auto& route_ctx = g_vpr_ctx.mutable_routing(); for (auto node : visited_rr_nodes) { route_ctx.rr_node_route_inf[node].path_cost = std::numeric_limits::infinity(); route_ctx.rr_node_route_inf[node].backward_path_cost = std::numeric_limits::infinity(); - route_ctx.rr_node_route_inf[node].prev_node = NO_PREVIOUS; + route_ctx.rr_node_route_inf[node].prev_node = RRNodeId::INVALID(); route_ctx.rr_node_route_inf[node].prev_edge = RREdgeId::INVALID(); } } /* Returns the congestion cost of using this rr-node plus that of any * * non-configurably connected rr_nodes that must be used when it is used. */ -float get_rr_cong_cost(int inode, float pres_fac) { +float get_rr_cong_cost(RRNodeId inode, float pres_fac) { auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.routing(); @@ -467,7 +491,7 @@ float get_rr_cong_cost(int inode, float pres_fac) { // Access unordered_map only when the node is part of a non-configurable set auto itr = device_ctx.rr_node_to_non_config_node_set.find(inode); if (itr != device_ctx.rr_node_to_non_config_node_set.end()) { - for (int node : device_ctx.rr_non_config_node_sets[itr->second]) { + for (RRNodeId node : device_ctx.rr_non_config_node_sets[itr->second]) { if (node == inode) { continue; //Already included above } @@ -486,7 +510,7 @@ float get_rr_cong_cost(int inode, float pres_fac) { * equivalent, so both will connect to the same SINK). 
*/ void mark_ends(const Netlist<>& net_list, ParentNetId net_id) { unsigned int ipin; - int inode; + RRNodeId inode; auto& route_ctx = g_vpr_ctx.mutable_routing(); @@ -498,7 +522,7 @@ void mark_ends(const Netlist<>& net_list, ParentNetId net_id) { void mark_remaining_ends(ParentNetId net_id, const std::vector& remaining_sinks) { // like mark_ends, but only performs it for the remaining sinks of a net - int inode; + RRNodeId inode; auto& route_ctx = g_vpr_ctx.mutable_routing(); @@ -614,9 +638,9 @@ void reset_rr_node_route_structs() { VTR_ASSERT(route_ctx.rr_node_route_inf.size() == size_t(device_ctx.rr_graph.num_nodes())); for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { - auto& node_inf = route_ctx.rr_node_route_inf[(size_t)rr_id]; + auto& node_inf = route_ctx.rr_node_route_inf[rr_id]; - node_inf.prev_node = NO_PREVIOUS; + node_inf.prev_node = RRNodeId::INVALID(); node_inf.prev_edge = RREdgeId::INVALID(); node_inf.acc_cost = 1.0; node_inf.path_cost = std::numeric_limits::infinity(); @@ -629,10 +653,10 @@ void reset_rr_node_route_structs() { /* Allocates and loads the route_ctx.net_rr_terminals data structure. For each net it stores the rr_node * * index of the SOURCE of the net and all the SINKs of the net [clb_nlist.nets()][clb_nlist.net_pins()]. * * Entry [inet][pnum] stores the rr index corresponding to the SOURCE (opin) or SINK (ipin) of the pin. */ -static vtr::vector> load_net_rr_terminals(const RRGraphView& rr_graph, - const Netlist<>& net_list, - bool is_flat) { - vtr::vector> net_rr_terminals; +static vtr::vector> load_net_rr_terminals(const RRGraphView& rr_graph, + const Netlist<>& net_list, + bool is_flat) { + vtr::vector> net_rr_terminals; net_rr_terminals.resize(net_list.nets().size()); @@ -652,7 +676,7 @@ static vtr::vector> load_net_rr_terminals(const RR (pin_count == 0 ? 
SOURCE : SINK), /* First pin is driver */ iclass); VTR_ASSERT(inode != RRNodeId::INVALID()); - net_rr_terminals[net_id][pin_count] = size_t(inode); + net_rr_terminals[net_id][pin_count] = inode; pin_count++; } } @@ -664,7 +688,7 @@ static std::tuple>>, vtr::vector>> load_net_terminal_groups(const RRGraphView& rr_graph, const Netlist<>& net_list, - const vtr::vector>& net_rr_terminals, + const vtr::vector>& net_rr_terminals, bool is_flat) { vtr::vector>> net_terminal_groups; vtr::vector> net_terminal_group_num; @@ -676,14 +700,14 @@ load_net_terminal_groups(const RRGraphView& rr_graph, net_terminal_groups[net_id].reserve(net_list.net_pins(net_id).size()); net_terminal_group_num[net_id].resize(net_list.net_pins(net_id).size(), -1); std::vector net_pin_blk_id(net_list.net_pins(net_id).size(), ParentBlockId::INVALID()); - std::unordered_map rr_node_pin_num; + std::unordered_map rr_node_pin_num; int pin_count = 0; for (auto pin_id : net_list.net_pins(net_id)) { if (pin_count == 0) { pin_count++; continue; } - int rr_node_num = net_rr_terminals[net_id][pin_count]; + RRNodeId rr_node_num = net_rr_terminals[net_id][pin_count]; auto block_id = net_list.pin_block(pin_id); net_pin_blk_id[pin_count] = block_id; rr_node_pin_num[rr_node_num] = pin_count; @@ -693,11 +717,11 @@ load_net_terminal_groups(const RRGraphView& rr_graph, int group_num = -1; for (int curr_grp_num = 0; curr_grp_num < (int)net_terminal_groups[net_id].size(); curr_grp_num++) { const auto& curr_grp = net_terminal_groups[net_id][curr_grp_num]; - auto group_loc = get_block_loc(net_pin_blk_id[rr_node_pin_num.at(curr_grp[0])], is_flat); + auto group_loc = get_block_loc(net_pin_blk_id[rr_node_pin_num.at(RRNodeId(curr_grp[0]))], is_flat); if (blk_loc.loc == group_loc.loc) { if (classes_in_same_block(block_id, rr_graph.node_ptc_num(RRNodeId(curr_grp[0])), - rr_graph.node_ptc_num(RRNodeId(net_rr_terminals[net_id][pin_count])), + rr_graph.node_ptc_num(net_rr_terminals[net_id][pin_count]), is_flat)) { group_num = 
curr_grp_num; break; @@ -706,12 +730,15 @@ load_net_terminal_groups(const RRGraphView& rr_graph, } if (group_num == -1) { - std::vector new_group = {rr_node_num}; + /* TODO: net_terminal_groups cannot be fully RRNodeId - ified, because this code calls libarchfpga which + * I think should not be aware of RRNodeIds. Fixing this requires some refactoring to lift the offending functions + * into VPR. */ + std::vector new_group = {int(size_t(rr_node_num))}; int new_group_num = net_terminal_groups[net_id].size(); net_terminal_groups[net_id].push_back(new_group); net_terminal_group_num[net_id][pin_count] = new_group_num; } else { - net_terminal_groups[net_id][group_num].push_back(rr_node_num); + net_terminal_groups[net_id][group_num].push_back(size_t(rr_node_num)); net_terminal_group_num[net_id][pin_count] = group_num; } @@ -727,11 +754,11 @@ load_net_terminal_groups(const RRGraphView& rr_graph, * in the FPGA. Currently only the SOURCE rr_node values are used, and * * they are used only to reserve pins for locally used OPINs in the router. * * [0..cluster_ctx.clb_nlist.blocks().size()-1][0..num_class-1]. * - * The values for blocks that are padsare NOT valid. */ -static vtr::vector> load_rr_clb_sources(const RRGraphView& rr_graph, - const Netlist<>& net_list, - bool is_flat) { - vtr::vector> rr_blk_source; + * The values for blocks that are pads are NOT valid. 
*/ +static vtr::vector> load_rr_clb_sources(const RRGraphView& rr_graph, + const Netlist<>& net_list, + bool is_flat) { + vtr::vector> rr_blk_source; t_rr_type rr_type; @@ -759,9 +786,9 @@ static vtr::vector> load_rr_clb_sources(const RR blk_loc.loc.y, rr_type, iclass); - rr_blk_source[blk_id][iclass] = size_t(inode); + rr_blk_source[blk_id][iclass] = inode; } else { - rr_blk_source[blk_id][iclass] = OPEN; + rr_blk_source[blk_id][iclass] = RRNodeId::INVALID(); } } } @@ -911,7 +938,7 @@ t_bb load_net_route_bb(const Netlist<>& net_list, return bb; } -void add_to_mod_list(int inode, std::vector& modified_rr_node_inf) { +void add_to_mod_list(RRNodeId inode, std::vector& modified_rr_node_inf) { auto& route_ctx = g_vpr_ctx.routing(); if (std::isinf(route_ctx.rr_node_route_inf[inode].path_cost)) { @@ -936,7 +963,7 @@ void add_to_mod_list(int inode, std::vector& modified_rr_node_inf) { // this would equate to duplicating a BLE into an already in-use BLE instance, which is clearly incorrect). void reserve_locally_used_opins(HeapInterface* heap, float pres_fac, float acc_fac, bool rip_up_local_opins, bool is_flat) { VTR_ASSERT(is_flat == false); - int num_local_opin, inode, from_node, iconn, num_edges, to_node; + int num_local_opin, iconn, num_edges; int iclass, ipin; float cost; t_heap* heap_head_ptr; @@ -959,8 +986,8 @@ void reserve_locally_used_opins(HeapInterface* heap, float pres_fac, float acc_f /* Always 0 for pads and for RECEIVER (IPIN) classes */ for (ipin = 0; ipin < num_local_opin; ipin++) { - inode = route_ctx.clb_opins_used_locally[blk_id][iclass][ipin]; - VTR_ASSERT(inode >= 0 && inode < (ssize_t)rr_graph.num_nodes()); + RRNodeId inode = route_ctx.clb_opins_used_locally[blk_id][iclass][ipin]; + VTR_ASSERT(inode && size_t(inode) < rr_graph.num_nodes()); adjust_one_rr_occ_and_acc_cost(inode, -1, acc_fac); } } @@ -985,17 +1012,17 @@ void reserve_locally_used_opins(HeapInterface* heap, float pres_fac, float acc_f //congestion cost are popped-off/reserved first. 
(Intuitively, we want //the reserved OPINs to move out of the way of congestion, by preferring //to reserve OPINs with lower congestion costs). - from_node = route_ctx.rr_blk_source[(const ParentBlockId&)blk_id][iclass]; + RRNodeId from_node = route_ctx.rr_blk_source[(const ParentBlockId&)blk_id][iclass]; num_edges = rr_graph.num_edges(RRNodeId(from_node)); for (iconn = 0; iconn < num_edges; iconn++) { - to_node = size_t(rr_graph.edge_sink_node(RRNodeId(from_node), iconn)); + RRNodeId to_node = rr_graph.edge_sink_node(RRNodeId(from_node), iconn); VTR_ASSERT(rr_graph.node_type(RRNodeId(to_node)) == OPIN); //Add the OPIN to the heap according to it's congestion cost cost = get_rr_cong_cost(to_node, pres_fac); add_node_to_heap(heap, route_ctx.rr_node_route_inf, - to_node, cost, OPEN, RREdgeId::INVALID(), + to_node, cost, RRNodeId::INVALID(), RREdgeId::INVALID(), 0., 0.); } @@ -1003,9 +1030,9 @@ void reserve_locally_used_opins(HeapInterface* heap, float pres_fac, float acc_f //Pop the nodes off the heap. We get them from the heap so we //reserve those pins with lowest congestion cost first. heap_head_ptr = heap->get_heap_head(); - inode = heap_head_ptr->index; + RRNodeId inode(heap_head_ptr->index); - VTR_ASSERT(rr_graph.node_type(RRNodeId(inode)) == OPIN); + VTR_ASSERT(rr_graph.node_type(inode) == OPIN); adjust_one_rr_occ_and_acc_cost(inode, 1, acc_fac); route_ctx.clb_opins_used_locally[blk_id][iclass][ipin] = inode; @@ -1017,7 +1044,7 @@ void reserve_locally_used_opins(HeapInterface* heap, float pres_fac, float acc_f } } -static void adjust_one_rr_occ_and_acc_cost(int inode, int add_or_sub, float acc_fac) { +static void adjust_one_rr_occ_and_acc_cost(RRNodeId inode, int add_or_sub, float acc_fac) { /* Increments or decrements (depending on add_or_sub) the occupancy of * * one rr_node, and adjusts the present cost of that node appropriately. 
*/ @@ -1026,7 +1053,7 @@ static void adjust_one_rr_occ_and_acc_cost(int inode, int add_or_sub, float acc_ const auto& rr_graph = device_ctx.rr_graph; int new_occ = route_ctx.rr_node_route_inf[inode].occ() + add_or_sub; - int capacity = rr_graph.node_capacity(RRNodeId(inode)); + int capacity = rr_graph.node_capacity(inode); route_ctx.rr_node_route_inf[inode].set_occ(new_occ); if (new_occ < capacity) { @@ -1044,7 +1071,7 @@ void print_invalid_routing_info(const Netlist<>& net_list, bool is_flat) { auto& route_ctx = g_vpr_ctx.routing(); //Build a look-up of nets using each RR node - std::multimap rr_node_nets; + std::multimap rr_node_nets; for (auto net_id : net_list.nets()) { if (!route_ctx.route_trees[net_id]) @@ -1055,15 +1082,15 @@ void print_invalid_routing_info(const Netlist<>& net_list, bool is_flat) { } } - for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { + for (const RRNodeId inode : device_ctx.rr_graph.nodes()) { int node_x, node_y; - node_x = rr_graph.node_xlow(rr_id); - node_y = rr_graph.node_ylow(rr_id); - size_t inode = (size_t)rr_id; + node_x = rr_graph.node_xlow(inode); + node_y = rr_graph.node_ylow(inode); + int occ = route_ctx.rr_node_route_inf[inode].occ(); - int cap = rr_graph.node_capacity(rr_id); + int cap = rr_graph.node_capacity(inode); if (occ > cap) { - VTR_LOG(" %s is overused (occ=%d capacity=%d)\n", describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, size_t(inode), is_flat).c_str(), occ, cap); + VTR_LOG(" %s is overused (occ=%d capacity=%d)\n", describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, inode, is_flat).c_str(), occ, cap); auto range = rr_node_nets.equal_range(inode); for (auto itr = range.first; itr != range.second; ++itr) { @@ -1089,19 +1116,20 @@ void print_rr_node_route_inf() { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; for (size_t inode = 0; inode < route_ctx.rr_node_route_inf.size(); ++inode) { - if 
(!std::isinf(route_ctx.rr_node_route_inf[inode].path_cost)) { - int prev_node = route_ctx.rr_node_route_inf[inode].prev_node; - RREdgeId prev_edge = route_ctx.rr_node_route_inf[inode].prev_edge; + const auto& inf = route_ctx.rr_node_route_inf[RRNodeId(inode)]; + if (!std::isinf(inf.path_cost)) { + RRNodeId prev_node = inf.prev_node; + RREdgeId prev_edge = inf.prev_edge; auto switch_id = rr_graph.rr_nodes().edge_switch(prev_edge); VTR_LOG("rr_node: %d prev_node: %d prev_edge: %zu", inode, prev_node, (size_t)prev_edge); - if (prev_node != OPEN && bool(prev_edge) && !rr_graph.rr_switch_inf(RRSwitchId(switch_id)).configurable()) { + if (prev_node.is_valid() && bool(prev_edge) && !rr_graph.rr_switch_inf(RRSwitchId(switch_id)).configurable()) { VTR_LOG("*"); } VTR_LOG(" pcost: %g back_pcost: %g\n", - route_ctx.rr_node_route_inf[inode].path_cost, route_ctx.rr_node_route_inf[inode].backward_path_cost); + inf.path_cost, inf.backward_path_cost); } } } @@ -1114,23 +1142,25 @@ void print_rr_node_route_inf_dot() { VTR_LOG("digraph G {\n"); VTR_LOG("\tnode[shape=record]\n"); for (size_t inode = 0; inode < route_ctx.rr_node_route_inf.size(); ++inode) { - if (!std::isinf(route_ctx.rr_node_route_inf[inode].path_cost)) { + const auto& inf = route_ctx.rr_node_route_inf[RRNodeId(inode)]; + if (!std::isinf(inf.path_cost)) { VTR_LOG("\tnode%zu[label=\"{%zu (%s)", inode, inode, rr_graph.node_type_string(RRNodeId(inode))); - if (route_ctx.rr_node_route_inf[inode].occ() > rr_graph.node_capacity(RRNodeId(inode))) { + if (inf.occ() > rr_graph.node_capacity(RRNodeId(inode))) { VTR_LOG(" x"); } VTR_LOG("}\"]\n"); } } for (size_t inode = 0; inode < route_ctx.rr_node_route_inf.size(); ++inode) { - if (!std::isinf(route_ctx.rr_node_route_inf[inode].path_cost)) { - int prev_node = route_ctx.rr_node_route_inf[inode].prev_node; - RREdgeId prev_edge = route_ctx.rr_node_route_inf[inode].prev_edge; + const auto& inf = route_ctx.rr_node_route_inf[RRNodeId(inode)]; + if (!std::isinf(inf.path_cost)) { + 
RRNodeId prev_node = inf.prev_node; + RREdgeId prev_edge = inf.prev_edge; auto switch_id = rr_graph.rr_nodes().edge_switch(prev_edge); - if (prev_node != OPEN && bool(prev_edge)) { + if (prev_node.is_valid() && prev_edge.is_valid()) { VTR_LOG("\tnode%d -> node%zu [", prev_node, inode); - if (prev_node != OPEN && bool(prev_edge) && !rr_graph.rr_switch_inf(RRSwitchId(switch_id)).configurable()) { + if (rr_graph.rr_switch_inf(RRSwitchId(switch_id)).configurable()) { VTR_LOG("label=\"*\""); } @@ -1142,23 +1172,7 @@ void print_rr_node_route_inf_dot() { VTR_LOG("}\n"); } -// True if router will use a lookahead. -// -// This controls whether the router lookahead cache will be primed outside of -// the router ScopedStartFinishTimer. -bool router_needs_lookahead(enum e_router_algorithm router_algorithm) { - switch (router_algorithm) { - case BREADTH_FIRST: - return false; - case TIMING_DRIVEN: - return true; - default: - VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown routing algorithm %d", - router_algorithm); - } -} - -std::string describe_unrouteable_connection(const int source_node, const int sink_node, bool is_flat) { +std::string describe_unrouteable_connection(RRNodeId source_node, RRNodeId sink_node, bool is_flat) { const auto& device_ctx = g_vpr_ctx.device(); std::string msg = vtr::string_fmt( "Cannot route from %s (%s) to " diff --git a/vpr/src/route/route_common.h b/vpr/src/route/route_common.h index 9df96d7f807..4a7d1a2cf76 100644 --- a/vpr/src/route/route_common.h +++ b/vpr/src/route/route_common.h @@ -16,42 +16,42 @@ t_bb load_net_route_bb(const Netlist<>& net_list, ParentNetId net_id, int bb_factor); -void pathfinder_update_single_node_occupancy(int inode, int add_or_sub); +void pathfinder_update_single_node_occupancy(RRNodeId inode, int add_or_sub); void pathfinder_update_acc_cost_and_overuse_info(float acc_fac, OveruseInfo& overuse_info); -/** Update pathfinder cost of all nodes rooted at rt_node, including rt_node itself */ +/** Update pathfinder cost of all 
nodes under root (including root) */ void pathfinder_update_cost_from_route_tree(const RouteTreeNode& root, int add_or_sub); float update_pres_fac(float new_pres_fac); -void reset_path_costs(const std::vector& visited_rr_nodes); +void reset_path_costs(const std::vector& visited_rr_nodes); -float get_rr_cong_cost(int inode, float pres_fac); +float get_rr_cong_cost(RRNodeId inode, float pres_fac); /* Returns the base cost of using this rr_node */ -inline float get_single_rr_cong_base_cost(int inode) { +inline float get_single_rr_cong_base_cost(RRNodeId inode) { auto& device_ctx = g_vpr_ctx.device(); - auto cost_index = device_ctx.rr_graph.node_cost_index(RRNodeId(inode)); + auto cost_index = device_ctx.rr_graph.node_cost_index(inode); return device_ctx.rr_indexed_data[cost_index].base_cost; } /* Returns the accumulated congestion cost of using this rr_node */ -inline float get_single_rr_cong_acc_cost(int inode) { +inline float get_single_rr_cong_acc_cost(RRNodeId inode) { auto& route_ctx = g_vpr_ctx.routing(); return route_ctx.rr_node_route_inf[inode].acc_cost; } /* Returns the present congestion cost of using this rr_node */ -inline float get_single_rr_cong_pres_cost(int inode, float pres_fac) { +inline float get_single_rr_cong_pres_cost(RRNodeId inode, float pres_fac) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; auto& route_ctx = g_vpr_ctx.routing(); int occ = route_ctx.rr_node_route_inf[inode].occ(); - int capacity = rr_graph.node_capacity(RRNodeId(inode)); + int capacity = rr_graph.node_capacity(inode); if (occ >= capacity) { return (1. 
+ pres_fac * (occ + 1 - capacity)); @@ -62,13 +62,13 @@ inline float get_single_rr_cong_pres_cost(int inode, float pres_fac) { /* Returns the congestion cost of using this rr_node, * *ignoring* non-configurable edges */ -inline float get_single_rr_cong_cost(int inode, float pres_fac) { +inline float get_single_rr_cong_cost(RRNodeId inode, float pres_fac) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; auto& route_ctx = g_vpr_ctx.routing(); float pres_cost; - int overuse = route_ctx.rr_node_route_inf[inode].occ() - rr_graph.node_capacity(RRNodeId(inode)); + int overuse = route_ctx.rr_node_route_inf[inode].occ() - rr_graph.node_capacity(inode); if (overuse >= 0) { pres_cost = (1. + pres_fac * (overuse + 1)); @@ -76,7 +76,7 @@ inline float get_single_rr_cong_cost(int inode, float pres_fac) { pres_cost = 1.; } - auto cost_index = rr_graph.node_cost_index(RRNodeId(inode)); + auto cost_index = rr_graph.node_cost_index(inode); float cost = device_ctx.rr_indexed_data[cost_index].base_cost * route_ctx.rr_node_route_inf[inode].acc_cost * pres_cost; @@ -91,7 +91,7 @@ void mark_ends(const Netlist<>& net_list, ParentNetId net_id); void mark_remaining_ends(ParentNetId net_id, const std::vector& remaining_sinks); -void add_to_mod_list(int inode, std::vector& modified_rr_node_inf); +void add_to_mod_list(RRNodeId inode, std::vector& modified_rr_node_inf); void init_route_structs(const Netlist<>& net_list, int bb_factor, @@ -108,9 +108,7 @@ void print_rr_node_route_inf(); void print_rr_node_route_inf_dot(); void print_invalid_routing_info(const Netlist<>& net_list, bool is_flat); -bool router_needs_lookahead(enum e_router_algorithm router_algorithm); - -std::string describe_unrouteable_connection(const int source_node, const int sink_node, bool is_flat); +std::string describe_unrouteable_connection(RRNodeId source_node, RRNodeId sink_node, bool is_flat); /* If flat_routing isn't enabled, this function would simply pass from_node and to_node to 
the router_lookahead. * However, if flat_routing is enabled, we can not do the same. For the time being, router lookahead is not aware @@ -129,9 +127,7 @@ float get_cost_from_lookahead(const RouterLookahead& router_lookahead, * given is lower than the current path_cost to this channel segment. The * * index of its predecessor is stored to make traceback easy. The index of * * the edge used to get from its predecessor to it is also stored to make * - * timing analysis, etc. The backward_path_cost and R_upstream values are * - * used only by the timing-driven router -- the breadth-first router * - * ignores them. * + * timing analysis, etc. * * * * Returns t_heap suitable for adding to heap or nullptr if node is more * * expensive than previously explored path. */ @@ -139,9 +135,9 @@ template t_heap* prepare_to_add_node_to_heap( T* heap, const RouteInf& rr_node_route_inf, - int inode, + RRNodeId inode, float total_cost, - int prev_node, + RRNodeId prev_node, RREdgeId prev_edge, float backward_path_cost, float R_upstream) { @@ -164,9 +160,9 @@ template void add_node_to_heap( T* heap, const RouteInf& rr_node_route_inf, - int inode, + RRNodeId inode, float total_cost, - int prev_node, + RRNodeId prev_node, RREdgeId prev_edge, float backward_path_cost, float R_upstream) { @@ -186,9 +182,9 @@ template void push_back_node( T* heap, const RouteInf& rr_node_route_inf, - int inode, + RRNodeId inode, float total_cost, - int prev_node, + RRNodeId prev_node, RREdgeId prev_edge, float backward_path_cost, float R_upstream) { @@ -207,7 +203,7 @@ void push_back_node( template void push_back_node_with_info( T* heap, - int inode, + RRNodeId inode, float total_cost, float backward_path_cost, float R_upstream, diff --git a/vpr/src/route/route_export.h b/vpr/src/route/route_export.h index 6c1d1e547db..3aa1703647e 100644 --- a/vpr/src/route/route_export.h +++ b/vpr/src/route/route_export.h @@ -1,4 +1,4 @@ -/******** Function prototypes for functions in route_common.c that *********** 
+/******** Function prototypes for functions in route_common.cpp that *********** ******** are used outside the router modules. ***********/ #include "vpr_types.h" #include @@ -32,9 +32,9 @@ bool try_route(const Netlist<>& net_list, bool feasible_routing(); -std::vector collect_congested_rr_nodes(); +std::vector collect_congested_rr_nodes(); -std::vector> collect_rr_node_nets(); +vtr::vector> collect_rr_node_nets(); t_clb_opins_used alloc_route_structs(); diff --git a/vpr/src/route/route_parallel.cpp b/vpr/src/route/route_parallel.cpp new file mode 100644 index 00000000000..1e50f758b31 --- /dev/null +++ b/vpr/src/route/route_parallel.cpp @@ -0,0 +1,1006 @@ +/** @file Functions specific to parallel routing. + * Reuse code from route_timing.cpp where possible. */ + +#include +#include +#include +#include +#include +#include +#include + +#include "binary_heap.h" +#include "bucket.h" +#include "concrete_timing_info.h" +#include "connection_router.h" +#include "draw.h" +#include "globals.h" +#include "netlist_fwd.h" +#include "partition_tree.h" +#include "read_route.h" +#include "route_export.h" +#include "route_common.h" +#include "route_timing.h" +#include "route_parallel.h" +// all functions in profiling:: namespace, which are only activated if PROFILE is defined +#include "route_profiling.h" +#include "timing_util.h" +#include "vtr_time.h" + +#include "NetPinTimingInvalidator.h" + +#ifdef VPR_USE_TBB + +# include "tbb/enumerable_thread_specific.h" +# include "tbb/task_group.h" +# include "tbb/global_control.h" + +/** route_net and similar functions need many bits of state collected from various + * parts of VPR, collect them here for ease of use */ +template +class RouteIterCtx { + public: + tbb::enumerable_thread_specific routers; + const Netlist<>& net_list; + int itry; + float pres_fac; + const t_router_opts& router_opts; + CBRR& connections_inf; + tbb::enumerable_thread_specific router_stats; + tbb::enumerable_thread_specific route_structs; + NetPinsMatrix& 
net_delay; + const ClusteredPinAtomPinsLookup& netlist_pin_lookup; + std::shared_ptr timing_info; + NetPinTimingInvalidator* pin_timing_invalidator; + route_budgets& budgeting_inf; + float worst_negative_slack; + const RoutingPredictor& routing_predictor; + const vtr::vector>>& choking_spots; + bool is_flat; +}; + +/** Helper for reduce_partition_tree. Traverse \p node's subtree and collect results into \p results */ +static void reduce_partition_tree_helper(const PartitionTreeNode& node, RouteIterResults& results); + +/** + * Try to route in parallel with the given ConnectionRouter. + * ConnectionRouter is typically templated with a heap type, so this lets us + * route with different heap implementations. + * + * This fn is very similar to try_timing_driven_route_tmpl, but it has enough small changes to + * warrant a copy. (TODO: refactor this to reuse more of the serial code) + * + * @param netlist Input netlist + * @param det_routing_arch Routing architecture. See definition of t_det_routing_arch for more details. + * @param router_opts Command line options for the router. + * @param analysis_opts Command line options for timing analysis (used in generate_route_timing_reports()) + * @param segment_inf + * @param[in, out] net_delay + * @param netlist_pin_lookup + * @param[in, out] timing_info Interface to the timing analyzer + * @param delay_calc + * @param first_iteration_priority + * @param is_flat + * @return Success status + * + * The reason that try_parallel_route_tmpl (and descendents) are being + * templated over is because using a virtual interface instead fully templating + * the router results in a 5% runtime increase. + * + * The reason to template over the router in general is to enable runtime + * selection of core router algorithm's, specifically the router heap. 
*/ +template +static bool try_parallel_route_tmpl(const Netlist<>& netlist, + const t_det_routing_arch& det_routing_arch, + const t_router_opts& router_opts, + const t_analysis_opts& analysis_opts, + const std::vector& segment_inf, + NetPinsMatrix& net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + std::shared_ptr delay_calc, + ScreenUpdatePriority first_iteration_priority, + bool is_flat); + +/************************ Subroutine definitions *****************************/ + +bool try_parallel_route(const Netlist<>& net_list, + const t_det_routing_arch& det_routing_arch, + const t_router_opts& router_opts, + const t_analysis_opts& analysis_opts, + const std::vector& segment_inf, + NetPinsMatrix& net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + std::shared_ptr delay_calc, + ScreenUpdatePriority first_iteration_priority, + bool is_flat) { + switch (router_opts.router_heap) { + case e_heap_type::BINARY_HEAP: + return try_parallel_route_tmpl>(net_list, + det_routing_arch, + router_opts, + analysis_opts, + segment_inf, + net_delay, + netlist_pin_lookup, + timing_info, + delay_calc, + first_iteration_priority, + is_flat); + break; + case e_heap_type::BUCKET_HEAP_APPROXIMATION: + return try_parallel_route_tmpl>(net_list, + det_routing_arch, + router_opts, + analysis_opts, + segment_inf, + net_delay, + netlist_pin_lookup, + timing_info, + delay_calc, + first_iteration_priority, + is_flat); + default: + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, "Unknown heap type %d", router_opts.router_heap); + } +} + +template +bool try_parallel_route_tmpl(const Netlist<>& net_list, + const t_det_routing_arch& det_routing_arch, + const t_router_opts& router_opts, + const t_analysis_opts& analysis_opts, + const std::vector& segment_inf, + NetPinsMatrix& net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + std::shared_ptr delay_calc, + ScreenUpdatePriority 
first_iteration_priority, + bool is_flat) { + // Make sure template type ConnectionRouter is a ConnectionRouterInterface. + /// TODO: Template on "NetRouter" instead of ConnectionRouter to avoid copying top level routing logic? + static_assert(std::is_base_of::value, "ConnectionRouter must implement the ConnectionRouterInterface"); + + const auto& device_ctx = g_vpr_ctx.device(); + const auto& atom_ctx = g_vpr_ctx.atom(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + auto choking_spots = set_nets_choking_spots(net_list, + route_ctx.net_terminal_groups, + route_ctx.net_terminal_group_num, + router_opts.has_choking_spot, + is_flat); + + //Initially, the router runs normally trying to reduce congestion while + //balancing other metrics (timing, wirelength, run-time etc.) + RouterCongestionMode router_congestion_mode = RouterCongestionMode::NORMAL; + + //Initialize and properly size the lookups for profiling + profiling::profiling_initialization(get_max_pins_per_net(net_list)); + + /* + * Configure the routing predictor + */ + RoutingPredictor routing_predictor; + float abort_iteration_threshold = std::numeric_limits::infinity(); //Default no early abort + if (router_opts.routing_failure_predictor == SAFE) { + abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_SAFE * router_opts.max_router_iterations; + } else if (router_opts.routing_failure_predictor == AGGRESSIVE) { + abort_iteration_threshold = ROUTING_PREDICTOR_ITERATION_ABORT_FACTOR_AGGRESSIVE * router_opts.max_router_iterations; + } else { + VTR_ASSERT_MSG(router_opts.routing_failure_predictor == OFF, "Unrecognized routing failure predictor setting"); + } + + float high_effort_congestion_mode_iteration_threshold = router_opts.congested_routing_iteration_threshold_frac * router_opts.max_router_iterations; + + /* Set delay of ignored signals to zero. 
Non-ignored net delays are set by + * update_net_delays_from_route_tree() inside parallel_route_net(), + * which is only called for non-ignored nets. */ + for (auto net_id : net_list.nets()) { + if (net_list.net_is_ignored(net_id)) { + for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) { + net_delay[net_id][ipin] = 0.; + } + } + } + + CBRR connections_inf{net_list, route_ctx.net_rr_terminals, is_flat}; + + route_budgets budgeting_inf(net_list, is_flat); + + // This needs to be called before filling intra-cluster lookahead maps to ensure that the intra-cluster lookahead maps are initialized. + const RouterLookahead* router_lookahead = get_cached_router_lookahead(det_routing_arch, + router_opts.lookahead_type, + router_opts.write_router_lookahead, + router_opts.read_router_lookahead, + segment_inf, + is_flat); + + if (is_flat) { + // If is_flat is true, the router lookahead maps related to intra-cluster resources should be initialized since + // they haven't been initialized when the map related to global resources was initialized. 
+ auto cache_key = route_ctx.router_lookahead_cache_key_; + std::unique_ptr mut_router_lookahead(route_ctx.cached_router_lookahead_.release()); + VTR_ASSERT(mut_router_lookahead); + route_ctx.cached_router_lookahead_.clear(); + if (!router_opts.read_intra_cluster_router_lookahead.empty()) { + mut_router_lookahead->read_intra_cluster(router_opts.read_intra_cluster_router_lookahead); + } else { + mut_router_lookahead->compute_intra_tile(); + } + route_ctx.cached_router_lookahead_.set(cache_key, std::move(mut_router_lookahead)); + router_lookahead = get_cached_router_lookahead(det_routing_arch, + router_opts.lookahead_type, + router_opts.write_router_lookahead, + router_opts.read_router_lookahead, + segment_inf, + is_flat); + if (!router_opts.write_intra_cluster_router_lookahead.empty()) { + router_lookahead->write_intra_cluster(router_opts.write_intra_cluster_router_lookahead); + } + } + + VTR_ASSERT(router_lookahead != nullptr); + + /* + * Routing parameters + */ + float pres_fac = update_pres_fac(router_opts.first_iter_pres_fac); /* Typically 0 -> ignore cong. */ + int bb_fac = router_opts.bb_factor; + + //When routing conflicts are detected the bounding boxes are scaled + //by BB_SCALE_FACTOR every BB_SCALE_ITER_COUNT iterations + constexpr float BB_SCALE_FACTOR = 2; + constexpr int BB_SCALE_ITER_COUNT = 5; + + size_t available_wirelength = calculate_wirelength_available(); + + /* + * Routing status and metrics + */ + bool routing_is_successful = false; + WirelengthInfo wirelength_info; + OveruseInfo overuse_info(device_ctx.rr_graph.num_nodes()); + tatum::TimingPathInfo critical_path; + int itry; //Routing iteration number + int itry_conflicted_mode = 0; + + /* + * Best result so far + */ + vtr::vector> best_routing; + t_clb_opins_used best_clb_opins_used_locally; + RoutingMetrics best_routing_metrics; + int legal_convergence_count = 0; + std::vector scratch; + + /* + * On the first routing iteration ignore congestion to get reasonable net + * delay estimates. 
Set criticalities to 1 when timing analysis is on to + * optimize timing, and to 0 when timing analysis is off to optimize routability. + * + * Subsequent iterations use the net delays from the previous iteration. + */ + std::shared_ptr route_timing_info; + { + vtr::ScopedStartFinishTimer init_timing_timer("Initializing router criticalities"); + if (timing_info) { + if (router_opts.initial_timing == e_router_initial_timing::ALL_CRITICAL) { + //First routing iteration, make all nets critical for a min-delay routing + route_timing_info = make_constant_timing_info(1.); + } else { + VTR_ASSERT(router_opts.initial_timing == e_router_initial_timing::LOOKAHEAD); + + { + //Estimate initial connection delays from the router lookahead + init_net_delay_from_lookahead(*router_lookahead, + net_list, + route_ctx.net_rr_terminals, + net_delay, + device_ctx.rr_graph, + is_flat); + + //Run STA to get estimated criticalities + timing_info->update(); + } + route_timing_info = timing_info; + } + } else { + //Not timing driven, force criticality to zero for a routability-driven routing + route_timing_info = make_constant_timing_info(0.); + } + VTR_LOG("Initial Net Connection Criticality Histogram:\n"); + print_router_criticality_histogram(net_list, *route_timing_info, netlist_pin_lookup, is_flat); + } + + std::unique_ptr pin_timing_invalidator; + if (timing_info) { + pin_timing_invalidator = make_net_pin_timing_invalidator( + router_opts.timing_update_type, + net_list, + netlist_pin_lookup, + atom_ctx.nlist, + atom_ctx.lookup, + *timing_info->timing_graph(), + is_flat); + } + + /* Build partition tree for parallel routing */ + vtr::Timer t; + PartitionTree partition_tree(net_list); + float total_prep_time = t.elapsed_sec(); + VTR_LOG("# Built partition tree in %f seconds\n", total_prep_time); + + tbb::task_group tbb_task_group; + + /* Set up thread local storage. + * tbb::enumerable_thread_specific will construct the elements as needed. 
+ * see https://spec.oneapi.io/versions/1.0-rev-3/elements/oneTBB/source/thread_local_storage/enumerable_thread_specific_cls/construct_destroy_copy.html */ + auto routers = tbb::enumerable_thread_specific(ConnectionRouter( + device_ctx.grid, + *router_lookahead, + device_ctx.rr_graph.rr_nodes(), + &device_ctx.rr_graph, + device_ctx.rr_rc_data, + device_ctx.rr_graph.rr_switch(), + route_ctx.rr_node_route_inf, + is_flat)); /* Here we provide an "exemplar" to copy for each thread */ + auto router_stats_thread = tbb::enumerable_thread_specific(); + auto route_structs = tbb::enumerable_thread_specific(net_list); + + RouterStats router_stats; + float prev_iter_cumm_time = 0; + vtr::Timer iteration_timer; + int num_net_bounding_boxes_updated = 0; + int itry_since_last_convergence = -1; + + // This heap is used for reserve_locally_used_opins. + BinaryHeap small_heap; + small_heap.init_heap(device_ctx.grid); + + // When RCV is enabled the router will not stop unless negative hold slack is 0 + // In some cases this isn't doable, due to global nets or intracluster routing issues + // In these cases RCV will finish early if it goes RCV_FINISH_EARLY_COUNTDOWN iterations without detecting resolvable negative hold slack + // Increasing this will make the router fail occasionally, decreasing will sometimes not let all hold violations be resolved + constexpr int RCV_FINISH_EARLY_COUNTDOWN = 15; + + int rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN; + + print_route_status_header(); + for (itry = 1; itry <= router_opts.max_router_iterations; ++itry) { + for (auto& stats : router_stats_thread) { + init_router_stats(stats); + } + + /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */ + for (auto net_id : net_list.nets()) { + route_ctx.net_status.set_is_routed(net_id, false); + route_ctx.net_status.set_is_fixed(net_id, false); + } + + if (itry_since_last_convergence >= 0) { + ++itry_since_last_convergence; + } + + // Calculate this once and pass it into 
net routing to check if should reroute for hold + float worst_negative_slack = 0; + if (budgeting_inf.if_set()) { + worst_negative_slack = timing_info->hold_total_negative_slack(); + } + + /** + * Route nets in parallel using the partition tree. Need to pass on + * some context to each task. + * TODO: Move pin_criticality into timing_driven_route_net(). + * TODO: Move rt_node_of_sink lookup into RouteTree. + */ + RouteIterCtx iter_ctx = { + routers, + net_list, + itry, + pres_fac, + router_opts, + connections_inf, + router_stats_thread, + route_structs, + net_delay, + netlist_pin_lookup, + route_timing_info, + pin_timing_invalidator.get(), + budgeting_inf, + worst_negative_slack, + routing_predictor, + choking_spots, + is_flat}; + + RouteIterResults iter_results = route_partition_tree(tbb_task_group, partition_tree, iter_ctx); + + if (!iter_results.is_routable) { + return false; // Impossible to route + } + + /* Note that breakpoints won't work properly with parallel routing. + * (how to do that? stop all threads when a thread hits a breakpoint? too complicated) + * However we still make an attempt to update graphics */ +# ifndef NO_GRAPHICS + for (auto net_id : net_list.nets()) { + update_router_info_and_check_bp(BP_NET_ID, size_t(net_id)); + } +# endif + + // Make sure any CLB OPINs used up by subblocks being hooked directly to them are reserved for that purpose + bool rip_up_local_opins = (itry == 1 ? false : true); + if (!is_flat) { + reserve_locally_used_opins(&small_heap, pres_fac, + router_opts.acc_fac, rip_up_local_opins, is_flat); + } + + /* + * Calculate metrics for the current routing + */ + bool routing_is_feasible = feasible_routing(); + float est_success_iteration = routing_predictor.estimate_success_iteration(); + + //Update resource costs and overuse info + if (itry == 1) { + pathfinder_update_acc_cost_and_overuse_info(0., overuse_info); /* Acc_fac=0 for first iter. 
*/ + } else { + pathfinder_update_acc_cost_and_overuse_info(router_opts.acc_fac, overuse_info); + } + + wirelength_info = calculate_wirelength_info(net_list, available_wirelength); + routing_predictor.add_iteration_overuse(itry, overuse_info.overused_nodes); + + if (timing_info) { + //Update timing based on the new routing + //Note that the net delays have already been updated by parallel_route_net + timing_info->update(); + timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during routing + pin_timing_invalidator->reset(); + + //Use the real timing analysis criticalities for subsequent routing iterations + // 'route_timing_info' is what is actually passed into the net/connection routers, + // and for the 1st iteration may not be the actual STA results (e.g. all criticalities set to 1) + route_timing_info = timing_info; + + critical_path = timing_info->least_slack_critical_path(); + + VTR_ASSERT_SAFE(timing_driven_check_net_delays(net_list, net_delay)); + + if (itry == 1) { + generate_route_timing_reports(router_opts, analysis_opts, *timing_info, *delay_calc, is_flat); + } + } + + float iter_cumm_time = iteration_timer.elapsed_sec(); + float iter_elapsed_time = iter_cumm_time - prev_iter_cumm_time; + + //Output progress + print_route_status(itry, iter_elapsed_time, pres_fac, num_net_bounding_boxes_updated, iter_results.stats, overuse_info, wirelength_info, timing_info, est_success_iteration); + + prev_iter_cumm_time = iter_cumm_time; + + //Update graphics + if (itry == 1) { + update_screen(first_iteration_priority, "Routing...", ROUTING, timing_info); + } else { + update_screen(ScreenUpdatePriority::MINOR, "Routing...", ROUTING, timing_info); + } + + if (router_opts.save_routing_per_iteration) { + std::string filename = vtr::string_fmt("iteration_%03d.route", itry); + print_route(net_list, nullptr, filename.c_str(), is_flat); + } + + // Update router stats + update_router_stats(router_stats, iter_results.stats); + + /* + 
* Are we finished? + */ + if (is_iteration_complete(routing_is_feasible, router_opts, itry, timing_info, rcv_finished_count == 0)) { + auto& router_ctx = g_vpr_ctx.routing(); + + if (is_better_quality_routing(best_routing, best_routing_metrics, wirelength_info, timing_info)) { + //Save routing + best_routing = router_ctx.route_trees; + best_clb_opins_used_locally = router_ctx.clb_opins_used_locally; + + routing_is_successful = true; + + //Update best metrics + if (timing_info) { + timing_driven_check_net_delays(net_list, net_delay); + + best_routing_metrics.sTNS = timing_info->setup_total_negative_slack(); + best_routing_metrics.sWNS = timing_info->setup_worst_negative_slack(); + best_routing_metrics.hTNS = timing_info->hold_total_negative_slack(); + best_routing_metrics.hWNS = timing_info->hold_worst_negative_slack(); + best_routing_metrics.critical_path = critical_path; + } + best_routing_metrics.used_wirelength = wirelength_info.used_wirelength(); + } + + //Decrease pres_fac so that critical connections will take more direct routes + //Note that we use first_iter_pres_fac here (typically zero), and switch to + //use initial_pres_fac on the next iteration. + pres_fac = update_pres_fac(router_opts.first_iter_pres_fac); + + //Reduce timing tolerances to re-route more delay-suboptimal signals + connections_inf.set_connection_criticality_tolerance(0.7); + connections_inf.set_connection_delay_tolerance(1.01); + + ++legal_convergence_count; + itry_since_last_convergence = 0; + + VTR_ASSERT(routing_is_successful); + } + + if (itry_since_last_convergence == 1) { + //We used first_iter_pres_fac when we started routing again + //after the first routing convergence. Since that is often zero, + //we want to set pres_fac to a reasonable (i.e. 
typically non-zero) + //value afterwards -- so it grows when multiplied by pres_fac_mult + pres_fac = update_pres_fac(router_opts.initial_pres_fac); + } + + //Have we converged the maximum number of times, did not make any changes, or does it seem + //unlikely additional convergences will improve QoR? + if (legal_convergence_count >= router_opts.max_convergence_count + || iter_results.stats.connections_routed == 0 + || early_reconvergence_exit_heuristic(router_opts, itry_since_last_convergence, timing_info, best_routing_metrics)) { +# ifndef NO_GRAPHICS + update_router_info_and_check_bp(BP_ROUTE_ITER, -1); +# endif + break; //Done routing + } + + /* + * Abort checks: Should we give-up because this routing problem is unlikely to converge to a legal routing? + */ + if (itry == 1 && early_exit_heuristic(router_opts, wirelength_info)) { +# ifndef NO_GRAPHICS + update_router_info_and_check_bp(BP_ROUTE_ITER, -1); +# endif + //Abort + break; + } + + //Estimate at what iteration we will converge to a legal routing + if (overuse_info.overused_nodes > ROUTING_PREDICTOR_MIN_ABSOLUTE_OVERUSE_THRESHOLD) { + //Only consider aborting if we have a significant number of overused resources + + if (!std::isnan(est_success_iteration) && est_success_iteration > abort_iteration_threshold && router_opts.routing_budgets_algorithm != YOYO) { + VTR_LOG("Routing aborted, the predicted iteration for a successful route (%.1f) is too high.\n", est_success_iteration); +# ifndef NO_GRAPHICS + update_router_info_and_check_bp(BP_ROUTE_ITER, -1); +# endif + break; //Abort + } + } + + if (itry == 1 && router_opts.exit_after_first_routing_iteration) { + VTR_LOG("Exiting after first routing iteration as requested\n"); +# ifndef NO_GRAPHICS + update_router_info_and_check_bp(BP_ROUTE_ITER, -1); +# endif + break; + } + + /* + * Prepare for the next iteration + */ + + if (router_opts.route_bb_update == e_route_bb_update::DYNAMIC) { + /** TODO: Disabled BB scaling for the baseline parallel router. 
Should re-enable it by building/updating partition tree on every iteration */ + // num_net_bounding_boxes_updated = dynamic_update_bounding_boxes(iter_results.rerouted_nets, net_list, router_opts.high_fanout_threshold); + } + + if (itry >= high_effort_congestion_mode_iteration_threshold) { + //We are approaching the maximum number of routing iterations, + //and still do not have a legal routing. Switch to a mode which + //focuses more on attempting to resolve routing conflicts. + router_congestion_mode = RouterCongestionMode::CONFLICTED; + } + + //Update pres_fac + if (itry == 1) { + pres_fac = update_pres_fac(router_opts.initial_pres_fac); + } else { + pres_fac *= router_opts.pres_fac_mult; + + /* Avoid overflow for high iteration counts, even if acc_cost is big */ + pres_fac = update_pres_fac(std::min(pres_fac, static_cast(HUGE_POSITIVE_FLOAT / 1e5))); + + // Increase short path criticality if it's having a hard time resolving hold violations due to congestion + if (budgeting_inf.if_set()) { + bool rcv_finished = false; + + /* This constant represents how much extra delay the budget increaser adds to the minimum and maximum delay budgets + * Experimentally this value delivers fast hold slack resolution, while not overwhelming the router + * Increasing this will make it resolve hold faster, but could result in lower circuit quality */ + constexpr float budget_increase_factor = 300e-12; + + if (itry > 5 && worst_negative_slack != 0) rcv_finished = budgeting_inf.increase_min_budgets_if_struggling(budget_increase_factor, timing_info, worst_negative_slack, netlist_pin_lookup); + if (rcv_finished) + rcv_finished_count--; + else + rcv_finished_count = RCV_FINISH_EARLY_COUNTDOWN; + } + } + + if (router_congestion_mode == RouterCongestionMode::CONFLICTED) { + //The design appears to have routing conflicts which are difficult to resolve: + // 1) Don't re-route legal connections due to delay. 
This allows + // the router to focus on the actual conflicts + // 2) Increase the net bounding boxes. This potentially allows + // the router to route around otherwise congested regions + // (at the cost of high run-time). + + //Increase the size of the net bounding boxes to give the router more + //freedom to find alternate paths. + // + //In the case of routing conflicts there are multiple connections competing + //for the same resources which cannot resolve the congestion themselves. + //In normal routing mode we try to keep the bounding boxes small to minimize + //run-time, but this can limit how far signals can detour (i.e. they can't + //route outside the bounding box), which can cause conflicts to oscillate back + //and forth without resolving. + // + //By scaling the bounding boxes here, we slowly increase the router's search + //space in hopes of it allowing signals to move further out of the way to + //alleviate the conflicts. + if (itry_conflicted_mode % BB_SCALE_ITER_COUNT == 0) { + //We scale the bounding boxes by BB_SCALE_FACTOR, + //every BB_SCALE_ITER_COUNT iterations. This ensures + //that we give the router some time (BB_SCALE_ITER_COUNT) to try to + //resolve/negotiate congestion at the new BB factor. + // + //Note that we increase the BB factor slowly to try and minimize + //the bounding box size (since larger bounding boxes slow the router down). + auto& grid = g_vpr_ctx.device().grid; + int max_grid_dim = std::max(grid.width(), grid.height()); + + //Scale by BB_SCALE_FACTOR but clip to grid size to avoid overflow + bb_fac = std::min(max_grid_dim, bb_fac * BB_SCALE_FACTOR); + + /** TODO: Disabled BB scaling for the baseline parallel router. 
Should re-enable it by building/updating partition tree on every iteration */ + // route_ctx.route_bb = load_route_bb(net_list, bb_fac); + } + + ++itry_conflicted_mode; + } + + if (timing_info) { + if (should_setup_lower_bound_connection_delays(itry, router_opts)) { + // first iteration sets up the lower bound connection delays since only timing is optimized for + connections_inf.set_stable_critical_path_delay(critical_path.delay()); + connections_inf.set_lower_bound_connection_delays(net_delay); + + //load budgets using information from uncongested delay information + budgeting_inf.load_route_budgets(net_delay, timing_info, netlist_pin_lookup, router_opts); + /*for debugging purposes*/ + // if (budgeting_inf.if_set()) { + // budgeting_inf.print_route_budget(std::string("route_budgets_") + std::to_string(itry) + ".txt", net_delay); + // } + + if (router_opts.routing_budgets_algorithm == YOYO) { + for (auto& router : routers) { + router.set_rcv_enabled(true); + } + } + + } else { + bool stable_routing_configuration = true; + + /* + * Determine if any connection need to be forcibly re-routed due to timing + */ + + //Yes, if explicitly enabled + bool should_ripup_for_delay = (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::ON); + + //Or, if things are not too congested + should_ripup_for_delay |= (router_opts.incr_reroute_delay_ripup == e_incr_reroute_delay_ripup::AUTO + && router_congestion_mode == RouterCongestionMode::NORMAL); + + if (should_ripup_for_delay) { + if (connections_inf.critical_path_delay_grew_significantly(critical_path.delay())) { + // only need to forcibly reroute if critical path grew significantly + stable_routing_configuration = connections_inf.forcibly_reroute_connections(router_opts.max_criticality, + timing_info, + netlist_pin_lookup, + net_delay); + } + } + + // not stable if any connection needs to be forcibly rerouted + if (stable_routing_configuration) { + 
connections_inf.set_stable_critical_path_delay(critical_path.delay()); + } + } + } else { + /* If timing analysis is not enabled, make sure that the criticalities and the + * net_delays stay as 0 so that wirelength can be optimized. */ + + for (auto net_id : net_list.nets()) { + for (unsigned int ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) { + net_delay[net_id][ipin] = 0.; + } + } + } + + if (router_opts.congestion_analysis) profiling::congestion_analysis(); + if (router_opts.fanout_analysis) profiling::time_on_fanout_analysis(); + // profiling::time_on_criticality_analysis(); + } + + if (routing_is_successful) { + VTR_LOG("Restoring best routing\n"); + + auto& router_ctx = g_vpr_ctx.mutable_routing(); + + /* Restore congestion from best route */ + for (auto net_id : net_list.nets()) { + if (route_ctx.route_trees[net_id]) + pathfinder_update_cost_from_route_tree(route_ctx.route_trees[net_id]->root(), -1); + if (best_routing[net_id]) + pathfinder_update_cost_from_route_tree(best_routing[net_id]->root(), 1); + } + router_ctx.route_trees = best_routing; + router_ctx.clb_opins_used_locally = best_clb_opins_used_locally; + + prune_unused_non_configurable_nets(connections_inf, net_list); + + if (timing_info) { + VTR_LOG("Critical path: %g ns\n", 1e9 * best_routing_metrics.critical_path.delay()); + } + + VTR_LOG("Successfully routed after %d routing iterations.\n", itry); + } else { + VTR_LOG("Routing failed.\n"); + + //If the routing fails, print the overused info + print_overused_nodes_status(router_opts, overuse_info); + +# ifdef VTR_ENABLE_DEBUG_LOGGING + if (f_router_debug) print_invalid_routing_info(net_list, is_flat); +# endif + } + + VTR_LOG("Final Net Connection Criticality Histogram:\n"); + print_router_criticality_histogram(net_list, *route_timing_info, netlist_pin_lookup, is_flat); + + VTR_ASSERT(router_stats.heap_pushes >= router_stats.intra_cluster_node_pushes); + VTR_ASSERT(router_stats.heap_pops >= router_stats.intra_cluster_node_pops); + 
VTR_LOG( + "Router Stats: total_nets_routed: %zu total_connections_routed: %zu total_heap_pushes: %zu total_heap_pops: %zu " + "total_internal_heap_pushes: %zu total_internal_heap_pops: %zu total_external_heap_pushes: %zu total_external_heap_pops: %zu ", + router_stats.nets_routed, router_stats.connections_routed, router_stats.heap_pushes, router_stats.heap_pops, + router_stats.intra_cluster_node_pushes, router_stats.intra_cluster_node_pops, + router_stats.inter_cluster_node_pushes, router_stats.inter_cluster_node_pops); + for (int node_type_idx = 0; node_type_idx < t_rr_type::NUM_RR_TYPES; node_type_idx++) { + VTR_LOG("total_external_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pushes[node_type_idx]); + VTR_LOG("total_external_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.inter_cluster_node_type_cnt_pops[node_type_idx]); + VTR_LOG("total_internal_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pushes[node_type_idx]); + VTR_LOG("total_internal_%s_pops: %zu ", rr_node_typename[node_type_idx], router_stats.intra_cluster_node_type_cnt_pops[node_type_idx]); + VTR_LOG("rt_node_%s_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_pushes[node_type_idx]); + VTR_LOG("rt_node_%s_high_fanout_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_high_fanout_pushes[node_type_idx]); + VTR_LOG("rt_node_%s_entire_tree_pushes: %zu ", rr_node_typename[node_type_idx], router_stats.rt_node_entire_tree_pushes[node_type_idx]); + } + + VTR_LOG("total_number_of_adding_all_rt: %zu ", router_stats.add_all_rt); + VTR_LOG("total_number_of_adding_high_fanout_rt: %zu ", router_stats.add_high_fanout_rt); + VTR_LOG("total_number_of_adding_all_rt_from_calling_high_fanout_rt: %zu ", router_stats.add_all_rt_from_high_fanout); + VTR_LOG("\n"); + + return routing_is_successful; +} + +/** Try routing a net. This calls timing_driven_route_net. 
+ * The only difference is that it returns a "retry_net" flag, which means that the net + * couldn't be routed with the default bounding box and needs a full-device BB. + * This is required when routing in parallel, because the threads ensure data separation based on BB size. + * The single-thread router just retries with a full-device BB and does not need to notify the caller. + * TODO: make the serial router follow this execution path to decrease code duplication */ +template +NetResultFlags try_parallel_route_net(ConnectionRouter& router, + const Netlist<>& net_list, + const ParentNetId& net_id, + int itry, + float pres_fac, + const t_router_opts& router_opts, + CBRR& connections_inf, + RouterStats& router_stats, + std::vector& pin_criticality, + std::vector>& rt_node_of_sink, + NetPinsMatrix& net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + route_budgets& budgeting_inf, + float worst_negative_slack, + const RoutingPredictor& routing_predictor, + const std::vector>& choking_spots, + bool is_flat) { + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + NetResultFlags flags; + + connections_inf.prepare_routing_for_net(net_id); + + bool reroute_for_hold = false; + if (budgeting_inf.if_set()) { + reroute_for_hold = (budgeting_inf.get_should_reroute(net_id)); + reroute_for_hold &= worst_negative_slack != 0; + } + + if (route_ctx.net_status.is_fixed(net_id)) { /* Skip pre-routed nets. */ + flags.success = true; + } else if (net_list.net_is_ignored(net_id)) { /* Skip ignored nets. 
*/ + flags.success = true; + } else if (!(reroute_for_hold) && !should_route_net(net_id, connections_inf, true)) { + flags.success = true; + } else { + // track time spent vs fanout + profiling::net_fanout_start(); + + vtr::Timer routing_timer; + flags = timing_driven_route_net(router, + net_list, + net_id, + itry, + pres_fac, + router_opts, + connections_inf, + router_stats, + pin_criticality, + rt_node_of_sink, + net_delay[net_id].data(), + netlist_pin_lookup, + timing_info, + pin_timing_invalidator, + budgeting_inf, + worst_negative_slack, + routing_predictor, + choking_spots, + is_flat); + + profiling::net_fanout_end(net_list.net_sinks(net_id).size()); + + /* Impossible to route? (disconnected rr_graph) */ + if (flags.success) { + route_ctx.net_status.set_is_routed(net_id, true); + } else { + VTR_LOG("Routing failed for net %d\n", net_id); + } + + flags.was_rerouted = true; //Flag to record whether routing was actually changed + } + return flags; +} + +/* Helper for route_partition_tree(). */ +template +void route_partition_tree_helper(tbb::task_group& g, + PartitionTreeNode& node, + RouteIterCtx& ctx, + vtr::linear_map& nets_to_retry) { + /* Sort so net with most sinks is routed first. 
*/ + std::sort(node.nets.begin(), node.nets.end(), [&](const ParentNetId id1, const ParentNetId id2) -> bool { + return ctx.net_list.net_sinks(id1).size() > ctx.net_list.net_sinks(id2).size(); + }); + + node.is_routable = true; + node.rerouted_nets.clear(); + + std::cout << "routing node with " << node.nets.size() << " nets\n"; + + vtr::Timer t; + for (auto net_id : node.nets) { + auto flags = try_parallel_route_net( + ctx.routers.local(), + ctx.net_list, + net_id, + ctx.itry, + ctx.pres_fac, + ctx.router_opts, + ctx.connections_inf, + ctx.router_stats.local(), + ctx.route_structs.local().pin_criticality, + ctx.route_structs.local().rt_node_of_sink, + ctx.net_delay, + ctx.netlist_pin_lookup, + ctx.timing_info, + ctx.pin_timing_invalidator, + ctx.budgeting_inf, + ctx.worst_negative_slack, + ctx.routing_predictor, + ctx.choking_spots[net_id], + ctx.is_flat); + + if (!flags.success && !flags.retry_with_full_bb) { + node.is_routable = false; + } + if (flags.was_rerouted) { + node.rerouted_nets.push_back(net_id); + } + /* If we need to retry this net with full-device BB, it will go up to the top + * of the tree, so remove it from this node and keep track of it */ + if (flags.retry_with_full_bb) { + node.nets.erase(std::remove(node.nets.begin(), node.nets.end(), net_id), node.nets.end()); + nets_to_retry[net_id] = true; + } + } + node.exec_times.push_back(t.elapsed_sec()); + + /* add left and right trees to task queue */ + if (node.left && node.right) { + g.run([&]() { + route_partition_tree_helper(g, *node.left, ctx, nets_to_retry); + }); + g.run([&]() { + route_partition_tree_helper(g, *node.right, ctx, nets_to_retry); + }); + } else { + VTR_ASSERT(!node.left && !node.right); // tree should have been built perfectly balanced + } +} + +/** Reduce results from partition tree into a single RouteIterResults */ +static void reduce_partition_tree_helper(const PartitionTreeNode& node, RouteIterResults& results) { + results.is_routable &= node.is_routable; + const std::vector& 
rerouted = node.rerouted_nets; + results.rerouted_nets.insert(results.rerouted_nets.end(), rerouted.begin(), rerouted.end()); + + if (node.left) + reduce_partition_tree_helper(*node.left, results); + if (node.right) + reduce_partition_tree_helper(*node.right, results); +} + +/** Route all nets in parallel using the partitioning information in the PartitionTree. + * + * @param[in, out] g TBB task group to dispatch tasks. + * @param[in, out] tree The partition tree. Non-const reference because iteration results get written on the nodes. + * @param[in, out] ctx RouteIterCtx containing all the necessary bits of state for routing. + * @return RouteIterResults combined from all threads. + * + * See comments in PartitionTreeNode for how parallel routing works. */ +template +RouteIterResults route_partition_tree(tbb::task_group& g, + PartitionTree& tree, + RouteIterCtx& ctx) { + auto& device_ctx = g_vpr_ctx.device(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); + + /* a net id -> retry? vector + * not a bool vector or a set because multiple threads may be writing on it */ + vtr::linear_map nets_to_retry; + + route_partition_tree_helper(g, tree.root(), ctx, nets_to_retry); + g.wait(); + + /* grow bounding box and add to top level if there is any net to retry */ + for (const auto& kv : nets_to_retry) { + if (kv.second) { + ParentNetId net_id = kv.first; + route_ctx.route_bb[net_id] = { + 0, + (int)(device_ctx.grid.width() - 1), + 0, + (int)(device_ctx.grid.height() - 1)}; + tree.root().nets.push_back(net_id); + } + } + + RouteIterResults out; + reduce_partition_tree_helper(tree.root(), out); + for (auto& thread_stats : ctx.router_stats) { + update_router_stats(out.stats, thread_stats); + } + return out; +} + +#endif // VPR_USE_TBB diff --git a/vpr/src/route/route_parallel.h b/vpr/src/route/route_parallel.h new file mode 100644 index 00000000000..b6b4766469f --- /dev/null +++ b/vpr/src/route/route_parallel.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include 
"connection_based_routing.h" +#include "netlist.h" +#include "vpr_types.h" + +#include "vpr_utils.h" +#include "timing_info_fwd.h" +#include "route_budgets.h" +#include "router_stats.h" +#include "router_lookahead.h" +#include "spatial_route_tree_lookup.h" +#include "connection_router_interface.h" +#include "heap_type.h" +#include "routing_predictor.h" + +#ifdef VPR_USE_TBB +/** Route in parallel. The number of threads is set by the global -j option to VPR. + * Return success status. */ +bool try_parallel_route(const Netlist<>& net_list, + const t_det_routing_arch& det_routing_arch, + const t_router_opts& router_opts, + const t_analysis_opts& analysis_opts, + const std::vector& segment_inf, + NetPinsMatrix& net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + std::shared_ptr delay_calc, + ScreenUpdatePriority first_iteration_priority, + bool is_flat); +#endif diff --git a/vpr/src/route/route_path_manager.cpp b/vpr/src/route/route_path_manager.cpp index fa6c91fbd81..a7ba09e522d 100644 --- a/vpr/src/route/route_path_manager.cpp +++ b/vpr/src/route/route_path_manager.cpp @@ -12,7 +12,7 @@ PathManager::~PathManager() { } bool PathManager::node_exists_in_tree(t_heap_path* path_data, - RRNodeId& to_node) { + RRNodeId to_node) { // Prevent seg faults for searching path data structures that haven't been created yet if (!path_data || !is_enabled_) return false; @@ -43,9 +43,9 @@ void PathManager::insert_backwards_path_into_traceback(t_heap_path* path_data, f if (!is_enabled_) return; for (unsigned i = 1; i < path_data->edge.size() - 1; i++) { - size_t node_2 = (size_t)path_data->path_rr[i]; + RRNodeId node_2 = path_data->path_rr[i]; RREdgeId edge = path_data->edge[i - 1]; - route_ctx.rr_node_route_inf[node_2].prev_node = (size_t)path_data->path_rr[i - 1]; + route_ctx.rr_node_route_inf[node_2].prev_node = path_data->path_rr[i - 1]; route_ctx.rr_node_route_inf[node_2].prev_edge = edge; 
route_ctx.rr_node_route_inf[node_2].path_cost = cost; route_ctx.rr_node_route_inf[node_2].backward_path_cost = backward_path_cost; diff --git a/vpr/src/route/route_path_manager.h b/vpr/src/route/route_path_manager.h index 1ea10c6fbf0..73f0ddae9ef 100644 --- a/vpr/src/route/route_path_manager.h +++ b/vpr/src/route/route_path_manager.h @@ -61,7 +61,7 @@ class PathManager { // This is needed for RCV as the non minimum distance pathfinding can lead to illegal loops // By keeping a set of the current route tree for a net, as well as checking the current path we can prevent this bool node_exists_in_tree(t_heap_path* path_data, - RRNodeId& to_node); + RRNodeId to_node); // Insert a node into the current route tree set indicating that it's currently in routing // Use this whenever updating the route tree diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index 48074f717cb..9a2197ed3ca 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -7,7 +7,9 @@ #include #include +#include "NetPinTimingInvalidator.h" #include "netlist_fwd.h" +#include "rr_graph_fwd.h" #include "vtr_assert.h" #include "vtr_log.h" #include "vtr_time.h" @@ -34,7 +36,7 @@ // all functions in profiling:: namespace, which are only activated if PROFILE is defined #include "route_profiling.h" -#include "timing_info.h" +#include "concrete_timing_info.h" #include "timing_util.h" #include "route_budgets.h" #include "binary_heap.h" @@ -44,29 +46,6 @@ #include "tatum/TimingReporter.hpp" #include "overuse_report.h" -#define CONGESTED_SLOPE_VAL -0.04 - -enum class RouterCongestionMode { - NORMAL, - CONFLICTED -}; - -//identifies the two breakpoint types in routing -typedef enum router_breakpoint_type { - BP_ROUTE_ITER, - BP_NET_ID -} bp_router_type; - -struct RoutingMetrics { - size_t used_wirelength = 0; - - float sWNS = std::numeric_limits::quiet_NaN(); - float sTNS = std::numeric_limits::quiet_NaN(); - float hWNS = std::numeric_limits::quiet_NaN(); - float 
hTNS = std::numeric_limits::quiet_NaN(); - tatum::TimingPathInfo critical_path; -}; - /* * File-scope variables */ @@ -86,34 +65,58 @@ static int num_routing_failed = 0; /******************** Subroutines local to route_timing.cpp ********************/ +/** Attempt to route a single sink (target_pin) in a net. + * In the process, update global pathfinder costs, rr_node_route_inf and extend the global RouteTree + * for this net. + * + * @param router The ConnectionRouter instance + * @param net_list Input netlist + * @param net_id + * @param itarget # of this connection in the net (only used for debug output) + * @param target_pin # of this sink in the net (TODO: is it the same thing as itarget?) + * @param cost_params + * @param router_opts + * @param[in, out] tree RouteTree describing the current routing state + * @param rt_node_of_sink Lookup from target_pin-like indices (indicating SINK nodes) to RouteTreeNodes + * @param spatial_rt_lookup + * @param router_stats + * @param budgeting_inf + * @param routing_predictor + * @param choking_spots + * @param is_flat + * @return NetResultFlags for this sink to be bubbled up through timing_driven_route_net */ template -static bool timing_driven_route_sink(ConnectionRouter& router, - const Netlist<>& net_list, - ParentNetId net_id, - unsigned itarget, - int target_pin, - const t_conn_cost_params cost_params, - const t_router_opts& router_opts, - RouteTree& tree, - std::vector>& rt_node_of_sink, - SpatialRouteTreeLookup& spatial_rt_lookup, - RouterStats& router_stats, - route_budgets& budgeting_inf, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat); +static NetResultFlags timing_driven_route_sink(ConnectionRouter& router, + const Netlist<>& net_list, + ParentNetId net_id, + unsigned itarget, + int target_pin, + const t_conn_cost_params cost_params, + const t_router_opts& router_opts, + RouteTree& tree, + std::vector>& rt_node_of_sink, + SpatialRouteTreeLookup& 
spatial_rt_lookup, + RouterStats& router_stats, + route_budgets& budgeting_inf, + const RoutingPredictor& routing_predictor, + const std::vector>& choking_spots, + bool is_flat); +/** Return tuple of: + * bool: Did we find a path for each sink in this net? + * bool: Should the caller retry with a full-device bounding box? */ template -static bool timing_driven_pre_route_to_clock_root(ConnectionRouter& router, - ParentNetId net_id, - const Netlist<>& net_list, - int sink_node, - const t_conn_cost_params cost_params, - int high_fanout_threshold, - RouteTree& tree, - SpatialRouteTreeLookup& spatial_rt_lookup, - RouterStats& router_stats, - bool is_flat); +static std::tuple timing_driven_pre_route_to_clock_root(ConnectionRouter& router, + ParentNetId net_id, + const Netlist<>& net_list, + RRNodeId sink_node, + const t_conn_cost_params cost_params, + int high_fanout_threshold, + RouteTree& tree, + SpatialRouteTreeLookup& spatial_rt_lookup, + RouterStats& router_stats, + bool is_flat, + bool can_grow_bb); static void setup_routing_resources(int itry, ParentNetId net_id, @@ -125,9 +128,6 @@ static void setup_routing_resources(int itry, const t_router_opts& router_opts, bool ripup_high_fanout_nets); -static bool timing_driven_check_net_delays(const Netlist<>& net_list, - NetPinsMatrix& net_delay); - static void update_net_delays_from_route_tree(float* net_delay, const Netlist<>& net_list, std::vector>& rt_node_of_sink, @@ -135,12 +135,6 @@ static void update_net_delays_from_route_tree(float* net_delay, TimingInfo* timing_info, NetPinTimingInvalidator* pin_timing_invalidator); -static bool should_route_net(ParentNetId net_id, - CBRR& connections_inf, - bool if_force_reroute); - -static bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info); - static bool check_hold(const t_router_opts& router_opts, float worst_neg_slack); static float get_net_pin_criticality(const std::shared_ptr timing_info, @@ -160,91 +154,8 @@ struct 
more_sinks_than { } }; -static size_t calculate_wirelength_available(); -static WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t available_wirelength); - -static void print_route_status_header(); -static void print_route_status(int itry, - double elapsed_sec, - float pres_fac, - int num_bb_updated, - const RouterStats& router_stats, - const OveruseInfo& overuse_info, - const WirelengthInfo& wirelength_info, - std::shared_ptr timing_info, - float est_success_iteration); - -static void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info); - -static void print_router_criticality_histogram(const Netlist<>& net_list, - const SetupTimingInfo& timing_info, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - bool is_flat); - static bool is_high_fanout(int fanout, int fanout_threshold); -static size_t dynamic_update_bounding_boxes(const std::vector& updated_nets, - const Netlist<>& net_list, - int high_fanout_threshold); - -static t_bb calc_current_bb(const RouteTree& tree); - -static bool is_better_quality_routing(const vtr::vector>& best_routing, - const RoutingMetrics& best_routing_metrics, - const WirelengthInfo& wirelength_info, - std::shared_ptr timing_info); - -static bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts, - int itry_since_last_convergence, - std::shared_ptr timing_info, - const RoutingMetrics& best_routing_metrics); - -static void generate_route_timing_reports(const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - const SetupTimingInfo& timing_info, - const RoutingDelayCalculator& delay_calc, - bool is_flat); - -static void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node); - -static void prune_unused_non_configurable_nets(CBRR& connections_inf, - const Netlist<>& net_list); - -static void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead, - const Netlist<>& net_list, - const vtr::vector>& net_rr_terminals, - 
NetPinsMatrix& net_delay, - const RRGraphView& rr_graph, - bool is_flat); - -static void update_route_stats(RouterStats& router_stats, RouterStats& router_iteration_stats); - -static void init_route_stats(RouterStats& router_stats); - -/** - * If flat_routing and has_choking_spot are true, there are some choke points inside the cluster which would increase the convergence time of routing. - * To address this issue, the congestion cost of those choke points needs to decrease. This function identify those choke points for each net, - * and since the amount of congestion reduction is dependant on the number sinks reachable from that choke point, it also store the number of reachable sinks - * for each choke point. - * @param net_list - * @param net_terminal_groups [Net_id][group_id] -> rr_node_id of the pins in the group - * @param net_terminal_group_num [Net_id][pin_id] -> group_id - * @param has_choking_spot is true if the given architecture has choking spots inside the cluster - * @param is_flat is true if flat_routing is enabled - * @return [Net_id][pin_id] -> [choke_point_rr_node_id, number of sinks reachable by this choke point] - */ -vtr::vector>> set_nets_choking_spots(const Netlist<>& net_list, - const vtr::vector>>& net_terminal_groups, - const vtr::vector>& net_terminal_group_num, - bool has_choking_spot, - bool is_flat); - -#ifndef NO_GRAPHICS -void update_router_info_and_check_bp(bp_router_type type, int net_id); -#endif - // The reason that try_timing_driven_route_tmpl (and descendents) are being // templated over is because using a virtual interface instead fully templating // the router results in a 5% runtime increase. @@ -323,6 +234,9 @@ bool try_timing_driven_route_tmpl(const Netlist<>& net_list, * must have already been allocated, and net_delay must have been allocated. * * Returns true if the routing succeeds, false otherwise. */ + // Make sure template type ConnectionRouter is a ConnectionRouterInterface. 
+ static_assert(std::is_base_of::value, "ConnectionRouter must implement the ConnectionRouterInterface"); + const auto& device_ctx = g_vpr_ctx.device(); const auto& atom_ctx = g_vpr_ctx.atom(); auto& route_ctx = g_vpr_ctx.mutable_routing(); @@ -438,7 +352,6 @@ bool try_timing_driven_route_tmpl(const Netlist<>& net_list, t_clb_opins_used best_clb_opins_used_locally; RoutingMetrics best_routing_metrics; int legal_convergence_count = 0; - std::vector scratch; ConnectionRouter router( device_ctx.grid, @@ -450,9 +363,6 @@ bool try_timing_driven_route_tmpl(const Netlist<>& net_list, route_ctx.rr_node_route_inf, is_flat); - // Make sure template type ConnectionRouter is a ConnectionRouterInterface. - static_assert(std::is_base_of::value, "ConnectionRouter must implement the ConnectionRouterInterface"); - /* * On the first routing iteration ignore congestion to get reasonable net * delay estimates. Set criticalities to 1 when timing analysis is on to @@ -494,16 +404,18 @@ bool try_timing_driven_route_tmpl(const Netlist<>& net_list, std::unique_ptr pin_timing_invalidator; if (timing_info) { - pin_timing_invalidator = std::make_unique(net_list, - netlist_pin_lookup, - atom_ctx.nlist, - atom_ctx.lookup, - *timing_info->timing_graph(), - is_flat); + pin_timing_invalidator = make_net_pin_timing_invalidator( + router_opts.timing_update_type, + net_list, + netlist_pin_lookup, + atom_ctx.nlist, + atom_ctx.lookup, + *timing_info->timing_graph(), + is_flat); } RouterStats router_stats; - init_route_stats(router_stats); + init_router_stats(router_stats); timing_driven_route_structs route_structs(net_list); float prev_iter_cumm_time = 0; vtr::Timer iteration_timer; @@ -525,7 +437,7 @@ bool try_timing_driven_route_tmpl(const Netlist<>& net_list, print_route_status_header(); for (itry = 1; itry <= router_opts.max_router_iterations; ++itry) { RouterStats router_iteration_stats; - init_route_stats(router_iteration_stats); + init_router_stats(router_iteration_stats); std::vector 
rerouted_nets; /* Reset "is_routed" and "is_fixed" flags to indicate nets not pre-routed (yet) */ @@ -548,33 +460,31 @@ bool try_timing_driven_route_tmpl(const Netlist<>& net_list, * Route each net */ for (auto net_id : sorted_nets) { - bool was_rerouted = false; - bool is_routable = try_timing_driven_route_net(router, - net_list, - net_id, - itry, - pres_fac, - router_opts, - connections_inf, - router_iteration_stats, - route_structs.pin_criticality, - route_structs.rt_node_of_sink, - net_delay, - netlist_pin_lookup, - route_timing_info, - pin_timing_invalidator.get(), - budgeting_inf, - was_rerouted, - worst_negative_slack, - routing_predictor, - choking_spots[net_id], - is_flat); - - if (!is_routable) { - return (false); //Impossible to route + NetResultFlags flags = try_timing_driven_route_net(router, + net_list, + net_id, + itry, + pres_fac, + router_opts, + connections_inf, + router_iteration_stats, + route_structs.pin_criticality, + route_structs.rt_node_of_sink, + net_delay, + netlist_pin_lookup, + route_timing_info, + pin_timing_invalidator.get(), + budgeting_inf, + worst_negative_slack, + routing_predictor, + choking_spots[net_id], + is_flat); + + if (!flags.success) { + return false; //Impossible to route } - if (was_rerouted) { + if (flags.was_rerouted) { rerouted_nets.push_back(net_id); #ifndef NO_GRAPHICS update_router_info_and_check_bp(BP_NET_ID, size_t(net_id)); @@ -647,7 +557,7 @@ bool try_timing_driven_route_tmpl(const Netlist<>& net_list, } //Update router stats (total) - update_route_stats(router_stats, router_iteration_stats); + update_router_stats(router_stats, router_iteration_stats); /* * Are we finished? 
@@ -836,7 +746,8 @@ bool try_timing_driven_route_tmpl(const Netlist<>& net_list, // budgeting_inf.print_route_budget(std::string("route_budgets_") + std::to_string(itry) + ".txt", net_delay); // } - if (router_opts.routing_budgets_algorithm == YOYO) router.set_rcv_enabled(true); + if (router_opts.routing_budgets_algorithm == YOYO) + router.set_rcv_enabled(true); } else { bool stable_routing_configuration = true; @@ -948,29 +859,28 @@ bool try_timing_driven_route_tmpl(const Netlist<>& net_list, } template -bool try_timing_driven_route_net(ConnectionRouter& router, - const Netlist<>& net_list, - const ParentNetId& net_id, - int itry, - float pres_fac, - const t_router_opts& router_opts, - CBRR& connections_inf, - RouterStats& router_stats, - std::vector& pin_criticality, - std::vector>& rt_node_of_sink, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - route_budgets& budgeting_inf, - bool& was_rerouted, - float worst_negative_slack, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat) { +NetResultFlags try_timing_driven_route_net(ConnectionRouter& router, + const Netlist<>& net_list, + const ParentNetId& net_id, + int itry, + float pres_fac, + const t_router_opts& router_opts, + CBRR& connections_inf, + RouterStats& router_stats, + std::vector& pin_criticality, + std::vector>& rt_node_of_sink, + NetPinsMatrix& net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + route_budgets& budgeting_inf, + float worst_negative_slack, + const RoutingPredictor& routing_predictor, + const std::vector>& choking_spots, + bool is_flat) { auto& route_ctx = g_vpr_ctx.mutable_routing(); - bool is_routed = false; + NetResultFlags flags; connections_inf.prepare_routing_for_net(net_id); @@ -981,47 +891,48 @@ bool 
try_timing_driven_route_net(ConnectionRouter& router, } if (route_ctx.net_status.is_fixed(net_id)) { /* Skip pre-routed nets. */ - is_routed = true; + flags.success = true; } else if (net_list.net_is_ignored(net_id)) { /* Skip ignored nets. */ - is_routed = true; + flags.success = true; } else if (!(reroute_for_hold) && !should_route_net(net_id, connections_inf, true)) { - is_routed = true; + flags.success = true; } else { // track time spent vs fanout profiling::net_fanout_start(); - is_routed = timing_driven_route_net(router, - net_list, - net_id, - itry, - pres_fac, - router_opts, - connections_inf, - router_stats, - pin_criticality, - rt_node_of_sink, - net_delay[net_id].data(), - netlist_pin_lookup, - timing_info, - pin_timing_invalidator, - budgeting_inf, - worst_negative_slack, - routing_predictor, - choking_spots, - is_flat); + flags = timing_driven_route_net(router, + net_list, + net_id, + itry, + pres_fac, + router_opts, + connections_inf, + router_stats, + pin_criticality, + rt_node_of_sink, + net_delay[net_id].data(), + netlist_pin_lookup, + timing_info, + pin_timing_invalidator, + budgeting_inf, + worst_negative_slack, + routing_predictor, + choking_spots, + is_flat); profiling::net_fanout_end(net_list.net_sinks(net_id).size()); /* Impossible to route? 
(disconnected rr_graph) */ - if (is_routed) { + if (flags.success) { route_ctx.net_status.set_is_routed(net_id, true); } else { VTR_LOG("Routing failed for net %d\n", net_id); } - was_rerouted = true; //Flag to record whether routing was actually changed + flags.was_rerouted = true; // Flag to record whether routing was actually changed } - return (is_routed); + + return flags; } int get_max_pins_per_net(const Netlist<>& net_list) { @@ -1035,30 +946,25 @@ int get_max_pins_per_net(const Netlist<>& net_list) { } template -bool timing_driven_route_net(ConnectionRouter& router, - const Netlist<>& net_list, - ParentNetId net_id, - int itry, - float pres_fac, - const t_router_opts& router_opts, - CBRR& connections_inf, - RouterStats& router_stats, - std::vector& pin_criticality, - std::vector>& rt_node_of_sink, - float* net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - route_budgets& budgeting_inf, - float worst_neg_slack, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat) { - /* Returns true as long as found some way to hook up this net, even if that * - * way resulted in overuse of resources (congestion). If there is no way * - * to route this net, even ignoring congestion, it returns false. In this * - * case the rr_graph is disconnected and you can give up. 
*/ - +NetResultFlags timing_driven_route_net(ConnectionRouter& router, + const Netlist<>& net_list, + ParentNetId net_id, + int itry, + float pres_fac, + const t_router_opts& router_opts, + CBRR& connections_inf, + RouterStats& router_stats, + std::vector& pin_criticality, + std::vector>& rt_node_of_sink, + float* net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + route_budgets& budgeting_inf, + float worst_neg_slack, + const RoutingPredictor& routing_predictor, + const std::vector>& choking_spots, + bool is_flat) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; auto& route_ctx = g_vpr_ctx.mutable_routing(); @@ -1067,6 +973,8 @@ bool timing_driven_route_net(ConnectionRouter& router, VTR_LOGV_DEBUG(f_router_debug, "Routing Net %zu (%zu sinks)\n", size_t(net_id), num_sinks); + NetResultFlags flags; + setup_routing_resources( itry, net_id, @@ -1093,7 +1001,7 @@ bool timing_driven_route_net(ConnectionRouter& router, // after this point the route tree is correct // remaining_targets from this point on are the **pin indices** that have yet to be routed - auto& remaining_targets = connections_inf.get_remaining_targets(); + auto& remaining_targets = connections_inf.get_remaining_targets(net_id); // calculate criticality of remaining target pins for (int ipin : remaining_targets) { @@ -1131,7 +1039,7 @@ bool timing_driven_route_net(ConnectionRouter& router, // Pre-route to clock source for clock nets (marked as global nets) if (net_list.net_is_global(net_id) && router_opts.two_stage_clock_routing) { //VTR_ASSERT(router_opts.clock_modeling == DEDICATED_NETWORK); - int sink_node = device_ctx.virtual_clock_network_root_idx; + RRNodeId sink_node(device_ctx.virtual_clock_network_root_idx); enable_router_debug(router_opts, net_id, sink_node, itry, &router); @@ -1140,18 +1048,25 @@ bool timing_driven_route_net(ConnectionRouter& router, // Set to 
the max timing criticality which should intern minimize clock insertion // delay by selecting a direct route from the clock source to the virtual sink cost_params.criticality = router_opts.max_criticality; - if (!timing_driven_pre_route_to_clock_root(router, - net_id, - net_list, - sink_node, - cost_params, - router_opts.high_fanout_threshold, - tree, - spatial_route_tree_lookup, - router_stats, - is_flat)) { - return false; - } + + /* Is the connection router allowed to grow the bounding box? That's not the case + * when routing in parallel, so disallow it. TODO: Have both timing_driven and parallel + * routers handle this in the same way */ + bool can_grow_bb = (router_opts.router_algorithm != PARALLEL); + + std::tie(flags.success, flags.retry_with_full_bb) = timing_driven_pre_route_to_clock_root(router, + net_id, + net_list, + sink_node, + cost_params, + router_opts.high_fanout_threshold, + tree, + spatial_route_tree_lookup, + router_stats, + is_flat, + can_grow_bb); + + return flags; } if (budgeting_inf.if_set()) { @@ -1162,7 +1077,7 @@ bool timing_driven_route_net(ConnectionRouter& router, for (unsigned itarget = 0; itarget < remaining_targets.size(); ++itarget) { int target_pin = remaining_targets[itarget]; - int sink_rr = route_ctx.net_rr_terminals[net_id][target_pin]; + RRNodeId sink_rr = route_ctx.net_rr_terminals[net_id][target_pin]; enable_router_debug(router_opts, net_id, sink_rr, itry, &router); @@ -1179,25 +1094,31 @@ bool timing_driven_route_net(ConnectionRouter& router, profiling::conn_start(); // build a branch in the route tree to the target - if (!timing_driven_route_sink(router, - net_list, - net_id, - itarget, - target_pin, - cost_params, - router_opts, - tree, - rt_node_of_sink, - spatial_route_tree_lookup, - router_stats, - budgeting_inf, - routing_predictor, - choking_spots, - is_flat)) - return false; + auto sink_flags = timing_driven_route_sink(router, + net_list, + net_id, + itarget, + target_pin, + cost_params, + router_opts, + tree, + 
rt_node_of_sink, + spatial_route_tree_lookup, + router_stats, + budgeting_inf, + routing_predictor, + choking_spots, + is_flat); + + flags.retry_with_full_bb |= sink_flags.retry_with_full_bb; - profiling::conn_finish(route_ctx.net_rr_terminals[net_id][0], - sink_rr, + if (!sink_flags.success) { + flags.success = false; + return flags; + } + + profiling::conn_finish(size_t(route_ctx.net_rr_terminals[net_id][0]), + size_t(sink_rr), pin_criticality[target_pin]); ++router_stats.connections_routed; @@ -1222,24 +1143,27 @@ bool timing_driven_route_net(ConnectionRouter& router, } } - VTR_ASSERT_MSG(g_vpr_ctx.routing().rr_node_route_inf[size_t(tree.root().inode)].occ() <= rr_graph.node_capacity(tree.root().inode), "SOURCE should never be congested"); + VTR_ASSERT_MSG(g_vpr_ctx.routing().rr_node_route_inf[tree.root().inode].occ() <= rr_graph.node_capacity(tree.root().inode), "SOURCE should never be congested"); VTR_LOGV_DEBUG(f_router_debug, "Routed Net %zu (%zu sinks)\n", size_t(net_id), num_sinks); router.empty_rcv_route_tree_set(); // ? 
- return true; + + flags.success = true; + return flags; } template -static bool timing_driven_pre_route_to_clock_root(ConnectionRouter& router, - ParentNetId net_id, - const Netlist<>& net_list, - int sink_node, - const t_conn_cost_params cost_params, - int high_fanout_threshold, - RouteTree& tree, - SpatialRouteTreeLookup& spatial_rt_lookup, - RouterStats& router_stats, - bool is_flat) { +static std::tuple timing_driven_pre_route_to_clock_root(ConnectionRouter& router, + ParentNetId net_id, + const Netlist<>& net_list, + RRNodeId sink_node, + const t_conn_cost_params cost_params, + int high_fanout_threshold, + RouteTree& tree, + SpatialRouteTreeLookup& spatial_rt_lookup, + RouterStats& router_stats, + bool is_flat, + bool can_grow_bb) { const auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.mutable_routing(); auto& m_route_ctx = g_vpr_ctx.mutable_routing(); @@ -1253,20 +1177,21 @@ static bool timing_driven_pre_route_to_clock_root(ConnectionRouter& router, router.clear_modified_rr_node_info(); - bool found_path; + bool found_path, retry_with_full_bb; t_heap cheapest; ConnectionParameters conn_params(net_id, -1, false, std::unordered_map()); - std::tie(found_path, cheapest) = router.timing_driven_route_connection_from_route_tree( + std::tie(found_path, retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree( tree.root(), sink_node, cost_params, bounding_box, router_stats, - conn_params); + conn_params, + can_grow_bb); // TODO: Parts of the rest of this function are repetitive to code in timing_driven_route_sink. Should refactor. 
if (!found_path) { @@ -1279,7 +1204,7 @@ static bool timing_driven_pre_route_to_clock_root(ConnectionRouter& router, if (f_router_debug) { update_screen(ScreenUpdatePriority::MAJOR, "Unable to route connection.", ROUTING, nullptr); } - return false; + return std::make_tuple(found_path, retry_with_full_bb); } profiling::sink_criticality_end(cost_params.criticality); @@ -1312,33 +1237,33 @@ static bool timing_driven_pre_route_to_clock_root(ConnectionRouter& router, m_route_ctx.rr_node_route_inf[sink_node].set_occ(0); // routed to a sink successfully - return true; + return std::make_tuple(true, false); } template -static bool timing_driven_route_sink(ConnectionRouter& router, - const Netlist<>& net_list, - ParentNetId net_id, - unsigned itarget, - int target_pin, - const t_conn_cost_params cost_params, - const t_router_opts& router_opts, - RouteTree& tree, - std::vector>& rt_node_of_sink, - SpatialRouteTreeLookup& spatial_rt_lookup, - RouterStats& router_stats, - route_budgets& budgeting_inf, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat) { - /* Build a path from the existing route tree to the target_node - * add this branch to the existing route tree and update pathfinder costs and rr_node_route_inf to reflect this */ +static NetResultFlags timing_driven_route_sink(ConnectionRouter& router, + const Netlist<>& net_list, + ParentNetId net_id, + unsigned itarget, + int target_pin, + const t_conn_cost_params cost_params, + const t_router_opts& router_opts, + RouteTree& tree, + std::vector>& rt_node_of_sink, + SpatialRouteTreeLookup& spatial_rt_lookup, + RouterStats& router_stats, + route_budgets& budgeting_inf, + const RoutingPredictor& routing_predictor, + const std::vector>& choking_spots, + bool is_flat) { const auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.mutable_routing(); + NetResultFlags flags; + profiling::sink_criticality_start(); - int sink_node = 
route_ctx.net_rr_terminals[net_id][target_pin]; + RRNodeId sink_node = route_ctx.net_rr_terminals[net_id][target_pin]; VTR_LOGV_DEBUG(f_router_debug, "Net %zu Target %d (%s)\n", size_t(net_id), itarget, describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, sink_node, is_flat).c_str()); router.clear_modified_rr_node_info(); @@ -1347,6 +1272,10 @@ static bool timing_driven_route_sink(ConnectionRouter& router, t_heap cheapest; t_bb bounding_box = route_ctx.route_bb[net_id]; + /* Is the connection router allowed to grow the bounding box? That's not the case + * when routing in parallel, so disallow it. */ + bool can_grow_bb = (router_opts.router_algorithm != PARALLEL); + bool net_is_global = net_list.net_is_global(net_id); bool high_fanout = is_high_fanout(net_list.net_sinks(net_id).size(), router_opts.high_fanout_threshold); constexpr float HIGH_FANOUT_CRITICALITY_THRESHOLD = 0.9; @@ -1360,20 +1289,22 @@ static bool timing_driven_route_sink(ConnectionRouter& router, //However, if the current sink is 'critical' from a timing perspective, we put the entire route tree back onto //the heap to ensure it has more flexibility to find the best path. 
if (high_fanout && !sink_critical && !net_is_global && !net_is_clock && -routing_predictor.get_slope() > router_opts.high_fanout_max_slope) { - std::tie(found_path, cheapest) = router.timing_driven_route_connection_from_route_tree_high_fanout(tree.root(), - sink_node, - cost_params, - bounding_box, - spatial_rt_lookup, - router_stats, - conn_params); + std::tie(found_path, flags.retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree_high_fanout(tree.root(), + sink_node, + cost_params, + bounding_box, + spatial_rt_lookup, + router_stats, + conn_params, + can_grow_bb); } else { - std::tie(found_path, cheapest) = router.timing_driven_route_connection_from_route_tree(tree.root(), - sink_node, - cost_params, - bounding_box, - router_stats, - conn_params); + std::tie(found_path, flags.retry_with_full_bb, cheapest) = router.timing_driven_route_connection_from_route_tree(tree.root(), + sink_node, + cost_params, + bounding_box, + router_stats, + conn_params, + can_grow_bb); } if (!found_path) { @@ -1387,12 +1318,13 @@ static bool timing_driven_route_sink(ConnectionRouter& router, if (f_router_debug) { update_screen(ScreenUpdatePriority::MAJOR, "Unable to route connection.", ROUTING, nullptr); } - return false; + flags.success = false; + return flags; } profiling::sink_criticality_end(cost_params.criticality); - int inode = cheapest.index; + RRNodeId inode(cheapest.index); route_ctx.rr_node_route_inf[inode].target_flag--; /* Connected to this SINK. 
*/ vtr::optional new_branch, new_sink; @@ -1422,7 +1354,8 @@ static bool timing_driven_route_sink(ConnectionRouter& router, router.reset_path_costs(); // routed to a sink successfully - return true; + flags.success = true; + return flags; } static void setup_routing_resources(int itry, @@ -1459,17 +1392,17 @@ static void setup_routing_resources(int itry, tree = RouteTree(net_id); for (unsigned int sink_pin = 1; sink_pin <= num_sinks; ++sink_pin) - connections_inf.toreach_rr_sink(sink_pin); + connections_inf.toreach_rr_sink(net_id, sink_pin); // since all connections will be rerouted for this net, clear all of net's forced reroute flags - connections_inf.clear_force_reroute_for_net(); + connections_inf.clear_force_reroute_for_net(net_id); // when we don't prune the tree, we also don't know the sink node indices // thus we'll use functions that act on pin indices like mark_ends instead // of their versions that act on node indices directly like mark_remaining_ends mark_ends(net_list, net_id); } else { - auto& reached_sinks = connections_inf.get_reached_rt_sinks(); - auto& remaining_targets = connections_inf.get_remaining_targets(); + auto& reached_sinks = connections_inf.get_reached_rt_sinks(net_id); + auto& remaining_targets = connections_inf.get_remaining_targets(net_id); profiling::net_rebuild_start(); @@ -1552,25 +1485,23 @@ void update_rr_base_costs(int fanout) { } } -/** Traverses down a route tree and updates rr_node_inf for all nodes - * to reflect that these nodes have already been routed to */ -static void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node) { +void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node) { auto& route_ctx = g_vpr_ctx.mutable_routing(); for (auto& child : rt_node.child_nodes()) { RRNodeId inode = child.inode; - route_ctx.rr_node_route_inf[size_t(inode)].prev_node = NO_PREVIOUS; - route_ctx.rr_node_route_inf[size_t(inode)].prev_edge = RREdgeId::INVALID(); + route_ctx.rr_node_route_inf[inode].prev_node = 
RRNodeId::INVALID(); + route_ctx.rr_node_route_inf[inode].prev_edge = RREdgeId::INVALID(); // path cost should be unset - VTR_ASSERT(std::isinf(route_ctx.rr_node_route_inf[size_t(inode)].path_cost)); - VTR_ASSERT(std::isinf(route_ctx.rr_node_route_inf[size_t(inode)].backward_path_cost)); + VTR_ASSERT(std::isinf(route_ctx.rr_node_route_inf[inode].path_cost)); + VTR_ASSERT(std::isinf(route_ctx.rr_node_route_inf[inode].backward_path_cost)); update_rr_route_inf_from_tree(child); } } -static bool timing_driven_check_net_delays(const Netlist<>& net_list, NetPinsMatrix& net_delay) { +bool timing_driven_check_net_delays(const Netlist<>& net_list, NetPinsMatrix& net_delay) { constexpr float ERROR_TOL = 0.0001; /* Checks that the net delays computed incrementally during timing driven * @@ -1628,9 +1559,9 @@ static void update_net_delays_from_route_tree(float* net_delay, } /* Detect if net should be routed or not */ -static bool should_route_net(ParentNetId net_id, - CBRR& connections_inf, - bool if_force_reroute) { +bool should_route_net(ParentNetId net_id, + CBRR& connections_inf, + bool if_force_reroute) { auto& route_ctx = g_vpr_ctx.routing(); auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; @@ -1643,7 +1574,7 @@ static bool should_route_net(ParentNetId net_id, /* Walk over all rt_nodes in the net */ for (auto& rt_node : route_ctx.route_trees[net_id]->all_nodes()) { RRNodeId inode = rt_node.inode; - int occ = route_ctx.rr_node_route_inf[size_t(inode)].occ(); + int occ = route_ctx.rr_node_route_inf[inode].occ(); int capacity = rr_graph.node_capacity(inode); if (occ > capacity) { @@ -1653,22 +1584,19 @@ static bool should_route_net(ParentNetId net_id, if (rt_node.is_leaf()) { //End of a branch // even if net is fully routed, not complete if parts of it should get ripped up (EXPERIMENTAL) if (if_force_reroute) { - if (connections_inf.should_force_reroute_connection(size_t(inode))) { + if 
(connections_inf.should_force_reroute_connection(net_id, inode)) { return true; } } } } - VTR_ASSERT(connections_inf.get_remaining_targets().empty()); + VTR_ASSERT(connections_inf.get_remaining_targets(net_id).empty()); return false; /* Current route has no overuse */ } -static bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info) { - /* Early exit code for cases where it is obvious that a successful route will not be found - * Heuristic: If total wirelength used in first routing iteration is X% of total available wirelength, exit */ - +bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info) { if (wirelength_info.used_wirelength_ratio() > router_opts.init_wirelength_abort_threshold) { VTR_LOG("Wire length usage ratio %g exceeds limit of %g, fail routing.\n", wirelength_info.used_wirelength_ratio(), @@ -1727,7 +1655,7 @@ static float get_net_pin_criticality(const std::shared_ptr return pin_criticality; } -static size_t calculate_wirelength_available() { +size_t calculate_wirelength_available() { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; @@ -1742,13 +1670,16 @@ static size_t calculate_wirelength_available() { return available_wirelength; } -static WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t available_wirelength) { +WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t available_wirelength) { size_t used_wirelength = 0; VTR_ASSERT(available_wirelength > 0); + auto& route_ctx = g_vpr_ctx.routing(); + for (auto net_id : net_list.nets()) { if (!net_list.net_is_ignored(net_id) - && net_list.net_sinks(net_id).size() != 0) { /* Globals don't count. */ + && net_list.net_sinks(net_id).size() != 0 /* Globals don't count. 
*/ + && route_ctx.route_trees[net_id]) { int bends, wirelength, segments; bool is_absorbed; get_num_bends_and_length(net_id, &bends, &wirelength, &segments, &is_absorbed); @@ -1760,14 +1691,14 @@ static WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_ return WirelengthInfo(available_wirelength, used_wirelength); } -static void print_route_status_header() { +void print_route_status_header() { VTR_LOG("---- ------ ------- ---- ------- ------- ------- ----------------- --------------- -------- ---------- ---------- ---------- ---------- --------\n"); VTR_LOG("Iter Time pres BBs Heap Re-Rtd Re-Rtd Overused RR Nodes Wirelength CPD sTNS sWNS hTNS hWNS Est Succ\n"); VTR_LOG(" (sec) fac Updt push Nets Conns (ns) (ns) (ns) (ns) (ns) Iter\n"); VTR_LOG("---- ------ ------- ---- ------- ------- ------- ----------------- --------------- -------- ---------- ---------- ---------- ---------- --------\n"); } -static void print_route_status(int itry, double elapsed_sec, float pres_fac, int num_bb_updated, const RouterStats& router_stats, const OveruseInfo& overuse_info, const WirelengthInfo& wirelength_info, std::shared_ptr timing_info, float est_success_iteration) { +void print_route_status(int itry, double elapsed_sec, float pres_fac, int num_bb_updated, const RouterStats& router_stats, const OveruseInfo& overuse_info, const WirelengthInfo& wirelength_info, std::shared_ptr timing_info, float est_success_iteration) { //Iteration VTR_LOG("%4d", itry); @@ -1863,7 +1794,14 @@ static void print_route_status(int itry, double elapsed_sec, float pres_fac, int fflush(stdout); } -static void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info) { +void print_router_criticality_histogram(const Netlist<>& net_list, + const SetupTimingInfo& timing_info, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + bool is_flat) { + print_histogram(create_criticality_histogram(net_list, timing_info, netlist_pin_lookup, is_flat, 10)); 
+} + +void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info) { //Print the index of this routing failure VTR_LOG("\nFailed routing attempt #%d\n", num_routing_failed); @@ -1881,13 +1819,6 @@ static void print_overused_nodes_status(const t_router_opts& router_opts, const VTR_LOG("\n"); } -static void print_router_criticality_histogram(const Netlist<>& net_list, - const SetupTimingInfo& timing_info, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - bool is_flat) { - print_histogram(create_criticality_histogram(net_list, timing_info, netlist_pin_lookup, is_flat, 10)); -} - //Returns true if the specified net fanout is classified as high fanout static bool is_high_fanout(int fanout, int fanout_threshold) { if (fanout_threshold < 0 || fanout < fanout_threshold) return false; @@ -1997,7 +1928,7 @@ size_t dynamic_update_bounding_boxes(const std::vector& updated_net } //Returns the bounding box of a net's used routing resources -static t_bb calc_current_bb(const RouteTree& tree) { +t_bb calc_current_bb(const RouteTree& tree) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; auto& grid = device_ctx.grid; @@ -2028,7 +1959,7 @@ static t_bb calc_current_bb(const RouteTree& tree) { void enable_router_debug( const t_router_opts& router_opts, ParentNetId net, - int sink_rr, + RRNodeId sink_rr, int router_iteration, ConnectionRouterInterface* router) { bool active_net_debug = (router_opts.router_debug_net >= -1); @@ -2036,7 +1967,7 @@ void enable_router_debug( bool active_iteration_debug = (router_opts.router_debug_iteration >= 0); bool match_net = (ParentNetId(router_opts.router_debug_net) == net || router_opts.router_debug_net == -1); - bool match_sink = (router_opts.router_debug_sink_rr == sink_rr || router_opts.router_debug_sink_rr < 0); + bool match_sink = (router_opts.router_debug_sink_rr == int(size_t((sink_rr))) || router_opts.router_debug_sink_rr < 0); bool match_iteration = 
(router_opts.router_debug_iteration == router_iteration || router_opts.router_debug_iteration < 0); f_router_debug = active_net_debug || active_sink_debug || active_iteration_debug; @@ -2076,10 +2007,10 @@ bool should_setup_lower_bound_connection_delays(int itry, const t_router_opts& / return false; } -static bool is_better_quality_routing(const vtr::vector>& best_routing, - const RoutingMetrics& best_routing_metrics, - const WirelengthInfo& wirelength_info, - std::shared_ptr timing_info) { +bool is_better_quality_routing(const vtr::vector>& best_routing, + const RoutingMetrics& best_routing_metrics, + const WirelengthInfo& wirelength_info, + std::shared_ptr timing_info) { if (best_routing.empty()) { return true; // First legal routing } @@ -2115,10 +2046,10 @@ static bool is_better_quality_routing(const vtr::vector timing_info, - const RoutingMetrics& best_routing_metrics) { +bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts, + int itry_since_last_convergence, + std::shared_ptr timing_info, + const RoutingMetrics& best_routing_metrics) { // Give-up on reconvergent routing if the CPD improvement after the // first iteration since convergence is small, compared to the best // CPD seen so far @@ -2138,11 +2069,11 @@ static bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts, return false; // Don't give up } -static void generate_route_timing_reports(const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - const SetupTimingInfo& timing_info, - const RoutingDelayCalculator& delay_calc, - bool is_flat) { +void generate_route_timing_reports(const t_router_opts& router_opts, + const t_analysis_opts& analysis_opts, + const SetupTimingInfo& timing_info, + const RoutingDelayCalculator& delay_calc, + bool is_flat) { auto& timing_ctx = g_vpr_ctx.timing(); auto& atom_ctx = g_vpr_ctx.atom(); @@ -2158,8 +2089,8 @@ static void generate_route_timing_reports(const t_router_opts& router_opts, // behind. 
As a result, the final routing may have stubs at // non-configurable sets. This function tracks non-configurable set usage, // and if the sets are unused, prunes them. -static void prune_unused_non_configurable_nets(CBRR& connections_inf, - const Netlist<>& net_list) { +void prune_unused_non_configurable_nets(CBRR& connections_inf, + const Netlist<>& net_list) { auto& device_ctx = g_vpr_ctx.device(); auto& route_ctx = g_vpr_ctx.mutable_routing(); @@ -2170,7 +2101,7 @@ static void prune_unused_non_configurable_nets(CBRR& connections_inf, RouteTree& tree = route_ctx.route_trees[net_id].value(); connections_inf.prepare_routing_for_net(net_id); - connections_inf.clear_force_reroute_for_net(); + connections_inf.clear_force_reroute_for_net(net_id); std::vector usage = tree.get_non_config_node_set_usage(); @@ -2180,27 +2111,27 @@ static void prune_unused_non_configurable_nets(CBRR& connections_inf, } // Initializes net_delay based on best-case delay estimates from the router lookahead -static void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead, - const Netlist<>& net_list, - const vtr::vector>& net_rr_terminals, - NetPinsMatrix& net_delay, - const RRGraphView& rr_graph, - bool is_flat) { +void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead, + const Netlist<>& net_list, + const vtr::vector>& net_rr_terminals, + NetPinsMatrix& net_delay, + const RRGraphView& rr_graph, + bool is_flat) { t_conn_cost_params cost_params; cost_params.criticality = 1.; // Ensures lookahead returns delay value for (auto net_id : net_list.nets()) { if (net_list.net_is_ignored(net_id)) continue; - int source_rr = net_rr_terminals[net_id][0]; + RRNodeId source_rr = net_rr_terminals[net_id][0]; for (size_t ipin = 1; ipin < net_list.net_pins(net_id).size(); ++ipin) { - int sink_rr = net_rr_terminals[net_id][ipin]; + RRNodeId sink_rr = net_rr_terminals[net_id][ipin]; float est_delay = get_cost_from_lookahead(router_lookahead, rr_graph, - RRNodeId(source_rr), 
- RRNodeId(sink_rr), + source_rr, + sink_rr, 0., cost_params, is_flat); @@ -2211,7 +2142,7 @@ static void init_net_delay_from_lookahead(const RouterLookahead& router_lookahea } } -static void update_route_stats(RouterStats& router_stats, RouterStats& router_iteration_stats) { +void update_router_stats(RouterStats& router_stats, RouterStats& router_iteration_stats) { router_stats.connections_routed += router_iteration_stats.connections_routed; router_stats.nets_routed += router_iteration_stats.nets_routed; router_stats.heap_pushes += router_iteration_stats.heap_pushes; @@ -2234,7 +2165,7 @@ static void update_route_stats(RouterStats& router_stats, RouterStats& router_it router_stats.add_high_fanout_rt += router_iteration_stats.add_high_fanout_rt; } -static void init_route_stats(RouterStats& router_stats) { +void init_router_stats(RouterStats& router_stats) { router_stats.connections_routed = 0; router_stats.nets_routed = 0; router_stats.heap_pushes = 0; diff --git a/vpr/src/route/route_timing.h b/vpr/src/route/route_timing.h index 6f4943eca4f..bccf9ba2c84 100644 --- a/vpr/src/route/route_timing.h +++ b/vpr/src/route/route_timing.h @@ -1,86 +1,66 @@ #pragma once + #include #include + #include "connection_based_routing.h" +#include "connection_router_interface.h" +#include "heap_type.h" #include "netlist.h" -#include "vpr_types.h" - -#include "vpr_utils.h" -#include "timing_info_fwd.h" #include "route_budgets.h" #include "router_stats.h" #include "router_lookahead.h" -#include "spatial_route_tree_lookup.h" -#include "connection_router_interface.h" -#include "heap_type.h" #include "routing_predictor.h" +#include "rr_graph_type.h" +#include "spatial_route_tree_lookup.h" +#include "timing_info_fwd.h" +#include "vpr_types.h" +#include "vpr_utils.h" + +#include "NetPinTimingInvalidator.h" extern bool f_router_debug; +/** TODO: remove timing_driven_route_structs together with this fn */ int get_max_pins_per_net(const Netlist<>& net_list); -bool try_timing_driven_route(const 
Netlist<>& net_list, - const t_det_routing_arch& det_routing_arch, - const t_router_opts& router_opts, - const t_analysis_opts& analysis_opts, - const std::vector& segment_inf, - NetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - std::shared_ptr delay_calc, - ScreenUpdatePriority first_iteration_priority, - bool is_flat); +/** Types and defines common to timing_driven and parallel routers */ -template -bool try_timing_driven_route_net(ConnectionRouter& router, - const Netlist<>& net_list, - const ParentNetId& net_id, - int itry, - float pres_fac, - const t_router_opts& router_opts, - CBRR& connections_inf, - RouterStats& router_stats, - std::vector& pin_criticality, - std::vector>& rt_node_of_sink, - ClbNetPinsMatrix& net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - route_budgets& budgeting_inf, - bool& was_rerouted, - float worst_negative_slack, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat); +#define CONGESTED_SLOPE_VAL -0.04 -template -bool timing_driven_route_net(ConnectionRouter& router, - const Netlist<>& net_list, - ParentNetId net_id, - int itry, - float pres_fac, - const t_router_opts& router_opts, - CBRR& connections_inf, - RouterStats& router_stats, - std::vector& pin_criticality, - std::vector>& rt_node_of_sink, - float* net_delay, - const ClusteredPinAtomPinsLookup& netlist_pin_lookup, - std::shared_ptr timing_info, - NetPinTimingInvalidator* pin_timing_invalidator, - route_budgets& budgeting_inf, - float worst_neg_slack, - const RoutingPredictor& routing_predictor, - const std::vector>& choking_spots, - bool is_flat); +/** Per-iteration congestion mode for the router: focus more on routability after a certain threshold */ +enum class RouterCongestionMode { + NORMAL, + CONFLICTED +}; -void enable_router_debug(const t_router_opts& 
router_opts, ParentNetId net, int sink_rr, int router_iteration, ConnectionRouterInterface* router); +/** Identifies the two breakpoint types in routing */ +typedef enum router_breakpoint_type { + BP_ROUTE_ITER, + BP_NET_ID +} bp_router_type; -bool is_iteration_complete(bool routing_is_feasible, const t_router_opts& router_opts, int itry, std::shared_ptr timing_info, bool rcv_finished); +/** Results from attempting to route a net. + * success: Could we route it? + * was_rerouted: Is the routing different from the last one? (set by try_* functions) + * retry_with_full_bb: Should we retry this net with a full-device bounding box? (used in the parallel router) + * + * I'm fine with returning 3 bytes from a fn: consider an enum class if this becomes too big */ +struct NetResultFlags { + bool success = false; + bool was_rerouted = false; + bool retry_with_full_bb = false; +}; -bool should_setup_lower_bound_connection_delays(int itry, const t_router_opts& router_opts); +struct RoutingMetrics { + size_t used_wirelength = 0; -void update_rr_base_costs(int fanout); + float sWNS = std::numeric_limits::quiet_NaN(); + float sTNS = std::numeric_limits::quiet_NaN(); + float hWNS = std::numeric_limits::quiet_NaN(); + float hTNS = std::numeric_limits::quiet_NaN(); + tatum::TimingPathInfo critical_path; +}; /* Data while timing driven route is active */ class timing_driven_route_structs { @@ -101,3 +81,199 @@ class timing_driven_route_structs { rt_node_of_sink[0] = vtr::nullopt; } }; + +/** Returns the bounding box of a net's used routing resources */ +t_bb calc_current_bb(const RouteTree& tree); + +/** Get available wirelength for the current RR graph */ +size_t calculate_wirelength_available(); + +/** Calculate wirelength for the current routing and populate a WirelengthInfo */ +WirelengthInfo calculate_wirelength_info(const Netlist<>& net_list, size_t available_wirelength); + +size_t dynamic_update_bounding_boxes(const std::vector& updated_nets, + const Netlist<>& net_list, + 
int high_fanout_threshold); + +/** Early exit code for cases where it is obvious that a successful route will not be found + * Heuristic: If total wirelength used in first routing iteration is X% of total available wirelength, exit */ +bool early_exit_heuristic(const t_router_opts& router_opts, const WirelengthInfo& wirelength_info); + +/** Give-up on reconvergent routing if the CPD improvement after the + * first iteration since convergence is small, compared to the best + * CPD seen so far */ +bool early_reconvergence_exit_heuristic(const t_router_opts& router_opts, + int itry_since_last_convergence, + std::shared_ptr timing_info, + const RoutingMetrics& best_routing_metrics); + +void enable_router_debug(const t_router_opts& router_opts, ParentNetId net, RRNodeId sink_rr, int router_iteration, ConnectionRouterInterface* router); + +void generate_route_timing_reports(const t_router_opts& router_opts, + const t_analysis_opts& analysis_opts, + const SetupTimingInfo& timing_info, + const RoutingDelayCalculator& delay_calc, + bool is_flat); + +/** Initialize net_delay based on best-case delay estimates from the router lookahead. 
*/ +void init_net_delay_from_lookahead(const RouterLookahead& router_lookahead, + const Netlist<>& net_list, + const vtr::vector>& net_rr_terminals, + NetPinsMatrix& net_delay, + const RRGraphView& rr_graph, + bool is_flat); + +void init_router_stats(RouterStats& router_stats); + +bool is_better_quality_routing(const vtr::vector>& best_routing, + const RoutingMetrics& best_routing_metrics, + const WirelengthInfo& wirelength_info, + std::shared_ptr timing_info); + +bool is_iteration_complete(bool routing_is_feasible, const t_router_opts& router_opts, int itry, std::shared_ptr timing_info, bool rcv_finished); + +/** Print the index of this routing failure */ +void print_overused_nodes_status(const t_router_opts& router_opts, const OveruseInfo& overuse_info); + +void print_route_status_header(); + +void print_route_status(int itry, + double elapsed_sec, + float pres_fac, + int num_bb_updated, + const RouterStats& router_stats, + const OveruseInfo& overuse_info, + const WirelengthInfo& wirelength_info, + std::shared_ptr timing_info, + float est_success_iteration); + +void print_router_criticality_histogram(const Netlist<>& net_list, + const SetupTimingInfo& timing_info, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + bool is_flat); + +/** If a route is ripped up during routing, non-configurable sets are left + * behind. As a result, the final routing may have stubs at + * non-configurable sets. This function tracks non-configurable set usage, + * and if the sets are unused, prunes them. */ +void prune_unused_non_configurable_nets(CBRR& connections_inf, + const Netlist<>& net_list); + +/** + * If flat_routing and has_choking_spot are true, there are some choke points inside the cluster which would increase the convergence time of routing. + * To address this issue, the congestion cost of those choke points needs to decrease. 
This function identifies those choke points for each net, + * and since the amount of congestion reduction is dependent on the number of sinks reachable from that choke point, it also stores the number of reachable sinks + * for each choke point. + * @param net_list + * @param net_terminal_groups [Net_id][group_id] -> rr_node_id of the pins in the group + * @param net_terminal_group_num [Net_id][pin_id] -> group_id + * @param has_choking_spot is true if the given architecture has choking spots inside the cluster + * @param is_flat is true if flat_routing is enabled + * @return [Net_id][pin_id] -> [choke_point_rr_node_id, number of sinks reachable by this choke point] + */ + +vtr::vector>> set_nets_choking_spots(const Netlist<>& net_list, + const vtr::vector>>& net_terminal_groups, + const vtr::vector>& net_terminal_group_num, + bool has_choking_spot, + bool is_flat); + +/** Detect if net should be routed or not */ +bool should_route_net(ParentNetId net_id, + CBRR& connections_inf, + bool if_force_reroute); + +bool should_setup_lower_bound_connection_delays(int itry, const t_router_opts& router_opts); + +bool timing_driven_check_net_delays(const Netlist<>& net_list, + NetPinsMatrix& net_delay); + +bool try_timing_driven_route(const Netlist<>& net_list, + const t_det_routing_arch& det_routing_arch, + const t_router_opts& router_opts, + const t_analysis_opts& analysis_opts, + const std::vector& segment_inf, + NetPinsMatrix& net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + std::shared_ptr delay_calc, + ScreenUpdatePriority first_iteration_priority, + bool is_flat); + +/** Attempt to route a single net. 
+ * + * @param router The ConnectionRouter instance + * @param net_list Input netlist + * @param net_id + * @param itry # of iteration + * @param pres_fac + * @param router_opts + * @param connections_inf + * @param router_stats + * @param pin_criticality + * @param rt_node_of_sink Lookup from target_pin-like indices (indicating SINK nodes) to RouteTreeNodes + * @param net_delay + * @param netlist_pin_lookup + * @param timing_info + * @param pin_timing_invalidator + * @param budgeting_inf + * @param worst_neg_slack + * @param routing_predictor + * @param choking_spots + * @param is_flat + * @return NetResultFlags for this net. success = false means the RR graph is disconnected and the caller can give up */ +template +NetResultFlags timing_driven_route_net(ConnectionRouter& router, + const Netlist<>& net_list, + ParentNetId net_id, + int itry, + float pres_fac, + const t_router_opts& router_opts, + CBRR& connections_inf, + RouterStats& router_stats, + std::vector& pin_criticality, + std::vector>& rt_node_of_sink, + float* net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + route_budgets& budgeting_inf, + float worst_neg_slack, + const RoutingPredictor& routing_predictor, + const std::vector>& choking_spots, + bool is_flat); + +template +NetResultFlags try_timing_driven_route_net(ConnectionRouter& router, + const Netlist<>& net_list, + const ParentNetId& net_id, + int itry, + float pres_fac, + const t_router_opts& router_opts, + CBRR& connections_inf, + RouterStats& router_stats, + std::vector& pin_criticality, + std::vector>& rt_node_of_sink, + NetPinsMatrix& net_delay, + const ClusteredPinAtomPinsLookup& netlist_pin_lookup, + std::shared_ptr timing_info, + NetPinTimingInvalidator* pin_timing_invalidator, + route_budgets& budgeting_inf, + float worst_negative_slack, + const RoutingPredictor& routing_predictor, + const std::vector>& choking_spots, + bool is_flat); + 
+void update_router_stats(RouterStats& router_stats, RouterStats& router_iteration_stats); + +#ifndef NO_GRAPHICS +void update_router_info_and_check_bp(bp_router_type type, int net_id); +#endif + +void update_rr_base_costs(int fanout); + +/** Traverses down a route tree and updates rr_node_inf for all nodes + * to reflect that these nodes have already been routed to */ +void update_rr_route_inf_from_tree(const RouteTreeNode& rt_node); diff --git a/vpr/src/route/route_tree.cpp b/vpr/src/route/route_tree.cpp index 033039d9b6f..82e929e3ebc 100644 --- a/vpr/src/route/route_tree.cpp +++ b/vpr/src/route/route_tree.cpp @@ -58,7 +58,7 @@ void RouteTreeNode::print_x(int depth) const { } auto& route_ctx = g_vpr_ctx.routing(); - if (route_ctx.rr_node_route_inf[size_t(inode)].occ() > rr_graph.node_capacity(inode)) { + if (route_ctx.rr_node_route_inf[inode].occ() > rr_graph.node_capacity(inode)) { VTR_LOG(" x"); } @@ -72,6 +72,7 @@ void RouteTreeNode::print_x(int depth) const { /* Construct a top-level route tree. */ RouteTree::RouteTree(RRNodeId _inode) { _root = new RouteTreeNode(_inode, RRSwitchId::INVALID(), nullptr); + _net_id = ParentNetId::INVALID(); _rr_node_to_rt_node[_inode] = _root; } @@ -79,6 +80,7 @@ RouteTree::RouteTree(ParentNetId _inet) { auto& route_ctx = g_vpr_ctx.routing(); RRNodeId inode = RRNodeId(route_ctx.net_rr_terminals[_inet][0]); _root = new RouteTreeNode(inode, RRSwitchId::INVALID(), nullptr); + _net_id = _inet; _rr_node_to_rt_node[inode] = _root; } @@ -104,6 +106,7 @@ void RouteTree::copy_tree_x(RouteTreeNode* lhs, const RouteTreeNode& rhs) { /* Copy constructor */ RouteTree::RouteTree(const RouteTree& rhs) { _root = copy_tree(rhs._root); + _net_id = rhs._net_id; } /* Move constructor: @@ -111,6 +114,7 @@ RouteTree::RouteTree(const RouteTree& rhs) { * Refs should stay valid after this? 
*/ RouteTree::RouteTree(RouteTree&& rhs) { _root = rhs._root; + _net_id = rhs._net_id; rhs._root = nullptr; _rr_node_to_rt_node = std::move(rhs._rr_node_to_rt_node); } @@ -122,6 +126,7 @@ RouteTree& RouteTree::operator=(const RouteTree& rhs) { free_list(_root); _rr_node_to_rt_node.clear(); _root = copy_tree(rhs._root); + _net_id = rhs._net_id; return *this; } @@ -134,6 +139,7 @@ RouteTree& RouteTree::operator=(RouteTree&& rhs) { return *this; free_list(_root); _root = rhs._root; + _net_id = rhs._net_id; rhs._root = nullptr; _rr_node_to_rt_node = std::move(rhs._rr_node_to_rt_node); return *this; @@ -356,7 +362,7 @@ bool RouteTree::is_valid_x(const RouteTreeNode& rt_node) const { } if (rr_graph.node_type(inode) == SINK) { // sink, must not be congested and must not have fanouts - int occ = route_ctx.rr_node_route_inf[size_t(inode)].occ(); + int occ = route_ctx.rr_node_route_inf[inode].occ(); int capacity = rr_graph.node_capacity(inode); if (rt_node._next != nullptr && rt_node._next->_parent == &rt_node) { VTR_LOG("SINK %d has fanouts?\n", inode); @@ -414,7 +420,7 @@ bool RouteTree::is_uncongested_x(const RouteTreeNode& rt_node) const { const auto& rr_graph = device_ctx.rr_graph; RRNodeId inode = rt_node.inode; - if (route_ctx.rr_node_route_inf[size_t(inode)].occ() > rr_graph.node_capacity(RRNodeId(inode))) { + if (route_ctx.rr_node_route_inf[inode].occ() > rr_graph.node_capacity(RRNodeId(inode))) { //This node is congested return false; } @@ -504,8 +510,8 @@ RouteTree::add_subtree_from_heap(t_heap* hptr, int target_net_pin_index, bool is while (!_rr_node_to_rt_node.count(new_inode)) { new_branch_inodes.push_back(new_inode); new_branch_iswitches.push_back(new_iswitch); - edge = route_ctx.rr_node_route_inf[size_t(new_inode)].prev_edge; - new_inode = RRNodeId(route_ctx.rr_node_route_inf[size_t(new_inode)].prev_node); + edge = route_ctx.rr_node_route_inf[new_inode].prev_edge; + new_inode = RRNodeId(route_ctx.rr_node_route_inf[new_inode].prev_node); new_iswitch = 
RRSwitchId(rr_graph.rr_nodes().edge_switch(edge)); } new_branch_iswitches.push_back(new_iswitch); @@ -608,7 +614,9 @@ RouteTree::prune(CBRR& connections_inf, std::vector* non_config_node_set_us VTR_ASSERT_MSG(rr_graph.node_type(root().inode) == SOURCE, "Root of route tree must be SOURCE"); - VTR_ASSERT_MSG(route_ctx.rr_node_route_inf[size_t(root().inode)].occ() <= rr_graph.node_capacity(root().inode), + VTR_ASSERT_MSG(_net_id, "RouteTree must be constructed using a ParentNetId"); + + VTR_ASSERT_MSG(route_ctx.rr_node_route_inf[root().inode].occ() <= rr_graph.node_capacity(root().inode), "Route tree root/SOURCE should never be congested"); auto pruned_node = prune_x(*_root, connections_inf, false, non_config_node_set_usage); @@ -626,10 +634,10 @@ RouteTree::prune_x(RouteTreeNode& rt_node, CBRR& connections_inf, bool force_pru auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; auto& route_ctx = g_vpr_ctx.routing(); - bool congested = (route_ctx.rr_node_route_inf[size_t(rt_node.inode)].occ() > rr_graph.node_capacity(rt_node.inode)); + bool congested = (route_ctx.rr_node_route_inf[rt_node.inode].occ() > rr_graph.node_capacity(rt_node.inode)); int node_set = -1; - auto itr = device_ctx.rr_node_to_non_config_node_set.find(size_t(rt_node.inode)); + auto itr = device_ctx.rr_node_to_non_config_node_set.find(rt_node.inode); if (itr != device_ctx.rr_node_to_non_config_node_set.end()) { node_set = itr->second; } @@ -639,7 +647,7 @@ RouteTree::prune_x(RouteTreeNode& rt_node, CBRR& connections_inf, bool force_pru force_prune = true; } - if (connections_inf.should_force_reroute_connection(size_t(rt_node.inode))) { + if (connections_inf.should_force_reroute_connection(_net_id, rt_node.inode)) { //Forcibly re-route (e.g. 
to improve delay) force_prune = true; } @@ -668,12 +676,12 @@ RouteTree::prune_x(RouteTreeNode& rt_node, CBRR& connections_inf, bool force_pru //Valid path to sink //Record sink as reachable - connections_inf.reached_rt_sink(rt_node.inode); + connections_inf.reached_rt_sink(_net_id, rt_node.inode); return rt_node; // Not pruned } else { //Record as not reached - connections_inf.toreach_rr_sink(rt_node.net_pin_index); + connections_inf.toreach_rr_sink(_net_id, rt_node.net_pin_index); return vtr::nullopt; // Pruned } @@ -822,7 +830,7 @@ std::vector RouteTree::get_non_config_node_set_usage(void) const { const auto& rr_to_nonconf = device_ctx.rr_node_to_non_config_node_set; for (auto& rt_node : all_nodes()) { - auto it = rr_to_nonconf.find(size_t(rt_node.inode)); + auto it = rr_to_nonconf.find(rt_node.inode); if (it == rr_to_nonconf.end()) continue; diff --git a/vpr/src/route/route_tree.h b/vpr/src/route/route_tree.h index 3a1db5a0c6e..9d2200d2696 100644 --- a/vpr/src/route/route_tree.h +++ b/vpr/src/route/route_tree.h @@ -30,9 +30,9 @@ * if (found_path) * std::tie(std::ignore, rt_node_of_sink) = tree.update_from_heap(&cheapest, ...); * - * Congested paths in a tree can be pruned using RouteTree::prune(). Note that updates to a tree require an update to the global occupancy state via - * pathfinder_update_cost_from_route_tree(). In addition, RouteTree::prune() depends on this global data to find congestions, so the flow to - * prune a tree looks like this: + * Congested paths in a tree can be pruned using RouteTree::prune(). This is done between iterations to keep only the legally routed section. + * Note that updates to a tree require an update to the global occupancy state via pathfinder_update_cost_from_route_tree(). 
+ * RouteTree::prune() depends on this global data to find congestions, so the flow to prune a tree is somewhat convoluted: * * RouteTree tree2 = tree; * // Prune the copy (using congestion data before subtraction) @@ -333,9 +333,11 @@ class RouteTree { RouteTree& operator=(const RouteTree&); RouteTree& operator=(RouteTree&&); - /** Return a RouteTree initialized to inode. */ + /** Return a RouteTree initialized to inode. + * Note that prune() won't work on a RouteTree initialized this way (see _net_id comments) */ RouteTree(RRNodeId inode); - /** Return a RouteTree initialized to the source of nets[inet]. */ + /** Return a RouteTree initialized to the source of nets[inet]. + * Use this constructor where possible (needed for prune() to work) */ RouteTree(ParentNetId inet); ~RouteTree() { @@ -356,7 +358,8 @@ class RouteTree { * Note that update_from_heap already does this, but prune() doesn't */ void reload_timing(vtr::optional from_node = vtr::nullopt); - /** Get the RouteTreeNode corresponding to the RRNodeId. Returns nullopt if not found. */ + /** Get the RouteTreeNode corresponding to the RRNodeId. Returns nullopt if not found. + * SINK nodes may be added to the tree multiple times. In that case, this will return the last one added. */ vtr::optional find_by_rr_id(RRNodeId rr_node) const; /** Check the consistency of this route tree. Looks for: @@ -509,6 +512,13 @@ class RouteTree { * This is also the internal node list via the ptrs in RouteTreeNode. */ RouteTreeNode* _root; + /** Net ID. + * A RouteTree does not have to be connected to a net, but if it isn't + * constructed using a ParentNetId prune() won't work. This is due to + * a data dependency through "Connection_based_routing_resources". Should + * be refactored when possible. */ + ParentNetId _net_id; + /** Lookup from RRNodeIds to RouteTreeNodes in the tree. * In some cases the same SINK node is put into the tree multiple times in a * single route. 
To model this, we are putting in separate rt_nodes in the route diff --git a/vpr/src/route/route_util.cpp b/vpr/src/route/route_util.cpp index 4c316278ad2..96e9551fbe8 100644 --- a/vpr/src/route/route_util.cpp +++ b/vpr/src/route/route_util.cpp @@ -33,7 +33,7 @@ vtr::Matrix calculate_routing_usage(t_rr_type rr_type, bool is_flat) { int y = rr_graph.node_ylow(rr_node); for (int x = rr_graph.node_xlow(rr_node); x <= rr_graph.node_xhigh(rr_node); ++x) { - usage[x][y] += route_ctx.rr_node_route_inf[size_t(rr_node)].occ(); + usage[x][y] += route_ctx.rr_node_route_inf[rr_node].occ(); } } else { VTR_ASSERT(rr_type == CHANY); @@ -42,7 +42,7 @@ vtr::Matrix calculate_routing_usage(t_rr_type rr_type, bool is_flat) { int x = rr_graph.node_xlow(rr_node); for (int y = rr_graph.node_ylow(rr_node); y <= rr_graph.node_yhigh(rr_node); ++y) { - usage[x][y] += route_ctx.rr_node_route_inf[size_t(rr_node)].occ(); + usage[x][y] += route_ctx.rr_node_route_inf[rr_node].occ(); } } } diff --git a/vpr/src/route/router_delay_profiling.cpp b/vpr/src/route/router_delay_profiling.cpp index a2b5faa4b75..4e2274c406f 100644 --- a/vpr/src/route/router_delay_profiling.cpp +++ b/vpr/src/route/router_delay_profiling.cpp @@ -23,14 +23,14 @@ RouterDelayProfiler::RouterDelayProfiler(const Netlist<>& net_list, is_flat) , is_flat_(is_flat) {} -bool RouterDelayProfiler::calculate_delay(int source_node, int sink_node, const t_router_opts& router_opts, float* net_delay) { +bool RouterDelayProfiler::calculate_delay(RRNodeId source_node, RRNodeId sink_node, const t_router_opts& router_opts, float* net_delay) { /* Returns true as long as found some way to hook up this net, even if that * * way resulted in overuse of resources (congestion). If there is no way * * to route this net, even ignoring congestion, it returns false. In this * * case the rr_graph is disconnected and you can give up. 
*/ auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - auto& route_ctx = g_vpr_ctx.routing(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); //vtr::ScopedStartFinishTimer t(vtr::string_fmt("Profiling Delay from %s at %d,%d (%s) to %s at %d,%d (%s)", //rr_graph.node_type_string(RRNodeId(source_node)), @@ -71,16 +71,17 @@ bool RouterDelayProfiler::calculate_delay(int source_node, int sink_node, const -1, false, std::unordered_map()); - std::tie(found_path, cheapest) = router_.timing_driven_route_connection_from_route_tree( + std::tie(found_path, std::ignore, cheapest) = router_.timing_driven_route_connection_from_route_tree( tree.root(), sink_node, cost_params, bounding_box, router_stats, - conn_params); + conn_params, + true); if (found_path) { - VTR_ASSERT(cheapest.index == sink_node); + VTR_ASSERT(RRNodeId(cheapest.index) == sink_node); vtr::optional rt_node_of_sink; std::tie(std::ignore, rt_node_of_sink) = tree.update_from_heap(&cheapest, OPEN, nullptr, is_flat_); @@ -88,7 +89,7 @@ bool RouterDelayProfiler::calculate_delay(int source_node, int sink_node, const //find delay *net_delay = rt_node_of_sink->Tdel; - VTR_ASSERT_MSG(route_ctx.rr_node_route_inf[size_t(tree.root().inode)].occ() <= rr_graph.node_capacity(tree.root().inode), "SOURCE should never be congested"); + VTR_ASSERT_MSG(route_ctx.rr_node_route_inf[tree.root().inode].occ() <= rr_graph.node_capacity(tree.root().inode), "SOURCE should never be congested"); } //VTR_LOG("Explored %zu of %zu (%.2f) RR nodes: path delay %g\n", router_stats.heap_pops, device_ctx.rr_nodes.size(), float(router_stats.heap_pops) / device_ctx.rr_nodes.size(), *net_delay); @@ -102,13 +103,13 @@ bool RouterDelayProfiler::calculate_delay(int source_node, int sink_node, const } //Returns the shortest path delay from src_node to all RR nodes in the RR graph, or NaN if no path exists -std::vector calculate_all_path_delays_from_rr_node(int src_rr_node, - const t_router_opts& router_opts, - bool 
is_flat) { +vtr::vector calculate_all_path_delays_from_rr_node(RRNodeId src_rr_node, + const t_router_opts& router_opts, + bool is_flat) { auto& device_ctx = g_vpr_ctx.device(); - auto& routing_ctx = g_vpr_ctx.mutable_routing(); + auto& route_ctx = g_vpr_ctx.mutable_routing(); - std::vector path_delays_to(device_ctx.rr_graph.num_nodes(), std::numeric_limits::quiet_NaN()); + vtr::vector path_delays_to(device_ctx.rr_graph.num_nodes(), std::numeric_limits::quiet_NaN()); RouteTree tree((RRNodeId(src_rr_node))); @@ -131,6 +132,7 @@ std::vector calculate_all_path_delays_from_rr_node(int src_rr_node, /*write_lookahead=*/"", /*read_lookahead=*/"", /*segment_inf=*/{}, is_flat); + ConnectionRouter router( device_ctx.grid, *router_lookahead, @@ -138,24 +140,25 @@ std::vector calculate_all_path_delays_from_rr_node(int src_rr_node, &g_vpr_ctx.device().rr_graph, device_ctx.rr_rc_data, device_ctx.rr_graph.rr_switch(), - routing_ctx.rr_node_route_inf, + route_ctx.rr_node_route_inf, is_flat); RouterStats router_stats; ConnectionParameters conn_params(ParentNetId::INVALID(), OPEN, false, std::unordered_map()); - std::vector shortest_paths = router.timing_driven_find_all_shortest_paths_from_route_tree(tree.root(), - cost_params, - bounding_box, - router_stats, - conn_params); + vtr::vector shortest_paths = router.timing_driven_find_all_shortest_paths_from_route_tree(tree.root(), + cost_params, + bounding_box, + router_stats, + conn_params); VTR_ASSERT(shortest_paths.size() == device_ctx.rr_graph.num_nodes()); - for (int sink_rr_node = 0; sink_rr_node < (int)device_ctx.rr_graph.num_nodes(); ++sink_rr_node) { - if (sink_rr_node == src_rr_node) { + for (int isink = 0; isink < (int)device_ctx.rr_graph.num_nodes(); ++isink) { + RRNodeId sink_rr_node(isink); + if (RRNodeId(sink_rr_node) == src_rr_node) { path_delays_to[sink_rr_node] = 0.; } else { - if (shortest_paths[sink_rr_node].index == OPEN) continue; + if (!shortest_paths[sink_rr_node].index.is_valid()) continue; - 
VTR_ASSERT(shortest_paths[sink_rr_node].index == sink_rr_node); + VTR_ASSERT(RRNodeId(shortest_paths[sink_rr_node].index) == sink_rr_node); //Build the routing tree to get the delay tree = RouteTree(RRNodeId(src_rr_node)); diff --git a/vpr/src/route/router_delay_profiling.h b/vpr/src/route/router_delay_profiling.h index ac2b507094b..d4dcbb5dac7 100644 --- a/vpr/src/route/router_delay_profiling.h +++ b/vpr/src/route/router_delay_profiling.h @@ -13,7 +13,7 @@ class RouterDelayProfiler { RouterDelayProfiler(const Netlist<>& net_list, const RouterLookahead* lookahead, bool is_flat); - bool calculate_delay(int source_node, int sink_node, const t_router_opts& router_opts, float* net_delay); + bool calculate_delay(RRNodeId source_node, RRNodeId sink_node, const t_router_opts& router_opts, float* net_delay); private: const Netlist<>& net_list_; @@ -22,9 +22,9 @@ class RouterDelayProfiler { bool is_flat_; }; -std::vector calculate_all_path_delays_from_rr_node(int src_rr_node, - const t_router_opts& router_opts, - bool is_flat); +vtr::vector calculate_all_path_delays_from_rr_node(RRNodeId src_rr_node, + const t_router_opts& router_opts, + bool is_flat); void alloc_routing_structs(t_chan_width chan_width, const t_router_opts& router_opts, diff --git a/vpr/src/route/router_lookahead_extended_map.cpp b/vpr/src/route/router_lookahead_extended_map.cpp index 375b1127177..fa5a9a9849e 100644 --- a/vpr/src/route/router_lookahead_extended_map.cpp +++ b/vpr/src/route/router_lookahead_extended_map.cpp @@ -137,11 +137,11 @@ std::pair ExtendedMapLookahead::get_src_opin_cost(RRNodeId from_no VTR_ASSERT_SAFE_MSG(false, vtr::string_fmt("Lookahead failed to estimate cost from %s: %s", - rr_node_arch_name(size_t(from_node), is_flat_).c_str(), + rr_node_arch_name(from_node, is_flat_).c_str(), describe_rr_node(device_ctx.rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, - size_t(from_node), + from_node, is_flat_) .c_str()) .c_str()); @@ -439,7 +439,7 @@ void 
ExtendedMapLookahead::compute(const std::vector& segment_inf util::RoutingCosts all_base_costs; /* run Dijkstra's algorithm for each segment type & channel type combination */ -#if defined(VPR_USE_TBB) // Run parallely +#if defined(VPR_USE_TBB) // Run in parallel std::mutex all_costs_mutex; tbb::parallel_for_each(sample_regions, [&](const SampleRegion& region) { #else // Run serially diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp index 2127975a86e..33b181a8be7 100644 --- a/vpr/src/route/router_lookahead_map.cpp +++ b/vpr/src/route/router_lookahead_map.cpp @@ -506,11 +506,11 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_ VTR_ASSERT_SAFE_MSG(std::isfinite(expected_delay_cost), vtr::string_fmt("Lookahead failed to estimate cost from %s: %s", - rr_node_arch_name(size_t(from_node), is_flat_).c_str(), + rr_node_arch_name(from_node, is_flat_).c_str(), describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, - size_t(from_node), + from_node, is_flat_) .c_str()) .c_str()); @@ -539,11 +539,11 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_ VTR_ASSERT_SAFE_MSG(std::isfinite(expected_delay_cost), vtr::string_fmt("Lookahead failed to estimate cost from %s: %s", - rr_node_arch_name(size_t(from_node), is_flat_).c_str(), + rr_node_arch_name(from_node, is_flat_).c_str(), describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, - size_t(from_node), + from_node, is_flat_) .c_str()) .c_str()); diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index 5ec27a15cc8..e3141e947c3 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -6,7 +6,7 @@ * * In general, this utility library contains: * - * - Different dijkstra expansion alogrithms used to perform specific tasks, such as computing the SROURCE/OPIN --> CHAN lookup tables + * - Different dijkstra expansion 
algorithms used to perform specific tasks, such as computing the SOURCE/OPIN --> CHAN lookup tables * - Cost Entries definitions used when generating and querying the lookahead * * To access the utility functions, the util namespace needs to be used. @@ -96,7 +96,7 @@ PQ_Entry::PQ_Entry( float base_cost = 0.f; if (rr_graph.rr_switch_inf(RRSwitchId(switch_ind)).configurable()) { - base_cost = get_single_rr_cong_base_cost(size_t(set_rr_node)); + base_cost = get_single_rr_cong_base_cost(set_rr_node); } VTR_ASSERT(T_linear >= 0.); @@ -148,7 +148,7 @@ util::PQ_Entry_Base_Cost::PQ_Entry_Base_Cost( auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; if (rr_graph.rr_switch_inf(RRSwitchId(switch_ind)).configurable()) { - this->base_cost = parent->base_cost + get_single_rr_cong_base_cost(size_t(set_rr_node)); + this->base_cost = parent->base_cost + get_single_rr_cong_base_cost(set_rr_node); } else { this->base_cost = parent->base_cost; } @@ -365,7 +365,7 @@ t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat) { if (src_opin_delays[layer_num][itile][ptc].empty()) { VTR_LOGV_DEBUG(f_router_debug, "Found no reachable wires from %s (%s) at (%d,%d)\n", rr_node_typename[rr_type], - rr_node_arch_name(size_t(node_id), is_flat).c_str(), + rr_node_arch_name(node_id, is_flat).c_str(), sample_loc.x, sample_loc.y, is_flat); diff --git a/vpr/src/route/router_stats.h b/vpr/src/route/router_stats.h index 4374a6a543a..47e91731179 100644 --- a/vpr/src/route/router_stats.h +++ b/vpr/src/route/router_stats.h @@ -1,5 +1,10 @@ #pragma once +#include "netlist_fwd.h" +#include "rr_graph_fwd.h" +#include "rr_node_types.h" +#include "vtr_assert.h" + // This struct instructs the router on how to route the given connection struct ConnectionParameters { ConnectionParameters(ParentNetId net_id, @@ -23,6 +28,7 @@ struct ConnectionParameters { const std::unordered_map& connection_choking_spots_; }; + struct RouterStats { size_t connections_routed = 0; size_t 
nets_routed = 0; @@ -32,19 +38,19 @@ struct RouterStats { size_t inter_cluster_node_pops = 0; size_t intra_cluster_node_pushes = 0; size_t intra_cluster_node_pops = 0; - size_t inter_cluster_node_type_cnt_pushes[t_rr_type::NUM_RR_TYPES]; - size_t inter_cluster_node_type_cnt_pops[t_rr_type::NUM_RR_TYPES]; - size_t intra_cluster_node_type_cnt_pushes[t_rr_type::NUM_RR_TYPES]; - size_t intra_cluster_node_type_cnt_pops[t_rr_type::NUM_RR_TYPES]; + size_t inter_cluster_node_type_cnt_pushes[t_rr_type::NUM_RR_TYPES] = {0}; + size_t inter_cluster_node_type_cnt_pops[t_rr_type::NUM_RR_TYPES] = {0}; + size_t intra_cluster_node_type_cnt_pushes[t_rr_type::NUM_RR_TYPES] = {0}; + size_t intra_cluster_node_type_cnt_pops[t_rr_type::NUM_RR_TYPES] = {0}; // For debugging purposes - size_t rt_node_pushes[t_rr_type::NUM_RR_TYPES]; - size_t rt_node_high_fanout_pushes[t_rr_type::NUM_RR_TYPES]; - size_t rt_node_entire_tree_pushes[t_rr_type::NUM_RR_TYPES]; + size_t rt_node_pushes[t_rr_type::NUM_RR_TYPES] = {0}; + size_t rt_node_high_fanout_pushes[t_rr_type::NUM_RR_TYPES] = {0}; + size_t rt_node_entire_tree_pushes[t_rr_type::NUM_RR_TYPES] = {0}; - size_t add_all_rt_from_high_fanout; - size_t add_high_fanout_rt; - size_t add_all_rt; + size_t add_all_rt_from_high_fanout = 0; + size_t add_high_fanout_rt = 0; + size_t add_all_rt = 0; }; class WirelengthInfo { diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index d200404d14a..a046361c926 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -3081,10 +3081,9 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, } /* Add the edges from this track to all it's connected pins into the list */ - int num_edges = 0; - num_edges += get_track_to_pins(rr_graph_builder, layer, start, chan_coord, track, tracks_per_chan, node, rr_edges_to_create, - track_to_pin_lookup, seg_details, chan_type, seg_dimension, - wire_to_ipin_switch, wire_to_pin_between_dice_switch, directionality); + 
get_track_to_pins(rr_graph_builder, layer, start, chan_coord, track, tracks_per_chan, node, rr_edges_to_create, + track_to_pin_lookup, seg_details, chan_type, seg_dimension, + wire_to_ipin_switch, wire_to_pin_between_dice_switch, directionality); /* get edges going from the current track into channel segments which are perpendicular to it */ if (chan_coord > 0) { @@ -3099,12 +3098,12 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, max_opposite_chan_width = nodes_per_chan.x_max; } if (to_seg_details->length() > 0) { - num_edges += get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, chan_coord, - opposite_chan_type, seg_dimension, max_opposite_chan_width, grid, - Fs_per_side, sblock_pattern, node, rr_edges_to_create, - from_seg_details, to_seg_details, opposite_chan_details, - directionality, - switch_block_conn, sb_conn_map); + get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, chan_coord, + opposite_chan_type, seg_dimension, max_opposite_chan_width, grid, + Fs_per_side, sblock_pattern, node, rr_edges_to_create, + from_seg_details, to_seg_details, opposite_chan_details, + directionality, + switch_block_conn, sb_conn_map); } } if (chan_coord < chan_dimension) { @@ -3119,12 +3118,11 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, max_opposite_chan_width = nodes_per_chan.x_max; } if (to_seg_details->length() > 0) { - num_edges += get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, chan_coord + 1, - opposite_chan_type, seg_dimension, max_opposite_chan_width, grid, - Fs_per_side, sblock_pattern, node, rr_edges_to_create, - from_seg_details, to_seg_details, opposite_chan_details, - directionality, - switch_block_conn, sb_conn_map); + get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, chan_coord + 1, + opposite_chan_type, seg_dimension, max_opposite_chan_width, grid, + Fs_per_side, sblock_pattern, node, rr_edges_to_create, 
+ from_seg_details, to_seg_details, opposite_chan_details, + directionality, switch_block_conn, sb_conn_map); } } @@ -3152,12 +3150,12 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, max_chan_width = nodes_per_chan.y_max; } if (to_seg_details->length() > 0) { - num_edges += get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, target_seg, - chan_type, seg_dimension, max_chan_width, grid, - Fs_per_side, sblock_pattern, node, rr_edges_to_create, - from_seg_details, to_seg_details, from_chan_details, - directionality, - switch_block_conn, sb_conn_map); + get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, target_seg, + chan_type, seg_dimension, max_chan_width, grid, + Fs_per_side, sblock_pattern, node, rr_edges_to_create, + from_seg_details, to_seg_details, from_chan_details, + directionality, + switch_block_conn, sb_conn_map); } } } @@ -4523,7 +4521,7 @@ static void create_edge_groups(EdgeGroups* groups) { rr_graph.rr_nodes().for_each_edge( [&](RREdgeId edge, RRNodeId src, RRNodeId sink) { if (!rr_graph.rr_switch_inf(RRSwitchId(rr_graph.rr_nodes().edge_switch(edge))).configurable()) { - groups->add_non_config_edge(size_t(src), size_t(sink)); + groups->add_non_config_edge(src, sink); } }); diff --git a/vpr/src/route/rr_graph2.cpp b/vpr/src/route/rr_graph2.cpp index cb5c3f22323..48cc553f44b 100644 --- a/vpr/src/route/rr_graph2.cpp +++ b/vpr/src/route/rr_graph2.cpp @@ -1453,7 +1453,7 @@ bool verify_rr_node_indices(const DeviceGrid& grid, VPR_ERROR(VPR_ERROR_ROUTE, "RR node type does not match between rr_nodes and rr_node_indices (%s/%s): %s", rr_node_typename[rr_graph.node_type(inode)], rr_node_typename[rr_type], - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + describe_rr_node(rr_graph, grid, rr_indexed_data, inode, is_flat).c_str()); } if (rr_graph.node_type(inode) == CHANX) { @@ -1463,7 +1463,7 @@ bool verify_rr_node_indices(const DeviceGrid& grid, 
VPR_ERROR(VPR_ERROR_ROUTE, "RR node y position does not agree between rr_nodes (%d) and rr_node_indices (%d): %s", rr_graph.node_ylow(inode), y, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + describe_rr_node(rr_graph, grid, rr_indexed_data, inode, is_flat).c_str()); } if (!rr_graph.x_in_node_range(x, inode)) { @@ -1471,7 +1471,7 @@ bool verify_rr_node_indices(const DeviceGrid& grid, rr_graph.node_xlow(inode), rr_graph.node_xlow(inode), x, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + describe_rr_node(rr_graph, grid, rr_indexed_data, inode, is_flat).c_str()); } } else if (rr_graph.node_type(inode) == CHANY) { VTR_ASSERT_MSG(rr_graph.node_xlow(inode) == rr_graph.node_xhigh(inode), "CHANY should be veritcal"); @@ -1480,7 +1480,7 @@ bool verify_rr_node_indices(const DeviceGrid& grid, VPR_ERROR(VPR_ERROR_ROUTE, "RR node x position does not agree between rr_nodes (%d) and rr_node_indices (%d): %s", rr_graph.node_xlow(inode), x, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + describe_rr_node(rr_graph, grid, rr_indexed_data, inode, is_flat).c_str()); } if (!rr_graph.y_in_node_range(y, inode)) { @@ -1488,7 +1488,7 @@ bool verify_rr_node_indices(const DeviceGrid& grid, rr_graph.node_ylow(inode), rr_graph.node_ylow(inode), y, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + describe_rr_node(rr_graph, grid, rr_indexed_data, inode, is_flat).c_str()); } } else if (rr_graph.node_type(inode) == SOURCE || rr_graph.node_type(inode) == SINK) { //Sources have co-ordintes covering the entire block they are in @@ -1497,7 +1497,7 @@ bool verify_rr_node_indices(const DeviceGrid& grid, rr_graph.node_xlow(inode), rr_graph.node_xlow(inode), x, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + describe_rr_node(rr_graph, grid, rr_indexed_data, inode, is_flat).c_str()); } if 
(!rr_graph.y_in_node_range(y, inode)) { @@ -1505,7 +1505,7 @@ bool verify_rr_node_indices(const DeviceGrid& grid, rr_graph.node_ylow(inode), rr_graph.node_ylow(inode), y, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + describe_rr_node(rr_graph, grid, rr_indexed_data, inode, is_flat).c_str()); } } else { @@ -1568,7 +1568,7 @@ bool verify_rr_node_indices(const DeviceGrid& grid, rr_area, rr_node.length(), count, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + describe_rr_node(rr_graph, grid, rr_indexed_data, inode, is_flat).c_str()); } /* As we allow a pin to be indexable on multiple sides, * This check code should not be applied to input and output pins @@ -1578,7 +1578,7 @@ bool verify_rr_node_indices(const DeviceGrid& grid, VPR_ERROR(VPR_ERROR_ROUTE, "Mismatch between RR node length (%d) and count within rr_node_indices (%d, should be length + 1): %s", rr_node.length(), count, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + describe_rr_node(rr_graph, grid, rr_indexed_data, inode, is_flat).c_str()); } } } diff --git a/vpr/src/route/rr_graph_area.cpp b/vpr/src/route/rr_graph_area.cpp index a32b7e41a0d..996723ad11e 100644 --- a/vpr/src/route/rr_graph_area.cpp +++ b/vpr/src/route/rr_graph_area.cpp @@ -419,7 +419,7 @@ void count_unidir_routing_transistors(std::vector& /*segment_inf* "Uni-directional RR node driven by non-configurable " "BUFFER has fan in %d (expected 1)\n", fan_in); - msg += " " + describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, size_t(to_node), is_flat); + msg += " " + describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, to_node, is_flat); VPR_FATAL_ERROR(VPR_ERROR_OTHER, msg.c_str()); } diff --git a/vpr/src/route/segment_stats.cpp b/vpr/src/route/segment_stats.cpp index 47b8e71e621..d19271477be 100644 --- a/vpr/src/route/segment_stats.cpp +++ b/vpr/src/route/segment_stats.cpp @@ -47,18 
+47,17 @@ void get_segment_usage_stats(std::vector& segment_inf) { {X_AXIS, std::vector(max_segment_length + 1, 0)}, {Y_AXIS, std::vector(max_segment_length + 1, 0)}}; - for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { - size_t inode = (size_t)rr_id; - auto node_type = rr_graph.node_type(rr_id); + for (RRNodeId inode : device_ctx.rr_graph.nodes()) { + auto node_type = rr_graph.node_type(inode); if (node_type == CHANX || node_type == CHANY) { - cost_index = rr_graph.node_cost_index(rr_id); + cost_index = rr_graph.node_cost_index(inode); size_t seg_type = device_ctx.rr_indexed_data[cost_index].seg_index; int length = -1; if (!segment_inf[seg_type].longline) length = segment_inf[seg_type].length; else length = LONGLINE; - const short& inode_capacity = rr_graph.node_capacity(rr_id); + const short& inode_capacity = rr_graph.node_capacity(inode); int occ = route_ctx.rr_node_route_inf[inode].occ(); auto ax = (node_type == CHANX) ? X_AXIS : Y_AXIS; directed_occ_by_length[ax][length] += occ; diff --git a/vpr/src/timing/NetPinTimingInvalidator.h b/vpr/src/timing/NetPinTimingInvalidator.h new file mode 100644 index 00000000000..f452b95bd7a --- /dev/null +++ b/vpr/src/timing/NetPinTimingInvalidator.h @@ -0,0 +1,173 @@ +#pragma once + +#include "netlist_fwd.h" +#include "tatum/TimingGraphFwd.hpp" +#include "timing_info.h" +#include "vtr_range.h" + +#include "vtr_vec_id_set.h" + +#ifdef VPR_USE_TBB +# include +#endif + +/** Make NetPinTimingInvalidator a virtual class since it does nothing for the general case of non-incremental + * timing updates. It should really be templated to not pay the cost for vtable lookups, but this is the + * best approach without putting a template on every function which uses this machine. 
*/ +class NetPinTimingInvalidator { + public: + typedef vtr::Range tedge_range; + virtual ~NetPinTimingInvalidator() = default; + virtual tedge_range pin_timing_edges(ParentPinId /* pin */) const = 0; + virtual void invalidate_connection(ParentPinId /* pin */, TimingInfo* /* timing_info */) = 0; + virtual void reset() = 0; +}; + +//Helper class for iterating through the timing edges associated with a particular +//clustered netlist pin, and invalidating them. +// +//For efficiency, it pre-calculates and stores the mapping from ClusterPinId -> tatum::EdgeIds, +//and tracks whether a particular ClusterPinId has been already invalidated (to avoid the expense +//of invalidating it multiple times) +class IncrNetPinTimingInvalidator : public NetPinTimingInvalidator { + public: + IncrNetPinTimingInvalidator(const Netlist<>& net_list, + const ClusteredPinAtomPinsLookup& clb_atom_pin_lookup, + const AtomNetlist& atom_nlist, + const AtomLookup& atom_lookup, + const tatum::TimingGraph& timing_graph, + bool is_flat) { + size_t num_pins = net_list.pins().size(); + pin_first_edge_.reserve(num_pins + 1); //Exact + timing_edges_.reserve(num_pins + 1); //Lower bound + for (ParentPinId pin_id : net_list.pins()) { + pin_first_edge_.push_back(timing_edges_.size()); + if (is_flat) { + tatum::EdgeId tedge = atom_pin_to_timing_edge(timing_graph, atom_nlist, atom_lookup, convert_to_atom_pin_id(pin_id)); + + if (!tedge) { + continue; + } + + timing_edges_.push_back(tedge); + } else { + auto cluster_pin_id = convert_to_cluster_pin_id(pin_id); + auto atom_pins = clb_atom_pin_lookup.connected_atom_pins(cluster_pin_id); + for (const AtomPinId atom_pin : atom_pins) { + tatum::EdgeId tedge = atom_pin_to_timing_edge(timing_graph, atom_nlist, atom_lookup, atom_pin); + + if (!tedge) { + continue; + } + + timing_edges_.push_back(tedge); + } + } + } + + //Sentinels + timing_edges_.push_back(tatum::EdgeId::INVALID()); + pin_first_edge_.push_back(timing_edges_.size()); + + 
VTR_ASSERT(pin_first_edge_.size() == net_list.pins().size() + 1); + } + + //Returns the set of timing edges associated with the specified cluster pin + tedge_range pin_timing_edges(ParentPinId pin) const { + int ipin = size_t(pin); + return vtr::make_range(&timing_edges_[pin_first_edge_[ipin]], + &timing_edges_[pin_first_edge_[ipin + 1]]); + } + + /** Invalidates all timing edges associated with the clustered netlist connection + * driving the specified pin. + * Is concurrently safe. */ + void invalidate_connection(ParentPinId pin, TimingInfo* timing_info) { + if (invalidated_pins_.count(pin)) return; //Already invalidated + + for (tatum::EdgeId edge : pin_timing_edges(pin)) { + timing_info->invalidate_delay(edge); + } + + invalidated_pins_.insert(pin); + } + + /** Resets invalidation state for this class + * Not concurrently safe! */ + void reset() { + invalidated_pins_.clear(); + } + + private: + tatum::EdgeId atom_pin_to_timing_edge(const tatum::TimingGraph& timing_graph, + const AtomNetlist& atom_nlist, + const AtomLookup& atom_lookup, + const AtomPinId atom_pin) { + tatum::NodeId pin_tnode = atom_lookup.atom_pin_tnode(atom_pin); + VTR_ASSERT_SAFE(pin_tnode); + + AtomNetId atom_net = atom_nlist.pin_net(atom_pin); + VTR_ASSERT_SAFE(atom_net); + + AtomPinId atom_net_driver = atom_nlist.net_driver(atom_net); + VTR_ASSERT_SAFE(atom_net_driver); + + tatum::NodeId driver_tnode = atom_lookup.atom_pin_tnode(atom_net_driver); + VTR_ASSERT_SAFE(driver_tnode); + + //Find and invalidate the incoming timing edge corresponding + //to the connection between the net driver and sink pin + for (tatum::EdgeId edge : timing_graph.node_in_edges(pin_tnode)) { + if (timing_graph.edge_src_node(edge) == driver_tnode) { + //The edge corresponding to this atom pin + return edge; + } + } + return tatum::EdgeId::INVALID(); //None found + } + + private: + std::vector pin_first_edge_; //Indices into timing_edges corresponding + std::vector timing_edges_; + + /** Cache for invalidated pins. 
Use concurrent set when TBB is turned on, since the + * invalidator may be shared between threads */ +#ifdef VPR_USE_TBB + tbb::concurrent_unordered_set invalidated_pins_; +#else + vtr::vec_id_set invalidated_pins_; +#endif +}; + +/** NetPinTimingInvalidator is only a rube goldberg machine when incremental timing analysis + * is disabled, since timing_info->invalidate_delay does nothing. Use this class when incremental + * STA is disabled. */ +class NoopNetPinTimingInvalidator : public NetPinTimingInvalidator { + public: + tedge_range pin_timing_edges(ParentPinId /* pin */) const { + return vtr::make_range((const tatum::EdgeId*)nullptr, (const tatum::EdgeId*)nullptr); + } + + void invalidate_connection(ParentPinId /* pin */, TimingInfo* /* timing_info */) { + } + + void reset() { + } +}; + +/** Make a NetPinTimingInvalidator depending on update_type. Will return a NoopInvalidator if it's not INCREMENTAL. */ +inline std::unique_ptr make_net_pin_timing_invalidator( + e_timing_update_type update_type, + const Netlist<>& net_list, + const ClusteredPinAtomPinsLookup& clb_atom_pin_lookup, + const AtomNetlist& atom_nlist, + const AtomLookup& atom_lookup, + const tatum::TimingGraph& timing_graph, + bool is_flat) { + if (update_type == e_timing_update_type::FULL || update_type == e_timing_update_type::AUTO) { + return std::make_unique(); + } else { + VTR_ASSERT(update_type == e_timing_update_type::INCREMENTAL); + return std::make_unique(net_list, clb_atom_pin_lookup, atom_nlist, atom_lookup, timing_graph, is_flat); + } +} \ No newline at end of file diff --git a/vpr/src/timing/concrete_timing_info.h b/vpr/src/timing/concrete_timing_info.h index 313d7b8ff1f..9aaae0d82ff 100644 --- a/vpr/src/timing/concrete_timing_info.h +++ b/vpr/src/timing/concrete_timing_info.h @@ -2,6 +2,7 @@ #define VPR_CONCRETE_TIMING_INFO_H #include "vtr_log.h" +#include "timing_info.h" #include "timing_util.h" #include "vpr_error.h" #include "slack_evaluation.h" @@ -443,10 +444,67 @@ class 
ConstantTimingInfo : public SetupHoldTimingInfo { void update_setup() override {} private: + std::vector modified_pins_; /* always empty */ float criticality_; - std::vector modified_pins_; //Always kept empty typedef std::chrono::duration dsec; typedef std::chrono::high_resolution_clock Clock; }; + +/** Create a SetupTimingInfo for the given delay calculator */ +template +std::unique_ptr make_setup_timing_info(std::shared_ptr delay_calculator, e_timing_update_type update_type) { + auto& timing_ctx = g_vpr_ctx.timing(); + + std::shared_ptr analyzer; + + if (update_type == e_timing_update_type::FULL || update_type == e_timing_update_type::AUTO) { + analyzer = tatum::AnalyzerFactory::make(*timing_ctx.graph, *timing_ctx.constraints, *delay_calculator); + } else { + VTR_ASSERT(update_type == e_timing_update_type::INCREMENTAL); + analyzer = tatum::AnalyzerFactory::make(*timing_ctx.graph, *timing_ctx.constraints, *delay_calculator); + } + + return std::make_unique>(timing_ctx.graph, timing_ctx.constraints, delay_calculator, analyzer); +} + +/** Create a HoldTimingInfo for the given delay calculator */ +template +std::unique_ptr make_hold_timing_info(std::shared_ptr delay_calculator, e_timing_update_type update_type) { + auto& timing_ctx = g_vpr_ctx.timing(); + + std::shared_ptr analyzer; + if (update_type == e_timing_update_type::FULL || update_type == e_timing_update_type::AUTO) { + analyzer = tatum::AnalyzerFactory::make(*timing_ctx.graph, *timing_ctx.constraints, *delay_calculator); + } else { + VTR_ASSERT(update_type == e_timing_update_type::INCREMENTAL); + analyzer = tatum::AnalyzerFactory::make(*timing_ctx.graph, *timing_ctx.constraints, *delay_calculator); + } + + return std::make_unique>(timing_ctx.graph, timing_ctx.constraints, delay_calculator, analyzer); +} + +/** Create a SetupHoldTimingInfo for the given delay calculator */ +template +std::unique_ptr make_setup_hold_timing_info(std::shared_ptr delay_calculator, e_timing_update_type update_type) { + auto& 
timing_ctx = g_vpr_ctx.timing(); + + std::shared_ptr analyzer; + if (update_type == e_timing_update_type::FULL || update_type == e_timing_update_type::AUTO) { + analyzer = tatum::AnalyzerFactory::make(*timing_ctx.graph, *timing_ctx.constraints, *delay_calculator); + } else { + VTR_ASSERT(update_type == e_timing_update_type::INCREMENTAL); + analyzer = tatum::AnalyzerFactory::make(*timing_ctx.graph, *timing_ctx.constraints, *delay_calculator); + } + + return std::make_unique>(timing_ctx.graph, timing_ctx.constraints, delay_calculator, analyzer); +} + +/** Create a timing info object which does no timing analysis, and returns + * place-holder values. This is useful to running timing driven algorithms + * with timing disabled */ +inline std::unique_ptr make_constant_timing_info(const float criticality) { + return std::make_unique(criticality); +} + #endif diff --git a/vpr/src/timing/slack_evaluation.cpp b/vpr/src/timing/slack_evaluation.cpp index 060cd46ff51..0de7f775957 100644 --- a/vpr/src/timing/slack_evaluation.cpp +++ b/vpr/src/timing/slack_evaluation.cpp @@ -93,31 +93,13 @@ void SetupSlackCrit::update_slacks(const tatum::SetupTimingAnalyzer& analyzer) { pins_with_modified_slacks_.clear(); -#if defined(VPR_USE_TBB) - tbb::combinable> modified_pins; //Per-thread vectors - - tbb::parallel_for_each(nodes.begin(), nodes.end(), [&, this](tatum::NodeId node) { - AtomPinId modified_pin = this->update_pin_slack(node, analyzer); - - if (modified_pin) { - modified_pins.local().push_back(modified_pin); //Insert into per-thread vector - } - }); - - //Merge per-thread modified pins vectors - modified_pins.combine_each([&](const std::vector& pins) { - pins_with_modified_slacks_.insert(pins_with_modified_slacks_.end(), - pins.begin(), pins.end()); - }); - -#else + /** We could do this in parallel, but the overhead of combining the results is not worth it */ for (tatum::NodeId node : nodes) { AtomPinId modified_pin = update_pin_slack(node, analyzer); if (modified_pin) { 
pins_with_modified_slacks_.push_back(modified_pin); } } -#endif ++incr_slack_updates_; incr_slack_update_time_sec_ += timer.elapsed_sec(); @@ -356,31 +338,13 @@ template void SetupSlackCrit::update_pin_criticalities_from_nodes(const NodeRange& nodes, const tatum::SetupTimingAnalyzer& analyzer) { pins_with_modified_criticalities_.clear(); -#if defined(VPR_USE_TBB) - tbb::combinable> modified_pins; //Per-thread vectors - - tbb::parallel_for_each(nodes.begin(), nodes.end(), [&, this](tatum::NodeId node) { - AtomPinId modified_pin = update_pin_criticality(node, analyzer); - - if (modified_pin) { - modified_pins.local().push_back(modified_pin); //Insert into per-thread vector - } - }); - - //Merge per-thread modified pins vectors - modified_pins.combine_each([&](const std::vector& pins) { - pins_with_modified_criticalities_.insert(pins_with_modified_criticalities_.end(), - pins.begin(), pins.end()); - }); - -#else + /** We could do this in parallel, but the overhead of combining the results is not worth it */ for (tatum::NodeId node : nodes) { AtomPinId modified_pin = update_pin_criticality(node, analyzer); if (modified_pin) { pins_with_modified_criticalities_.push_back(modified_pin); } } -#endif } AtomPinId SetupSlackCrit::update_pin_criticality(const tatum::NodeId node, diff --git a/vpr/src/timing/slack_evaluation.h b/vpr/src/timing/slack_evaluation.h index 34f19fe4b81..3cd75b6ce8b 100644 --- a/vpr/src/timing/slack_evaluation.h +++ b/vpr/src/timing/slack_evaluation.h @@ -1,6 +1,8 @@ -#ifndef VPR_SLACK_EVALUATOR -#define VPR_SLACK_EVALUATOR +#pragma once + #include +#include + #include "atom_netlist_fwd.h" #include "DomainPair.h" #include "tatum/timing_analyzers.hpp" @@ -8,7 +10,7 @@ /* * SetupSlackCrit converts raw timing analysis results (i.e. 
timing tags associated with - * tatum::NodeIds calculated by the timign analyzer), to the shifted slacks and relaxed + * tatum::NodeIds calculated by the timing analyzer), to the shifted slacks and relaxed * criticalities associated with atom netlist connections (i.e. associated withAtomPinIds). * * For efficiency, when update_slacks_and_criticalities() is called it attempts to incrementally @@ -22,7 +24,6 @@ class SetupSlackCrit { public: //Types typedef std::vector::const_iterator modified_pin_iterator; - typedef vtr::Range modified_pin_range; public: //Accessors @@ -176,5 +177,3 @@ class HoldSlackCrit { vtr::vector pin_slacks_; vtr::vector pin_criticalities_; }; - -#endif diff --git a/vpr/src/timing/timing_info.h b/vpr/src/timing/timing_info.h index 5c37237f9e6..323ac7efbb6 100644 --- a/vpr/src/timing/timing_info.h +++ b/vpr/src/timing/timing_info.h @@ -7,23 +7,6 @@ #include "tatum/timing_paths.hpp" #include "timing_util.h" -//Create a SetupTimingInfo for the given delay calculator -template -std::unique_ptr make_setup_timing_info(std::shared_ptr delay_calculator); - -//Create a HoldTimingInfo for the given delay calculator -template -std::unique_ptr make_hold_timing_info(std::shared_ptr delay_calculator); - -//Create a SetupHoldTimingInfo for the given delay calculator -template -std::unique_ptr make_setup_hold_timing_info(std::shared_ptr delay_calculator); - -//Create a timing info object which does no timing analysis, and returns -//place-holder values. This is useful to running timing driven algorithms -//with timing disabled -std::unique_ptr make_no_op_timing_info(); - //Generic inteface which provides functionality to update (but not //access) timing information. // @@ -146,8 +129,8 @@ class HoldTimingInfo : public virtual TimingInfo { //information. // //Implementation Note: -// This class uses multiple inheritence, which is OK in this case for the following reasons: -// * The inheritance is virtual avoiding the diamon problem (i.e. 
there is only +// This class uses multiple inheritance, which is OK in this case for the following reasons: +// * The inheritance is virtual avoiding the diamond problem (i.e. there is only // one base TimingInfo class instance) // * Both SetupTimingInfo and HoldTimingInfo are purely abstract classes so there // is no data to be duplicated @@ -156,56 +139,4 @@ class SetupHoldTimingInfo : public SetupTimingInfo, public HoldTimingInfo { virtual std::shared_ptr setup_hold_analyzer() const = 0; }; -#include "concrete_timing_info.h" - -template -std::unique_ptr make_setup_timing_info(std::shared_ptr delay_calculator, e_timing_update_type update_type) { - auto& timing_ctx = g_vpr_ctx.timing(); - - std::shared_ptr analyzer; - - if (update_type == e_timing_update_type::FULL || update_type == e_timing_update_type::AUTO) { - analyzer = tatum::AnalyzerFactory::make(*timing_ctx.graph, *timing_ctx.constraints, *delay_calculator); - } else { - VTR_ASSERT(update_type == e_timing_update_type::INCREMENTAL); - analyzer = tatum::AnalyzerFactory::make(*timing_ctx.graph, *timing_ctx.constraints, *delay_calculator); - } - - return std::make_unique>(timing_ctx.graph, timing_ctx.constraints, delay_calculator, analyzer); -} - -template -std::unique_ptr make_hold_timing_info(std::shared_ptr delay_calculator, e_timing_update_type update_type) { - auto& timing_ctx = g_vpr_ctx.timing(); - - std::shared_ptr analyzer; - if (update_type == e_timing_update_type::FULL || update_type == e_timing_update_type::AUTO) { - analyzer = tatum::AnalyzerFactory::make(*timing_ctx.graph, *timing_ctx.constraints, *delay_calculator); - } else { - VTR_ASSERT(update_type == e_timing_update_type::INCREMENTAL); - analyzer = tatum::AnalyzerFactory::make(*timing_ctx.graph, *timing_ctx.constraints, *delay_calculator); - } - - return std::make_unique>(timing_ctx.graph, timing_ctx.constraints, delay_calculator, analyzer); -} - -template -std::unique_ptr make_setup_hold_timing_info(std::shared_ptr delay_calculator, 
e_timing_update_type update_type) { - auto& timing_ctx = g_vpr_ctx.timing(); - - std::shared_ptr analyzer; - if (update_type == e_timing_update_type::FULL || update_type == e_timing_update_type::AUTO) { - analyzer = tatum::AnalyzerFactory::make(*timing_ctx.graph, *timing_ctx.constraints, *delay_calculator); - } else { - VTR_ASSERT(update_type == e_timing_update_type::INCREMENTAL); - analyzer = tatum::AnalyzerFactory::make(*timing_ctx.graph, *timing_ctx.constraints, *delay_calculator); - } - - return std::make_unique>(timing_ctx.graph, timing_ctx.constraints, delay_calculator, analyzer); -} - -inline std::unique_ptr make_constant_timing_info(const float criticality) { - return std::make_unique(criticality); -} - #endif diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp index 63028c8ef8c..536667faa51 100644 --- a/vpr/src/timing/timing_util.cpp +++ b/vpr/src/timing/timing_util.cpp @@ -6,9 +6,11 @@ #include "vtr_math.h" #include "globals.h" -#include "timing_util.h" -#include "timing_info.h" #include "timing_fail_error.h" +#include "timing_info.h" +#include "timing_util.h" + +#include "tatum/report/graphviz_dot_writer.hpp" double sec_to_nanosec(double seconds) { return 1e9 * seconds; @@ -800,7 +802,7 @@ float calc_relaxed_criticality(const std::map& domains_max_re VTR_ASSERT_SAFE_MSG(!std::isnan(crit), "Criticality not be nan"); VTR_ASSERT_SAFE_MSG(std::isfinite(crit), "Criticality should not be infinite"); VTR_ASSERT_MSG(crit >= 0. - CRITICALITY_ROUND_OFF_TOLERANCE, "Criticality should never be negative"); - VTR_ASSERT_MSG(crit <= 1. + CRITICALITY_ROUND_OFF_TOLERANCE, "Criticality should never be greather than one"); + VTR_ASSERT_MSG(crit <= 1. + CRITICALITY_ROUND_OFF_TOLERANCE, "Criticality should never be greater than one"); //Clamp criticality to [0., 1.] 
to correct round-off crit = std::max(0.f, crit); @@ -809,7 +811,7 @@ float calc_relaxed_criticality(const std::map& domains_max_re max_crit = std::max(max_crit, crit); } VTR_ASSERT_MSG(max_crit >= 0., "Criticality should never be negative"); - VTR_ASSERT_MSG(max_crit <= 1., "Criticality should never be greather than one"); + VTR_ASSERT_MSG(max_crit <= 1., "Criticality should never be greater than one"); return max_crit; } diff --git a/vpr/src/timing/timing_util.h b/vpr/src/timing/timing_util.h index b304d0a69c5..51fc0491c28 100644 --- a/vpr/src/timing/timing_util.h +++ b/vpr/src/timing/timing_util.h @@ -2,12 +2,11 @@ #define VPR_TIMING_UTIL_H #include +#include "netlist_fwd.h" #include "tatum/timing_analyzers.hpp" #include "tatum/TimingConstraints.hpp" #include "tatum/timing_paths.hpp" -#include "vtr_vec_id_set.h" - #include "histogram.h" #include "timing_info_fwd.h" #include "DomainPair.h" @@ -83,117 +82,6 @@ tatum::NodeId find_origin_node_for_hold_slack(const tatum::TimingTags::tag_range //Returns the a map of domain's and their clock fanout (i.e. logical outputs at which the clock captures) std::map count_clock_fanouts(const tatum::TimingGraph& timing_graph, const tatum::SetupTimingAnalyzer& setup_analyzer); -//Helper class for iterating through the timing edges associated with a particular -//clustered netlist pin, and invalidating them. 
-// -//For efficiency, it pre-calculates and stores the mapping from ClusterPinId -> tatum::EdgeIds, -//and tracks whether a particular ClusterPinId has been already invalidated (to avoid the expense -//of invalidating it multiple times) -class NetPinTimingInvalidator { - public: - typedef vtr::Range tedge_range; - - public: - NetPinTimingInvalidator(const Netlist<>& net_list, - const ClusteredPinAtomPinsLookup& clb_atom_pin_lookup, - const AtomNetlist& atom_nlist, - const AtomLookup& atom_lookup, - const tatum::TimingGraph& timing_graph, - bool is_flat) { - size_t num_pins = net_list.pins().size(); - pin_first_edge_.reserve(num_pins + 1); //Exact - timing_edges_.reserve(num_pins + 1); //Lower bound - for (ParentPinId pin_id : net_list.pins()) { - pin_first_edge_.push_back(timing_edges_.size()); - if (is_flat) { - tatum::EdgeId tedge = atom_pin_to_timing_edge(timing_graph, atom_nlist, atom_lookup, convert_to_atom_pin_id(pin_id)); - - if (!tedge) { - continue; - } - - timing_edges_.push_back(tedge); - } else { - auto cluster_pin_id = convert_to_cluster_pin_id(pin_id); - auto atom_pins = clb_atom_pin_lookup.connected_atom_pins(cluster_pin_id); - for (const AtomPinId atom_pin : atom_pins) { - tatum::EdgeId tedge = atom_pin_to_timing_edge(timing_graph, atom_nlist, atom_lookup, atom_pin); - - if (!tedge) { - continue; - } - - timing_edges_.push_back(tedge); - } - } - } - - //Sentinels - timing_edges_.push_back(tatum::EdgeId::INVALID()); - pin_first_edge_.push_back(timing_edges_.size()); - - VTR_ASSERT(pin_first_edge_.size() == net_list.pins().size() + 1); - } - - //Returns the set of timing edges associated with the specified cluster pin - tedge_range pin_timing_edges(ParentPinId pin) const { - int ipin = size_t(pin); - return vtr::make_range(&timing_edges_[pin_first_edge_[ipin]], - &timing_edges_[pin_first_edge_[ipin + 1]]); - } - - //Invalidates all timing edges associated with the clustered netlist connection - //driving the specified pin - template - void 
invalidate_connection(ParentPinId pin, TimingInfo* timing_info) { - if (invalidated_pins_.count(pin)) return; //Already invalidated - - for (tatum::EdgeId edge : pin_timing_edges(pin)) { - timing_info->invalidate_delay(edge); - } - - invalidated_pins_.insert(pin); - } - - //Resets invalidation state for this class - void reset() { - invalidated_pins_.clear(); - } - - private: - tatum::EdgeId atom_pin_to_timing_edge(const tatum::TimingGraph& timing_graph, - const AtomNetlist& atom_nlist, - const AtomLookup& atom_lookup, - const AtomPinId atom_pin) { - tatum::NodeId pin_tnode = atom_lookup.atom_pin_tnode(atom_pin); - VTR_ASSERT_SAFE(pin_tnode); - - AtomNetId atom_net = atom_nlist.pin_net(atom_pin); - VTR_ASSERT_SAFE(atom_net); - - AtomPinId atom_net_driver = atom_nlist.net_driver(atom_net); - VTR_ASSERT_SAFE(atom_net_driver); - - tatum::NodeId driver_tnode = atom_lookup.atom_pin_tnode(atom_net_driver); - VTR_ASSERT_SAFE(driver_tnode); - - //Find and invalidate the incoming timing edge corresponding - //to the connection between the net driver and sink pin - for (tatum::EdgeId edge : timing_graph.node_in_edges(pin_tnode)) { - if (timing_graph.edge_src_node(edge) == driver_tnode) { - //The edge corresponding to this atom pin - return edge; - } - } - return tatum::EdgeId::INVALID(); //None found - } - - private: - std::vector pin_first_edge_; //Indicies into timing_edges corresponding - std::vector timing_edges_; - vtr::vec_id_set invalidated_pins_; -}; - /* * Slack and criticality calculation utilities */ diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index e3815e4e006..6157c9b980d 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -207,27 +207,27 @@ void sync_grid_to_blocks() { } } -std::string rr_node_arch_name(int inode, bool is_flat) { +std::string rr_node_arch_name(RRNodeId inode, bool is_flat) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - auto rr_node = RRNodeId(inode); + auto 
rr_node = inode; std::string rr_node_arch_name; - if (rr_graph.node_type(RRNodeId(inode)) == OPIN || rr_graph.node_type(RRNodeId(inode)) == IPIN) { + if (rr_graph.node_type(inode) == OPIN || rr_graph.node_type(inode) == IPIN) { //Pin names auto type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(rr_node), rr_graph.node_ylow(rr_node), rr_graph.node_layer(rr_node)}); rr_node_arch_name += block_type_pin_index_to_name(type, rr_graph.node_pin_num(rr_node), is_flat); - } else if (rr_graph.node_type(RRNodeId(inode)) == SOURCE || rr_graph.node_type(RRNodeId(inode)) == SINK) { + } else if (rr_graph.node_type(inode) == SOURCE || rr_graph.node_type(inode) == SINK) { //Set of pins associated with SOURCE/SINK auto type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(rr_node), rr_graph.node_ylow(rr_node), rr_graph.node_layer(rr_node)}); auto pin_names = block_type_class_index_to_pin_names(type, rr_graph.node_class_num(rr_node), is_flat); if (pin_names.size() > 1) { - rr_node_arch_name += rr_graph.node_type_string(RRNodeId(inode)); + rr_node_arch_name += rr_graph.node_type_string(inode); rr_node_arch_name += " connected to "; rr_node_arch_name += "{"; rr_node_arch_name += vtr::join(pin_names, ", "); @@ -236,9 +236,9 @@ std::string rr_node_arch_name(int inode, bool is_flat) { rr_node_arch_name += pin_names[0]; } } else { - VTR_ASSERT(rr_graph.node_type(RRNodeId(inode)) == CHANX || rr_graph.node_type(RRNodeId(inode)) == CHANY); + VTR_ASSERT(rr_graph.node_type(inode) == CHANX || rr_graph.node_type(inode) == CHANY); //Wire segment name - auto cost_index = rr_graph.node_cost_index(RRNodeId(inode)); + auto cost_index = rr_graph.node_cost_index(inode); int seg_index = device_ctx.rr_indexed_data[cost_index].seg_index; rr_node_arch_name += rr_graph.rr_segments(RRSegmentId(seg_index)).name; diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index f3a8f8917e7..1ba3dcb35b7 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -115,7 +115,7 @@ 
inline ParentNetId get_cluster_net_parent_id(const AtomLookup& atom_look_up, Clu void sync_grid_to_blocks(); //Returns a user-friendly architectural identifier for the specified RR node -std::string rr_node_arch_name(int inode, bool is_flat); +std::string rr_node_arch_name(RRNodeId inode, bool is_flat); /************************************************************** * Intra-Logic Block Utility Functions diff --git a/vpr/test/test_connection_router.cpp b/vpr/test/test_connection_router.cpp index 82109c9b45b..6c1b54734e3 100644 --- a/vpr/test/test_connection_router.cpp +++ b/vpr/test/test_connection_router.cpp @@ -1,6 +1,7 @@ #include #include "catch2/catch_test_macros.hpp" +#include "rr_graph_fwd.h" #include "vpr_api.h" #include "vpr_signal_handler.h" #include "globals.h" @@ -14,8 +15,8 @@ static constexpr int kMaxHops = 10; namespace { // Route from source_node to sink_node, returning either the delay, or infinity if unroutable. -static float do_one_route(int source_node, - int sink_node, +static float do_one_route(RRNodeId source_node, + RRNodeId sink_node, const t_det_routing_arch& det_routing_arch, const t_router_opts& router_opts, const std::vector& segment_inf) { @@ -67,18 +68,19 @@ static float do_one_route(int source_node, -1, false, std::unordered_map()); - std::tie(found_path, cheapest) = router.timing_driven_route_connection_from_route_tree(tree.root(), - sink_node, - cost_params, - bounding_box, - router_stats, - conn_params); + std::tie(found_path, std::ignore, cheapest) = router.timing_driven_route_connection_from_route_tree(tree.root(), + sink_node, + cost_params, + bounding_box, + router_stats, + conn_params, + true); // Default delay is infinity, which indicates that a route was not found. float delay = std::numeric_limits::infinity(); if (found_path) { // Check that the route goes to the requested sink. 
- REQUIRE(cheapest.index == sink_node); + REQUIRE(RRNodeId(cheapest.index) == sink_node); // Get the delay vtr::optional rt_node_of_sink; @@ -92,12 +94,12 @@ static float do_one_route(int source_node, } // Find a source and a sink by walking edges. -std::tuple find_source_and_sink() { +std::tuple find_source_and_sink() { auto& device_ctx = g_vpr_ctx.device(); auto& rr_graph = device_ctx.rr_graph; // Current longest walk - std::tuple longest = std::make_tuple(0, 0, 0); + std::tuple longest = std::make_tuple(RRNodeId::INVALID(), RRNodeId::INVALID(), 0); // Start from each RR node for (size_t id = 0; id < rr_graph.num_nodes(); id++) { @@ -112,7 +114,7 @@ std::tuple find_source_and_sink() { // If this is the new longest walk, store it. if (hops > std::get<2>(longest)) { - longest = std::make_tuple(size_t(source), size_t(sink), hops); + longest = std::make_tuple(source, sink, hops); } } } @@ -164,7 +166,8 @@ TEST_CASE("connection_router", "[vpr]") { router_opts.flat_routing); // Find a source and sink to route - int source_rr_node, sink_rr_node, hops; + RRNodeId source_rr_node, sink_rr_node; + int hops; std::tie(source_rr_node, sink_rr_node, hops) = find_source_and_sink(); // Check that the route will be non-trivial diff --git a/vpr/test/test_edge_groups.cpp b/vpr/test/test_edge_groups.cpp index 9db09290f20..80bfea5e133 100644 --- a/vpr/test/test_edge_groups.cpp +++ b/vpr/test/test_edge_groups.cpp @@ -58,7 +58,7 @@ TEST_CASE("edge_groups_create_sets", "[vpr]") { // Add edges to the EdgeGroups object EdgeGroups groups; for (auto edge : random_edges) { - groups.add_non_config_edge(edge.first, edge.second); + groups.add_non_config_edge(RRNodeId(edge.first), RRNodeId(edge.second)); } // The algorithm to test @@ -67,9 +67,9 @@ TEST_CASE("edge_groups_create_sets", "[vpr]") { // Check for the expected sets for (auto set : connected_sets) { - std::set random_set; + std::set random_set; for (auto elem : set) { - random_set.insert(random_nodes[elem]); + 
random_set.insert(RRNodeId(random_nodes[elem])); } REQUIRE(sets.node_sets.find(random_set) != sets.node_sets.end()); } diff --git a/vtr_flow/parse/pass_requirements/common/pass_requirements.vpr_route_min_chan_width.txt b/vtr_flow/parse/pass_requirements/common/pass_requirements.vpr_route_min_chan_width.txt index b1e1071071e..8986c61e434 100644 --- a/vtr_flow/parse/pass_requirements/common/pass_requirements.vpr_route_min_chan_width.txt +++ b/vtr_flow/parse/pass_requirements/common/pass_requirements.vpr_route_min_chan_width.txt @@ -11,7 +11,7 @@ min_chan_width_routing_area_total;Range(0.7,1.3) min_chan_width_routing_area_per_tile;Range(0.7,1.3) #Run-time metrics -min_chan_width_route_time;RangeAbs(0.10,14.0,2) +min_chan_width_route_time;RangeAbs(0.10,15.0,2) #Peak memory #We set a 100MiB minimum threshold since the memory