diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index dde02162379..f823e3aa381 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -1713,7 +1713,11 @@ struct t_arch_switch_inf { * mux_trans_size: The area of each transistor in the segment's driving mux * * measured in minimum width transistor units * * buf_size: The area of the buffer. If set to zero, area should be * - * calculated from R */ + * calculated from R + * intra_tile: Indicate whether this rr_switch is a switch type used inside * + * clusters. These switch types are not specified in the * + * architecture description file and are added when flat router * + * is enabled */ struct t_rr_switch_inf { float R = 0.; float Cin = 0.; @@ -1726,6 +1730,8 @@ struct t_rr_switch_inf { e_power_buffer_type power_buffer_type = POWER_BUFFER_TYPE_UNDEFINED; float power_buffer_size = 0.; + bool intra_tile = false; + public: //Returns the type of switch SwitchType type() const; diff --git a/libs/librrgraph/src/base/rr_edge.h b/libs/librrgraph/src/base/rr_edge.h index 01a01526cfe..0fb3ac244ff 100644 --- a/libs/librrgraph/src/base/rr_edge.h +++ b/libs/librrgraph/src/base/rr_edge.h @@ -3,9 +3,8 @@ #include "rr_graph_fwd.h" -/* TODO: MUST change the node id to RRNodeId before refactoring is finished! 
*/ struct t_rr_edge_info { - t_rr_edge_info(RRNodeId from, RRNodeId to, short type, bool is_remapped = false) noexcept + t_rr_edge_info(RRNodeId from, RRNodeId to, short type, bool is_remapped) noexcept : from_node(from) , to_node(to) , switch_type(type) diff --git a/libs/librrgraph/src/base/rr_graph_builder.cpp b/libs/librrgraph/src/base/rr_graph_builder.cpp index 072b47804ab..ca73a5a3811 100644 --- a/libs/librrgraph/src/base/rr_graph_builder.cpp +++ b/libs/librrgraph/src/base/rr_graph_builder.cpp @@ -60,6 +60,14 @@ void RRGraphBuilder::add_node_to_all_locs(RRNodeId node) { } } +void RRGraphBuilder::init_edge_remap(bool val) { + node_storage_.init_edge_remap(val); +} + +void RRGraphBuilder::clear_temp_storage() { + node_storage_.clear_temp_storage(); +} + void RRGraphBuilder::clear() { node_lookup_.clear(); node_storage_.clear(); diff --git a/libs/librrgraph/src/base/rr_graph_builder.h b/libs/librrgraph/src/base/rr_graph_builder.h index 5c00e1d2c18..cf82d64a24a 100644 --- a/libs/librrgraph/src/base/rr_graph_builder.h +++ b/libs/librrgraph/src/base/rr_graph_builder.h @@ -129,6 +129,10 @@ class RRGraphBuilder { */ void add_node_to_all_locs(RRNodeId node); + void init_edge_remap(bool val); + + void clear_temp_storage(); + /** @brief Clear all the underlying data storage */ void clear(); /** @brief reorder all the nodes @@ -216,10 +220,14 @@ class RRGraphBuilder { node_storage_.reserve_edges(num_edges); } - /** @brief emplace_back_edge; It add one edge. This method is efficient if reserve_edges was called with - * the number of edges present in the graph. */ - inline void emplace_back_edge(RRNodeId src, RRNodeId dest, short edge_switch) { - node_storage_.emplace_back_edge(src, dest, edge_switch); + /** @brief emplace_back_edge It adds one edge. This method is efficient if reserve_edges was called with + * the number of edges present in the graph. + * @param remapped If true, it means the switch id (edge_switch) corresponds to rr switch id. 
Thus, when the remapped function is called to + * remap the arch switch id to rr switch id, the edge switch id of this edge shouldn't be changed. For example, when the intra-cluster graph + * is built and the rr-graph related to global resources are read from a file, this parameter is true since the intra-cluster switches are + * also listed in rr-graph file. So, we use that list to use the rr switch id instead of passing arch switch id for intra-cluster edges.*/ + inline void emplace_back_edge(RRNodeId src, RRNodeId dest, short edge_switch, bool remapped) { + node_storage_.emplace_back_edge(src, dest, edge_switch, remapped); } /** @brief Append 1 more RR node to the RR graph. */ inline void emplace_back() { diff --git a/libs/librrgraph/src/base/rr_graph_storage.cpp b/libs/librrgraph/src/base/rr_graph_storage.cpp index 9934752dce0..ef85d779a4c 100644 --- a/libs/librrgraph/src/base/rr_graph_storage.cpp +++ b/libs/librrgraph/src/base/rr_graph_storage.cpp @@ -11,13 +11,13 @@ void t_rr_graph_storage::reserve_edges(size_t num_edges) { edge_remapped_.reserve(num_edges); } -void t_rr_graph_storage::emplace_back_edge(RRNodeId src, RRNodeId dest, short edge_switch) { +void t_rr_graph_storage::emplace_back_edge(RRNodeId src, RRNodeId dest, short edge_switch, bool remapped) { // Cannot mutate edges once edges have been read! VTR_ASSERT(!edges_read_); edge_src_node_.emplace_back(src); edge_dest_node_.emplace_back(dest); edge_switch_.emplace_back(edge_switch); - edge_remapped_.emplace_back(false); + edge_remapped_.emplace_back(remapped); } // Typical node to edge ratio. 
This allows a preallocation guess for the edges @@ -48,7 +48,8 @@ void t_rr_graph_storage::alloc_and_load_edges(const t_rr_edge_info_set* rr_edges emplace_back_edge( new_edge.from_node, new_edge.to_node, - new_edge.switch_type); + new_edge.switch_type, + new_edge.remapped); } } @@ -398,17 +399,10 @@ void t_rr_graph_storage::init_fan_in() { //Reset all fan-ins to zero edges_read_ = true; node_fan_in_.resize(node_storage_.size(), 0); - // This array is used to avoid initializing fan-in of the nodes which are already seen. - // This would reduce the run-time of flat rr graph generation since this function is called twice. - seen_edge_.resize(edge_dest_node_.size(), false); node_fan_in_.shrink_to_fit(); - seen_edge_.shrink_to_fit(); //Walk the graph and increment fanin on all downstream nodes for(const auto& edge_id : edge_dest_node_.keys()) { - if(!seen_edge_[edge_id]) { - node_fan_in_[edge_dest_node_[edge_id]] += 1; - seen_edge_[edge_id] = true; - } + node_fan_in_[edge_dest_node_[edge_id]] += 1; } } @@ -829,7 +823,6 @@ void t_rr_graph_storage::reorder(const vtr::vector& order, auto old_edge_dest_node = edge_dest_node_; auto old_edge_switch = edge_switch_; auto old_edge_remapped = edge_remapped_; - auto old_seen_edge = seen_edge_; RREdgeId cur_edge(0); // Reorder edges by source node @@ -843,7 +836,6 @@ void t_rr_graph_storage::reorder(const vtr::vector& order, edge_dest_node_[cur_edge] = order[old_edge_dest_node[e]]; edge_switch_[cur_edge] = old_edge_switch[e]; edge_remapped_[cur_edge] = old_edge_remapped[e]; - seen_edge_[cur_edge] = old_seen_edge[e]; cur_edge = RREdgeId(size_t(cur_edge) + 1); } } diff --git a/libs/librrgraph/src/base/rr_graph_storage.h b/libs/librrgraph/src/base/rr_graph_storage.h index b6c85caa22e..09d80264645 100644 --- a/libs/librrgraph/src/base/rr_graph_storage.h +++ b/libs/librrgraph/src/base/rr_graph_storage.h @@ -442,7 +442,6 @@ class t_rr_graph_storage { node_first_edge_.clear(); node_fan_in_.clear(); node_layer_.clear(); - 
seen_edge_.clear(); edge_src_node_.clear(); edge_dest_node_.clear(); edge_switch_.clear(); @@ -452,6 +451,18 @@ class t_rr_graph_storage { remapped_edges_ = false; } + // Clear the data structures that are mainly used during RR graph construction. + // After RR Graph is built, we no longer need these data structures. + void clear_temp_storage() { + edge_remapped_.clear(); + } + + // Clear edge_remap data structure, and then initialize it with the given value + void init_edge_remap(bool val) { + edge_remapped_.clear(); + edge_remapped_.resize(edge_switch_.size(), val); + } + // Shrink memory usage of the RR graph storage. // // Note that this will temporarily increase the amount of storage required @@ -462,7 +473,6 @@ class t_rr_graph_storage { node_first_edge_.shrink_to_fit(); node_fan_in_.shrink_to_fit(); node_layer_.shrink_to_fit(); - seen_edge_.shrink_to_fit(); edge_src_node_.shrink_to_fit(); edge_dest_node_.shrink_to_fit(); edge_switch_.shrink_to_fit(); @@ -561,11 +571,18 @@ class t_rr_graph_storage { // Reserve at least num_edges in the edge backing arrays. void reserve_edges(size_t num_edges); - // Add one edge. This method is efficient if reserve_edges was called with - // the number of edges present in the graph. This method is still - // amortized O(1), like std::vector::emplace_back, but both runtime and - // peak memory usage will be higher if reallocation is required. - void emplace_back_edge(RRNodeId src, RRNodeId dest, short edge_switch); + /*** + * @brief Add one edge. This method is efficient if reserve_edges was called with + * the number of edges present in the graph. This method is still + * amortized O(1), like std::vector::emplace_back, but both runtime and + * peak memory usage will be higher if reallocation is required. + * @param remapped This is used later in remap_rr_node_switch_indices to check whether an + * edge needs its switch ID remapped from the arch_sw_idx to rr_sw_idx. 
+ * The difference between these two ids is because some switch delays depend on the fan-in + * of the node. Also, the information about switches is fly-weighted and is accessible with IDs. Thus, + * the number of rr switch types can be higher than the number of arch switch types. + */ + void emplace_back_edge(RRNodeId src, RRNodeId dest, short edge_switch, bool remapped); // Adds a batch of edges. void alloc_and_load_edges(const t_rr_edge_info_set* rr_edges_to_create); @@ -696,10 +713,24 @@ class t_rr_graph_storage { vtr::vector edge_src_node_; vtr::vector edge_dest_node_; vtr::vector edge_switch_; + /** + * The delay of certain switches specified in the architecture file depends on the number of inputs of the edge's sink node (pins or tracks). + * For example, in the case of a MUX switch, the delay increases as the number of inputs increases. + * During the construction of the RR Graph, switch IDs are assigned to the edges according to the order specified in the architecture file. + * These switch IDs are later used to retrieve information such as delay for each edge. + * This allows for effective fly-weighting of edge information. + * + * After building the RR Graph, we iterate over the nodes once more to store their fan-in. + * If a switch's characteristics depend on the fan-in of a node, a new switch ID is generated and assigned to the corresponding edge. + * This process is known as remapping. + * In this vector, we store information about which edges have undergone remapping. + * It is necessary to store this information, especially when flat-router is enabled. + * Remapping occurs when constructing global resources after placement and when adding intra-cluster resources after placement. + * Without storing this information, during subsequent remappings, it would be unclear whether the stored switch ID + * corresponds to the architecture ID or the RR Graph switch ID for an edge. 
+ */ vtr::vector edge_remapped_; - vtr::vector seen_edge_; - /*************** * State flags * ***************/ diff --git a/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h b/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h index 2f0017be2ac..ff78fe54282 100644 --- a/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h +++ b/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h @@ -460,11 +460,15 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { // // If the switch name is not present in the architecture, generate an // error. + // If the graph is written when flat-routing is enabled, the types of the switches inside of the rr_graph are also + // added to the XML file. These types are not added to the data structure that contain arch switch types. They are added to all_sw_inf under device context. + // It remains as a future work to remove the arch_switch_types and use all_sw info under device_ctx instead. bool found_arch_name = false; std::string string_name = std::string(name); + // The string name has the format of "Internal Switch/delay". So, I have to use compare to specify the portion I want to be compared. 
+ bool is_internal_sw = string_name.compare(0, 15, "Internal Switch") == 0; for (const auto& arch_sw_inf: arch_switch_inf_) { - if (string_name == arch_sw_inf.name) { - string_name = arch_sw_inf.name; + if (string_name == arch_sw_inf.name || is_internal_sw) { found_arch_name = true; break; } @@ -472,7 +476,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { if (!found_arch_name) { report_error("Switch name '%s' not found in architecture\n", string_name.c_str()); } - + sw->intra_tile = is_internal_sw; sw->name = string_name; } inline const char* get_switch_name(const t_rr_switch_inf*& sw) final { @@ -832,6 +836,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { inline void finish_rr_nodes_node(int& /*inode*/) final { } inline size_t num_rr_nodes_node(void*& /*ctx*/) final { + return rr_nodes_->size(); } inline const t_rr_node get_rr_nodes_node(int n, void*& /*ctx*/) final { @@ -923,7 +928,8 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { bind.set_ignore(); } - rr_graph_builder_->emplace_back_edge(RRNodeId(src_node), RRNodeId(sink_node), switch_id); + // The switch ids in the rr graph file are rr switch ids, not architecture switch ids + rr_graph_builder_->emplace_back_edge(RRNodeId(src_node), RRNodeId(sink_node), switch_id, true); return bind; } inline void finish_rr_edges_edge(MetadataBind& bind) final { diff --git a/libs/libvtrcapnproto/intra_cluster_serdes.h b/libs/libvtrcapnproto/intra_cluster_serdes.h index e60ebb443e8..f5579fc8cde 100644 --- a/libs/libvtrcapnproto/intra_cluster_serdes.h +++ b/libs/libvtrcapnproto/intra_cluster_serdes.h @@ -16,201 +16,64 @@ #include "vpr_types.h" #include "router_lookahead_map_utils.h" - -void ToIntraClusterLookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, - std::unordered_map>& tile_min_cost, - const std::vector& physical_tile_types, - const VprIntraClusterLookahead::Reader& intra_cluster_lookahead_builder); - -void FromIntraClusterLookahead(VprIntraClusterLookahead::Builder& 
intra_cluster_lookahead_builder, - const std::unordered_map& inter_tile_pin_primitive_pin_delay, - const std::unordered_map>& tile_min_cost, - const std::vector& physical_tile_types); -// Generic function to convert from Matrix capnproto message to vtr::NdMatrix. -// -// Template arguments: -// N = Number of matrix dimensions, must be fixed. -// CapType = Source capnproto message type that is a single element the -// Matrix capnproto message. -// CType = Target C++ type that is a single element of vtr::NdMatrix. -// -// Arguments: -// m_out = Target vtr::NdMatrix. -// m_in = Source capnproto message reader. -// copy_fun = Function to convert from CapType to CType. -void ToIntraClusterLookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, - std::unordered_map>& tile_min_cost, - const std::vector& physical_tile_types, - const VprIntraClusterLookahead::Reader& intra_cluster_lookahead_builder) { - - inter_tile_pin_primitive_pin_delay.clear(); - tile_min_cost.clear(); - - int num_tile_types = intra_cluster_lookahead_builder.getPhysicalTileNumPins().size(); - VTR_ASSERT(num_tile_types == (int)physical_tile_types.size()); - - std::vector tile_num_pins(num_tile_types); - std::vector tile_num_sinks(num_tile_types); - - for (int tile_idx = 0; tile_idx < num_tile_types; tile_idx++) { - tile_num_pins[tile_idx] = intra_cluster_lookahead_builder.getPhysicalTileNumPins()[tile_idx]; - tile_num_sinks[tile_idx] = intra_cluster_lookahead_builder.getTileNumSinks()[tile_idx]; - } - - int num_seen_sinks = 0; - int num_seen_pins = 0; - for(int tile_type_idx = 0; tile_type_idx < num_tile_types; tile_type_idx++) { - int cur_tile_num_pins = tile_num_pins[tile_type_idx]; - t_physical_tile_type_ptr physical_type_ptr = &physical_tile_types[tile_type_idx]; - inter_tile_pin_primitive_pin_delay[physical_type_ptr] = util::t_ipin_primitive_sink_delays(cur_tile_num_pins); - for(int pin_num = 0; pin_num < cur_tile_num_pins; pin_num++) { - 
inter_tile_pin_primitive_pin_delay[physical_type_ptr][pin_num].clear(); - int pin_num_sinks = intra_cluster_lookahead_builder.getPinNumSinks()[num_seen_pins]; - num_seen_pins++; - for(int sink_idx = 0; sink_idx < pin_num_sinks; sink_idx++) { - int sink_pin_num = intra_cluster_lookahead_builder.getPinSinks()[num_seen_sinks]; - auto cost = intra_cluster_lookahead_builder.getPinSinkCosts()[num_seen_sinks]; - inter_tile_pin_primitive_pin_delay[physical_type_ptr][pin_num].insert(std::make_pair(sink_pin_num, - util::Cost_Entry(cost.getDelay(), cost.getCongestion()))); - num_seen_sinks++; - } - - - } - } - - num_seen_sinks = 0; - for(int tile_type_idx = 0; tile_type_idx < num_tile_types; tile_type_idx++) { - int cur_tile_num_sinks = tile_num_sinks[tile_type_idx]; - t_physical_tile_type_ptr physical_type_ptr = &physical_tile_types[tile_type_idx]; - tile_min_cost[physical_type_ptr] = std::unordered_map(); - for(int sink_idx = 0; sink_idx < cur_tile_num_sinks; sink_idx++) { - int sink_num = intra_cluster_lookahead_builder.getTileSinks()[num_seen_sinks]; - auto cost = intra_cluster_lookahead_builder.getTileMinCosts()[num_seen_sinks]; - tile_min_cost[physical_type_ptr].insert(std::make_pair(sink_num, - util::Cost_Entry(cost.getDelay(), cost.getCongestion()))); - num_seen_sinks++; - - } +template +void toVector(std::vector& vec_out, + const typename capnp::List::Reader& m_in, + const std::function&, + int, + const ElemType&)>& copy_fun) { + int size = m_in.size(); + vec_out.resize(size); + for(int idx = 0; idx < size; idx++) { + copy_fun(vec_out, idx, m_in[idx]); } - - } -void FromIntraClusterLookahead(VprIntraClusterLookahead::Builder& intra_cluster_lookahead_builder, - const std::unordered_map& inter_tile_pin_primitive_pin_delay, - const std::unordered_map>& tile_min_cost, - const std::vector& physical_tile_types) { - - ::capnp::List::Builder physical_tile_num_pin_arr_builder; - ::capnp::List::Builder pin_num_sink_arr_builder; - ::capnp::List::Builder pin_sink_arr_builder; - 
::capnp::List::Builder pin_sink_cost_builder; - ::capnp::List::Builder tile_num_sinks_builder; - ::capnp::List::Builder tile_sinks_builder; - ::capnp::List::Builder tile_sink_min_cost_builder; - - int num_tile_types = physical_tile_types.size(); - - physical_tile_num_pin_arr_builder = intra_cluster_lookahead_builder.initPhysicalTileNumPins(num_tile_types); - - // Count the number of pins for each tile - { - int total_num_pin = 0; - for (const auto& tile_type : physical_tile_types) { - const auto tile_pin_primitive_delay = inter_tile_pin_primitive_pin_delay.find(&tile_type); - if (tile_pin_primitive_delay == inter_tile_pin_primitive_pin_delay.end()) { - physical_tile_num_pin_arr_builder.set(tile_type.index, 0); - continue; - } - int tile_num_pins = tile_pin_primitive_delay->second.size(); - physical_tile_num_pin_arr_builder.set(tile_type.index, tile_num_pins); - total_num_pin += tile_num_pins; - } - - pin_num_sink_arr_builder = intra_cluster_lookahead_builder.initPinNumSinks(total_num_pin); - } - - // Count the number of sinks for each pin - { - int pin_num = 0; - int total_pin_num_sinks = 0; - for (const auto& tile_type : physical_tile_types) { - const auto tile_pin_primitive_delay = inter_tile_pin_primitive_pin_delay.find(&tile_type); - if (tile_pin_primitive_delay == inter_tile_pin_primitive_pin_delay.end()) { - continue; - } - for (const auto& pin : tile_pin_primitive_delay->second) { - int pin_num_sinks = pin.size(); - pin_num_sink_arr_builder.set(pin_num, pin_num_sinks); - pin_num++; - total_pin_num_sinks += pin_num_sinks; - } - } - - pin_sink_arr_builder = intra_cluster_lookahead_builder.initPinSinks(total_pin_num_sinks); - pin_sink_cost_builder = intra_cluster_lookahead_builder.initPinSinkCosts(total_pin_num_sinks); - } - - // Iterate over sinks of each pin and store the cost of getting to the sink from the respective pin and the sink ptc number - { - int pin_flat_sink_idx = 0; - for (const auto& tile_type : physical_tile_types) { - const auto 
tile_pin_primitive_delay = inter_tile_pin_primitive_pin_delay.find(&tile_type); - if (tile_pin_primitive_delay == inter_tile_pin_primitive_pin_delay.end()) { - continue; - } - for (const auto& pin : tile_pin_primitive_delay->second) { - for (const auto& sink : pin) { - pin_sink_arr_builder.set(pin_flat_sink_idx, sink.first); - pin_sink_cost_builder[pin_flat_sink_idx].setDelay(sink.second.delay); - pin_sink_cost_builder[pin_flat_sink_idx].setCongestion(sink.second.congestion); - pin_flat_sink_idx++; - } - } - } +template +void toUnorderedMap( + std::unordered_map& map_in, + const int begin_flat_idx, + const int end_flat_idx, + const typename capnp::List::Reader& m_out_key, + const typename capnp::List::Reader& m_out_val, + const std::function&, + const KeyType&, + const typename CapValType::Reader&)>& copy_fun) { + + for(int flat_idx = begin_flat_idx; flat_idx < end_flat_idx; flat_idx++) { + copy_fun(map_in, m_out_key[flat_idx], m_out_val[flat_idx]); } +} +template +void fromVector(typename capnp::List::Builder& m_out, + const std::vector& vec_in, + const std::function::Builder&, + int, + const ElemType&)>& copy_fun) { - // Store the information related to tile_min cost - - tile_num_sinks_builder = intra_cluster_lookahead_builder.initTileNumSinks(num_tile_types); - - // Count the number of sinks for each tile - { - int tile_total_num_sinks = 0; - for (const auto& tile_type : physical_tile_types) { - const auto tile_min_cost_entry = tile_min_cost.find(&tile_type); - if (tile_min_cost_entry == tile_min_cost.end()) { - tile_num_sinks_builder.set(tile_type.index, 0); - continue; - } - int tile_num_sinks = (int)tile_min_cost_entry->second.size(); - tile_num_sinks_builder.set(tile_type.index, tile_num_sinks); - tile_total_num_sinks += tile_num_sinks; - } - - tile_sinks_builder = intra_cluster_lookahead_builder.initTileSinks(tile_total_num_sinks); - tile_sink_min_cost_builder = intra_cluster_lookahead_builder.initTileMinCosts(tile_total_num_sinks); + for(int idx = 0; idx < 
(int)vec_in.size(); idx++) { + copy_fun(m_out, idx, vec_in[idx]); } +} - // Iterate over sinks of each tile and store the minimum cost to get to that sink and the sink ptc number - { - int pin_flat_sink_idx = 0; - for (const auto& tile_type : physical_tile_types) { - const auto tile_min_cost_entry = tile_min_cost.find(&tile_type); - if (tile_min_cost_entry == tile_min_cost.end()) { - continue; - } - for (const auto& sink : tile_min_cost_entry->second) { - tile_sinks_builder.set(pin_flat_sink_idx, sink.first); - tile_sink_min_cost_builder[pin_flat_sink_idx].setDelay(sink.second.delay); - tile_sink_min_cost_builder[pin_flat_sink_idx].setCongestion(sink.second.congestion); - pin_flat_sink_idx++; - } +template +void FromUnorderedMap( + typename capnp::List::Builder& m_out_key, + typename capnp::List::Builder& m_out_val, + const KeyType out_offset, + const std::unordered_map& map_in, + const std::function::Builder&, + typename capnp::List::Builder&, + int, + const KeyType&, + const CostType&)>& copy_fun) { + + int flat_idx = out_offset; + for (const auto& entry : map_in) { + copy_fun(m_out_key, m_out_val, flat_idx, entry.first, entry.second); + flat_idx++; } - } - } diff --git a/libs/libvtrcapnproto/map_lookahead.capnp b/libs/libvtrcapnproto/map_lookahead.capnp index 6dd777d1c3e..1c7eaed3e14 100644 --- a/libs/libvtrcapnproto/map_lookahead.capnp +++ b/libs/libvtrcapnproto/map_lookahead.capnp @@ -15,7 +15,4 @@ struct VprIntraClusterLookahead { pinNumSinks @1 :List(Int64); pinSinks @2 :List(Int64); pinSinkCosts @3 :List(VprMapCostEntry); - tileNumSinks @4 :List(Int64); - tileSinks @5 :List(Int64); - tileMinCosts @6 :List(VprMapCostEntry); } \ No newline at end of file diff --git a/vpr/src/route/clock_connection_builders.cpp b/vpr/src/route/clock_connection_builders.cpp index fbed3a3b62d..e24bdbe8ea0 100644 --- a/vpr/src/route/clock_connection_builders.cpp +++ b/vpr/src/route/clock_connection_builders.cpp @@ -76,18 +76,18 @@ void 
RoutingToClockConnection::create_switches(const ClockRRGraphBuilder& clock_ // Connect to x-channel wires unsigned num_wires_x = x_wire_indices.size() * fc; for (size_t i = 0; i < num_wires_x; i++) { - clock_graph.add_edge(rr_edges_to_create, x_wire_indices[i], RRNodeId(clock_index), arch_switch_idx); + clock_graph.add_edge(rr_edges_to_create, x_wire_indices[i], RRNodeId(clock_index), arch_switch_idx, false); } // Connect to y-channel wires unsigned num_wires_y = y_wire_indices.size() * fc; for (size_t i = 0; i < num_wires_y; i++) { - clock_graph.add_edge(rr_edges_to_create, y_wire_indices[i], RRNodeId(clock_index), arch_switch_idx); + clock_graph.add_edge(rr_edges_to_create, y_wire_indices[i], RRNodeId(clock_index), arch_switch_idx, false); } // Connect to virtual clock sink node // used by the two stage router - clock_graph.add_edge(rr_edges_to_create, RRNodeId(clock_index), virtual_clock_network_root_idx, arch_switch_idx); + clock_graph.add_edge(rr_edges_to_create, RRNodeId(clock_index), virtual_clock_network_root_idx, arch_switch_idx, false); } } @@ -210,7 +210,7 @@ void ClockToClockConneciton::create_switches(const ClockRRGraphBuilder& clock_gr if (from_itter == from_rr_node_indices.end()) { from_itter = from_rr_node_indices.begin(); } - clock_graph.add_edge(rr_edges_to_create, RRNodeId(*from_itter), RRNodeId(to_index), arch_switch_idx); + clock_graph.add_edge(rr_edges_to_create, RRNodeId(*from_itter), RRNodeId(to_index), arch_switch_idx, false); from_itter++; } } @@ -325,7 +325,7 @@ void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_gra //Create edges depending on Fc for (size_t i = 0; i < clock_network_indices.size() * fc; i++) { - clock_graph.add_edge(rr_edges_to_create, RRNodeId(clock_network_indices[i]), RRNodeId(clock_pin_node_idx), arch_switch_idx); + clock_graph.add_edge(rr_edges_to_create, RRNodeId(clock_network_indices[i]), RRNodeId(clock_pin_node_idx), arch_switch_idx, false); } } } diff --git 
a/vpr/src/route/clock_network_builders.cpp b/vpr/src/route/clock_network_builders.cpp index 3a1606e5831..d27c2c5e362 100644 --- a/vpr/src/route/clock_network_builders.cpp +++ b/vpr/src/route/clock_network_builders.cpp @@ -321,8 +321,8 @@ void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphB clock_graph); // connect drive point to each half rib using a directed switch - clock_graph.add_edge(rr_edges_to_create, RRNodeId(drive_node_idx), RRNodeId(left_node_idx), drive.switch_idx); - clock_graph.add_edge(rr_edges_to_create, RRNodeId(drive_node_idx), RRNodeId(right_node_idx), drive.switch_idx); + clock_graph.add_edge(rr_edges_to_create, RRNodeId(drive_node_idx), RRNodeId(left_node_idx), drive.switch_idx, false); + clock_graph.add_edge(rr_edges_to_create, RRNodeId(drive_node_idx), RRNodeId(right_node_idx), drive.switch_idx, false); } } } @@ -662,8 +662,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap clock_graph); // connect drive point to each half spine using a directed switch - clock_graph.add_edge(rr_edges_to_create, RRNodeId(drive_node_idx), RRNodeId(left_node_idx), drive.switch_idx); - clock_graph.add_edge(rr_edges_to_create, RRNodeId(drive_node_idx), RRNodeId(right_node_idx), drive.switch_idx); + clock_graph.add_edge(rr_edges_to_create, RRNodeId(drive_node_idx), RRNodeId(left_node_idx), drive.switch_idx, false); + clock_graph.add_edge(rr_edges_to_create, RRNodeId(drive_node_idx), RRNodeId(right_node_idx), drive.switch_idx, false); } } } diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp index b518970dcc0..b3291bcbb59 100644 --- a/vpr/src/route/router_lookahead_map.cpp +++ b/vpr/src/route/router_lookahead_map.cpp @@ -205,37 +205,63 @@ struct t_dijkstra_data { t_wire_cost_map f_wire_cost_map; /******** File-Scope Functions ********/ + +/*** + * @brief Fill f_wire_cost_map. 
It is a look-up table from CHANX/CHANY (to SINKs) for various distances + * @param segment_inf + */ Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int layer_num, int delta_x, int delta_y); static void compute_router_wire_lookahead(const std::vector& segment_inf); -static void compute_tiles_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, - std::unordered_map>& tile_min_cost, +/*** + * @brief Compute the cost from pin to sinks of tiles - Compute the minimum cost to get to each tile sink from pins on the cluster + * @param inter_tile_pin_primitive_pin_delay + * @param tile_min_cost + * @param det_routing_arch + * @param device_ctx + */ +static void compute_tiles_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, + std::unordered_map>& tile_min_cost, const t_det_routing_arch& det_routing_arch, const DeviceContext& device_ctx); - -static void compute_tile_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, +/*** + * @brief Compute the cost from tile pins to tile sinks + * @param inter_tile_pin_primitive_pin_delay [physical_tile_type_idx][from_pin_ptc_num][sink_ptc_num] -> cost + * @param physical_tile + * @param det_routing_arch + * @param delayless_switch + */ +static void compute_tile_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, t_physical_tile_type_ptr physical_tile, const t_det_routing_arch& det_routing_arch, const int delayless_switch); -static void store_min_cost_to_sinks(std::unordered_map>& tile_min_cost, +/*** + * @brief Compute the minimum cost to get to the sinks from pins on the cluster + * @param tile_min_cost [physical_tile_idx][sink_ptc_num] -> min_cost + * @param physical_tile + * @param inter_tile_pin_primitive_pin_delay [physical_tile_type_idx][from_pin_ptc_num][sink_ptc_num] -> cost + */ +static void store_min_cost_to_sinks(std::unordered_map>& tile_min_cost, t_physical_tile_type_ptr physical_tile, - const std::unordered_map& inter_tile_pin_primitive_pin_delay); + 
const std::unordered_map& inter_tile_pin_primitive_pin_delay); +/*** + * @brief Iterate over the first and second dimension of f_wire_cost_map to get the minimum cost for each dx and dy_ + * @param internal_opin_global_cost_map This map is populated in this function. [dx][dy] -> cost + */ static void min_global_cost_map(vtr::NdMatrix& internal_opin_global_cost_map); // Read the file and fill inter_tile_pin_primitive_pin_delay and tile_min_cost -static void read_intra_cluster_router_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, - std::unordered_map>& tile_min_cost, +static void read_intra_cluster_router_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, const std::string& file); // Write the file with inter_tile_pin_primitive_pin_delay and tile_min_cost static void write_intra_cluster_router_lookahead(const std::string& file, - const std::unordered_map& inter_tile_pin_primitive_pin_delay, - const std::unordered_map>& tile_min_cost); + const std::unordered_map& inter_tile_pin_primitive_pin_delay); /* returns index of a node from which to start routing */ static RRNodeId get_start_node(int layer, int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset); @@ -300,8 +326,8 @@ float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_nod // delay_cost and cong_cost only represent the cost to get to the root-level pins. The below offsets are used to represent the intra-cluster cost // of getting to a sink - delay_offset_cost = params.criticality * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).delay; - cong_offset_cost = (1. - params.criticality) * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).congestion; + delay_offset_cost = params.criticality * tile_min_cost.at(to_physical_type->index).at(to_node_ptc_num).delay; + cong_offset_cost = (1. 
- params.criticality) * tile_min_cost.at(to_physical_type->index).at(to_node_ptc_num).congestion; return delay_cost + cong_cost + delay_offset_cost + cong_offset_cost; } else if (from_rr_type == OPIN) { @@ -311,21 +337,21 @@ float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_nod // Similar to CHANX and CHANY std::tie(delay_cost, cong_cost) = get_expected_delay_and_cong(current_node, target_node, params, R_upstream); - delay_offset_cost = params.criticality * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).delay; - cong_offset_cost = (1. - params.criticality) * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).congestion; + delay_offset_cost = params.criticality * tile_min_cost.at(to_physical_type->index).at(to_node_ptc_num).delay; + cong_offset_cost = (1. - params.criticality) * tile_min_cost.at(to_physical_type->index).at(to_node_ptc_num).congestion; return delay_cost + cong_cost + delay_offset_cost + cong_offset_cost; } else { if (node_in_same_physical_tile(current_node, target_node)) { delay_offset_cost = 0.; cong_offset_cost = 0.; - const auto& pin_delays = inter_tile_pin_primitive_pin_delay.at(from_physical_type)[from_node_ptc_num]; + const auto& pin_delays = inter_tile_pin_primitive_pin_delay.at(from_physical_type->index)[from_node_ptc_num]; auto pin_delay_itr = pin_delays.find(rr_graph.node_ptc_num(target_node)); if (pin_delay_itr == pin_delays.end()) { // There isn't any intra-cluster path to connect the current OPIN to the SINK, thus it has to outside. // The best estimation we have now, it the minimum intra-cluster delay to the sink. However, this cost is incomplete, // since it does not consider the cost of going outside of the cluster and, then, returning to it. - delay_cost = params.criticality * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).delay; - cong_cost = (1. 
- params.criticality) * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).congestion; + delay_cost = params.criticality * tile_min_cost.at(to_physical_type->index).at(to_node_ptc_num).delay; + cong_cost = (1. - params.criticality) * tile_min_cost.at(to_physical_type->index).at(to_node_ptc_num).congestion; return delay_cost + cong_cost; } else { delay_cost = params.criticality * pin_delay_itr->second.delay; @@ -342,15 +368,15 @@ float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_nod delay_cost = params.criticality * distance_based_min_cost[to_layer_num][delta_x][delta_y].delay; cong_cost = (1. - params.criticality) * distance_based_min_cost[to_layer_num][delta_x][delta_y].congestion; - delay_offset_cost = params.criticality * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).delay; - cong_offset_cost = (1. - params.criticality) * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).congestion; + delay_offset_cost = params.criticality * tile_min_cost.at(to_physical_type->index).at(to_node_ptc_num).delay; + cong_offset_cost = (1. - params.criticality) * tile_min_cost.at(to_physical_type->index).at(to_node_ptc_num).congestion; } return delay_cost + cong_cost + delay_offset_cost + cong_offset_cost; } } else if (from_rr_type == IPIN) { // we assume that route-through is not enabled. VTR_ASSERT(node_in_same_physical_tile(current_node, target_node)); - const auto& pin_delays = inter_tile_pin_primitive_pin_delay.at(from_physical_type)[from_node_ptc_num]; + const auto& pin_delays = inter_tile_pin_primitive_pin_delay.at(from_physical_type->index)[from_node_ptc_num]; auto pin_delay_itr = pin_delays.find(rr_graph.node_ptc_num(target_node)); if (pin_delay_itr == pin_delays.end()) { delay_cost = std::numeric_limits::max() / 1e12; @@ -374,8 +400,8 @@ float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_nod delay_cost = params.criticality * distance_based_min_cost[to_layer_num][delta_x][delta_y].delay; cong_cost = (1. 
- params.criticality) * distance_based_min_cost[to_layer_num][delta_x][delta_y].congestion; - delay_offset_cost = params.criticality * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).delay; - cong_offset_cost = (1. - params.criticality) * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).congestion; + delay_offset_cost = params.criticality * tile_min_cost.at(to_physical_type->index).at(to_node_ptc_num).delay; + cong_offset_cost = (1. - params.criticality) * tile_min_cost.at(to_physical_type->index).at(to_node_ptc_num).congestion; } return delay_cost + cong_cost + delay_offset_cost + cong_offset_cost; } else { @@ -567,12 +593,22 @@ void MapLookahead::read(const std::string& file) { void MapLookahead::read_intra_cluster(const std::string& file) { vtr::ScopedStartFinishTimer timer("Loading router intra cluster lookahead map"); + is_flat_ = true; // Maps related to global resources should not be empty VTR_ASSERT(!f_wire_cost_map.empty()); read_intra_cluster_router_lookahead(inter_tile_pin_primitive_pin_delay, - tile_min_cost, file); + const auto& tiles = g_vpr_ctx.device().physical_tile_types; + for (const auto& tile : tiles) { + if (is_empty_type(&tile)) { + continue; + } + store_min_cost_to_sinks(tile_min_cost, + &tile, + inter_tile_pin_primitive_pin_delay); + } + // The information about distance_based_min_cost is not stored in the file, thus it needs to be computed min_global_cost_map(distance_based_min_cost); } @@ -583,8 +619,7 @@ void MapLookahead::write(const std::string& file) const { void MapLookahead::write_intra_cluster(const std::string& file) const { write_intra_cluster_router_lookahead(file, - inter_tile_pin_primitive_pin_delay, - tile_min_cost); + inter_tile_pin_primitive_pin_delay); } /******** Function Definitions ********/ @@ -1346,8 +1381,8 @@ static void print_router_cost_map(const t_routing_cost_map& router_cost_map) { } } -static void compute_tiles_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, - std::unordered_map>& 
tile_min_cost, +static void compute_tiles_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, + std::unordered_map>& tile_min_cost, const t_det_routing_arch& det_routing_arch, const DeviceContext& device_ctx) { const auto& tiles = device_ctx.physical_tile_types; @@ -1367,7 +1402,7 @@ static void compute_tiles_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, +static void compute_tile_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, t_physical_tile_type_ptr physical_tile, const t_det_routing_arch& det_routing_arch, const int delayless_switch) { @@ -1398,16 +1433,16 @@ static void compute_tile_lookahead(std::unordered_mapindex, pin_delays)); VTR_ASSERT(insert_res.second); rr_graph_builder.clear(); } -static void store_min_cost_to_sinks(std::unordered_map>& tile_min_cost, +static void store_min_cost_to_sinks(std::unordered_map>& tile_min_cost, t_physical_tile_type_ptr physical_tile, - const std::unordered_map& inter_tile_pin_primitive_pin_delay) { - const auto& tile_pin_delays = inter_tile_pin_primitive_pin_delay.at(physical_tile); + const std::unordered_map& inter_tile_pin_primitive_pin_delay) { + const auto& tile_pin_delays = inter_tile_pin_primitive_pin_delay.at(physical_tile->index); std::unordered_map min_cost_map; for (auto& primitive_sink_pair : physical_tile->primitive_class_inf) { int primitive_sink = primitive_sink_pair.first; @@ -1430,7 +1465,7 @@ static void store_min_cost_to_sinks(std::unordered_mapindex, min_cost_map)); VTR_ASSERT(insert_res.second); } @@ -1482,21 +1517,78 @@ void DeltaDelayModel::write(const std::string& /*file*/) const { } static void read_intra_cluster_router_lookahead(std::unordered_map& /*inter_tile_pin_primitive_pin_delay*/, - std::unordered_map>& /*tile_min_cost*/, const std::string& /*file*/) { VPR_THROW(VPR_ERROR_PLACE, "MapLookahead::read_intra_cluster_router_lookahead " DISABLE_ERROR); } static void write_intra_cluster_router_lookahead(const std::string& /*file*/, - const 
std::unordered_map& /*inter_tile_pin_primitive_pin_delay*/, - const std::unordered_map>& /*tile_min_cost*/) { + const std::unordered_map& /*inter_tile_pin_primitive_pin_delay*/) { VPR_THROW(VPR_ERROR_PLACE, "MapLookahead::write_intra_cluster_router_lookahead " DISABLE_ERROR); } #else /* VTR_ENABLE_CAPNPROTO */ -static void read_intra_cluster_router_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, - std::unordered_map>& tile_min_cost, +static void ToCostEntry(Cost_Entry* out, const VprMapCostEntry::Reader& in) { + out->delay = in.getDelay(); + out->congestion = in.getCongestion(); +} + +static void FromCostEntry(VprMapCostEntry::Builder* out, const Cost_Entry& in) { + out->setDelay(in.delay); + out->setCongestion(in.congestion); +} + +static void toIntEntry(std::vector& out, + int idx, + const int& cost) { + out[idx] = cost; +} + +static void fromIntEntry(::capnp::List::Builder& out, + int idx, + const int& cost) { + out.set(idx, cost); +} + +static void toPairEntry(std::unordered_map& map_out, + const int& key, + const VprMapCostEntry::Reader& cap_cost) { + VTR_ASSERT(map_out.find(key) == map_out.end()); + util::Cost_Entry cost(cap_cost.getDelay(), cap_cost.getCongestion()); + map_out[key] = cost; +} + +static void fromPairEntry(::capnp::List::Builder& out_key, + ::capnp::List<::VprMapCostEntry, ::capnp::Kind::STRUCT>::Builder& out_val, + int flat_idx, + const int& key, + const util::Cost_Entry& cost) { + out_key.set(flat_idx, key); + out_val[flat_idx].setDelay(cost.delay); + out_val[flat_idx].setCongestion(cost.congestion); +} + +static void getIntraClusterArrayFlatSize(int& num_tile_types, + int& num_pins, + int& num_sinks, + const std::unordered_map& inter_tile_pin_primitive_pin_delay) { + const auto& physical_tile_types = g_vpr_ctx.device().physical_tile_types; + num_tile_types = (int)physical_tile_types.size(); + + num_pins = 0; + for (const auto& tile_type : inter_tile_pin_primitive_pin_delay) { + num_pins += (int)tile_type.second.size(); + } 
+ + num_sinks = 0; + for (const auto& tile_type : inter_tile_pin_primitive_pin_delay) { + for (const auto& pin_sink : tile_type.second) { + num_sinks += (int)pin_sink.size(); + } + } +} + +static void read_intra_cluster_router_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, const std::string& file) { MmapFile f(file); @@ -1506,35 +1598,102 @@ static void read_intra_cluster_router_lookahead(std::unordered_map(); - ToIntraClusterLookahead(inter_tile_pin_primitive_pin_delay, - tile_min_cost, - g_vpr_ctx.device().physical_tile_types, - map); + std::vector physical_tile_num_pin_arr; + toVector(physical_tile_num_pin_arr, + map.getPhysicalTileNumPins(), + toIntEntry); + + std::vector pin_num_sink_arr; + toVector(pin_num_sink_arr, + map.getPinNumSinks(), + toIntEntry); + + int num_seen_pair = 0; + int num_seen_pin = 0; + for (int physical_tile_idx = 0; physical_tile_idx < (int)physical_tile_num_pin_arr.size(); physical_tile_idx++) { + int num_pins = physical_tile_num_pin_arr[physical_tile_idx]; + util::t_ipin_primitive_sink_delays tile_pin_sink_cost_map(num_pins); + + for (int pin_num = 0; pin_num < num_pins; pin_num++) { + std::unordered_map pin_sink_cost_map; + toUnorderedMap(pin_sink_cost_map, + num_seen_pair, + num_seen_pair + pin_num_sink_arr[num_seen_pin], + map.getPinSinks(), + map.getPinSinkCosts(), + toPairEntry); + tile_pin_sink_cost_map[pin_num] = pin_sink_cost_map; + num_seen_pair += (int)pin_sink_cost_map.size(); + VTR_ASSERT((int)pin_sink_cost_map.size() == pin_num_sink_arr[num_seen_pin]); + ++num_seen_pin; + } + inter_tile_pin_primitive_pin_delay[physical_tile_idx] = tile_pin_sink_cost_map; + } } static void write_intra_cluster_router_lookahead(const std::string& file, - const std::unordered_map& inter_tile_pin_primitive_pin_delay, - const std::unordered_map>& tile_min_cost) { + const std::unordered_map& inter_tile_pin_primitive_pin_delay) { ::capnp::MallocMessageBuilder builder; auto vpr_intra_cluster_lookahead_builder = 
builder.initRoot(); - FromIntraClusterLookahead(vpr_intra_cluster_lookahead_builder, - inter_tile_pin_primitive_pin_delay, - tile_min_cost, - g_vpr_ctx.device().physical_tile_types); + int num_tile_types, num_pins, num_sinks; + getIntraClusterArrayFlatSize(num_tile_types, + num_pins, + num_sinks, + inter_tile_pin_primitive_pin_delay); + + std::vector physical_tile_num_pin_arr(num_tile_types, 0); + { + for (const auto& physical_type : inter_tile_pin_primitive_pin_delay) { + int physical_type_idx = physical_type.first; + physical_tile_num_pin_arr[physical_type_idx] = (int)physical_type.second.size(); + } - writeMessageToFile(file, &builder); -} + ::capnp::List::Builder physical_tile_num_pin_arr_builder = vpr_intra_cluster_lookahead_builder.initPhysicalTileNumPins(num_tile_types); + fromVector(physical_tile_num_pin_arr_builder, + physical_tile_num_pin_arr, + fromIntEntry); + } -static void ToCostEntry(Cost_Entry* out, const VprMapCostEntry::Reader& in) { - out->delay = in.getDelay(); - out->congestion = in.getCongestion(); -} + std::vector pin_num_sink_arr(num_pins, 0); + { + int num_seen_pin = 0; + for (int physical_tile_idx = 0; physical_tile_idx < num_tile_types; ++physical_tile_idx) { + if (inter_tile_pin_primitive_pin_delay.find(physical_tile_idx) == inter_tile_pin_primitive_pin_delay.end()) { + continue; + } + for (const auto& pin_sinks : inter_tile_pin_primitive_pin_delay.at(physical_tile_idx)) { + pin_num_sink_arr[num_seen_pin] = (int)pin_sinks.size(); + ++num_seen_pin; + } + } + ::capnp::List::Builder pin_num_sink_arr_builder = vpr_intra_cluster_lookahead_builder.initPinNumSinks(num_pins); + fromVector(pin_num_sink_arr_builder, + pin_num_sink_arr, + fromIntEntry); + } -static void FromCostEntry(VprMapCostEntry::Builder* out, const Cost_Entry& in) { - out->setDelay(in.delay); - out->setCongestion(in.congestion); + { + ::capnp::List::Builder pin_sink_arr_builder = vpr_intra_cluster_lookahead_builder.initPinSinks(num_sinks); + ::capnp::List::Builder 
pin_sink_cost_builder = vpr_intra_cluster_lookahead_builder.initPinSinkCosts(num_sinks); + + int num_seen_pin = 0; + for (int physical_tile_idx = 0; physical_tile_idx < num_tile_types; ++physical_tile_idx) { + for (int pin_num = 0; pin_num < physical_tile_num_pin_arr[physical_tile_idx]; ++pin_num) { + const std::unordered_map& pin_sinks = inter_tile_pin_primitive_pin_delay.at(physical_tile_idx).at(pin_num); + FromUnorderedMap( + pin_sink_arr_builder, + pin_sink_cost_builder, + num_seen_pin, + pin_sinks, + fromPairEntry); + num_seen_pin += (int)pin_sinks.size(); + } + } + } + + writeMessageToFile(file, &builder); } void read_router_lookahead(const std::string& file) { diff --git a/vpr/src/route/router_lookahead_map.h b/vpr/src/route/router_lookahead_map.h index 00dc5bf62ad..f3e82531510 100644 --- a/vpr/src/route/router_lookahead_map.h +++ b/vpr/src/route/router_lookahead_map.h @@ -16,9 +16,9 @@ class MapLookahead : public RouterLookahead { //Look-up table from SOURCE/OPIN to CHANX/CHANY of various types util::t_src_opin_delays src_opin_delays; // Lookup table from a tile pins to the primitive classes inside that tile - std::unordered_map inter_tile_pin_primitive_pin_delay; // [physical_tile_type][from_pin_physical_num][sink_physical_num] -> cost + std::unordered_map inter_tile_pin_primitive_pin_delay; // [physical_tile_type][from_pin_physical_num][sink_physical_num] -> cost // Lookup table to store the minimum cost to reach to a primitive pin from the root-level IPINs - std::unordered_map> tile_min_cost; // [physical_tile_type][sink_physical_num] -> cost + std::unordered_map> tile_min_cost; // [physical_tile_type][sink_physical_num] -> cost // Lookup table to store the minimum cost for each dx and dy vtr::NdMatrix distance_based_min_cost; // [layer_num][dx][dy] -> cost const t_det_routing_arch& det_routing_arch_; diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h index f3a3d43249a..24f02df20f8 100644 --- 
a/vpr/src/route/router_lookahead_map_utils.h +++ b/vpr/src/route/router_lookahead_map_utils.h @@ -64,6 +64,10 @@ class Cost_Entry { bool valid() const { return std::isfinite(delay) && std::isfinite(congestion); } + + bool operator==(const Cost_Entry& other) const { + return delay == other.delay && congestion == other.congestion; + } }; /** diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index dc0d7a06d04..ef1e51d055e 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -200,7 +200,8 @@ static void alloc_and_load_intra_cluster_rr_graph(RRGraphBuilder& rr_graph_build const vtr::vector>& chain_pin_nums, float R_minW_nmos, float R_minW_pmos, - bool is_flat); + bool is_flat, + bool load_rr_graph); static void set_clusters_pin_chains(const ClusteredNetlist& clb_nlist, vtr::vector& pin_chains, @@ -306,7 +307,8 @@ static void add_intra_cluster_edges_rr_graph(RRGraphBuilder& rr_graph_builder, float R_minW_nmos, float R_minW_pmos, int& num_edges, - bool is_flat); + bool is_flat, + bool load_rr_graph); static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create, @@ -315,8 +317,21 @@ static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, int i, int j); -/* - * Build the internal edges of blocks inside the given location +/*** + * @brief Add the intra-cluster edges + * @param rr_graph_builder + * @param num_collapsed_nodes Return the number of nodes that are removed due to collapsing + * @param cluster_blk_id Cluser block id of the cluster that its edges are being added + * @param i + * @param j + * @param cap Capacity number of the location that cluster is being mapped to + * @param R_minW_nmos + * @param R_minW_pmos + * @param rr_edges_to_create + * @param nodes_to_collapse Sotre the nodes in the cluster that needs to be collapsed + * @param grid + * @param is_flat + * @param load_rr_graph */ static void build_cluster_internal_edges(RRGraphBuilder& 
rr_graph_builder, int& num_collapsed_nodes, @@ -330,7 +345,8 @@ static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create, const t_cluster_pin_chain& nodes_to_collapse, const DeviceGrid& grid, - bool is_flat); + bool is_flat, + bool load_rr_graph); /* * Connect the pins of the given t_pb to their drivers - It doesn't add the edges going in/out of pins on a chain @@ -345,7 +361,8 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, int rel_cap, int layer, int i, - int j); + int j, + bool is_remapped); /** * Edges going in/out of collapse nodes are not added by the normal routine. This function add those edges @@ -372,7 +389,8 @@ static int add_edges_for_collapsed_nodes(RRGraphBuilder& rr_graph_builder, float R_minW_pmos, int layer, int i, - int j); + int j, + bool load_rr_graph); /** * @note This funtion is used to add the fan-in edges of the given chain node to the chain's sink with the modified delay * @param rr_graph_builder @@ -406,7 +424,8 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, int node_idx, int layer, int i, - int j); + int j, + bool load_rr_graph); /** * @note Return the minimum delay to the chain's sink since a pin outside of the chain may have connections to multiple pins inside the chain. @@ -561,18 +580,23 @@ static void add_pin_chain(const std::vector& pin_chain, std::vector>& all_chains, bool is_new_chain); -// Return the edge id of an intra-tile edge with the same delay. If there isn't any, create a new one and return the ID -static int find_create_intra_cluster_sw_arch_idx(std::map& arch_sw_inf, - float delay); - -// Add the newly added arch sw to data structures related to rr switch and switch_fanin_remap. 
This function should be used for the edge types -// added after allocating rr switches -static void find_create_rr_switch(RRGraphBuilder& rr_graph_builder, - std::vector>& switch_fanin_remap, - float R_minW_nmos, - float R_minW_pmos, - const t_arch_switch_inf& arch_sw_inf, - const int arch_sw_id); +/*** + * @brief Return a pair. The firt element indicates whether the switch is added or it was already added. The second element is the switch index. + * @param rr_graph + * @param arch_sw_inf + * @param R_minW_nmos Needs to be passed to use create_rr_switch_from_arch_switch + * @param R_minW_pmos Needs to be passed to use create_rr_switch_from_arch_switch + * @param is_rr_sw If it is true, the function would search in the data structure that store rr switches. + * Otherwise, it would search in the data structure that store switches that are not rr switches. + * @param delay + * @return + */ +static std::pair find_create_intra_cluster_sw(RRGraphBuilder& rr_graph, + std::map& arch_sw_inf, + float R_minW_nmos, + float R_minW_pmos, + bool is_rr_sw, + float delay); static float get_delay_directly_connected_pins(t_physical_tile_type_ptr physical_type, t_logical_block_type_ptr logical_block, @@ -611,7 +635,8 @@ static void build_intra_cluster_rr_graph(const t_graph_type graph_type, float R_minW_nmos, float R_minW_pmos, RRGraphBuilder& rr_graph_builder, - bool is_flat); + bool is_flat, + bool load_rr_graph); /******************* Subroutine definitions *******************************/ @@ -630,7 +655,8 @@ void create_rr_graph(const t_graph_type graph_type, auto& mutable_device_ctx = g_vpr_ctx.mutable_device(); bool echo_enabled = getEchoEnabled() && isEchoFileEnabled(E_ECHO_RR_GRAPH_INDEXED_DATA); const char* echo_file_name = getEchoFileName(E_ECHO_RR_GRAPH_INDEXED_DATA); - if (!det_routing_arch->read_rr_graph_filename.empty()) { + bool load_rr_graph = !det_routing_arch->read_rr_graph_filename.empty(); + if (load_rr_graph) { if (device_ctx.read_rr_graph_filename != 
det_routing_arch->read_rr_graph_filename) { free_rr_graph(); @@ -703,7 +729,8 @@ void create_rr_graph(const t_graph_type graph_type, det_routing_arch->R_minW_nmos, det_routing_arch->R_minW_pmos, mutable_device_ctx.rr_graph_builder, - is_flat); + is_flat, + load_rr_graph); if (router_opts.reorder_rr_graph_nodes_algorithm != DONT_REORDER) { mutable_device_ctx.rr_graph_builder.reorder_nodes(router_opts.reorder_rr_graph_nodes_algorithm, @@ -724,8 +751,11 @@ void create_rr_graph(const t_graph_type graph_type, print_rr_graph_stats(); - //Write out rr graph file if needed - if (!det_routing_arch->write_rr_graph_filename.empty()) { + // Write out rr graph file if needed - Currently, writing the flat rr-graph is not supported since loading from a flat rr-graph is not supported. + // When this function is called in any stage other than routing, the is_flat flag passed to this function is false, regardless of the flag passed + // through command line. So, the graph conrresponding to global resources will be created and written down to file if needed. During routing, if flat-routing + // is enabled, intra-cluster resources will be added to the graph, but this new bigger graph will not be written down. + if (!det_routing_arch->write_rr_graph_filename.empty() && !is_flat) { write_rr_graph(&mutable_device_ctx.rr_graph_builder, &mutable_device_ctx.rr_graph, device_ctx.physical_tile_types, @@ -750,7 +780,8 @@ static void add_intra_cluster_edges_rr_graph(RRGraphBuilder& rr_graph_builder, float R_minW_nmos, float R_minW_pmos, int& num_edges, - bool is_flat) { + bool is_flat, + bool load_rr_graph) { VTR_ASSERT(is_flat); /* This function should be called if placement is done! 
*/ @@ -775,7 +806,8 @@ static void add_intra_cluster_edges_rr_graph(RRGraphBuilder& rr_graph_builder, rr_edges_to_create, nodes_to_collapse[cluster_blk_id], grid, - is_flat); + is_flat, + load_rr_graph); uniquify_edges(rr_edges_to_create); alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); num_edges += rr_edges_to_create.size(); @@ -820,7 +852,7 @@ static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, pin_physical_num); VTR_ASSERT(sw_idx != -1); - rr_edges_to_create.emplace_back(driving_pin_node_id, pin_rr_node_id, sw_idx); + rr_edges_to_create.emplace_back(driving_pin_node_id, pin_rr_node_id, sw_idx, false); } } } @@ -1318,6 +1350,10 @@ static void build_rr_graph(const t_graph_type graph_type, if (clb_to_clb_directs != nullptr) { delete[] clb_to_clb_directs; } + + // We are done with building the RR Graph. Thus, we can clear the storages only used + // to build the RR Graph + device_ctx.rr_graph_builder.clear_temp_storage(); } static void build_intra_cluster_rr_graph(const t_graph_type graph_type, @@ -1328,13 +1364,17 @@ static void build_intra_cluster_rr_graph(const t_graph_type graph_type, float R_minW_nmos, float R_minW_pmos, RRGraphBuilder& rr_graph_builder, - bool is_flat) { + bool is_flat, + bool load_rr_graph) { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; auto& device_ctx = g_vpr_ctx.mutable_device(); vtr::ScopedStartFinishTimer timer("Build intra-cluster routing resource graph"); rr_graph_builder.reset_rr_graph_flags(); + // When we are building intra-cluster resources, the edges already built are + // already remapped. 
+ rr_graph_builder.init_edge_remap(true); vtr::vector pin_chains(clb_nlist.blocks().size()); set_clusters_pin_chains(clb_nlist, pin_chains, is_flat); @@ -1356,7 +1396,8 @@ static void build_intra_cluster_rr_graph(const t_graph_type graph_type, cluster_flat_chain_pins, R_minW_nmos, R_minW_pmos, - is_flat); + is_flat, + load_rr_graph); /* AA: Note that in the case of dedicated networks, we are currently underestimating the additional node count due to the clock networks. * Thus this below error is logged; it's not actually an error, the node estimation needs to get fixed for dedicated clock networks. */ @@ -1365,11 +1406,17 @@ static void build_intra_cluster_rr_graph(const t_graph_type graph_type, expected_node_count, rr_graph.num_nodes()); } - remap_rr_node_switch_indices(rr_graph_builder, - g_vpr_ctx.device().switch_fanin_remap); + if (!load_rr_graph) { + remap_rr_node_switch_indices(rr_graph_builder, + g_vpr_ctx.device().switch_fanin_remap); + } else { + rr_graph_builder.mark_edges_as_rr_switch_ids(); + } rr_graph_builder.partition_edges(); + rr_graph_builder.clear_temp_storage(); + check_rr_graph(device_ctx.rr_graph, types, device_ctx.rr_indexed_data, @@ -1578,6 +1625,8 @@ t_rr_switch_inf create_rr_switch_from_arch_switch(const t_arch_switch_inf& arch_ rr_switch_inf.power_buffer_type = arch_sw_inf.power_buffer_type; rr_switch_inf.power_buffer_size = arch_sw_inf.power_buffer_size; + rr_switch_inf.intra_tile = arch_sw_inf.intra_tile; + return rr_switch_inf; } /* This function is same as create_rr_switch_from_arch_switch() in terms of functionality. 
It is tuned for clients functions in routing resource graph builder */ @@ -2087,7 +2136,8 @@ static void alloc_and_load_intra_cluster_rr_graph(RRGraphBuilder& rr_graph_build const vtr::vector>& chain_pin_nums, float R_minW_nmos, float R_minW_pmos, - bool is_flat) { + bool is_flat, + bool load_rr_graph) { t_rr_edge_info_set rr_edges_to_create; int num_edges = 0; for (int layer = 0; layer < grid.get_num_layers(); layer++) { @@ -2149,7 +2199,8 @@ static void alloc_and_load_intra_cluster_rr_graph(RRGraphBuilder& rr_graph_build R_minW_nmos, R_minW_pmos, num_edges, - is_flat); + is_flat, + load_rr_graph); } VTR_LOG("Internal edge count:%d\n", num_edges); @@ -2291,11 +2342,11 @@ static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, * }*/ if (class_type == DRIVER) { VTR_ASSERT(pin_type == DRIVER); - rr_edges_to_create.emplace_back(class_rr_node_id, pin_rr_node_id, delayless_switch); + rr_edges_to_create.emplace_back(class_rr_node_id, pin_rr_node_id, delayless_switch, false); } else { VTR_ASSERT(class_type == RECEIVER); VTR_ASSERT(pin_type == RECEIVER); - rr_edges_to_create.emplace_back(pin_rr_node_id, class_rr_node_id, delayless_switch); + rr_edges_to_create.emplace_back(pin_rr_node_id, class_rr_node_id, delayless_switch, false); } } } @@ -2328,11 +2379,11 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, auto pin_type = get_pin_type_from_pin_physical_num(physical_type_ptr, pin_num); if (class_type == DRIVER) { VTR_ASSERT(pin_type == DRIVER); - rr_edges_to_create.emplace_back(class_rr_node_id, pin_rr_node_id, delayless_switch); + rr_edges_to_create.emplace_back(class_rr_node_id, pin_rr_node_id, delayless_switch, false); } else { VTR_ASSERT(class_type == RECEIVER); VTR_ASSERT(pin_type == RECEIVER); - rr_edges_to_create.emplace_back(pin_rr_node_id, class_rr_node_id, delayless_switch); + rr_edges_to_create.emplace_back(pin_rr_node_id, class_rr_node_id, delayless_switch, false); } } } @@ -2497,7 +2548,8 @@ static void 
build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create, const t_cluster_pin_chain& nodes_to_collapse, const DeviceGrid& grid, - bool is_flat) { + bool is_flat, + bool load_rr_graph) { VTR_ASSERT(is_flat); /* Internal edges are added from the start tile */ int width_offset = grid.get_width_offset({i, j, layer}); @@ -2536,7 +2588,8 @@ static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, rel_cap, layer, i, - j); + j, + load_rr_graph); add_pb_child_to_list(pb_q, pb); } @@ -2553,7 +2606,8 @@ static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, R_minW_pmos, layer, i, - j); + j, + load_rr_graph); } static void add_pb_edges(RRGraphBuilder& rr_graph_builder, @@ -2566,7 +2620,8 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, int rel_cap, int layer, int i, - int j) { + int j, + bool is_remapped) { auto pin_num_range = get_pb_pins(physical_type, sub_tile, logical_block, @@ -2619,8 +2674,27 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, logical_block, pin_physical_num, conn_pin_physical_num); - VTR_ASSERT(sw_idx != -1); - rr_edges_to_create.emplace_back(parent_pin_node_id, conn_pin_node_id, sw_idx); + + if (is_remapped) { + bool found = false; + float delay = g_vpr_ctx.device().all_sw_inf.at(sw_idx).Tdel(); + const auto& rr_switches = rr_graph_builder.rr_switch(); + for (int sw_id = 0; sw_id < (int)rr_switches.size(); sw_id++) { + const auto& rr_switch = rr_switches[RRSwitchId(sw_id)]; + if (rr_switch.intra_tile) { + if (rr_switch.Tdel == delay) { + sw_idx = sw_id; + found = true; + break; + } + } + } + // If the graph is loaded from a file, we expect that all sw types are already listed there since currently, we are not doing any further + // Optimization. 
If the optimization done when the rr graph file was generated differs from the current optimization, and + // these optimizations create different RR switches, this VTR_ASSERT can be removed. + VTR_ASSERT(found); + } + rr_edges_to_create.emplace_back(parent_pin_node_id, conn_pin_node_id, sw_idx, is_remapped); } } } @@ -2635,7 +2709,8 @@ static int add_edges_for_collapsed_nodes(RRGraphBuilder& rr_graph_builder, float R_minW_pmos, int layer, int i, - int j) { + int j, + bool load_rr_graph) { // Store the cluster pins in a set to make the search more run-time efficient std::unordered_set cluster_pins_set(cluster_pins.begin(), cluster_pins.end()); @@ -2662,7 +2737,8 @@ static int add_edges_for_collapsed_nodes(RRGraphBuilder& rr_graph_builder, node_idx, layer, i, - j); + j, + load_rr_graph); } } return num_collapsed_pins; @@ -2682,7 +2758,8 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, int node_idx, int layer, int i, - int j) { + int j, + bool load_rr_graph) { // Chain node pin physical number int pin_physical_num = nodes_to_collapse.chains[chain_idx][node_idx].pin_physical_num; const auto& pin_chain_idx = nodes_to_collapse.pin_chain_idx; @@ -2775,18 +2852,34 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, } for (auto src_pair : src_node_edge_pair) { - int arch_sw = find_create_intra_cluster_sw_arch_idx(all_sw_inf, - src_pair.second); - // The internal edges are added after switch_fanin_remap is initialized; thus, if a new arch_sw is added, - // switch _fanin_remap should be updated.
- find_create_rr_switch(rr_graph_builder, - g_vpr_ctx.mutable_device().switch_fanin_remap, - R_minW_nmos, - R_minW_pmos, - all_sw_inf[arch_sw], - arch_sw); - - rr_edges_to_create.emplace_back(src_pair.first, sink_rr_node_id, arch_sw); + float delay = src_pair.second; + bool is_rr_sw_id = load_rr_graph; + bool is_new_sw; + int sw_id; + std::tie(is_new_sw, sw_id) = find_create_intra_cluster_sw(rr_graph_builder, + all_sw_inf, + R_minW_nmos, + R_minW_pmos, + is_rr_sw_id, + delay); + + if (!is_rr_sw_id && is_new_sw) { + // Currently we assume that if rr graph is read from a file, we shouldn't get into this block + VTR_ASSERT(!load_rr_graph); + // The internal edges are added after switch_fanin_remap is initialized; thus, if a new arch_sw is added, + // switch_fanin_remap should be updated. + t_rr_switch_inf rr_sw_inf = create_rr_switch_from_arch_switch(create_internal_arch_sw(delay), + R_minW_nmos, + R_minW_pmos); + auto rr_sw_id = rr_graph_builder.add_rr_switch(rr_sw_inf); + // If rr graph is loaded from a file, switch_fanin_remap is going to be empty + if (!load_rr_graph) { + auto& switch_fanin_remap = g_vpr_ctx.mutable_device().switch_fanin_remap; + switch_fanin_remap.push_back({{UNDEFINED, size_t(rr_sw_id)}}); + } + } + + rr_edges_to_create.emplace_back(src_pair.first, sink_rr_node_id, sw_id, is_rr_sw_id); } } } @@ -4157,7 +4250,7 @@ static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, //back fairly directly to the architecture file in the case of pin equivalence RRNodeId inode = pick_best_direct_connect_target_rr_node(rr_graph, from_rr_node, inodes); - rr_edges_to_create.emplace_back(from_rr_node, inode, clb_to_clb_directs[i].switch_index); + rr_edges_to_create.emplace_back(from_rr_node, inode, clb_to_clb_directs[i].switch_index, false); ++num_pins; } } @@ -4674,56 +4767,62 @@ static void add_pin_chain(const std::vector& pin_chain, } } -static int find_create_intra_cluster_sw_arch_idx(std::map& arch_sw_inf, - float delay) { - // Check whether
is there any other intra-tile edge with the same delay - auto find_res = std::find_if(arch_sw_inf.begin(), arch_sw_inf.end(), - [delay](const std::pair& sw_inf_pair) { - const t_arch_switch_inf& sw_inf = std::get<1>(sw_inf_pair); - if (sw_inf.intra_tile && sw_inf.Tdel() == delay) { - return true; - } else { - return false; - } - }); - - // There isn't any other intra-tile edge with the same delay - Create a new one! - if (find_res == arch_sw_inf.end()) { - auto arch_sw = create_internal_arch_sw(delay); - int max_key_num = std::numeric_limits::min(); - // Find the maximum edge index - for (const auto& arch_sw_pair : arch_sw_inf) { - if (arch_sw_pair.first > max_key_num) { - max_key_num = arch_sw_pair.first; +static std::pair find_create_intra_cluster_sw(RRGraphBuilder& rr_graph, + std::map& arch_sw_inf, + float R_minW_nmos, + float R_minW_pmos, + bool is_rr_sw, + float delay) { + const auto& rr_graph_switches = rr_graph.rr_switch(); + + if (is_rr_sw) { + for (int rr_switch_id = 0; rr_switch_id < (int)rr_graph_switches.size(); rr_switch_id++) { + const auto& rr_sw = rr_graph_switches[RRSwitchId(rr_switch_id)]; + if (rr_sw.intra_tile) { + if (rr_sw.Tdel == delay) { + return std::make_pair(false, rr_switch_id); + } } } - int new_key_num = ++max_key_num; - arch_sw_inf.insert(std::make_pair(new_key_num, arch_sw)); - // We assume that the delay of internal switches is not dependent on their fan-in - // If this assumption proven to not be accurate, the implementation needs to be changed. 
- VTR_ASSERT(arch_sw.fixed_Tdel()); + t_rr_switch_inf new_rr_switch_inf = create_rr_switch_from_arch_switch(create_internal_arch_sw(delay), + R_minW_nmos, + R_minW_pmos); + RRSwitchId rr_sw_id = rr_graph.add_rr_switch(new_rr_switch_inf); - return new_key_num; - } else { - return find_res->first; - } -} + return std::make_pair(true, (size_t)rr_sw_id); -static void find_create_rr_switch(RRGraphBuilder& rr_graph_builder, - std::vector>& switch_fanin_remap, - float R_minW_nmos, - float R_minW_pmos, - const t_arch_switch_inf& arch_sw_inf, - const int arch_sw_id) { - if ((int)switch_fanin_remap.size() > arch_sw_id) { - return; } else { - t_rr_switch_inf rr_switch = create_rr_switch_from_arch_switch(arch_sw_inf, - R_minW_nmos, - R_minW_pmos); - auto rr_switch_id = rr_graph_builder.add_rr_switch(rr_switch); - switch_fanin_remap.push_back({{UNDEFINED, size_t(rr_switch_id)}}); - VTR_ASSERT(((int)switch_fanin_remap.size() - 1) == arch_sw_id); + // Check whether there is any other intra-tile edge with the same delay + auto find_res = std::find_if(arch_sw_inf.begin(), arch_sw_inf.end(), + [delay](const std::pair& sw_inf_pair) { + const t_arch_switch_inf& sw_inf = std::get<1>(sw_inf_pair); + if (sw_inf.intra_tile && sw_inf.Tdel() == delay) { + return true; + } else { + return false; + } + }); + + // There isn't any other intra-tile edge with the same delay - Create a new one! + if (find_res == arch_sw_inf.end()) { + auto arch_sw = create_internal_arch_sw(delay); + int max_key_num = std::numeric_limits::min(); + // Find the maximum arch switch key + for (const auto& arch_sw_pair : arch_sw_inf) { + if (arch_sw_pair.first > max_key_num) { + max_key_num = arch_sw_pair.first; + } + } + int new_key_num = ++max_key_num; + arch_sw_inf.insert(std::make_pair(new_key_num, arch_sw)); + // We assume that the delay of internal switches is not dependent on their fan-in + // If this assumption proves to be inaccurate, the implementation needs to be changed.
+ VTR_ASSERT(arch_sw.fixed_Tdel()); + + return std::make_pair(true, new_key_num); + } else { + return std::make_pair(false, find_res->first); + } } } diff --git a/vpr/src/route/rr_graph2.cpp b/vpr/src/route/rr_graph2.cpp index 9ae222ca20c..6fa292edea8 100644 --- a/vpr/src/route/rr_graph2.cpp +++ b/vpr/src/route/rr_graph2.cpp @@ -730,7 +730,7 @@ int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, continue; } - rr_edges_to_create.emplace_back(from_rr_node, to_node, to_switch); + rr_edges_to_create.emplace_back(from_rr_node, to_node, to_switch, false); ++num_conn; } } @@ -818,10 +818,10 @@ int get_unidir_opin_connections(RRGraphBuilder& rr_graph_builder, } /* Add to the list. */ - rr_edges_to_create.emplace_back(from_rr_node, inc_inode_index, seg_details[inc_track].arch_opin_switch()); + rr_edges_to_create.emplace_back(from_rr_node, inc_inode_index, seg_details[inc_track].arch_opin_switch(), false); ++num_edges; - rr_edges_to_create.emplace_back(from_rr_node, dec_inode_index, seg_details[dec_track].arch_opin_switch()); + rr_edges_to_create.emplace_back(from_rr_node, dec_inode_index, seg_details[dec_track].arch_opin_switch(), false); ++num_edges; } @@ -1629,7 +1629,7 @@ int get_track_to_pins(RRGraphBuilder& rr_graph_builder, /*int to_node = get_rr_node_index(L_rr_node_indices, x + width_offset, y + height_offset, IPIN, ipin, side);*/ RRNodeId to_node = rr_graph_builder.node_lookup().find_node(layer, x, y, IPIN, ipin, side); if (to_node) { - rr_edges_to_create.emplace_back(from_rr_node, to_node, wire_to_ipin_switch); + rr_edges_to_create.emplace_back(from_rr_node, to_node, wire_to_ipin_switch, false); ++num_conn; } } @@ -1970,7 +1970,7 @@ static int get_bidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, } /* Add the edge to the list */ - rr_edges_to_create.emplace_back(from_rr_node, to_node, switch_types[i]); + rr_edges_to_create.emplace_back(from_rr_node, to_node, switch_types[i], false); ++num_conn; } } @@ -2041,14 +2041,14 @@ static int 
get_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, src_switch = switch_override; } - rr_edges_to_create.emplace_back(from_rr_node, to_node, src_switch); + rr_edges_to_create.emplace_back(from_rr_node, to_node, src_switch, false); ++edge_count; auto& device_ctx = g_vpr_ctx.device(); if (device_ctx.arch_switch_inf[src_switch].directionality() == BI_DIRECTIONAL) { //Add reverse edge since bi-directional - rr_edges_to_create.emplace_back(to_node, from_rr_node, src_switch); + rr_edges_to_create.emplace_back(to_node, from_rr_node, src_switch, false); ++edge_count; } } @@ -2139,13 +2139,13 @@ static int get_unidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, VTR_ASSERT(iswitch != OPEN); /* Add edge to list. */ - rr_edges_to_create.emplace_back(from_rr_node, to_node, iswitch); + rr_edges_to_create.emplace_back(from_rr_node, to_node, iswitch, false); ++count; auto& device_ctx = g_vpr_ctx.device(); if (device_ctx.arch_switch_inf[iswitch].directionality() == BI_DIRECTIONAL) { //Add reverse edge since bi-directional - rr_edges_to_create.emplace_back(to_node, from_rr_node, iswitch); + rr_edges_to_create.emplace_back(to_node, from_rr_node, iswitch, false); ++count; } } diff --git a/vpr/src/route/rr_graph_clock.cpp b/vpr/src/route/rr_graph_clock.cpp index cbee0690b51..84872485f15 100644 --- a/vpr/src/route/rr_graph_clock.cpp +++ b/vpr/src/route/rr_graph_clock.cpp @@ -190,14 +190,16 @@ void ClockRRGraphBuilder::map_relative_seg_indices(const t_unified_to_parallel_s void ClockRRGraphBuilder::add_edge(t_rr_edge_info_set* rr_edges_to_create, RRNodeId src_node, RRNodeId sink_node, - int arch_switch_idx) const { + int arch_switch_idx, + bool edge_remapped) const { + VTR_ASSERT(edge_remapped == false); const auto& device_ctx = g_vpr_ctx.device(); VTR_ASSERT(arch_switch_idx < (int)device_ctx.arch_switch_inf.size()); - rr_edges_to_create->emplace_back(src_node, sink_node, arch_switch_idx); + rr_edges_to_create->emplace_back(src_node, sink_node, arch_switch_idx, edge_remapped); 
const auto& sw = device_ctx.arch_switch_inf[arch_switch_idx]; if (!sw.buffered() && !sw.configurable()) { // This is short, create a reverse edge. - rr_edges_to_create->emplace_back(sink_node, src_node, arch_switch_idx); + rr_edges_to_create->emplace_back(sink_node, src_node, arch_switch_idx, edge_remapped); } } diff --git a/vpr/src/route/rr_graph_clock.h b/vpr/src/route/rr_graph_clock.h index 70f48eb8732..6ce575b7423 100644 --- a/vpr/src/route/rr_graph_clock.h +++ b/vpr/src/route/rr_graph_clock.h @@ -118,10 +118,19 @@ class ClockRRGraphBuilder { static void map_relative_seg_indices(const t_unified_to_parallel_seg_index& indices_map); + /*** + * @brief Add an edge to the rr graph + * @param rr_edges_to_create The set of rr edges to which the new edge is appended + * @param src_node Start point of the edge + * @param sink_node End point of the edge + * @param arch_switch_idx Index of the architecture switch used by the edge + * @param edge_remapped Indicates whether the switch index refers to an arch switch index or an rr graph switch index. Currently, only arch switch indices are supported + */ void add_edge(t_rr_edge_info_set* rr_edges_to_create, RRNodeId src_node, RRNodeId sink_node, - int arch_switch_idx) const; + int arch_switch_idx, + bool edge_remapped) const; public: /* Creates the routing resourse (rr) graph of the clock network and appends it to the diff --git a/vtr_flow/scripts/python_libs/vtr/flow.py b/vtr_flow/scripts/python_libs/vtr/flow.py index 060a5dab886..0aab0f8f3a3 100644 --- a/vtr_flow/scripts/python_libs/vtr/flow.py +++ b/vtr_flow/scripts/python_libs/vtr/flow.py @@ -291,7 +291,13 @@ def run( do_second_run = False second_run_args = vpr_args - if "write_rr_graph" in vpr_args or "analysis" in vpr_args or "route" in vpr_args: + if ( + "write_rr_graph" in vpr_args + or "analysis" in vpr_args + or "route" in vpr_args + or "write_router_lookahead" in vpr_args + or "write_intra_cluster_router_lookahead" in vpr_args + ): do_second_run = True vtr.vpr.run( diff --git a/vtr_flow/scripts/python_libs/vtr/vpr/vpr.py
b/vtr_flow/scripts/python_libs/vtr/vpr/vpr.py index b7509c1e700..d0e5953fbe0 100644 --- a/vtr_flow/scripts/python_libs/vtr/vpr/vpr.py +++ b/vtr_flow/scripts/python_libs/vtr/vpr/vpr.py @@ -282,6 +282,20 @@ def run_second_time( second_run_args["read_rr_graph"] = rr_graph_out_file second_run_args["write_rr_graph"] = rr_graph_out_file2 + inter_cluster_router_lookahead = "" + if "write_router_lookahead" in second_run_args: + inter_cluster_router_lookahead = second_run_args["write_router_lookahead"] + second_run_args["read_router_lookahead"] = inter_cluster_router_lookahead + second_run_args["write_router_lookahead"] = "inter_cluster_router_lookahead2.capnp" + + intra_cluster_router_lookahead = "" + if "write_intra_cluster_router_lookahead" in second_run_args: + intra_cluster_router_lookahead = second_run_args["write_intra_cluster_router_lookahead"] + second_run_args["read_intra_cluster_router_lookahead"] = intra_cluster_router_lookahead + second_run_args[ + "write_intra_cluster_router_lookahead" + ] = "intra_cluster_router_lookahead2.capnp" + # run VPR run( architecture, @@ -302,6 +316,26 @@ def run_second_time( if diff_result: raise InspectError("failed: vpr (RR Graph XML output not consistent when reloaded)") + if "write_inter_cluster_router_lookahead" in second_run_args: + cmd = ["diff", inter_cluster_router_lookahead, "inter_cluster_router_lookahead2.capnp"] + _, diff_result = command_runner.run_system_command( + cmd, temp_dir, log_filename="diff.inter_cluster_router_lookahead.out", indent_depth=1 + ) + if diff_result: + raise InspectError( + "failed: vpr (Inter Cluster Router Lookahead output not consistent when reloaded)" + ) + + if "write_intra_cluster_router_lookahead" in second_run_args: + cmd = ["diff", intra_cluster_router_lookahead, "intra_cluster_router_lookahead2.capnp"] + _, diff_result = command_runner.run_system_command( + cmd, temp_dir, log_filename="diff.intra_cluster_router_lookahead.out", indent_depth=1 + ) + if diff_result: + raise InspectError( + 
"failed: vpr (Intra Cluster Router Lookahead not consistent when reloaded)" + ) + def cmp_full_vs_incr_sta( architecture, diff --git a/vtr_flow/scripts/run_vtr_flow.py b/vtr_flow/scripts/run_vtr_flow.py index fd65d4467e7..7f91c016164 100755 --- a/vtr_flow/scripts/run_vtr_flow.py +++ b/vtr_flow/scripts/run_vtr_flow.py @@ -416,6 +416,22 @@ def vtr_command_argparser(prog=None): action="store_true", help="Do a second-run of the incremental analysis to compare the result files", ) + vpr.add_argument( + "-verify_inter_cluster_router_lookahead", + default=False, + action="store_true", + help="Tells VPR to verify the inter-cluster router lookahead.", + ) + vpr.add_argument( + "-verify_intra_cluster_router_lookahead", + default=False, + action="store_true", + help="Tells VPR to verify the intra-cluster router lookahead. \ + Intra-cluster router lookahead information \ + is stored in a separate data structure than the \ + inter-cluster router lookahead information, \ + and they are written into separate files.", + ) return parser @@ -712,6 +728,13 @@ def process_vpr_args(args, prog, temp_dir, vpr_args): if args.verify_rr_graph: rr_graph_out_file = "rr_graph" + args.rr_graph_ext vpr_args["write_rr_graph"] = rr_graph_out_file + if args.verify_inter_cluster_router_lookahead: + vpr_args["write_router_lookahead"] = "inter_cluster_router_lookahead.capnp" + if args.verify_intra_cluster_router_lookahead: + assert ( + "flat_routing" in vpr_args + ), "Flat router should be enabled if intra cluster router lookahead is to be verified" + vpr_args["write_intra_cluster_router_lookahead"] = "intra_cluster_router_lookahead.capnp" return vpr_args diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/task_list.txt index 8112117030f..32f33563996 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/task_list.txt @@ -1,13 +1,16 
@@ regression_tests/vtr_reg_nightly_test2/vtr_reg_netlist_writer regression_tests/vtr_reg_nightly_test2/vtr_func_formal regression_tests/vtr_reg_nightly_test2/vtr_bidir -regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph +regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph +regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph_flat_router regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph_bidir regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph_complex_switch regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph_titan -regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph_error_check +regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph_error_check +#regression_tests/vtr_reg_nightly_test2/vpr_verify_router_lookahead_flat_router @TODO: fix this test regression_tests/vtr_reg_nightly_test2/vtr_timing_update_diff regression_tests/vtr_reg_nightly_test2/vtr_timing_update_diff_titan regression_tests/vtr_reg_nightly_test2/vtr_reg_multiclock_mcnc regression_tests/vtr_reg_nightly_test2/titan_other +regression_tests/vtr_reg_nightly_test2/titan_other_flat_router regression_tests/vtr_reg_nightly_test2/titan_quick_qor diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/titan_other_flat_router/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/titan_other_flat_router/config/config.txt new file mode 100644 index 00000000000..7d5f6fe2744 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/titan_other_flat_router/config/config.txt @@ -0,0 +1,46 @@ +# +############################################ +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/titan_other_blif + +# Path to directory of SDC files +sdc_dir=benchmarks/titan_other_blif + +# Path to directory of architectures to use +archs_dir=arch/titan + +# Add circuits to list to sweep 
+circuit_list_add=carpat_stratixiv_arch_timing.blif +circuit_list_add=CH_DFSIN_stratixiv_arch_timing.blif +circuit_list_add=EKF-SLAM_Jacobians_stratixiv_arch_timing.blif +circuit_list_add=JPEG_stratixiv_arch_timing.blif +circuit_list_add=leon2_stratixiv_arch_timing.blif +circuit_list_add=leon3mp_stratixiv_arch_timing.blif +circuit_list_add=MMM_stratixiv_arch_timing.blif +circuit_list_add=radar20_stratixiv_arch_timing.blif +circuit_list_add=random_stratixiv_arch_timing.blif +circuit_list_add=Reed_Solomon_stratixiv_arch_timing.blif +circuit_list_add=sudoku_check_stratixiv_arch_timing.blif +circuit_list_add=ucsb_152_tap_fir_stratixiv_arch_timing.blif +circuit_list_add=uoft_raytracer_stratixiv_arch_timing.blif +circuit_list_add=wb_conmax_stratixiv_arch_timing.blif +circuit_list_add=picosoc_stratixiv_arch_timing.blif +circuit_list_add=murax_stratixiv_arch_timing.blif + +# Add architectures to list to sweep +arch_list_add=stratixiv_arch.timing.xml + +# Parse info and how to parse +parse_file=vpr_titan.txt + +# How to parse QoR info +qor_parse_file=qor_vpr_titan.txt + +# Pass requirements +pass_requirements_file=pass_requirements_vpr_titan.txt + +script_params=-starting_stage vpr --route_chan_width 300 --max_router_iterations 400 --router_lookahead map --flat_routing true + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/titan_other_flat_router/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/titan_other_flat_router/config/golden_results.txt new file mode 100644 index 00000000000..adf7f19d5f3 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/titan_other_flat_router/config/golden_results.txt @@ -0,0 +1,17 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error num_io num_LAB num_DSP num_M9K num_M144K num_PLL vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets 
num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops total_internal_heap_pushes total_internal_heap_pops total_external_heap_pushes total_external_heap_pops total_external_SOURCE_pushes total_external_SOURCE_pops total_internal_SOURCE_pushes total_internal_SOURCE_pops total_external_SINK_pushes total_external_SINK_pops total_internal_SINK_pushes total_internal_SINK_pops total_external_IPIN_pushes total_external_IPIN_pops total_internal_IPIN_pushes total_internal_IPIN_pops total_external_OPIN_pushes total_external_OPIN_pops total_internal_OPIN_pushes total_internal_OPIN_pops total_external_CHANX_pushes total_external_CHANX_pops total_internal_CHANX_pushes total_internal_CHANX_pops total_external_CHANY_pushes total_external_CHANY_pops total_internal_CHANY_pushes total_internal_CHANY_pops rt_node_SOURCE_pushes rt_node_SINK_pushes rt_node_IPIN_pushes rt_node_OPIN_pushes rt_node_CHANX_pushes rt_node_CHANY_pushes rt_node_SOURCE_high_fanout_pushes rt_node_SINK_high_fanout_pushes rt_node_IPIN_high_fanout_pushes rt_node_OPIN_high_fanout_pushes rt_node_CHANX_high_fanout_pushes rt_node_CHANY_high_fanout_pushes rt_node_SOURCE_entire_tree_pushes rt_node_SINK_entire_tree_pushes rt_node_IPIN_entire_tree_pushes rt_node_OPIN_entire_tree_pushes rt_node_CHANX_entire_tree_pushes rt_node_CHANY_entire_tree_pushes adding_all_rt adding_high_fanout_rt 
total_number_of_adding_all_rt_from_calling_high_fanout_rt logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time +stratixiv_arch.timing.xml carpat_stratixiv_arch_timing.blif common 613.11 vpr 5.70 GiB 274 964 36 59 0 2 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 5972436 22 252 53001 29054 7 24705 1335 89 66 5874 DSP auto 1516.1 MiB 37.02 270604 1963.0 MiB 79.69 0.59 7.51948 -39463.8 -6.51948 3.08082 116.03 0.136151 0.114617 18.5295 15.5742 367538 6.93861 80111 1.51238 126065 299395 326873530 52514623 23734707 2835516 303138823 49679107 0 0 275957 219798 187739 187739 299395 299395 17466088 189853 20997324 843213 16959508 11238575 2162031 1473110 131265712 20781045 0 0 137259776 17281895 0 0 275957 0 1945661 693680 648224 732871 14588 0 1092004 78552 215953 284056 261369 0 853657 615128 432271 448815 2916996 38300 5865 0 0 1.48105e+08 25213.7 43 3157876 31702500 48537 7.50537 3.06292 -43885.4 -6.50537 0 0 31.94 37.58 19.20 5832.2 MiB 209.41 31.3265 26.8043 1963.0 MiB 70.54 111.27 +stratixiv_arch.timing.xml CH_DFSIN_stratixiv_arch_timing.blif common 432.37 vpr 5.31 GiB 36 1577 10 10 0 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 5563168 3 33 48977 39238 1 26166 1633 54 40 2160 LAB auto 
1550.2 MiB 78.90 293178 1720.9 MiB 92.79 0.84 76.4898 -85096.3 -75.4898 76.4898 13.08 0.129138 0.113191 14.8691 12.0109 380614 7.77191 89471 1.82695 137748 443751 231034492 28523668 42721891 4327212 188312601 24196456 0 0 380735 263139 307860 307860 443751 443751 4258385 307928 38462229 1312960 19686438 14448400 3435176 2307362 80876073 5336681 0 0 83183845 3795587 0 0 380735 0 1520191 1187586 1355737 1806793 17577 0 491308 92987 353841 436817 363158 0 1028883 1094599 1001896 1369976 5185088 81521 25159 0 0 5.45450e+07 25252.3 23 2487572 26413249 65185 71.3786 71.3786 -148083 -70.3786 0 0 11.36 25.95 15.84 5432.8 MiB 159.51 21.1531 17.147 1584.0 MiB 72.72 48.84 +stratixiv_arch.timing.xml EKF-SLAM_Jacobians_stratixiv_arch_timing.blif common 1137.42 vpr 6.00 GiB 574 2798 16 0 0 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 6295900 4 570 66175 54803 2 39719 3388 91 67 6097 io auto 1814.5 MiB 152.81 659105 2150.9 MiB 203.22 1.65 27.9549 -108129 -26.9549 6.60481 110.04 0.265018 0.208157 30.0057 23.3304 928965 14.0391 205561 3.10656 318061 1713262 1309020715 198867477 165385685 19411668 1143635030 179455809 0 0 1606409 1132054 1200190 1200190 1713262 1713262 11109841 1200553 145492948 5120573 82954893 67299733 16573066 11445779 515388678 60869406 0 0 532981428 48885927 0 0 1606409 0 4251904 6365499 5501435 6214972 16513 0 475037 102831 576267 702481 1589896 0 3776867 6262668 4925168 5512491 23192501 124029 31693 0 0 1.53690e+08 25207.4 26 4532884 54099067 129419 28.8746 7.07712 -120356 -27.8746 0 0 30.14 55.11 28.11 6148.3 MiB 482.40 43.7738 34.7197 2150.9 MiB 69.51 120.45 +stratixiv_arch.timing.xml JPEG_stratixiv_arch_timing.blif common 491.86 vpr 5.92 GiB 36 1338 8 149 2 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 
/root/vtr-verilog-to-routing/vtr-verilog-to-routing 6207704 3 33 52402 39411 1 28023 1533 73 54 3942 M9K auto 1573.3 MiB 82.70 338946 1840.0 MiB 88.23 0.74 16.6959 -307104 -15.6959 16.6959 32.85 0.14467 0.111336 16.2972 12.7743 421173 8.03857 96171 1.83553 139206 529190 212860000 25231305 44167996 4469518 168692004 20761787 0 0 414427 274001 303970 303970 529190 529190 2493253 304111 39578720 1442934 18325065 12798038 3645659 2223393 73520139 4289961 0 0 74049577 3065707 0 0 414427 0 3857786 1300556 1404548 1708349 52233 0 2451477 239907 479373 636788 362194 0 1406309 1060649 925175 1071561 5584934 168432 23179 0 0 9.96430e+07 25277.3 34 3037750 30141692 81349 17.8179 17.8179 -331477 -16.8179 0 0 19.93 38.49 23.74 6062.2 MiB 166.89 25.1046 19.9907 1785.5 MiB 69.35 71.72 +stratixiv_arch.timing.xml leon2_stratixiv_arch_timing.blif common 241.79 vpr 4.85 GiB 251 954 1 17 0 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 5081652 55 196 20131 19956 1 8402 1223 44 33 1452 io auto 1387.5 MiB 45.75 122203 1548.3 MiB 20.21 0.22 7.45116 -75262 -6.45116 7.45116 8.45 0.0585202 0.0434269 5.63729 4.32211 169635 8.42781 39586 1.96671 53358 234674 76267559 7992265 22622735 1817746 53644824 6174519 0 0 154198 105434 162906 162906 234674 234674 311722 162906 20886917 696920 5842523 4244502 1346946 780718 23499477 1074270 0 0 23828196 529935 0 0 154198 0 1033870 486810 703478 840462 10329 0 757241 50167 361130 434639 143869 0 276629 436643 342348 405823 1693100 91644 14265 0 0 3.65488e+07 25171.3 28 1360736 16121963 53571 8.15417 8.15417 -83167.3 -7.15417 0 0 7.64 16.02 9.95 4962.6 MiB 111.31 9.00327 7.08734 1422.7 MiB 72.47 27.26 +stratixiv_arch.timing.xml leon3mp_stratixiv_arch_timing.blif common 437.95 vpr 5.47 GiB 255 2097 1 28 0 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 
2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 5737060 84 171 36458 36247 3 20325 2381 62 46 2852 LAB auto 1558.7 MiB 93.27 297719 1765.9 MiB 70.87 0.60 11.8295 -83637.5 -10.8295 4.15308 23.09 0.143424 0.109358 15.9535 12.1927 391686 10.7476 86849 2.38308 125508 598448 176572429 18767989 56989900 4329627 119582529 14438362 0 0 321488 243072 387320 387320 598448 598448 729596 388062 53179581 1773169 13605190 9294322 2890383 1714938 51883666 2763115 0 0 52976757 1605543 0 0 321488 0 3233365 948310 2524354 3092049 16532 0 2808024 79915 1288536 1576664 304956 0 425341 868395 1235818 1515385 4406118 295792 112609 0 0 7.20371e+07 25258.4 15 2648834 32075053 67963 12.6145 4.26424 -93963.1 -11.6145 0 0 14.12 33.11 19.68 5602.6 MiB 156.50 20.7684 16.0249 1669.6 MiB 75.28 52.39 +stratixiv_arch.timing.xml MMM_stratixiv_arch_timing.blif common 738.24 vpr 6.19 GiB 478 1236 1 300 4 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 6488112 202 276 35125 30509 3 21529 2019 106 79 8374 M9K auto 1502.7 MiB 73.51 272765 2207.4 MiB 60.53 0.48 9.19202 -37842.6 -8.19202 3.20486 182.37 0.136898 0.103736 16.4493 12.685 320956 9.13910 66352 1.88935 100167 415115 157993319 23482405 34323808 3445578 123669511 20036827 0 0 381066 234878 227243 227243 415115 415115 642224 227351 30610104 1127641 23077196 16096941 2917523 1667944 49697291 2100860 0 0 50025557 1384432 0 0 381066 0 2227139 1268941 918473 1226694 17433 0 743446 82428 157685 229301 363633 0 1483693 1186513 760788 997393 5526842 51844 12315 0 0 2.11299e+08 25232.8 20 3958220 42984491 64532 7.91139 3.47755 -67229.6 -6.91139 0 0 39.80 57.95 29.64 6336.0 MiB 171.53 21.4318 16.6995 2207.4 MiB 72.19 182.81 +stratixiv_arch.timing.xml radar20_stratixiv_arch_timing.blif common 453.19 vpr 5.36 GiB 5 331 31 105 0 2 success 
46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 5624880 3 2 14862 10304 26 7485 474 89 66 5874 DSP auto 1320.4 MiB 34.47 125450 1843.3 MiB 10.94 0.11 5.8049 -30200.5 -4.8049 3.81023 118.15 0.0607072 0.0495509 5.99461 4.9777 149819 10.0983 29897 2.01517 42261 171257 55596455 6297388 13474202 1014981 42122253 5282407 0 0 82491 63008 57241 57241 171257 171257 1336653 57241 12612743 431764 3714083 2370064 607711 348952 18453794 1550735 0 0 18560482 1247126 0 0 82491 0 3038453 167652 365739 468667 2750 0 2838075 13146 204158 311224 79741 0 200378 154506 161581 157443 842142 91976 7787 0 0 1.48105e+08 25213.7 13 2294013 23927517 37921 4.65543 3.70404 -39170.9 -3.65543 0 0 28.14 27.33 13.44 5493.0 MiB 119.75 7.84913 6.5749 1843.3 MiB 75.94 121.31 +stratixiv_arch.timing.xml random_stratixiv_arch_timing.blif common 830.95 vpr 5.89 GiB 693 1763 25 16 0 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 6178032 35 658 51416 37539 1 27797 2497 108 80 8640 io auto 1608.9 MiB 76.67 253700 2300.4 MiB 108.89 0.80 36.6178 -57981.3 -35.6178 36.6178 171.91 0.158204 0.12925 22.1759 17.8341 342733 6.86771 79937 1.60178 141214 573898 344874414 48577102 53252645 6506672 291621769 42070430 0 0 538278 391276 360228 360228 573898 573898 7081017 360265 46490833 1687480 31676243 23376052 5649636 3854018 124496081 10461477 0 0 128008200 7512408 0 0 538278 0 2119611 1913570 745327 1186799 18990 0 631202 95269 152008 238977 519288 0 1488409 1818301 593319 947822 5873469 54852 3680 0 0 2.18145e+08 25248.3 26 4400944 49964640 86213 36.9093 36.9093 -62252.5 -35.9093 0 0 39.64 51.65 21.96 6033.2 MiB 216.24 30.927 25.2045 2300.4 MiB 73.56 188.58 +stratixiv_arch.timing.xml Reed_Solomon_stratixiv_arch_timing.blif 
common 766.05 vpr 5.51 GiB 753 1119 5 32 0 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 5782540 13 740 25173 25306 1 12707 1909 117 87 10179 io auto 1437.3 MiB 59.18 158034 2351.1 MiB 40.46 0.31 9.0794 -29604.5 -8.0794 7.71568 193.63 0.0805726 0.0704518 10.3821 8.39077 193023 7.66999 42155 1.67508 67723 316841 110094427 12492137 30525922 2684272 79568505 9807865 0 0 262116 180684 198341 198341 316841 316841 495413 198377 27700196 926544 11964014 7741355 2246769 1260203 33325147 1126514 0 0 33585590 543278 0 0 262116 0 965199 881583 635973 843315 9466 0 311246 41223 259881 322038 252650 0 653953 840360 376092 521277 2888017 64488 4750 0 0 2.57091e+08 25257.0 19 4146327 46175295 62070 8.10698 7.61424 -41671.4 -7.10698 0 0 51.25 47.23 13.69 5647.0 MiB 151.94 14.057 11.4172 2351.1 MiB 74.52 242.82 +stratixiv_arch.timing.xml sudoku_check_stratixiv_arch_timing.blif common 239.62 vpr 5.17 GiB 54 667 0 40 0 1 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 5417848 2 52 16673 16662 2 12034 762 37 27 999 LAB auto 1362.5 MiB 40.22 180205 1521.9 MiB 18.15 0.19 5.54068 -21652.7 -4.54068 4.89195 4.65 0.0717041 0.0562898 6.47426 5.14154 231827 13.9077 53255 3.19485 68116 367622 159055598 19108453 34672375 3059318 124383223 16049135 0 0 285481 200249 223228 223228 367622 367622 537770 223228 31582305 1047276 11283921 8661165 2436967 1444171 55997826 3999152 0 0 56340478 2942362 0 0 285481 0 1841453 1028515 862108 1209475 4776 0 1409514 29885 265500 444129 280705 0 431939 998630 596608 765346 3197345 87150 7950 0 0 2.50432e+07 25068.2 19 1111277 11680547 35180 5.83036 5.24345 -27598.2 -4.83036 0 0 5.20 13.17 8.78 5290.9 MiB 128.35 9.38202 7.5432 1362.6 MiB 75.64 16.03 
+stratixiv_arch.timing.xml ucsb_152_tap_fir_stratixiv_arch_timing.blif common 164.03 vpr 4.71 GiB 42 750 0 0 0 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 4934640 13 29 26295 20086 1 12166 792 39 29 1131 LAB auto 1355.7 MiB 14.50 79073 1500.5 MiB 10.23 0.14 4.99447 -4598.46 -3.99447 2.56728 7.90 0.0345151 0.0268055 2.50144 1.99306 84514 3.21432 19907 0.757122 53271 71125 35543996 4314319 7119832 718101 28424164 3596218 0 0 69624 64004 39306 39306 71125 71125 76245 39306 6383411 206696 3803297 2570461 595672 376276 12231924 577806 0 0 12273392 369339 0 0 69624 0 32314 76306 73971 84801 484 0 9456 2388 8600 9401 69140 0 22858 73918 65371 75400 314449 1998 167 0 0 2.84345e+07 25141.0 15 1246346 12342987 13974 3.67397 2.81465 -5505.51 -2.67397 0 0 5.86 9.66 5.54 4819.0 MiB 89.28 3.67814 2.97416 1358.0 MiB 70.67 17.94 +stratixiv_arch.timing.xml uoft_raytracer_stratixiv_arch_timing.blif common 1311.51 vpr 5.95 GiB 964 975 19 34 0 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 6241032 542 422 37277 26038 1 20638 1992 147 109 16023 io auto 1457.5 MiB 52.03 265616 2972.0 MiB 108.24 0.92 9.50199 -37449.9 -8.50199 7.76547 403.54 0.100768 0.0806254 15.2083 12.1832 346126 9.28599 74414 1.99641 111598 332633 224653350 32780919 29959843 2965662 194693507 29815257 0 0 284178 214218 223746 223746 332633 332633 7808378 223794 27013393 957475 14955789 10307218 2329639 1461336 84833337 10478661 0 0 86872257 8581838 0 0 284178 0 1366644 796403 767936 968197 21123 0 829349 112845 328050 381217 263055 0 537295 683558 439886 586980 2699195 69880 5687 0 0 4.05153e+08 25285.7 48 5879056 65354821 47001 9.73561 7.96552 -51266 -8.73561 0 0 84.60 91.99 17.90 6094.8 MiB 236.21 25.2388 
20.8791 2972.0 MiB 73.53 401.12 +stratixiv_arch.timing.xml wb_conmax_stratixiv_arch_timing.blif common 1535.02 vpr 5.78 GiB 1107 721 0 0 0 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 6061664 403 704 15490 16194 1 8416 1828 167 124 20708 io auto 1355.1 MiB 49.09 189937 3408.7 MiB 26.87 0.23 11.9901 -21784.8 -10.9901 5.40422 629.40 0.0607233 0.0480809 7.17479 5.77349 228474 14.7507 37908 2.44741 42194 231304 73567441 8146353 21695096 1870795 51872345 6275558 0 0 178600 126519 151747 151747 231304 231304 304999 151783 19833642 686525 7902121 4862978 1451550 826447 21591790 718777 0 0 21921688 390273 0 0 178600 0 783161 616011 485116 679882 1937 0 252342 9618 232249 278611 176663 0 530819 606393 252867 401271 2122828 55170 12244 0 0 5.23921e+08 25300.4 17 6720933 74598148 36972 11.2142 5.7031 -32750.7 -10.2142 0 0 97.15 109.46 8.97 5919.6 MiB 205.54 9.39266 7.63448 3408.7 MiB 74.35 503.67 +stratixiv_arch.timing.xml picosoc_stratixiv_arch_timing.blif common 217.36 vpr 4.73 GiB 35 735 0 6 0 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 4957700 18 17 16969 16357 1 6386 776 39 29 1131 LAB auto 1350.1 MiB 45.21 83840 1498.7 MiB 9.11 0.13 6.69002 -41482 -5.69002 6.69002 7.29 0.0329439 0.0249959 2.72854 2.16481 116609 6.87350 27569 1.62505 49810 263564 88698888 10211286 25294017 2152197 63404871 8059089 0 0 156263 110796 194462 194462 263564 263564 339975 195157 23310055 782065 7176427 5432894 1564135 995772 27669612 1438557 0 0 28024395 798019 0 0 156263 0 1148787 494004 1066644 1219674 5816 0 879456 28726 433164 511308 150447 0 269331 465278 633480 708366 2315472 114033 31198 0 0 2.84345e+07 25141.0 42 1091507 12261694 43464 6.84887 6.84887 -47681.8 -5.84887 0 0 6.66 
11.68 6.83 4841.5 MiB 112.17 6.53399 5.3424 1354.1 MiB 74.98 17.19 +stratixiv_arch.timing.xml murax_stratixiv_arch_timing.blif common 103.86 vpr 4.39 GiB 35 73 0 8 0 0 success 46b9987-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-07-03T21:14:06 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 4608068 18 17 2291 2142 1 1503 116 16 12 192 LAB M9K auto 1225.8 MiB 5.49 10808 1365.3 MiB 0.94 0.01 5.25051 -3435.86 -4.25051 3.99423 0.13 0.00605524 0.00493609 0.267528 0.226197 13997 6.11757 3635 1.58872 7890 29854 10761012 1268842 2657138 256907 8103874 1011935 0 0 26185 18375 14254 14254 29854 29854 36615 14254 2380851 83280 1242024 822552 220248 125398 3374556 126276 0 0 3436425 34599 0 0 26185 0 156638 82530 40368 52509 1851 0 69535 9390 18202 20582 24334 0 87103 73140 22166 31927 285419 5552 110 0 0 4.72128e+06 24590.0 20 153885 1496224 4500 4.15507 4.15507 -3832.57 -3.15507 0 0 1.16 1.77 1.22 4500.1 MiB 79.71 0.535418 0.455236 1264.1 MiB 75.21 0.90 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vpr_verify_router_lookahead_flat_router/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vpr_verify_router_lookahead_flat_router/config/config.txt new file mode 100644 index 00000000000..67c2c9f502a --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vpr_verify_router_lookahead_flat_router/config/config.txt @@ -0,0 +1,30 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/verilog + +# Path to directory of architectures to use +archs_dir=arch/timing + +# Add circuits to list to sweep +circuit_list_add=raygentop.v + +# Add architectures to list to sweep +arch_list_add=k6_frac_N10_frac_chain_mem32K_40nm.xml +arch_list_add=k6_frac_N10_mem32K_40nm.xml +arch_list_add=k6_N10_mem32K_40nm.xml 
+ +# Parse info and how to parse +parse_file=vpr_fixed_chan_width.txt +parse_file=vpr_parse_second_file.txt + +# How to parse QoR info +qor_parse_file=qor_rr_graph.txt + +# Pass requirements +pass_requirements_file=pass_requirements_verify_rr_graph.txt + +# Script parameters +script_params = -verify_inter_cluster_router_lookahead -verify_intra_cluster_router_lookahead --route_chan_width 130 --flat_routing true diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vpr_verify_router_lookahead_flat_router/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vpr_verify_router_lookahead_flat_router/config/golden_results.txt new file mode 100644 index 00000000000..923229b832d --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vpr_verify_router_lookahead_flat_router/config/golden_results.txt @@ -0,0 +1,6 @@ + arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time routed_wirelength total_nets_routed total_connections_routed total_heap_pushes total_heap_pops logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile 
crit_path_route_success_iteration critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem router_lookahead_computation_time + k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml raygentop.v common 26.57 vpr 82.09 MiB -1 -1 4.09 45804 3 0.98 -1 -1 40164 -1 -1 112 236 1 6 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 84056 236 305 3195 3007 1 1538 660 19 19 361 io auto 44.6 MiB 1.97 12550 82.1 MiB 2.18 0.03 4.23319 -2592.08 -4.23319 4.23319 0.09 0.00734041 0.00664649 0.800573 0.722627 22506 5369 14230 2808527 640885 1.72706e+07 8.96013e+06 2.90560e+06 8048.76 16 4.88723 4.88723 -2997.25 -4.88723 -6.66982 -0.193384 82.1 MiB 1.04 1.20892 1.10479 82.1 MiB 1.24 + k6_frac_N10_frac_chain_mem32K_40nm.xml raygentop.v common 26.32 vpr 82.26 MiB -1 -1 3.75 46008 3 0.93 -1 -1 40016 -1 -1 120 236 1 6 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 84232 236 305 3195 3007 1 1534 668 19 19 361 io auto 45.3 MiB 2.86 12092 82.3 MiB 2.03 0.03 4.31218 -2553.6 -4.31218 4.31218 0.07 0.00696218 0.00631702 0.765105 0.691642 19341 4478 11708 2097942 457216 1.72706e+07 9.39128e+06 2.71656e+06 7525.11 12 4.99952 4.99952 -3010.14 -4.99952 0 0 82.3 MiB 0.83 1.12422 1.027 82.3 MiB 1.15 + k6_frac_N10_mem32K_40nm.xml raygentop.v common 26.02 vpr 77.67 MiB -1 -1 4.82 49248 8 1.50 -1 -1 41880 -1 -1 116 235 1 6 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 79536 235 305 2594 2755 1 1443 663 19 
19 361 io auto 39.9 MiB 2.23 11448 77.7 MiB 1.82 0.03 4.41088 -2430.87 -4.41088 4.41088 0.08 0.00649293 0.00576414 0.647429 0.579732 18768 3833 11059 1989583 427191 1.72706e+07 9.1757e+06 2.71663e+06 7525.28 11 5.29026 5.29026 -2756.74 -5.29026 -8.67533 -0.17036 77.7 MiB 0.75 0.955384 0.866802 77.7 MiB 1.18 + k6_N10_mem32K_40nm.xml raygentop.v common 25.03 vpr 76.39 MiB -1 -1 4.72 48856 8 1.50 -1 -1 41792 -1 -1 165 235 1 6 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 78228 235 305 2594 2755 1 1461 712 19 19 361 io auto 38.7 MiB 0.95 12269 76.4 MiB 1.85 0.03 4.59709 -2576.68 -4.59709 4.59709 0.09 0.00642268 0.00566871 0.591253 0.53058 18465 7376 21416 6225690 1224003 1.72706e+07 1.18165e+07 2.57233e+06 7125.57 19 4.96959 4.96959 -2849.65 -4.96959 -0.0066982 -0.0066982 76.4 MiB 1.84 0.995979 0.901619 76.4 MiB 1.10 + hard_fpu_arch_timing.xml raygentop.v common 389.64 vpr 322.97 MiB -1 -1 36.07 182560 40 111.52 -1 -1 74952 -1 -1 3776 235 -1 -1 success 897b3a8-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-5.10.35-v8 x86_64 2023-01-31T03:36:34 gh-actions-runner-vtr-auto-spawned5 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 330724 235 305 20496 20801 1 8995 4316 68 68 4624 clb auto 158.1 MiB 4.44 176765 323.0 MiB 49.61 0.36 22.409 -30564.1 -22.409 22.409 1.35 0.0561504 0.0436831 6.55008 5.20912 249819 53589 173978 17781644 1967620 9.87441e+06 8.65503e+06 1.89440e+07 4096.88 23 25.1849 25.1849 -37219.3 -25.1849 -0.1702 -0.0851 323.0 MiB 8.47 9.92861 8.04481 323.0 MiB 16.99 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph_flat_router/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph_flat_router/config/config.txt new file mode 100644 index 00000000000..8200d0b72ba --- /dev/null +++ 
b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph_flat_router/config/config.txt @@ -0,0 +1,30 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/verilog + +# Path to directory of architectures to use +archs_dir=arch/timing + +# Add circuits to list to sweep +circuit_list_add=raygentop.v + +# Add architectures to list to sweep +arch_list_add=k6_frac_N10_frac_chain_mem32K_40nm.xml +arch_list_add=k6_frac_N10_mem32K_40nm.xml +arch_list_add=k6_N10_mem32K_40nm.xml + +# Parse info and how to parse +parse_file=vpr_fixed_chan_width.txt +parse_file=vpr_parse_second_file.txt + +# How to parse QoR info +qor_parse_file=qor_rr_graph.txt + +# Pass requirements +pass_requirements_file=pass_requirements_verify_rr_graph.txt + +# Script parameters +script_params = -verify_rr_graph --route_chan_width 130 --flat_routing true diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph_flat_router/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph_flat_router/config/golden_results.txt new file mode 100644 index 00000000000..89bfddf4c2a --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test2/vpr_verify_rr_graph_flat_router/config/golden_results.txt @@ -0,0 +1,4 @@ + arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles 
device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time routed_wirelength avg_routed_wirelength routed_wiresegment avg_routed_wiresegment total_nets_routed total_connections_routed total_heap_pushes total_heap_pops total_internal_heap_pushes total_internal_heap_pops total_external_heap_pushes total_external_heap_pops total_external_SOURCE_pushes total_external_SOURCE_pops total_internal_SOURCE_pushes total_internal_SOURCE_pops total_external_SINK_pushes total_external_SINK_pops total_internal_SINK_pushes total_internal_SINK_pops total_external_IPIN_pushes total_external_IPIN_pops total_internal_IPIN_pushes total_internal_IPIN_pops total_external_OPIN_pushes total_external_OPIN_pops total_internal_OPIN_pushes total_internal_OPIN_pops total_external_CHANX_pushes total_external_CHANX_pops total_internal_CHANX_pushes total_internal_CHANX_pops total_external_CHANY_pushes total_external_CHANY_pops total_internal_CHANY_pushes total_internal_CHANY_pops rt_node_SOURCE_pushes rt_node_SINK_pushes rt_node_IPIN_pushes rt_node_OPIN_pushes rt_node_CHANX_pushes rt_node_CHANY_pushes rt_node_SOURCE_high_fanout_pushes rt_node_SINK_high_fanout_pushes rt_node_IPIN_high_fanout_pushes rt_node_OPIN_high_fanout_pushes rt_node_CHANX_high_fanout_pushes rt_node_CHANY_high_fanout_pushes rt_node_SOURCE_entire_tree_pushes rt_node_SINK_entire_tree_pushes rt_node_IPIN_entire_tree_pushes rt_node_OPIN_entire_tree_pushes rt_node_CHANX_entire_tree_pushes rt_node_CHANY_entire_tree_pushes adding_all_rt adding_high_fanout_rt total_number_of_adding_all_rt_from_calling_high_fanout_rt logic_block_area_total logic_block_area_used routing_area_total routing_area_per_tile 
crit_path_route_success_iteration num_rr_graph_nodes num_rr_graph_edges collapsed_nodes critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS create_rr_graph_time create_intra_cluster_rr_graph_time adding_internal_edges route_mem crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time router_lookahead_mem tile_lookahead_computation_time router_lookahead_computation_time + k6_frac_N10_frac_chain_mem32K_40nm.xml raygentop.v common 34.63 vpr 89.45 MiB -1 -1 4.17 44324 3 0.99 -1 -1 40052 -1 -1 120 236 1 6 success 8528925-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:47 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 91592 236 305 3195 3007 1 1534 668 19 19 361 io auto 51.5 MiB 3.06 12599 88.1 MiB 2.53 0.03 4.24001 -2559.11 -4.24001 4.24001 0.08 0.00703497 0.00627172 0.802031 0.712049 19974 6.26734 5280 1.65673 6526 18770 3689605 528791 1030973 111808 2658632 416983 0 0 16730 12773 14146 14146 18770 18770 224557 14146 936825 46119 21173 8605 58648 34146 1211508 184522 0 0 1187248 195564 0 0 16730 0 40770 29717 38660 37282 376 0 18525 1308 12079 8421 16354 0 22245 28409 26581 28861 132774 2435 210 1.72706e+07 9.39128e+06 2.71656e+06 7525.11 12 96193 892470 34148 5.01727 5.01727 -2984.54 -5.01727 0 0 0.53 2.34 1.97 89.4 MiB 4.11 1.03305 0.916665 88.1 MiB 0.19 1.20 + k6_frac_N10_mem32K_40nm.xml raygentop.v common 31.48 vpr 84.21 MiB -1 -1 5.23 47816 8 1.57 -1 -1 39848 -1 -1 116 235 1 6 success 8528925-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:47 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 86228 235 305 2594 2755 1 1443 663 19 19 361 io auto 45.8 MiB 2.27 11299 84.2 MiB 1.97 0.03 4.34768 -2449.09 -4.34768 4.34768 0.08 0.00676897 0.00595337 0.614033 0.544589 18412 7.11712 4900 1.89409 5321 18506 2801879 378042 1030672 
105034 1771207 273008 0 0 16834 12903 13221 13221 18506 18506 154960 13221 934222 42356 22545 7768 61110 31269 791918 117393 0 0 788563 121405 0 0 16834 0 41203 33014 29458 31077 138 0 14120 408 7178 6802 16696 0 27083 32606 22280 24275 133920 1822 78 1.72706e+07 9.1757e+06 2.71663e+06 7525.28 12 84433 860913 20615 5.00924 5.00924 -2770.59 -5.00924 0 0 0.50 1.98 1.66 84.2 MiB 3.28 0.834215 0.742088 84.2 MiB 0.10 1.16 + k6_N10_mem32K_40nm.xml raygentop.v common 30.60 vpr 81.84 MiB -1 -1 5.30 47740 8 1.66 -1 -1 40352 -1 -1 165 235 1 6 success 8528925-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:47 gh-actions-runner-vtr-auto-spawned30 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 83808 235 305 2594 2755 1 1461 712 19 19 361 io auto 44.2 MiB 1.01 12332 81.8 MiB 2.37 0.03 4.70207 -2540.5 -4.70207 4.70207 0.10 0.00690155 0.0061386 0.684322 0.606061 19350 7.47971 5182 2.00309 4886 17328 2712776 345540 1018367 90671 1694409 254869 0 0 15725 11657 13191 13191 17328 17328 144955 13191 928553 33418 21186 6632 56761 28268 760040 108620 0 0 755037 113235 0 0 15725 0 29034 31368 29626 28102 101 0 7343 270 6792 6097 15624 0 21691 31098 22834 22005 123960 1713 51 1.72706e+07 1.18165e+07 2.57233e+06 7125.57 11 75944 916321 23401 5.96046 5.96046 -2896.81 -5.96046 0 0 0.50 2.39 2.10 81.8 MiB 3.54 0.89765 0.797281 81.8 MiB 0.05 1.18 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/task_list.txt index 5a58cab89f2..7524b980bed 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/task_list.txt @@ -1,7 +1,6 @@ regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_depop +regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_depop_flat_router 
regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_predictor_off regression_tests/vtr_reg_nightly_test3/vtr_reg_qor -regression_tests/vtr_reg_nightly_test3/complex_switch - - +regression_tests/vtr_reg_nightly_test3/complex_switch \ No newline at end of file diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_depop_flat_router/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_depop_flat_router/config/config.txt new file mode 100755 index 00000000000..01eba58da65 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_depop_flat_router/config/config.txt @@ -0,0 +1,32 @@ +# +############################################ +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/verilog + +# Path to directory of architectures to use +archs_dir=arch/timing + +# Add circuits to list to sweep +circuit_list_add=bgm.v +circuit_list_add=LU8PEEng.v +circuit_list_add=stereovision0.v +circuit_list_add=stereovision1.v +circuit_list_add=stereovision2.v + +# Add architectures to list to sweep +arch_list_add=k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml + +# Parse info and how to parse +parse_file=vpr_standard.txt + +# How to parse QoR info +qor_parse_file=qor_standard.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +#Script parameters +script_params=-track_memory_usage --max_router_iterations 300 --flat_routing true --has_choking_spot true diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_depop_flat_router/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_depop_flat_router/config/golden_results.txt new file mode 100644 index 00000000000..d6944eeddb2 --- /dev/null +++ 
b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test3/vtr_reg_qor_chain_depop_flat_router/config/golden_results.txt @@ -0,0 +1,6 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops 
crit_path_total_internal_SINK_pushes crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time +k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml bgm.v common 1994.99 vpr 898.68 MiB -1 -1 61.01 621344 14 118.10 -1 -1 123276 -1 -1 2287 257 0 11 success 8528925-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:32 gh-actions-runner-vtr-auto-spawned40 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 920252 257 32 35747 33389 1 18576 2587 58 58 3364 clb auto 366.7 MiB 66.13 238606 672.7 MiB 114.11 0.89 17.1228 -22971.9 -17.1228 17.1228 66.43 0.122725 0.105945 15.0557 11.8391 78 394768 147 2.00088e+08 1.27615e+08 1.92320e+07 5717.01 
1361.68 122.113 97.7861 1114397 11021065 660432 349453 53 168974 802689 141308298 36713877 45717834 6972091 95590464 29741786 0 0 792797 514828 747302 747302 910805 802689 2332383 1256218 40556637 4114854 1173010 341749 3457595 1539720 44978029 13690833 0 0 46359740 13705684 0 0 792797 0 1048626 1953882 3472000 3402169 10847209 13973 508 19.5428 19.5428 -25846 -19.5428 0 0 2.52407e+07 7305.90 14.24 130.51 4.88 34.68 0.19 14.24 14.3145 11.5453 +k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml LU8PEEng.v common 3071.55 vpr 833.25 MiB -1 -1 71.48 455940 98 132.11 -1 -1 115232 -1 -1 1800 114 45 8 success 8528925-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:32 gh-actions-runner-vtr-auto-spawned40 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 853252 114 102 35713 31804 1 16705 2069 51 51 2601 clb auto 339.9 MiB 62.95 216212 561.5 MiB 92.18 0.71 65.1279 -53179 -65.1279 65.1279 49.18 0.120059 0.101817 14.974 11.8502 96 359574 127 1.52527e+08 1.2484e+08 1.77902e+07 6839.76 2509.64 176.104 141.33 1051316 9491173 587229 299163 55 145183 626548 203387007 66494555 41102351 7195584 162284656 59298971 0 0 557595 348397 529205 529205 686930 626548 3869612 1660071 37419060 5169165 813788 245793 2438766 1051474 77969311 28517504 0 0 79102740 28346398 0 0 557595 0 1827926 1276985 1909761 1891717 6237045 74462 22447 75.1357 75.1357 -67113 -75.1357 -0.0967573 -0.0199062 2.21294e+07 8508.02 8.37 98.67 2.89 16.30 0.13 8.37 6.86691 5.51408 +k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml stereovision0.v common 491.17 vpr 361.63 MiB -1 -1 13.25 101972 5 13.80 -1 -1 69408 -1 -1 673 169 0 0 success 8528925-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:32 gh-actions-runner-vtr-auto-spawned40 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 370312 169 197 23321 21461 1 6785 1039 33 33 1089 clb auto 180.8 MiB 12.19 42112 221.0 MiB 12.30 0.12 3.10868 -13056.7 -3.10868 3.10868 5.95 0.0421986 0.0328892 
4.76161 3.85299 58 63990 244 6.0475e+07 3.62708e+07 4.62388e+06 4245.99 385.70 79.3922 66.3623 452845 3145025 280765 57244 49 46719 122488 12901034 3413607 6417997 1071484 6483037 2342123 0 0 99450 91334 71450 71450 127765 122488 173471 117401 5739165 636855 123351 34238 451617 220807 2978920 1062567 0 0 3135845 1056467 0 0 99450 0 417639 158865 157759 165849 512493 25863 4191 3.58485 3.58485 -15007.4 -3.58485 0 0 5.85783e+06 5379.09 3.06 25.65 1.05 12.08 0.19 3.06 4.27202 3.58561 +k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml stereovision1.v common 1731.31 vpr 388.00 MiB -1 -1 10.47 123456 3 17.68 -1 -1 77352 -1 -1 655 115 0 40 success 8528925-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:32 gh-actions-runner-vtr-auto-spawned40 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 397312 115 145 22868 19305 1 9712 955 40 40 1600 mult_36 auto 177.4 MiB 9.52 82272 238.0 MiB 13.23 0.12 5.15059 -21406.4 -5.15059 5.15059 8.56 0.0361559 0.0312351 4.65663 3.86719 76 154775 184 9.16046e+07 5.11412e+07 8.72311e+06 5451.94 1612.65 90.8987 76.51 519336 3912846 269202 122007 30 61007 155688 38640235 6222360 6668491 918705 31971744 5303655 0 0 115015 107081 104788 104788 162701 155688 2093369 113670 5939763 436808 143497 55137 451012 219128 14854225 2513893 0 0 14775865 2516167 0 0 115015 0 643171 157906 453642 427511 882511 43939 8570 5.48939 5.48939 -24883.8 -5.48939 0 0 1.18598e+07 6887.37 5.55 30.87 1.96 11.01 0.18 5.55 3.14734 2.6417 +k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml stereovision2.v common 5871.63 vpr 1.05 GiB -1 -1 14.96 197124 3 8.63 -1 -1 155544 -1 -1 1490 149 0 179 success 8528925-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:37:32 gh-actions-runner-vtr-auto-spawned40 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 1100488 149 182 55416 37075 1 28670 2000 80 80 6400 mult_36 auto 360.6 MiB 29.07 291939 1074.7 MiB 79.70 0.55 12.5458 -48952.6 -12.5458 12.5458 135.77 
0.108801 0.0950539 15.7001 12.8522 78 437698 224 3.90281e+08 1.51186e+08 3.79986e+07 5881.04 5479.15 81.5782 67.7414 1647473 12742193 618027 380228 28 138212 241867 64603805 10299476 9587932 1487408 55015873 8812068 0 0 208298 193889 166401 166401 247323 241867 3307020 170969 8438805 627104 233900 115896 693506 424548 25609640 4121299 0 0 25698912 4237503 0 0 208298 0 605901 210574 379701 368344 1028019 35661 9945 13.929 13.929 -56870.7 -13.929 0 0 4.76105e+07 7382.88 19.01 37.94 5.93 15.28 0.13 19.01 3.18631 2.64796 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/config.txt new file mode 100644 index 00000000000..caed2da9784 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/config.txt @@ -0,0 +1,28 @@ +# +############################################ +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/verilog + +# Path to directory of architectures to use +archs_dir=arch/timing + +# Add circuits to list to sweep +circuit_list_add=spree.v + +# Add architectures to list to sweep +arch_list_add=k6_frac_N10_frac_chain_mem32K_40nm.xml + +# Parse info and how to parse +parse_file=vpr_standard.txt + +# How to parse QoR info +qor_parse_file=qor_standard.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +script_params=-track_memory_usage --route_chan_width 100 --max_router_iterations 100 --router_lookahead map --flat_routing true + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/golden_results.txt new file mode 100644 index 00000000000..6a885701bc1 --- /dev/null +++ 
b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_flat_router/config/golden_results.txt @@ -0,0 +1,2 @@ + arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_num_rr_graph_nodes crit_path_num_rr_graph_edges crit_path_collapsed_nodes crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops crit_path_total_internal_heap_pushes crit_path_total_internal_heap_pops crit_path_total_external_heap_pushes crit_path_total_external_heap_pops crit_path_total_external_SOURCE_pushes crit_path_total_external_SOURCE_pops crit_path_total_internal_SOURCE_pushes crit_path_total_internal_SOURCE_pops crit_path_total_external_SINK_pushes crit_path_total_external_SINK_pops crit_path_total_internal_SINK_pushes 
crit_path_total_internal_SINK_pops crit_path_total_external_IPIN_pushes crit_path_total_external_IPIN_pops crit_path_total_internal_IPIN_pushes crit_path_total_internal_IPIN_pops crit_path_total_external_OPIN_pushes crit_path_total_external_OPIN_pops crit_path_total_internal_OPIN_pushes crit_path_total_internal_OPIN_pops crit_path_total_external_CHANX_pushes crit_path_total_external_CHANX_pops crit_path_total_internal_CHANX_pushes crit_path_total_internal_CHANX_pops crit_path_total_external_CHANY_pushes crit_path_total_external_CHANY_pops crit_path_total_internal_CHANY_pushes crit_path_total_internal_CHANY_pops crit_path_rt_node_SOURCE_pushes crit_path_rt_node_SINK_pushes crit_path_rt_node_IPIN_pushes crit_path_rt_node_OPIN_pushes crit_path_rt_node_CHANX_pushes crit_path_rt_node_CHANY_pushes crit_path_adding_all_rt crit_path_adding_high_fanout_rt crit_path_total_number_of_adding_all_rt_from_calling_high_fanout_rt critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_create_rr_graph_time crit_path_create_intra_cluster_rr_graph_time crit_path_tile_lookahead_computation_time crit_path_router_lookahead_computation_time crit_path_total_timing_analysis_time crit_path_total_sta_time + k6_frac_N10_frac_chain_mem32K_40nm.xml spree.v common 12.82 vpr 76.19 MiB -1 -1 3.42 34124 16 0.76 -1 -1 37916 -1 -1 61 45 3 1 success 8528925 release IPO VTR_ASSERT_LEVEL=3 GNU 9.5.0 on Linux-5.10.35-v8 x86_64 2023-05-29T15:34:55 gh-actions-runner-vtr-auto-spawned83 /root/vtr-verilog-to-routing/vtr-verilog-to-routing 78016 45 32 1188 1147 1 781 142 14 14 196 memory auto 39.1 MiB 3.14 6687 76.2 MiB 0.85 0.01 9.87688 -6144.34 -9.87688 9.87688 0.04 0.00303074 0.00250348 0.260087 0.214733 -1 10707 13 9.20055e+06 5.32753e+06 1.21359e+06 5900 2.66 0.354898 0.295042 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 
-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt index e0781c9c180..0e973fc00d0 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt @@ -82,3 +82,4 @@ regression_tests/vtr_reg_strong/koios_no_complex_dsp regression_tests/vtr_reg_strong/strong_timing_fail regression_tests/vtr_reg_strong/strong_timing_no_fail regression_tests/vtr_reg_strong/strong_noc +regression_tests/vtr_reg_strong/strong_flat_router