diff --git a/libs/libarchfpga/src/device_grid.h b/libs/libarchfpga/src/device_grid.h index b4a7b7c031e..2077f2b9419 100644 --- a/libs/libarchfpga/src/device_grid.h +++ b/libs/libarchfpga/src/device_grid.h @@ -50,7 +50,7 @@ class DeviceGrid { * @brief Return the number of instances of the specified tile type on the specified layer. If the layer_num is -1, return the total number of instances of the specified tile type on all layers. * @note This function should be used if count_instances() is called in the constructor. */ - size_t num_instances(t_physical_tile_type_ptr type, int layer_num = 0) const; + size_t num_instances(t_physical_tile_type_ptr type, int layer_num) const; /** * @brief Returns the block types which limits the device size (may be empty if @@ -59,23 +59,23 @@ class DeviceGrid { std::vector limiting_resources() const { return limiting_resources_; } ///@brief Return the t_physical_tile_type_ptr at the specified location - inline t_physical_tile_type_ptr get_physical_type(size_t x, size_t y, int layer_num = 0) const { - return grid_[layer_num][x][y].type; + inline t_physical_tile_type_ptr get_physical_type(const t_physical_tile_loc& tile_loc) const { + return grid_[tile_loc.layer_num][tile_loc.x][tile_loc.y].type; } ///@brief Return the width offset of the tile at the specified location. The root location of the tile is where width_offset and height_offset are 0. - inline int get_width_offset(size_t x, size_t y, int layer_num = 0) const { - return grid_[layer_num][x][y].width_offset; + inline int get_width_offset(const t_physical_tile_loc& tile_loc) const { + return grid_[tile_loc.layer_num][tile_loc.x][tile_loc.y].width_offset; } ///@brief Return the height offset of the tile at the specified location. 
The root location of the tile is where width_offset and height_offset are 0 - inline int get_height_offset(size_t x, size_t y, int layer_num = 0) const { - return grid_[layer_num][x][y].height_offset; + inline int get_height_offset(const t_physical_tile_loc& tile_loc) const { + return grid_[tile_loc.layer_num][tile_loc.x][tile_loc.y].height_offset; } ///@brief Return the metadata of the tile at the specified location - inline const t_metadata_dict* get_metadata(size_t x, size_t y, int layer_num = 0) const { - return grid_[layer_num][x][y].meta; + inline const t_metadata_dict* get_metadata(const t_physical_tile_loc& tile_loc) const { + return grid_[tile_loc.layer_num][tile_loc.x][tile_loc.y].meta; } ///@brief Given t_grid_tile, return the x coordinate of the tile on the given layer - Used by serializer functions @@ -94,6 +94,12 @@ class DeviceGrid { return diff % grid_.dim_size(2); } + ///@brief Given t_grid_tile, return the layer number of the tile - Used by serializer functions + inline int get_grid_loc_layer(const t_grid_tile*& grid_loc) const { + int layer_num = std::floor(static_cast(grid_loc - &grid_.get(0)) / (width() * height())); + return layer_num; + } + ///@brief Return the nth t_grid_tile on the given layer of the flattened grid - Used by serializer functions inline const t_grid_tile* get_grid_locs_grid_loc(int n) const { return &grid_.get(n); diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index a4699e2ccd8..dde02162379 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -814,6 +814,31 @@ struct t_physical_pin { } }; +/** + * @brief Describes The location of a physical tile + * @param layer_num The die number of the physical tile. If the FPGA only has one die, or the physical tile is located + * on the base die, layer_num is equal to zero. If it is one the die above base die, it is one, etc. 
+ * @param x The x location of the physical tile on the given die + * @param y The y location of the physical tile on the given die + */ +struct t_physical_tile_loc { + int x = OPEN; + int y = OPEN; + int layer_num = OPEN; + + t_physical_tile_loc() = default; + + t_physical_tile_loc(int x_val, int y_val, int layer_num_val) + : x(x_val) + , y(y_val) + , layer_num(layer_num_val) {} + + // Returns true if this type location layer_num/x/y is not equal to OPEN + operator bool() const { + return !(x == OPEN || y == OPEN || layer_num == OPEN); + } +}; + /** Describes I/O and clock ports of a physical tile type * * It corresponds to tags in the FPGA architecture description diff --git a/libs/librrgraph/CMakeLists.txt b/libs/librrgraph/CMakeLists.txt index 37a4a275cc2..372e8a6f33c 100644 --- a/libs/librrgraph/CMakeLists.txt +++ b/libs/librrgraph/CMakeLists.txt @@ -48,14 +48,14 @@ add_custom_target( COMMAND ${CMAKE_COMMAND} -E make_directory rr_graph_generate COMMAND ${CMAKE_COMMAND} -E chdir rr_graph_generate git clone https://github.com/duck2/uxsdcxx COMMAND python3 -mpip install --user -r rr_graph_generate/uxsdcxx/requirements.txt - COMMAND ${CMAKE_COMMAND} -E chdir rr_graph_generate python3 uxsdcxx/uxsdcxx.py ${CMAKE_CURRENT_SOURCE_DIR}/src/base/rr_graph.xsd - COMMAND ${CMAKE_COMMAND} -E chdir rr_graph_generate python3 uxsdcxx/uxsdcap.py ${CMAKE_CURRENT_SOURCE_DIR}/src/base/rr_graph.xsd + COMMAND ${CMAKE_COMMAND} -E chdir rr_graph_generate python3 uxsdcxx/uxsdcxx.py ${CMAKE_CURRENT_SOURCE_DIR}/src/io/rr_graph.xsd + COMMAND ${CMAKE_COMMAND} -E chdir rr_graph_generate python3 uxsdcxx/uxsdcap.py ${CMAKE_CURRENT_SOURCE_DIR}/src/io/rr_graph.xsd COMMAND ${CMAKE_COMMAND} -E copy rr_graph_generate/rr_graph_uxsdcxx.h rr_graph_generate/rr_graph_uxsdcxx_capnp.h rr_graph_generate/rr_graph_uxsdcxx_interface.h - ${CMAKE_CURRENT_SOURCE_DIR}/src/base/gen + ${CMAKE_CURRENT_SOURCE_DIR}/src/io/gen COMMAND ${CMAKE_COMMAND} -E copy rr_graph_generate/rr_graph_uxsdcxx.capnp 
${CMAKE_CURRENT_SOURCE_DIR}/../libvtrcapnproto/gen - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/base/rr_graph.xsd + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/src/io/rr_graph.xsd WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) \ No newline at end of file diff --git a/libs/librrgraph/src/base/check_rr_graph.cpp b/libs/librrgraph/src/base/check_rr_graph.cpp index 524f6a56b93..8ed76ed2f58 100644 --- a/libs/librrgraph/src/base/check_rr_graph.cpp +++ b/libs/librrgraph/src/base/check_rr_graph.cpp @@ -234,9 +234,11 @@ void check_rr_graph(const RRGraphView& rr_graph, size_t inode = (size_t)rr_node; t_rr_type rr_type = rr_graph.node_type(rr_node); int ptc_num = rr_graph.node_ptc_num(rr_node); + int layer_num = rr_graph.node_layer(rr_node); int xlow = rr_graph.node_xlow(rr_node); int ylow = rr_graph.node_ylow(rr_node); - t_physical_tile_type_ptr type = grid.get_physical_type(xlow, ylow); + + t_physical_tile_type_ptr type = grid.get_physical_type({xlow, ylow, layer_num}); if (rr_type == IPIN || rr_type == OPIN) { // #TODO: No edges are added for internal pins. However, they need to be checked somehow! 
@@ -273,7 +275,9 @@ void check_rr_graph(const RRGraphView& rr_graph, if (!is_chain && !is_fringe && !is_wire) { if (rr_graph.node_type(rr_node) == IPIN || rr_graph.node_type(rr_node) == OPIN) { if (has_adjacent_channel(rr_graph, grid, node)) { - auto block_type = grid.get_physical_type(rr_graph.node_xlow(rr_node), rr_graph.node_ylow(rr_node)); + auto block_type = grid.get_physical_type({rr_graph.node_xlow(rr_node), + rr_graph.node_ylow(rr_node), + rr_graph.node_layer(rr_node)}); std::string pin_name = block_type_pin_index_to_name(block_type, rr_graph.node_pin_num(rr_node), is_flat); /* Print error messages for all the sides that a node may appear */ for (const e_side& node_side : SIDES) { @@ -312,7 +316,9 @@ static bool rr_node_is_global_clb_ipin(const RRGraphView& rr_graph, const Device int ipin; t_physical_tile_type_ptr type; - type = grid.get_physical_type(rr_graph.node_xlow(inode), rr_graph.node_ylow(inode)); + type = grid.get_physical_type({rr_graph.node_xlow(inode), + rr_graph.node_ylow(inode), + rr_graph.node_layer(inode)}); if (rr_graph.node_type(inode) != IPIN) return (false); @@ -335,7 +341,7 @@ void check_rr_node(const RRGraphView& rr_graph, //Make sure over-flow doesn't happen VTR_ASSERT(inode >= 0); - int xlow, ylow, xhigh, yhigh, ptc_num, capacity; + int xlow, ylow, xhigh, yhigh, layer_num, ptc_num, capacity; t_rr_type rr_type; t_physical_tile_type_ptr type; int nodes_per_chan, tracks_per_node; @@ -348,6 +354,7 @@ void check_rr_node(const RRGraphView& rr_graph, xhigh = rr_graph.node_xhigh(rr_node); ylow = rr_graph.node_ylow(rr_node); yhigh = rr_graph.node_yhigh(rr_node); + layer_num = rr_graph.node_layer(rr_node); ptc_num = rr_graph.node_ptc_num(rr_node); capacity = rr_graph.node_capacity(rr_node); cost_index = rr_graph.node_cost_index(rr_node); @@ -363,6 +370,11 @@ void check_rr_node(const RRGraphView& rr_graph, "in check_rr_node: rr endpoints (%d,%d) and (%d,%d) are out of range.\n", xlow, ylow, xhigh, yhigh); } + if (layer_num < 0 || layer_num > 
int(grid.get_num_layers()) - 1) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, + "in check_rr_node: rr endpoints layer_num (%d) is out of range.\n", layer_num); + } + if (ptc_num < 0) { VPR_ERROR(VPR_ERROR_ROUTE, "in check_rr_node: inode %d (type %d) had a ptc_num of %d.\n", inode, rr_type, ptc_num); @@ -374,7 +386,7 @@ void check_rr_node(const RRGraphView& rr_graph, } /* Check that the segment is within the array and such. */ - type = grid.get_physical_type(xlow, ylow); + type = grid.get_physical_type({xlow, ylow, layer_num}); switch (rr_type) { case SOURCE: diff --git a/libs/librrgraph/src/base/rr_graph_builder.cpp b/libs/librrgraph/src/base/rr_graph_builder.cpp index 535e027ca9f..072b47804ab 100644 --- a/libs/librrgraph/src/base/rr_graph_builder.cpp +++ b/libs/librrgraph/src/base/rr_graph_builder.cpp @@ -28,26 +28,27 @@ MetadataStorage>& RRGraphBuilder::rr_edge_metadata() void RRGraphBuilder::add_node_to_all_locs(RRNodeId node) { t_rr_type node_type = node_storage_.node_type(node); short node_ptc_num = node_storage_.node_ptc_num(node); + short node_layer = node_storage_.node_layer(node); for (int ix = node_storage_.node_xlow(node); ix <= node_storage_.node_xhigh(node); ix++) { for (int iy = node_storage_.node_ylow(node); iy <= node_storage_.node_yhigh(node); iy++) { switch (node_type) { case SOURCE: case SINK: case CHANY: - node_lookup_.add_node(node, ix, iy, node_type, node_ptc_num, SIDES[0]); + node_lookup_.add_node(node,node_layer, ix, iy, node_type, node_ptc_num, SIDES[0]); break; case CHANX: /* Currently need to swap x and y for CHANX because of chan, seg convention * TODO: Once the builders is reworked for use consistent (x, y) convention, * the following swapping can be removed */ - node_lookup_.add_node(node, iy, ix, node_type, node_ptc_num, SIDES[0]); + node_lookup_.add_node(node,node_layer, iy, ix, node_type, node_ptc_num, SIDES[0]); break; case OPIN: case IPIN: for (const e_side& side : SIDES) { if (node_storage_.is_node_on_specific_side(node, side)) { - 
node_lookup_.add_node(node, ix, iy, node_type, node_ptc_num, side); + node_lookup_.add_node(node,node_layer, ix, iy, node_type, node_ptc_num, side); } } break; diff --git a/libs/librrgraph/src/base/rr_graph_builder.h b/libs/librrgraph/src/base/rr_graph_builder.h index f1777355f07..5c00e1d2c18 100644 --- a/libs/librrgraph/src/base/rr_graph_builder.h +++ b/libs/librrgraph/src/base/rr_graph_builder.h @@ -165,6 +165,11 @@ class RRGraphBuilder { node_storage_.set_node_coordinates(id, x1, y1, x2, y2); } + /** @brief Set the node layer (specifies which die the node is located at) */ + inline void set_node_layer(RRNodeId id, short layer){ + node_storage_.set_node_layer(id,layer); + } + /** @brief The ptc_num carries different meanings for different node types * (true in VPR RRG that is currently supported, may not be true in customized RRG) * CHANX or CHANY: the track id in routing channels @@ -179,6 +184,11 @@ class RRGraphBuilder { node_storage_.set_node_ptc_num(id, new_ptc_num); } + /** @brief set the layer number at which RRNodeId is located at */ + inline void set_node_layer(RRNodeId id, int layer){ + node_storage_.set_node_layer(id, layer); + } + /** @brief set_node_pin_num() is designed for logic blocks, which are IPIN and OPIN nodes */ inline void set_node_pin_num(RRNodeId id, int new_pin_num) { node_storage_.set_node_pin_num(id, new_pin_num); diff --git a/libs/librrgraph/src/base/rr_graph_storage.cpp b/libs/librrgraph/src/base/rr_graph_storage.cpp index 94ca29b7636..9934752dce0 100644 --- a/libs/librrgraph/src/base/rr_graph_storage.cpp +++ b/libs/librrgraph/src/base/rr_graph_storage.cpp @@ -624,6 +624,10 @@ const char* t_rr_graph_storage::node_side_string(RRNodeId id) const { return SIDE_STRING[NUM_SIDES]; } +void t_rr_graph_storage::set_node_layer(RRNodeId id, short layer) { + node_layer_[id] = layer; +} + void t_rr_graph_storage::set_node_ptc_num(RRNodeId id, int new_ptc_num) { node_ptc_[id].ptc_.pin_num = new_ptc_num; //TODO: eventually remove } @@ -777,6 
+781,7 @@ int t_rr_graph_view::node_class_num(RRNodeId id) const { return get_node_class_num(node_storage_, node_ptc_, id); } + t_rr_graph_view t_rr_graph_storage::view() const { VTR_ASSERT(partitioned_); VTR_ASSERT(node_storage_.size() == node_fan_in_.size()); @@ -785,6 +790,7 @@ t_rr_graph_view t_rr_graph_storage::view() const { vtr::make_const_array_view_id(node_ptc_), vtr::make_const_array_view_id(node_first_edge_), vtr::make_const_array_view_id(node_fan_in_), + vtr::make_const_array_view_id(node_layer_), vtr::make_const_array_view_id(edge_src_node_), vtr::make_const_array_view_id(edge_dest_node_), vtr::make_const_array_view_id(edge_switch_)); diff --git a/libs/librrgraph/src/base/rr_graph_storage.h b/libs/librrgraph/src/base/rr_graph_storage.h index 6d150c02641..b6c85caa22e 100644 --- a/libs/librrgraph/src/base/rr_graph_storage.h +++ b/libs/librrgraph/src/base/rr_graph_storage.h @@ -77,6 +77,7 @@ struct alignas(16) t_rr_node_data { } dir_side_; uint16_t capacity_ = 0; + }; // t_rr_node_data is a key data structure, so fail at compile time if the @@ -226,6 +227,14 @@ class t_rr_graph_storage { return node_fan_in_[id]; } + /* Find the layer number that RRNodeId is located at. + * it is zero if the FPGA only has one die. + * The layer number start from the base die (base die: 0, the die above it: 1, etc.) + * */ + short node_layer(RRNodeId id) const{ + return node_layer_[id]; + } + // This prefetechs hot RR node data required for optimization. // // Note: This is optional, but may lower time spent on memory stalls in @@ -393,6 +402,7 @@ class t_rr_graph_storage { make_room_in_vector(&node_storage_, size_t(elem_position)); node_ptc_.reserve(node_storage_.capacity()); node_ptc_.resize(node_storage_.size()); + node_layer_.resize(node_storage_.size()); } // Reserve storage for RR nodes. 
@@ -401,6 +411,7 @@ class t_rr_graph_storage { VTR_ASSERT(!edges_read_); node_storage_.reserve(size); node_ptc_.reserve(size); + node_layer_.reserve(size); } // Resize node storage to accomidate size RR nodes. @@ -409,6 +420,7 @@ class t_rr_graph_storage { VTR_ASSERT(!edges_read_); node_storage_.resize(size); node_ptc_.resize(size); + node_layer_.resize(size); } // Number of RR nodes that can be accessed. @@ -429,6 +441,7 @@ class t_rr_graph_storage { node_ptc_.clear(); node_first_edge_.clear(); node_fan_in_.clear(); + node_layer_.clear(); seen_edge_.clear(); edge_src_node_.clear(); edge_dest_node_.clear(); @@ -448,6 +461,7 @@ class t_rr_graph_storage { node_ptc_.shrink_to_fit(); node_first_edge_.shrink_to_fit(); node_fan_in_.shrink_to_fit(); + node_layer_.shrink_to_fit(); seen_edge_.shrink_to_fit(); edge_src_node_.shrink_to_fit(); edge_dest_node_.shrink_to_fit(); @@ -461,6 +475,7 @@ class t_rr_graph_storage { VTR_ASSERT(!edges_read_); node_storage_.emplace_back(); node_ptc_.emplace_back(); + node_layer_.emplace_back(); } // Given `order`, a vector mapping each RRNodeId to a new one (old -> new), @@ -479,6 +494,7 @@ class t_rr_graph_storage { void set_node_type(RRNodeId id, t_rr_type new_type); void set_node_coordinates(RRNodeId id, short x1, short y1, short x2, short y2); + void set_node_layer(RRNodeId id, short layer); void set_node_cost_index(RRNodeId, RRIndexedDataId new_cost_index); void set_node_rc_index(RRNodeId, NodeRCIndex new_rc_index); void set_node_capacity(RRNodeId, short new_capacity); @@ -670,6 +686,12 @@ class t_rr_graph_storage { // Fan in counts for each RR node. vtr::vector node_fan_in_; + // Layer number that each RR node is located at + // Layer number refers to the die that the node belongs to. The layer number of base die is zero and die above it one, etc. 
+ // This data is also considered as a hot data since it is used in inner loop of router, but since it didn't fit nicely into t_rr_node_data due to alignment issues, we had to store it + // in a separate vector. + vtr::vector node_layer_; + // Edge storage. vtr::vector edge_src_node_; vtr::vector edge_dest_node_; @@ -721,6 +743,7 @@ class t_rr_graph_view { const vtr::array_view_id node_ptc, const vtr::array_view_id node_first_edge, const vtr::array_view_id node_fan_in, + const vtr::array_view_id node_layer, const vtr::array_view_id edge_src_node, const vtr::array_view_id edge_dest_node, const vtr::array_view_id edge_switch) @@ -728,6 +751,7 @@ class t_rr_graph_view { , node_ptc_(node_ptc) , node_first_edge_(node_first_edge) , node_fan_in_(node_fan_in) + , node_layer_(node_layer) , edge_src_node_(edge_src_node) , edge_dest_node_(edge_dest_node) , edge_switch_(edge_switch) {} @@ -784,6 +808,11 @@ class t_rr_graph_view { return node_fan_in_[id]; } + /* Retrieve layer(die) number that RRNodeId is located at */ + short node_layer(RRNodeId id) const{ + return node_layer_[id]; + } + // This prefetechs hot RR node data required for optimization. // // Note: This is optional, but may lower time spent on memory stalls in @@ -824,6 +853,7 @@ class t_rr_graph_view { vtr::array_view_id node_ptc_; vtr::array_view_id node_first_edge_; vtr::array_view_id node_fan_in_; + vtr::array_view_id node_layer_; vtr::array_view_id edge_src_node_; vtr::array_view_id edge_dest_node_; vtr::array_view_id edge_switch_; diff --git a/libs/librrgraph/src/base/rr_graph_view.h b/libs/librrgraph/src/base/rr_graph_view.h index cdae9ebe5de..3d808b23c71 100644 --- a/libs/librrgraph/src/base/rr_graph_view.h +++ b/libs/librrgraph/src/base/rr_graph_view.h @@ -160,6 +160,11 @@ class RRGraphView { return node_storage_.node_yhigh(node); } + /** @brief Get the layer num of a routing resource node. This function is inlined for runtime optimization. 
*/ + inline short node_layer(RRNodeId node) const { + return node_storage_.node_layer(node); + } + /** @brief Get the first out coming edge of resource node. This function is inlined for runtime optimization. */ inline RREdgeId node_first_edge(RRNodeId node) const { return node_storage_.first_edge(node); diff --git a/libs/librrgraph/src/base/rr_node_types.h b/libs/librrgraph/src/base/rr_node_types.h index ed0da0b37fe..56c2b97c3e6 100644 --- a/libs/librrgraph/src/base/rr_node_types.h +++ b/libs/librrgraph/src/base/rr_node_types.h @@ -112,8 +112,8 @@ struct t_rr_rc_data { float C; }; -// This is the data type of fast lookups of an rr-node given an (rr_type, x, y, and the side) -//[0..num_rr_types-1][0..grid_width-1][0..grid_height-1][0..NUM_SIDES-1][0..max_ptc-1] -typedef std::array, 3>, NUM_RR_TYPES> t_rr_node_indices; +// This is the data type of fast lookups of an rr-node given an (rr_type, layer, x, y, and the side) +//[0..num_rr_types-1][0..num_layer-1][0..grid_width-1][0..grid_height-1][0..NUM_SIDES-1][0..max_ptc-1] +typedef std::array, 4>, NUM_RR_TYPES> t_rr_node_indices; #endif diff --git a/libs/librrgraph/src/base/rr_spatial_lookup.cpp b/libs/librrgraph/src/base/rr_spatial_lookup.cpp index 6f6bae475d3..5b76b3418af 100644 --- a/libs/librrgraph/src/base/rr_spatial_lookup.cpp +++ b/libs/librrgraph/src/base/rr_spatial_lookup.cpp @@ -4,7 +4,8 @@ RRSpatialLookup::RRSpatialLookup() { } -RRNodeId RRSpatialLookup::find_node(int x, +RRNodeId RRSpatialLookup::find_node(int layer, + int x, int y, t_rr_type type, int ptc, @@ -27,8 +28,8 @@ RRNodeId RRSpatialLookup::find_node(int x, node_side = SIDES[0]; } - /* Pre-check: the x, y, side and ptc should be non negative numbers! Otherwise, return an invalid id */ - if ((x < 0) || (y < 0) || (node_side == NUM_SIDES) || (ptc < 0)) { + /* Pre-check: the layer, x, y, side and ptc should be non-negative numbers! 
Otherwise, return an invalid id */ + if ((layer < 0) || (x < 0) || (y < 0) || (node_side == NUM_SIDES) || (ptc < 0)) { return RRNodeId::INVALID(); } @@ -44,9 +45,9 @@ RRNodeId RRSpatialLookup::find_node(int x, std::swap(node_x, node_y); } - VTR_ASSERT_SAFE(3 == rr_node_indices_[type].ndims()); + VTR_ASSERT_SAFE(4 == rr_node_indices_[type].ndims()); - /* Sanity check to ensure the x, y, side and ptc are in range + /* Sanity check to ensure the layer, x, y, side and ptc are in range * - Return an valid id by searching in look-up when all the parameters are in range * - Return an invalid id if any out-of-range is detected */ @@ -54,26 +55,31 @@ RRNodeId RRSpatialLookup::find_node(int x, return RRNodeId::INVALID(); } - if (node_x >= rr_node_indices_[type].dim_size(0)) { + if (size_t(layer) >= rr_node_indices_[type].dim_size(0)) { return RRNodeId::INVALID(); } - if (node_y >= rr_node_indices_[type].dim_size(1)) { + if (node_x >= rr_node_indices_[type].dim_size(1)) { return RRNodeId::INVALID(); } - if (node_side >= rr_node_indices_[type].dim_size(2)) { + if(node_y >= rr_node_indices_[type].dim_size(2)){ return RRNodeId::INVALID(); } - if (size_t(ptc) >= rr_node_indices_[type][node_x][node_y][node_side].size()) { + if (node_side >= rr_node_indices_[type].dim_size(3)) { return RRNodeId::INVALID(); } - return RRNodeId(rr_node_indices_[type][node_x][node_y][node_side][ptc]); + if (size_t(ptc) >= rr_node_indices_[type][layer][node_x][node_y][node_side].size()) { + return RRNodeId::INVALID(); + } + + return RRNodeId(rr_node_indices_[type][layer][node_x][node_y][node_side][ptc]); } -std::vector RRSpatialLookup::find_nodes(int x, +std::vector RRSpatialLookup::find_nodes(int layer, + int x, int y, t_rr_type type, e_side side) const { @@ -82,8 +88,8 @@ std::vector RRSpatialLookup::find_nodes(int x, */ std::vector nodes; - /* Pre-check: the x, y, type are valid! Otherwise, return an empty vector */ - if (x < 0 || y < 0) { + /* Pre-check: the layer, x, y are valid! 
Otherwise, return an empty vector */ + if (layer < 0 || x < 0 || y < 0) { return nodes; } @@ -99,7 +105,7 @@ std::vector RRSpatialLookup::find_nodes(int x, std::swap(node_x, node_y); } - VTR_ASSERT_SAFE(3 == rr_node_indices_[type].ndims()); + VTR_ASSERT_SAFE(4 == rr_node_indices_[type].ndims()); /* Sanity check to ensure the x, y, side are in range * - Return a list of valid ids by searching in look-up when all the parameters are in range @@ -109,28 +115,32 @@ std::vector RRSpatialLookup::find_nodes(int x, return nodes; } - if (node_x >= rr_node_indices_[type].dim_size(0)) { + if (size_t(layer) >= rr_node_indices_[type].dim_size(0)) { return nodes; } - if (node_y >= rr_node_indices_[type].dim_size(1)) { + if (node_x >= rr_node_indices_[type].dim_size(1)) { return nodes; } - if (side >= rr_node_indices_[type].dim_size(2)) { + if(node_y >= rr_node_indices_[type].dim_size(2)){ + return nodes; + } + + if (side >= rr_node_indices_[type].dim_size(3)) { return nodes; } /* Reserve space to avoid memory fragmentation */ size_t num_nodes = 0; - for (const auto& node : rr_node_indices_[type][node_x][node_y][side]) { + for (const auto& node : rr_node_indices_[type][layer][node_x][node_y][side]) { if (RRNodeId(node)) { num_nodes++; } } nodes.reserve(num_nodes); - for (const auto& node : rr_node_indices_[type][node_x][node_y][side]) { + for (const auto& node : rr_node_indices_[type][layer][node_x][node_y][side]) { if (RRNodeId(node)) { nodes.push_back(RRNodeId(node)); } @@ -139,7 +149,8 @@ std::vector RRSpatialLookup::find_nodes(int x, return nodes; } -std::vector RRSpatialLookup::find_channel_nodes(int x, +std::vector RRSpatialLookup::find_channel_nodes(int layer, + int x, int y, t_rr_type type) const { /* Pre-check: node type should be routing tracks! 
*/ @@ -147,10 +158,11 @@ std::vector RRSpatialLookup::find_channel_nodes(int x, return std::vector(); } - return find_nodes(x, y, type); + return find_nodes(layer, x, y, type); } -std::vector RRSpatialLookup::find_nodes_at_all_sides(int x, +std::vector RRSpatialLookup::find_nodes_at_all_sides(int layer, + int x, int y, t_rr_type rr_type, int ptc) const { @@ -159,17 +171,17 @@ std::vector RRSpatialLookup::find_nodes_at_all_sides(int x, /* TODO: Consider to access the raw data like find_node() rather than calling find_node() many times, which hurts runtime */ if (rr_type == IPIN || rr_type == OPIN) { indices.reserve(NUM_SIDES); - //For pins we need to look at all the sides of the current grid tile + //For pins, we need to look at all the sides of the current grid tile for (e_side side : SIDES) { - RRNodeId rr_node_index = find_node(x, y, rr_type, ptc, side); + RRNodeId rr_node_index = find_node(layer, x, y, rr_type, ptc, side); if (rr_node_index) { indices.push_back(rr_node_index); } } indices.shrink_to_fit(); } else { - //Sides do not effect non-pins so there should only be one per ptc - RRNodeId rr_node_index = find_node(x, y, rr_type, ptc); + //Sides do not affect non-pins so there should only be one per ptc + RRNodeId rr_node_index = find_node(layer, x, y, rr_type, ptc); if (rr_node_index) { indices.push_back(rr_node_index); } @@ -178,81 +190,86 @@ std::vector RRSpatialLookup::find_nodes_at_all_sides(int x, return indices; } -std::vector RRSpatialLookup::find_grid_nodes_at_all_sides(int x, +std::vector RRSpatialLookup::find_grid_nodes_at_all_sides(int layer, + int x, int y, t_rr_type rr_type) const { VTR_ASSERT(rr_type == SOURCE || rr_type == OPIN || rr_type == IPIN || rr_type == SINK); if (rr_type == SOURCE || rr_type == SINK) { - return find_nodes(x, y, rr_type); + return find_nodes(layer,x, y, rr_type); } std::vector nodes; /* Reserve space to avoid memory fragmentation */ size_t num_nodes = 0; for (e_side node_side : SIDES) { - num_nodes += find_nodes(x, y, 
rr_type, node_side).size(); + num_nodes += find_nodes(layer,x, y, rr_type, node_side).size(); } nodes.reserve(num_nodes); for (e_side node_side : SIDES) { - std::vector temp_nodes = find_nodes(x, y, rr_type, node_side); + std::vector temp_nodes = find_nodes(layer,x, y, rr_type, node_side); nodes.insert(nodes.end(), temp_nodes.begin(), temp_nodes.end()); } return nodes; } -void RRSpatialLookup::reserve_nodes(int x, +void RRSpatialLookup::reserve_nodes(int layer, + int x, int y, t_rr_type type, int num_nodes, e_side side) { - VTR_ASSERT_SAFE(3 == rr_node_indices_[type].ndims()); + VTR_ASSERT_SAFE(4 == rr_node_indices_[type].ndims()); /* For non-IPIN/OPIN nodes, the side should always be the TOP side which follows the convention in find_node() API! */ if (type != IPIN && type != OPIN) { VTR_ASSERT(side == SIDES[0]); } - resize_nodes(x, y, type, side); + resize_nodes(layer, x, y, type, side); - rr_node_indices_[type][x][y][side].reserve(num_nodes); + rr_node_indices_[type][layer][x][y][side].reserve(num_nodes); } void RRSpatialLookup::add_node(RRNodeId node, + int layer, int x, int y, t_rr_type type, int ptc, e_side side) { VTR_ASSERT(node); /* Must have a valid node id to be added */ - VTR_ASSERT_SAFE(3 == rr_node_indices_[type].ndims()); + VTR_ASSERT_SAFE(4 == rr_node_indices_[type].ndims()); /* For non-IPIN/OPIN nodes, the side should always be the TOP side which follows the convention in find_node() API! 
*/ if (type != IPIN && type != OPIN) { VTR_ASSERT(side == SIDES[0]); } - resize_nodes(x, y, type, side); + resize_nodes(layer, x, y, type, side); - if (size_t(ptc) >= rr_node_indices_[type][x][y][side].size()) { + if (size_t(ptc) >= rr_node_indices_[type][layer][x][y][side].size()) { /* Deposit invalid ids to newly allocated elements while original elements are untouched */ - rr_node_indices_[type][x][y][side].resize(ptc + 1, int(size_t(RRNodeId::INVALID()))); + rr_node_indices_[type][layer][x][y][side].resize(ptc + 1, int(size_t(RRNodeId::INVALID()))); } /* Resize on demand finished; Register the node */ - rr_node_indices_[type][x][y][side][ptc] = int(size_t(node)); + rr_node_indices_[type][layer][x][y][side][ptc] = int(size_t(node)); } -void RRSpatialLookup::mirror_nodes(const vtr::Point& src_coord, +void RRSpatialLookup::mirror_nodes(const int layer, + const vtr::Point& src_coord, const vtr::Point& des_coord, t_rr_type type, e_side side) { VTR_ASSERT(SOURCE == type || SINK == type); - resize_nodes(des_coord.x(), des_coord.y(), type, side); - rr_node_indices_[type][des_coord.x()][des_coord.y()][side] = rr_node_indices_[type][src_coord.x()][src_coord.y()][side]; + resize_nodes(layer, des_coord.x(), des_coord.y(), type, side); + rr_node_indices_[type][layer][des_coord.x()][des_coord.y()][side] = rr_node_indices_[type][layer][src_coord.x()][src_coord.y()][side]; } -void RRSpatialLookup::resize_nodes(int x, +void RRSpatialLookup::resize_nodes(int layer, + int x, int y, t_rr_type type, e_side side) { @@ -263,25 +280,30 @@ void RRSpatialLookup::resize_nodes(int x, VTR_ASSERT(type < rr_node_indices_.size()); VTR_ASSERT(x >= 0); VTR_ASSERT(y >= 0); - - if ((x >= int(rr_node_indices_[type].dim_size(0))) - || (y >= int(rr_node_indices_[type].dim_size(1))) - || (size_t(side) >= rr_node_indices_[type].dim_size(2))) { - rr_node_indices_[type].resize({std::max(rr_node_indices_[type].dim_size(0), size_t(x) + 1), - std::max(rr_node_indices_[type].dim_size(1), size_t(y) + 1), - 
std::max(rr_node_indices_[type].dim_size(2), size_t(side) + 1)}); + VTR_ASSERT(layer >= 0); + + if ((layer >= int(rr_node_indices_[type].dim_size(0))) + || (x >= int(rr_node_indices_[type].dim_size(1))) + || (y >= int(rr_node_indices_[type].dim_size(2))) + || (size_t(side) >= rr_node_indices_[type].dim_size(3))) { + rr_node_indices_[type].resize({std::max(rr_node_indices_[type].dim_size(0),size_t(layer)+1), + std::max(rr_node_indices_[type].dim_size(1), size_t(x) + 1), + std::max(rr_node_indices_[type].dim_size(2), size_t(y) + 1), + std::max(rr_node_indices_[type].dim_size(3), size_t(side) + 1)}); } } void RRSpatialLookup::reorder(const vtr::vector dest_order) { // update rr_node_indices, a map to optimize rr_index lookups for (auto& grid : rr_node_indices_) { - for (size_t x = 0; x < grid.dim_size(0); x++) { - for (size_t y = 0; y < grid.dim_size(1); y++) { - for (size_t s = 0; s < grid.dim_size(2); s++) { - for (auto& node : grid[x][y][s]) { - if (node != OPEN) { - node = size_t(dest_order[RRNodeId(node)]); + for(size_t l = 0; l < grid.dim_size(0); l++) { + for (size_t x = 0; x < grid.dim_size(1); x++) { + for (size_t y = 0; y < grid.dim_size(2); y++) { + for (size_t s = 0; s < grid.dim_size(3); s++) { + for (auto &node: grid[l][x][y][s]) { + if (node != OPEN) { + node = size_t(dest_order[RRNodeId(node)]); + } } } } diff --git a/libs/librrgraph/src/base/rr_spatial_lookup.h b/libs/librrgraph/src/base/rr_spatial_lookup.h index adffd0445fc..ccfe73a7633 100644 --- a/libs/librrgraph/src/base/rr_spatial_lookup.h +++ b/libs/librrgraph/src/base/rr_spatial_lookup.h @@ -41,24 +41,25 @@ class RRSpatialLookup { /** * @brief Returns the index of the specified routing resource node. * + * @param layer specified which FPGA die the node is located at (e.g. multi-die(3D) FPGA) * @param (x, y) are the grid location within the FPGA * @param rr_type specifies the type of resource, - * @param ptc gives a unique number of resources of that type (e.g. CHANX) at that (x,y). 
+ * @param ptc gives a unique number of resources of that type (e.g. CHANX) at that (layer,x,y). * * @note All ptcs start at 0 and are positive. * Depending on what type of resource this is, ptc can be * - the class number of a common SINK/SOURCE node of grid, * starting at 0 and go up to logical_class_inf size - 1 of SOURCEs + SINKs in a grid * - pin number of an input/output pin of a grid. They would normally start at 0 - * and go to the number of pins on a block at that (x, y) location + * and go to the number of pins on a block at that (layer,x,y) location * - track number of a routing wire in a channel. They would normally go from 0 - * to channel_width - 1 at that (x,y) + * to channel_width - 1 at that (layer,x,y) * * @note An invalid id will be returned if the node does not exist * * @note For segments (CHANX and CHANY) of length > 1, the segment is - * given an rr_index based on the (x,y) location at which it starts (i.e. - * lowest (x,y) location at which this segment exists). + * given an rr_index based on the (layer,x,y) location at which it starts (i.e. + * lowest (layer,x,y) location at which this segment exists). * * @note The 'side' argument only applies to IPIN/OPIN types, and specifies which * side of the grid tile the node should be located on. The value is ignored @@ -67,7 +68,8 @@ class RRSpatialLookup { * This routine also performs error checking to make sure the node in * question exists. */ - RRNodeId find_node(int x, + RRNodeId find_node(int layer, + int x, int y, t_rr_type type, int ptc, @@ -76,18 +78,20 @@ class RRSpatialLookup { /** * @brief Returns the indices of the specified routing resource nodes, representing routing tracks in a channel. * + * @param layer specified which FPGA die the node is located at (e.g. 
multi-die(3D) FPGA) * @param (x, y) are the coordinate of the routing channel within the FPGA * @param rr_type specifies the type of routing channel, either x-direction or y-direction * * @note - * - Return an empty list if there are no routing channel at the given (x, y) location + * - Return an empty list if there are no routing channel at the given (layer,x,y) location * - The node list returned only contain valid ids - * For example, if the 2nd routing track does not exist in a routing channel at (x, y) location, - * while the 3rd routing track does exist in a routing channel at (x, y) location, + * For example, if the 2nd routing track does not exist in a routing channel at (layer,x,y) location, + * while the 3rd routing track does exist in a routing channel at (layer,x, y) location, * the node list will not contain the node for the 2nd routing track, but the 2nd element in the list * will be the node for the 3rd routing track */ - std::vector find_channel_nodes(int x, + std::vector find_channel_nodes(int layer, + int x, int y, t_rr_type type) const; @@ -95,26 +99,29 @@ class RRSpatialLookup { * @brief Like find_node() but returns all matching nodes on all the sides. * * This is particularly useful for getting all instances - * of a specific IPIN/OPIN at a specific grid tile (x,y) location. + * of a specific IPIN/OPIN at a specific grid tile (layer,x,y). */ - std::vector find_nodes_at_all_sides(int x, + std::vector find_nodes_at_all_sides(int layer, + int x, int y, t_rr_type rr_type, int ptc) const; /** - * @brief Returns all matching nodes on all the sides at a specific grid tile (x,y) location. + * @brief Returns all matching nodes on all the sides at a specific grid tile (layer,x,y) location. 
* * As this is applicable to grid pins, the type of nodes are limited to SOURCE/SINK/IPIN/OPIN */ - std::vector find_grid_nodes_at_all_sides(int x, + std::vector find_grid_nodes_at_all_sides(int layer, + int x, int y, t_rr_type rr_type) const; /* -- Mutators -- */ public: - /** @brief Reserve the memory for a list of nodes at (x, y) location with given type and side */ - void reserve_nodes(int x, + /** @brief Reserve the memory for a list of nodes at (layer, x, y) location with given type and side */ + void reserve_nodes(int layer, + int x, int y, t_rr_type type, int num_nodes, @@ -125,6 +132,7 @@ class RRSpatialLookup { * * @note You must have a valid node id to register the node in the lookup * + * @param layer specified which FPGA die the node is located at (e.g. multi-die(3D) FPGA) * @param (x, y) are the coordinate of the node to be indexable in the fast look-up * @param type is the type of a node * @param ptc is a feature number of a node, which can be @@ -144,6 +152,7 @@ class RRSpatialLookup { * As such, multiple node addition could be efficiently implemented */ void add_node(RRNodeId node, + int layer, int x, int y, t_rr_type type, @@ -155,7 +164,7 @@ class RRSpatialLookup { * a destination coordinate. * * This function is mostly need by SOURCE and SINK nodes which are indexable in multiple locations. - * Considering a bounding box (x, y)->(x + width, y + height) of a multi-height and multi-width grid, + * Considering a bounding box (layer, x, y)->(layer, x + width, y + height) of a multi-height and multi-width grid, * SOURCE and SINK nodes are indexable in any location inside the boundry. * * An example of usage: @@ -189,13 +198,14 @@ class RRSpatialLookup { * corner when dealing with large blocks. 
But this may require the data structure to be dependent * on DeviceGrid information (it needs to identify if a grid has height > 1 as well as width > 1) */ - void mirror_nodes(const vtr::Point& src_coord, + void mirror_nodes(const int layer, + const vtr::Point& src_coord, const vtr::Point& des_coord, t_rr_type type, e_side side); /** - * @brief Resize the given 3 dimensions (x, y, side) of the RRSpatialLookup data structure for the given type + * @brief Resize the given 4 dimensions (layer, x, y, side) of the RRSpatialLookup data structure for the given type * * This function will keep any existing data * @@ -205,7 +215,8 @@ class RRSpatialLookup { * TODO: should have a reserve function but vtd::ndmatrix does not have such API * as a result, resize can be an internal one while reserve function is a public mutator */ - void resize_nodes(int x, + void resize_nodes(int layer, + int x, int y, t_rr_type type, e_side side); @@ -220,17 +231,18 @@ class RRSpatialLookup { private: /* An internal API to find all the nodes in a specific location with a given type * For OPIN/IPIN nodes that may exist on multiple sides, a specific side must be provided - * This API is NOT public because its too powerful for developers with very limited sanity checks + * This API is NOT public because it is too powerful for developers with very limited sanity checks * But it is used to build the public APIs find_channel_nodes() etc., where sufficient sanity checks are applied */ - std::vector find_nodes(int x, + std::vector find_nodes(int layer, + int x, int y, t_rr_type type, e_side side = SIDES[0]) const; /* -- Internal data storage -- */ private: - /* Fast look-up: TODO: Should rework the data type. Currently it is based on a 3-dimensional arrqay mater where some dimensions must always be accessed with a specific index. Such limitation should be overcome */ + /* Fast look-up: TODO: Should rework the data type. 
Currently it is based on a 3-dimensional array mater where some dimensions must always be accessed with a specific index. Such limitation should be overcome */ t_rr_node_indices rr_node_indices_; }; diff --git a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h index 6f4a1eaf05f..843aa582f12 100644 --- a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h +++ b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx.h @@ -4,9 +4,9 @@ * https://github.com/duck2/uxsdcxx * Modify only if your build process doesn't involve regenerating this file. * - * Cmdline: uxsdcxx/uxsdcxx.py /home/oscar/Desktop/vtr-new/libs/librrgraph/src/base/rr_graph.xsd - * Input file: /home/oscar/Desktop/vtr-new/libs/librrgraph/src/base/rr_graph.xsd - * md5sum of input file: 41df83ecf127a53590711ddec605742a + * Cmdline: uxsdcxx/uxsdcxx.py /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * Input file: /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * md5sum of input file: 8672cb3951993f7e0ea3433a02507672 */ #include @@ -82,12 +82,12 @@ template inline void load_block_types(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); template inline void load_grid_loc(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); -inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * block_type_id, int * height_offset, int * width_offset, int * x, int * y, const std::function * report_error); +inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * block_type_id, int * height_offset, int * width_offset, int * x, int * y, int* layer, const std::function * report_error); template inline void load_grid_locs(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); template inline void load_node_loc(const 
pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); -inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * ptc, int * xhigh, int * xlow, int * yhigh, int * ylow, const std::function * report_error); +inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * layer, int * ptc, int * xhigh, int * xlow, int * yhigh, int * ylow, const std::function * report_error); template inline void load_node_timing(const pugi::xml_node &root, T &out, Context &context, const std::function *report_error, ptrdiff_t *offset_debug); inline void load_node_timing_required_attributes(const pugi::xml_node &root, float * C, float * R, const std::function * report_error); @@ -269,14 +269,14 @@ constexpr const char *atok_lookup_t_block_type[] = {"height", "id", "name", "wid enum class gtok_t_block_types {BLOCK_TYPE}; constexpr const char *gtok_lookup_t_block_types[] = {"block_type"}; -enum class atok_t_grid_loc {BLOCK_TYPE_ID, HEIGHT_OFFSET, WIDTH_OFFSET, X, Y}; -constexpr const char *atok_lookup_t_grid_loc[] = {"block_type_id", "height_offset", "width_offset", "x", "y"}; +enum class atok_t_grid_loc {BLOCK_TYPE_ID, HEIGHT_OFFSET, WIDTH_OFFSET, X, Y, LAYER}; +constexpr const char *atok_lookup_t_grid_loc[] = {"block_type_id", "height_offset", "width_offset", "x", "y", "layer"}; enum class gtok_t_grid_locs {GRID_LOC}; constexpr const char *gtok_lookup_t_grid_locs[] = {"grid_loc"}; -enum class atok_t_node_loc {PTC, SIDE, XHIGH, XLOW, YHIGH, YLOW}; -constexpr const char *atok_lookup_t_node_loc[] = {"ptc", "side", "xhigh", "xlow", "yhigh", "ylow"}; +enum class atok_t_node_loc {LAYER, PTC, SIDE, XHIGH, XLOW, YHIGH, YLOW}; +constexpr const char *atok_lookup_t_node_loc[] = {"layer", "ptc", "side", "xhigh", "xlow", "yhigh", "ylow"}; enum class atok_t_node_timing {C, R}; @@ -1015,6 +1015,21 @@ inline atok_t_grid_loc lex_attr_t_grid_loc(const char *in, const std::function * report_error){ int out; - // 
global variable, must set to 0 before using it to avoid changed by other errors - errno = 0; out = std::strtol(in, NULL, 10); if(errno != 0) noreturn_report(report_error, ("Invalid value `" + std::string(in) + "` when loading into a int.").c_str()); @@ -2082,8 +2103,6 @@ inline int load_int(const char *in, const std::function * re inline unsigned int load_unsigned_int(const char *in, const std::function * report_error){ unsigned int out; - // global variable, must set to 0 before using it to avoid changed by other errors - errno = 0; out = std::strtoul(in, NULL, 10); if(errno != 0) noreturn_report(report_error, ("Invalid value `" + std::string(in) + "` when loading into a unsigned int.").c_str()); @@ -2092,8 +2111,6 @@ inline unsigned int load_unsigned_int(const char *in, const std::function * report_error){ float out; - // global variable, must set to 0 before using it to avoid changed by other errors - errno = 0; out = std::strtof(in, NULL); if(errno != 0) noreturn_report(report_error, ("Invalid value `" + std::string(in) + "` when loading into a float.").c_str()); @@ -2291,14 +2308,14 @@ inline void load_block_type_required_attributes(const pugi::xml_node &root, int if(!test_astate.all()) attr_error(test_astate, atok_lookup_t_block_type, report_error); } -inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * block_type_id, int * height_offset, int * width_offset, int * x, int * y, const std::function * report_error){ - std::bitset<5> astate = 0; +inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * block_type_id, int * height_offset, int * width_offset, int * x, int * y, int* layer, const std::function * report_error){ + std::bitset<6> astate = 0; for(pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()){ atok_t_grid_loc in = lex_attr_t_grid_loc(attr.name(), report_error); if(astate[(int)in] == 0) astate[(int)in] = 1; else noreturn_report(report_error, ("Duplicate attribute " + 
std::string(attr.name()) + " in .").c_str()); switch(in){ - case atok_t_grid_loc::BLOCK_TYPE_ID: + case atok_t_grid_loc::BLOCK_TYPE_ID: *block_type_id = load_int(attr.value(), report_error); break; case atok_t_grid_loc::HEIGHT_OFFSET: @@ -2313,20 +2330,25 @@ inline void load_grid_loc_required_attributes(const pugi::xml_node &root, int * case atok_t_grid_loc::Y: *y = load_int(attr.value(), report_error); break; + case atok_t_grid_loc::LAYER: + *layer=load_int(attr.value(), report_error); default: break; /* Not possible. */ } } - std::bitset<5> test_astate = astate | std::bitset<5>(0b00000); + std::bitset<6> test_astate = astate | std::bitset<6>(0b000000); if(!test_astate.all()) attr_error(test_astate, atok_lookup_t_grid_loc, report_error); } -inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * ptc, int * xhigh, int * xlow, int * yhigh, int * ylow, const std::function * report_error){ - std::bitset<6> astate = 0; +inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * layer, int * ptc, int * xhigh, int * xlow, int * yhigh, int * ylow, const std::function * report_error){ + std::bitset<7> astate = 0; for(pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()){ atok_t_node_loc in = lex_attr_t_node_loc(attr.name(), report_error); if(astate[(int)in] == 0) astate[(int)in] = 1; else noreturn_report(report_error, ("Duplicate attribute " + std::string(attr.name()) + " in .").c_str()); switch(in){ + case atok_t_node_loc::LAYER: + *layer = load_int(attr.value(), report_error); + break; case atok_t_node_loc::PTC: *ptc = load_int(attr.value(), report_error); break; @@ -2348,7 +2370,7 @@ inline void load_node_loc_required_attributes(const pugi::xml_node &root, int * default: break; /* Not possible. 
*/ } } - std::bitset<6> test_astate = astate | std::bitset<6>(0b000010); + std::bitset<7> test_astate = astate | std::bitset<7>(0b0000100); if(!test_astate.all()) attr_error(test_astate, atok_lookup_t_node_loc, report_error); } @@ -3202,8 +3224,10 @@ inline void load_grid_locs(const pugi::xml_node &root, T &out, Context &context, memset(&grid_loc_x, 0, sizeof(grid_loc_x)); int grid_loc_y; memset(&grid_loc_y, 0, sizeof(grid_loc_y)); - load_grid_loc_required_attributes(node, &grid_loc_block_type_id, &grid_loc_height_offset, &grid_loc_width_offset, &grid_loc_x, &grid_loc_y, report_error); - auto child_context = out.add_grid_locs_grid_loc(context, grid_loc_block_type_id, grid_loc_height_offset, grid_loc_width_offset, grid_loc_x, grid_loc_y); + int grid_loc_layer; + memset(&grid_loc_layer,0,sizeof(grid_loc_layer)); + load_grid_loc_required_attributes(node, &grid_loc_block_type_id, &grid_loc_height_offset, &grid_loc_width_offset, &grid_loc_x, &grid_loc_y, &grid_loc_layer, report_error); + auto child_context = out.add_grid_locs_grid_loc(context, grid_loc_block_type_id, grid_loc_height_offset, grid_loc_width_offset, grid_loc_x, grid_loc_y, grid_loc_layer); load_grid_loc(node, out, child_context, report_error, offset_debug); out.finish_grid_locs_grid_loc(child_context); } @@ -3227,6 +3251,9 @@ inline void load_node_loc(const pugi::xml_node &root, T &out, Context &context, for(pugi::xml_attribute attr = root.first_attribute(); attr; attr = attr.next_attribute()){ atok_t_node_loc in = lex_attr_t_node_loc(attr.name(), report_error); switch(in){ + case atok_t_node_loc::LAYER: + /* Attribute layer is already set */ + break; case atok_t_node_loc::PTC: /* Attribute ptc is already set */ break; @@ -3408,6 +3435,8 @@ inline void load_node(const pugi::xml_node &root, T &out, Context &context, cons switch(in){ case gtok_t_node::LOC: { + int node_loc_layer; + memset(&node_loc_layer, 0, sizeof(node_loc_layer)); int node_loc_ptc; memset(&node_loc_ptc, 0, sizeof(node_loc_ptc)); int 
node_loc_xhigh; @@ -3418,8 +3447,8 @@ inline void load_node(const pugi::xml_node &root, T &out, Context &context, cons memset(&node_loc_yhigh, 0, sizeof(node_loc_yhigh)); int node_loc_ylow; memset(&node_loc_ylow, 0, sizeof(node_loc_ylow)); - load_node_loc_required_attributes(node, &node_loc_ptc, &node_loc_xhigh, &node_loc_xlow, &node_loc_yhigh, &node_loc_ylow, report_error); - auto child_context = out.init_node_loc(context, node_loc_ptc, node_loc_xhigh, node_loc_xlow, node_loc_yhigh, node_loc_ylow); + load_node_loc_required_attributes(node, &node_loc_layer, &node_loc_ptc, &node_loc_xhigh, &node_loc_xlow, &node_loc_yhigh, &node_loc_ylow, report_error); + auto child_context = out.init_node_loc(context, node_loc_layer, node_loc_ptc, node_loc_xhigh, node_loc_xlow, node_loc_yhigh, node_loc_ylow); load_node_loc(node, out, child_context, report_error, offset_debug); out.finish_node_loc(child_context); } @@ -3920,6 +3949,8 @@ inline void write_grid_locs(T &in, std::ostream &os, Context &context){ os << " width_offset=\"" << in.get_grid_loc_width_offset(child_context) << "\""; os << " x=\"" << in.get_grid_loc_x(child_context) << "\""; os << " y=\"" << in.get_grid_loc_y(child_context) << "\""; + os << " layer=\"" << in.get_grid_loc_layer(child_context) << "\""; + os << "/>\n"; } } @@ -3958,6 +3989,7 @@ inline void write_node(T &in, std::ostream &os, Context &context){ { auto child_context = in.get_node_loc(context); os << " @@ -687,7 +687,7 @@ inline void load_grid_locs_capnp_type(const ucap::GridLocs::Reader &root, T &out auto data = root.getGridLocs(); out.preallocate_grid_locs_grid_loc(context, data.size()); for(const auto & el : data) { - auto child_context = out.add_grid_locs_grid_loc(context, el.getBlockTypeId(), el.getHeightOffset(), el.getWidthOffset(), el.getX(), el.getY()); + auto child_context = out.add_grid_locs_grid_loc(context, el.getBlockTypeId(), el.getHeightOffset(), el.getWidthOffset(), el.getX(), el.getY(), el.getLayer()); load_grid_loc_capnp_type(el, out, 
child_context, report_error, stack); out.finish_grid_locs_grid_loc(child_context); stack->back().second += 1; @@ -775,7 +775,7 @@ inline void load_node_capnp_type(const ucap::Node::Reader &root, T &out, Context stack->push_back(std::make_pair("getLoc", 0)); if (root.hasLoc()) { auto child_el = root.getLoc(); - auto child_context = out.init_node_loc(context, child_el.getPtc(), child_el.getXhigh(), child_el.getXlow(), child_el.getYhigh(), child_el.getYlow()); + auto child_context = out.init_node_loc(context, child_el.getLayer(), child_el.getPtc(), child_el.getXhigh(), child_el.getXlow(), child_el.getYhigh(), child_el.getYlow()); load_node_loc_capnp_type(child_el, out, child_context, report_error, stack); out.finish_node_loc(child_context); } @@ -1153,6 +1153,7 @@ inline void write_node_capnp_type(T &in, ucap::Node::Builder &root, Context &con { auto child_context = in.get_node_loc(context); auto node_loc = root.initLoc(); + node_loc.setLayer(in.get_node_loc_layer(child_context)); node_loc.setPtc(in.get_node_loc_ptc(child_context)); if((bool)in.get_node_loc_side(child_context)) node_loc.setSide(conv_to_enum_loc_side(in.get_node_loc_side(child_context))); diff --git a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h index 69795c800ca..9a61c8cbe12 100644 --- a/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h +++ b/libs/librrgraph/src/io/gen/rr_graph_uxsdcxx_interface.h @@ -4,9 +4,9 @@ * https://github.com/duck2/uxsdcxx * Modify only if your build process doesn't involve regenerating this file. 
* - * Cmdline: uxsdcxx/uxsdcxx.py /home/oscar/Desktop/vtr-new/libs/librrgraph/src/base/rr_graph.xsd - * Input file: /home/oscar/Desktop/vtr-new/libs/librrgraph/src/base/rr_graph.xsd - * md5sum of input file: 41df83ecf127a53590711ddec605742a + * Cmdline: uxsdcxx/uxsdcxx.py /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * Input file: /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd + * md5sum of input file: 8672cb3951993f7e0ea3433a02507672 */ #include @@ -332,6 +332,7 @@ class RrGraphBase { * * * + * * * * @@ -342,6 +343,7 @@ class RrGraphBase { virtual inline int get_grid_loc_width_offset(typename ContextTypes::GridLocReadContext &ctx) = 0; virtual inline int get_grid_loc_x(typename ContextTypes::GridLocReadContext &ctx) = 0; virtual inline int get_grid_loc_y(typename ContextTypes::GridLocReadContext &ctx) = 0; + virtual inline int get_grid_loc_layer(typename ContextTypes::GridLocReadContext &ctx) =0; /** Generated for complex type "grid_locs": * @@ -351,13 +353,14 @@ class RrGraphBase { * */ virtual inline void preallocate_grid_locs_grid_loc(typename ContextTypes::GridLocsWriteContext &ctx, size_t size) = 0; - virtual inline typename ContextTypes::GridLocWriteContext add_grid_locs_grid_loc(typename ContextTypes::GridLocsWriteContext &ctx, int block_type_id, int height_offset, int width_offset, int x, int y) = 0; + virtual inline typename ContextTypes::GridLocWriteContext add_grid_locs_grid_loc(typename ContextTypes::GridLocsWriteContext &ctx, int block_type_id, int height_offset, int width_offset, int x, int y, int layer) = 0; virtual inline void finish_grid_locs_grid_loc(typename ContextTypes::GridLocWriteContext &ctx) = 0; virtual inline size_t num_grid_locs_grid_loc(typename ContextTypes::GridLocsReadContext &ctx) = 0; virtual inline typename ContextTypes::GridLocReadContext get_grid_locs_grid_loc(int n, typename ContextTypes::GridLocsReadContext &ctx) = 0; /** Generated for complex type "node_loc": * + * * * * @@ 
-366,6 +369,7 @@ class RrGraphBase { * * */ + virtual inline int get_node_loc_layer(typename ContextTypes::NodeLocReadContext &ctx) = 0; virtual inline int get_node_loc_ptc(typename ContextTypes::NodeLocReadContext &ctx) = 0; virtual inline enum_loc_side get_node_loc_side(typename ContextTypes::NodeLocReadContext &ctx) = 0; virtual inline void set_node_loc_side(enum_loc_side side, typename ContextTypes::NodeLocWriteContext &ctx) = 0; @@ -436,7 +440,7 @@ class RrGraphBase { virtual inline void set_node_direction(enum_node_direction direction, typename ContextTypes::NodeWriteContext &ctx) = 0; virtual inline unsigned int get_node_id(typename ContextTypes::NodeReadContext &ctx) = 0; virtual inline enum_node_type get_node_type(typename ContextTypes::NodeReadContext &ctx) = 0; - virtual inline typename ContextTypes::NodeLocWriteContext init_node_loc(typename ContextTypes::NodeWriteContext &ctx, int ptc, int xhigh, int xlow, int yhigh, int ylow) = 0; + virtual inline typename ContextTypes::NodeLocWriteContext init_node_loc(typename ContextTypes::NodeWriteContext &ctx, int layer, int ptc, int xhigh, int xlow, int yhigh, int ylow) = 0; virtual inline void finish_node_loc(typename ContextTypes::NodeLocWriteContext &ctx) = 0; virtual inline typename ContextTypes::NodeLocReadContext get_node_loc(typename ContextTypes::NodeReadContext &ctx) = 0; virtual inline typename ContextTypes::NodeTimingWriteContext init_node_timing(typename ContextTypes::NodeWriteContext &ctx, float C, float R) = 0; diff --git a/libs/librrgraph/src/io/rr_graph.xsd b/libs/librrgraph/src/io/rr_graph.xsd index cdc60f654e4..4c05adfe5d5 100644 --- a/libs/librrgraph/src/io/rr_graph.xsd +++ b/libs/librrgraph/src/io/rr_graph.xsd @@ -208,6 +208,7 @@ + @@ -258,6 +259,7 @@ + diff --git a/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h b/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h index 0f0cb893a4e..2f0017be2ac 100644 --- a/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h +++ 
b/libs/librrgraph/src/io/rr_graph_uxsdcxx_serializer.h @@ -627,11 +627,12 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { * */ - inline int init_node_loc(int& inode, int ptc, int xhigh, int xlow, int yhigh, int ylow) final { + inline int init_node_loc(int& inode, int layer, int ptc, int xhigh, int xlow, int yhigh, int ylow) final { auto node = (*rr_nodes_)[inode]; RRNodeId node_id = node.id(); rr_graph_builder_->set_node_coordinates(node_id, xlow, ylow, xhigh, yhigh); + rr_graph_builder_->set_node_layer(node_id, layer); rr_graph_builder_->set_node_ptc_num(node_id, ptc); return inode; } @@ -643,6 +644,9 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { inline int get_node_loc_ptc(const t_rr_node& node) final { return rr_graph_->node_ptc_num(node.id()); } + inline int get_node_loc_layer(const t_rr_node& node) final { + return rr_graph_->node_layer(node.id()); + } inline int get_node_loc_xhigh(const t_rr_node& node) final { return rr_graph_->node_xhigh(node.id()); } @@ -1453,10 +1457,10 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { grid_.grid_size(), size); } } - inline void* add_grid_locs_grid_loc(void*& /*ctx*/, int block_type_id, int height_offset, int width_offset, int x, int y) final { - const auto& type = grid_.get_physical_type(x, y); - int grid_width_offset = grid_.get_width_offset(x, y); - int grid_height_offset = grid_.get_height_offset(x, y); + inline void* add_grid_locs_grid_loc(void*& /*ctx*/, int block_type_id, int height_offset, int width_offset, int x, int y, int layer) final { + const auto& type = grid_.get_physical_type({x, y, layer}); + int grid_width_offset = grid_.get_width_offset({x, y, layer}); + int grid_height_offset = grid_.get_height_offset({x, y, layer}); if (type->index != block_type_id) { report_error( @@ -1497,6 +1501,11 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { inline int get_grid_loc_y(const t_grid_tile*& grid_loc) final { return grid_.get_grid_loc_y(grid_loc); } + + inline 
int get_grid_loc_layer(const t_grid_tile*& grid_loc) final{ + return grid_.get_grid_loc_layer(grid_loc); + } + inline size_t num_grid_locs_grid_loc(void*& /*iter*/) final { return grid_.grid_size(); } @@ -1504,6 +1513,7 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { return grid_.get_grid_locs_grid_loc(n); } + /** Generated for complex type "rr_graph": * * @@ -1627,16 +1637,16 @@ class RrGraphSerializer final : public uxsd::RrGraphBase { private: /*Allocates and load the rr_node look up table. SINK and SOURCE, IPIN and OPIN - *share the same look up table. CHANX and CHANY have individual look ups */ + *share the same look-up table. CHANX and CHANY have individual look-ups */ void process_rr_node_indices() { auto& rr_graph_builder = (*rr_graph_builder_); /* Alloc the lookup table */ for (t_rr_type rr_type : RR_TYPES) { if (rr_type == CHANX) { - rr_graph_builder.node_lookup().resize_nodes(grid_.height(), grid_.width(), rr_type, NUM_SIDES); + rr_graph_builder.node_lookup().resize_nodes(grid_.get_num_layers(),grid_.height(), grid_.width(), rr_type, NUM_SIDES); } else { - rr_graph_builder.node_lookup().resize_nodes(grid_.width(), grid_.height(), rr_type, NUM_SIDES); + rr_graph_builder.node_lookup().resize_nodes(grid_.get_num_layers(),grid_.width(), grid_.height(), rr_type, NUM_SIDES); } } diff --git a/libs/librrgraph/src/utils/describe_rr_node.cpp b/libs/librrgraph/src/utils/describe_rr_node.cpp index 484f0d47e3e..ee74b482686 100644 --- a/libs/librrgraph/src/utils/describe_rr_node.cpp +++ b/libs/librrgraph/src/utils/describe_rr_node.cpp @@ -29,8 +29,9 @@ std::string describe_rr_node(const RRGraphView& rr_graph, seg_index); } } else if (rr_graph.node_type(RRNodeId(inode)) == IPIN || rr_graph.node_type(RRNodeId(inode)) == OPIN) { - auto type = grid.get_physical_type(rr_graph.node_xlow(RRNodeId(inode)), - rr_graph.node_ylow(RRNodeId(inode))); + auto type = grid.get_physical_type({rr_graph.node_xlow(RRNodeId(inode)), + rr_graph.node_ylow(RRNodeId(inode)), + 
rr_graph.node_layer(RRNodeId(inode))}); std::string pin_name = block_type_pin_index_to_name(type, rr_graph.node_pin_num(RRNodeId(inode)), is_flat); diff --git a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp index 7013bcf8ad2..db77f7bc999 100644 --- a/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp +++ b/libs/libvtrcapnproto/gen/rr_graph_uxsdcxx.capnp @@ -2,11 +2,11 @@ # https://github.com/duck2/uxsdcxx # Modify only if your build process doesn't involve regenerating this file. # -# Cmdline: uxsdcxx/uxsdcap.py /research/ece/lnis/USERS/tang/github/vtr-verilog-to-routing/vpr/src/route/rr_graph.xsd -# Input file: /research/ece/lnis/USERS/tang/github/vtr-verilog-to-routing/vpr/src/route/rr_graph.xsd -# md5sum of input file: cd57d47fc9dfa62c7030397ca759217e +# Cmdline: uxsdcxx/uxsdcap.py /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd +# Input file: /home/amin/vtr-verilog-to-routing/libs/librrgraph/src/io/rr_graph.xsd +# md5sum of input file: 8672cb3951993f7e0ea3433a02507672 -@0xe4650d345d47589d; +@0xe9a519eb0e454dd4; using Cxx = import "/capnp/c++.capnp"; $Cxx.namespace("ucap"); @@ -154,6 +154,7 @@ struct GridLoc { widthOffset @2 :Int32; x @3 :Int32; y @4 :Int32; + layer @5 : Int32; } struct GridLocs { @@ -161,12 +162,13 @@ struct GridLocs { } struct NodeLoc { - ptc @0 :Int32; - side @1 :LocSide; - xhigh @2 :Int32; - xlow @3 :Int32; - yhigh @4 :Int32; - ylow @5 :Int32; + layer @0 :Int32; + ptc @1 :Int32; + side @2 :LocSide; + xhigh @3 :Int32; + xlow @4 :Int32; + yhigh @5 :Int32; + ylow @6 :Int32; } struct NodeTiming { diff --git a/utils/fasm/src/fasm.cpp b/utils/fasm/src/fasm.cpp index 925799f22a4..90d4b6671ee 100644 --- a/utils/fasm/src/fasm.cpp +++ b/utils/fasm/src/fasm.cpp @@ -57,10 +57,11 @@ void FasmWriterVisitor::visit_clb_impl(ClusterBlockId blk_id, const t_pb* clb) { int x = place_ctx.block_locs[blk_id].loc.x; int y = place_ctx.block_locs[blk_id].loc.y; + int layer_num = 
place_ctx.block_locs[blk_id].loc.layer; int sub_tile = place_ctx.block_locs[blk_id].loc.sub_tile; - physical_tile_ = device_ctx.grid.get_physical_type(x, y); + physical_tile_ = device_ctx.grid.get_physical_type({x, y, layer_num}); logical_block_ = cluster_ctx.clb_nlist.block_type(blk_id); - const auto& grid_meta = device_ctx.grid.get_metadata(x, y); + const auto& grid_meta = device_ctx.grid.get_metadata({x, y, layer_num}); blk_prefix_ = ""; clb_prefix_ = ""; diff --git a/utils/fasm/test/test_fasm.cpp b/utils/fasm/test/test_fasm.cpp index 3632f8fae81..ef55f4604f5 100644 --- a/utils/fasm/test/test_fasm.cpp +++ b/utils/fasm/test/test_fasm.cpp @@ -192,7 +192,8 @@ static std::string get_pin_feature (size_t inode) { // Get tile physical tile and the pin number int ilow = rr_graph.node_xlow(RRNodeId(inode)); int jlow = rr_graph.node_ylow(RRNodeId(inode)); - auto physical_tile = device_ctx.grid.get_physical_type(ilow, jlow); + int layer_num = rr_graph.node_layer(RRNodeId(inode)); + auto physical_tile = device_ctx.grid.get_physical_type({ilow, jlow, layer_num}); int pin_num = rr_graph.node_pin_num(RRNodeId(inode)); // Get the sub tile (type, not instance) and index of its pin that matches diff --git a/utils/route_diag/src/main.cpp b/utils/route_diag/src/main.cpp index 7e8756d6430..892674cc43b 100644 --- a/utils/route_diag/src/main.cpp +++ b/utils/route_diag/src/main.cpp @@ -155,6 +155,9 @@ static void profile_source(const Netlist<>& net_list, vtr::ScopedStartFinishTimer timer("Profiling source"); const auto& device_ctx = g_vpr_ctx.device(); const auto& grid = device_ctx.grid; + // TODO: We assume if this function is called, the grid has a 2D structure - It assumes everything is on layer number 0, so it won't work yet for multi-layer FPGAs + VTR_ASSERT(grid.get_num_layers() == 1); + int layer_num = 0; auto router_lookahead = make_router_lookahead(det_routing_arch, router_opts.lookahead_type, @@ -175,17 +178,17 @@ static void profile_source(const Netlist<>& net_list, for (int 
sink_x = start_x; sink_x <= end_x; sink_x++) { for (int sink_y = start_y; sink_y <= end_y; sink_y++) { - if(device_ctx.grid.get_physical_type(sink_x, sink_y) == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if(device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num}) == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { continue; } auto best_sink_ptcs = get_best_classes(RECEIVER, - device_ctx.grid.get_physical_type(sink_x, sink_y)); + device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num})); bool successfully_routed; for (int sink_ptc : best_sink_ptcs) { VTR_ASSERT(sink_ptc != OPEN); - - int sink_rr_node = size_t(device_ctx.rr_graph.node_lookup().find_node(sink_x, sink_y, SINK, sink_ptc)); + //TODO: should pass layer_num instead of 0 to node_lookup once the multi-die FPGAs support is completed + int sink_rr_node = size_t(device_ctx.rr_graph.node_lookup().find_node(0,sink_x, sink_y, SINK, sink_ptc)); if (directconnect_exists(source_rr_node, sink_rr_node)) { //Skip if we shouldn't measure direct connects and a direct connect exists diff --git a/vpr/src/base/SetupGrid.cpp b/vpr/src/base/SetupGrid.cpp index b15b3e25469..3569f5bff1f 100644 --- a/vpr/src/base/SetupGrid.cpp +++ b/vpr/src/base/SetupGrid.cpp @@ -156,7 +156,6 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo auto auto_layout_itr = std::find_if(grid_layouts.begin(), grid_layouts.end(), is_auto_grid_def); if (auto_layout_itr != grid_layouts.end()) { //Automatic grid layout, find the smallest height/width - VTR_ASSERT_SAFE_MSG(std::find_if(auto_layout_itr + 1, grid_layouts.end(), is_auto_grid_def) == grid_layouts.end(), "Only one "); //Determine maximum device size to try before concluding that the circuit cannot fit on any device @@ -175,7 +174,7 @@ static DeviceGrid auto_size_device_grid(const std::vector& grid_layo const auto& grid_def = *auto_layout_itr; VTR_ASSERT(grid_def.aspect_ratio >= 0.); - //Initial size is 3x3, the smallest possible while avoiding + //Initial size is num_layers x 3 
x 3, the smallest possible while avoiding //start before end location issues with location //specifications size_t width = 3; @@ -277,7 +276,7 @@ static std::vector grid_overused_resources(const Devic //Initialize available tile counts std::unordered_map avail_tiles; for (auto& tile_type : device_ctx.physical_tile_types) { - avail_tiles[&tile_type] = grid.num_instances(&tile_type); + avail_tiles[&tile_type] = grid.num_instances(&tile_type, -1); } //Sort so we allocate logical blocks with the fewest equivalent sites first (least flexible) @@ -691,54 +690,61 @@ static void set_grid_block_type(int priority, ///@brief Check grid is valid static void CheckGrid(const DeviceGrid& grid) { - for (int layer = 0; layer < grid.get_num_layers(); layer++) { //Check each die individually - for (size_t i = 0; i < grid.width(); ++i) { - for (size_t j = 0; j < grid.height(); ++j) { - auto type = grid.get_physical_type(i, j); + for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) { //Check each die individually + for (int i = 0; i < (int)grid.width(); ++i) { + for (int j = 0; j < (int)grid.height(); ++j) { + const t_physical_tile_loc tile_loc(i, j, layer_num); + const auto& type = grid.get_physical_type(tile_loc); + int width_offset = grid.get_width_offset(tile_loc); + int height_offset = grid.get_height_offset(tile_loc); if (nullptr == type) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) has no type.\n", i, j); + VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) has no type.\n", i, j, layer_num); } - int width_offset = grid.get_width_offset(i, j); - int height_offset = grid.get_height_offset(i, j); - if ((width_offset < 0) - || (width_offset >= type->width)) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) has invalid width offset (%d).\n", i, j, layer, + if ((grid.get_width_offset(tile_loc) < 0) + || (grid.get_width_offset(tile_loc) >= type->width)) { + VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) has invalid 
width offset (%d).\n", + i, + j, + layer_num, width_offset); } - if ((height_offset < 0) - || (height_offset >= type->height)) { - VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) has invalid height offset (%d).\n", i, j, layer, + if ((grid.get_height_offset(tile_loc) < 0) + || (grid.get_height_offset(tile_loc) >= type->height)) { + VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) has invalid height offset (%d).\n", + i, + j, + layer_num, height_offset); } //Verify that type and width/height offsets are correct (e.g. for dimension > 1 blocks) - if (width_offset == 0 && height_offset == 0) { + if (grid.get_width_offset(tile_loc) == 0 && grid.get_height_offset(tile_loc) == 0) { //From the root block check that all other blocks are correct - for (size_t x = i; x < i + type->width; ++x) { + for (int x = i; x < i + type->width; ++x) { int x_offset = x - i; - for (size_t y = j; y < j + type->height; ++y) { + for (int y = j; y < j + type->height; ++y) { int y_offset = y - j; - - const auto& tile_type = grid.get_physical_type(x, y); - int tile_width_offset = grid.get_width_offset(x, y); - int tile_height_offset = grid.get_height_offset(x, y); + const t_physical_tile_loc tile_loc_offset(x, y, layer_num); + const auto& tile_type = grid.get_physical_type(tile_loc_offset); + int tile_width_offset = grid.get_width_offset(tile_loc_offset); + int tile_height_offset = grid.get_height_offset(tile_loc_offset); if (tile_type != type) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) should have type '%s' (based on root location) but has type '%s'\n", - i, j, layer, type->name, tile_type->name); + i, j, layer_num, type->name, tile_type->name); } if (tile_width_offset != x_offset) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) of type '%s' should have width offset '%d' (based on root location) but has '%d'\n", - i, j, layer, type->name, x_offset, tile_width_offset); + i, j, layer_num, type->name, x_offset, tile_width_offset); } if
(tile_height_offset != y_offset) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Grid Location (%d,%d,%d) of type '%s' should have height offset '%d' (based on root location) but has '%d'\n", - i, j, layer, type->name, y_offset, tile_height_offset); + i, j, layer_num, type->name, y_offset, tile_height_offset); } } } @@ -751,13 +757,15 @@ static void CheckGrid(const DeviceGrid& grid) { float calculate_device_utilization(const DeviceGrid& grid, std::map instance_counts) { //Record the resources of the grid std::map grid_resources; - for (size_t x = 0; x < grid.width(); ++x) { - for (size_t y = 0; y < grid.height(); ++y) { - int width_offset = grid.get_width_offset(x, y); - int height_offset = grid.get_height_offset(x, y); - if (width_offset == 0 && height_offset == 0) { - const auto& type = grid.get_physical_type(x, y); - ++grid_resources[type]; + for (int layer_num = 0; layer_num < grid.get_num_layers(); ++layer_num) { + for (int x = 0; x < (int)grid.width(); ++x) { + for (int y = 0; y < (int)grid.height(); ++y) { + int width_offset = grid.get_width_offset({x, y, layer_num}); + int height_offset = grid.get_height_offset({x, y, layer_num}); + if (width_offset == 0 && height_offset == 0) { + const auto& type = grid.get_physical_type({x, y, layer_num}); + ++grid_resources[type]; + } } } } diff --git a/vpr/src/base/gen/vpr_constraints_uxsdcxx_interface.h b/vpr/src/base/gen/vpr_constraints_uxsdcxx_interface.h index 6da8558b84d..8939778861e 100644 --- a/vpr/src/base/gen/vpr_constraints_uxsdcxx_interface.h +++ b/vpr/src/base/gen/vpr_constraints_uxsdcxx_interface.h @@ -58,6 +58,7 @@ class VprConstraintsBase { * * */ + virtual inline int get_add_region_layer_num(typename ContextTypes::AddRegionReadContext& ctx) = 0; virtual inline int get_add_region_subtile(typename ContextTypes::AddRegionReadContext& ctx) = 0; virtual inline void set_add_region_subtile(int subtile, typename ContextTypes::AddRegionWriteContext& ctx) = 0; virtual inline int get_add_region_x_high(typename 
ContextTypes::AddRegionReadContext& ctx) = 0; diff --git a/vpr/src/base/read_place.cpp b/vpr/src/base/read_place.cpp index 68b51ef5b02..17c40e4781e 100644 --- a/vpr/src/base/read_place.cpp +++ b/vpr/src/base/read_place.cpp @@ -200,16 +200,36 @@ void read_place_body(std::ifstream& placement_file, } else if (tokens[0][0] == '#') { continue; //Skip commented lines - } else if (tokens.size() == 4 || (tokens.size() > 4 && tokens[4][0] == '#')) { + } else if ((tokens.size() == 4 || (tokens.size() > 4 && tokens[4][0] == '#')) || (tokens.size() == 5 || (tokens.size() > 5 && tokens[5][0] == '#'))) { //Load the block location // - //We should have 4 tokens of actual data, with an optional 5th (commented) token indicating VPR's + // If the place file corresponds to a 3D architecture, it should contain 5 tokens of actual data, with an optional 6th (commented) token indicating VPR's internal block number. + // If it belongs to a 2D architecture file, supported for backward compatibility, we should have 4 tokens of actual data, with an optional 5th (commented) token indicating VPR's //internal block number + int block_name_index = 0; + int block_x_index = 1; + int block_y_index = 2; + int sub_tile_index_index = 3; + int block_layer_index; + if (tokens.size() == 4 || (tokens.size() > 4 && tokens[4][0] == '#')) { + //2D architecture + block_layer_index = -1; + + } else { + // 3D architecture + block_layer_index = 4; + } - std::string block_name = tokens[0]; - int block_x = vtr::atoi(tokens[1]); - int block_y = vtr::atoi(tokens[2]); - int sub_tile_index = vtr::atoi(tokens[3]); + std::string block_name = tokens[block_name_index]; + int block_x = vtr::atoi(tokens[block_x_index]); + int block_y = vtr::atoi(tokens[block_y_index]); + int sub_tile_index = vtr::atoi(tokens[sub_tile_index_index]); + int block_layer; + if (block_layer_index != -1) { + block_layer = vtr::atoi(tokens[block_layer_index]); + } else { + block_layer = 0; + } //c-style block name needed for printing block name in
error messages char const* c_block_name = block_name.c_str(); @@ -230,7 +250,7 @@ void read_place_body(std::ifstream& placement_file, //Check if block is listed multiple times with conflicting locations in constraints file if (seen_blocks[blk_id] > 0) { - if (block_x != place_ctx.block_locs[blk_id].loc.x || block_y != place_ctx.block_locs[blk_id].loc.y || sub_tile_index != place_ctx.block_locs[blk_id].loc.sub_tile) { + if (block_x != place_ctx.block_locs[blk_id].loc.x || block_y != place_ctx.block_locs[blk_id].loc.y || sub_tile_index != place_ctx.block_locs[blk_id].loc.sub_tile || block_layer != place_ctx.block_locs[blk_id].loc.layer) { std::string cluster_name = cluster_ctx.clb_nlist.block_name(blk_id); VPR_THROW(VPR_ERROR_PLACE, "The location of cluster %s (#%d) is specified %d times in the constraints file with conflicting locations. \n" @@ -243,6 +263,7 @@ void read_place_body(std::ifstream& placement_file, loc.x = block_x; loc.y = block_y; loc.sub_tile = sub_tile_index; + loc.layer = block_layer; if (seen_blocks[blk_id] == 0) { set_block_location(blk_id, loc); @@ -301,8 +322,8 @@ void print_place(const char* net_file, net_file, net_id); fprintf(fp, "Array size: %zu x %zu logic blocks\n\n", device_ctx.grid.width(), device_ctx.grid.height()); - fprintf(fp, "#block name\tx\ty\tsubblk\tblock number\n"); - fprintf(fp, "#----------\t--\t--\t------\t------------\n"); + fprintf(fp, "#block name\tx\ty\tsubblk\tlayer\tblock number\n"); + fprintf(fp, "#----------\t--\t--\t------\t-----\t------------\n"); if (!place_ctx.block_locs.empty()) { //Only if placement exists for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { @@ -310,7 +331,11 @@ void print_place(const char* net_file, if (strlen(cluster_ctx.clb_nlist.block_name(blk_id).c_str()) < 8) fprintf(fp, "\t"); - fprintf(fp, "%d\t%d\t%d", place_ctx.block_locs[blk_id].loc.x, place_ctx.block_locs[blk_id].loc.y, place_ctx.block_locs[blk_id].loc.sub_tile); + fprintf(fp, "%d\t%d\t%d\t%d", + place_ctx.block_locs[blk_id].loc.x, 
+ place_ctx.block_locs[blk_id].loc.y, + place_ctx.block_locs[blk_id].loc.sub_tile, + place_ctx.block_locs[blk_id].loc.layer); fprintf(fp, "\t#%zu\n", size_t(blk_id)); } } diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp index 348beea3033..e5d59d6245b 100644 --- a/vpr/src/base/read_route.cpp +++ b/vpr/src/base/read_route.cpp @@ -55,7 +55,7 @@ static void process_route(std::ifstream& fp, const char* filename, int& lineno); static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* filename, int& lineno); static void process_nets(std::ifstream& fp, ClusterNetId inet, std::string name, std::vector input_tokens, const char* filename, int& lineno); static void process_global_blocks(std::ifstream& fp, ClusterNetId inet, const char* filename, int& lineno); -static void format_coordinates(int& x, int& y, std::string coord, ClusterNetId net, const char* filename, const int lineno); +static void format_coordinates(int& layer_num, int& x, int& y, std::string coord, ClusterNetId net, const char* filename, const int lineno); static void format_pin_info(std::string& pb_name, std::string& port_name, int& pb_pin_num, std::string input); static std::string format_name(std::string name); static bool check_rr_graph_connectivity(RRNodeId prev_node, RRNodeId node); @@ -233,7 +233,7 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file /*remember the position of the last line in order to go back*/ std::streampos oldpos = fp.tellg(); - int inode, x, y, x2, y2, ptc, switch_id, net_pin_index, offset; + int inode, layer_num, x, y, layer_num2, x2, y2, ptc, switch_id, net_pin_index, offset; std::string prev_type; int node_count = 0; std::string input; @@ -278,11 +278,11 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file "Node %d has a type that does not match the RR graph", inode); } - format_coordinates(x, y, tokens[3], inet, filename, lineno); + format_coordinates(layer_num, x, y, tokens[3], 
inet, filename, lineno); auto rr_node = RRNodeId(inode); if (tokens[4] == "to") { - format_coordinates(x2, y2, tokens[5], inet, filename, lineno); + format_coordinates(layer_num2, x2, y2, tokens[5], inet, filename, lineno); if (rr_graph.node_xlow(rr_node) != x || rr_graph.node_xhigh(rr_node) != x2 || rr_graph.node_yhigh(rr_node) != y2 || rr_graph.node_ylow(rr_node) != y) { vpr_throw(VPR_ERROR_ROUTE, filename, lineno, "The coordinates of node %d does not match the rr graph", inode); @@ -312,7 +312,7 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file /* Verify types and ptc*/ if (tokens[2] == "SOURCE" || tokens[2] == "SINK" || tokens[2] == "OPIN" || tokens[2] == "IPIN") { - const auto& type = device_ctx.grid.get_physical_type(x, y); + const auto& type = device_ctx.grid.get_physical_type({x, y, layer_num}); if (tokens[4 + offset] == "Pad:" && !is_io_type(type)) { vpr_throw(VPR_ERROR_ROUTE, filename, lineno, "Node %d is of the wrong type", inode); @@ -333,16 +333,17 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file /*Process switches and pb pin info if it is ipin or opin type*/ if (tokens[6 + offset] != "Switch:") { /*This is an opin or ipin, process its pin nums*/ - auto type = device_ctx.grid.get_physical_type(x, y); + auto type = device_ctx.grid.get_physical_type({x, y, layer_num}); if (!is_io_type(type) && (tokens[2] == "IPIN" || tokens[2] == "OPIN")) { int pin_num = rr_graph.node_pin_num(RRNodeId(inode)); - int height_offset = device_ctx.grid.get_height_offset(x, y); + int height_offset = device_ctx.grid.get_height_offset({x, y, layer_num}); int capacity, relative_pin; std::tie(capacity, relative_pin) = get_capacity_location_from_physical_pin(type, pin_num); - ClusterBlockId iblock = place_ctx.grid_blocks[x][y - height_offset].blocks[capacity]; + ClusterBlockId iblock = place_ctx.grid_blocks.block_at_location({x, y - height_offset, capacity, layer_num}); + t_pb_graph_pin* pb_pin; pb_pin = 
get_pb_graph_node_pin_from_block_pin(iblock, pin_num); @@ -419,7 +420,7 @@ static void process_global_blocks(std::ifstream& fp, ClusterNetId inet, const ch auto& place_ctx = g_vpr_ctx.placement(); std::string block, bnum_str; - int x, y; + int layer_num, x, y; std::vector tokens; int pin_counter = 0; @@ -439,7 +440,7 @@ static void process_global_blocks(std::ifstream& fp, ClusterNetId inet, const ch fp.seekg(oldpos); return; } else { - format_coordinates(x, y, tokens[4], inet, filename, lineno); + format_coordinates(layer_num, x, y, tokens[4], inet, filename, lineno); /*remove ()*/ bnum_str = format_name(tokens[2]); @@ -472,17 +473,30 @@ static void process_global_blocks(std::ifstream& fp, ClusterNetId inet, const ch } ///@brief Parse coordinates in the form of (x,y) into correct x and y values -static void format_coordinates(int& x, int& y, std::string coord, ClusterNetId net, const char* filename, const int lineno) { +static void format_coordinates(int& layer_num, int& x, int& y, std::string coord, ClusterNetId net, const char* filename, const int lineno) { coord = format_name(coord); + std::stringstream coord_stream(coord); - if (!(coord_stream >> x)) { - vpr_throw(VPR_ERROR_ROUTE, filename, lineno, - "Net %lu has coordinates that is not in the form (x,y)", size_t(net)); + std::vector coords; + int tmp_coord; + while (coord_stream >> tmp_coord) { + coords.push_back(tmp_coord); + coord_stream.ignore(1, ','); } - coord_stream.ignore(1, ' '); - if (!(coord_stream >> y)) { + if (coords.size() != 2 && coords.size() != 3) { vpr_throw(VPR_ERROR_ROUTE, filename, lineno, - "Net %lu has coordinates that is not in the form (x,y)", size_t(net)); + "Net %lu has coordinates that is not in the form (layer_num,x,y)", size_t(net)); + } + + if (coords.size() == 2) { + layer_num = 0; + x = coords[0]; + y = coords[1]; + } else { + VTR_ASSERT(coords.size() == 3); + layer_num = coords[0]; + x = coords[1]; + y = coords[2]; } } @@ -578,9 +592,10 @@ void print_route(const Netlist<>& 
net_list, t_rr_type rr_type = rr_graph.node_type(inode); int ilow = rr_graph.node_xlow(inode); int jlow = rr_graph.node_ylow(inode); + int layer_num = rr_graph.node_layer(inode); - fprintf(fp, "Node:\t%zu\t%6s (%d,%d) ", size_t(inode), - rr_graph.node_type_string(inode), ilow, jlow); + fprintf(fp, "Node:\t%zu\t%6s (%d,%d,%d) ", size_t(inode), + rr_graph.node_type_string(inode), layer_num, ilow, jlow); if ((ilow != rr_graph.node_xhigh(inode)) || (jlow != rr_graph.node_yhigh(inode))) @@ -590,7 +605,7 @@ void print_route(const Netlist<>& net_list, switch (rr_type) { case IPIN: case OPIN: - if (is_io_type(device_ctx.grid.get_physical_type(ilow, jlow))) { + if (is_io_type(device_ctx.grid.get_physical_type({ilow, jlow, layer_num}))) { fprintf(fp, " Pad: "); } else { /* IO Pad. */ fprintf(fp, " Pin: "); @@ -604,7 +619,7 @@ void print_route(const Netlist<>& net_list, case SOURCE: case SINK: - if (is_io_type(device_ctx.grid.get_physical_type(ilow, jlow))) { + if (is_io_type(device_ctx.grid.get_physical_type({ilow, jlow, layer_num}))) { fprintf(fp, " Pad: "); } else { /* IO Pad. 
*/ fprintf(fp, " Class: "); @@ -620,17 +635,18 @@ void print_route(const Netlist<>& net_list, fprintf(fp, "%d ", rr_graph.node_ptc_num(inode)); - auto physical_tile = device_ctx.grid.get_physical_type(ilow, jlow); + auto physical_tile = device_ctx.grid.get_physical_type({ilow, jlow, layer_num}); if (!is_io_type(physical_tile) && (rr_type == IPIN || rr_type == OPIN)) { int pin_num = rr_graph.node_pin_num(inode); - int xoffset = device_ctx.grid.get_width_offset(ilow, jlow); - int yoffset = device_ctx.grid.get_height_offset(ilow, jlow); + int xoffset = device_ctx.grid.get_width_offset({ilow, jlow, layer_num}); + int yoffset = device_ctx.grid.get_height_offset({ilow, jlow, layer_num}); const t_sub_tile* sub_tile; int sub_tile_rel_cap; std::tie(sub_tile, sub_tile_rel_cap) = get_sub_tile_from_pin_physical_num(physical_tile, pin_num); int sub_tile_offset = sub_tile->capacity.low + sub_tile_rel_cap; - ClusterBlockId iblock = place_ctx.grid_blocks[ilow - xoffset][jlow - yoffset].blocks[sub_tile_offset]; + ClusterBlockId iblock = place_ctx.grid_blocks.block_at_location({ilow - xoffset, jlow - yoffset, + sub_tile_offset, layer_num}); VTR_ASSERT(iblock); const t_pb_graph_pin* pb_pin; if (is_pin_on_tile(physical_tile, pin_num)) { diff --git a/vpr/src/base/region.cpp b/vpr/src/base/region.cpp index 594ec76564e..5c38f9ace86 100644 --- a/vpr/src/base/region.cpp +++ b/vpr/src/base/region.cpp @@ -9,17 +9,23 @@ Region::Region() { region_bounds.set_ymin(999); region_bounds.set_xmax(-1); region_bounds.set_ymax(-1); + layer_num = -1; } -vtr::Rect Region::get_region_rect() const { - return region_bounds; +RegionRectCoord Region::get_region_rect() const { + return RegionRectCoord(region_bounds, layer_num); } -void Region::set_region_rect(int _xmin, int _ymin, int _xmax, int _ymax) { - region_bounds.set_xmin(_xmin); - region_bounds.set_xmax(_xmax); - region_bounds.set_ymin(_ymin); - region_bounds.set_ymax(_ymax); +void Region::set_region_rect(const RegionRectCoord& rect_coord) { + 
region_bounds.set_xmin(rect_coord.xmin); + region_bounds.set_xmax(rect_coord.xmax); + region_bounds.set_ymin(rect_coord.ymin); + region_bounds.set_ymax(rect_coord.ymax); + layer_num = rect_coord.layer_num; +} + +int Region::get_layer_num() const { + return layer_num; } int Region::get_sub_tile() const { @@ -31,11 +37,18 @@ void Region::set_sub_tile(int _sub_tile) { } bool Region::empty() { - return (region_bounds.xmax() < region_bounds.xmin() || region_bounds.ymax() < region_bounds.ymin()); + return (region_bounds.xmax() < region_bounds.xmin() + || region_bounds.ymax() < region_bounds.ymin() + || layer_num < 0); } bool Region::is_loc_in_reg(t_pl_loc loc) { bool is_loc_in_reg = false; + int loc_layer_num = loc.layer; + + if (layer_num != loc_layer_num) { + return is_loc_in_reg; + } vtr::Point loc_coord(loc.x, loc.y); @@ -58,10 +71,21 @@ bool Region::is_loc_in_reg(t_pl_loc loc) { bool do_regions_intersect(Region r1, Region r2) { bool intersect = true; - vtr::Rect r1_rect = r1.get_region_rect(); - vtr::Rect r2_rect = r2.get_region_rect(); + const auto r1_reg_coord = r1.get_region_rect(); + const auto r2_reg_coord = r2.get_region_rect(); + + vtr::Rect r1_rect(r1_reg_coord.xmin, r1_reg_coord.ymin, r1_reg_coord.xmax, r1_reg_coord.ymax); + vtr::Rect r2_rect(r2_reg_coord.xmin, r2_reg_coord.ymin, r2_reg_coord.xmax, r2_reg_coord.ymax); + + int r1_layer_num = r1_reg_coord.layer_num; + int r2_layer_num = r2_reg_coord.layer_num; + vtr::Rect intersect_rect; + if (r1_layer_num != r2_layer_num) { + return false; + } + intersect_rect = intersection(r1_rect, r2_rect); /** @@ -77,10 +101,22 @@ bool do_regions_intersect(Region r1, Region r2) { Region intersection(const Region& r1, const Region& r2) { Region intersect; - vtr::Rect r1_rect = r1.get_region_rect(); - vtr::Rect r2_rect = r2.get_region_rect(); + + const auto r1_reg_coord = r1.get_region_rect(); + const auto r2_reg_coord = r2.get_region_rect(); + + vtr::Rect r1_rect(r1_reg_coord.xmin, r1_reg_coord.ymin,
r1_reg_coord.xmax, r1_reg_coord.ymax); + vtr::Rect r2_rect(r2_reg_coord.xmin, r2_reg_coord.ymin, r2_reg_coord.xmax, r2_reg_coord.ymax); + + int r1_layer_num = r1_reg_coord.layer_num; + int r2_layer_num = r2_reg_coord.layer_num; + vtr::Rect intersect_rect; + if (r1_layer_num != r2_layer_num) { + return intersect; + } + /* * If the subtiles of two regions match (i.e. they both have no subtile specified, or the same subtile specified), * the regions are intersected. The resulting intersection region will have a rectangle that reflects their overlap, @@ -97,24 +133,27 @@ Region intersection(const Region& r1, const Region& r2) { if (r1.get_sub_tile() == r2.get_sub_tile()) { intersect.set_sub_tile(r1.get_sub_tile()); intersect_rect = intersection(r1_rect, r2_rect); - intersect.set_region_rect(intersect_rect.xmin(), intersect_rect.ymin(), intersect_rect.xmax(), intersect_rect.ymax()); + intersect.set_region_rect({intersect_rect, r1_layer_num}); } else if (r1.get_sub_tile() == NO_SUBTILE && r2.get_sub_tile() != NO_SUBTILE) { intersect.set_sub_tile(r2.get_sub_tile()); intersect_rect = intersection(r1_rect, r2_rect); - intersect.set_region_rect(intersect_rect.xmin(), intersect_rect.ymin(), intersect_rect.xmax(), intersect_rect.ymax()); + intersect.set_region_rect({intersect_rect, r1_layer_num}); } else if (r1.get_sub_tile() != NO_SUBTILE && r2.get_sub_tile() == NO_SUBTILE) { intersect.set_sub_tile(r1.get_sub_tile()); intersect_rect = intersection(r1_rect, r2_rect); - intersect.set_region_rect(intersect_rect.xmin(), intersect_rect.ymin(), intersect_rect.xmax(), intersect_rect.ymax()); + intersect.set_region_rect({intersect_rect, r1_layer_num}); } return intersect; } void print_region(FILE* fp, Region region) { + const auto region_coord = region.get_region_rect(); + const auto region_rect = vtr::Rect(region_coord.xmin, region_coord.ymin, region_coord.xmax, region_coord.ymax); fprintf(fp, "\tRegion: \n"); - print_rect(fp, region.get_region_rect()); + fprintf(fp, "\tlayer: 
%d\n", region.get_layer_num()); + print_rect(fp, region_rect); fprintf(fp, "\tsubtile: %d\n\n", region.get_sub_tile()); } diff --git a/vpr/src/base/region.h b/vpr/src/base/region.h index 75a25f5071d..7b1ceec6dda 100644 --- a/vpr/src/base/region.h +++ b/vpr/src/base/region.h @@ -4,6 +4,50 @@ #include #include "vpr_types.h" +/** + * @brief This class stores the data for each constraint region on a layer + * @param xmin The minimum x coordinate of the region + * @param ymin The minimum y coordinate of the region + * @param xmax The maximum x coordinate of the region + * @param ymax The maximum y coordinate of the region + * @param layer_num The layer number of the region + */ +struct RegionRectCoord { + RegionRectCoord() = default; + RegionRectCoord(int _xmin, int _ymin, int _xmax, int _ymax, int _layer_num) + : xmin(_xmin) + , ymin(_ymin) + , xmax(_xmax) + , ymax(_ymax) + , layer_num(_layer_num) {} + + RegionRectCoord(const vtr::Rect& rect, int _layer_num) + : xmin(rect.xmin()) + , ymin(rect.ymin()) + , xmax(rect.xmax()) + , ymax(rect.ymax()) + , layer_num(_layer_num) {} + + int xmin; + int ymin; + int xmax; + int ymax; + int layer_num; + + /// @brief Convert to a vtr::Rect + vtr::Rect get_rect() const { + return vtr::Rect(xmin, ymin, xmax, ymax); + } + + /// @brief Equality operator + bool operator==(const RegionRectCoord& rhs) const { + vtr::Rect lhs_rect(xmin, ymin, xmax, ymax); + vtr::Rect rhs_rect(rhs.xmin, rhs.ymin, rhs.xmax, rhs.ymax); + return lhs_rect == rhs_rect + && layer_num == rhs.layer_num; + } +}; + /** * @file * @brief This file defines the Region class. The Region class stores the data for each constraint region. 
@@ -26,12 +70,17 @@ class Region { /** * @brief Accessor for the region's rectangle */ - vtr::Rect get_region_rect() const; + RegionRectCoord get_region_rect() const; /** * @brief Mutator for the region's rectangle */ - void set_region_rect(int _xmin, int _ymin, int _xmax, int _ymax); + void set_region_rect(const RegionRectCoord& rect_coord); + + /** + * @brief Accessor for the region's layer number + */ + int get_layer_num() const; /** * @brief Accessor for the region's subtile @@ -59,12 +108,15 @@ class Region { bool is_loc_in_reg(t_pl_loc loc); bool operator==(const Region& reg) const { - return (reg.get_region_rect() == this->get_region_rect() && reg.get_sub_tile() == this->get_sub_tile()); + return (reg.get_region_rect() == this->get_region_rect() + && reg.get_sub_tile() == this->get_sub_tile() + && reg.layer_num == this->layer_num); } private: //may need to include zmin, zmax for future use in 3D FPGA designs vtr::Rect region_bounds; ///< xmin, ymin, xmax, ymax inclusive + int layer_num; ///< layer number of the region int sub_tile; ///< users will optionally select a subtile }; @@ -96,11 +148,12 @@ namespace std { template<> struct hash { std::size_t operator()(const Region& reg) const noexcept { - vtr::Rect rect = reg.get_region_rect(); - std::size_t seed = std::hash{}(rect.xmin()); - vtr::hash_combine(seed, rect.ymin()); - vtr::hash_combine(seed, rect.xmax()); - vtr::hash_combine(seed, rect.ymax()); + const auto region_coord = reg.get_region_rect(); + std::size_t seed = std::hash{}(region_coord.xmin); + vtr::hash_combine(seed, region_coord.ymin); + vtr::hash_combine(seed, region_coord.xmax); + vtr::hash_combine(seed, region_coord.ymax); + vtr::hash_combine(seed, region_coord.layer_num); vtr::hash_combine(seed, reg.get_sub_tile()); return seed; } diff --git a/vpr/src/base/setup_noc.cpp b/vpr/src/base/setup_noc.cpp index e836dd808c4..ad59fb21f10 100644 --- a/vpr/src/base/setup_noc.cpp +++ b/vpr/src/base/setup_noc.cpp @@ -40,7 +40,7 @@ void setup_noc(const 
t_arch& arch) { // store the reference to device grid with // need to set this first before adding routers to the model - noc_ctx.noc_model.set_device_grid_width((int)device_ctx.grid.width()); + noc_ctx.noc_model.set_device_grid_spec((int)device_ctx.grid.width(), (int)device_ctx.grid.height()); // generate noc model generate_noc(arch, noc_ctx, noc_router_tiles); @@ -59,9 +59,7 @@ void setup_noc(const t_arch& arch) { } void identify_and_store_noc_router_tile_positions(const DeviceGrid& device_grid, std::vector& noc_router_tiles, std::string noc_router_tile_name) { - int grid_width = device_grid.width(); - int grid_height = device_grid.height(); - + const int num_layers = device_grid.get_num_layers(); int curr_tile_width; int curr_tile_height; int curr_tile_width_offset; @@ -72,35 +70,37 @@ void identify_and_store_noc_router_tile_positions(const DeviceGrid& device_grid, double curr_tile_centroid_y; // go through the device - for (int i = 0; i < grid_width; i++) { - for (int j = 0; j < grid_height; j++) { - // get some information from the current tile - const auto& type = device_grid.get_physical_type(i, j); - int width_offset = device_grid.get_width_offset(i, j); - int height_offset = device_grid.get_height_offset(i, j); - - curr_tile_name.assign(type->name); - curr_tile_width_offset = width_offset; - curr_tile_height_offset = height_offset; - - curr_tile_height = type->height; - curr_tile_width = type->width; - - /* - * Only store the tile position if it is a noc router. - * Additionally, since a router tile can span multiple grid locations, we only add the tile if the height and width offset are zero (this prevents the router from being added multiple times for each grid location it spans). 
- */ - if (!(noc_router_tile_name.compare(curr_tile_name)) && !curr_tile_width_offset && !curr_tile_height_offset) { - // calculating the centroid position of the current tile - curr_tile_centroid_x = (curr_tile_width - 1) / (double)2 + i; - curr_tile_centroid_y = (curr_tile_height - 1) / (double)2 + j; - - noc_router_tiles.push_back({i, j, curr_tile_centroid_x, curr_tile_centroid_y}); + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + int grid_width = (int)device_grid.width(); + int grid_height = (int)device_grid.height(); + for (int i = 0; i < grid_width; i++) { + for (int j = 0; j < grid_height; j++) { + // get some information from the current tile + const auto& type = device_grid.get_physical_type({i, j, layer_num}); + int width_offset = device_grid.get_width_offset({i, j, layer_num}); + int height_offset = device_grid.get_height_offset({i, j, layer_num}); + + curr_tile_name.assign(type->name); + curr_tile_width_offset = width_offset; + curr_tile_height_offset = height_offset; + + curr_tile_height = type->height; + curr_tile_width = type->width; + + /* + * Only store the tile position if it is a noc router. + * Additionally, since a router tile can span multiple grid locations, we only add the tile if the height and width offset are zero (this prevents the router from being added multiple times for each grid location it spans). 
+ */ + if (!(noc_router_tile_name.compare(curr_tile_name)) && !curr_tile_width_offset && !curr_tile_height_offset) { + // calculating the centroid position of the current tile + curr_tile_centroid_x = (curr_tile_width - 1) / (double)2 + i; + curr_tile_centroid_y = (curr_tile_height - 1) / (double)2 + j; + + noc_router_tiles.emplace_back(i, j, layer_num, curr_tile_centroid_x, curr_tile_centroid_y); + } } } } - - return; } void generate_noc(const t_arch& arch, NocContext& noc_ctx, std::vector& noc_router_tiles) { @@ -215,8 +215,10 @@ void create_noc_routers(const t_noc_inf& noc_info, NocStorage* noc_model, std::v // at this point, the closest user described router to the current physical router was found // so add the router to the NoC - noc_model->add_router(logical_router->id, noc_router_tiles[closest_physical_router].grid_width_position, - noc_router_tiles[closest_physical_router].grid_height_position); + noc_model->add_router(logical_router->id, + noc_router_tiles[closest_physical_router].grid_width_position, + noc_router_tiles[closest_physical_router].grid_height_position, + noc_router_tiles[closest_physical_router].layer_position); // add the new assignment to the tracker router_assignments[closest_physical_router] = logical_router->id; diff --git a/vpr/src/base/setup_noc.h b/vpr/src/base/setup_noc.h index 2f96268f787..23737d1c5b1 100644 --- a/vpr/src/base/setup_noc.h +++ b/vpr/src/base/setup_noc.h @@ -49,8 +49,16 @@ // a data structure to store the position information of a noc router in the FPGA device struct t_noc_router_tile_position { + t_noc_router_tile_position(int x, int y, int layer_num, double centroid_x, double centroid_y) + : grid_width_position(x) + , grid_height_position(y) + , layer_position(layer_num) + , tile_centroid_x(centroid_x) + , tile_centroid_y(centroid_y) {} + int grid_width_position; int grid_height_position; + int layer_position; double tile_centroid_x; double tile_centroid_y; diff --git a/vpr/src/base/stats.cpp 
b/vpr/src/base/stats.cpp index 337a1964d6b..bc09e68418d 100644 --- a/vpr/src/base/stats.cpp +++ b/vpr/src/base/stats.cpp @@ -70,19 +70,21 @@ void routing_stats(const Netlist<>& net_list, VTR_LOG("Logic area (in minimum width transistor areas, excludes I/Os and empty grid tiles)...\n"); area = 0; - for (size_t i = 0; i < device_ctx.grid.width(); i++) { - for (size_t j = 0; j < device_ctx.grid.height(); j++) { - auto type = device_ctx.grid.get_physical_type(i, j); - int width_offset = device_ctx.grid.get_width_offset(i, j); - int height_offset = device_ctx.grid.get_height_offset(i, j); - if (width_offset == 0 - && height_offset == 0 - && !is_io_type(type) - && type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - if (type->area == UNDEFINED) { - area += grid_logic_tile_area * type->width * type->height; - } else { - area += type->area; + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + auto type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); + if (width_offset == 0 + && height_offset == 0 + && !is_io_type(type) + && type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if (type->area == UNDEFINED) { + area += grid_logic_tile_area * type->width * type->height; + } else { + area += type->area; + } } } } diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 23353e22d15..c75645fd5d8 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -478,14 +478,14 @@ void vpr_create_device_grid(const t_vpr_setup& vpr_setup, const t_arch& Arch) { continue; } - if (device_ctx.grid.num_instances(&type) != 0) { + if (device_ctx.grid.num_instances(&type, -1) != 0) { VTR_LOG("\tPhysical Tile %s:\n", type.name); auto equivalent_sites = 
get_equivalent_sites_set(&type); for (auto logical_block : equivalent_sites) { float util = 0.; - size_t num_inst = device_ctx.grid.num_instances(&type); + size_t num_inst = device_ctx.grid.num_instances(&type, -1); if (num_inst != 0) { util = float(num_type_instances[logical_block]) / num_inst; } diff --git a/vpr/src/base/vpr_constraints_serializer.h b/vpr/src/base/vpr_constraints_serializer.h index 4007b7c5c3b..5405eb0e21a 100644 --- a/vpr/src/base/vpr_constraints_serializer.h +++ b/vpr/src/base/vpr_constraints_serializer.h @@ -163,6 +163,11 @@ class VprConstraintsSerializer final : public uxsd::VprConstraintsBase * */ + + virtual inline int get_add_region_layer_num(Region& r) final { + return r.get_layer_num(); + } + virtual inline int get_add_region_subtile(Region& r) final { return r.get_sub_tile(); } @@ -172,23 +177,23 @@ class VprConstraintsSerializer final : public uxsd::VprConstraintsBase rect = r.get_region_rect(); - return rect.xmax(); + const auto reg_coord = r.get_region_rect(); + return reg_coord.xmax; } virtual inline int get_add_region_x_low(Region& r) final { - vtr::Rect rect = r.get_region_rect(); - return rect.xmin(); + const auto reg_coord = r.get_region_rect(); + return reg_coord.xmin; } virtual inline int get_add_region_y_high(Region& r) final { - vtr::Rect rect = r.get_region_rect(); - return rect.ymax(); + const auto reg_coord = r.get_region_rect(); + return reg_coord.ymax; } virtual inline int get_add_region_y_low(Region& r) final { - vtr::Rect rect = r.get_region_rect(); - return rect.ymin(); + const auto reg_coord = r.get_region_rect(); + return reg_coord.ymin; } /** Generated for complex type "partition": @@ -232,7 +237,7 @@ class VprConstraintsSerializer final : public uxsd::VprConstraintsBase vertical_cuts; + // This function has not been tested for multi-layer grids + VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); int horizontal_interval = device_ctx.grid.width() / horizontal_cutpoints; VTR_LOG("Device grid width is %d, horizontal 
interval is %d\n", device_ctx.grid.width(), horizontal_interval); @@ -138,7 +144,8 @@ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int int ymax = vertical_cuts[j + 1] - 1; Region reg; - reg.set_region_rect(xmin, ymin, xmax, ymax); + // This function has not been tested for multi-layer grids. An assertion is used earlier to make sure that the grid has only one layer + reg.set_region_rect({xmin, ymin, xmax, ymax, 0}); std::vector atoms; region_atoms.insert({reg, atoms}); @@ -176,7 +183,8 @@ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int } Region current_reg; - current_reg.set_region_rect(xminimum, yminimum, xmaximum, ymaximum); + // This function has not been tested for multi-layer grids. An assertion is used earlier to make sure that the grid has only one layer + current_reg.set_region_rect({xminimum, yminimum, xmaximum, ymaximum, 0}); auto got = region_atoms.find(current_reg); @@ -192,8 +200,9 @@ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int Partition part; PartitionId partid(num_partitions); std::string part_name = "Part" + std::to_string(num_partitions); - vtr::Rect rect = region.first.get_region_rect(); - create_partition(part, part_name, rect.xmin(), rect.ymin(), rect.xmax(), rect.ymax()); + const auto reg_coord = region.first.get_region_rect(); + create_partition(part, part_name, + {reg_coord.xmin, reg_coord.ymin, reg_coord.xmax, reg_coord.ymax, reg_coord.layer_num}); constraints.add_partition(part); for (unsigned int k = 0; k < region.second.size(); k++) { @@ -204,11 +213,11 @@ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int } } -void create_partition(Partition& part, std::string part_name, int xmin, int ymin, int xmax, int ymax) { +void create_partition(Partition& part, std::string part_name, const RegionRectCoord& region_cord) { part.set_name(part_name); PartitionRegion part_pr; Region part_region; - 
part_region.set_region_rect(xmin, ymin, xmax, ymax); + part_region.set_region_rect(region_cord); std::vector part_regions; part_regions.push_back(part_region); part_pr.set_partition_region(part_regions); diff --git a/vpr/src/base/vpr_constraints_writer.h b/vpr/src/base/vpr_constraints_writer.h index 756f8c17c29..955542be637 100644 --- a/vpr/src/base/vpr_constraints_writer.h +++ b/vpr/src/base/vpr_constraints_writer.h @@ -45,6 +45,6 @@ void setup_vpr_floorplan_constraints_one_loc(VprConstraints& constraints, int ex */ void setup_vpr_floorplan_constraints_cutpoints(VprConstraints& constraints, int horizontal_cutpoints, int vertical_cutpoints); -void create_partition(Partition& part, std::string part_name, int xmin, int ymin, int xmax, int ymax); +void create_partition(Partition& part, std::string part_name, const RegionRectCoord& region_cord); #endif /* VPR_SRC_BASE_VPR_CONSTRAINTS_WRITER_H_ */ diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 7d615e36949..7c83980fcc4 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -369,7 +369,7 @@ struct PlacementContext : public Context { vtr::vector_map physical_pins; ///@brief Clustered block associated with each grid location (i.e. inverse of block_locs) - vtr::Matrix grid_blocks; //[0..device_ctx.grid.width()-1][0..device_ctx.grid.width()-1] + GridBlock grid_blocks; ///@brief The pl_macros array stores all the placement macros (usually carry chains). 
std::vector pl_macros; @@ -379,6 +379,7 @@ struct PlacementContext : public Context { * * Used to efficiently find logically 'adjacent' blocks of the same * block type even though the may be physically far apart + * Indexed with logical block type index: [0...num_logical_block_types-1] -> logical block compressed grid */ t_compressed_block_grids compressed_block_grids; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index b793b361b3d..5db0c4b82be 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -575,10 +575,19 @@ struct t_net_power { * the region: (1..device_ctx.grid.width()-2, 1..device_ctx.grid.height()-1) */ struct t_bb { - int xmin = 0; - int xmax = 0; - int ymin = 0; - int ymax = 0; + t_bb() = default; + t_bb(int xmin_, int xmax_, int ymin_, int ymax_) + : xmin(xmin_) + , xmax(xmax_) + , ymin(ymin_) + , ymax(ymax_) { + VTR_ASSERT(xmax_ >= xmin_); + VTR_ASSERT(ymax_ >= ymin_); + } + int xmin = OPEN; + int xmax = OPEN; + int ymin = OPEN; + int ymax = OPEN; }; /** @@ -660,22 +669,26 @@ struct hash { * * x: x-coordinate * y: y-coordinate - * z: z-coordinate (capacity postion) + * sub_tile: sub-tile number (capacity position) + * layer: layer (die) number * * @note t_pl_offset should be used to represent an offset between t_pl_loc. 
*/ struct t_pl_loc { t_pl_loc() = default; - t_pl_loc(int xloc, int yloc, int sub_tile_loc) + t_pl_loc(int xloc, int yloc, int sub_tile_loc, int layer_num) : x(xloc) , y(yloc) - , sub_tile(sub_tile_loc) {} + , sub_tile(sub_tile_loc) + , layer(layer_num) {} int x = OPEN; int y = OPEN; int sub_tile = OPEN; + int layer = OPEN; t_pl_loc& operator+=(const t_pl_offset& rhs) { + VTR_ASSERT(this->layer != OPEN); x += rhs.x; y += rhs.y; sub_tile += rhs.sub_tile; @@ -683,6 +696,7 @@ struct t_pl_loc { } t_pl_loc& operator-=(const t_pl_offset& rhs) { + VTR_ASSERT(this->layer != OPEN); x -= rhs.x; y -= rhs.y; sub_tile -= rhs.sub_tile; @@ -706,15 +720,17 @@ struct t_pl_loc { } friend t_pl_offset operator-(const t_pl_loc& lhs, const t_pl_loc& rhs) { - return t_pl_offset(lhs.x - rhs.x, lhs.y - rhs.y, lhs.sub_tile - rhs.sub_tile); + VTR_ASSERT(lhs.layer == rhs.layer); + return {lhs.x - rhs.x, lhs.y - rhs.y, lhs.sub_tile - rhs.sub_tile}; } friend bool operator<(const t_pl_loc& lhs, const t_pl_loc& rhs) { + VTR_ASSERT(lhs.layer == rhs.layer); return std::tie(lhs.x, lhs.y, lhs.sub_tile) < std::tie(rhs.x, rhs.y, rhs.sub_tile); } friend bool operator==(const t_pl_loc& lhs, const t_pl_loc& rhs) { - return std::tie(lhs.x, lhs.y, lhs.sub_tile) == std::tie(rhs.x, rhs.y, rhs.sub_tile); + return std::tie(lhs.layer, lhs.x, lhs.y, lhs.sub_tile) == std::tie(rhs.layer, rhs.x, rhs.y, rhs.sub_tile); } friend bool operator!=(const t_pl_loc& lhs, const t_pl_loc& rhs) { @@ -778,6 +794,50 @@ struct t_grid_blocks { } }; +class GridBlock { + public: + GridBlock() = default; + + GridBlock(size_t width, size_t height, size_t layers) { + grid_blocks_.resize({layers, width, height}); + } + + inline void initialized_grid_block_at_location(const t_physical_tile_loc& loc, int num_sub_tiles) { + grid_blocks_[loc.layer_num][loc.x][loc.y].blocks.resize(num_sub_tiles, EMPTY_BLOCK_ID); + } + + inline void set_block_at_location(const t_pl_loc& loc, ClusterBlockId blk_id) { + 
grid_blocks_[loc.layer][loc.x][loc.y].blocks[loc.sub_tile] = blk_id; + } + + inline ClusterBlockId block_at_location(const t_pl_loc& loc) const { + return grid_blocks_[loc.layer][loc.x][loc.y].blocks[loc.sub_tile]; + } + + inline size_t num_blocks_at_location(const t_physical_tile_loc& loc) const { + return grid_blocks_[loc.layer_num][loc.x][loc.y].blocks.size(); + } + + inline int set_usage(const t_physical_tile_loc loc, int usage) { + return grid_blocks_[loc.layer_num][loc.x][loc.y].usage = usage; + } + + inline int get_usage(const t_physical_tile_loc loc) const { + return grid_blocks_[loc.layer_num][loc.x][loc.y].usage; + } + + inline bool is_sub_tile_empty(const t_physical_tile_loc loc, int sub_tile) const { + return grid_blocks_[loc.layer_num][loc.x][loc.y].subtile_empty(sub_tile); + } + + inline void clear() { + grid_blocks_.clear(); + } + + private: + vtr::NdMatrix grid_blocks_; +}; + ///@brief Names of various files struct t_file_name_opts { std::string ArchFile; diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index ca89925be94..e8568639986 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -985,20 +985,24 @@ static void highlight_blocks(double x, double y) { /// determine block /// ezgl::rectangle clb_bbox; + //TODO: Change when graphics supports 3D FPGAs + VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); + int layer_num = 0; // iterate over grid x - for (size_t i = 0; i < device_ctx.grid.width(); ++i) { + for (int i = 0; i < (int)device_ctx.grid.width(); ++i) { if (draw_coords->tile_x[i] > x) { break; // we've gone to far in the x direction } // iterate over grid y - for (size_t j = 0; j < device_ctx.grid.height(); ++j) { + for (int j = 0; j < (int)device_ctx.grid.height(); ++j) { if (draw_coords->tile_y[j] > y) { break; // we've gone to far in the y direction } // iterate over sub_blocks - const auto& type = device_ctx.grid.get_physical_type(i, j); + const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); for 
(int k = 0; k < type->capacity; ++k) { - clb_index = place_ctx.grid_blocks[i][j].blocks[k]; + // TODO: Change when graphics supports 3D + clb_index = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}); if (clb_index != EMPTY_BLOCK_ID) { clb_bbox = draw_coords->get_absolute_clb_bbox(clb_index, cluster_ctx.clb_nlist.block_type(clb_index)); @@ -1371,6 +1375,8 @@ bool highlight_loc_with_specific_color(int x, int y, ezgl::color& loc_color) { t_pl_loc curr_loc; curr_loc.x = x; curr_loc.y = y; + //TODO: Graphic currently doesn't support 3D FPGAs + curr_loc.layer = 0; //search for the current location in the vector of colored locations auto it = std::find_if(draw_state->colored_locations.begin(), diff --git a/vpr/src/draw/draw_basic.cpp b/vpr/src/draw/draw_basic.cpp index 7beb012d3d2..75802edf8c5 100644 --- a/vpr/src/draw/draw_basic.cpp +++ b/vpr/src/draw/draw_basic.cpp @@ -108,13 +108,17 @@ void drawplace(ezgl::renderer* g) { ClusterBlockId bnum; int num_sub_tiles; + //TODO: Change when graphics supports 3D FPGAs + VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); + int layer_num = 0; + g->set_line_width(0); - for (size_t i = 0; i < device_ctx.grid.width(); i++) { - for (size_t j = 0; j < device_ctx.grid.height(); j++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { /* Only the first block of a group should control drawing */ - const auto& type = device_ctx.grid.get_physical_type(i, j); - int width_offset = device_ctx.grid.get_width_offset(i, j); - int height_offset = device_ctx.grid.get_height_offset(i, j); + const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); if (width_offset > 0 || height_offset > 0) @@ -128,7 +132,8 @@ void drawplace(ezgl::renderer* g) { for (int k = 0; k < num_sub_tiles; ++k) { /* Look at the tile 
at start of large block */ - bnum = place_ctx.grid_blocks[i][j].blocks[k]; + //TODO: Change when graphics supports 3D + bnum = place_ctx.grid_blocks.block_at_location({i, j, k, 0}); /* Fill background for the clb. Do not fill if "show_blk_internal" * is toggled. */ @@ -161,7 +166,10 @@ void drawplace(ezgl::renderer* g) { g->set_color(block_color); /* Get coords of current sub_tile */ - ezgl::rectangle abs_clb_bbox = draw_coords->get_absolute_clb_bbox(i, j, k, + ezgl::rectangle abs_clb_bbox = draw_coords->get_absolute_clb_bbox(layer_num, + i, + j, + k, logical_block_type); ezgl::point2d center = abs_clb_bbox.center(); diff --git a/vpr/src/draw/draw_floorplanning.cpp b/vpr/src/draw/draw_floorplanning.cpp index 24f1f94679b..126bbd63212 100644 --- a/vpr/src/draw/draw_floorplanning.cpp +++ b/vpr/src/draw/draw_floorplanning.cpp @@ -100,12 +100,17 @@ static void highlight_partition(ezgl::renderer* g, int partitionID, int alpha) { // the on screen units for ezgl to use. for (int region = 0; (size_t)region < regions.size(); region++) { - auto tile_rect = regions[region].get_region_rect(); - - ezgl::rectangle top_right = draw_coords->get_absolute_clb_bbox(tile_rect.xmax(), - tile_rect.ymax(), 0); - ezgl::rectangle bottom_left = draw_coords->get_absolute_clb_bbox(tile_rect.xmin(), - tile_rect.ymin(), 0); + const auto reg_coord = regions[region].get_region_rect(); + + //TODO: 0 should be replaced with the actual z value of the region when graph is 3D + ezgl::rectangle top_right = draw_coords->get_absolute_clb_bbox(reg_coord.layer_num, + reg_coord.xmax, + reg_coord.ymax, + 0); + ezgl::rectangle bottom_left = draw_coords->get_absolute_clb_bbox(reg_coord.layer_num, + reg_coord.xmin, + reg_coord.ymin, + 0); ezgl::rectangle on_screen_rect(bottom_left.bottom_left(), top_right.top_right()); diff --git a/vpr/src/draw/draw_noc.cpp b/vpr/src/draw/draw_noc.cpp index 700f82a132c..771be541034 100644 --- a/vpr/src/draw/draw_noc.cpp +++ b/vpr/src/draw/draw_noc.cpp @@ -37,8 +37,9 @@ void 
draw_noc(ezgl::renderer* g) { // check that the NoC tile has a capacity greater than 0 (can we assume it always will?) and if not then we cant draw anythign as the NoC tile wont be drawn /* since the vector of routers all have a reference positions on the grid to the corresponding physical tile, just use the first router in the vector and get its position, then use this to get the capcity of a noc router tile */ - const auto& type = device_ctx.grid.get_physical_type(router_list.begin()->get_router_grid_position_x(), - router_list.begin()->get_router_grid_position_y()); + const auto& type = device_ctx.grid.get_physical_type({router_list.begin()->get_router_grid_position_x(), + router_list.begin()->get_router_grid_position_y(), + router_list.begin()->get_router_layer_position()}); int num_subtiles = type->capacity; if (num_subtiles == 0) { @@ -230,10 +231,12 @@ void draw_noc_links(ezgl::renderer* g, t_logical_block_type_ptr noc_router_logic NocRouterId sink_router; // source router grid coordinates + int source_router_layer_position = 0; int source_router_x_position = 0; int source_router_y_position = 0; // sink router grid coordinates + int sink_router_layer_position = 0; int sink_router_x_position = 0; int sink_router_y_position = 0; @@ -262,16 +265,18 @@ void draw_noc_links(ezgl::renderer* g, t_logical_block_type_ptr noc_router_logic sink_router = noc_link_list[link_id].get_sink_router(); // calculate the grid positions of the source and sink routers + source_router_layer_position = router_list[source_router].get_router_layer_position(); source_router_x_position = router_list[source_router].get_router_grid_position_x(); source_router_y_position = router_list[source_router].get_router_grid_position_y(); + sink_router_layer_position = router_list[sink_router].get_router_layer_position(); sink_router_x_position = router_list[sink_router].get_router_grid_position_x(); sink_router_y_position = router_list[sink_router].get_router_grid_position_y(); // get the initial 
drawing coordinates of the noc link // it will be drawn from the center of two routers it connects - link_coords.start = draw_coords->get_absolute_clb_bbox(source_router_x_position, source_router_y_position, 0, noc_router_logical_block_type).center(); - link_coords.end = draw_coords->get_absolute_clb_bbox(sink_router_x_position, sink_router_y_position, 0, noc_router_logical_block_type).center(); + link_coords.start = draw_coords->get_absolute_clb_bbox(source_router_layer_position, source_router_x_position, source_router_y_position, 0, noc_router_logical_block_type).center(); + link_coords.end = draw_coords->get_absolute_clb_bbox(sink_router_layer_position, sink_router_x_position, sink_router_y_position, 0, noc_router_logical_block_type).center(); // determine the current noc link type link_type = determine_noc_link_type(link_coords.start, link_coords.end); diff --git a/vpr/src/draw/draw_rr.cpp b/vpr/src/draw/draw_rr.cpp index 0e634421568..c4a4cde6278 100644 --- a/vpr/src/draw/draw_rr.cpp +++ b/vpr/src/draw/draw_rr.cpp @@ -592,7 +592,9 @@ void draw_get_rr_src_sink_coords(const t_rr_node& node, float* xcen, float* ycen auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; RRNodeId rr_node = node.id(); - t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(rr_node), rr_graph.node_ylow(rr_node)); + t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(rr_node), + rr_graph.node_ylow(rr_node), + rr_graph.node_layer(rr_node)}); //Number of classes (i.e. 
src/sinks) we need to draw float num_class = tile_type->class_inf.size(); @@ -678,6 +680,10 @@ int draw_check_rr_node_hit(float click_x, float click_y) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; + //TODO: Change when graphics supports 3D FPGAs + VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); + int layer_num = 0; + for (const RRNodeId& rr_id : device_ctx.rr_graph.nodes()) { size_t inode = (size_t)rr_id; switch (rr_graph.node_type(rr_id)) { @@ -685,9 +691,9 @@ int draw_check_rr_node_hit(float click_x, float click_y) { case OPIN: { int i = rr_graph.node_xlow(rr_id); int j = rr_graph.node_ylow(rr_id); - t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type(i, j); - int width_offset = device_ctx.grid.get_width_offset(i, j); - int height_offset = device_ctx.grid.get_height_offset(i, j); + t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); int ipin = rr_graph.node_pin_num(rr_id); float xcen, ycen; for (const e_side& iside : SIDES) { @@ -852,12 +858,13 @@ void draw_get_rr_pin_coords(const t_rr_node& node, float* xcen, float* ycen, con i = rr_graph.node_xlow(rr_node); j = rr_graph.node_ylow(rr_node); + int layer_num = rr_graph.node_layer(rr_node); xc = draw_coords->tile_x[i]; yc = draw_coords->tile_y[j]; ipin = rr_graph.node_pin_num(rr_node); - type = device_ctx.grid.get_physical_type(i, j); + type = device_ctx.grid.get_physical_type({i, j, layer_num}); pins_per_sub_tile = type->num_pins / type->capacity; k = ipin / pins_per_sub_tile; diff --git a/vpr/src/draw/draw_rr_edges.cpp b/vpr/src/draw/draw_rr_edges.cpp index 6c1b5bfc4c4..6ff00263676 100644 --- a/vpr/src/draw/draw_rr_edges.cpp +++ b/vpr/src/draw/draw_rr_edges.cpp @@ -412,10 +412,15 @@ void draw_pin_to_chan_edge(int pin_node, int chan_node, ezgl::renderer* g) { auto pin_rr = 
RRNodeId(pin_node); auto chan_rr = RRNodeId(chan_node); - const auto& grid_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(pin_rr), rr_graph.node_ylow(pin_rr)); - int width_offset = device_ctx.grid.get_width_offset(rr_graph.node_xlow(pin_rr), rr_graph.node_ylow(pin_rr)); - int height_offset = device_ctx.grid.get_height_offset(rr_graph.node_xlow(pin_rr), rr_graph.node_ylow(pin_rr)); - ; + const auto& grid_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(pin_rr), + rr_graph.node_ylow(pin_rr), + rr_graph.node_layer(pin_rr)}); + int width_offset = device_ctx.grid.get_width_offset({rr_graph.node_xlow(pin_rr), + rr_graph.node_ylow(pin_rr), + rr_graph.node_layer(pin_rr)}); + int height_offset = device_ctx.grid.get_height_offset({rr_graph.node_xlow(pin_rr), + rr_graph.node_ylow(pin_rr), + rr_graph.node_layer(pin_rr)}); float x1 = 0, y1 = 0; /* If there is only one side, no need for the following inference!!! diff --git a/vpr/src/draw/draw_types.cpp b/vpr/src/draw/draw_types.cpp index 428dc4f9c5e..d1532564938 100644 --- a/vpr/src/draw/draw_types.cpp +++ b/vpr/src/draw/draw_types.cpp @@ -79,10 +79,15 @@ float t_draw_coords::get_tile_height() { ezgl::rectangle t_draw_coords::get_pb_bbox(ClusterBlockId clb_index, const t_pb_graph_node& pb_gnode) { auto& place_ctx = g_vpr_ctx.placement(); auto& cluster_ctx = g_vpr_ctx.clustering(); - return get_pb_bbox(place_ctx.block_locs[clb_index].loc.x, place_ctx.block_locs[clb_index].loc.y, place_ctx.block_locs[clb_index].loc.sub_tile, cluster_ctx.clb_nlist.block_type(clb_index), pb_gnode); + return get_pb_bbox(place_ctx.block_locs[clb_index].loc.layer, + place_ctx.block_locs[clb_index].loc.x, + place_ctx.block_locs[clb_index].loc.y, + place_ctx.block_locs[clb_index].loc.sub_tile, + cluster_ctx.clb_nlist.block_type(clb_index), + pb_gnode); } -ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type, const t_pb_graph_node& pb_gnode) { 
+ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type, const t_pb_graph_node& pb_gnode) { auto& device_ctx = g_vpr_ctx.device(); t_draw_pb_type_info& blk_type_info = this->blk_info.at(logical_block_type->index); @@ -90,7 +95,7 @@ ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_x, int grid_y, int sub_block // if getting clb bbox, apply location info. if (pb_gnode.is_root()) { - const auto& type = device_ctx.grid.get_physical_type(grid_x, grid_y); + const auto& type = device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}); float sub_blk_offset = this->tile_width * (sub_block_index / (float)type->capacity); result += ezgl::point2d(this->tile_x[grid_x], this->tile_y[grid_y]); @@ -101,7 +106,7 @@ ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_x, int grid_y, int sub_block return result; } -ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type) { +ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type) { auto& device_ctx = g_vpr_ctx.device(); t_draw_pb_type_info& blk_type_info = this->blk_info.at(logical_block_type->index); @@ -110,7 +115,7 @@ ezgl::rectangle t_draw_coords::get_pb_bbox(int grid_x, int grid_y, int sub_block // if getting clb bbox, apply location info. 
if (pb_gnode.is_root()) { - const auto& type = device_ctx.grid.get_physical_type(grid_x, grid_y); + const auto& type = device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}); float sub_blk_offset = this->tile_width * (sub_block_index / (float)type->capacity); result += ezgl::point2d(this->tile_x[grid_x], this->tile_y[grid_y]); @@ -139,17 +144,17 @@ ezgl::rectangle t_draw_coords::get_absolute_clb_bbox(const ClusterBlockId clb_in auto& place_ctx = g_vpr_ctx.placement(); t_pl_loc loc = place_ctx.block_locs[clb_index].loc; - return get_pb_bbox(loc.x, loc.y, loc.sub_tile, block_type); + return get_pb_bbox(loc.layer, loc.x, loc.y, loc.sub_tile, block_type); } -ezgl::rectangle t_draw_coords::get_absolute_clb_bbox(int grid_x, int grid_y, int sub_block_index) { +ezgl::rectangle t_draw_coords::get_absolute_clb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index) { auto& device_ctx = g_vpr_ctx.device(); - const auto& type = device_ctx.grid.get_physical_type(grid_x, grid_y); - return get_pb_bbox(grid_x, grid_y, sub_block_index, pick_logical_type(type)); + const auto& type = device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}); + return get_pb_bbox(grid_layer, grid_x, grid_y, sub_block_index, pick_logical_type(type)); } -ezgl::rectangle t_draw_coords::get_absolute_clb_bbox(int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type) { - return get_pb_bbox(grid_x, grid_y, sub_block_index, logical_block_type); +ezgl::rectangle t_draw_coords::get_absolute_clb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr logical_block_type) { + return get_pb_bbox(grid_layer, grid_x, grid_y, sub_block_index, logical_block_type); } #endif // NO_GRAPHICS diff --git a/vpr/src/draw/draw_types.h b/vpr/src/draw/draw_types.h index 5ccc7e80a71..857519ba25f 100644 --- a/vpr/src/draw/draw_types.h +++ b/vpr/src/draw/draw_types.h @@ -349,10 +349,10 @@ struct t_draw_coords { ezgl::rectangle 
get_pb_bbox(ClusterBlockId clb_index, const t_pb_graph_node& pb_gnode); ///@brief returns bounding box of sub block at given location of given type w. given pb - ezgl::rectangle get_pb_bbox(int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr type, const t_pb_graph_node& pb_gnode); + ezgl::rectangle get_pb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr type, const t_pb_graph_node& pb_gnode); ///@brief returns pb of sub block of given idx/given type at location - ezgl::rectangle get_pb_bbox(int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr type); + ezgl::rectangle get_pb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr type); /** * @brief returns a bounding box for the given pb in the given @@ -367,13 +367,13 @@ struct t_draw_coords { * @brief Returns a bounding box for the clb at device_ctx.grid[grid_x][grid_y].blocks[sub_block_index], * even if it is empty. */ - ezgl::rectangle get_absolute_clb_bbox(int grid_x, int grid_y, int sub_block_index); + ezgl::rectangle get_absolute_clb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index); /** * @brief Returns a bounding box for the clb at device_ctx.grid[grid_x][grid_y].blocks[sub_block_index], * of given type even if it is empty. 
*/ - ezgl::rectangle get_absolute_clb_bbox(int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr block_type); + ezgl::rectangle get_absolute_clb_bbox(int grid_layer, int grid_x, int grid_y, int sub_block_index, const t_logical_block_type_ptr block_type); private: float tile_width; diff --git a/vpr/src/draw/intra_logic_block.cpp b/vpr/src/draw/intra_logic_block.cpp index 66bfc8bd640..285ade3c027 100644 --- a/vpr/src/draw/intra_logic_block.cpp +++ b/vpr/src/draw/intra_logic_block.cpp @@ -154,12 +154,15 @@ void draw_internal_draw_subblk(ezgl::renderer* g) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.placement(); - for (size_t i = 0; i < device_ctx.grid.width(); i++) { - for (size_t j = 0; j < device_ctx.grid.height(); j++) { + //TODO: Change when graphics supports 3D FPGAs + VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); + int layer_num = 0; + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { /* Only the first block of a group should control drawing */ - const auto& type = device_ctx.grid.get_physical_type(i, j); - int width_offset = device_ctx.grid.get_width_offset(i, j); - int height_offset = device_ctx.grid.get_height_offset(i, j); + const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); if (width_offset > 0 || height_offset > 0) continue; @@ -171,11 +174,13 @@ void draw_internal_draw_subblk(ezgl::renderer* g) { int num_sub_tiles = type->capacity; for (int k = 0; k < num_sub_tiles; ++k) { /* Don't draw if block is empty. 
*/ - if (place_ctx.grid_blocks[i][j].blocks[k] == EMPTY_BLOCK_ID || place_ctx.grid_blocks[i][j].blocks[k] == INVALID_BLOCK_ID) + // TODO: Change when graphics supports 3D + if (place_ctx.grid_blocks.block_at_location({i, j, k, 0}) == EMPTY_BLOCK_ID || place_ctx.grid_blocks.block_at_location({i, j, k, 0}) == INVALID_BLOCK_ID) continue; /* Get block ID */ - ClusterBlockId bnum = place_ctx.grid_blocks[i][j].blocks[k]; + // TODO: Change when graphics supports 3D + ClusterBlockId bnum = place_ctx.grid_blocks.block_at_location({i, j, k, 0}); /* Safety check, that physical blocks exists in the CLB */ if (cluster_ctx.clb_nlist.block_pb(bnum) == nullptr) continue; @@ -282,8 +287,9 @@ draw_internal_calc_coords(int type_descrip_index, t_pb_graph_node* pb_graph_node double left, bot, right, top; int capacity = device_ctx.physical_tile_types[type_descrip_index].capacity; - const auto& type = device_ctx.grid.get_physical_type(1, 0); - if (capacity > 1 && device_ctx.grid.width() > 0 && device_ctx.grid.height() > 0 && place_ctx.grid_blocks[1][0].usage != 0 + // TODO: this is a hack - should be fixed for the layer_num + const auto& type = device_ctx.grid.get_physical_type({1, 0, 0}); + if (capacity > 1 && device_ctx.grid.width() > 0 && device_ctx.grid.height() > 0 && place_ctx.grid_blocks.get_usage({1, 0, 0}) != 0 && type_descrip_index == type->index) { // that should test for io blocks, and setting capacity_divisor > 1 // will squish every thing down diff --git a/vpr/src/draw/manual_moves.cpp b/vpr/src/draw/manual_moves.cpp index 554334f63cd..f2005c2bc6c 100644 --- a/vpr/src/draw/manual_moves.cpp +++ b/vpr/src/draw/manual_moves.cpp @@ -115,7 +115,8 @@ void calculate_cost_callback(GtkWidget* /*widget*/, GtkWidget* grid) { valid_input = false; } - t_pl_loc to = t_pl_loc(x_location, y_location, subtile_location); + // TODO: When graphic is updated to support 3D, this will need to be updated + t_pl_loc to = t_pl_loc(x_location, y_location, subtile_location, 0); valid_input = 
is_manual_move_legal(ClusterBlockId(block_id), to); if (valid_input) { @@ -160,7 +161,7 @@ bool is_manual_move_legal(ClusterBlockId block_id, t_pl_loc to) { } //If the block s not compatible - auto physical_tile = device_ctx.grid.get_physical_type(to.x, to.y); + auto physical_tile = device_ctx.grid.get_physical_type({to.x, to.y, to.layer}); auto logical_block = cluster_ctx.clb_nlist.block_type(block_id); if (to.sub_tile < 0 || to.sub_tile >= physical_tile->capacity || !is_sub_tile_compatible(physical_tile, logical_block, to.sub_tile)) { invalid_breakpoint_entry_window("Blocks are not compatible"); @@ -168,7 +169,7 @@ bool is_manual_move_legal(ClusterBlockId block_id, t_pl_loc to) { } //If the destination block is user constrained, abort this swap - auto b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + auto b_to = place_ctx.grid_blocks.block_at_location(to); if (b_to != INVALID_BLOCK_ID && b_to != EMPTY_BLOCK_ID) { if (place_ctx.block_locs[b_to].is_fixed) { invalid_breakpoint_entry_window("Block is fixed"); diff --git a/vpr/src/draw/search_bar.cpp b/vpr/src/draw/search_bar.cpp index 2c3c6627ffa..361728a904c 100644 --- a/vpr/src/draw/search_bar.cpp +++ b/vpr/src/draw/search_bar.cpp @@ -241,9 +241,10 @@ void auto_zoom_rr_node(int rr_node_id) { case OPIN: { int i = rr_graph.node_xlow(RRNodeId(rr_node_id)); int j = rr_graph.node_ylow(RRNodeId(rr_node_id)); - t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type(i, j); - int width_offset = device_ctx.grid.get_width_offset(i, j); - int height_offset = device_ctx.grid.get_height_offset(i, j); + int layer_num = rr_graph.node_layer(RRNodeId(rr_node_id)); + t_physical_tile_type_ptr type = device_ctx.grid.get_physical_type({i, j, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer_num}); int ipin = rr_graph.node_ptc_num(RRNodeId(rr_node_id)); float xcen, ycen; diff --git 
a/vpr/src/noc/noc_router.cpp b/vpr/src/noc/noc_router.cpp index 74eb32edb26..5ea2c05c1b9 100644 --- a/vpr/src/noc/noc_router.cpp +++ b/vpr/src/noc/noc_router.cpp @@ -1,10 +1,11 @@ #include "noc_router.h" // constructor -NocRouter::NocRouter(int id, int grid_position_x, int grid_position_y) +NocRouter::NocRouter(int id, int grid_position_x, int grid_position_y, int layer_position) : router_user_id(id) , router_grid_position_x(grid_position_x) - , router_grid_position_y(grid_position_y) { + , router_grid_position_y(grid_position_y) + , router_layer_position(layer_position) { // initialize variables router_block_ref = ClusterBlockId(0); } @@ -22,6 +23,10 @@ int NocRouter::get_router_grid_position_y(void) const { return router_grid_position_y; } +int NocRouter::get_router_layer_position(void) const { + return router_layer_position; +} + ClusterBlockId NocRouter::get_router_block_ref(void) const { return router_block_ref; } diff --git a/vpr/src/noc/noc_router.h b/vpr/src/noc/noc_router.h index 337dabc7921..a48c64cb971 100644 --- a/vpr/src/noc/noc_router.h +++ b/vpr/src/noc/noc_router.h @@ -45,11 +45,12 @@ class NocRouter { // device position of the physical router tile int router_grid_position_x; /*(router_key, converted_id)); return; @@ -123,6 +125,12 @@ void NocStorage::set_device_grid_width(int grid_width) { return; } +void NocStorage::set_device_grid_spec(int grid_width, int grid_height) { + device_grid_width = grid_width; + num_layer_blocks = grid_width * grid_height; + return; +} + bool NocStorage::remove_link(NocRouterId src_router_id, NocRouterId sink_router_id) { // This status variable is used to report externally whether the link was removed or not bool link_removed_status = false; @@ -221,9 +229,9 @@ NocLinkId NocStorage::get_parallel_link(NocLinkId current_link) const { return parallel_link; } -int NocStorage::generate_router_key_from_grid_location(int grid_position_x, int grid_position_y) const { +int NocStorage::generate_router_key_from_grid_location(int 
grid_position_x, int grid_position_y, int layer_position) const { // calculate the key value - return (device_grid_width * grid_position_y + grid_position_x); + return (num_layer_blocks * layer_position + device_grid_width * grid_position_y + grid_position_x); } void NocStorage::echo_noc(char* file_name) const { diff --git a/vpr/src/noc/noc_storage.h b/vpr/src/noc/noc_storage.h index c1d1e025af0..71e95b8f838 100644 --- a/vpr/src/noc/noc_storage.h +++ b/vpr/src/noc/noc_storage.h @@ -138,6 +138,7 @@ class NocStorage { * */ int device_grid_width; + int num_layer_blocks; // prevent "copying" of this object NocStorage(const NocStorage&) = delete; @@ -288,7 +289,7 @@ class NocStorage { * @param grid_position_y The vertical position on the FPGA of the physical * tile that this router represents. */ - void add_router(int id, int grid_position_x, int grid_position_y); + void add_router(int id, int grid_position_x, int grid_position_y, int layer_position); /** * @brief Creates a new link and adds it to the NoC. The newly created @@ -336,7 +337,9 @@ class NocStorage { void set_device_grid_width(int grid_width); - // general utility functions + void set_device_grid_spec(int grid_width, int grid_height); + + // general utility functions /** * @brief The link is removed from the outgoing vector of links for * the source router. The link is not removed from the vector of all @@ -433,12 +436,17 @@ class NocStorage { * * @param grid_position_x The horizontal position on the FPGA of the physical * tile that this router represents. - * @param grid_position_y The vertical position on the FPGA of the physical - * tile that this router represents. + * + * @param grid_position_y The vertical position on the FPGA of the physical + * tile that this router represents. + * + * @param layer_position The layer number of the physical + * tile that this router represents. + * * @return int Represents a unique key that can be used to identify a * hard router block. 
*/ - int generate_router_key_from_grid_location(int grid_position_x, int grid_position_y) const; + int generate_router_key_from_grid_location(int grid_position_x, int grid_position_y, int layer_position) const; /** * @brief Writes out the NocStorage class information to a file. diff --git a/vpr/src/noc/read_xml_noc_traffic_flows_file.cpp b/vpr/src/noc/read_xml_noc_traffic_flows_file.cpp index 7a20109d041..b785d2c4da6 100644 --- a/vpr/src/noc/read_xml_noc_traffic_flows_file.cpp +++ b/vpr/src/noc/read_xml_noc_traffic_flows_file.cpp @@ -252,7 +252,9 @@ t_physical_tile_type_ptr get_physical_type_of_noc_router_tile(const DeviceContex VTR_ASSERT(physical_noc_router != noc_ctx.noc_model.get_noc_routers().end()); //Using the routers grid position go to the device and identify the physical type of the tile located there. - return device_ctx.grid.get_physical_type(physical_noc_router->get_router_grid_position_x(), physical_noc_router->get_router_grid_position_y()); + return device_ctx.grid.get_physical_type({physical_noc_router->get_router_grid_position_x(), + physical_noc_router->get_router_grid_position_y(), + physical_noc_router->get_router_layer_position()}); } bool check_that_all_router_blocks_have_an_associated_traffic_flow(NocContext& noc_ctx, t_physical_tile_type_ptr noc_router_tile_type, std::string noc_flows_file) { diff --git a/vpr/src/pack/cluster_util.cpp b/vpr/src/pack/cluster_util.cpp index 8bc8e87923d..0e12305dc70 100644 --- a/vpr/src/pack/cluster_util.cpp +++ b/vpr/src/pack/cluster_util.cpp @@ -2078,9 +2078,9 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, int rhs_num_instances = 0; // Count number of instances for each type for (auto type : lhs->equivalent_tiles) - lhs_num_instances += device_ctx.grid.num_instances(type); + lhs_num_instances += device_ctx.grid.num_instances(type, -1); for (auto type : rhs->equivalent_tiles) - rhs_num_instances += device_ctx.grid.num_instances(type); + rhs_num_instances += 
device_ctx.grid.num_instances(type, -1); float lhs_util = vtr::safe_ratio(num_used_type_instances[lhs], lhs_num_instances); float rhs_util = vtr::safe_ratio(num_used_type_instances[rhs], rhs_num_instances); @@ -2179,7 +2179,7 @@ void start_new_cluster(t_cluster_placement_stats* cluster_placement_stats, // Check used type instances against the possible equivalent physical locations unsigned int num_instances = 0; for (auto equivalent_tile : block_type->equivalent_tiles) { - num_instances += device_ctx.grid.num_instances(equivalent_tile); + num_instances += device_ctx.grid.num_instances(equivalent_tile, -1); } if (num_used_type_instances[block_type] > num_instances) { diff --git a/vpr/src/pack/constraints_report.cpp b/vpr/src/pack/constraints_report.cpp index 77e612cc8b3..f75823aefab 100644 --- a/vpr/src/pack/constraints_report.cpp +++ b/vpr/src/pack/constraints_report.cpp @@ -42,7 +42,7 @@ bool floorplan_constraints_regions_overfull() { bool floorplan_regions_overfull = false; for (auto& region_info : regions_count_info) { - vtr::Rect rect = region_info.first.get_region_rect(); + const auto rect = region_info.first.get_region_rect(); for (unsigned int j = 0; j < block_types.size(); j++) { int num_assigned_blocks = region_info.second[j]; int num_tiles = 0; @@ -50,7 +50,7 @@ bool floorplan_constraints_regions_overfull() { if (num_assigned_blocks > num_tiles) { floorplan_regions_overfull = true; floorplanning_ctx.overfull_regions.push_back(region_info.first); - VTR_LOG("\n \nRegion (%d, %d) to (%d, %d) st %d \n", rect.xmin(), rect.ymin(), rect.xmax(), rect.ymax(), region_info.first.get_sub_tile()); + VTR_LOG("\n \nRegion (%d, %d) to (%d, %d) st %d \n", rect.xmin, rect.ymin, rect.xmax, rect.ymax, region_info.first.get_sub_tile()); VTR_LOG("Assigned %d blocks of type %s, but only has %d tiles of that type\n", num_assigned_blocks, block_types[j].name, num_tiles); } } diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index a1868c80778..252dc37a98d 100644 --- 
a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -237,7 +237,7 @@ bool try_pack(t_packer_opts* packer_opts, int num_instances = 0; for (auto type : iter->first->equivalent_tiles) - num_instances += grid.num_instances(type); + num_instances += grid.num_instances(type, -1); resource_avail += std::string(iter->first->name) + ": " + std::to_string(num_instances); } @@ -369,7 +369,7 @@ static bool try_size_device_grid(const t_arch& arch, const std::map& net_li const DeviceContext& device_ctx, ClusteringContext& clustering_ctx, const vtr::vector& rr_node_nets, - const vtr::Point& grid_coord, + const t_pl_loc& grid_coord, const ClusterBlockId& blk_id, - const int& sub_tile_z, size_t& num_mismatches, const bool& verbose, bool is_flat) { + const int sub_tile_z = grid_coord.sub_tile; + const int coord_x = grid_coord.x; + const int coord_y = grid_coord.y; + const int coord_layer = grid_coord.layer; const auto& node_lookup = device_ctx.rr_graph.node_lookup(); /* Handle each pin */ auto logical_block = clustering_ctx.clb_nlist.block_type(blk_id); - auto physical_tile = device_ctx.grid.get_physical_type(grid_coord.x(), grid_coord.y()); + auto physical_tile = device_ctx.grid.get_physical_type({coord_x, coord_y, coord_layer}); /* Narrow down side search for grids * The wanted side depends on the location of the grid. 
@@ -87,16 +90,16 @@ static void update_cluster_pin_with_post_routing_results(const Netlist<>& net_li * ------------------------------------------------------- */ std::vector wanted_sides; - if (device_ctx.grid.height() - 1 == grid_coord.y()) { /* TOP side */ + if ((int)device_ctx.grid.height() - 1 == coord_y) { /* TOP side */ wanted_sides.push_back(BOTTOM); } - if (device_ctx.grid.width() - 1 == grid_coord.x()) { /* RIGHT side */ + if ((int)device_ctx.grid.width() - 1 == coord_x) { /* RIGHT side */ wanted_sides.push_back(LEFT); } - if (0 == grid_coord.y()) { /* BOTTOM side */ + if (0 == coord_y) { /* BOTTOM side */ wanted_sides.push_back(TOP); } - if (0 == grid_coord.x()) { /* LEFT side */ + if (0 == coord_x) { /* LEFT side */ wanted_sides.push_back(RIGHT); } @@ -155,7 +158,7 @@ static void update_cluster_pin_with_post_routing_results(const Netlist<>& net_li short valid_routing_net_cnt = 0; for (const e_side& pin_side : pin_sides) { /* Find the net mapped to this pin in routing results */ - RRNodeId rr_node = node_lookup.find_node(grid_coord.x(), grid_coord.y(), rr_node_type, physical_pin, pin_side); + RRNodeId rr_node = node_lookup.find_node(coord_layer, coord_x, coord_y, rr_node_type, physical_pin, pin_side); /* Bypass invalid nodes, after that we must have a valid rr_node id */ if (!rr_node) { @@ -238,13 +241,14 @@ static void update_cluster_pin_with_post_routing_results(const Netlist<>& net_li } VTR_LOGV(verbose, - "Fixed up net '%s' mapping mismatch at clustered block '%s' pin 'grid[%ld][%ld].%s.%s[%d]' (was net '%s')\n", + "Fixed up net '%s' mapping mismatch at clustered block '%s' pin 'grid[%ld][%ld].%s.%s[%d] - layer %d' (was net '%s')\n", routing_net_name.c_str(), clustering_ctx.clb_nlist.block_pb(blk_id)->name, - grid_coord.x(), grid_coord.y(), + coord_x, coord_y, clustering_ctx.clb_nlist.block_pb(blk_id)->pb_graph_node->pb_type->name, get_pb_graph_node_pin_from_block_pin(blk_id, physical_pin)->port->name, get_pb_graph_node_pin_from_block_pin(blk_id, 
physical_pin)->pin_number, + coord_layer, cluster_net_name.c_str()); /* Update counter */ @@ -1079,8 +1083,6 @@ void sync_netlists_to_routing(const Netlist<>& net_list, clb_blk_id = convert_to_cluster_block_id(blk_id); } VTR_ASSERT(clb_blk_id != ClusterBlockId::INVALID()); - vtr::Point grid_coord(placement_ctx.block_locs[clb_blk_id].loc.x, - placement_ctx.block_locs[clb_blk_id].loc.y); if (seen_block_ids.insert(clb_blk_id).second) { update_cluster_pin_with_post_routing_results(net_list, @@ -1088,9 +1090,8 @@ void sync_netlists_to_routing(const Netlist<>& net_list, device_ctx, clustering_ctx, rr_node_nets, - grid_coord, + placement_ctx.block_locs[clb_blk_id].loc, clb_blk_id, - placement_ctx.block_locs[clb_blk_id].loc.sub_tile, num_mismatches, verbose, is_flat); diff --git a/vpr/src/pack/re_cluster.cpp b/vpr/src/pack/re_cluster.cpp index 92f52b8f8e5..34e0ada9669 100644 --- a/vpr/src/pack/re_cluster.cpp +++ b/vpr/src/pack/re_cluster.cpp @@ -26,7 +26,7 @@ bool move_mol_to_new_cluster(t_pack_molecule* molecule, unsigned int num_instances = 0; for (auto equivalent_tile : block_type->equivalent_tiles) { - num_instances += device_ctx.grid.num_instances(equivalent_tile); + num_instances += device_ctx.grid.num_instances(equivalent_tile, -1); } if (helper_ctx.num_used_type_instances[block_type] == num_instances) { diff --git a/vpr/src/place/centroid_move_generator.cpp b/vpr/src/place/centroid_move_generator.cpp index 68b579787d6..22e2a4ed6a9 100644 --- a/vpr/src/place/centroid_move_generator.cpp +++ b/vpr/src/place/centroid_move_generator.cpp @@ -20,7 +20,7 @@ e_create_move CentroidMoveGenerator::propose_move(t_pl_blocks_to_be_moved& block t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = device_ctx.grid.get_physical_type(from.x, from.y); + auto grid_from_type = device_ctx.grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, 
cluster_from_type)); t_range_limiters range_limiters; diff --git a/vpr/src/place/compressed_grid.cpp b/vpr/src/place/compressed_grid.cpp index cb7e184845b..7ba21771524 100644 --- a/vpr/src/place/compressed_grid.cpp +++ b/vpr/src/place/compressed_grid.cpp @@ -5,20 +5,27 @@ std::vector create_compressed_block_grids() { auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; + const int num_layers = grid.get_num_layers(); //Collect the set of x/y locations for each instace of a block type - std::vector>> block_locations(device_ctx.logical_block_types.size()); - for (size_t x = 0; x < grid.width(); ++x) { - for (size_t y = 0; y < grid.height(); ++y) { - int width_offset = grid.get_width_offset(x, y); - int height_offset = grid.get_height_offset(x, y); - if (width_offset == 0 && height_offset == 0) { - const auto& type = grid.get_physical_type(x, y); - auto equivalent_sites = get_equivalent_sites_set(type); - - for (auto& block : equivalent_sites) { - //Only record at block root location - block_locations[block->index].emplace_back(x, y); + std::vector>>> block_locations(device_ctx.logical_block_types.size()); // [logical_block_type][layer_num][0...num_instance_on_layer] -> (x, y) + for (int block_type_num = 0; block_type_num < (int)device_ctx.logical_block_types.size(); block_type_num++) { + block_locations[block_type_num].resize(num_layers); + } + + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + for (int x = 0; x < (int)grid.width(); ++x) { + for (int y = 0; y < (int)grid.height(); ++y) { + int width_offset = grid.get_width_offset({x, y, layer_num}); + int height_offset = grid.get_height_offset(t_physical_tile_loc(x, y, layer_num)); + if (width_offset == 0 && height_offset == 0) { + const auto& type = grid.get_physical_type({x, y, layer_num}); + auto equivalent_sites = get_equivalent_sites_set(type); + + for (auto& block : equivalent_sites) { + //Only record at block root location + 
block_locations[block->index][layer_num].emplace_back(x, y); + } } } } @@ -26,7 +33,7 @@ std::vector create_compressed_block_grids() { std::vector compressed_type_grids(device_ctx.logical_block_types.size()); for (const auto& logical_block : device_ctx.logical_block_types) { - auto compressed_block_grid = create_compressed_block_grid(block_locations[logical_block.index]); + auto compressed_block_grid = create_compressed_block_grid(block_locations[logical_block.index], num_layers); for (const auto& physical_tile : logical_block.equivalent_tiles) { std::vector compatible_sub_tiles; @@ -55,7 +62,7 @@ std::vector create_compressed_block_grids() { } //Given a set of locations, returns a 2D matrix in a compressed space -t_compressed_block_grid create_compressed_block_grid(const std::vector>& locations) { +t_compressed_block_grid create_compressed_block_grid(const std::vector>>& locations, int num_layers) { t_compressed_block_grid compressed_grid; if (locations.empty()) { @@ -63,120 +70,111 @@ t_compressed_block_grid create_compressed_block_grid(const std::vector x_locs; - std::vector y_locs; - - //Record all the x/y locations seperately - for (auto point : locations) { - x_locs.emplace_back(point.x()); - y_locs.emplace_back(point.y()); - } + std::vector> x_locs(num_layers); + std::vector> y_locs(num_layers); + compressed_grid.compressed_to_grid_x.resize(num_layers); + compressed_grid.compressed_to_grid_y.resize(num_layers); + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + auto& layer_x_locs = x_locs[layer_num]; + auto& layer_y_locs = y_locs[layer_num]; + //Record all the x/y locations seperately + for (auto point : locations[layer_num]) { + layer_x_locs.emplace_back(point.x()); + layer_y_locs.emplace_back(point.y()); + } - //Uniquify x/y locations - std::sort(x_locs.begin(), x_locs.end()); - x_locs.erase(unique(x_locs.begin(), x_locs.end()), x_locs.end()); + //Uniquify x/y locations + std::sort(layer_x_locs.begin(), layer_x_locs.end()); + 
layer_x_locs.erase(unique(layer_x_locs.begin(), layer_x_locs.end()), layer_x_locs.end()); - std::sort(y_locs.begin(), y_locs.end()); - y_locs.erase(unique(y_locs.begin(), y_locs.end()), y_locs.end()); + std::sort(layer_y_locs.begin(), layer_y_locs.end()); + layer_y_locs.erase(unique(layer_y_locs.begin(), layer_y_locs.end()), layer_y_locs.end()); - //The index of an x-position in x_locs corresponds to it's compressed - //x-coordinate (similarly for y) - compressed_grid.compressed_to_grid_x = x_locs; - compressed_grid.compressed_to_grid_y = y_locs; + //The index of an x-position in x_locs corresponds to it's compressed + //x-coordinate (similarly for y) + if (!layer_x_locs.empty()) { + compressed_grid.compressed_to_grid_layer.push_back(layer_num); + } + compressed_grid.compressed_to_grid_x[layer_num] = std::move(layer_x_locs); + compressed_grid.compressed_to_grid_y[layer_num] = std::move(layer_y_locs); + } } - // - //Build the compressed grid - // - - //Create a full/dense x-dimension (since there must be at least one - //block per x location) - compressed_grid.grid.resize(compressed_grid.compressed_to_grid_x.size()); - - //Fill-in the y-dimensions - // - //Note that we build the y-dimension sparsely (using a flat map), since - //there may not be full columns of blocks at each x location, this makes - //it efficient to find the non-empty blocks in the y dimension - for (auto point : locations) { - //Determine the compressed indices in the x & y dimensions - auto x_itr = std::lower_bound(compressed_grid.compressed_to_grid_x.begin(), compressed_grid.compressed_to_grid_x.end(), point.x()); - int cx = std::distance(compressed_grid.compressed_to_grid_x.begin(), x_itr); - - auto y_itr = std::lower_bound(compressed_grid.compressed_to_grid_y.begin(), compressed_grid.compressed_to_grid_y.end(), point.y()); - int cy = std::distance(compressed_grid.compressed_to_grid_y.begin(), y_itr); - - VTR_ASSERT(cx >= 0 && cx < (int)compressed_grid.compressed_to_grid_x.size()); - 
VTR_ASSERT(cy >= 0 && cy < (int)compressed_grid.compressed_to_grid_y.size()); - - VTR_ASSERT(compressed_grid.compressed_to_grid_x[cx] == point.x()); - VTR_ASSERT(compressed_grid.compressed_to_grid_y[cy] == point.y()); - - auto result = compressed_grid.grid[cx].insert(std::make_pair(cy, t_type_loc(point.x(), point.y()))); - - VTR_ASSERT_MSG(result.second, "Duplicates should not exist in compressed grid space"); + compressed_grid.grid.resize(num_layers); + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + auto& layer_compressed_grid = compressed_grid.grid[layer_num]; + const auto& layer_compressed_x_locs = compressed_grid.compressed_to_grid_x[layer_num]; + const auto& layer_compressed_y_locs = compressed_grid.compressed_to_grid_y[layer_num]; + // + //Build the compressed grid + // + + //Create a full/dense x-dimension (since there must be at least one + //block per x location) + layer_compressed_grid.resize(layer_compressed_x_locs.size()); + + //Fill-in the y-dimensions + // + //Note that we build the y-dimension sparsely (using a flat map), since + //there may not be full columns of blocks at each x location, this makes + //it efficient to find the non-empty blocks in the y dimension + for (auto point : locations[layer_num]) { + //Determine the compressed indices in the x & y dimensions + auto x_itr = std::lower_bound(layer_compressed_x_locs.begin(), layer_compressed_x_locs.end(), point.x()); + int cx = std::distance(layer_compressed_x_locs.begin(), x_itr); + + auto y_itr = std::lower_bound(layer_compressed_y_locs.begin(), layer_compressed_y_locs.end(), point.y()); + int cy = std::distance(layer_compressed_y_locs.begin(), y_itr); + + VTR_ASSERT(cx >= 0 && cx < (int)layer_compressed_x_locs.size()); + VTR_ASSERT(cy >= 0 && cy < (int)layer_compressed_y_locs.size()); + + VTR_ASSERT(layer_compressed_x_locs[cx] == point.x()); + VTR_ASSERT(layer_compressed_y_locs[cy] == point.y()); + + auto result = layer_compressed_grid[cx].insert(std::make_pair(cy, 
t_physical_tile_loc(point.x(), point.y(), layer_num))); + + VTR_ASSERT_MSG(result.second, "Duplicates should not exist in compressed grid space"); + } } return compressed_grid; } -int grid_to_compressed(const std::vector& coords, int point) { - auto itr = std::lower_bound(coords.begin(), coords.end(), point); - VTR_ASSERT(*itr == point); - - return std::distance(coords.begin(), itr); -} - -/** - * @brief find the nearest location in the compressed grid. - * - * Useful when the point is of a different block type from coords. - * - * @param point represents a coordinate in one dimension of the point - * @param coords represents vector of coordinate values of a single type only - * - * Hence, the exact point coordinate will not be found in coords if they are of different block types. In this case the function will return - * the nearest compressed location to point by rounding it down - */ -int grid_to_compressed_approx(const std::vector& coords, int point) { - auto itr = std::lower_bound(coords.begin(), coords.end(), point); - if (itr == coords.end()) - return std::distance(coords.begin(), itr - 1); - return std::distance(coords.begin(), itr); -} - /*Print the contents of the compressed grids to an echo file*/ void echo_compressed_grids(char* filename, const std::vector& comp_grids) { FILE* fp; fp = vtr::fopen(filename, "w"); auto& device_ctx = g_vpr_ctx.device(); + int num_layers = device_ctx.grid.get_num_layers(); fprintf(fp, "--------------------------------------------------------------\n"); fprintf(fp, "Compressed Grids: \n"); fprintf(fp, "--------------------------------------------------------------\n"); fprintf(fp, "\n"); - - for (int i = 0; i < (int)comp_grids.size(); i++) { - fprintf(fp, "\n\nGrid type: %s \n", device_ctx.logical_block_types[i].name); - - fprintf(fp, "X coordinates: \n"); - for (int j = 0; j < (int)comp_grids[i].compressed_to_grid_x.size(); j++) { - fprintf(fp, "%d ", comp_grids[i].compressed_to_grid_x[j]); - } + for (int layer_num = 0; 
layer_num < num_layers; layer_num++) { + fprintf(fp, "Layer Num: %d \n", layer_num); + fprintf(fp, "--------------------------------------------------------------\n"); fprintf(fp, "\n"); + for (int i = 0; i < (int)comp_grids.size(); i++) { + fprintf(fp, "\n\nGrid type: %s \n", device_ctx.logical_block_types[i].name); - fprintf(fp, "Y coordinates: \n"); - for (int k = 0; k < (int)comp_grids[i].compressed_to_grid_y.size(); k++) { - fprintf(fp, "%d ", comp_grids[i].compressed_to_grid_y[k]); - } - fprintf(fp, "\n"); + fprintf(fp, "X coordinates: \n"); + for (int j = 0; j < (int)comp_grids[i].compressed_to_grid_x.size(); j++) { + auto grid_loc = comp_grids[i].compressed_loc_to_grid_loc({j, 0, layer_num}); + fprintf(fp, "%d ", grid_loc.x); + } + fprintf(fp, "\n"); - fprintf(fp, "Subtiles: \n"); - for (int s = 0; s < (int)comp_grids[i].compatible_sub_tiles_for_tile.size(); s++) { - fprintf(fp, "%d ", comp_grids[i].compressed_to_grid_y[s]); + fprintf(fp, "Y coordinates: \n"); + for (int k = 0; k < (int)comp_grids[i].compressed_to_grid_y.size(); k++) { + auto grid_loc = comp_grids[i].compressed_loc_to_grid_loc({0, k, layer_num}); + fprintf(fp, "%d ", grid_loc.y); + } + fprintf(fp, "\n"); + //TODO: Print the compatible sub-tiles for a logical block type } - fprintf(fp, "\n"); } fclose(fp); diff --git a/vpr/src/place/compressed_grid.h b/vpr/src/place/compressed_grid.h index 49c652607fb..067815591cc 100644 --- a/vpr/src/place/compressed_grid.h +++ b/vpr/src/place/compressed_grid.h @@ -6,38 +6,102 @@ #include "vtr_geometry.h" #include "vtr_flat_map.h" -struct t_type_loc { - int x = OPEN; - int y = OPEN; - - t_type_loc(int x_val, int y_val) - : x(x_val) - , y(y_val) {} - - //Returns true if this type location has valid x/y values - operator bool() const { - return !(x == OPEN || y == OPEN); - } -}; - struct t_compressed_block_grid { + // The compressed grid of a block type stores only the coordinates that are occupied by that particular block type. 
+ // For instance, if a DSP block exists only in the 2nd, 3rd, and 5th columns, the compressed grid of X axis will solely store the values 2, 3, and 5. + // Consequently, compressed_to_grid_x will contain only three members. The same approach is applicable to other compressed directions. + // This compressed data structure helps to move blocks in a more efficient way. For instance, if I need to move a DSP block to the next compatible column, I can simply get + // the next compatible column number by accessing the next element in the compressed grid instead of iterating over all columns to find the next compatible column. //If 'cx' is an index in the compressed grid space, then //'compressed_to_grid_x[cx]' is the corresponding location in the //full (uncompressed) device grid. - std::vector compressed_to_grid_x; - std::vector compressed_to_grid_y; + std::vector> compressed_to_grid_x; // [0...num_layers-1][0...num_columns-1] -> uncompressed x + std::vector> compressed_to_grid_y; // [0...num_layers-1][0...num_rows-1] -> uncompressed y + std::vector compressed_to_grid_layer; // [0...num_layers-1] -> uncompressed layer //The grid is stored with a full/dense x-dimension (since only //x values which exist are considered), while the y-dimension is //stored sparsely, since we may not have full columns of blocks. 
//This makes it easy to check whether there exist - std::vector> grid; + std::vector>> grid; //The sub type compatibility for a given physical tile and a compressed block grid //corresponding to the possible placement location for a given logical block // - key: physical tile index // - value: vector of compatible sub tiles for the physical tile/logical block pair std::unordered_map> compatible_sub_tiles_for_tile; + + inline size_t get_num_columns(int layer_num) const { + return compressed_to_grid_x[layer_num].size(); + } + + inline size_t get_num_rows(int layer_num) const { + return compressed_to_grid_y[layer_num].size(); + } + + inline t_physical_tile_loc grid_loc_to_compressed_loc(t_physical_tile_loc grid_loc) const { + int cx = OPEN; + int cy = OPEN; + int layer_num = grid_loc.layer_num; + + auto itr_x = std::lower_bound(compressed_to_grid_x[layer_num].begin(), compressed_to_grid_x[layer_num].end(), grid_loc.x); + VTR_ASSERT(*itr_x == grid_loc.x); + cx = std::distance(compressed_to_grid_x[layer_num].begin(), itr_x); + + auto itr_y = std::lower_bound(compressed_to_grid_y[layer_num].begin(), compressed_to_grid_y[layer_num].end(), grid_loc.y); + VTR_ASSERT(*itr_y == grid_loc.y); + cy = std::distance(compressed_to_grid_y[layer_num].begin(), itr_y); + + return {cx, cy, layer_num}; + } + + /** + * @brief find the nearest location in the compressed grid. + * + * Useful when the point is of a different block type from coords. + * + * @param point represents a coordinate in one dimension of the point + * @param coords represents vector of coordinate values of a single type only + * + * Hence, the exact point coordinate will not be found in coords if they are of different block types. 
In this case the function will return + * the nearest compressed location to point by rounding it down + */ + inline t_physical_tile_loc grid_loc_to_compressed_loc_approx(t_physical_tile_loc grid_loc) const { + int cx = OPEN; + int cy = OPEN; + int layer_num = grid_loc.layer_num; + + auto itr_x = std::lower_bound(compressed_to_grid_x[layer_num].begin(), compressed_to_grid_x[layer_num].end(), grid_loc.x); + if (itr_x == compressed_to_grid_x[layer_num].end()) + cx = std::distance(compressed_to_grid_x[layer_num].begin(), itr_x - 1); + else + cx = std::distance(compressed_to_grid_x[layer_num].begin(), itr_x); + + auto itr_y = std::lower_bound(compressed_to_grid_y[layer_num].begin(), compressed_to_grid_y[layer_num].end(), grid_loc.y); + if (itr_y == compressed_to_grid_y[layer_num].end()) + cy = std::distance(compressed_to_grid_y[layer_num].begin(), itr_y - 1); + else + cy = std::distance(compressed_to_grid_y[layer_num].begin(), itr_y); + + return {cx, cy, layer_num}; + } + + inline t_physical_tile_loc compressed_loc_to_grid_loc(t_physical_tile_loc compressed_loc) const { + int layer_num = compressed_loc.layer_num; + return {compressed_to_grid_x[layer_num][compressed_loc.x], compressed_to_grid_y[layer_num][compressed_loc.y], layer_num}; + } + + inline const std::vector& compatible_sub_tile_num(int physical_type_index) const { + return compatible_sub_tiles_for_tile.at(physical_type_index); + } + + inline const vtr::flat_map2& get_column_block_map(int cx, int layer_num) const { + return grid[layer_num][cx]; + } + + inline const std::vector& get_layer_nums() const { + return compressed_to_grid_layer; + } }; //Compressed grid space for each block type @@ -47,22 +111,7 @@ typedef std::vector t_compressed_block_grids; std::vector create_compressed_block_grids(); -t_compressed_block_grid create_compressed_block_grid(const std::vector>& locations); - -int grid_to_compressed(const std::vector& coords, int point); - -/** - * @brief find the nearest location in the compressed grid. 
- * - * Useful when the point is of a different block type from coords. - * - * @param point represents a coordinate in one dimension of the point - * @param coords represents vector of coordinate values of a single type only - * - * Hence, the exact point coordinate will not be found in coords if they are of different block types. In this case the function will return - * the nearest compressed location to point by rounding it down - */ -int grid_to_compressed_approx(const std::vector& coords, int point); +t_compressed_block_grid create_compressed_block_grid(const std::vector>>& locations, int num_layers); /** * @brief print the contents of the compressed grids to an echo file diff --git a/vpr/src/place/critical_uniform_move_generator.cpp b/vpr/src/place/critical_uniform_move_generator.cpp index 4c50c4688ab..32d531138a5 100644 --- a/vpr/src/place/critical_uniform_move_generator.cpp +++ b/vpr/src/place/critical_uniform_move_generator.cpp @@ -18,7 +18,7 @@ e_create_move CriticalUniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); t_pl_loc to; diff --git a/vpr/src/place/cut_spreader.cpp b/vpr/src/place/cut_spreader.cpp index 4bce2068b70..695b8ffc275 100644 --- a/vpr/src/place/cut_spreader.cpp +++ b/vpr/src/place/cut_spreader.cpp @@ -175,14 +175,16 @@ void CutSpreader::init() { } int CutSpreader::occ_at(int x, int y) { - if (!is_loc_on_chip(x, y)) { + //TODO: layer_num should be passed + if (!is_loc_on_chip({x, y, 0})) { return 0; } return occupancy[x][y]; } int CutSpreader::tiles_at(int x, int y) { - if (!is_loc_on_chip(x, y)) { + //TODO: layer_num should be passed + if (!is_loc_on_chip({x, y, 0})) { 
return 0; } return int(subtiles_at_location[x][y].size()); @@ -200,7 +202,8 @@ int CutSpreader::tiles_at(int x, int y) { void CutSpreader::merge_regions(SpreaderRegion& merged, SpreaderRegion& mergee) { for (int x = mergee.bb.xmin(); x <= mergee.bb.xmax(); x++) for (int y = mergee.bb.ymin(); y <= mergee.bb.ymax(); y++) { - if (!is_loc_on_chip(x, y)) { //location is not within the chip + //TODO: layer_num should be passed + if (!is_loc_on_chip({x, y, 0})) { //location is not within the chip continue; } //x and y might belong to "merged" region already, no further action is required @@ -235,7 +238,8 @@ void CutSpreader::grow_region(SpreaderRegion& r, vtr::Rect rect_to_include, auto process_location = [&](int x, int y) { //x and y should represent a location on the chip, otherwise no processing is required - if (!is_loc_on_chip(x, y)) { + //TODO: layer_num should be passed + if (!is_loc_on_chip({x, y, 0})) { return; } // kicks in only when grid is not claimed, claimed by another region, or part of a macro @@ -403,6 +407,10 @@ std::pair CutSpreader::cut_region(SpreaderRegion& r, bool dir) { const ClusteredNetlist& clb_nlist = g_vpr_ctx.clustering().clb_nlist; PlacementContext& place_ctx = g_vpr_ctx.mutable_placement(); + // TODO: CutSpreader is not compatible with 3D FPGA + VTR_ASSERT(device_ctx.grid.get_num_layers() == 1); + int layer_num = 0; + std::vector cut_blks; init_cut_blks(r, cut_blks); // copy all logic blocks to cut into cut_blks @@ -416,13 +424,13 @@ std::pair CutSpreader::cut_region(SpreaderRegion& r, bool dir) { auto blk = cut_blks.at(0); auto& tiles_type = clb_nlist.block_type(blk)->equivalent_tiles; auto loc = ap->blk_locs[blk].loc; - if (std::find(tiles_type.begin(), tiles_type.end(), device_ctx.grid.get_physical_type(loc.x, loc.y)) == tiles_type.end()) { + if (std::find(tiles_type.begin(), tiles_type.end(), device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer})) == tiles_type.end()) { // logic block type doesn't match tile type // exhaustive 
search for tile of right type // this search should be fast as region must be small at this point (only 1 logic block left) for (int x = r.bb.xmin(); x <= r.bb.xmax(); x++) for (int y = r.bb.ymin(); y <= r.bb.ymax(); y++) { - if (std::find(tiles_type.begin(), tiles_type.end(), device_ctx.grid.get_physical_type(x, y)) != tiles_type.end()) { + if (std::find(tiles_type.begin(), tiles_type.end(), device_ctx.grid.get_physical_type({x, y, layer_num})) != tiles_type.end()) { VTR_ASSERT(blks_at_location[x][y].empty()); ap->blk_locs[blk].rawx = x; ap->blk_locs[blk].rawy = y; @@ -954,11 +962,12 @@ void CutSpreader::strict_legalize() { */ void CutSpreader::bind_tile(t_pl_loc sub_tile, ClusterBlockId blk) { auto& place_ctx = g_vpr_ctx.mutable_placement(); - VTR_ASSERT(place_ctx.grid_blocks[sub_tile.x][sub_tile.y].blocks[sub_tile.sub_tile] == EMPTY_BLOCK_ID); + VTR_ASSERT(place_ctx.grid_blocks.block_at_location(sub_tile) == EMPTY_BLOCK_ID); VTR_ASSERT(place_ctx.block_locs[blk].is_fixed == false); - place_ctx.grid_blocks[sub_tile.x][sub_tile.y].blocks[sub_tile.sub_tile] = blk; + place_ctx.grid_blocks.set_block_at_location(sub_tile, blk); place_ctx.block_locs[blk].loc = sub_tile; - place_ctx.grid_blocks[sub_tile.x][sub_tile.y].usage++; + place_ctx.grid_blocks.set_usage({sub_tile.x, sub_tile.y, sub_tile.layer}, + place_ctx.grid_blocks.get_usage({sub_tile.x, sub_tile.y, sub_tile.layer}) + 1); ap->blk_locs[blk].loc = sub_tile; } @@ -968,12 +977,13 @@ void CutSpreader::bind_tile(t_pl_loc sub_tile, ClusterBlockId blk) { */ void CutSpreader::unbind_tile(t_pl_loc sub_tile) { auto& place_ctx = g_vpr_ctx.mutable_placement(); - VTR_ASSERT(place_ctx.grid_blocks[sub_tile.x][sub_tile.y].blocks[sub_tile.sub_tile] != EMPTY_BLOCK_ID); - ClusterBlockId blk = place_ctx.grid_blocks[sub_tile.x][sub_tile.y].blocks[sub_tile.sub_tile]; + VTR_ASSERT(place_ctx.grid_blocks.block_at_location(sub_tile) != EMPTY_BLOCK_ID); + ClusterBlockId blk = place_ctx.grid_blocks.block_at_location(sub_tile); 
VTR_ASSERT(place_ctx.block_locs[blk].is_fixed == false); place_ctx.block_locs[blk].loc = t_pl_loc{}; - place_ctx.grid_blocks[sub_tile.x][sub_tile.y].blocks[sub_tile.sub_tile] = EMPTY_BLOCK_ID; - place_ctx.grid_blocks[sub_tile.x][sub_tile.y].usage--; + place_ctx.grid_blocks.set_block_at_location(sub_tile, EMPTY_BLOCK_ID); + place_ctx.grid_blocks.set_usage({sub_tile.x, sub_tile.y, sub_tile.layer}, + place_ctx.grid_blocks.get_usage({sub_tile.x, sub_tile.y, sub_tile.layer}) - 1); } /* @@ -985,7 +995,7 @@ bool CutSpreader::is_placed(ClusterBlockId blk) { auto& place_ctx = g_vpr_ctx.mutable_placement(); if (place_ctx.block_locs[blk].loc != t_pl_loc{}) { auto loc = place_ctx.block_locs[blk].loc; - VTR_ASSERT(place_ctx.grid_blocks[loc.x][loc.y].blocks[loc.sub_tile] == blk); + VTR_ASSERT(place_ctx.grid_blocks.block_at_location(loc) == blk); return true; } return false; @@ -1022,7 +1032,7 @@ bool CutSpreader::try_place_blk(ClusterBlockId blk, // then blk is placed in best_subtile if (exceeds_explore_limit && best_subtile != t_pl_loc{}) { // find the logic block bound to (placed on) best_subtile - ClusterBlockId bound_blk = place_ctx.grid_blocks[best_subtile.x][best_subtile.y].blocks[best_subtile.sub_tile]; + ClusterBlockId bound_blk = place_ctx.grid_blocks.block_at_location(best_subtile); if (bound_blk != EMPTY_BLOCK_ID) { // if best_subtile has a logic block unbind_tile(best_subtile); // clear bound_block and best_subtile's placement info remaining.emplace(1, bound_blk); // put bound_blk back into remaining blocks to place @@ -1032,8 +1042,8 @@ bool CutSpreader::try_place_blk(ClusterBlockId blk, } // if exploration limit is not met or a candidate sub_tile is not found yet - for (auto sub_t : subtiles_at_location[nx][ny]) { // for each available sub_tile at random location - ClusterBlockId bound_blk = place_ctx.grid_blocks[sub_t.x][sub_t.y].blocks[sub_t.sub_tile]; // logic blk at [nx, ny] + for (auto sub_t : subtiles_at_location[nx][ny]) { // for each available sub_tile at 
random location + ClusterBlockId bound_blk = place_ctx.grid_blocks.block_at_location(sub_t); // logic blk at [nx, ny] if (bound_blk == EMPTY_BLOCK_ID || ripup_radius_met || rand() % (20000) < 10) { @@ -1109,7 +1119,7 @@ bool CutSpreader::try_place_macro(ClusterBlockId blk, // ensure the target location has compatible tile auto blk_t = clb_nlist.block_type(blk); - auto result = std::find(blk_t->equivalent_tiles.begin(), blk_t->equivalent_tiles.end(), g_vpr_ctx.device().grid.get_physical_type(target.x, target.y)); + auto result = std::find(blk_t->equivalent_tiles.begin(), blk_t->equivalent_tiles.end(), g_vpr_ctx.device().grid.get_physical_type({target.x, target.y, target.layer})); if (result == blk_t->equivalent_tiles.end()) { placement_impossible = true; break; @@ -1117,7 +1127,7 @@ bool CutSpreader::try_place_macro(ClusterBlockId blk, // if the target location has a logic block, ensure it's not part of a macro // because a macro placed before the current one has higher priority (longer chain) - ClusterBlockId bound = place_ctx.grid_blocks[target.x][target.y].blocks[target.sub_tile]; + ClusterBlockId bound = place_ctx.grid_blocks.block_at_location(target); if (bound != EMPTY_BLOCK_ID && imacro(bound) != NO_MACRO) { placement_impossible = true; break; @@ -1136,7 +1146,7 @@ bool CutSpreader::try_place_macro(ClusterBlockId blk, if (!placement_impossible) { // if placement is possible, apply this placement for (auto& target : targets) { - ClusterBlockId bound = place_ctx.grid_blocks[target.second.x][target.second.y].blocks[target.second.sub_tile]; + ClusterBlockId bound = place_ctx.grid_blocks.block_at_location(target.second); if (bound != EMPTY_BLOCK_ID) { // if target location has a logic block, displace it and put it in remaining queue to be placed later unbind_tile(target.second); diff --git a/vpr/src/place/directed_moves_util.cpp b/vpr/src/place/directed_moves_util.cpp index bf412386057..4e1c3f618b1 100644 --- a/vpr/src/place/directed_moves_util.cpp +++ 
b/vpr/src/place/directed_moves_util.cpp @@ -1,6 +1,6 @@ #include "directed_moves_util.h" -void get_coordinate_of_pin(ClusterPinId pin, int& x, int& y) { +void get_coordinate_of_pin(ClusterPinId pin, t_physical_tile_loc& tile_loc) { auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; auto& place_ctx = g_vpr_ctx.placement(); @@ -9,22 +9,27 @@ void get_coordinate_of_pin(ClusterPinId pin, int& x, int& y) { int pnum = tile_pin_index(pin); ClusterBlockId block = cluster_ctx.clb_nlist.pin_block(pin); - x = place_ctx.block_locs[block].loc.x + physical_tile_type(block)->pin_width_offset[pnum]; - y = place_ctx.block_locs[block].loc.y + physical_tile_type(block)->pin_height_offset[pnum]; + tile_loc.x = place_ctx.block_locs[block].loc.x + physical_tile_type(block)->pin_width_offset[pnum]; + tile_loc.y = place_ctx.block_locs[block].loc.y + physical_tile_type(block)->pin_height_offset[pnum]; + tile_loc.layer_num = place_ctx.block_locs[block].loc.layer; - x = std::max(std::min(x, (int)grid.width() - 2), 1); //-2 for no perim channels - y = std::max(std::min(y, (int)grid.height() - 2), 1); //-2 for no perim channels + tile_loc.x = std::max(std::min(tile_loc.x, (int)grid.width() - 2), 1); //-2 for no perim channels + tile_loc.y = std::max(std::min(tile_loc.y, (int)grid.height() - 2), 1); //-2 for no perim channels } void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc& centroid, const PlacerCriticalities* criticalities) { auto& cluster_ctx = g_vpr_ctx.clustering(); - int x, y, ipin; + t_physical_tile_loc tile_loc; + int ipin; float acc_weight = 0; float acc_x = 0; float acc_y = 0; float weight = 1; + int from_block_layer_num = g_vpr_ctx.placement().block_locs[b_from].loc.layer; + VTR_ASSERT(from_block_layer_num != OPEN); + //iterate over the from block pins for (ClusterPinId pin_id : cluster_ctx.clb_nlist.block_pins(b_from)) { ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(pin_id); @@ -56,10 +61,10 @@ void 
calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc weight = 1; } - get_coordinate_of_pin(sink_pin_id, x, y); + get_coordinate_of_pin(sink_pin_id, tile_loc); - acc_x += x * weight; - acc_y += y * weight; + acc_x += tile_loc.x * weight; + acc_y += tile_loc.y * weight; acc_weight += weight; } } @@ -75,10 +80,10 @@ void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc ClusterPinId source_pin = cluster_ctx.clb_nlist.net_driver(net_id); - get_coordinate_of_pin(source_pin, x, y); + get_coordinate_of_pin(source_pin, tile_loc); - acc_x += x * weight; - acc_y += y * weight; + acc_x += tile_loc.x * weight; + acc_y += tile_loc.y * weight; acc_weight += weight; } } @@ -86,6 +91,8 @@ void calculate_centroid_loc(ClusterBlockId b_from, bool timing_weights, t_pl_loc //Calculate the centroid location centroid.x = acc_x / acc_weight; centroid.y = acc_y / acc_weight; + // TODO: For now, we don't move the centroid to a different layer + centroid.layer = from_block_layer_num; } static std::map available_reward_function = { diff --git a/vpr/src/place/directed_moves_util.h b/vpr/src/place/directed_moves_util.h index 602d79312a6..d706028dc04 100644 --- a/vpr/src/place/directed_moves_util.h +++ b/vpr/src/place/directed_moves_util.h @@ -17,7 +17,7 @@ enum e_reward_function { e_reward_function string_to_reward(std::string st); ///@brief Helper function that returns the x, y coordinates of a pin -void get_coordinate_of_pin(ClusterPinId pin, int& x, int& y); +void get_coordinate_of_pin(ClusterPinId pin, t_physical_tile_loc& tile_loc); /** * @brief Calculates the exact centroid location diff --git a/vpr/src/place/feasible_region_move_generator.cpp b/vpr/src/place/feasible_region_move_generator.cpp index 01759fe3ccb..ee69aeda5f0 100644 --- a/vpr/src/place/feasible_region_move_generator.cpp +++ b/vpr/src/place/feasible_region_move_generator.cpp @@ -22,7 +22,7 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& 
//from block data t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); /* Calculate the feasible region */ @@ -113,6 +113,8 @@ e_create_move FeasibleRegionMoveGenerator::propose_move(t_pl_blocks_to_be_moved& t_pl_loc center; center.x = (FR_coords.xmin + FR_coords.xmax) / 2; center.y = (FR_coords.ymin + FR_coords.ymax) / 2; + // TODO: Currently, we don't move blocks between different types of layers + center.layer = from.layer; if (!find_to_loc_centroid(cluster_from_type, from, center, range_limiters, to, b_from)) return e_create_move::ABORT; } diff --git a/vpr/src/place/grid_tile_lookup.cpp b/vpr/src/place/grid_tile_lookup.cpp index 3b4355ef5e1..92504b6cc55 100644 --- a/vpr/src/place/grid_tile_lookup.cpp +++ b/vpr/src/place/grid_tile_lookup.cpp @@ -1,11 +1,11 @@ #include "grid_tile_lookup.h" -void GridTileLookup::fill_type_matrix(t_logical_block_type_ptr block_type, vtr::NdMatrix& type_count) { +void GridTileLookup::fill_type_matrix(t_logical_block_type_ptr block_type, vtr::NdMatrix& type_count) { auto& device_ctx = g_vpr_ctx.device(); - int num_rows = device_ctx.grid.height(); - int num_cols = device_ctx.grid.width(); - + int num_layers = device_ctx.grid.get_num_layers(); + int width = (int)device_ctx.grid.width(); + int height = (int)device_ctx.grid.height(); /* * Iterating through every location on the grid to store the number of subtiles of * the correct type at each location. For each location, we store the cumulative @@ -13,39 +13,45 @@ void GridTileLookup::fill_type_matrix(t_logical_block_type_ptr block_type, vtr:: * subtiles at the location, plus the number of subtiles at the locations above and to * the right of it. 
*/ - for (int i_col = type_count.dim_size(0) - 1; i_col >= 0; i_col--) { - for (int j_row = type_count.dim_size(1) - 1; j_row >= 0; j_row--) { - const auto& tile = device_ctx.grid.get_physical_type(i_col, j_row); - int height_offset = device_ctx.grid.get_height_offset(i_col, j_row); - int width_offset = device_ctx.grid.get_width_offset(i_col, j_row); - type_count[i_col][j_row] = 0; - - if (is_tile_compatible(tile, block_type) && height_offset == 0 && width_offset == 0) { - for (const auto& sub_tile : tile->sub_tiles) { - if (is_sub_tile_compatible(tile, block_type, sub_tile.capacity.low)) { - type_count[i_col][j_row] = sub_tile.capacity.total(); + std::vector layer_acc_type_count(num_layers, 0); + for (int layer_num = num_layers - 1; layer_num >= 0; layer_num--) { + int num_rows = (int)device_ctx.grid.height(); + int num_cols = (int)device_ctx.grid.width(); + + for (int i_col = width - 1; i_col >= 0; i_col--) { + for (int j_row = height - 1; j_row >= 0; j_row--) { + const auto& tile = device_ctx.grid.get_physical_type({i_col, j_row, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({i_col, j_row, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({i_col, j_row, layer_num}); + type_count[layer_num][i_col][j_row] = 0; + + if (is_tile_compatible(tile, block_type) && height_offset == 0 && width_offset == 0) { + for (const auto& sub_tile : tile->sub_tiles) { + if (is_sub_tile_compatible(tile, block_type, sub_tile.capacity.low)) { + type_count[layer_num][i_col][j_row] = sub_tile.capacity.total(); + layer_acc_type_count[layer_num] += sub_tile.capacity.total(); + } } } - } - if (i_col < num_cols - 1) { - type_count[i_col][j_row] += type_count[i_col + 1][j_row]; - } - if (j_row < num_rows - 1) { - type_count[i_col][j_row] += type_count[i_col][j_row + 1]; - } - if (i_col < (num_cols - 1) && j_row < (num_rows - 1)) { - type_count[i_col][j_row] -= type_count[i_col + 1][j_row + 1]; + if (i_col < num_cols - 1) { + 
type_count[layer_num][i_col][j_row] += type_count[layer_num][i_col + 1][j_row]; + } + if (j_row < num_rows - 1) { + type_count[layer_num][i_col][j_row] += type_count[layer_num][i_col][j_row + 1]; + } + if (i_col < (num_cols - 1) && j_row < (num_rows - 1)) { + type_count[layer_num][i_col][j_row] -= type_count[layer_num][i_col + 1][j_row + 1]; + } + if (layer_num < num_layers - 1) { + type_count[layer_num][i_col][j_row] += layer_acc_type_count[layer_num + 1]; + } } } } //The total number of subtiles for the block type will be at [0][0] - max_placement_locations[block_type->index] = type_count[0][0]; -} - -vtr::NdMatrix& GridTileLookup::get_type_grid(t_logical_block_type_ptr block_type) { - return block_type_matrices[block_type->index]; + max_placement_locations[block_type->index] = type_count[0][0][0]; } int GridTileLookup::total_type_tiles(t_logical_block_type_ptr block_type) { @@ -62,41 +68,46 @@ int GridTileLookup::total_type_tiles(t_logical_block_type_ptr block_type) { int GridTileLookup::region_tile_count(const Region& reg, t_logical_block_type_ptr block_type) { auto& device_ctx = g_vpr_ctx.device(); int subtile = reg.get_sub_tile(); - + int layer_num = reg.get_layer_num(); /*Intersect the region with the grid, in case the region passed in goes out of bounds * By intersecting with the grid, we ensure that we are only counting tiles for the part of the * region that fits on the grid.*/ Region grid_reg; - grid_reg.set_region_rect(0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); + grid_reg.set_region_rect({0, + 0, + (int)device_ctx.grid.width() - 1, + (int)device_ctx.grid.height() - 1, + layer_num}); Region intersect_reg; intersect_reg = intersection(reg, grid_reg); - vtr::Rect intersect_rect = intersect_reg.get_region_rect(); + const auto intersect_coord = intersect_reg.get_region_rect(); + VTR_ASSERT(intersect_coord.layer_num == layer_num); - int xmin = intersect_rect.xmin(); - int ymin = intersect_rect.ymin(); - int xmax = intersect_rect.xmax(); 
- int ymax = intersect_rect.ymax(); - auto& type_grid = block_type_matrices[block_type->index]; + int xmin = intersect_coord.xmin; + int ymin = intersect_coord.ymin; + int xmax = intersect_coord.xmax; + int ymax = intersect_coord.ymax; + auto& layer_type_grid = block_type_matrices[block_type->index]; - int xdim = type_grid.dim_size(0); - int ydim = type_grid.dim_size(1); + int xdim = (int)layer_type_grid.dim_size(1); + int ydim = (int)layer_type_grid.dim_size(2); int num_tiles = 0; if (subtile == NO_SUBTILE) { - num_tiles = type_grid[xmin][ymin]; + num_tiles = layer_type_grid[layer_num][xmin][ymin]; if ((ymax + 1) < ydim) { - num_tiles -= type_grid[xmin][ymax + 1]; + num_tiles -= layer_type_grid[layer_num][xmin][ymax + 1]; } if ((xmax + 1) < xdim) { - num_tiles -= type_grid[xmax + 1][ymin]; + num_tiles -= layer_type_grid[layer_num][xmax + 1][ymin]; } if ((xmax + 1) < xdim && (ymax + 1) < ydim) { - num_tiles += type_grid[xmax + 1][ymax + 1]; + num_tiles += layer_type_grid[layer_num][xmax + 1][ymax + 1]; } } else { num_tiles = region_with_subtile_count(reg, block_type); @@ -112,17 +123,18 @@ int GridTileLookup::region_tile_count(const Region& reg, t_logical_block_type_pt int GridTileLookup::region_with_subtile_count(const Region& reg, t_logical_block_type_ptr block_type) { auto& device_ctx = g_vpr_ctx.device(); int num_sub_tiles = 0; - vtr::Rect reg_rect = reg.get_region_rect(); + + const auto reg_coord = reg.get_region_rect(); int subtile = reg.get_sub_tile(); - int xmin = reg_rect.xmin(); - int ymin = reg_rect.ymin(); - int xmax = reg_rect.xmax(); - int ymax = reg_rect.ymax(); + int xmin = reg_coord.xmin; + int ymin = reg_coord.ymin; + int xmax = reg_coord.xmax; + int ymax = reg_coord.ymax; for (int i = xmax; i >= xmin; i--) { for (int j = ymax; j >= ymin; j--) { - const auto& tile = device_ctx.grid.get_physical_type(i, j); + const auto& tile = device_ctx.grid.get_physical_type({i, j, reg_coord.layer_num}); if (is_sub_tile_compatible(tile, block_type, subtile)) { 
num_sub_tiles++; } diff --git a/vpr/src/place/grid_tile_lookup.h b/vpr/src/place/grid_tile_lookup.h index a014e0d5786..3f79e847856 100644 --- a/vpr/src/place/grid_tile_lookup.h +++ b/vpr/src/place/grid_tile_lookup.h @@ -22,15 +22,14 @@ class GridTileLookup { max_placement_locations.resize(device_ctx.logical_block_types.size()); for (const auto& type : device_ctx.logical_block_types) { - vtr::NdMatrix type_count({device_ctx.grid.width(), device_ctx.grid.height()}); + int num_layers = device_ctx.grid.get_num_layers(); + vtr::NdMatrix type_count({static_cast(num_layers), device_ctx.grid.width(), device_ctx.grid.height()}); fill_type_matrix(&type, type_count); block_type_matrices.push_back(type_count); } } - vtr::NdMatrix& get_type_grid(t_logical_block_type_ptr block_type); - - void fill_type_matrix(t_logical_block_type_ptr block_type, vtr::NdMatrix& type_count); + void fill_type_matrix(t_logical_block_type_ptr block_type, vtr::NdMatrix& type_count); int region_tile_count(const Region& reg, t_logical_block_type_ptr block_type); @@ -46,7 +45,7 @@ class GridTileLookup { * give the number of placement locations that are at, or above and to the right of the given [x,y] for * the given block type. */ - std::vector> block_type_matrices; + std::vector> block_type_matrices; /* * Stores the total number of placement locations (i.e. compatible subtiles) for each block type. 
diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index cd484e7f7f3..38bc772fefc 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -271,11 +271,14 @@ static bool is_loc_legal(t_pl_loc& loc, PartitionRegion& pr, t_logical_block_typ //Check if the location is within its constraint region for (auto reg : pr.get_partition_region()) { - if (reg.get_region_rect().contains(vtr::Point(loc.x, loc.y))) { + const auto reg_coord = reg.get_region_rect(); + vtr::Rect reg_rect(reg_coord.xmin, reg_coord.ymin, reg_coord.xmax, reg_coord.ymax); + if (reg_coord.layer_num != loc.layer) continue; + if (reg_rect.contains(vtr::Point(loc.x, loc.y))) { //check if the location is compatible with the block type - const auto& type = grid.get_physical_type(loc.x, loc.y); - int height_offset = grid.get_height_offset(loc.x, loc.y); - int width_offset = grid.get_width_offset(loc.x, loc.y); + const auto& type = grid.get_physical_type({loc.x, loc.y, loc.layer}); + int height_offset = grid.get_height_offset({loc.x, loc.y, loc.layer}); + int width_offset = grid.get_width_offset({loc.x, loc.y, loc.layer}); if (is_tile_compatible(type, block_type)) { //Check if the location is an anchor position if (height_offset == 0 && width_offset == 0) { @@ -291,42 +294,45 @@ static bool is_loc_legal(t_pl_loc& loc, PartitionRegion& pr, t_logical_block_typ static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_ptr block_type) { const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index]; + const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + const int centroid_loc_layer_num = centroid_loc.layer; //Determine centroid location in the compressed space of the current block - int cx_centroid = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, centroid_loc.x); - int cy_centroid = 
grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, centroid_loc.y); + auto compressed_centroid_loc = get_compressed_loc_approx(compressed_block_grid, + centroid_loc, + num_layers); //range limit (rlim) sets a limit for the neighbor search in the centroid placement //the neighbor location should be within the defined range to the calculated centroid location int first_rlim = 15; - int rlim_x = std::min(compressed_block_grid.compressed_to_grid_x.size(), first_rlim); - int rlim_y = std::min(compressed_block_grid.compressed_to_grid_y.size(), first_rlim); - //Determine the valid compressed grid location ranges - int min_cx, max_cx, delta_cx; - int min_cy, max_cy; + auto search_range = get_compressed_grid_target_search_range(compressed_block_grid, + compressed_centroid_loc, + first_rlim, + num_layers); - min_cx = std::max(0, cx_centroid - rlim_x); - max_cx = std::min(compressed_block_grid.compressed_to_grid_x.size() - 1, cx_centroid + rlim_x); - - min_cy = std::max(0, cy_centroid - rlim_y); - max_cy = std::min(compressed_block_grid.compressed_to_grid_y.size() - 1, cy_centroid + rlim_y); - - delta_cx = max_cx - min_cx; + int delta_cx = search_range[centroid_loc_layer_num].xmax - search_range[centroid_loc_layer_num].xmin; //Block has not been placed yet, so the "from" coords will be (-1, -1) - int cx_from = -1; - int cy_from = -1; + int cx_from = OPEN; + int cy_from = OPEN; + int layer_from = centroid_loc_layer_num; - int cx_to, cy_to; + t_physical_tile_loc to_compressed_loc; - bool legal = find_compatible_compressed_loc_in_range(block_type, min_cx, max_cx, min_cy, max_cy, delta_cx, cx_from, cy_from, cx_to, cy_to, false); + bool legal = find_compatible_compressed_loc_in_range(block_type, + delta_cx, + {cx_from, cy_from, layer_from}, + search_range[centroid_loc_layer_num], + to_compressed_loc, + false, + centroid_loc_layer_num); if (!legal) { return false; } - compressed_grid_to_loc(block_type, cx_to, cy_to, centroid_loc); + compressed_grid_to_loc(block_type, 
to_compressed_loc, centroid_loc); return legal; } @@ -334,12 +340,21 @@ static bool find_centroid_neighbor(t_pl_loc& centroid_loc, t_logical_block_type_ static std::vector find_centroid_loc(t_pl_macro pl_macro, t_pl_loc& centroid) { auto& cluster_ctx = g_vpr_ctx.clustering(); - int x, y; + t_physical_tile_loc tile_loc; float acc_weight = 0; float acc_x = 0; float acc_y = 0; + int head_layer_num = OPEN; + bool find_layer = false; + std::vector layer_count(g_vpr_ctx.device().grid.get_num_layers(), 0); ClusterBlockId head_blk = pl_macro.members.at(0).blk_index; + // For now, we put the macro in the same layer as the head block + head_layer_num = g_vpr_ctx.placement().block_locs[head_blk].loc.layer; + // If block is placed, we use the layer of the block. Otherwise, the layer will be determined later + if (head_layer_num == OPEN) { + find_layer = true; + } std::vector connected_blocks_to_update; //iterate over the from block pins @@ -374,10 +389,13 @@ static std::vector find_centroid_loc(t_pl_macro pl_macro, t_pl_l continue; } - get_coordinate_of_pin(sink_pin_id, x, y); - - acc_x += x; - acc_y += y; + get_coordinate_of_pin(sink_pin_id, tile_loc); + if (find_layer) { + VTR_ASSERT(tile_loc.layer_num != OPEN); + layer_count[tile_loc.layer_num]++; + } + acc_x += tile_loc.x; + acc_y += tile_loc.y; acc_weight++; } } @@ -391,10 +409,13 @@ static std::vector find_centroid_loc(t_pl_macro pl_macro, t_pl_l continue; } - get_coordinate_of_pin(source_pin, x, y); - - acc_x += x; - acc_y += y; + get_coordinate_of_pin(source_pin, tile_loc); + if (find_layer) { + VTR_ASSERT(tile_loc.layer_num != OPEN); + layer_count[tile_loc.layer_num]++; + } + acc_x += tile_loc.x; + acc_y += tile_loc.y; acc_weight++; } } @@ -403,19 +424,27 @@ static std::vector find_centroid_loc(t_pl_macro pl_macro, t_pl_l if (acc_weight > 0) { centroid.x = acc_x / acc_weight; centroid.y = acc_y / acc_weight; + if (find_layer) { + auto max_element = std::max_element(layer_count.begin(), layer_count.end()); + 
VTR_ASSERT(*max_element != 0); + auto index = std::distance(layer_count.begin(), max_element); + centroid.layer = static_cast(index); + } else { + centroid.layer = head_layer_num; + } } return connected_blocks_to_update; } static bool try_centroid_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_logical_block_type_ptr block_type, enum e_pad_loc_type pad_loc_type, vtr::vector& block_scores) { - t_pl_loc centroid_loc(OPEN, OPEN, OPEN); + t_pl_loc centroid_loc(OPEN, OPEN, OPEN, OPEN); std::vector unplaced_blocks_to_update_their_score; unplaced_blocks_to_update_their_score = find_centroid_loc(pl_macro, centroid_loc); //no suggestion was available for this block type - if (!is_loc_on_chip(centroid_loc.x, centroid_loc.y)) { + if (!is_loc_on_chip({centroid_loc.x, centroid_loc.y, centroid_loc.layer})) { return false; } @@ -430,7 +459,7 @@ static bool try_centroid_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_l } //no neighbor were found that meet all our requirements, should be placed with random placement - if (!is_loc_on_chip(centroid_loc.x, centroid_loc.y) || !pr.is_loc_in_part_reg(centroid_loc)) { + if (!is_loc_on_chip({centroid_loc.x, centroid_loc.y, centroid_loc.layer}) || !pr.is_loc_in_part_reg(centroid_loc)) { return false; } @@ -440,12 +469,12 @@ static bool try_centroid_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_l //we don't need to find one agian if (!neighbor_legal_loc) { const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type->index]; - const auto& type = device_ctx.grid.get_physical_type(centroid_loc.x, centroid_loc.y); - auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tiles_for_tile.at(type->index); + const auto& type = device_ctx.grid.get_physical_type({centroid_loc.x, centroid_loc.y, centroid_loc.layer}); + const auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num(type->index); centroid_loc.sub_tile = 
compatible_sub_tiles[vtr::irand((int)compatible_sub_tiles.size() - 1)]; } - int width_offset = device_ctx.grid.get_width_offset(centroid_loc.x, centroid_loc.y); - int height_offset = device_ctx.grid.get_height_offset(centroid_loc.x, centroid_loc.y); + int width_offset = device_ctx.grid.get_width_offset({centroid_loc.x, centroid_loc.y, centroid_loc.layer}); + int height_offset = device_ctx.grid.get_height_offset({centroid_loc.x, centroid_loc.y, centroid_loc.layer}); VTR_ASSERT(width_offset == 0); VTR_ASSERT(height_offset == 0); @@ -507,6 +536,7 @@ static int get_blk_type_first_loc(t_pl_loc& loc, t_pl_macro pl_macro, std::vecto //set the coordinate of first location that can accomodate macro blocks loc.x = first_empty_loc.first_avail_loc.x; loc.y = get_y_loc_based_on_macro_direction(first_empty_loc, pl_macro); + loc.layer = first_empty_loc.first_avail_loc.layer; loc.sub_tile = first_empty_loc.first_avail_loc.sub_tile; return empty_loc_index; @@ -519,29 +549,38 @@ static std::vector init_blk_types_empty_locations( const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[block_type_index]; const auto& device_ctx = g_vpr_ctx.device(); const auto& grid = device_ctx.grid; + int num_layers = grid.get_num_layers(); //create a vector to store all columns containing block_type_index with their lowest y and number of remaining blocks std::vector block_type_empty_locs; - //create a region the size of grid to find out first location with a specific block type - Region reg; - reg.set_region_rect(0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - reg.set_sub_tile(NO_SUBTILE); - - int min_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, reg.get_region_rect().xmin()); - int max_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, reg.get_region_rect().xmax()); - - //traverse all column and store their empty locations in block_type_empty_locs - for (int x_loc = min_cx; x_loc <= max_cx; x_loc++) { 
- t_grid_empty_locs_block_type empty_loc; - auto first_avail_loc = compressed_block_grid.grid[x_loc].begin()->second; - empty_loc.first_avail_loc.x = first_avail_loc.x; - empty_loc.first_avail_loc.y = first_avail_loc.y; - const auto& physical_type = grid.get_physical_type(first_avail_loc.x, first_avail_loc.y); - const auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tiles_for_tile.at(physical_type->index); - empty_loc.first_avail_loc.sub_tile = *std::min_element(compatible_sub_tiles.begin(), compatible_sub_tiles.end()); - empty_loc.num_of_empty_locs_in_y_axis = compressed_block_grid.grid[x_loc].size(); - block_type_empty_locs.push_back(empty_loc); + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + //create a region the size of grid to find out first location with a specific block type + Region reg; + reg.set_region_rect({0, + 0, + (int)device_ctx.grid.width() - 1, + (int)device_ctx.grid.height() - 1, + layer_num}); + reg.set_sub_tile(NO_SUBTILE); + const auto reg_coord = reg.get_region_rect(); + int min_cx = compressed_block_grid.grid_loc_to_compressed_loc_approx({reg_coord.xmin, OPEN, layer_num}).x; + int max_cx = compressed_block_grid.grid_loc_to_compressed_loc_approx({reg_coord.xmax, OPEN, layer_num}).x; + + //traverse all column and store their empty locations in block_type_empty_locs + for (int x_loc = min_cx; x_loc <= max_cx; x_loc++) { + t_grid_empty_locs_block_type empty_loc; + const auto& block_rows = compressed_block_grid.get_column_block_map(x_loc, layer_num); + auto first_avail_loc = block_rows.begin()->second; + empty_loc.first_avail_loc.x = first_avail_loc.x; + empty_loc.first_avail_loc.y = first_avail_loc.y; + empty_loc.first_avail_loc.layer = first_avail_loc.layer_num; + const auto& physical_type = grid.get_physical_type({first_avail_loc.x, first_avail_loc.y, first_avail_loc.layer_num}); + const auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num(physical_type->index); + 
empty_loc.first_avail_loc.sub_tile = *std::min_element(compatible_sub_tiles.begin(), compatible_sub_tiles.end()); + empty_loc.num_of_empty_locs_in_y_axis = block_rows.size(); + block_type_empty_locs.push_back(empty_loc); + } } return block_type_empty_locs; @@ -552,7 +591,7 @@ static inline void fix_IO_block_types(t_pl_macro pl_macro, t_pl_loc loc, enum e_ auto& place_ctx = g_vpr_ctx.mutable_placement(); //If the user marked the IO block pad_loc_type as RANDOM, that means it should be randomly //placed and then stay fixed to that location, which is why the macro members are marked as fixed. - const auto& type = device_ctx.grid.get_physical_type(loc.x, loc.y); + const auto& type = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); if (is_io_type(type) && pad_loc_type == RANDOM) { for (unsigned int imember = 0; imember < pl_macro.members.size(); imember++) { place_ctx.block_locs[pl_macro.members[imember].blk_index].is_fixed = true; @@ -584,32 +623,36 @@ static bool try_random_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_log } Region reg = regions[region_index]; - vtr::Rect rect = reg.get_region_rect(); + const auto reg_coord = reg.get_region_rect(); - int min_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, rect.xmin()); - int min_cy = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, rect.ymin()); + auto min_compressed_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx({reg_coord.xmin, reg_coord.ymin, reg_coord.layer_num}); - int max_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, rect.xmax()); - int max_cy = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, rect.ymax()); + auto max_compressed_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx({reg_coord.xmax, reg_coord.ymax, reg_coord.layer_num}); - int delta_cx = max_cx - min_cx; + int delta_cx = max_compressed_loc.x - min_compressed_loc.x; - int cx_to; - int cy_to; + 
t_physical_tile_loc to_compressed_loc; bool legal; - legal = find_compatible_compressed_loc_in_range(block_type, min_cx, max_cx, min_cy, max_cy, delta_cx, cx_from, cy_from, cx_to, cy_to, false); + legal = find_compatible_compressed_loc_in_range(block_type, + delta_cx, + {cx_from, cy_from, reg_coord.layer_num}, + {min_compressed_loc.x, max_compressed_loc.x, + min_compressed_loc.y, max_compressed_loc.y}, + to_compressed_loc, + false, + reg_coord.layer_num); if (!legal) { //No valid position found return false; } - compressed_grid_to_loc(block_type, cx_to, cy_to, loc); + compressed_grid_to_loc(block_type, to_compressed_loc, loc); auto& device_ctx = g_vpr_ctx.device(); - int width_offset = device_ctx.grid.get_width_offset(loc.x, loc.y); - int height_offset = device_ctx.grid.get_height_offset(loc.x, loc.y); + int width_offset = device_ctx.grid.get_width_offset({loc.x, loc.y, loc.layer}); + int height_offset = device_ctx.grid.get_height_offset({loc.x, loc.y, loc.layer}); VTR_ASSERT(width_offset == 0); VTR_ASSERT(height_offset == 0); @@ -633,14 +676,22 @@ static bool try_exhaustive_placement(t_pl_macro pl_macro, PartitionRegion& pr, t t_pl_loc to_loc; for (unsigned int reg = 0; reg < regions.size() && placed == false; reg++) { - vtr::Rect rect = regions[reg].get_region_rect(); + const auto reg_coord = regions[reg].get_region_rect(); + int layer_num = reg_coord.layer_num; - int min_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, rect.xmin()); - int max_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, rect.xmax()); + int min_cx = compressed_block_grid.grid_loc_to_compressed_loc_approx({reg_coord.xmin, OPEN, layer_num}).x; + int max_cx = compressed_block_grid.grid_loc_to_compressed_loc_approx({reg_coord.xmax, OPEN, layer_num}).x; + + // There isn't any block of this type in this region + if (min_cx == OPEN) { + VTR_ASSERT(max_cx == OPEN); + continue; + } for (int cx = min_cx; cx <= max_cx && placed == false; cx++) { - auto 
y_lower_iter = compressed_block_grid.grid[cx].begin(); - auto y_upper_iter = compressed_block_grid.grid[cx].end(); + const auto& block_rows = compressed_block_grid.get_column_block_map(cx, layer_num); + auto y_lower_iter = block_rows.begin(); + auto y_upper_iter = block_rows.end(); int y_range = std::distance(y_lower_iter, y_upper_iter); @@ -649,17 +700,19 @@ static bool try_exhaustive_placement(t_pl_macro pl_macro, PartitionRegion& pr, t for (int dy = 0; dy < y_range && placed == false; dy++) { int cy = (y_lower_iter + dy)->first; - to_loc.x = compressed_block_grid.compressed_to_grid_x[cx]; - to_loc.y = compressed_block_grid.compressed_to_grid_y[cy]; + auto grid_loc = compressed_block_grid.compressed_loc_to_grid_loc({cx, cy, layer_num}); + to_loc.x = grid_loc.x; + to_loc.y = grid_loc.y; + to_loc.layer = grid_loc.layer_num; auto& grid = g_vpr_ctx.device().grid; - auto tile_type = grid.get_physical_type(to_loc.x, to_loc.y); + auto tile_type = grid.get_physical_type({to_loc.x, to_loc.y, layer_num}); if (regions[reg].get_sub_tile() != NO_SUBTILE) { int subtile = regions[reg].get_sub_tile(); to_loc.sub_tile = subtile; - if (place_ctx.grid_blocks[to_loc.x][to_loc.y].blocks[to_loc.sub_tile] == EMPTY_BLOCK_ID) { + if (place_ctx.grid_blocks.block_at_location(to_loc) == EMPTY_BLOCK_ID) { placed = try_place_macro(pl_macro, to_loc); if (placed) { @@ -674,7 +727,7 @@ static bool try_exhaustive_placement(t_pl_macro pl_macro, PartitionRegion& pr, t for (int st = st_low; st <= st_high && placed == false; st++) { to_loc.sub_tile = st; - if (place_ctx.grid_blocks[to_loc.x][to_loc.y].blocks[to_loc.sub_tile] == EMPTY_BLOCK_ID) { + if (place_ctx.grid_blocks.block_at_location(to_loc) == EMPTY_BLOCK_ID) { placed = try_place_macro(pl_macro, to_loc); if (placed) { fix_IO_block_types(pl_macro, to_loc, pad_loc_type); @@ -700,14 +753,14 @@ static bool try_dense_placement(t_pl_macro pl_macro, PartitionRegion& pr, t_logi int column_index = get_blk_type_first_loc(loc, pl_macro, 
blk_types_empty_locs_in_grid); //check if first available location is within the chip and macro's partition region, otherwise placement is not legal - if (!is_loc_on_chip(loc.x, loc.y) || !pr.is_loc_in_part_reg(loc)) { + if (!is_loc_on_chip({loc.x, loc.y, loc.layer}) || !pr.is_loc_in_part_reg(loc)) { return false; } auto& device_ctx = g_vpr_ctx.device(); - int width_offset = device_ctx.grid.get_width_offset(loc.x, loc.y); - int height_offset = device_ctx.grid.get_height_offset(loc.x, loc.y); + int width_offset = device_ctx.grid.get_width_offset({loc.x, loc.y, loc.layer}); + int height_offset = device_ctx.grid.get_height_offset({loc.x, loc.y, loc.layer}); VTR_ASSERT(width_offset == 0); VTR_ASSERT(height_offset == 0); @@ -730,7 +783,7 @@ static bool try_place_macro(t_pl_macro pl_macro, t_pl_loc head_pos) { bool macro_placed = false; // If that location is occupied, do nothing. - if (place_ctx.grid_blocks[head_pos.x][head_pos.y].blocks[head_pos.sub_tile] != EMPTY_BLOCK_ID) { + if (place_ctx.grid_blocks.block_at_location(head_pos) != EMPTY_BLOCK_ID) { return (macro_placed); } @@ -776,9 +829,15 @@ static bool place_macro(int macros_max_num_tries, t_pl_macro pl_macro, enum e_pa pr = floorplanning_ctx.cluster_constraints[blk_id]; } else { //If the block is not constrained, assign a region the size of the grid to its PartitionRegion Region reg; - reg.set_region_rect(0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - reg.set_sub_tile(NO_SUBTILE); - pr.add_to_part_region(reg); + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + reg.set_region_rect({0, + 0, + (int)device_ctx.grid.width() - 1, + (int)device_ctx.grid.height() - 1, + layer_num}); + reg.set_sub_tile(NO_SUBTILE); + pr.add_to_part_region(reg); + } } //If blk_types_empty_locs_in_grid is not NULL, means that initial placement has been failed in first iteration for this block type @@ -961,15 +1020,17 @@ static void clear_block_type_grid_locs(std::unordered_set 
unplaced_blk_type /* We'll use the grid to record where everything goes. Initialize to the grid has no * blocks placed anywhere. */ - for (size_t i = 0; i < device_ctx.grid.width(); i++) { - for (size_t j = 0; j < device_ctx.grid.height(); j++) { - const auto& type = device_ctx.grid.get_physical_type(i, j); - itype = type->index; - if (clear_all_block_types || unplaced_blk_types_index.count(itype)) { - place_ctx.grid_blocks[i][j].usage = 0; - for (int k = 0; k < device_ctx.physical_tile_types[itype].capacity; k++) { - if (place_ctx.grid_blocks[i][j].blocks[k] != INVALID_BLOCK_ID) { - place_ctx.grid_blocks[i][j].blocks[k] = EMPTY_BLOCK_ID; + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + const auto& type = device_ctx.grid.get_physical_type({i, j, layer_num}); + itype = type->index; + if (clear_all_block_types || unplaced_blk_types_index.count(itype)) { + place_ctx.grid_blocks.set_usage({i, j, layer_num}, 0); + for (int k = 0; k < device_ctx.physical_tile_types[itype].capacity; k++) { + if (place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}) != INVALID_BLOCK_ID) { + place_ctx.grid_blocks.set_block_at_location({i, j, k, layer_num}, EMPTY_BLOCK_ID); + } } } } diff --git a/vpr/src/place/manual_move_generator.cpp b/vpr/src/place/manual_move_generator.cpp index 930eaa9f59a..2cc80347df1 100644 --- a/vpr/src/place/manual_move_generator.cpp +++ b/vpr/src/place/manual_move_generator.cpp @@ -37,14 +37,14 @@ e_create_move ManualMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ //Gets the current location of the block to move. 
t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = device_ctx.grid.get_physical_type(from.x, from.y); + auto grid_from_type = device_ctx.grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); //Retrieving the compressed block grid for this block type const auto& compressed_block_grid = place_ctx.compressed_block_grids[cluster_from_type->index]; //Checking if the block has a compatible subtile. - auto to_type = device_ctx.grid.get_physical_type(to.x, to.y); - auto& compatible_subtiles = compressed_block_grid.compatible_sub_tiles_for_tile.at(to_type->index); + auto to_type = device_ctx.grid.get_physical_type({to.x, to.y, to.layer}); + auto& compatible_subtiles = compressed_block_grid.compatible_sub_tile_num(to_type->index); //No compatible subtile is found. if (std::find(compatible_subtiles.begin(), compatible_subtiles.end(), to.sub_tile) == compatible_subtiles.end()) { diff --git a/vpr/src/place/median_move_generator.cpp b/vpr/src/place/median_move_generator.cpp index 2bd4ef7a622..9dae21bca5b 100644 --- a/vpr/src/place/median_move_generator.cpp +++ b/vpr/src/place/median_move_generator.cpp @@ -24,7 +24,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); /* Calculate the median region */ @@ -47,7 +47,7 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(pin_id); if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) continue; - /* To speedup the 
calculation, we found it is useful to ignore high fanout nets. + /* To speed up the calculation, we found it is useful to ignore high fanout nets. * Especially that in most cases, these high fanout nets are scattered in many locations of * the device and don't guide to a specific location. We also assuered these assumpitions experimentally. */ @@ -118,6 +118,8 @@ e_create_move MedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks_ t_pl_loc median_point; median_point.x = (limit_coords.xmin + limit_coords.xmax) / 2; median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2; + // TODO: When placer is updated to support moving blocks between dice, this needs to be changed. Currently, we only move blocks within a die. + median_point.layer = from.layer; if (!find_to_loc_centroid(cluster_from_type, from, median_point, range_limiters, to, b_from)) return e_create_move::ABORT; diff --git a/vpr/src/place/move_transactions.cpp b/vpr/src/place/move_transactions.cpp index 1fe3ec7aff8..2c7d6dc180d 100644 --- a/vpr/src/place/move_transactions.cpp +++ b/vpr/src/place/move_transactions.cpp @@ -21,7 +21,7 @@ e_block_move_result record_block_move(t_pl_blocks_to_be_moved& blocks_affected, return e_block_move_result::ABORT; } - VTR_ASSERT_SAFE(to.sub_tile < int(place_ctx.grid_blocks[to.x][to.y].blocks.size())); + VTR_ASSERT_SAFE(to.sub_tile < int(place_ctx.grid_blocks.num_blocks_at_location({to.x, to.y, to.layer}))); // Sets up the blocks moved int imoved_blk = blocks_affected.num_moved_blocks; @@ -46,7 +46,10 @@ void apply_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { place_ctx.block_locs[blk].loc = blocks_affected.moved_blocks[iblk].new_loc; //if physical tile type of old location does not equal physical tile type of new location, sync the new physical pins - if (device_ctx.grid.get_physical_type(blocks_affected.moved_blocks[iblk].old_loc.x, blocks_affected.moved_blocks[iblk].old_loc.y) != 
device_ctx.grid.get_physical_type(blocks_affected.moved_blocks[iblk].new_loc.x, blocks_affected.moved_blocks[iblk].new_loc.y)) { + if (device_ctx.grid.get_physical_type({blocks_affected.moved_blocks[iblk].old_loc.x, + blocks_affected.moved_blocks[iblk].old_loc.y, + blocks_affected.moved_blocks[iblk].old_loc.layer}) + != device_ctx.grid.get_physical_type({blocks_affected.moved_blocks[iblk].new_loc.x, blocks_affected.moved_blocks[iblk].new_loc.y, blocks_affected.moved_blocks[iblk].new_loc.layer})) { place_sync_external_block_connections(blk); } } @@ -66,17 +69,19 @@ void commit_move_blocks(const t_pl_blocks_to_be_moved& blocks_affected) { t_pl_loc from = blocks_affected.moved_blocks[iblk].old_loc; //Remove from old location only if it hasn't already been updated by a previous block update - if (place_ctx.grid_blocks[from.x][from.y].blocks[from.sub_tile] == blk) { - place_ctx.grid_blocks[from.x][from.y].blocks[from.sub_tile] = EMPTY_BLOCK_ID; - --place_ctx.grid_blocks[from.x][from.y].usage; + if (place_ctx.grid_blocks.block_at_location(from) == blk) { + place_ctx.grid_blocks.set_block_at_location(from, EMPTY_BLOCK_ID); + place_ctx.grid_blocks.set_usage({from.x, from.y, from.layer}, + place_ctx.grid_blocks.get_usage({from.x, from.y, from.layer}) - 1); } //Add to new location - if (place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile] == EMPTY_BLOCK_ID) { + if (place_ctx.grid_blocks.block_at_location(to) == EMPTY_BLOCK_ID) { //Only need to increase usage if previously unused - ++place_ctx.grid_blocks[to.x][to.y].usage; + place_ctx.grid_blocks.set_usage({to.x, to.y, to.layer}, + place_ctx.grid_blocks.get_usage({to.x, to.y, to.layer}) + 1); } - place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile] = blk; + place_ctx.grid_blocks.set_block_at_location(to, blk); } // Finish updating clb for all blocks } @@ -95,11 +100,14 @@ void revert_move_blocks(t_pl_blocks_to_be_moved& blocks_affected) { place_ctx.block_locs[blk].loc = old; //if physical tile type of old location does not 
equal physical tile type of new location, sync the new physical pins - if (device_ctx.grid.get_physical_type(blocks_affected.moved_blocks[iblk].old_loc.x, blocks_affected.moved_blocks[iblk].old_loc.y) != device_ctx.grid.get_physical_type(blocks_affected.moved_blocks[iblk].new_loc.x, blocks_affected.moved_blocks[iblk].new_loc.y)) { + if (device_ctx.grid.get_physical_type({blocks_affected.moved_blocks[iblk].old_loc.x, + blocks_affected.moved_blocks[iblk].old_loc.y, + blocks_affected.moved_blocks[iblk].old_loc.layer}) + != device_ctx.grid.get_physical_type({blocks_affected.moved_blocks[iblk].new_loc.x, blocks_affected.moved_blocks[iblk].new_loc.y, blocks_affected.moved_blocks[iblk].new_loc.layer})) { place_sync_external_block_connections(blk); } - VTR_ASSERT_SAFE_MSG(place_ctx.grid_blocks[old.x][old.y].blocks[old.sub_tile] == blk, "Grid blocks should only have been updated if swap commited (not reverted)"); + VTR_ASSERT_SAFE_MSG(place_ctx.grid_blocks.block_at_location(old) == blk, "Grid blocks should only have been updated if swap commited (not reverted)"); } } diff --git a/vpr/src/place/move_utils.cpp b/vpr/src/place/move_utils.cpp index 53a7c0ad248..5e1188db6c3 100644 --- a/vpr/src/place/move_utils.cpp +++ b/vpr/src/place/move_utils.cpp @@ -40,7 +40,7 @@ e_create_move create_move(t_pl_blocks_to_be_moved& blocks_affected, ClusterBlock //Try inverting the swap direction auto& place_ctx = g_vpr_ctx.placement(); - ClusterBlockId b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + ClusterBlockId b_to = place_ctx.grid_blocks.block_at_location(to); if (!b_to) { log_move_abort("inverted move no to block"); @@ -93,7 +93,7 @@ e_block_move_result find_affected_blocks(t_pl_blocks_to_be_moved& blocks_affecte VTR_ASSERT_SAFE(outcome != e_block_move_result::VALID || imember_from == int(pl_macros[imacro_from].members.size())); } else { - ClusterBlockId b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + ClusterBlockId b_to = 
place_ctx.grid_blocks.block_at_location(to); int imacro_to = -1; get_imacro_from_iblk(&imacro_to, b_to, pl_macros); @@ -125,9 +125,9 @@ e_block_move_result record_single_block_swap(t_pl_blocks_to_be_moved& blocks_aff return e_block_move_result::ABORT; } - VTR_ASSERT_SAFE(to.sub_tile < int(place_ctx.grid_blocks[to.x][to.y].blocks.size())); + VTR_ASSERT_SAFE(to.sub_tile < int(place_ctx.grid_blocks.num_blocks_at_location({to.x, to.y, to.layer}))); - ClusterBlockId b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + ClusterBlockId b_to = place_ctx.grid_blocks.block_at_location(to); t_pl_loc curr_from = place_ctx.block_locs[b_from].loc; @@ -190,7 +190,7 @@ e_block_move_result record_macro_swaps(t_pl_blocks_to_be_moved& blocks_affected, log_move_abort("macro_from swap to location illegal"); outcome = e_block_move_result::ABORT; } else { - ClusterBlockId b_to = place_ctx.grid_blocks[curr_to.x][curr_to.y].blocks[curr_to.sub_tile]; + ClusterBlockId b_to = place_ctx.grid_blocks.block_at_location(curr_to); int imacro_to = -1; get_imacro_from_iblk(&imacro_to, b_to, pl_macros); @@ -334,7 +334,7 @@ e_block_move_result record_macro_move(t_pl_blocks_to_be_moved& blocks_affected, return e_block_move_result::ABORT; } - ClusterBlockId blk_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + ClusterBlockId blk_to = place_ctx.grid_blocks.block_at_location(to); record_block_move(blocks_affected, member.blk_index, to); @@ -365,7 +365,7 @@ e_block_move_result identify_macro_self_swap_affected_macros(std::vector& m return e_block_move_result::ABORT; } - ClusterBlockId blk_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + ClusterBlockId blk_to = place_ctx.grid_blocks.block_at_location(to); int imacro_to = -1; get_imacro_from_iblk(&imacro_to, blk_to, place_ctx.pl_macros); @@ -458,7 +458,7 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { return false; } - auto physical_tile = device_ctx.grid.get_physical_type(to.x, to.y); + auto 
physical_tile = device_ctx.grid.get_physical_type(t_physical_tile_loc(to.x, to.y, to.layer)); auto logical_block = cluster_ctx.clb_nlist.block_type(blk); if (to.sub_tile < 0 || to.sub_tile >= physical_tile->capacity @@ -466,7 +466,7 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { return false; } // If the destination block is user constrained, abort this swap - auto b_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.sub_tile]; + auto b_to = place_ctx.grid_blocks.block_at_location(to); if (b_to != INVALID_BLOCK_ID && b_to != EMPTY_BLOCK_ID) { if (place_ctx.block_locs[b_to].is_fixed) { return false; @@ -710,53 +710,60 @@ bool find_to_loc_uniform(t_logical_block_type_ptr type, //Retrieve the compressed block grid for this block type const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index]; - - //Determine the rlim in each dimension - int rlim_x = std::min(compressed_block_grid.compressed_to_grid_x.size(), rlim); - int rlim_y = std::min(compressed_block_grid.compressed_to_grid_y.size(), rlim); /* for aspect_ratio != 1 case. 
*/ + const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + const int from_layer_num = from.layer; //Determine the coordinates in the compressed grid space of the current block - int cx_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_x, from.x); - int cy_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_y, from.y); + std::vector compressed_locs = get_compressed_loc(compressed_block_grid, + from, + num_layers); //Determine the valid compressed grid location ranges - int min_cx = std::max(0, cx_from - rlim_x); - int max_cx = std::min(compressed_block_grid.compressed_to_grid_x.size() - 1, cx_from + rlim_x); - int delta_cx = max_cx - min_cx; - - int min_cy = std::max(0, cy_from - rlim_y); - int max_cy = std::min(compressed_block_grid.compressed_to_grid_y.size() - 1, cy_from + rlim_y); + std::vector search_range = get_compressed_grid_target_search_range(compressed_block_grid, + compressed_locs, + rlim, + num_layers); + int delta_cx = search_range[from_layer_num].xmax - search_range[from_layer_num].xmin; - int cx_to = OPEN; - int cy_to = OPEN; + t_physical_tile_loc to_compressed_loc; bool legal = false; + //TODO: constraints should be adapted to 3D architecture if (is_cluster_constrained(b_from)) { - bool intersect = intersect_range_limit_with_floorplan_constraints(type, b_from, min_cx, min_cy, max_cx, max_cy, delta_cx); + bool intersect = intersect_range_limit_with_floorplan_constraints(type, + b_from, + search_range[from_layer_num], + delta_cx, + from_layer_num); if (!intersect) { return false; } } - - legal = find_compatible_compressed_loc_in_range(type, min_cx, max_cx, min_cy, max_cy, delta_cx, cx_from, cy_from, cx_to, cy_to, false); + //TODO: For now, we only move the blocks on the same tile + legal = find_compatible_compressed_loc_in_range(type, + delta_cx, + compressed_locs[from_layer_num], + search_range[from_layer_num], + to_compressed_loc, + false, + from_layer_num); if (!legal) { //No valid position found return 
false; } - VTR_ASSERT(cx_to != OPEN); - VTR_ASSERT(cy_to != OPEN); + VTR_ASSERT(to_compressed_loc); //Convert to true (uncompressed) grid locations - compressed_grid_to_loc(type, cx_to, cy_to, to); + compressed_grid_to_loc(type, to_compressed_loc, to); auto& grid = g_vpr_ctx.device().grid; - const auto& to_type = grid.get_physical_type(to.x, to.y); + const auto& to_type = grid.get_physical_type(t_physical_tile_loc(to.x, to.y, to.layer)); VTR_ASSERT_MSG(is_tile_compatible(to_type, type), "Type must be compatible"); - VTR_ASSERT_MSG(grid.get_width_offset(to.x, to.y) == 0, "Should be at block base location"); - VTR_ASSERT_MSG(grid.get_height_offset(to.x, to.y) == 0, "Should be at block base location"); + VTR_ASSERT_MSG(grid.get_width_offset({to.x, to.y, to.layer}) == 0, "Should be at block base location"); + VTR_ASSERT_MSG(grid.get_height_offset({to.x, to.y, to.layer}) == 0, "Should be at block base location"); return true; } @@ -775,60 +782,78 @@ bool find_to_loc_median(t_logical_block_type_ptr blk_type, const t_bb* limit_coords, t_pl_loc& to_loc, ClusterBlockId b_from) { + int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + int from_layer_num = from_loc.layer; const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[blk_type->index]; //Determine the coordinates in the compressed grid space of the current block - int cx_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_x, from_loc.x); - int cy_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_y, from_loc.y); + std::vector from_compressed_locs = get_compressed_loc(compressed_block_grid, + from_loc, + g_vpr_ctx.device().grid.get_num_layers()); VTR_ASSERT(limit_coords->xmin <= limit_coords->xmax); VTR_ASSERT(limit_coords->ymin <= limit_coords->ymax); //Determine the valid compressed grid location ranges - int min_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, limit_coords->xmin); - int max_cx = 
grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, limit_coords->xmax); - - VTR_ASSERT(min_cx >= 0); - VTR_ASSERT(static_cast(compressed_block_grid.compressed_to_grid_x.size()) - 1 - max_cx >= 0); - VTR_ASSERT(max_cx >= min_cx); - int delta_cx = max_cx - min_cx; - - int min_cy = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, limit_coords->ymin); - int max_cy = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, limit_coords->ymax); - VTR_ASSERT(min_cy >= 0); - VTR_ASSERT(static_cast(compressed_block_grid.compressed_to_grid_y.size()) - 1 - max_cy >= 0); - VTR_ASSERT(max_cy >= min_cy); - - int cx_to = OPEN; - int cy_to = OPEN; + std::vector min_compressed_loc = get_compressed_loc_approx(compressed_block_grid, + {limit_coords->xmin, limit_coords->ymin, 0, from_layer_num}, + num_layers); + std::vector max_compressed_loc = get_compressed_loc_approx(compressed_block_grid, + {limit_coords->xmax, limit_coords->ymax, 0, from_layer_num}, + num_layers); + + VTR_ASSERT(min_compressed_loc[from_layer_num].x >= 0); + VTR_ASSERT(static_cast(compressed_block_grid.get_num_columns(from_layer_num)) - 1 - max_compressed_loc[from_layer_num].x >= 0); + VTR_ASSERT(max_compressed_loc[from_layer_num].x >= min_compressed_loc[from_layer_num].x); + int delta_cx = max_compressed_loc[from_layer_num].x - min_compressed_loc[from_layer_num].x; + + VTR_ASSERT(min_compressed_loc[from_layer_num].y >= 0); + VTR_ASSERT(static_cast(compressed_block_grid.get_num_rows(from_layer_num)) - 1 - max_compressed_loc[from_layer_num].y >= 0); + VTR_ASSERT(max_compressed_loc[from_layer_num].y >= min_compressed_loc[from_layer_num].y); + + t_bb search_range(min_compressed_loc[from_layer_num].x, + max_compressed_loc[from_layer_num].x, + min_compressed_loc[from_layer_num].y, + max_compressed_loc[from_layer_num].y); + + t_physical_tile_loc to_compressed_loc; bool legal = false; if (is_cluster_constrained(b_from)) { - bool intersect = 
intersect_range_limit_with_floorplan_constraints(blk_type, b_from, min_cx, min_cy, max_cx, max_cy, delta_cx); + bool intersect = intersect_range_limit_with_floorplan_constraints(blk_type, + b_from, + search_range, + delta_cx, + from_layer_num); if (!intersect) { return false; } } - legal = find_compatible_compressed_loc_in_range(blk_type, min_cx, max_cx, min_cy, max_cy, delta_cx, cx_from, cy_from, cx_to, cy_to, true); + legal = find_compatible_compressed_loc_in_range(blk_type, + delta_cx, + from_compressed_locs[from_layer_num], + search_range, + to_compressed_loc, + true, + from_layer_num); if (!legal) { //No valid position found return false; } - VTR_ASSERT(cx_to != OPEN); - VTR_ASSERT(cy_to != OPEN); + VTR_ASSERT(to_compressed_loc); //Convert to true (uncompressed) grid locations - compressed_grid_to_loc(blk_type, cx_to, cy_to, to_loc); + compressed_grid_to_loc(blk_type, to_compressed_loc, to_loc); auto& grid = g_vpr_ctx.device().grid; - const auto& to_type = grid.get_physical_type(to_loc.x, to_loc.y); + const auto& to_type = grid.get_physical_type(t_physical_tile_loc(to_loc.x, to_loc.y, to_loc.layer)); VTR_ASSERT_MSG(is_tile_compatible(to_type, blk_type), "Type must be compatible"); - VTR_ASSERT_MSG(grid.get_width_offset(to_loc.x, to_loc.y) == 0, "Should be at block base location"); - VTR_ASSERT_MSG(grid.get_height_offset(to_loc.x, to_loc.y) == 0, "Should be at block base location"); + VTR_ASSERT_MSG(grid.get_width_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location"); + VTR_ASSERT_MSG(grid.get_height_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location"); return true; } @@ -841,79 +866,77 @@ bool find_to_loc_centroid(t_logical_block_type_ptr blk_type, ClusterBlockId b_from) { //Retrieve the compressed block grid for this block type const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[blk_type->index]; + const int from_layer_num = from_loc.layer; + const int num_layers = 
g_vpr_ctx.device().grid.get_num_layers(); - //Determine the coordinates in the compressed grid space of the current block - int cx_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_x, from_loc.x); - int cy_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_y, from_loc.y); - - //Determine the rlim in each dimension - int rlim_x = std::min(compressed_block_grid.compressed_to_grid_x.size(), std::min(range_limiters.original_rlim, range_limiters.dm_rlim)); - int rlim_y = std::min(compressed_block_grid.compressed_to_grid_y.size(), std::min(range_limiters.original_rlim, range_limiters.dm_rlim)); /* for aspect_ratio != 1 case. */ + std::vector from_compressed_loc = get_compressed_loc(compressed_block_grid, + from_loc, + num_layers); //Determine the coordinates in the compressed grid space of the current block - int cx_centroid = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, centroid.x); - int cy_centroid = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, centroid.y); + std::vector centroid_compressed_loc = get_compressed_loc_approx(compressed_block_grid, + centroid, + num_layers); //Determine the valid compressed grid location ranges - int min_cx, max_cx, delta_cx; - int min_cy, max_cy; + int delta_cx; + std::vector search_range; // If we are early in the anneal and the range limit still big enough --> search around the center location that the move proposed // If not --> search around the current location of the block but in the direction of the center location that the move proposed if (range_limiters.original_rlim > 0.15 * range_limiters.first_rlim) { - min_cx = std::max(0, cx_centroid - rlim_x); - max_cx = std::min(compressed_block_grid.compressed_to_grid_x.size() - 1, cx_centroid + rlim_x); - - min_cy = std::max(0, cy_centroid - rlim_y); - max_cy = std::min(compressed_block_grid.compressed_to_grid_y.size() - 1, cy_centroid + rlim_y); + search_range = 
get_compressed_grid_target_search_range(compressed_block_grid, + centroid_compressed_loc, + std::min(range_limiters.original_rlim, range_limiters.dm_rlim), + num_layers); } else { - if (cx_centroid < cx_from) { - min_cx = std::max(0, cx_from - rlim_x); - max_cx = cx_from; - } else { - min_cx = cx_from; - max_cx = std::min(compressed_block_grid.compressed_to_grid_x.size() - 1, cx_from + rlim_x); - } - if (cy_centroid < cy_from) { - min_cy = std::max(0, cy_from - rlim_y); - max_cy = cy_from; - } else { - min_cy = cy_from; - max_cy = std::min(compressed_block_grid.compressed_to_grid_y.size() - 1, cy_from + rlim_y); - } + search_range = get_compressed_grid_bounded_search_range(compressed_block_grid, + from_compressed_loc, + centroid_compressed_loc, + std::min(range_limiters.original_rlim, range_limiters.dm_rlim), + num_layers); } - delta_cx = max_cx - min_cx; + delta_cx = search_range[from_layer_num].xmax - search_range[from_layer_num].xmin; - int cx_to = OPEN; - int cy_to = OPEN; + t_physical_tile_loc to_compressed_loc; bool legal = false; if (is_cluster_constrained(b_from)) { - bool intersect = intersect_range_limit_with_floorplan_constraints(blk_type, b_from, min_cx, min_cy, max_cx, max_cy, delta_cx); + bool intersect = intersect_range_limit_with_floorplan_constraints(blk_type, + b_from, + search_range[from_layer_num], + delta_cx, + from_layer_num); if (!intersect) { return false; } } - legal = find_compatible_compressed_loc_in_range(blk_type, min_cx, max_cx, min_cy, max_cy, delta_cx, cx_from, cy_from, cx_to, cy_to, false); + //TODO: For now, we only move the blocks on the same tile + legal = find_compatible_compressed_loc_in_range(blk_type, + delta_cx, + from_compressed_loc[from_layer_num], + search_range[from_layer_num], + to_compressed_loc, + false, + from_layer_num); if (!legal) { //No valid position found return false; } - VTR_ASSERT(cx_to != OPEN); - VTR_ASSERT(cy_to != OPEN); + VTR_ASSERT(to_compressed_loc); //Convert to true (uncompressed) grid locations - 
compressed_grid_to_loc(blk_type, cx_to, cy_to, to_loc); + compressed_grid_to_loc(blk_type, to_compressed_loc, to_loc); auto& grid = g_vpr_ctx.device().grid; - const auto& to_type = grid.get_physical_type(to_loc.x, to_loc.y); + const auto& to_type = grid.get_physical_type(t_physical_tile_loc(to_loc.x, to_loc.y, to_loc.layer)); VTR_ASSERT_MSG(is_tile_compatible(to_type, blk_type), "Type must be compatible"); - VTR_ASSERT_MSG(grid.get_width_offset(to_loc.x, to_loc.y) == 0, "Should be at block base location"); - VTR_ASSERT_MSG(grid.get_height_offset(to_loc.x, to_loc.y) == 0, "Should be at block base location"); + VTR_ASSERT_MSG(grid.get_width_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location"); + VTR_ASSERT_MSG(grid.get_height_offset({to_loc.x, to_loc.y, to_loc.layer}) == 0, "Should be at block base location"); return true; } @@ -935,23 +958,33 @@ std::string move_type_to_string(e_move_type move) { } //Convert to true (uncompressed) grid locations -void compressed_grid_to_loc(t_logical_block_type_ptr blk_type, int cx, int cy, t_pl_loc& to_loc) { +void compressed_grid_to_loc(t_logical_block_type_ptr blk_type, + t_physical_tile_loc compressed_loc, + t_pl_loc& to_loc) { const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[blk_type->index]; - - to_loc.x = compressed_block_grid.compressed_to_grid_x[cx]; - to_loc.y = compressed_block_grid.compressed_to_grid_y[cy]; + auto grid_loc = compressed_block_grid.compressed_loc_to_grid_loc(compressed_loc); auto& grid = g_vpr_ctx.device().grid; - auto to_type = grid.get_physical_type(to_loc.x, to_loc.y); + auto to_type = grid.get_physical_type({grid_loc.x, grid_loc.y, grid_loc.layer_num}); //Each x/y location contains only a single type, so we can pick a random z (capcity) location - auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tiles_for_tile.at(to_type->index); - to_loc.sub_tile = compatible_sub_tiles[vtr::irand((int)compatible_sub_tiles.size() - 1)]; + 
auto& compatible_sub_tiles = compressed_block_grid.compatible_sub_tile_num(to_type->index); + int sub_tile = compatible_sub_tiles[vtr::irand((int)compatible_sub_tiles.size() - 1)]; + + to_loc = t_pl_loc(grid_loc.x, grid_loc.y, sub_tile, grid_loc.layer_num); } -bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int min_cx, int max_cx, int min_cy, int max_cy, int delta_cx, int cx_from, int cy_from, int& cx_to, int& cy_to, bool is_median) { +bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, + const int delta_cx, + const t_physical_tile_loc& from_loc, + t_bb search_range, + t_physical_tile_loc& to_loc, + bool is_median, + int to_layer_num) { + //TODO For the time being, the blocks only moved in the same layer. This assertion should be removed after VPR is updated to move blocks between layers + VTR_ASSERT(to_layer_num == from_loc.layer_num); const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index]; - + to_loc.layer_num = to_layer_num; std::unordered_set tried_cx_to; bool legal = false; int possibilities; @@ -963,13 +996,13 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int while (!legal && (int)tried_cx_to.size() < possibilities) { //Until legal or all possibilities exhaused //Pick a random x-location within [min_cx, max_cx], //until we find a legal swap, or have exhuasted all possiblites - cx_to = min_cx + vtr::irand(delta_cx); + to_loc.x = search_range.xmin + vtr::irand(delta_cx); - VTR_ASSERT(cx_to >= min_cx); - VTR_ASSERT(cx_to <= max_cx); + VTR_ASSERT(to_loc.x >= search_range.xmin); + VTR_ASSERT(to_loc.x <= search_range.xmax); //Record this x location as tried - auto res = tried_cx_to.insert(cx_to); + auto res = tried_cx_to.insert(to_loc.x); if (!res.second) { continue; //Already tried this position } @@ -981,25 +1014,26 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int // //The candidates are stored in a flat_map 
so we can efficiently find the set of valid //candidates with upper/lower bound. - auto y_lower_iter = compressed_block_grid.grid[cx_to].lower_bound(min_cy); - if (y_lower_iter == compressed_block_grid.grid[cx_to].end()) { + const auto& block_rows = compressed_block_grid.get_column_block_map(to_loc.x, to_layer_num); + auto y_lower_iter = block_rows.lower_bound(search_range.ymin); + if (y_lower_iter == block_rows.end()) { continue; } - auto y_upper_iter = compressed_block_grid.grid[cx_to].upper_bound(max_cy); + auto y_upper_iter = block_rows.upper_bound(search_range.ymax); - if (y_lower_iter->first > min_cy) { + if (y_lower_iter->first > search_range.ymin) { //No valid blocks at this x location which are within rlim_y // if (type->index != 1) continue; else { //Fall back to allow the whole y range - y_lower_iter = compressed_block_grid.grid[cx_to].begin(); - y_upper_iter = compressed_block_grid.grid[cx_to].end(); + y_lower_iter = block_rows.begin(); + y_upper_iter = block_rows.end(); - min_cy = y_lower_iter->first; - max_cy = (y_upper_iter - 1)->first; + search_range.ymin = y_lower_iter->first; + search_range.ymax = (y_upper_iter - 1)->first; } } @@ -1021,12 +1055,12 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int } //Key in the y-dimension is the compressed index location - cy_to = (y_lower_iter + dy)->first; + to_loc.y = (y_lower_iter + dy)->first; - VTR_ASSERT(cy_to >= min_cy); - VTR_ASSERT(cy_to <= max_cy); + VTR_ASSERT(to_loc.y >= search_range.ymin); + VTR_ASSERT(to_loc.y <= search_range.ymax); - if (cx_from == cx_to && cy_from == cy_to) { + if (from_loc.x == to_loc.x && from_loc.y == to_loc.y && from_loc.layer_num == to_layer_num) { continue; //Same from/to location -- try again for new y-position } else { legal = true; @@ -1036,16 +1070,135 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int return legal; } -bool intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr type, 
ClusterBlockId b_from, int& min_cx, int& min_cy, int& max_cx, int& max_cy, int& delta_cx) { +std::vector get_compressed_loc(const t_compressed_block_grid& compressed_block_grid, + t_pl_loc grid_loc, + int num_layers) { + //TODO: This function currently only determines the compressed location for the same layer as grid_loc - it should be updated to cover all layers + std::vector compressed_locs(num_layers); + + for (int layer_num = 0; layer_num < num_layers; ++layer_num) { + if (layer_num != grid_loc.layer) { + continue; + } + compressed_locs[layer_num] = compressed_block_grid.grid_loc_to_compressed_loc({grid_loc.x, grid_loc.y, layer_num}); + } + + return compressed_locs; +} + +std::vector get_compressed_loc_approx(const t_compressed_block_grid& compressed_block_grid, + t_pl_loc grid_loc, + int num_layers) { + //TODO: This function currently only determines the compressed location for the same layer as grid_loc - it should be updated to cover all layers + std::vector compressed_locs(num_layers); + + for (int layer_num = 0; layer_num < num_layers; ++layer_num) { + if (layer_num != grid_loc.layer) { + continue; + } + compressed_locs[layer_num] = compressed_block_grid.grid_loc_to_compressed_loc_approx({grid_loc.x, grid_loc.y, layer_num}); + } + + return compressed_locs; +} + +std::vector get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid, + const std::vector& compressed_locs, + float rlim, + int num_layers) { + std::vector search_ranges(num_layers, t_bb()); + for (int layer_num = 0; layer_num < num_layers; ++layer_num) { + const auto& layer_loc = compressed_locs[layer_num]; + //TODO: This if condition is added because blocks are only moved in the same layer.
After the update, this condition should be replaced with an assertion + if (layer_loc.x == OPEN || layer_loc.y == OPEN || layer_loc.layer_num == OPEN) { + //No valid compressed location for this layer + continue; + } + int rlim_x_max_range = std::min((int)compressed_block_grid.get_num_columns(layer_num), rlim); + int rlim_y_max_range = std::min((int)compressed_block_grid.get_num_rows(layer_num), rlim); /* for aspect_ratio != 1 case. */ + + search_ranges[layer_num].xmin = std::max(0, layer_loc.x - rlim_x_max_range); + search_ranges[layer_num].xmax = std::min(compressed_block_grid.get_num_columns(layer_num) - 1, layer_loc.x + rlim_x_max_range); + + search_ranges[layer_num].ymin = std::max(0, layer_loc.y - rlim_y_max_range); + search_ranges[layer_num].ymax = std::min(compressed_block_grid.get_num_rows(layer_num) - 1, layer_loc.y + rlim_y_max_range); + } + + return search_ranges; +} + +std::vector get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid, + const std::vector& from_compressed_loc, + const std::vector& target_compressed_loc, + float rlim, + int num_layers) { + std::vector search_range(num_layers, t_bb()); + + int min_cx, max_cx, min_cy, max_cy; + + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + //TODO: This if condition is added because blocks are only moved in the same layer. After the update, this condition should be replaced with an assertion + if (from_compressed_loc[layer_num].x == OPEN || from_compressed_loc[layer_num].y == OPEN || from_compressed_loc[layer_num].layer_num == OPEN) { + continue; + } + VTR_ASSERT(from_compressed_loc[layer_num].layer_num == layer_num); + VTR_ASSERT(target_compressed_loc[layer_num].layer_num == layer_num); + + int rlim_x_max_range = std::min(compressed_block_grid.get_num_columns(layer_num), rlim); + int rlim_y_max_range = std::min(compressed_block_grid.get_num_rows(layer_num), rlim); /* for aspect_ratio != 1 case. 
*/ + + int cx_from = from_compressed_loc[layer_num].x; + int cy_from = from_compressed_loc[layer_num].y; + if (cx_from == OPEN || cy_from == OPEN) { + continue; + } + + int cx_centroid = target_compressed_loc[layer_num].x; + int cy_centroid = target_compressed_loc[layer_num].y; + + if (cx_centroid < cx_from) { + min_cx = std::max(0, cx_from - rlim_x_max_range); + max_cx = cx_from; + } else { + min_cx = cx_from; + max_cx = std::min(compressed_block_grid.get_num_columns(layer_num) - 1, cx_from + rlim_x_max_range); + } + if (cy_centroid < cy_from) { + min_cy = std::max(0, cy_from - rlim_y_max_range); + max_cy = cy_from; + } else { + min_cy = cy_from; + max_cy = std::min(compressed_block_grid.get_num_rows(layer_num) - 1, cy_from + rlim_y_max_range); + } + + search_range[layer_num] = t_bb(min_cx, max_cx, min_cy, max_cy); + } + + return search_range; +} + +bool intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr type, + ClusterBlockId b_from, + t_bb& search_range, + int& delta_cx, + int layer_num) { //Retrieve the compressed block grid for this block type const auto& compressed_block_grid = g_vpr_ctx.placement().compressed_block_grids[type->index]; - int min_x = compressed_block_grid.compressed_to_grid_x[min_cx]; - int max_x = compressed_block_grid.compressed_to_grid_x[max_cx]; - int min_y = compressed_block_grid.compressed_to_grid_y[min_cy]; - int max_y = compressed_block_grid.compressed_to_grid_y[max_cy]; + auto min_grid_loc = compressed_block_grid.compressed_loc_to_grid_loc({search_range.xmin, + search_range.ymin, + layer_num}); + + auto max_grid_loc = compressed_block_grid.compressed_loc_to_grid_loc({search_range.xmax, + search_range.ymax, + layer_num}); + Region range_reg; - range_reg.set_region_rect(min_x, min_y, max_x, max_y); + range_reg.set_region_rect({min_grid_loc.x, + min_grid_loc.y, + max_grid_loc.x, + max_grid_loc.y, + layer_num}); auto& floorplanning_ctx = g_vpr_ctx.floorplanning(); @@ -1068,12 +1221,16 @@ bool 
intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr t if (intersect_reg.empty()) { return false; } else { - vtr::Rect rect = intersect_reg.get_region_rect(); - min_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, rect.xmin()); - max_cx = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_x, rect.xmax()); - min_cy = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, rect.ymin()); - max_cy = grid_to_compressed_approx(compressed_block_grid.compressed_to_grid_y, rect.ymax()); - delta_cx = max_cx - min_cx; + const auto intersect_coord = intersect_reg.get_region_rect(); + VTR_ASSERT(intersect_coord.layer_num == layer_num); + auto min_compressed_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx({intersect_coord.xmin, + intersect_coord.ymin, + layer_num}); + + auto max_compressed_loc = compressed_block_grid.grid_loc_to_compressed_loc_approx({intersect_coord.xmax, + intersect_coord.ymax, + layer_num}); + delta_cx = max_compressed_loc.x - min_compressed_loc.x; } } diff --git a/vpr/src/place/move_utils.h b/vpr/src/place/move_utils.h index 36733624eed..9cdc908fa29 100644 --- a/vpr/src/place/move_utils.h +++ b/vpr/src/place/move_utils.h @@ -214,9 +214,11 @@ std::string move_type_to_string(e_move_type); * cy: the y coordinate of the compressed location * loc: the uncompressed output location (returned in reference) */ -void compressed_grid_to_loc(t_logical_block_type_ptr blk_type, int cx, int cy, t_pl_loc& loc); +void compressed_grid_to_loc(t_logical_block_type_ptr blk_type, + t_physical_tile_loc compressed_loc, + t_pl_loc& to_loc); /** - * @brief find compressed location in a compressed range for a specific type + * @brief find compressed location in a compressed range for a specific type in the given layer (to_layer_num) * * type: defines the moving block type * min_cx, max_cx: the minimum and maximum x coordinates of the range in the compressed grid @@ -224,8 +226,77 @@ void 
compressed_grid_to_loc(t_logical_block_type_ptr blk_type, int cx, int cy, t * cx_from, cy_from: the x and y coordinates of the old location * cx_to, cy_to: the x and y coordinates of the new location on the compressed grid * is_median: true if this is called from find_to_loc_median + * to_layer_num: the layer number of the new location (set by the caller) */ -bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int min_cx, int max_cx, int min_cy, int max_cy, int delta_cx, int cx_from, int cy_from, int& cx_to, int& cy_to, bool is_median); +bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, + const int delta_cx, + const t_physical_tile_loc& from_loc, + t_bb search_range, + t_physical_tile_loc& to_loc, + bool is_median, + int to_layer_num); + +/** + * @brief Get the compressed loc from the uncompressed loc (grid_loc) + * @note This assumes the grid_loc corresponds to a location of the block type that compressed_block_grid stores its + * compressed location. Otherwise, it would raise an assertion error. + * @param compressed_block_grid The class that stores the compressed block grid of the block + * @param grid_loc The actual location of the block + * @param num_layers The number of layers (dice) of the FPGA + * @return Returns the compressed location of the block on each layer + */ +std::vector get_compressed_loc(const t_compressed_block_grid& compressed_block_grid, + t_pl_loc grid_loc, + int num_layers); + +/** + * @brief Get the compressed loc from the uncompressed loc (grid_loc). Return the nearest compressed location + * if grid_loc doesn't fall on a block of the type that compressed_block_grid stores its compressed location.
+ * @param compressed_block_grid + * @param grid_loc + * @param num_layers + * @return + */ +std::vector get_compressed_loc_approx(const t_compressed_block_grid& compressed_block_grid, + t_pl_loc grid_loc, + int num_layers); + +/** + * @brief This function calculates the search range around the compressed locs, based on the given rlim value and + * the number of rows/columns containing the same block type as the one that compressed_loc belongs to. + * If rlim is greater than the number of columns containing the block type on the right side of the compressed_loc, + * the search range from the right is limited by that number. Similar constraints apply to other sides as well. The + * function returns the final search range based on these conditions. + * @param compressed_block_grid + * @param compressed_locs + * @param rlim + * @param num_layers + * @return A compressed search range for each layer + */ +std::vector get_compressed_grid_target_search_range(const t_compressed_block_grid& compressed_block_grid, + const std::vector& compressed_locs, + float rlim, + int num_layers); + +/** + * @brief This function calculates the search range based on the given rlim value and the number of columns/rows + * containing the same resource type as the one specified in the compressed_block_grid. + * The search range is determined in a square shape, with from_compressed_loc as one of the corners and + * directed towards the target_compressed_loc. The function returns the final search range based on these conditions. + * @Note This function differs from get_compressed_grid_target_search_range as it doesn't have from_compressed_loc + * in the center of the search range. 
+ * @param compressed_block_grid + * @param from_compressed_loc + * @param target_compressed_loc + * @param rlim + * @param num_layers + * @return + */ +std::vector get_compressed_grid_bounded_search_range(const t_compressed_block_grid& compressed_block_grid, + const std::vector& from_compressed_loc, + const std::vector& target_compressed_loc, + float rlim, + int num_layers); /* * If the block to be moved (b_from) has a floorplan constraint, this routine changes the max and min coords @@ -241,8 +312,14 @@ bool find_compatible_compressed_loc_in_range(t_logical_block_type_ptr type, int * this routine is done for cpu time optimization, so we do not have to necessarily check each * complicated case to get correct functionality during place moves. * + * The intersection takes place in the layer (die) specified by layer_num. + * */ -bool intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr type, ClusterBlockId b_from, int& min_cx, int& min_cy, int& max_cx, int& max_cy, int& delta_cx); +bool intersect_range_limit_with_floorplan_constraints(t_logical_block_type_ptr type, + ClusterBlockId b_from, + t_bb& search_range, + int& delta_cx, + int layer_num); std::string e_move_result_to_string(e_move_result move_outcome); diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp index 0a53e936d51..11d9121ff2b 100644 --- a/vpr/src/place/noc_place_utils.cpp +++ b/vpr/src/place/noc_place_utils.cpp @@ -474,7 +474,7 @@ e_create_move propose_router_swap(t_pl_blocks_to_be_moved& blocks_affected, floa t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); // now choose a compatible block to swap with diff --git a/vpr/src/place/place.cpp 
b/vpr/src/place/place.cpp index 6b21212cb2f..b6696bdb8ac 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -447,6 +447,13 @@ void try_place(const Netlist<>& net_list, * width of the widest channel. Place_cost_exp says what exponent the * * width should be taken to when calculating costs. This allows a * * greater bias for anisotropic architectures. */ + + /* + * Currently, the functions that require is_flat as their parameter and are called during placement should + * receive is_flat as false. For example, if the RR graph of router lookahead is built here, it should be as + * if is_flat is false, even if is_flat is set to true from the command line. + */ + VTR_ASSERT(!is_flat); auto& device_ctx = g_vpr_ctx.device(); auto& atom_ctx = g_vpr_ctx.atom(); auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -761,8 +768,12 @@ void try_place(const Netlist<>& net_list, /* Set the temperature low to ensure that initial placement quality will be preserved */ first_t = EPSILON; - t_annealing_state state(annealing_sched, first_t, first_rlim, - first_move_lim, first_crit_exponent); + t_annealing_state state(annealing_sched, + first_t, + first_rlim, + first_move_lim, + first_crit_exponent, + device_ctx.grid.get_num_layers()); /* Update the starting temperature for placement annealing to a more appropriate value */ state.t = starting_t(&state, &costs, annealing_sched, @@ -2962,53 +2973,66 @@ static int check_block_placement_consistency() { cluster_ctx.clb_nlist.blocks().size(), 0); /* Step through device grid and placement. 
Check it against blocks */ - for (size_t i = 0; i < device_ctx.grid.width(); i++) - for (size_t j = 0; j < device_ctx.grid.height(); j++) { - const auto& type = device_ctx.grid.get_physical_type(i, j); - if (place_ctx.grid_blocks[i][j].usage - > type->capacity) { - VTR_LOG_ERROR( - "%d blocks were placed at grid location (%zu,%zu), but location capacity is %d.\n", - place_ctx.grid_blocks[i][j].usage, i, j, - type->capacity); - error++; - } - int usage_check = 0; - for (int k = 0; k < type->capacity; k++) { - auto bnum = place_ctx.grid_blocks[i][j].blocks[k]; - if (EMPTY_BLOCK_ID == bnum || INVALID_BLOCK_ID == bnum) - continue; - - auto logical_block = cluster_ctx.clb_nlist.block_type(bnum); - auto physical_tile = type; - - if (physical_tile_type(bnum) != physical_tile) { + for (int layer_num = 0; layer_num < (int)device_ctx.grid.get_num_layers(); layer_num++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + const t_physical_tile_loc tile_loc(i, j, layer_num); + const auto& type = device_ctx.grid.get_physical_type(tile_loc); + if (place_ctx.grid_blocks.get_usage(tile_loc) > type->capacity) { VTR_LOG_ERROR( - "Block %zu type (%s) does not match grid location (%zu,%zu) type (%s).\n", - size_t(bnum), logical_block->name, i, j, - physical_tile->name); + "%d blocks were placed at grid location (%d,%d,%d), but location capacity is %d.\n", + place_ctx.grid_blocks.get_usage(tile_loc), i, j, layer_num, + type->capacity); error++; } - - auto& loc = place_ctx.block_locs[bnum].loc; - if (loc.x != int(i) || loc.y != int(j) - || !is_sub_tile_compatible(physical_tile, logical_block, - loc.sub_tile)) { + int usage_check = 0; + for (int k = 0; k < type->capacity; k++) { + auto bnum = place_ctx.grid_blocks.block_at_location({i, j, k, layer_num}); + if (EMPTY_BLOCK_ID == bnum || INVALID_BLOCK_ID == bnum) + continue; + + auto logical_block = cluster_ctx.clb_nlist.block_type(bnum); + auto physical_tile = type; + + 
if (physical_tile_type(bnum) != physical_tile) { + VTR_LOG_ERROR( + "Block %zu type (%s) does not match grid location (%zu,%zu, %d) type (%s).\n", + size_t(bnum), logical_block->name, i, j, layer_num, physical_tile->name); + error++; + } + + auto& loc = place_ctx.block_locs[bnum].loc; + if (loc.x != i || loc.y != j || loc.layer != layer_num + || !is_sub_tile_compatible(physical_tile, logical_block, + loc.sub_tile)) { + VTR_LOG_ERROR( + "Block %zu's location is (%d,%d,%d) but found in grid at (%zu,%zu,%d,%d).\n", + size_t(bnum), + loc.x, + loc.y, + loc.sub_tile, + tile_loc.x, + tile_loc.y, + tile_loc.layer_num, + layer_num); + error++; + } + ++usage_check; + bdone[bnum]++; + } + if (usage_check != place_ctx.grid_blocks.get_usage(tile_loc)) { VTR_LOG_ERROR( - "Block %zu's location is (%d,%d,%d) but found in grid at (%zu,%zu,%d).\n", - size_t(bnum), loc.x, loc.y, loc.sub_tile, i, j, k); + "%d block(s) were placed at location (%d,%d,%d), but location contains %d block(s).\n", + place_ctx.grid_blocks.get_usage(tile_loc), + tile_loc.x, + tile_loc.y, + tile_loc.layer_num, + usage_check); error++; } - ++usage_check; - bdone[bnum]++; - } - if (usage_check != place_ctx.grid_blocks[i][j].usage) { - VTR_LOG_ERROR( - "%d block(s) were placed at location (%zu,%zu), but location contains %d block(s).\n", - place_ctx.grid_blocks[i][j].usage, i, j, usage_check); - error++; } } + } /* Check that every block exists in the device_ctx.grid and cluster_ctx.blocks arrays somewhere. 
*/ for (auto blk_id : cluster_ctx.clb_nlist.blocks()) @@ -3048,7 +3072,7 @@ int check_macro_placement_consistency() { } // Then check the place_ctx.grid data structure - if (place_ctx.grid_blocks[member_pos.x][member_pos.y].blocks[member_pos.sub_tile] + if (place_ctx.grid_blocks.block_at_location(member_pos) != member_iblk) { VTR_LOG_ERROR( "Block %zu in pl_macro #%zu is not placed in the proper orientation.\n", @@ -3167,7 +3191,7 @@ static void print_resources_utilization() { auto block_loc = place_ctx.block_locs[blk_id]; auto loc = block_loc.loc; - auto physical_tile = device_ctx.grid.get_physical_type(loc.x, loc.y); + auto physical_tile = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); num_type_instances[logical_block]++; diff --git a/vpr/src/place/place_constraints.cpp b/vpr/src/place/place_constraints.cpp index a0a811cb11d..f1c5045251b 100644 --- a/vpr/src/place/place_constraints.cpp +++ b/vpr/src/place/place_constraints.cpp @@ -80,9 +80,13 @@ PartitionRegion update_macro_head_pr(const t_pl_macro& pl_macro, const Partition Region modified_reg; auto offset = pl_macro.members[imember].offset; - vtr::Rect reg_rect = block_regions[i].get_region_rect(); + const auto block_reg_coord = block_regions[i].get_region_rect(); - modified_reg.set_region_rect(reg_rect.xmin() - offset.x, reg_rect.ymin() - offset.y, reg_rect.xmax() - offset.x, reg_rect.ymax() - offset.y); + modified_reg.set_region_rect({block_reg_coord.xmin - offset.x, + block_reg_coord.ymin - offset.y, + block_reg_coord.xmax - offset.x, + block_reg_coord.ymax - offset.y, + block_reg_coord.layer_num}); //check that subtile is not an invalid value before changing, otherwise it just stays -1 if (block_regions[i].get_sub_tile() != NO_SUBTILE) { @@ -118,9 +122,13 @@ PartitionRegion update_macro_member_pr(PartitionRegion& head_pr, const t_pl_offs for (unsigned int i = 0; i < block_regions.size(); i++) { Region modified_reg; - vtr::Rect 
reg_rect = block_regions[i].get_region_rect(); + const auto block_reg_coord = block_regions[i].get_region_rect(); - modified_reg.set_region_rect(reg_rect.xmin() + offset.x, reg_rect.ymin() + offset.y, reg_rect.xmax() + offset.x, reg_rect.ymax() + offset.y); + modified_reg.set_region_rect({block_reg_coord.xmin + offset.x, + block_reg_coord.ymin + offset.y, + block_reg_coord.xmax + offset.x, + block_reg_coord.ymax + offset.y, + block_reg_coord.layer_num}); //check that subtile is not an invalid value before changing, otherwise it just stays -1 if (block_regions[i].get_sub_tile() != NO_SUBTILE) { @@ -159,14 +167,19 @@ void propagate_place_constraints() { auto& floorplanning_ctx = g_vpr_ctx.mutable_floorplanning(); auto& device_ctx = g_vpr_ctx.device(); - //Create a PartitionRegion with grid dimensions - //Will be used to check that updated PartitionRegions are within grid bounds - int width = device_ctx.grid.width() - 1; - int height = device_ctx.grid.height() - 1; + int num_layers = device_ctx.grid.get_num_layers(); Region grid_reg; - grid_reg.set_region_rect(0, 0, width, height); PartitionRegion grid_pr; - grid_pr.add_to_part_region(grid_reg); + + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + //Create a PartitionRegion with grid dimensions + //Will be used to check that updated PartitionRegions are within grid bounds + int width = device_ctx.grid.width() - 1; + int height = device_ctx.grid.height() - 1; + + grid_reg.set_region_rect({0, 0, width, height, layer_num}); + grid_pr.add_to_part_region(grid_reg); + } for (auto pl_macro : place_ctx.pl_macros) { if (is_macro_constrained(pl_macro)) { @@ -296,12 +309,13 @@ void mark_fixed_blocks() { */ int region_tile_cover(const Region& reg, t_logical_block_type_ptr block_type, t_pl_loc& loc) { auto& device_ctx = g_vpr_ctx.device(); - vtr::Rect rb = reg.get_region_rect(); + const auto reg_coord = reg.get_region_rect(); + const int layer_num = reg.get_layer_num(); int num_tiles = 0; - for (int x = rb.xmin(); 
x <= rb.xmax(); x++) { - for (int y = rb.ymin(); y <= rb.ymax(); y++) { - const auto& tile = device_ctx.grid.get_physical_type(x, y); + for (int x = reg_coord.xmin; x <= reg_coord.xmax; x++) { + for (int y = reg_coord.ymin; y <= reg_coord.ymax; y++) { + const auto& tile = device_ctx.grid.get_physical_type({x, y, reg_coord.layer_num}); /* * If the tile at the grid location is not compatible with the cluster block @@ -322,6 +336,7 @@ int region_tile_cover(const Region& reg, t_logical_block_type_ptr block_type, t_ loc.x = x; loc.y = y; loc.sub_tile = reg.get_sub_tile(); + loc.layer = layer_num; if (num_tiles > 1) { return num_tiles; } @@ -342,6 +357,7 @@ int region_tile_cover(const Region& reg, t_logical_block_type_ptr block_type, t_ loc.x = x; loc.y = y; loc.sub_tile = z; + loc.layer = layer_num; } if (num_tiles > 1) { return num_tiles; @@ -368,13 +384,21 @@ bool is_pr_size_one(PartitionRegion& pr, t_logical_block_type_ptr block_type, t_ bool pr_size_one; int pr_size = 0; int reg_size; - - Region intersect_reg; - intersect_reg.set_region_rect(0, 0, device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - Region current_reg; + int num_layers = device_ctx.grid.get_num_layers(); + + std::vector intersect_reg(num_layers); + for (int layer_num = 0; layer_num < num_layers; ++layer_num) { + intersect_reg[layer_num].set_region_rect({0, + 0, + (int)device_ctx.grid.width() - 1, + (int)device_ctx.grid.height() - 1, + layer_num}); + } + std::vector current_reg(num_layers); for (unsigned int i = 0; i < regions.size(); i++) { reg_size = region_tile_cover(regions[i], block_type, loc); + int layer_num = regions[i].get_layer_num(); /* * If multiple regions in the PartitionRegion all have size 1, @@ -387,9 +411,9 @@ bool is_pr_size_one(PartitionRegion& pr, t_logical_block_type_ptr block_type, t_ */ if (reg_size == 1) { //get the exact x, y, subtile location covered by the current region (regions[i]) - current_reg.set_region_rect(loc.x, loc.y, loc.x, loc.y); - 
current_reg.set_sub_tile(loc.sub_tile); - intersect_reg = intersection(intersect_reg, current_reg); + current_reg[layer_num].set_region_rect({loc.x, loc.y, loc.x, loc.y, layer_num}); + current_reg[layer_num].set_sub_tile(loc.sub_tile); + intersect_reg[layer_num] = intersection(intersect_reg[layer_num], current_reg[layer_num]); if (i == 0 || intersect_reg.empty()) { pr_size = pr_size + reg_size; diff --git a/vpr/src/place/place_constraints.h b/vpr/src/place/place_constraints.h index c1cf889f395..5fbc481fd2b 100644 --- a/vpr/src/place/place_constraints.h +++ b/vpr/src/place/place_constraints.h @@ -126,7 +126,7 @@ int get_part_reg_size(PartitionRegion& pr, t_logical_block_type_ptr block_type, /* * Return the floorplan score that will be used for sorting blocks during initial placement. This score is the - * total number of subtilesfor the block type in the grid, minus the number of subtiles in the block's floorplan PartitionRegion. + * total number of subtiles for the block type in the grid, minus the number of subtiles in the block's floorplan PartitionRegion. * The resulting number is the number of tiles outside the block's floorplan region, meaning the higher * it is, the more difficult the block is to place. */ diff --git a/vpr/src/place/place_delay_model.cpp b/vpr/src/place/place_delay_model.cpp index c6e27c39c1f..44d8c4a0b49 100644 --- a/vpr/src/place/place_delay_model.cpp +++ b/vpr/src/place/place_delay_model.cpp @@ -27,26 +27,30 @@ #endif /* VTR_ENABLE_CAPNPROTO */ ///@brief DeltaDelayModel methods. 
-float DeltaDelayModel::delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/) const { +float DeltaDelayModel::delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/, int layer_num) const { int delta_x = std::abs(from_x - to_x); int delta_y = std::abs(from_y - to_y); - return delays_[delta_x][delta_y]; + return delays_[layer_num][delta_x][delta_y]; } void DeltaDelayModel::dump_echo(std::string filepath) const { FILE* f = vtr::fopen(filepath.c_str(), "w"); fprintf(f, " "); - for (size_t dx = 0; dx < delays_.dim_size(0); ++dx) { - fprintf(f, " %9zu", dx); - } - fprintf(f, "\n"); - for (size_t dy = 0; dy < delays_.dim_size(1); ++dy) { - fprintf(f, "%9zu", dy); - for (size_t dx = 0; dx < delays_.dim_size(0); ++dx) { - fprintf(f, " %9.2e", delays_[dx][dy]); + for (size_t layer_num = 0; layer_num < delays_.dim_size(0); ++layer_num) { + fprintf(f, " %9zu", layer_num); + fprintf(f, "\n"); + for (size_t dx = 0; dx < delays_.dim_size(1); ++dx) { + fprintf(f, " %9zu", dx); } fprintf(f, "\n"); + for (size_t dy = 0; dy < delays_.dim_size(2); ++dy) { + fprintf(f, "%9zu", dy); + for (size_t dx = 0; dx < delays_.dim_size(1); ++dx) { + fprintf(f, " %9.2e", delays_[layer_num][dx][dy]); + } + fprintf(f, "\n"); + } } vtr::fclose(f); } @@ -56,13 +60,13 @@ const DeltaDelayModel* OverrideDelayModel::base_delay_model() const { } ///@brief OverrideDelayModel methods. 
-float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin) const { +float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin, int layer_num) const { //First check to if there is an override delay value auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; - t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type(from_x, from_y); - t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type(to_x, to_y); + t_physical_tile_type_ptr from_type_ptr = grid.get_physical_type({from_x, from_y, layer_num}); + t_physical_tile_type_ptr to_type_ptr = grid.get_physical_type({to_x, to_y, layer_num}); t_override override_key; override_key.from_type = from_type_ptr->index; @@ -82,7 +86,7 @@ float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x, delay_val = override_iter->second; } else { //Fall back to the base delay model if no override was found - delay_val = base_delay_model_->delay(from_x, from_y, from_pin, to_x, to_y, to_pin); + delay_val = base_delay_model_->delay(from_x, from_y, from_pin, to_x, to_y, to_pin, layer_num); } return delay_val; @@ -221,7 +225,7 @@ void DeltaDelayModel::read(const std::string& file) { // // The second argument should be of type Matrix::Reader where X is the // capnproto element type. - ToNdMatrix<2, VprFloatEntry, float>(&delays_, model.getDelays(), ToFloat); + ToNdMatrix<3, VprFloatEntry, float>(&delays_, model.getDelays(), ToFloat); } void DeltaDelayModel::write(const std::string& file) const { @@ -237,7 +241,7 @@ void DeltaDelayModel::write(const std::string& file) const { // Matrix message. It is the mirror function of ToNdMatrix described in // read above. auto delay_values = model.getDelays(); - FromNdMatrix<2, VprFloatEntry, float>(&delay_values, delays_, FromFloat); + FromNdMatrix<3, VprFloatEntry, float>(&delay_values, delays_, FromFloat); // writeMessageToFile writes message to the specified file. 
writeMessageToFile(file, &builder); @@ -250,9 +254,9 @@ void OverrideDelayModel::read(const std::string& file) { ::capnp::ReaderOptions opts = default_large_capnp_opts(); ::capnp::FlatArrayMessageReader reader(f.getData(), opts); - vtr::Matrix delays; + vtr::NdMatrix delays; auto model = reader.getRoot(); - ToNdMatrix<2, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat); + ToNdMatrix<3, VprFloatEntry, float>(&delays, model.getDelays(), ToFloat); base_delay_model_ = std::make_unique(delays, is_flat_); @@ -280,7 +284,7 @@ void OverrideDelayModel::write(const std::string& file) const { auto model = builder.initRoot(); auto delays = model.getDelays(); - FromNdMatrix<2, VprFloatEntry, float>(&delays, base_delay_model_->delays(), FromFloat); + FromNdMatrix<3, VprFloatEntry, float>(&delays, base_delay_model_->delays(), FromFloat); // Non-scalar capnproto fields should be first initialized with // init(count), and then accessed from the returned @@ -344,6 +348,7 @@ float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, Cluste int source_y = place_ctx.block_locs[source_block].loc.y; int sink_x = place_ctx.block_locs[sink_block].loc.x; int sink_y = place_ctx.block_locs[sink_block].loc.y; + int sink_layer_num = place_ctx.block_locs[sink_block].loc.layer; /** * This heuristic only considers delta_x and delta_y, a much better @@ -357,7 +362,8 @@ float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, Cluste source_block_ipin, sink_x, sink_y, - sink_block_ipin); + sink_block_ipin, + sink_layer_num); if (delay_source_to_sink < 0) { VPR_ERROR(VPR_ERROR_PLACE, "in comp_td_single_connection_delay: Bad delay_source_to_sink value %g from %s (at %d,%d) to %s (at %d,%d)\n" diff --git a/vpr/src/place/place_delay_model.h b/vpr/src/place/place_delay_model.h index f8b9f72f1b8..09b6969c011 100644 --- a/vpr/src/place/place_delay_model.h +++ b/vpr/src/place/place_delay_model.h @@ -62,7 +62,7 @@ class PlaceDelayModel { * * Either compute or read 
methods must be invoked before invoking delay. */ - virtual float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin) const = 0; + virtual float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin, int layer_num) const = 0; ///@brief Dumps the delay model to an echo file. virtual void dump_echo(std::string filename) const = 0; @@ -87,7 +87,7 @@ class DeltaDelayModel : public PlaceDelayModel { public: DeltaDelayModel(bool is_flat) : is_flat_(is_flat) {} - DeltaDelayModel(vtr::Matrix delta_delays, bool is_flat) + DeltaDelayModel(vtr::NdMatrix delta_delays, bool is_flat) : delays_(std::move(delta_delays)) , is_flat_(is_flat) {} @@ -96,17 +96,17 @@ class DeltaDelayModel : public PlaceDelayModel { const t_placer_opts& placer_opts, const t_router_opts& router_opts, int longest_length) override; - float delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/) const override; + float delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/, int layer_num) const override; void dump_echo(std::string filepath) const override; void read(const std::string& file) override; void write(const std::string& file) const override; - const vtr::Matrix& delays() const { + const vtr::NdMatrix& delays() const { return delays_; } private: - vtr::Matrix delays_; + vtr::NdMatrix delays_; // [0..num_layers-1][0..max_dx][0..max_dy] bool is_flat_; }; @@ -119,7 +119,9 @@ class OverrideDelayModel : public PlaceDelayModel { const t_placer_opts& placer_opts, const t_router_opts& router_opts, int longest_length) override; - float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin) const override; + // returns delay from the specified (x,y) to the specified (x,y) with both endpoints on layer_num and the + // specified from and to pins + float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin, int layer_num) const override; void dump_echo(std::string 
filepath) const override; void read(const std::string& file) override; diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index f8afaeb1363..75ff2d2bf12 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -10,7 +10,7 @@ #include "place_constraints.h" /* File-scope routines */ -static vtr::Matrix init_grid_blocks(); +static GridBlock init_grid_blocks(); /** * @brief Initialize the placer's block-grid dual direction mapping. @@ -38,16 +38,19 @@ void init_placement_context() { * The container at each grid block location should have a length equal to the * subtile capacity of that block. Unused subtile would be marked EMPTY_BLOCK_ID. */ -static vtr::Matrix init_grid_blocks() { +static GridBlock init_grid_blocks() { auto& device_ctx = g_vpr_ctx.device(); + int num_layers = device_ctx.grid.get_num_layers(); /* Structure should have the same dimensions as the grid. */ - auto grid_blocks = vtr::Matrix({device_ctx.grid.width(), device_ctx.grid.height()}); + auto grid_blocks = GridBlock(device_ctx.grid.width(), device_ctx.grid.height(), num_layers); - for (size_t x = 0; x < device_ctx.grid.width(); ++x) { - for (size_t y = 0; y < device_ctx.grid.height(); ++y) { - auto type = device_ctx.grid.get_physical_type(x, y); - grid_blocks[x][y].blocks.resize(type->capacity, EMPTY_BLOCK_ID); + for (int layer_num = 0; layer_num < num_layers; ++layer_num) { + for (int x = 0; x < (int)device_ctx.grid.width(); ++x) { + for (int y = 0; y < (int)device_ctx.grid.height(); ++y) { + auto type = device_ctx.grid.get_physical_type({x, y, layer_num}); + grid_blocks.initialized_grid_block_at_location({x, y, layer_num}, type->capacity); + } } } return grid_blocks; @@ -75,7 +78,8 @@ t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, float first_t, float first_rlim, int first_move_lim, - float first_crit_exponent) { + float first_crit_exponent, + int num_layers) { num_temps = 0; alpha = annealing_sched.alpha_min; t = 
first_t; @@ -91,6 +95,8 @@ t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, float first_t, float first_rlim, int first_move_lim, move_lim = move_lim_max; } + NUM_LAYERS = num_layers; + /* Store this inverse value for speed when updating crit_exponent. */ INVERSE_DELTA_RLIM = 1 / (first_rlim - FINAL_RLIM); @@ -347,8 +353,9 @@ void load_grid_blocks_from_block_locs() { VTR_ASSERT(location.x < (int)device_ctx.grid.width()); VTR_ASSERT(location.y < (int)device_ctx.grid.height()); - place_ctx.grid_blocks[location.x][location.y].blocks[location.sub_tile] = blk_id; - place_ctx.grid_blocks[location.x][location.y].usage++; + place_ctx.grid_blocks.set_block_at_location(location, blk_id); + place_ctx.grid_blocks.set_usage({location.x, location.y, location.layer}, + place_ctx.grid_blocks.get_usage({location.x, location.y, location.layer}) + 1); } } @@ -358,17 +365,19 @@ void zero_initialize_grid_blocks() { /* Initialize all occupancy to zero. */ - for (size_t i = 0; i < device_ctx.grid.width(); i++) { - for (size_t j = 0; j < device_ctx.grid.height(); j++) { - place_ctx.grid_blocks[i][j].usage = 0; - auto tile = device_ctx.grid.get_physical_type(i, j); + for (int layer_num = 0; layer_num < (int)device_ctx.grid.get_num_layers(); layer_num++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + place_ctx.grid_blocks.set_usage({i, j, layer_num}, 0); + auto tile = device_ctx.grid.get_physical_type({i, j, layer_num}); - for (auto sub_tile : tile->sub_tiles) { - auto capacity = sub_tile.capacity; + for (auto sub_tile : tile->sub_tiles) { + auto capacity = sub_tile.capacity; - for (int k = 0; k < capacity.total(); k++) { - if (place_ctx.grid_blocks[i][j].blocks[k + capacity.low] != INVALID_BLOCK_ID) { - place_ctx.grid_blocks[i][j].blocks[k + capacity.low] = EMPTY_BLOCK_ID; + for (int k = 0; k < capacity.total(); k++) { + if (place_ctx.grid_blocks.block_at_location({i, j, k + capacity.low, layer_num}) != INVALID_BLOCK_ID) { + 
place_ctx.grid_blocks.set_block_at_location({i, j, k + capacity.low, layer_num}, EMPTY_BLOCK_ID); + } } } } @@ -398,27 +407,30 @@ void alloc_and_load_legal_placement_locations(std::vectorsub_tiles) { - auto capacity = sub_tile.capacity; - - for (int k = 0; k < capacity.total(); k++) { - if (place_ctx.grid_blocks[i][j].blocks[k + capacity.low] == INVALID_BLOCK_ID) { - continue; - } - // If this is the anchor position of a block, add it to the legal_pos. - // Otherwise don't, so large blocks aren't added multiple times. - if (device_ctx.grid.get_width_offset(i, j) == 0 && device_ctx.grid.get_height_offset(i, j) == 0) { - int itype = tile->index; - int isub_tile = sub_tile.index; - t_pl_loc temp_loc; - temp_loc.x = i; - temp_loc.y = j; - temp_loc.sub_tile = k + capacity.low; - legal_pos[itype][isub_tile].push_back(temp_loc); + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (int i = 0; i < (int)device_ctx.grid.width(); i++) { + for (int j = 0; j < (int)device_ctx.grid.height(); j++) { + auto tile = device_ctx.grid.get_physical_type({i, j, layer_num}); + + for (const auto& sub_tile : tile->sub_tiles) { + auto capacity = sub_tile.capacity; + + for (int k = 0; k < capacity.total(); k++) { + if (place_ctx.grid_blocks.block_at_location({i, j, k + capacity.low, layer_num}) == INVALID_BLOCK_ID) { + continue; + } + // If this is the anchor position of a block, add it to the legal_pos. + // Otherwise don't, so large blocks aren't added multiple times. 
+ if (device_ctx.grid.get_width_offset({i, j, layer_num}) == 0 && device_ctx.grid.get_height_offset({i, j, layer_num}) == 0) { + int itype = tile->index; + int isub_tile = sub_tile.index; + t_pl_loc temp_loc; + temp_loc.x = i; + temp_loc.y = j; + temp_loc.sub_tile = k + capacity.low; + temp_loc.layer = layer_num; + legal_pos[itype][isub_tile].push_back(temp_loc); + } } } } @@ -442,12 +454,10 @@ void set_block_location(ClusterBlockId blk_id, const t_pl_loc& location) { } //Set the location of the block - place_ctx.block_locs[blk_id].loc.x = location.x; - place_ctx.block_locs[blk_id].loc.y = location.y; - place_ctx.block_locs[blk_id].loc.sub_tile = location.sub_tile; + place_ctx.block_locs[blk_id].loc = location; //Check if block is at an illegal location - auto physical_tile = device_ctx.grid.get_physical_type(location.x, location.y); + auto physical_tile = device_ctx.grid.get_physical_type({location.x, location.y, location.layer}); auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); if (location.sub_tile >= physical_tile->capacity || location.sub_tile < 0) { @@ -455,13 +465,18 @@ void set_block_location(ClusterBlockId blk_id, const t_pl_loc& location) { } if (!is_sub_tile_compatible(physical_tile, logical_block, place_ctx.block_locs[blk_id].loc.sub_tile)) { - VPR_THROW(VPR_ERROR_PLACE, "Attempt to place block %s with ID %d at illegal location (%d, %d). \n", block_name.c_str(), blk_id, location.x, location.y); + VPR_THROW(VPR_ERROR_PLACE, "Attempt to place block %s with ID %d at illegal location (%d,%d,%d). 
\n", + block_name.c_str(), + blk_id, + location.x, + location.y, + location.layer); } //Mark the grid location and usage of the block - place_ctx.grid_blocks[location.x][location.y].blocks[location.sub_tile] = blk_id; - place_ctx.grid_blocks[location.x][location.y].usage++; - + place_ctx.grid_blocks.set_block_at_location(location, blk_id); + place_ctx.grid_blocks.set_usage({location.x, location.y, location.layer}, + place_ctx.grid_blocks.get_usage({location.x, location.y, location.layer}) + 1); place_sync_external_block_connections(blk_id); } @@ -482,7 +497,7 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ t_pl_loc member_pos = head_pos + pl_macro.members[imember].offset; //Check that the member location is on the grid - if (!is_loc_on_chip(member_pos.x, member_pos.y)) { + if (!is_loc_on_chip({member_pos.x, member_pos.y, member_pos.layer})) { mac_can_be_placed = false; break; } @@ -519,8 +534,8 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ // Then check whether the location could still accommodate more blocks // Also check whether the member position is valid, and the member_z is allowed at that location on the grid if (member_pos.x < int(device_ctx.grid.width()) && member_pos.y < int(device_ctx.grid.height()) - && is_tile_compatible(device_ctx.grid.get_physical_type(member_pos.x, member_pos.y), block_type) - && place_ctx.grid_blocks[member_pos.x][member_pos.y].blocks[member_pos.sub_tile] == EMPTY_BLOCK_ID) { + && is_tile_compatible(device_ctx.grid.get_physical_type({member_pos.x, member_pos.y, member_pos.layer}), block_type) + && place_ctx.grid_blocks.block_at_location(member_pos) == EMPTY_BLOCK_ID) { // Can still accommodate blocks here, check the next position continue; } else { diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index 06373920bb9..cc903cf4f71 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -133,13 +133,15 @@ class 
t_annealing_state { float UPPER_RLIM; float FINAL_RLIM = 1.; float INVERSE_DELTA_RLIM; + int NUM_LAYERS = 1; public: //Constructor t_annealing_state(const t_annealing_sched& annealing_sched, float first_t, float first_rlim, int first_move_lim, - float first_crit_exponent); + float first_crit_exponent, + int num_layers); public: //Mutator bool outer_loop_update(float success_rate, @@ -229,10 +231,13 @@ void alloc_and_load_legal_placement_locations(std::vector= 0 && x < int(device_ctx.grid.width()) && y >= 0 && y < int(device_ctx.grid.height())); + return (layer_num >= 0 && layer_num < int(grid.get_num_layers()) && x >= 0 && x < int(grid.width()) && y >= 0 && y < int(grid.height())); } /** diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index 55667ecb8d0..74682d220f3 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -69,6 +69,7 @@ static t_chan_width setup_chan_width(const t_router_opts& router_opts, static float route_connection_delay( RouterDelayProfiler& route_profiler, + int layer_num, int source_x_loc, int source_y_loc, int sink_x_loc, @@ -86,6 +87,7 @@ typedef std::function&, @@ -95,6 +97,7 @@ typedef std::function>& matrix, + int layer_num, int source_x, int source_y, int start_x, @@ -109,6 +112,7 @@ static void generic_compute_matrix_iterative_astar( static void generic_compute_matrix_dijkstra_expansion( RouterDelayProfiler& route_profiler, vtr::Matrix>& matrix, + int layer_num, int source_x, int source_y, int start_x, @@ -120,7 +124,7 @@ static void generic_compute_matrix_dijkstra_expansion( const std::set& allowed_types, bool is_flat); -static vtr::Matrix compute_delta_delays( +static vtr::NdMatrix compute_delta_delays( RouterDelayProfiler& route_profiler, const t_placer_opts& palcer_opts, const t_router_opts& router_opts, @@ -130,7 +134,7 @@ static vtr::Matrix compute_delta_delays( float delay_reduce(std::vector& delays, e_reducer reducer); -static vtr::Matrix 
compute_delta_delay_model( +static vtr::NdMatrix compute_delta_delay_model( RouterDelayProfiler& route_profiler, const t_placer_opts& placer_opts, const t_router_opts& router_opts, @@ -148,14 +152,14 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, int* src_rr, int* sink_rr); -static bool verify_delta_delays(const vtr::Matrix& delta_delays); +static bool verify_delta_delays(const vtr::NdMatrix& delta_delays); static int get_longest_segment_length(std::vector& segment_inf); -static void fix_empty_coordinates(vtr::Matrix& delta_delays); -static void fix_uninitialized_coordinates(vtr::Matrix& delta_delays); +static void fix_empty_coordinates(vtr::NdMatrix& delta_delays); +static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays); -static float find_neightboring_average(vtr::Matrix& matrix, int x, int y, int max_distance); +static float find_neightboring_average(vtr::NdMatrix& matrix, t_physical_tile_loc tile_loc, int max_distance); /******* Globally Accessible Functions **********/ @@ -174,7 +178,6 @@ std::unique_ptr compute_place_delay_model(const t_placer_opts& t_chan_width chan_width = setup_chan_width(router_opts, chan_width_dist); - //TODO: is_flat flag should not be set here - It should be passed to the function. 
alloc_routing_structs(chan_width, router_opts, det_routing_arch, segment_inf, directs, num_directs, is_flat); @@ -345,6 +348,7 @@ static t_chan_width setup_chan_width(const t_router_opts& router_opts, static float route_connection_delay( RouterDelayProfiler& route_profiler, + int layer_num, int source_x, int source_y, int sink_x, @@ -360,20 +364,18 @@ static float route_connection_delay( bool successfully_routed = false; //Get the rr nodes to route between - auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type(source_x, source_y)); - auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type(sink_x, sink_y)); + auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, layer_num})); + auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num})); for (int driver_ptc : best_driver_ptcs) { VTR_ASSERT(driver_ptc != OPEN); - - RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(source_x, source_y, SOURCE, driver_ptc); + RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, source_x, source_y, SOURCE, driver_ptc); VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); for (int sink_ptc : best_sink_ptcs) { VTR_ASSERT(sink_ptc != OPEN); - - RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(sink_x, sink_y, SINK, sink_ptc); + RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, sink_x, sink_y, SINK, sink_ptc); VTR_ASSERT(sink_rr_node != RRNodeId::INVALID()); @@ -395,8 +397,8 @@ static float route_connection_delay( } if (!successfully_routed) { - VTR_LOG_WARN("Unable to route between blocks at (%d,%d) and (%d,%d) to characterize delay (setting to %g)\n", - source_x, source_y, sink_x, sink_y, net_delay_value); + VTR_LOG_WARN("Unable to route between blocks at (%d,%d,%d) and (%d,%d,%d) to characterize delay (setting to %g)\n", + layer_num, 
source_x, source_y, layer_num, sink_x, sink_y, net_delay_value); } return (net_delay_value); @@ -419,6 +421,7 @@ static void add_delay_to_matrix( static void generic_compute_matrix_dijkstra_expansion( RouterDelayProfiler& /*route_profiler*/, vtr::Matrix>& matrix, + int layer_num, int source_x, int source_y, int start_x, @@ -431,7 +434,7 @@ static void generic_compute_matrix_dijkstra_expansion( bool is_flat) { auto& device_ctx = g_vpr_ctx.device(); - t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type(source_x, source_y); + t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, layer_num}); bool is_allowed_type = allowed_types.empty() || allowed_types.find(src_type->name) != allowed_types.end(); if (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE || !is_allowed_type) { for (int sink_x = start_x; sink_x <= end_x; sink_x++) { @@ -458,10 +461,10 @@ static void generic_compute_matrix_dijkstra_expansion( vtr::Matrix found_matrix({matrix.dim_size(0), matrix.dim_size(1)}, false); - auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type(source_x, source_y)); + auto best_driver_ptcs = get_best_classes(DRIVER, device_ctx.grid.get_physical_type({source_x, source_y, layer_num})); for (int driver_ptc : best_driver_ptcs) { VTR_ASSERT(driver_ptc != OPEN); - RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(source_x, source_y, SOURCE, driver_ptc); + RRNodeId source_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, source_x, source_y, SOURCE, driver_ptc); VTR_ASSERT(source_rr_node != RRNodeId::INVALID()); auto delays = calculate_all_path_delays_from_rr_node(size_t(source_rr_node), @@ -478,7 +481,7 @@ static void generic_compute_matrix_dijkstra_expansion( continue; } - t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type(sink_x, sink_y); + t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num}); if 
(sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { if (matrix[delta_x][delta_y].empty()) { //Only set empty target if we don't already have a valid delta delay @@ -494,11 +497,10 @@ static void generic_compute_matrix_dijkstra_expansion( } } else { bool found_a_sink = false; - auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type(sink_x, sink_y)); + auto best_sink_ptcs = get_best_classes(RECEIVER, device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num})); for (int sink_ptc : best_sink_ptcs) { VTR_ASSERT(sink_ptc != OPEN); - - RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(sink_x, sink_y, SINK, sink_ptc); + RRNodeId sink_rr_node = device_ctx.rr_graph.node_lookup().find_node(layer_num, sink_x, sink_y, SINK, sink_ptc); VTR_ASSERT(sink_rr_node != RRNodeId::INVALID()); @@ -555,6 +557,7 @@ static void generic_compute_matrix_dijkstra_expansion( static void generic_compute_matrix_iterative_astar( RouterDelayProfiler& route_profiler, vtr::Matrix>& matrix, + int layer_num, int source_x, int source_y, int start_x, @@ -577,8 +580,8 @@ static void generic_compute_matrix_iterative_astar( delta_x = abs(sink_x - source_x); delta_y = abs(sink_y - source_y); - t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type(source_x, source_y); - t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type(sink_x, sink_y); + t_physical_tile_type_ptr src_type = device_ctx.grid.get_physical_type({source_x, source_y, layer_num}); + t_physical_tile_type_ptr sink_type = device_ctx.grid.get_physical_type({sink_x, sink_y, layer_num}); bool src_or_target_empty = (src_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE || sink_type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE); @@ -600,7 +603,7 @@ static void generic_compute_matrix_iterative_astar( } else { //Valid start/end - float delay = route_connection_delay(route_profiler, source_x, source_y, sink_x, sink_y, router_opts, measure_directconnect); + float delay = 
route_connection_delay(route_profiler, layer_num, source_x, source_y, sink_x, sink_y, router_opts, measure_directconnect); #ifdef VERBOSE VTR_LOG("Computed delay: %12g delta: %d,%d (src: %d,%d sink: %d,%d)\n", @@ -621,7 +624,7 @@ static void generic_compute_matrix_iterative_astar( } } -static vtr::Matrix compute_delta_delays( +static vtr::NdMatrix compute_delta_delays( RouterDelayProfiler& route_profiler, const t_placer_opts& placer_opts, const t_router_opts& router_opts, @@ -635,187 +638,196 @@ static vtr::Matrix compute_delta_delays( auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; - vtr::Matrix> sampled_delta_delays({grid.width(), grid.height()}); + vtr::NdMatrix delta_delays({static_cast(grid.get_num_layers()), grid.width(), grid.height()}); - size_t mid_x = vtr::nint(grid.width() / 2); - size_t mid_y = vtr::nint(grid.height() / 2); + for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) { + vtr::Matrix> sampled_delta_delays({grid.width(), grid.height()}); - size_t low_x = std::min(longest_length, mid_x); - size_t low_y = std::min(longest_length, mid_y); - size_t high_x = mid_x; - size_t high_y = mid_y; - if (longest_length <= grid.width()) { - high_x = std::max(grid.width() - longest_length, mid_x); - } - if (longest_length <= grid.height()) { - high_y = std::max(grid.height() - longest_length, mid_y); - } + size_t mid_x = vtr::nint(grid.width() / 2); + size_t mid_y = vtr::nint(grid.height() / 2); - std::set allowed_types; - if (!placer_opts.allowed_tiles_for_delay_model.empty()) { - auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); - for (const auto& type : allowed_types_vector) { - allowed_types.insert(type); + size_t low_x = std::min(longest_length, mid_x); + size_t low_y = std::min(longest_length, mid_y); + size_t high_x = mid_x; + size_t high_y = mid_y; + if (longest_length <= grid.width()) { + high_x = std::max(grid.width() - longest_length, mid_x); + } + if (longest_length <= 
grid.height()) { + high_y = std::max(grid.height() - longest_length, mid_y); } - } - // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - // + | | + - // + A | B | C + - // + | | + - // +-----------------\-----------------------.---------------+ - // + | | + - // + | | + - // + | | + - // + | | + - // + D | E | F + - // + | | + - // + | | + - // + | | + - // + | | + - // +-----------------*-----------------------/---------------+ - // + | | + - // + G | H | I + - // + | | + - // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - // - // * = (low_x, low_y) - // . = (high_x, high_y) - // / = (high_x, low_y) - // \ = (low_x, high_y) - // + = device edge - - //Find the lowest y location on the left edge with a non-empty block - size_t y = 0; - size_t x = 0; - t_physical_tile_type_ptr src_type = nullptr; - for (x = 0; x < grid.width(); ++x) { - for (y = 0; y < grid.height(); ++y) { - auto type = grid.get_physical_type(x, y); - - if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) { - continue; + std::set allowed_types; + if (!placer_opts.allowed_tiles_for_delay_model.empty()) { + auto allowed_types_vector = vtr::split(placer_opts.allowed_tiles_for_delay_model, ","); + for (const auto& type : allowed_types_vector) { + allowed_types.insert(type); + } + } + + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // + | | + + // + A | B | C + + // + | | + + // +-----------------\-----------------------.---------------+ + // + | | + + // + | | + + // + | | + + // + | | + + // + D | E | F + + // + | | + + // + | | + + // + | | + + // + | | + + // +-----------------*-----------------------/---------------+ + // + | | + + // + G | H | I + + // + | | + + // +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + // + // * = (low_x, low_y) + // . 
= (high_x, high_y) + // / = (high_x, low_y) + // \ = (low_x, high_y) + // + = device edge + + //Find the lowest y location on the left edge with a non-empty block + int y = 0; + int x = 0; + t_physical_tile_type_ptr src_type = nullptr; + for (x = 0; x < (int)grid.width(); ++x) { + for (y = 0; y < (int)grid.height(); ++y) { + auto type = grid.get_physical_type({x, y, layer_num}); + + if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) { + continue; + } + src_type = type; + break; } - src_type = type; + } + if (src_type) { break; } } - if (src_type) { - break; - } - } - VTR_ASSERT(src_type != nullptr); + VTR_ASSERT(src_type != nullptr); - t_compute_delta_delay_matrix generic_compute_matrix; - switch (placer_opts.place_delta_delay_matrix_calculation_method) { - case e_place_delta_delay_algorithm::ASTAR_ROUTE: - generic_compute_matrix = generic_compute_matrix_iterative_astar; - break; - case e_place_delta_delay_algorithm::DIJKSTRA_EXPANSION: - generic_compute_matrix = generic_compute_matrix_dijkstra_expansion; - break; - default: - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unknown place_delta_delay_matrix_calculation_method %d", placer_opts.place_delta_delay_matrix_calculation_method); - } + t_compute_delta_delay_matrix generic_compute_matrix; + switch (placer_opts.place_delta_delay_matrix_calculation_method) { + case e_place_delta_delay_algorithm::ASTAR_ROUTE: + generic_compute_matrix = generic_compute_matrix_iterative_astar; + break; + case e_place_delta_delay_algorithm::DIJKSTRA_EXPANSION: + generic_compute_matrix = generic_compute_matrix_dijkstra_expansion; + break; + default: + VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Unknown place_delta_delay_matrix_calculation_method %d", placer_opts.place_delta_delay_matrix_calculation_method); + } #ifdef VERBOSE - VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y); + VTR_LOG("Computing from lower left edge (%d,%d):\n", x, y); #endif - 
generic_compute_matrix(route_profiler, sampled_delta_delays, - x, y, - x, y, - grid.width() - 1, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Find the lowest x location on the bottom edge with a non-empty block - src_type = nullptr; - for (y = 0; y < grid.height(); ++y) { - for (x = 0; x < grid.width(); ++x) { - auto type = grid.get_physical_type(x, y); - - if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { - if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) { - continue; + generic_compute_matrix(route_profiler, sampled_delta_delays, + layer_num, + x, y, + x, y, + grid.width() - 1, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Find the lowest x location on the bottom edge with a non-empty block + src_type = nullptr; + for (y = 0; y < (int)grid.height(); ++y) { + for (x = 0; x < (int)grid.width(); ++x) { + auto type = grid.get_physical_type({x, y, layer_num}); + + if (type != device_ctx.EMPTY_PHYSICAL_TILE_TYPE) { + if (!allowed_types.empty() && allowed_types.find(std::string(type->name)) == allowed_types.end()) { + continue; + } + src_type = type; + break; } - src_type = type; + } + if (src_type) { break; } } - if (src_type) { - break; - } - } - VTR_ASSERT(src_type != nullptr); + VTR_ASSERT(src_type != nullptr); #ifdef VERBOSE - VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y); + VTR_LOG("Computing from left bottom edge (%d,%d):\n", x, y); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - x, y, - x, y, - grid.width() - 1, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions B, C, E, F + generic_compute_matrix(route_profiler, sampled_delta_delays, + layer_num, + x, y, + x, y, + grid.width() - 1, grid.height() - 1, + router_opts, + 
measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions B, C, E, F #ifdef VERBOSE - VTR_LOG("Computing from low/low:\n"); + VTR_LOG("Computing from low/low:\n"); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - low_x, low_y, - low_x, low_y, - grid.width() - 1, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions D, E, G, H + generic_compute_matrix(route_profiler, sampled_delta_delays, + layer_num, + low_x, low_y, + low_x, low_y, + grid.width() - 1, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions D, E, G, H #ifdef VERBOSE - VTR_LOG("Computing from high/high:\n"); + VTR_LOG("Computing from high/high:\n"); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - high_x, high_y, - 0, 0, - high_x, high_y, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other delta delay values may have suffered from edge effects, - //we recalculate deltas within regions A, B, D, E + generic_compute_matrix(route_profiler, sampled_delta_delays, + layer_num, + high_x, high_y, + 0, 0, + high_x, high_y, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions A, B, D, E #ifdef VERBOSE - VTR_LOG("Computing from high/low:\n"); + VTR_LOG("Computing from high/low:\n"); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - high_x, low_y, - 0, low_y, - high_x, grid.height() - 1, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - //Since the other 
delta delay values may have suffered from edge effects, - //we recalculate deltas within regions E, F, H, I + generic_compute_matrix(route_profiler, sampled_delta_delays, + layer_num, + high_x, low_y, + 0, low_y, + high_x, grid.height() - 1, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + //Since the other delta delay values may have suffered from edge effects, + //we recalculate deltas within regions E, F, H, I #ifdef VERBOSE - VTR_LOG("Computing from low/high:\n"); + VTR_LOG("Computing from low/high:\n"); #endif - generic_compute_matrix(route_profiler, sampled_delta_delays, - low_x, high_y, - low_x, 0, - grid.width() - 1, high_y, - router_opts, - measure_directconnect, allowed_types, - is_flat); - - vtr::Matrix delta_delays({grid.width(), grid.height()}); - for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) { - for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) { - delta_delays[dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer); + generic_compute_matrix(route_profiler, sampled_delta_delays, + layer_num, + low_x, high_y, + low_x, 0, + grid.width() - 1, high_y, + router_opts, + measure_directconnect, allowed_types, + is_flat); + + for (size_t dx = 0; dx < sampled_delta_delays.dim_size(0); ++dx) { + for (size_t dy = 0; dy < sampled_delta_delays.dim_size(1); ++dy) { + delta_delays[layer_num][dx][dy] = delay_reduce(sampled_delta_delays[dx][dy], placer_opts.delay_model_reducer); + } } } @@ -862,17 +874,20 @@ float delay_reduce(std::vector& delays, e_reducer reducer) { * we return IMPOSSIBLE_DELTA. 
*/ static float find_neightboring_average( - vtr::Matrix& matrix, - int x, - int y, + vtr::NdMatrix& matrix, + t_physical_tile_loc tile_loc, int max_distance) { float sum = 0; int counter = 0; - int endx = matrix.end_index(0); - int endy = matrix.end_index(1); + int endx = matrix.end_index(1); + int endy = matrix.end_index(2); int delx, dely; + int x = tile_loc.x; + int y = tile_loc.y; + int layer_num = tile_loc.layer_num; + for (int distance = 1; distance <= max_distance; ++distance) { for (delx = x - distance; delx <= x + distance; delx++) { for (dely = y - distance; dely <= y + distance; dely++) { @@ -886,11 +901,11 @@ static float find_neightboring_average( continue; } - if (matrix[delx][dely] == EMPTY_DELTA || matrix[delx][dely] == IMPOSSIBLE_DELTA) { + if (matrix[layer_num][delx][dely] == EMPTY_DELTA || matrix[layer_num][delx][dely] == IMPOSSIBLE_DELTA) { continue; } counter++; - sum += matrix[delx][dely]; + sum += matrix[layer_num][delx][dely]; } } if (counter != 0) { @@ -901,7 +916,7 @@ static float find_neightboring_average( return IMPOSSIBLE_DELTA; } -static void fix_empty_coordinates(vtr::Matrix& delta_delays) { +static void fix_empty_coordinates(vtr::NdMatrix& delta_delays) { // Set any empty delta's to the average of it's neighbours // // Empty coordinates may occur if the sampling location happens to not have @@ -909,27 +924,32 @@ static void fix_empty_coordinates(vtr::Matrix& delta_delays) { // would return a result, so we fill in the empty holes with a small // neighbour average. 
constexpr int kMaxAverageDistance = 2; - for (size_t delta_x = 0; delta_x < delta_delays.dim_size(0); ++delta_x) { - for (size_t delta_y = 0; delta_y < delta_delays.dim_size(1); ++delta_y) { - if (delta_delays[delta_x][delta_y] == EMPTY_DELTA) { - delta_delays[delta_x][delta_y] = find_neightboring_average(delta_delays, delta_x, delta_y, kMaxAverageDistance); + for (int layer_num = 0; layer_num < (int)delta_delays.dim_size(0); ++layer_num) { + for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(1); ++delta_x) { + for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(2); ++delta_y) { + if (delta_delays[layer_num][delta_x][delta_y] == EMPTY_DELTA) { + delta_delays[layer_num][delta_x][delta_y] = find_neightboring_average(delta_delays, {delta_x, delta_y, layer_num}, kMaxAverageDistance); + } } } } } -static void fix_uninitialized_coordinates(vtr::Matrix& delta_delays) { +static void fix_uninitialized_coordinates(vtr::NdMatrix& delta_delays) { // Set any empty delta's to the average of it's neighbours - for (size_t delta_x = 0; delta_x < delta_delays.dim_size(0); ++delta_x) { - for (size_t delta_y = 0; delta_y < delta_delays.dim_size(1); ++delta_y) { - if (delta_delays[delta_x][delta_y] == UNINITIALIZED_DELTA) { - delta_delays[delta_x][delta_y] = IMPOSSIBLE_DELTA; + + for (size_t layer_num = 0; layer_num < delta_delays.dim_size(0); ++layer_num) { + for (size_t delta_x = 0; delta_x < delta_delays.dim_size(1); ++delta_x) { + for (size_t delta_y = 0; delta_y < delta_delays.dim_size(2); ++delta_y) { + if (delta_delays[layer_num][delta_x][delta_y] == UNINITIALIZED_DELTA) { + delta_delays[layer_num][delta_x][delta_y] = IMPOSSIBLE_DELTA; + } } } } } -static void fill_impossible_coordinates(vtr::Matrix& delta_delays) { +static void fill_impossible_coordinates(vtr::NdMatrix& delta_delays) { // Set any impossible delta's to the average of it's neighbours // // Impossible coordinates may occur if an IPIN cannot be reached from the @@ -942,17 +962,19 @@ static void 
fill_impossible_coordinates(vtr::Matrix& delta_delays) { // filling these gaps. It is more important to have a poor predication, // than a invalid value and causing a slack assertion. constexpr int kMaxAverageDistance = 5; - for (size_t delta_x = 0; delta_x < delta_delays.dim_size(0); ++delta_x) { - for (size_t delta_y = 0; delta_y < delta_delays.dim_size(1); ++delta_y) { - if (delta_delays[delta_x][delta_y] == IMPOSSIBLE_DELTA) { - delta_delays[delta_x][delta_y] = find_neightboring_average( - delta_delays, delta_x, delta_y, kMaxAverageDistance); + for (int layer_num = 0; layer_num < (int)delta_delays.dim_size(0); ++layer_num) { + for (int delta_x = 0; delta_x < (int)delta_delays.dim_size(1); ++delta_x) { + for (int delta_y = 0; delta_y < (int)delta_delays.dim_size(2); ++delta_y) { + if (delta_delays[layer_num][delta_x][delta_y] == IMPOSSIBLE_DELTA) { + delta_delays[layer_num][delta_x][delta_y] = find_neightboring_average( + delta_delays, {delta_x, delta_y, layer_num}, kMaxAverageDistance); + } } } } } -static vtr::Matrix compute_delta_delay_model( +static vtr::NdMatrix compute_delta_delay_model( RouterDelayProfiler& route_profiler, const t_placer_opts& placer_opts, const t_router_opts& router_opts, @@ -960,12 +982,12 @@ static vtr::Matrix compute_delta_delay_model( int longest_length, bool is_flat) { vtr::ScopedStartFinishTimer timer("Computing delta delays"); - vtr::Matrix delta_delays = compute_delta_delays(route_profiler, - placer_opts, - router_opts, - measure_directconnect, - longest_length, - is_flat); + vtr::NdMatrix delta_delays = compute_delta_delays(route_profiler, + placer_opts, + router_opts, + measure_directconnect, + longest_length, + is_flat); fix_uninitialized_coordinates(delta_delays); @@ -997,54 +1019,63 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, //Search the grid for an instance of from/to blocks which satisfy this direct connect offsets, //and which has the appropriate pins - int from_x = 0, from_y = 0, 
from_sub_tile = 0; + int from_x = -1; + int from_y = -1; + int from_sub_tile = -1; int to_x = 0, to_y = 0, to_sub_tile = 0; bool found = false; - for (from_x = 0; from_x < (int)grid.width(); ++from_x) { - to_x = from_x + direct->x_offset; - if (to_x < 0 || to_x >= (int)grid.width()) continue; - - for (from_y = 0; from_y < (int)grid.height(); ++from_y) { - if (grid.get_physical_type(from_x, from_y) != from_type) continue; - - //Check that the from pin exists at this from location - //(with multi-width/height blocks pins may not exist at all locations) - bool from_pin_found = false; - if (direct->from_side != NUM_SIDES) { - RRNodeId from_pin_rr = node_lookup.find_node(from_x, from_y, OPIN, from_pin, direct->from_side); - from_pin_found = (from_pin_rr != RRNodeId::INVALID()); - } else { - from_pin_found = !(node_lookup.find_nodes_at_all_sides(from_x, from_y, OPIN, from_pin).empty()); - } - if (!from_pin_found) continue; + int found_layer_num = -1; + //TODO: For now, this function assumes that the from/to blocks are on the same die and have the same layer num + for (int layer_num = 0; layer_num < grid.get_num_layers() && !found; ++layer_num) { + for (int x = 0; x < (int)grid.width() && !found; ++x) { + to_x = x + direct->x_offset; + if (to_x < 0 || to_x >= (int)grid.width()) continue; + + for (int y = 0; y < (int)grid.height() && !found; ++y) { + if (grid.get_physical_type({x, y, layer_num}) != from_type) continue; + + //Check that the from pin exists at this from location + //(with multi-width/height blocks pins may not exist at all locations) + bool from_pin_found = false; + if (direct->from_side != NUM_SIDES) { + RRNodeId from_pin_rr = node_lookup.find_node(layer_num, x, y, OPIN, from_pin, direct->from_side); + from_pin_found = (from_pin_rr != RRNodeId::INVALID()); + } else { + from_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, x, y, OPIN, from_pin).empty()); + } + if (!from_pin_found) continue; - to_y = from_y + direct->y_offset; -
if (to_y < 0 || to_y >= (int)grid.height()) continue; - if (grid.get_physical_type(to_x, to_y) != to_type) continue; + if (to_y < 0 || to_y >= (int)grid.height()) continue; + if (grid.get_physical_type({to_x, to_y, layer_num}) != to_type) continue; - //Check that the from pin exists at this from location - //(with multi-width/height blocks pins may not exist at all locations) - bool to_pin_found = false; - if (direct->to_side != NUM_SIDES) { - RRNodeId to_pin_rr = node_lookup.find_node(to_x, to_y, IPIN, to_pin, direct->to_side); - to_pin_found = (to_pin_rr != RRNodeId::INVALID()); - } else { - to_pin_found = !(node_lookup.find_nodes_at_all_sides(to_x, to_y, IPIN, to_pin).empty()); - } - if (!to_pin_found) continue; + //Check that the from pin exists at this from location + //(with multi-width/height blocks pins may not exist at all locations) + bool to_pin_found = false; + if (direct->to_side != NUM_SIDES) { + RRNodeId to_pin_rr = node_lookup.find_node(layer_num, to_x, to_y, IPIN, to_pin, direct->to_side); + to_pin_found = (to_pin_rr != RRNodeId::INVALID()); + } else { + to_pin_found = !(node_lookup.find_nodes_at_all_sides(layer_num, to_x, to_y, IPIN, to_pin).empty()); + } + if (!to_pin_found) continue; - for (from_sub_tile = 0; from_sub_tile < from_type->capacity; ++from_sub_tile) { - to_sub_tile = from_sub_tile + direct->sub_tile_offset; + for (int sub_tile_num = 0; sub_tile_num < from_type->capacity; ++sub_tile_num) { + to_sub_tile = sub_tile_num + direct->sub_tile_offset; - if (to_sub_tile < 0 || to_sub_tile >= to_type->capacity) continue; + if (to_sub_tile < 0 || to_sub_tile >= to_type->capacity) continue; - found = true; - break; + found = true; + found_layer_num = layer_num; + from_x = x; + from_y = y; + from_sub_tile = sub_tile_num; + + break; + } } - if (found) break; } - if (found) break; } if (!found) { @@ -1052,10 +1083,10 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, } //Now have a legal instance of this direct connect 
- VTR_ASSERT(grid.get_physical_type(from_x, from_y) == from_type); + VTR_ASSERT(grid.get_physical_type({from_x, from_y, found_layer_num}) == from_type); VTR_ASSERT(from_sub_tile < from_type->capacity); - VTR_ASSERT(grid.get_physical_type(to_x, to_y) == to_type); + VTR_ASSERT(grid.get_physical_type({to_x, to_y, found_layer_num}) == to_type); VTR_ASSERT(to_sub_tile < to_type->capacity); VTR_ASSERT(from_x + direct->x_offset == to_x); @@ -1067,13 +1098,13 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, // { - RRNodeId src_rr_candidate = node_lookup.find_node(from_x, from_y, SOURCE, from_pin_class); + RRNodeId src_rr_candidate = node_lookup.find_node(found_layer_num, from_x, from_y, SOURCE, from_pin_class); VTR_ASSERT(src_rr_candidate); *src_rr = size_t(src_rr_candidate); } { - RRNodeId sink_rr_candidate = node_lookup.find_node(to_x, to_y, SINK, to_pin_class); + RRNodeId sink_rr_candidate = node_lookup.find_node(found_layer_num, to_x, to_y, SINK, to_pin_class); VTR_ASSERT(sink_rr_candidate); *sink_rr = size_t(sink_rr_candidate); } @@ -1081,18 +1112,20 @@ static bool find_direct_connect_sample_locations(const t_direct_inf* direct, return true; } -static bool verify_delta_delays(const vtr::Matrix& delta_delays) { +static bool verify_delta_delays(const vtr::NdMatrix& delta_delays) { auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; - for (size_t x = 0; x < grid.width(); ++x) { - for (size_t y = 0; y < grid.height(); ++y) { - float delta_delay = delta_delays[x][y]; + for (int layer_num = 0; layer_num < grid.get_num_layers(); ++layer_num) { + for (size_t x = 0; x < grid.width(); ++x) { + for (size_t y = 0; y < grid.height(); ++y) { + float delta_delay = delta_delays[layer_num][x][y]; - if (delta_delay < 0.) { - VPR_ERROR(VPR_ERROR_PLACE, - "Found invaild negative delay %g for delta (%d,%d)", - delta_delay, x, y); + if (delta_delay < 0.) 
{ + VPR_ERROR(VPR_ERROR_PLACE, + "Found invaild negative delay %g for delta (%d,%d)", + delta_delay, x, y); + } } } } diff --git a/vpr/src/place/uniform_move_generator.cpp b/vpr/src/place/uniform_move_generator.cpp index cd75492eb71..c979295e4f0 100644 --- a/vpr/src/place/uniform_move_generator.cpp +++ b/vpr/src/place/uniform_move_generator.cpp @@ -16,7 +16,7 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); t_pl_loc to; @@ -27,9 +27,10 @@ e_create_move UniformMoveGenerator::propose_move(t_pl_blocks_to_be_moved& blocks #if 0 auto& grid = g_vpr_ctx.device().grid; - VTR_LOG( "swap [%d][%d][%d] %s block %zu \"%s\" <=> [%d][%d][%d] %s block ", - from.x, from.y, from.sub_tile, grid[from.x][from.y].type->name, size_t(b_from), (b_from ? cluster_ctx.clb_nlist.block_name(b_from).c_str() : ""), - to.x, to.y, to.sub_tile, grid[to.x][to.y].type->name); + const auto& grid_to_type = grid.get_physical_type(to.x, to.y, to.layer); + VTR_LOG( "swap [%d][%d][%d][%d] %s block %zu \"%s\" <=> [%d][%d][%d][%d] %s block ", + from.x, from.y, from.sub_tile,from.layer, grid_from_type->name, size_t(b_from), (b_from ? 
cluster_ctx.clb_nlist.block_name(b_from).c_str() : ""), + to.x, to.y, to.sub_tile, to.layer, grid_to_type->name); if (b_to) { VTR_LOG("%zu \"%s\"", size_t(b_to), cluster_ctx.clb_nlist.block_name(b_to).c_str()); } else { diff --git a/vpr/src/place/weighted_centroid_move_generator.cpp b/vpr/src/place/weighted_centroid_move_generator.cpp index 00144f41c9a..4e968680cba 100644 --- a/vpr/src/place/weighted_centroid_move_generator.cpp +++ b/vpr/src/place/weighted_centroid_move_generator.cpp @@ -20,7 +20,7 @@ e_create_move WeightedCentroidMoveGenerator::propose_move(t_pl_blocks_to_be_move t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = device_ctx.grid.get_physical_type(from.x, from.y); + auto grid_from_type = device_ctx.grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); t_range_limiters range_limiters; diff --git a/vpr/src/place/weighted_median_move_generator.cpp b/vpr/src/place/weighted_median_move_generator.cpp index fbad2be2413..a5e59fec044 100644 --- a/vpr/src/place/weighted_median_move_generator.cpp +++ b/vpr/src/place/weighted_median_move_generator.cpp @@ -23,7 +23,7 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); - auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type(from.x, from.y); + auto grid_from_type = g_vpr_ctx.device().grid.get_physical_type({from.x, from.y, from.layer}); VTR_ASSERT(is_tile_compatible(grid_from_type, cluster_from_type)); /* Calculate the Edge weighted median region */ @@ -99,6 +99,8 @@ e_create_move WeightedMedianMoveGenerator::propose_move(t_pl_blocks_to_be_moved& t_pl_loc w_median_point; w_median_point.x = (limit_coords.xmin + limit_coords.xmax) / 2; w_median_point.y = (limit_coords.ymin + limit_coords.ymax) / 2; + // 
TODO: Currently, we don't move blocks between different types of layers + w_median_point.layer = from.layer; if (!find_to_loc_centroid(cluster_from_type, from, w_median_point, range_limiters, to, b_from)) { return e_create_move::ABORT; } diff --git a/vpr/src/power/power.cpp b/vpr/src/power/power.cpp index 7591d2f183c..94d55479580 100644 --- a/vpr/src/power/power.cpp +++ b/vpr/src/power/power.cpp @@ -609,34 +609,36 @@ static void power_usage_blocks(t_power_usage* power_usage) { t_logical_block_type_ptr logical_block; /* Loop through all grid locations */ - for (size_t x = 0; x < device_ctx.grid.width(); x++) { - for (size_t y = 0; y < device_ctx.grid.height(); y++) { - auto physical_tile = device_ctx.grid.get_physical_type(x, y); - int width_offset = device_ctx.grid.get_width_offset(x, y); - int height_offset = device_ctx.grid.get_height_offset(x, y); - - if ((width_offset != 0) - || (height_offset != 0) - || is_empty_type(physical_tile)) { - continue; - } + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (int x = 0; x < (int)device_ctx.grid.width(); x++) { + for (int y = 0; y < (int)device_ctx.grid.height(); y++) { + auto physical_tile = device_ctx.grid.get_physical_type({x, y, layer_num}); + int width_offset = device_ctx.grid.get_width_offset({x, y, layer_num}); + int height_offset = device_ctx.grid.get_height_offset({x, y, layer_num}); + + if ((width_offset != 0) + || (height_offset != 0) + || is_empty_type(physical_tile)) { + continue; + } - for (int z = 0; z < physical_tile->capacity; z++) { - t_pb* pb = nullptr; - t_power_usage pb_power; + for (int z = 0; z < physical_tile->capacity; z++) { + t_pb* pb = nullptr; + t_power_usage pb_power; - ClusterBlockId iblk = place_ctx.grid_blocks[x][y].blocks[z]; + ClusterBlockId iblk = place_ctx.grid_blocks.block_at_location({x, y, z, layer_num}); - if (iblk != EMPTY_BLOCK_ID && iblk != INVALID_BLOCK_ID) { - pb = cluster_ctx.clb_nlist.block_pb(iblk); - logical_block = 
cluster_ctx.clb_nlist.block_type(iblk); - } else { - logical_block = pick_logical_type(physical_tile); - } + if (iblk != EMPTY_BLOCK_ID && iblk != INVALID_BLOCK_ID) { + pb = cluster_ctx.clb_nlist.block_pb(iblk); + logical_block = cluster_ctx.clb_nlist.block_type(iblk); + } else { + logical_block = pick_logical_type(physical_tile); + } - /* Calculate power of this CLB */ - power_usage_pb(&pb_power, pb, logical_block->pb_graph_head, iblk); - power_add_usage(power_usage, &pb_power); + /* Calculate power of this CLB */ + power_usage_pb(&pb_power, pb, logical_block->pb_graph_head, iblk); + power_add_usage(power_usage, &pb_power); + } } } } diff --git a/vpr/src/route/check_route.cpp b/vpr/src/route/check_route.cpp index e2c048fe0f7..3cf5c5c20f2 100644 --- a/vpr/src/route/check_route.cpp +++ b/vpr/src/route/check_route.cpp @@ -257,7 +257,7 @@ static bool check_adjacent(RRNodeId from_node, RRNodeId to_node, bool is_flat) { * represent specially-crafted connections such as carry-chains or more advanced * blocks where adjacency is overridden by the architect */ - int from_xlow, from_ylow, to_xlow, to_ylow, from_ptc, to_ptc, iclass; + int from_layer, from_xlow, from_ylow, to_layer, to_xlow, to_ylow, from_ptc, to_ptc, iclass; int num_adj, to_xhigh, to_yhigh, from_xhigh, from_yhigh; bool reached; t_rr_type from_type, to_type; @@ -284,19 +284,25 @@ static bool check_adjacent(RRNodeId from_node, RRNodeId to_node, bool is_flat) { num_adj = 0; - from_type = rr_graph.node_type(from_node); - from_xlow = rr_graph.node_xlow(from_node); - from_ylow = rr_graph.node_ylow(from_node); - from_xhigh = rr_graph.node_xhigh(from_node); - from_yhigh = rr_graph.node_yhigh(from_node); - from_ptc = rr_graph.node_ptc_num(from_node); - to_type = rr_graph.node_type(to_node); - to_xlow = rr_graph.node_xlow(to_node); - to_ylow = rr_graph.node_ylow(to_node); - to_xhigh = rr_graph.node_xhigh(to_node); - to_yhigh = rr_graph.node_yhigh(to_node); - to_ptc = rr_graph.node_ptc_num(to_node); - + auto from_rr = 
RRNodeId(from_node); + auto to_rr = RRNodeId(to_node); + from_type = rr_graph.node_type(from_rr); + from_layer = rr_graph.node_layer(from_rr); + from_xlow = rr_graph.node_xlow(from_rr); + from_ylow = rr_graph.node_ylow(from_rr); + from_xhigh = rr_graph.node_xhigh(from_rr); + from_yhigh = rr_graph.node_yhigh(from_rr); + from_ptc = rr_graph.node_ptc_num(from_rr); + to_type = rr_graph.node_type(to_rr); + to_layer = rr_graph.node_layer(to_rr); + to_xlow = rr_graph.node_xlow(to_rr); + to_ylow = rr_graph.node_ylow(to_rr); + to_xhigh = rr_graph.node_xhigh(to_rr); + to_yhigh = rr_graph.node_yhigh(to_rr); + to_ptc = rr_graph.node_ptc_num(to_rr); + + // Layer numbers should not be more than one layer apart for connected nodes + VTR_ASSERT(abs(from_layer - to_layer) <= 1); switch (from_type) { case SOURCE: VTR_ASSERT(to_type == OPIN); @@ -306,8 +312,8 @@ static bool check_adjacent(RRNodeId from_node, RRNodeId to_node, bool is_flat) { && from_ylow <= to_ylow && from_xhigh >= to_xhigh && from_yhigh >= to_yhigh) { - from_grid_type = device_ctx.grid.get_physical_type(from_xlow, from_ylow); - to_grid_type = device_ctx.grid.get_physical_type(to_xlow, to_ylow); + from_grid_type = device_ctx.grid.get_physical_type({from_xlow, from_ylow, from_layer}); + to_grid_type = device_ctx.grid.get_physical_type({to_xlow, to_ylow, to_layer}); VTR_ASSERT(from_grid_type == to_grid_type); iclass = get_class_num_from_pin_physical_num(to_grid_type, to_ptc); @@ -321,7 +327,7 @@ static bool check_adjacent(RRNodeId from_node, RRNodeId to_node, bool is_flat) { break; case OPIN: - from_grid_type = device_ctx.grid.get_physical_type(from_xlow, from_ylow); + from_grid_type = device_ctx.grid.get_physical_type({from_xlow, from_ylow, from_layer}); if (to_type == CHANX || to_type == CHANY) { num_adj += 1; //adjacent } else if (is_flat) { @@ -335,7 +341,7 @@ static bool check_adjacent(RRNodeId from_node, RRNodeId to_node, bool is_flat) { break; case IPIN: - from_grid_type = 
device_ctx.grid.get_physical_type(from_xlow, from_ylow); + from_grid_type = device_ctx.grid.get_physical_type({from_xlow, from_ylow, from_layer}); if (is_flat) { VTR_ASSERT(to_type == OPIN || to_type == IPIN || to_type == SINK); } else { @@ -348,21 +354,21 @@ static bool check_adjacent(RRNodeId from_node, RRNodeId to_node, bool is_flat) { && from_ylow >= to_ylow && from_xhigh <= to_xhigh && from_yhigh <= to_yhigh) { - from_grid_type = device_ctx.grid.get_physical_type(from_xlow, from_ylow); - to_grid_type = device_ctx.grid.get_physical_type(to_xlow, to_ylow); + from_grid_type = device_ctx.grid.get_physical_type({from_xlow, from_ylow, from_layer}); + to_grid_type = device_ctx.grid.get_physical_type({to_xlow, to_ylow, to_layer}); VTR_ASSERT(from_grid_type == to_grid_type); iclass = get_class_num_from_pin_physical_num(from_grid_type, from_ptc); if (iclass == to_ptc) num_adj++; } } else { - from_grid_type = device_ctx.grid.get_physical_type(from_xlow, from_ylow); - to_grid_type = device_ctx.grid.get_physical_type(to_xlow, to_ylow); + from_grid_type = device_ctx.grid.get_physical_type({from_xlow, from_ylow, from_layer}); + to_grid_type = device_ctx.grid.get_physical_type({to_xlow, to_ylow, to_layer}); VTR_ASSERT(from_grid_type == to_grid_type); - int from_root_x = from_xlow - device_ctx.grid.get_width_offset(from_xlow, from_ylow); - int from_root_y = from_ylow - device_ctx.grid.get_height_offset(from_xlow, from_ylow); - int to_root_x = to_xlow - device_ctx.grid.get_width_offset(to_xlow, to_ylow); - int to_root_y = to_ylow - device_ctx.grid.get_height_offset(to_xlow, to_ylow); + int from_root_x = from_xlow - device_ctx.grid.get_width_offset({from_xlow, from_ylow, from_layer}); + int from_root_y = from_ylow - device_ctx.grid.get_height_offset({from_xlow, from_ylow, from_layer}); + int to_root_x = to_xlow - device_ctx.grid.get_width_offset({to_xlow, to_ylow, to_layer}); + int to_root_y = to_ylow - device_ctx.grid.get_height_offset({to_xlow, to_ylow, to_layer}); if 
(from_root_x == to_root_x && from_root_y == to_root_y) { num_adj++; diff --git a/vpr/src/route/clock_connection_builders.cpp b/vpr/src/route/clock_connection_builders.cpp index 645e96cb306..fbed3a3b62d 100644 --- a/vpr/src/route/clock_connection_builders.cpp +++ b/vpr/src/route/clock_connection_builders.cpp @@ -23,9 +23,10 @@ void RoutingToClockConnection::set_clock_switch_point_name(std::string clock_swi switch_point_name = clock_switch_point_name; } -void RoutingToClockConnection::set_switch_location(int x, int y) { +void RoutingToClockConnection::set_switch_location(int x, int y, int layer /* =0 */) { switch_location.x = x; switch_location.y = y; + switch_location.layer = layer; } void RoutingToClockConnection::set_switch(int arch_switch_index) { @@ -55,15 +56,15 @@ void RoutingToClockConnection::create_switches(const ClockRRGraphBuilder& clock_ auto& device_ctx = g_vpr_ctx.device(); const auto& node_lookup = device_ctx.rr_graph.node_lookup(); - RRNodeId virtual_clock_network_root_idx = create_virtual_clock_network_sink_node(switch_location.x, switch_location.y); + RRNodeId virtual_clock_network_root_idx = create_virtual_clock_network_sink_node(switch_location.layer, switch_location.x, switch_location.y); { auto& mut_device_ctx = g_vpr_ctx.mutable_device(); mut_device_ctx.virtual_clock_network_root_idx = size_t(virtual_clock_network_root_idx); } // rr_node indices for x and y channel routing wires and clock wires to connect to - auto x_wire_indices = node_lookup.find_channel_nodes(switch_location.x, switch_location.y, CHANX); - auto y_wire_indices = node_lookup.find_channel_nodes(switch_location.x, switch_location.y, CHANY); + auto x_wire_indices = node_lookup.find_channel_nodes(switch_location.layer, switch_location.x, switch_location.y, CHANX); + auto y_wire_indices = node_lookup.find_channel_nodes(switch_location.layer, switch_location.x, switch_location.y, CHANY); auto clock_indices = clock_graph.get_rr_node_indices_at_switch_location( clock_to_connect_to, 
switch_point_name, switch_location.x, switch_location.y); @@ -90,7 +91,7 @@ void RoutingToClockConnection::create_switches(const ClockRRGraphBuilder& clock_ } } -RRNodeId RoutingToClockConnection::create_virtual_clock_network_sink_node(int x, int y) { +RRNodeId RoutingToClockConnection::create_virtual_clock_network_sink_node(int layer, int x, int y) { auto& device_ctx = g_vpr_ctx.mutable_device(); auto& rr_graph = device_ctx.rr_graph; auto& rr_graph_builder = device_ctx.rr_graph_builder; @@ -99,8 +100,8 @@ RRNodeId RoutingToClockConnection::create_virtual_clock_network_sink_node(int x, rr_graph_builder.emplace_back(); RRNodeId node_index = RRNodeId(rr_graph.num_nodes() - 1); - //Determine the a valid PTC - std::vector nodes_at_loc = node_lookup.find_grid_nodes_at_all_sides(x, y, SINK); + //Determine a valid PTC + std::vector nodes_at_loc = node_lookup.find_grid_nodes_at_all_sides(layer, x, y, SINK); int max_ptc = 0; for (RRNodeId inode : nodes_at_loc) { @@ -111,6 +112,7 @@ RRNodeId RoutingToClockConnection::create_virtual_clock_network_sink_node(int x, rr_graph_builder.set_node_type(node_index, SINK); rr_graph_builder.set_node_class_num(node_index, ptc); rr_graph_builder.set_node_coordinates(node_index, x, y, x, y); + rr_graph_builder.set_node_layer(node_index, layer); rr_graph_builder.set_node_capacity(node_index, 1); rr_graph_builder.set_node_cost_index(node_index, RRIndexedDataId(SINK_COST_INDEX)); @@ -122,7 +124,7 @@ RRNodeId RoutingToClockConnection::create_virtual_clock_network_sink_node(int x, // However, since the SINK node has the same xhigh/xlow as well as yhigh/ylow, we can probably use a shortcut for (int ix = rr_graph.node_xlow(node_index); ix <= rr_graph.node_xhigh(node_index); ++ix) { for (int iy = rr_graph.node_ylow(node_index); iy <= rr_graph.node_yhigh(node_index); ++iy) { - node_lookup.add_node(node_index, ix, iy, rr_graph.node_type(node_index), rr_graph.node_class_num(node_index)); + node_lookup.add_node(node_index, layer, ix, iy, 
rr_graph.node_type(node_index), rr_graph.node_class_num(node_index)); } } @@ -248,23 +250,24 @@ void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_gra auto& device_ctx = g_vpr_ctx.device(); const auto& node_lookup = device_ctx.rr_graph.node_lookup(); auto& grid = clock_graph.grid(); + int layer_num = 0; //Function *FOR NOW* assumes that layer_num is always 0 - for (size_t x = 0; x < grid.width(); x++) { - for (size_t y = 0; y < grid.height(); y++) { + for (int x = 0; x < (int)grid.width(); x++) { + for (int y = 0; y < (int)grid.height(); y++) { //Avoid boundary - if ((y == 0 && x == 0) || (x == grid.width() - 1 && y == grid.height() - 1)) { + if ((y == 0 && x == 0) || (x == (int)grid.width() - 1 && y == (int)grid.height() - 1)) { continue; } - auto type = grid.get_physical_type(x, y); + auto type = grid.get_physical_type({x, y, layer_num}); // Skip EMPTY type if (is_empty_type(type)) { continue; } - auto width_offset = grid.get_width_offset(x, y); - auto height_offset = grid.get_height_offset(x, y); + auto width_offset = grid.get_width_offset({x, y, layer_num}); + auto height_offset = grid.get_height_offset({x, y, layer_num}); // Ignore grid locations that do not have blocks bool has_pb_type = false; @@ -282,7 +285,7 @@ void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_gra for (e_side side : SIDES) { //Don't connect pins which are not adjacent to channels around the perimeter - if ((x == 0 && side != RIGHT) || (x == grid.width() - 1 && side != LEFT) || (y == 0 && side != TOP) || (y == grid.height() - 1 && side != BOTTOM)) { + if ((x == 0 && side != RIGHT) || (x == (int)grid.width() - 1 && side != LEFT) || (y == 0 && side != TOP) || (y == (int)grid.height() - 1 && side != BOTTOM)) { continue; } @@ -298,7 +301,7 @@ void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_gra if (x == 0) { clock_x_offset = 1; // chanx clock always starts at 1 offset clock_y_offset = -1; // pick the chanx below 
the block - } else if (x == grid.width() - 1) { + } else if (x == (int)grid.width() - 1) { clock_x_offset = -1; // chanx clock always ends at 1 offset clock_y_offset = -1; // pick the chanx below the block } else if (y == 0) { @@ -307,7 +310,8 @@ void ClockToPinsConnection::create_switches(const ClockRRGraphBuilder& clock_gra clock_y_offset = -1; // pick the chanx below the block } - auto clock_pin_node_idx = node_lookup.find_node(x, + auto clock_pin_node_idx = node_lookup.find_node(layer_num, + x, y, IPIN, clock_pin_idx, diff --git a/vpr/src/route/clock_connection_builders.h b/vpr/src/route/clock_connection_builders.h index 0565128471c..7ab1c7c5be7 100644 --- a/vpr/src/route/clock_connection_builders.h +++ b/vpr/src/route/clock_connection_builders.h @@ -47,7 +47,7 @@ class RoutingToClockConnection : public ClockConnection { */ void set_clock_name_to_connect_to(std::string clock_name); void set_clock_switch_point_name(std::string clock_switch_point_name); - void set_switch_location(int x, int y); + void set_switch_location(int x, int y, int layer = 0); void set_switch(int arch_switch_index); void set_fc_val(float fc_val); @@ -57,7 +57,7 @@ class RoutingToClockConnection : public ClockConnection { /* Connects the inter-block routing to the clock source at the specified coordinates */ void create_switches(const ClockRRGraphBuilder& clock_graph, t_rr_edge_info_set* rr_edges_to_create) override; size_t estimate_additional_nodes() override; - RRNodeId create_virtual_clock_network_sink_node(int x, int y); + RRNodeId create_virtual_clock_network_sink_node(int layer, int x, int y); }; class ClockToClockConneciton : public ClockConnection { diff --git a/vpr/src/route/clock_fwd.h b/vpr/src/route/clock_fwd.h index ef119f07649..abf76b3b7bd 100644 --- a/vpr/src/route/clock_fwd.h +++ b/vpr/src/route/clock_fwd.h @@ -4,6 +4,7 @@ struct Coordinates { int x = -1; int y = -1; + int layer = -1; }; #endif diff --git a/vpr/src/route/clock_network_builders.cpp 
b/vpr/src/route/clock_network_builders.cpp index 1db5796f47d..3a1606e5831 100644 --- a/vpr/src/route/clock_network_builders.cpp +++ b/vpr/src/route/clock_network_builders.cpp @@ -245,6 +245,10 @@ void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphB VTR_ASSERT(repeat.y > 0); VTR_ASSERT(repeat.x > 0); + // TODO: This function is not adapted to the multi-layer grid + VTR_ASSERT(g_vpr_ctx.device().grid.get_num_layers() == 1); + int layer_num = 0; + for (unsigned y = x_chan_wire.position; y < grid.height() - 1; y += repeat.y) { for (unsigned x_start = x_chan_wire.start; x_start < grid.width() - 1; x_start += repeat.x) { unsigned drive_x = x_start + drive.offset; @@ -282,7 +286,8 @@ void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphB } // create drive point (length zero wire) - auto drive_node_idx = create_chanx_wire(drive_x, + auto drive_node_idx = create_chanx_wire(layer_num, + drive_x, drive_x, y, ptc_num, @@ -292,14 +297,16 @@ void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphB clock_graph.add_switch_location(get_name(), drive.name, drive_x, y, drive_node_idx); // create rib wire to the right and left of the drive point - auto left_node_idx = create_chanx_wire(x_start + x_offset, + auto left_node_idx = create_chanx_wire(layer_num, + x_start + x_offset, drive_x - 1, y, ptc_num, Direction::DEC, rr_nodes, rr_graph_builder); - auto right_node_idx = create_chanx_wire(drive_x + 1, + auto right_node_idx = create_chanx_wire(layer_num, + drive_x + 1, x_end, y, ptc_num, @@ -320,7 +327,8 @@ void ClockRib::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGraphB } } -int ClockRib::create_chanx_wire(int x_start, +int ClockRib::create_chanx_wire(int layer, + int x_start, int x_end, int y, int ptc_num, @@ -333,6 +341,7 @@ int ClockRib::create_chanx_wire(int x_start, rr_graph_builder.set_node_type(chanx_node, CHANX); rr_graph_builder.set_node_coordinates(chanx_node, x_start, y, x_end, y); 
+ rr_graph_builder.set_node_layer(chanx_node, layer); rr_graph_builder.set_node_capacity(chanx_node, 1); rr_graph_builder.set_node_track_num(chanx_node, ptc_num); rr_graph_builder.set_node_rc_index(chanx_node, NodeRCIndex(find_create_rr_rc_data( @@ -363,7 +372,7 @@ int ClockRib::create_chanx_wire(int x_start, for (int ix = rr_graph.node_xlow(chanx_node); ix <= rr_graph.node_xhigh(chanx_node); ++ix) { for (int iy = rr_graph.node_ylow(chanx_node); iy <= rr_graph.node_yhigh(chanx_node); ++iy) { //TODO: CHANX uses odd swapped x/y indices here. Will rework once rr_node_indices is shadowed - rr_graph_builder.node_lookup().add_node(chanx_node, iy, ix, rr_graph.node_type(chanx_node), rr_graph.node_track_num(chanx_node)); + rr_graph_builder.node_lookup().add_node(chanx_node, layer, iy, ix, rr_graph.node_type(chanx_node), rr_graph.node_track_num(chanx_node)); } } @@ -573,6 +582,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap VTR_ASSERT(repeat.y > 0); VTR_ASSERT(repeat.x > 0); + int layer_num = 0; //Function "FOR NOW" assumes that layer_num is always 0 + for (unsigned x = y_chan_wire.position; x < grid.width() - 1; x += repeat.x) { for (unsigned y_start = y_chan_wire.start; y_start < grid.height() - 1; y_start += repeat.y) { unsigned drive_y = y_start + drive.offset; @@ -610,7 +621,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap } //create drive point (length zero wire) - auto drive_node_idx = create_chany_wire(drive_y, + auto drive_node_idx = create_chany_wire(layer_num, + drive_y, drive_y, x, ptc_num, @@ -621,7 +633,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap clock_graph.add_switch_location(get_name(), drive.name, x, drive_y, drive_node_idx); // create spine wire above and below the drive point - auto left_node_idx = create_chany_wire(y_start + y_offset, + auto left_node_idx = create_chany_wire(layer_num, + y_start + y_offset, drive_y - 1, x, ptc_num, @@ 
-629,7 +642,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap rr_nodes, rr_graph_builder, num_segments_x); - auto right_node_idx = create_chany_wire(drive_y + 1, + auto right_node_idx = create_chany_wire(layer_num, + drive_y + 1, y_end, x, ptc_num, @@ -654,7 +668,8 @@ void ClockSpine::create_rr_nodes_and_internal_edges_for_one_instance(ClockRRGrap } } -int ClockSpine::create_chany_wire(int y_start, +int ClockSpine::create_chany_wire(int layer, + int y_start, int y_end, int x, int ptc_num, @@ -668,6 +683,7 @@ int ClockSpine::create_chany_wire(int y_start, rr_graph_builder.set_node_type(chany_node, CHANY); rr_graph_builder.set_node_coordinates(chany_node, x, y_start, x, y_end); + rr_graph_builder.set_node_layer(chany_node, layer); rr_graph_builder.set_node_capacity(chany_node, 1); rr_graph_builder.set_node_track_num(chany_node, ptc_num); rr_graph_builder.set_node_rc_index(chany_node, NodeRCIndex(find_create_rr_rc_data( @@ -697,7 +713,7 @@ int ClockSpine::create_chany_wire(int y_start, /* TODO: Will replace these codes with an API add_node_to_all_locs() of RRGraphBuilder */ for (int ix = rr_graph.node_xlow(chany_node); ix <= rr_graph.node_xhigh(chany_node); ++ix) { for (int iy = rr_graph.node_ylow(chany_node); iy <= rr_graph.node_yhigh(chany_node); ++iy) { - rr_graph_builder.node_lookup().add_node(chany_node, ix, iy, rr_graph.node_type(chany_node), rr_graph.node_ptc_num(chany_node)); + rr_graph_builder.node_lookup().add_node(chany_node, layer, ix, iy, rr_graph.node_type(chany_node), rr_graph.node_ptc_num(chany_node)); } } diff --git a/vpr/src/route/clock_network_builders.h b/vpr/src/route/clock_network_builders.h index 60db3eba728..f9983cd85e9 100644 --- a/vpr/src/route/clock_network_builders.h +++ b/vpr/src/route/clock_network_builders.h @@ -175,7 +175,8 @@ class ClockRib : public ClockNetwork { void map_relative_seg_indices(const t_unified_to_parallel_seg_index& index_map) override; - int create_chanx_wire(int x_start, + int 
create_chanx_wire(int layer, + int x_start, int x_end, int y, int ptc_num, @@ -242,7 +243,8 @@ class ClockSpine : public ClockNetwork { int num_segments_x) override; size_t estimate_additional_nodes(const DeviceGrid& grid) override; void map_relative_seg_indices(const t_unified_to_parallel_seg_index& index_map) override; - int create_chany_wire(int y_start, + int create_chany_wire(int layer, + int y_start, int y_end, int x, int ptc_num, diff --git a/vpr/src/route/connection_router.cpp b/vpr/src/route/connection_router.cpp index d5bd3e7a93d..4d0c0f96f05 100644 --- a/vpr/src/route/connection_router.cpp +++ b/vpr/src/route/connection_router.cpp @@ -31,7 +31,9 @@ inline void update_router_stats(const DeviceContext& device_ctx, auto node_type = rr_graph->node_type(rr_node_id); VTR_ASSERT(node_type != NUM_RR_TYPES); - t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type(rr_graph->node_xlow(rr_node_id), rr_graph->node_ylow(rr_node_id)); + t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type({rr_graph->node_xlow(rr_node_id), + rr_graph->node_ylow(rr_node_id), + rr_graph->node_layer(rr_node_id)}); if (is_inter_cluster_node(physical_type, node_type, diff --git a/vpr/src/route/overuse_report.cpp b/vpr/src/route/overuse_report.cpp index 1fd65115aea..f2e0864ec25 100644 --- a/vpr/src/route/overuse_report.cpp +++ b/vpr/src/route/overuse_report.cpp @@ -22,6 +22,7 @@ static void report_congested_nets(const Netlist<>& net_list, std::ostream& os, const std::set& congested_nets, bool is_flat, + int layer_num, int x, int y, bool report_sinks); @@ -30,6 +31,7 @@ static void log_overused_nodes_header(); static void log_single_overused_node_status(int overuse_index, RRNodeId inode); void print_block_pins_nets(std::ostream& os, t_physical_tile_type_ptr physical_type, + int layer, int root_x, int root_y, int pin_physical_num, @@ -110,6 +112,7 @@ void report_overused_nodes(const Netlist<>& net_list, bool report_sinks = false; int x = 
rr_graph.node_xlow(node_id); int y = rr_graph.node_ylow(node_id); + int layer_num = rr_graph.node_layer(node_id); switch (node_type) { case IPIN: case OPIN: @@ -117,8 +120,8 @@ void report_overused_nodes(const Netlist<>& net_list, node_id, rr_node_to_net_map); report_sinks = true; - x -= g_vpr_ctx.device().grid.get_physical_type(x, y)->width; - y -= g_vpr_ctx.device().grid.get_physical_type(x, y)->width; + x -= g_vpr_ctx.device().grid.get_physical_type({x, y, layer_num})->width; + y -= g_vpr_ctx.device().grid.get_physical_type({x, y, layer_num})->width; break; case CHANX: case CHANY: @@ -142,6 +145,7 @@ void report_overused_nodes(const Netlist<>& net_list, os, congested_nets, is_flat, + layer_num, x, y, report_sinks); @@ -209,47 +213,52 @@ static void report_overused_ipin_opin(std::ostream& os, auto grid_x = rr_graph.node_xlow(node_id); auto grid_y = rr_graph.node_ylow(node_id); + auto grid_layer = rr_graph.node_layer(node_id); + VTR_ASSERT_MSG( grid_x == rr_graph.node_xhigh(node_id) && grid_y == rr_graph.node_yhigh(node_id), "Non-track RR node should not span across multiple grid blocks."); - t_physical_tile_type_ptr physical_tile = device_ctx.grid.get_physical_type(grid_x, grid_y); + t_physical_tile_type_ptr physical_tile = device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}); os << "Pin physical number = " << rr_graph.node_pin_num(node_id) << '\n'; if (is_inter_cluster_node(physical_tile, rr_graph.node_type(node_id), rr_graph.node_ptc_num(node_id))) { os << "On Tile Pin" << "\n"; } else { - auto pb_type_name = get_pb_graph_node_from_pin_physical_num(device_ctx.grid.get_physical_type(grid_x, grid_y), + auto pb_type_name = get_pb_graph_node_from_pin_physical_num(device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}), rr_graph.node_ptc_num(node_id)) ->pb_type->name; - auto pb_pin = get_pb_pin_from_pin_physical_num(device_ctx.grid.get_physical_type(grid_x, grid_y), rr_graph.node_ptc_num(node_id)); + auto pb_pin = 
get_pb_pin_from_pin_physical_num(device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}), + rr_graph.node_ptc_num(node_id)); os << "Intra-Tile Pin - Port : " << pb_pin->port->name << " - PB Type : " << std::string(pb_type_name) << "\n"; } print_block_pins_nets(os, - device_ctx.grid.get_physical_type(grid_x, grid_y), - grid_x - device_ctx.grid.get_width_offset(grid_x, grid_y), - grid_y - device_ctx.grid.get_height_offset(grid_x, grid_y), + device_ctx.grid.get_physical_type({grid_x, grid_y, grid_layer}), + grid_layer, + grid_x - device_ctx.grid.get_width_offset({grid_x, grid_y, grid_layer}), + grid_y - device_ctx.grid.get_height_offset({grid_x, grid_y, grid_layer}), rr_graph.node_ptc_num(node_id), rr_node_to_net_map); os << "Side = " << rr_graph.node_side_string(node_id) << "\n\n"; //Add block type for IPINs/OPINs in overused rr-node report const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - auto& grid_info = place_ctx.grid_blocks[grid_x][grid_y]; + const auto& grid_info = place_ctx.grid_blocks; os << "Grid location: X = " << grid_x << ", Y = " << grid_y << '\n'; - os << "Number of blocks currently occupying this grid location = " << grid_info.usage << '\n'; + os << "Number of blocks currently occupying this grid location = " << grid_info.get_usage({grid_x, grid_y, grid_layer}) << '\n'; size_t iblock = 0; - for (size_t isubtile = 0; isubtile < grid_info.blocks.size(); ++isubtile) { + for (int isubtile = 0; isubtile < (int)grid_info.num_blocks_at_location({grid_x, grid_y, grid_layer}); ++isubtile) { //Check if there is a valid block at this subtile location - if (grid_info.subtile_empty(isubtile)) { + if (grid_info.is_sub_tile_empty({grid_x, grid_y, grid_layer}, isubtile)) { continue; } //Print out the block index, name and type - ClusterBlockId block_id = grid_info.blocks[isubtile]; + // TODO: Needs to be updated when RR Graph Nodes know their layer_num + ClusterBlockId block_id = grid_info.block_at_location({grid_x, grid_y, isubtile, 0}); os << 
"Block #" << iblock << ": "; os << "Block name = " << clb_nlist.block_pb(block_id)->name << ", "; os << "Block type = " << clb_nlist.block_type(block_id)->name << '\n'; @@ -298,6 +307,7 @@ static void report_congested_nets(const Netlist<>& net_list, std::ostream& os, const std::set& congested_nets, bool is_flat, + int layer_num, int x, int y, bool report_sinks) { @@ -329,11 +339,12 @@ static void report_congested_nets(const Netlist<>& net_list, cluster_block_id = convert_to_cluster_block_id(net_list.pin_block(sink_id)); } auto cluster_loc = g_vpr_ctx.placement().block_locs[cluster_block_id]; - auto physical_type = g_vpr_ctx.device().grid.get_physical_type(x, y); - int cluster_x = cluster_loc.loc.x - g_vpr_ctx.device().grid.get_physical_type(cluster_loc.loc.x, cluster_loc.loc.y)->width; - int cluster_y = cluster_loc.loc.y - g_vpr_ctx.device().grid.get_physical_type(cluster_loc.loc.x, cluster_loc.loc.y)->height; + auto physical_type = g_vpr_ctx.device().grid.get_physical_type({x, y, layer_num}); + int cluster_layer_num = cluster_loc.loc.layer; + int cluster_x = cluster_loc.loc.x - g_vpr_ctx.device().grid.get_physical_type({cluster_loc.loc.x, cluster_loc.loc.y, cluster_layer_num})->width; + int cluster_y = cluster_loc.loc.y - g_vpr_ctx.device().grid.get_physical_type({cluster_loc.loc.x, cluster_loc.loc.y, cluster_layer_num})->height; if (cluster_x == x && cluster_y == y) { - VTR_ASSERT(physical_type == g_vpr_ctx.device().grid.get_physical_type(cluster_x, cluster_y)); + VTR_ASSERT(physical_type == g_vpr_ctx.device().grid.get_physical_type({cluster_x, cluster_y, cluster_layer_num})); os << "Sink in the same location = " << "\n"; if (is_flat) { @@ -370,7 +381,8 @@ static void log_single_overused_node_status(int overuse_index, RRNodeId node_id) const auto& route_ctx = g_vpr_ctx.routing(); int x = rr_graph.node_xlow(node_id); int y = rr_graph.node_ylow(node_id); - auto physical_blk = device_ctx.grid.get_physical_type(x, y); + int layer_num = rr_graph.node_layer(node_id); + 
auto physical_blk = device_ctx.grid.get_physical_type({x, y, layer_num}); //Determines if direction or side is available for printing auto node_type = rr_graph.node_type(node_id); @@ -429,6 +441,7 @@ static void log_single_overused_node_status(int overuse_index, RRNodeId node_id) void print_block_pins_nets(std::ostream& os, t_physical_tile_type_ptr physical_type, + int layer, int root_x, int root_y, int pin_physical_num, @@ -457,7 +470,7 @@ void print_block_pins_nets(std::ostream& os, for (int pin = pin_num_range.low; pin <= pin_num_range.high; pin++) { t_rr_type rr_type = (get_pin_type_from_pin_physical_num(physical_type, pin) == DRIVER) ? t_rr_type::OPIN : t_rr_type::IPIN; - RRNodeId node_id = get_pin_rr_node_id(rr_graph.node_lookup(), physical_type, root_x, root_y, pin); + RRNodeId node_id = get_pin_rr_node_id(rr_graph.node_lookup(), physical_type, layer, root_x, root_y, pin); VTR_ASSERT(node_id != RRNodeId::INVALID()); auto search_result = rr_node_to_net_map.find(node_id); if (rr_type == t_rr_type::OPIN) { diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index fd2091fbc9b..466608319fb 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -646,8 +646,8 @@ static vtr::vector> load_net_rr_terminals(const RR t_block_loc blk_loc; blk_loc = get_block_loc(block_id, is_flat); int iclass = get_block_pin_class_num(block_id, pin_id, is_flat); - - RRNodeId inode = rr_graph.node_lookup().find_node(blk_loc.loc.x, + RRNodeId inode = rr_graph.node_lookup().find_node(blk_loc.loc.layer, + blk_loc.loc.x, blk_loc.loc.y, (pin_count == 0 ? 
SOURCE : SINK), /* First pin is driver */ iclass); @@ -754,7 +754,8 @@ static vtr::vector> load_rr_clb_sources(const RR rr_type = SINK; } - RRNodeId inode = rr_graph.node_lookup().find_node(blk_loc.loc.x, + RRNodeId inode = rr_graph.node_lookup().find_node(blk_loc.loc.layer, + blk_loc.loc.x, blk_loc.loc.y, rr_type, iclass); diff --git a/vpr/src/route/route_timing.cpp b/vpr/src/route/route_timing.cpp index d119403cb8e..48074f717cb 100644 --- a/vpr/src/route/route_timing.cpp +++ b/vpr/src/route/route_timing.cpp @@ -2310,7 +2310,7 @@ vtr::vector>> set_net std::for_each(sink_grp.begin(), sink_grp.end(), [&rr_graph](int& sink_rr_num) { sink_rr_num = rr_graph.node_ptc_num(RRNodeId(sink_rr_num)); }); - auto physical_type = device_ctx.grid.get_physical_type(blk_loc.loc.x, blk_loc.loc.y); + auto physical_type = device_ctx.grid.get_physical_type({blk_loc.loc.x, blk_loc.loc.y, blk_loc.loc.layer}); // Get the choke points of the sink corresponds to pin_count given the sink group auto sink_choking_spots = get_sink_choking_points(physical_type, rr_graph.node_ptc_num(RRNodeId(net_rr_terminal[net_id][pin_count])), @@ -2321,6 +2321,7 @@ vtr::vector>> set_net int num_reachable_sinks = choking_spot.second; auto pin_rr_node_id = get_pin_rr_node_id(rr_graph.node_lookup(), physical_type, + blk_loc.loc.layer, blk_loc.loc.x, blk_loc.loc.y, pin_physical_num); diff --git a/vpr/src/route/router_lookahead_extended_map.cpp b/vpr/src/route/router_lookahead_extended_map.cpp index 5b95603f191..375b1127177 100644 --- a/vpr/src/route/router_lookahead_extended_map.cpp +++ b/vpr/src/route/router_lookahead_extended_map.cpp @@ -73,12 +73,15 @@ std::pair ExtendedMapLookahead::get_src_opin_cost(RRNodeId from_no //reachable, we query the f_wire_cost_map (i.e. the wire lookahead) to get the final //delay to reach the sink. 
- t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(from_node), rr_graph.node_ylow(from_node)); + t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(from_node), + rr_graph.node_ylow(from_node), + rr_graph.node_layer(from_node)}); auto tile_index = tile_type->index; auto from_ptc = rr_graph.node_ptc_num(from_node); + int from_layer_num = rr_graph.node_layer(from_node); - if (this->src_opin_delays[tile_index][from_ptc].empty()) { + if (this->src_opin_delays[from_layer_num][tile_index][from_ptc].empty()) { //During lookahead profiling we were unable to find any wires which connected //to this PTC. // @@ -105,7 +108,7 @@ std::pair ExtendedMapLookahead::get_src_opin_cost(RRNodeId from_no float expected_delay_cost = std::numeric_limits::infinity(); float expected_cong_cost = std::numeric_limits::infinity(); - for (const auto& kv : this->src_opin_delays[tile_index][from_ptc]) { + for (const auto& kv : this->src_opin_delays[from_layer_num][tile_index][from_ptc]) { const util::t_reachable_wire_inf& reachable_wire_inf = kv.second; util::Cost_Entry cost_entry; @@ -151,14 +154,17 @@ float ExtendedMapLookahead::get_chan_ipin_delays(RRNodeId to_node) const { e_rr_type to_type = rr_graph.node_type(to_node); VTR_ASSERT(to_type == SINK || to_type == IPIN); - auto to_tile_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(to_node), rr_graph.node_ylow(to_node)); + auto to_tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(to_node), + rr_graph.node_ylow(to_node), + rr_graph.node_layer(to_node)}); auto to_tile_index = to_tile_type->index; auto to_ptc = rr_graph.node_ptc_num(to_node); + int to_layer_num = rr_graph.node_layer(to_node); float site_pin_delay = 0.f; - if (this->chan_ipins_delays[to_tile_index].size() != 0) { - auto reachable_wire_inf = this->chan_ipins_delays[to_tile_index][to_ptc]; + if (this->chan_ipins_delays[to_layer_num][to_tile_index].size() != 0) { + auto 
reachable_wire_inf = this->chan_ipins_delays[to_layer_num][to_tile_index][to_ptc]; site_pin_delay = reachable_wire_inf.delay; } diff --git a/vpr/src/route/router_lookahead_map.cpp b/vpr/src/route/router_lookahead_map.cpp index d029900f565..b518970dcc0 100644 --- a/vpr/src/route/router_lookahead_map.cpp +++ b/vpr/src/route/router_lookahead_map.cpp @@ -205,7 +205,11 @@ struct t_dijkstra_data { t_wire_cost_map f_wire_cost_map; /******** File-Scope Functions ********/ -Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int delta_x, int delta_y); +Cost_Entry get_wire_cost_entry(e_rr_type rr_type, + int seg_index, + int layer_num, + int delta_x, + int delta_y); static void compute_router_wire_lookahead(const std::vector& segment_inf); static void compute_tiles_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, std::unordered_map>& tile_min_cost, @@ -221,9 +225,7 @@ static void store_min_cost_to_sinks(std::unordered_map& inter_tile_pin_primitive_pin_delay); -static void min_global_cost_map(vtr::NdMatrix& internal_opin_global_cost_map, - size_t max_dx, - size_t max_dy); +static void min_global_cost_map(vtr::NdMatrix& internal_opin_global_cost_map); // Read the file and fill inter_tile_pin_primitive_pin_delay and tile_min_cost static void read_intra_cluster_router_lookahead(std::unordered_map& inter_tile_pin_primitive_pin_delay, @@ -236,10 +238,11 @@ static void write_intra_cluster_router_lookahead(const std::string& file, const std::unordered_map>& tile_min_cost); /* returns index of a node from which to start routing */ -static RRNodeId get_start_node(int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset); +static RRNodeId get_start_node(int layer, int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset); /* runs Dijkstra's algorithm from specified node until all nodes have been visited. 
Each time a pin is visited, the delay/congestion information * to that pin is stored is added to an entry in the routing_cost_map */ static void run_dijkstra(RRNodeId start_node, + int sample_layer_num, int start_x, int start_y, t_routing_cost_map& routing_cost_map, @@ -250,11 +253,11 @@ static void expand_dijkstra_neighbours(PQ_Entry parent_entry, vtr::vector& node_expanded, std::priority_queue& pq); /* sets the lookahead cost map entries based on representative cost entries from routing_cost_map */ -static void set_lookahead_map_costs(int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map); +static void set_lookahead_map_costs(int layer_num, int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map); /* fills in missing lookahead map entries by copying the cost of the closest valid entry */ static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_type); /* returns a cost entry in the f_wire_cost_map that is near the specified coordinates (and preferably towards (0,0)) */ -static Cost_Entry get_nearby_cost_entry(int x, int y, int segment_index, int chan_index); +static Cost_Entry get_nearby_cost_entry(int layer_num, int x, int y, int segment_index, int chan_index); /* returns the absolute delta_x and delta_y offset required to reach to_node from from_node */ static void get_xy_deltas(const RRNodeId from_node, const RRNodeId to_node, int* delta_x, int* delta_y); static void adjust_rr_position(const RRNodeId rr, int& x, int& y); @@ -262,7 +265,7 @@ static void adjust_rr_pin_position(const RRNodeId rr, int& x, int& y); static void adjust_rr_wire_position(const RRNodeId rr, int& x, int& y); static void adjust_rr_src_sink_position(const RRNodeId rr, int& x, int& y); -static void print_wire_cost_map(const std::vector& segment_inf); +static void print_wire_cost_map(int layer_num, const std::vector& segment_inf); static void print_router_cost_map(const t_routing_cost_map& router_cost_map); /******** Interface 
class member function definitions ********/ @@ -270,13 +273,18 @@ float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_nod auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; - t_physical_tile_type_ptr from_physical_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(current_node), rr_graph.node_ylow(current_node)); + t_physical_tile_type_ptr from_physical_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(current_node), + rr_graph.node_ylow(current_node), + rr_graph.node_layer(current_node)}); t_rr_type from_rr_type = rr_graph.node_type(current_node); int from_node_ptc_num = rr_graph.node_ptc_num(current_node); - t_physical_tile_type_ptr to_physical_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(target_node), rr_graph.node_ylow(target_node)); + t_physical_tile_type_ptr to_physical_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(target_node), + rr_graph.node_ylow(target_node), + rr_graph.node_layer(target_node)}); t_rr_type to_rr_type = rr_graph.node_type(target_node); int to_node_ptc_num = rr_graph.node_ptc_num(target_node); + int to_layer_num = rr_graph.node_layer(target_node); VTR_ASSERT(to_rr_type == t_rr_type::SINK); float delay_cost = 0.; @@ -285,6 +293,8 @@ float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_nod float cong_offset_cost = 0.; if (is_flat_) { + // We have not checked the multi-layer FPGA for flat routing + VTR_ASSERT(rr_graph.node_layer(current_node) == rr_graph.node_layer(target_node)); if (from_rr_type == CHANX || from_rr_type == CHANY) { std::tie(delay_cost, cong_cost) = get_expected_delay_and_cong(current_node, target_node, params, R_upstream); @@ -329,8 +339,8 @@ float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_nod get_xy_deltas(current_node, target_node, &delta_x, &delta_y); delta_x = abs(delta_x); delta_y = abs(delta_y); - delay_cost = params.criticality * 
distance_based_min_cost[delta_x][delta_y].delay; - cong_cost = (1. - params.criticality) * distance_based_min_cost[delta_x][delta_y].congestion; + delay_cost = params.criticality * distance_based_min_cost[to_layer_num][delta_x][delta_y].delay; + cong_cost = (1. - params.criticality) * distance_based_min_cost[to_layer_num][delta_x][delta_y].congestion; delay_offset_cost = params.criticality * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).delay; cong_offset_cost = (1. - params.criticality) * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).congestion; @@ -361,8 +371,8 @@ float MapLookahead::get_expected_cost(RRNodeId current_node, RRNodeId target_nod get_xy_deltas(current_node, target_node, &delta_x, &delta_y); delta_x = abs(delta_x); delta_y = abs(delta_y); - delay_cost = params.criticality * distance_based_min_cost[delta_x][delta_y].delay; - cong_cost = (1. - params.criticality) * distance_based_min_cost[delta_x][delta_y].congestion; + delay_cost = params.criticality * distance_based_min_cost[to_layer_num][delta_x][delta_y].delay; + cong_cost = (1. - params.criticality) * distance_based_min_cost[to_layer_num][delta_x][delta_y].congestion; delay_offset_cost = params.criticality * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).delay; cong_offset_cost = (1. - params.criticality) * tile_min_cost.at(to_physical_type).at(to_node_ptc_num).congestion; @@ -393,6 +403,7 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_ auto& rr_graph = device_ctx.rr_graph; int delta_x, delta_y; + int from_layer_num = rr_graph.node_layer(from_node); get_xy_deltas(from_node, to_node, &delta_x, &delta_y); delta_x = abs(delta_x); delta_y = abs(delta_y); @@ -407,12 +418,15 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_ //reachable, we query the f_wire_cost_map (i.e. the wire lookahead) to get the final //delay to reach the sink. 
- t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(from_node), rr_graph.node_ylow(from_node)); + t_physical_tile_type_ptr tile_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(from_node), + rr_graph.node_ylow(from_node), + from_layer_num}); + auto tile_index = std::distance(&device_ctx.physical_tile_types[0], tile_type); auto from_ptc = rr_graph.node_ptc_num(from_node); - if (this->src_opin_delays[tile_index][from_ptc].empty()) { + if (this->src_opin_delays[from_layer_num][tile_index][from_ptc].empty()) { //During lookahead profiling we were unable to find any wires which connected //to this PTC. // @@ -436,7 +450,7 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_ //From the current SOURCE/OPIN we look-up the wiretypes which are reachable //and then add the estimates from those wire types for the distance of interest. //If there are multiple options we use the minimum value. - for (const auto& kv : this->src_opin_delays[tile_index][from_ptc]) { + for (const auto& kv : this->src_opin_delays[from_layer_num][tile_index][from_ptc]) { const util::t_reachable_wire_inf& reachable_wire_inf = kv.second; Cost_Entry wire_cost_entry; @@ -449,7 +463,11 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_ } else { //For an actual accessible wire, we query the wire look-up to get it's //delay and congestion cost estimates - wire_cost_entry = get_wire_cost_entry(reachable_wire_inf.wire_rr_type, reachable_wire_inf.wire_seg_index, delta_x, delta_y); + wire_cost_entry = get_wire_cost_entry(reachable_wire_inf.wire_rr_type, + reachable_wire_inf.wire_seg_index, + from_layer_num, + delta_x, + delta_y); } float this_delay_cost = (params.criticality) * (reachable_wire_inf.delay + wire_cost_entry.delay); @@ -481,7 +499,11 @@ std::pair MapLookahead::get_expected_delay_and_cong(RRNodeId from_ VTR_ASSERT(from_seg_index >= 0); /* now get the expected cost from our lookahead map */ - Cost_Entry cost_entry 
= get_wire_cost_entry(from_type, from_seg_index, delta_x, delta_y); + Cost_Entry cost_entry = get_wire_cost_entry(from_type, + from_seg_index, + from_layer_num, + delta_x, + delta_y); float expected_delay = cost_entry.delay; float expected_cong = cost_entry.congestion; @@ -532,9 +554,7 @@ void MapLookahead::compute_intra_tile() { det_routing_arch_, g_vpr_ctx.device()); - min_global_cost_map(distance_based_min_cost, - f_wire_cost_map.dim_size(2), - f_wire_cost_map.dim_size(3)); + min_global_cost_map(distance_based_min_cost); } void MapLookahead::read(const std::string& file) { @@ -554,9 +574,7 @@ void MapLookahead::read_intra_cluster(const std::string& file) { file); // The information about distance_based_min_cost is not stored in the file, thus it needs to be computed - min_global_cost_map(distance_based_min_cost, - f_wire_cost_map.dim_size(2), - f_wire_cost_map.dim_size(3)); + min_global_cost_map(distance_based_min_cost); } void MapLookahead::write(const std::string& file) const { @@ -571,7 +589,7 @@ void MapLookahead::write_intra_cluster(const std::string& file) const { /******** Function Definitions ********/ -Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int delta_x, int delta_y) { +Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int layer_num, int delta_x, int delta_y) { VTR_ASSERT_SAFE(rr_type == CHANX || rr_type == CHANY); int chan_index = 0; @@ -579,10 +597,11 @@ Cost_Entry get_wire_cost_entry(e_rr_type rr_type, int seg_index, int delta_x, in chan_index = 1; } - VTR_ASSERT_SAFE(delta_x < (int)f_wire_cost_map.dim_size(2)); - VTR_ASSERT_SAFE(delta_y < (int)f_wire_cost_map.dim_size(3)); + VTR_ASSERT_SAFE(layer_num < (int)f_wire_cost_map.dim_size(0)); + VTR_ASSERT_SAFE(delta_x < (int)f_wire_cost_map.dim_size(3)); + VTR_ASSERT_SAFE(delta_y < (int)f_wire_cost_map.dim_size(4)); - return f_wire_cost_map[chan_index][seg_index][delta_x][delta_y]; + return f_wire_cost_map[layer_num][chan_index][seg_index][delta_x][delta_y]; } 
static void compute_router_wire_lookahead(const std::vector& segment_inf) { @@ -593,7 +612,11 @@ static void compute_router_wire_lookahead(const std::vector& segm auto& grid = device_ctx.grid; //Re-allocate - f_wire_cost_map = t_wire_cost_map({2, segment_inf.size(), device_ctx.grid.width(), device_ctx.grid.height()}); + f_wire_cost_map = t_wire_cost_map({static_cast(grid.get_num_layers()), + 2, + segment_inf.size(), + device_ctx.grid.width(), + device_ctx.grid.height()}); int longest_length = 0; for (const auto& seg_inf : segment_inf) { @@ -620,117 +643,122 @@ static void compute_router_wire_lookahead(const std::vector& segm int target_y = device_ctx.grid.height() - 2; //Profile each wire segment type - for (int iseg = 0; iseg < int(segment_inf.size()); iseg++) { - //First try to pick good representative sample locations for each type - std::map> sample_nodes; - std::vector chan_types; - if (segment_inf[iseg].parallel_axis == X_AXIS) - chan_types.push_back(CHANX); - else if (segment_inf[iseg].parallel_axis == Y_AXIS) - chan_types.push_back(CHANY); - else //Both for BOTH_AXIS segments and special segments such as clock_networks we want to search in both directions. 
- chan_types.insert(chan_types.end(), {CHANX, CHANY}); - - for (e_rr_type chan_type : chan_types) { - for (int ref_inc : ref_increments) { - int sample_x = ref_x + ref_inc; - int sample_y = ref_y + ref_inc; - - if (sample_x >= int(grid.width())) continue; - if (sample_y >= int(grid.height())) continue; - - for (int track_offset = 0; track_offset < MAX_TRACK_OFFSET; track_offset += 2) { - /* get the rr node index from which to start routing */ - RRNodeId start_node = get_start_node(sample_x, sample_y, - target_x, target_y, //non-corner upper right - chan_type, iseg, track_offset); - - if (!start_node) { - continue; + for (int layer_num = 0; layer_num < grid.get_num_layers(); layer_num++) { + for (int iseg = 0; iseg < int(segment_inf.size()); iseg++) { + //First try to pick good representative sample locations for each type + std::map> sample_nodes; + std::vector chan_types; + if (segment_inf[iseg].parallel_axis == X_AXIS) + chan_types.push_back(CHANX); + else if (segment_inf[iseg].parallel_axis == Y_AXIS) + chan_types.push_back(CHANY); + else //Both for BOTH_AXIS segments and special segments such as clock_networks we want to search in both directions. 
+ chan_types.insert(chan_types.end(), {CHANX, CHANY}); + + for (e_rr_type chan_type : chan_types) { + for (int ref_inc : ref_increments) { + int sample_x = ref_x + ref_inc; + int sample_y = ref_y + ref_inc; + + if (sample_x >= int(grid.width())) continue; + if (sample_y >= int(grid.height())) continue; + + for (int track_offset = 0; track_offset < MAX_TRACK_OFFSET; track_offset += 2) { + /* get the rr node index from which to start routing */ + RRNodeId start_node = get_start_node(layer_num, sample_x, sample_y, + target_x, target_y, //non-corner upper right + chan_type, iseg, track_offset); + + if (!start_node) { + continue; + } + // TODO: Temporary - After testing benchmarks this can be deleted + VTR_ASSERT(rr_graph.node_layer(start_node) == layer_num); + + sample_nodes[chan_type].push_back(RRNodeId(start_node)); } - - sample_nodes[chan_type].push_back(RRNodeId(start_node)); } } - } - //If we failed to find any representative sample locations, search exhaustively - // - //This is to ensure we sample 'unusual' wire types which may not exist in all channels - //(e.g. clock routing) - for (e_rr_type chan_type : chan_types) { - if (!sample_nodes[chan_type].empty()) continue; + //If we failed to find any representative sample locations, search exhaustively + // + //This is to ensure we sample 'unusual' wire types which may not exist in all channels + //(e.g. 
clock routing) + for (e_rr_type chan_type : chan_types) { + if (!sample_nodes[chan_type].empty()) continue; - //Try an exhaustive search to find a suitable sample point - for (RRNodeId rr_node : rr_graph.nodes()) { - auto rr_type = rr_graph.node_type(rr_node); - if (rr_type != chan_type) continue; + //Try an exhaustive search to find a suitable sample point + for (RRNodeId rr_node : rr_graph.nodes()) { + auto rr_type = rr_graph.node_type(rr_node); + if (rr_type != chan_type) continue; + if (rr_graph.node_layer(rr_node) != layer_num) continue; - auto cost_index = rr_graph.node_cost_index(rr_node); - VTR_ASSERT(cost_index != RRIndexedDataId(OPEN)); + auto cost_index = rr_graph.node_cost_index(rr_node); + VTR_ASSERT(cost_index != RRIndexedDataId(OPEN)); - int seg_index = device_ctx.rr_indexed_data[cost_index].seg_index; + int seg_index = device_ctx.rr_indexed_data[cost_index].seg_index; - if (seg_index == iseg) { - sample_nodes[chan_type].push_back(rr_node); - } + if (seg_index == iseg) { + sample_nodes[chan_type].push_back(rr_node); + } - if (sample_nodes[chan_type].size() >= ref_increments.size()) { - break; + if (sample_nodes[chan_type].size() >= ref_increments.size()) { + break; + } } } - } - - //Finally, now that we have a list of sample locations, run a Djikstra flood from - //each sample location to profile the routing network from this type - - t_dijkstra_data dijkstra_data; - t_routing_cost_map routing_cost_map({device_ctx.grid.width(), device_ctx.grid.height()}); - for (e_rr_type chan_type : chan_types) { - if (sample_nodes[chan_type].empty()) { - VTR_LOG_WARN("Unable to find any sample location for segment %s type '%s' (length %d)\n", - rr_node_typename[chan_type], - segment_inf[iseg].name.c_str(), - segment_inf[iseg].length); - } else { - //reset cost for this segment - routing_cost_map.fill(Expansion_Cost_Entry()); + //Finally, now that we have a list of sample locations, run a Djikstra flood from + //each sample location to profile the routing network 
from this type - for (RRNodeId sample_node : sample_nodes[chan_type]) { - int sample_x = rr_graph.node_xlow(sample_node); - int sample_y = rr_graph.node_ylow(sample_node); + t_dijkstra_data dijkstra_data; + t_routing_cost_map routing_cost_map({device_ctx.grid.width(), device_ctx.grid.height()}); - if (rr_graph.node_direction(sample_node) == Direction::DEC) { - sample_x = rr_graph.node_xhigh(sample_node); - sample_y = rr_graph.node_yhigh(sample_node); + for (e_rr_type chan_type : chan_types) { + if (sample_nodes[chan_type].empty()) { + VTR_LOG_WARN("Unable to find any sample location for segment %s type '%s' (length %d)\n", + rr_node_typename[chan_type], + segment_inf[iseg].name.c_str(), + segment_inf[iseg].length); + } else { + //reset cost for this segment + routing_cost_map.fill(Expansion_Cost_Entry()); + + for (RRNodeId sample_node : sample_nodes[chan_type]) { + int sample_x = rr_graph.node_xlow(sample_node); + int sample_y = rr_graph.node_ylow(sample_node); + + if (rr_graph.node_direction(sample_node) == Direction::DEC) { + sample_x = rr_graph.node_xhigh(sample_node); + sample_y = rr_graph.node_yhigh(sample_node); + } + + run_dijkstra(sample_node, + layer_num, + sample_x, + sample_y, + routing_cost_map, + &dijkstra_data); } - run_dijkstra(sample_node, - sample_x, - sample_y, - routing_cost_map, - &dijkstra_data); - } - - if (false) print_router_cost_map(routing_cost_map); + if (false) print_router_cost_map(routing_cost_map); - /* boil down the cost list in routing_cost_map at each coordinate to a representative cost entry and store it in the lookahead - * cost map */ - set_lookahead_map_costs(iseg, chan_type, routing_cost_map); + /* boil down the cost list in routing_cost_map at each coordinate to a representative cost entry and store it in the lookahead + * cost map */ + set_lookahead_map_costs(layer_num, iseg, chan_type, routing_cost_map); - /* fill in missing entries in the lookahead cost map by copying the closest cost entries (cost map was computed based 
on - * a reference coordinate > (0,0) so some entries that represent a cross-chip distance have not been computed) */ - fill_in_missing_lookahead_entries(iseg, chan_type); + /* fill in missing entries in the lookahead cost map by copying the closest cost entries (cost map was computed based on + * a reference coordinate > (0,0) so some entries that represent a cross-chip distance have not been computed) */ + fill_in_missing_lookahead_entries(iseg, chan_type); + } } } + if (false) print_wire_cost_map(layer_num, segment_inf); } - - if (false) print_wire_cost_map(segment_inf); } /* returns index of a node from which to start routing */ -static RRNodeId get_start_node(int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset) { +static RRNodeId get_start_node(int layer, int start_x, int start_y, int target_x, int target_y, t_rr_type rr_type, int seg_index, int track_offset) { auto& device_ctx = g_vpr_ctx.device(); const auto& rr_graph = device_ctx.rr_graph; const auto& node_lookup = rr_graph.node_lookup(); @@ -751,7 +779,7 @@ static RRNodeId get_start_node(int start_x, int start_y, int target_x, int targe int start_lookup_y = start_y; /* find first node in channel that has specified segment index and goes in the desired direction */ - for (const RRNodeId& node_id : node_lookup.find_channel_nodes(start_lookup_x, start_lookup_y, rr_type)) { + for (const RRNodeId& node_id : node_lookup.find_channel_nodes(layer, start_lookup_x, start_lookup_y, rr_type)) { VTR_ASSERT(rr_graph.node_type(node_id) == rr_type); Direction node_direction = rr_graph.node_direction(node_id); @@ -774,6 +802,7 @@ static RRNodeId get_start_node(int start_x, int start_y, int target_x, int targe /* runs Dijkstra's algorithm from specified node until all nodes have been visited. 
Each time a pin is visited, the delay/congestion information * to that pin is stored is added to an entry in the routing_cost_map */ static void run_dijkstra(RRNodeId start_node, + int sample_layer_num, int start_x, int start_y, t_routing_cost_map& routing_cost_map, @@ -814,6 +843,10 @@ static void run_dijkstra(RRNodeId start_node, continue; } + if (rr_graph.node_layer(curr_node) != sample_layer_num) { + continue; + } + //VTR_LOG("Expanding with delay=%10.3g cong=%10.3g (%s)\n", current.delay, current.congestion_upstream, describe_rr_node(rr_graph, device_ctx.grid, device_ctx.rr_indexed_data, curr_node).c_str()); /* if this node is an ipin record its congestion/delay in the routing_cost_map */ @@ -849,7 +882,9 @@ static void expand_dijkstra_neighbours(PQ_Entry parent_entry, for (t_edge_size edge : rr_graph.edges(parent)) { RRNodeId child_node = rr_graph.edge_sink_node(parent, edge); // For the time being, we decide to not let the lookahead explore the node inside the clusters - t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(child_node), rr_graph.node_ylow(child_node)); + t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(child_node), + rr_graph.node_ylow(child_node), + rr_graph.node_layer(child_node)}); if (!is_inter_cluster_node(physical_type, rr_graph.node_type(child_node), @@ -882,7 +917,7 @@ static void expand_dijkstra_neighbours(PQ_Entry parent_entry, } /* sets the lookahead cost map entries based on representative cost entries from routing_cost_map */ -static void set_lookahead_map_costs(int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map) { +static void set_lookahead_map_costs(int layer_num, int segment_index, e_rr_type chan_type, t_routing_cost_map& routing_cost_map) { int chan_index = 0; if (chan_type == CHANY) { chan_index = 1; @@ -893,7 +928,7 @@ static void set_lookahead_map_costs(int segment_index, e_rr_type chan_type, t_ro for (unsigned 
iy = 0; iy < routing_cost_map.dim_size(1); iy++) { Expansion_Cost_Entry& expansion_cost_entry = routing_cost_map[ix][iy]; - f_wire_cost_map[chan_index][segment_index][ix][iy] = expansion_cost_entry.get_representative_cost_entry(REPRESENTATIVE_ENTRY_METHOD); + f_wire_cost_map[layer_num][chan_index][segment_index][ix][iy] = expansion_cost_entry.get_representative_cost_entry(REPRESENTATIVE_ENTRY_METHOD); } } } @@ -908,20 +943,22 @@ static void fill_in_missing_lookahead_entries(int segment_index, e_rr_type chan_ auto& device_ctx = g_vpr_ctx.device(); /* find missing cost entries and fill them in by copying a nearby cost entry */ - for (unsigned ix = 0; ix < device_ctx.grid.width(); ix++) { - for (unsigned iy = 0; iy < device_ctx.grid.height(); iy++) { - Cost_Entry cost_entry = f_wire_cost_map[chan_index][segment_index][ix][iy]; - - if (std::isnan(cost_entry.delay) && std::isnan(cost_entry.congestion)) { - Cost_Entry copied_entry = get_nearby_cost_entry(ix, iy, segment_index, chan_index); - f_wire_cost_map[chan_index][segment_index][ix][iy] = copied_entry; + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); ++layer_num) { + for (unsigned ix = 0; ix < device_ctx.grid.width(); ix++) { + for (unsigned iy = 0; iy < device_ctx.grid.height(); iy++) { + Cost_Entry cost_entry = f_wire_cost_map[layer_num][chan_index][segment_index][ix][iy]; + + if (std::isnan(cost_entry.delay) && std::isnan(cost_entry.congestion)) { + Cost_Entry copied_entry = get_nearby_cost_entry(layer_num, ix, iy, segment_index, chan_index); + f_wire_cost_map[layer_num][chan_index][segment_index][ix][iy] = copied_entry; + } } } } } /* returns a cost entry in the f_wire_cost_map that is near the specified coordinates (and preferably towards (0,0)) */ -static Cost_Entry get_nearby_cost_entry(int x, int y, int segment_index, int chan_index) { +static Cost_Entry get_nearby_cost_entry(int layer_num, int x, int y, int segment_index, int chan_index) { /* compute the slope from x,y to 0,0 and then 
move towards 0,0 by one unit to get the coordinates * of the cost entry to be copied */ @@ -948,14 +985,14 @@ static Cost_Entry get_nearby_cost_entry(int x, int y, int segment_index, int cha copy_y = std::max(copy_y, 0); //Clip to zero copy_x = std::max(copy_x, 0); //Clip to zero - Cost_Entry copy_entry = f_wire_cost_map[chan_index][segment_index][copy_x][copy_y]; + Cost_Entry copy_entry = f_wire_cost_map[layer_num][chan_index][segment_index][copy_x][copy_y]; /* if the entry to be copied is also empty, recurse */ if (std::isnan(copy_entry.delay) && std::isnan(copy_entry.congestion)) { if (copy_x == 0 && copy_y == 0) { copy_entry = Cost_Entry(0., 0.); //(0, 0) entry is invalid so set zero to terminate recursion } else { - copy_entry = get_nearby_cost_entry(copy_x, copy_y, segment_index, chan_index); + copy_entry = get_nearby_cost_entry(layer_num, copy_x, copy_y, segment_index, chan_index); } } @@ -1274,11 +1311,11 @@ static void adjust_rr_src_sink_position(const RRNodeId rr, int& x, int& y) { y = vtr::nint((rr_graph.node_ylow(rr) + rr_graph.node_yhigh(rr)) / 2.); } -static void print_wire_cost_map(const std::vector& segment_inf) { +static void print_wire_cost_map(int layer_num, const std::vector& segment_inf) { auto& device_ctx = g_vpr_ctx.device(); - for (size_t chan_index = 0; chan_index < f_wire_cost_map.dim_size(0); chan_index++) { - for (size_t iseg = 0; iseg < f_wire_cost_map.dim_size(1); iseg++) { + for (size_t chan_index = 0; chan_index < f_wire_cost_map.dim_size(1); chan_index++) { + for (size_t iseg = 0; iseg < f_wire_cost_map.dim_size(2); iseg++) { vtr::printf("Seg %d (%s, length %d) %d\n", iseg, segment_inf[iseg].name.c_str(), @@ -1286,7 +1323,7 @@ static void print_wire_cost_map(const std::vector& segment_inf) { chan_index); for (size_t iy = 0; iy < device_ctx.grid.height(); iy++) { for (size_t ix = 0; ix < device_ctx.grid.width(); ix++) { - vtr::printf("%2d,%2d: %10.3g\t", ix, iy, f_wire_cost_map[chan_index][iseg][ix][iy].delay); + 
vtr::printf("%2d,%2d: %10.3g\t", ix, iy, f_wire_cost_map[layer_num][chan_index][iseg][ix][iy].delay); } vtr::printf("\n"); } @@ -1335,11 +1372,13 @@ static void compute_tile_lookahead(std::unordered_map& internal_opin_global_cost_map, - size_t max_dx, - size_t max_dy) { - internal_opin_global_cost_map.resize({max_dx, max_dy}); - for (int dx = 0; dx < (int)max_dx; dx++) { - for (int dy = 0; dy < (int)max_dy; dy++) { - util::Cost_Entry min_cost(std::numeric_limits::max(), std::numeric_limits::max()); - for (int chan_idx = 0; chan_idx < (int)f_wire_cost_map.dim_size(0); chan_idx++) { - for (int seg_idx = 0; seg_idx < (int)f_wire_cost_map.dim_size(1); seg_idx++) { - auto cost = util::Cost_Entry(f_wire_cost_map[chan_idx][seg_idx][dx][dy].delay, - f_wire_cost_map[chan_idx][seg_idx][dx][dy].congestion); - if (cost.delay < min_cost.delay) { - min_cost.delay = cost.delay; - min_cost.congestion = cost.congestion; +static void min_global_cost_map(vtr::NdMatrix& internal_opin_global_cost_map) { + int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + int width = (int)g_vpr_ctx.device().grid.width(); + int height = (int)g_vpr_ctx.device().grid.height(); + internal_opin_global_cost_map.resize({static_cast(num_layers), + static_cast(width), + static_cast(height)}); + + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + for (int dx = 0; dx < width; dx++) { + for (int dy = 0; dy < height; dy++) { + util::Cost_Entry min_cost(std::numeric_limits::max(), std::numeric_limits::max()); + for (int chan_idx = 0; chan_idx < (int)f_wire_cost_map.dim_size(1); chan_idx++) { + for (int seg_idx = 0; seg_idx < (int)f_wire_cost_map.dim_size(2); seg_idx++) { + auto cost = util::Cost_Entry(f_wire_cost_map[layer_num][chan_idx][seg_idx][dx][dy].delay, + f_wire_cost_map[layer_num][chan_idx][seg_idx][dx][dy].congestion); + if (cost.delay < min_cost.delay) { + min_cost.delay = cost.delay; + min_cost.congestion = cost.congestion; + } } } + 
internal_opin_global_cost_map[layer_num][dx][dy] = min_cost; } - internal_opin_global_cost_map[dx][dy] = min_cost; } } } @@ -1501,7 +1547,7 @@ void read_router_lookahead(const std::string& file) { auto map = reader.getRoot(); - ToNdMatrix<4, VprMapCostEntry, Cost_Entry>(&f_wire_cost_map, map.getCostMap(), ToCostEntry); + ToNdMatrix<5, VprMapCostEntry, Cost_Entry>(&f_wire_cost_map, map.getCostMap(), ToCostEntry); } void write_router_lookahead(const std::string& file) { @@ -1510,7 +1556,7 @@ void write_router_lookahead(const std::string& file) { auto map = builder.initRoot(); auto cost_map = map.initCostMap(); - FromNdMatrix<4, VprMapCostEntry, Cost_Entry>(&cost_map, f_wire_cost_map, FromCostEntry); + FromNdMatrix<5, VprMapCostEntry, Cost_Entry>(&cost_map, f_wire_cost_map, FromCostEntry); writeMessageToFile(file, &builder); } diff --git a/vpr/src/route/router_lookahead_map.h b/vpr/src/route/router_lookahead_map.h index 6ccd88aa621..00dc5bf62ad 100644 --- a/vpr/src/route/router_lookahead_map.h +++ b/vpr/src/route/router_lookahead_map.h @@ -20,7 +20,7 @@ class MapLookahead : public RouterLookahead { // Lookup table to store the minimum cost to reach to a primitive pin from the root-level IPINs std::unordered_map> tile_min_cost; // [physical_tile_type][sink_physical_num] -> cost // Lookup table to store the minimum cost for each dx and dy - vtr::NdMatrix distance_based_min_cost; // [dx][dy] -> cost + vtr::NdMatrix distance_based_min_cost; // [layer_num][dx][dy] -> cost const t_det_routing_arch& det_routing_arch_; bool is_flat_; @@ -56,7 +56,7 @@ class Cost_Entry { /* provides delay/congestion estimates to travel specified distances * in the x/y direction */ -typedef vtr::NdMatrix t_wire_cost_map; //[0..1][[0..num_seg_types-1]0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1] +typedef vtr::NdMatrix t_wire_cost_map; //[0..num_layers][0..1][[0..num_seg_types-1]0..device_ctx.grid.width()-1][0..device_ctx.grid.height()-1] //[0..1] entry distinguish between 
CHANX/CHANY start nodes respectively void read_router_lookahead(const std::string& file); diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp index 139c1bdfb22..5ec27a15cc8 100644 --- a/vpr/src/route/router_lookahead_map_utils.cpp +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -22,7 +22,7 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId inode, util::t_src_opin_delays& src_opin_delays); static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& chan_ipins_delays); -static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr::Point start); +static t_physical_tile_loc pick_sample_tile(int layer_num, t_physical_tile_type_ptr tile_type, t_physical_tile_loc prev); static void run_intra_tile_dijkstra(const RRGraphView& rr_graph, util::t_ipin_primitive_sink_delays& pin_delays, @@ -312,66 +312,73 @@ t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat) { t_src_opin_delays src_opin_delays; - src_opin_delays.resize(device_ctx.physical_tile_types.size()); + src_opin_delays.resize(device_ctx.grid.get_num_layers()); + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + src_opin_delays[layer_num].resize(device_ctx.physical_tile_types.size()); + } //We assume that the routing connectivity of each instance of a physical tile is the same, //and so only measure one instance of each type - for (size_t itile = 0; itile < device_ctx.physical_tile_types.size(); ++itile) { - for (e_rr_type rr_type : {SOURCE, OPIN}) { - vtr::Point sample_loc(-1, -1); - - size_t num_sampled_locs = 0; - bool ptcs_with_no_delays = true; - while (ptcs_with_no_delays) { //Haven't found wire connected to ptc - ptcs_with_no_delays = false; - - sample_loc = pick_sample_tile(&device_ctx.physical_tile_types[itile], sample_loc); - - if (sample_loc.x() == -1 && sample_loc.y() == -1) { - //No untried instances of the current tile type left - VTR_LOG_WARN("Found no 
%ssample locations for %s in %s\n", - (num_sampled_locs == 0) ? "" : "more ", - rr_node_typename[rr_type], - device_ctx.physical_tile_types[itile].name); - break; - } - - //VTR_LOG("Sampling %s at (%d,%d)\n", device_ctx.physical_tile_types[itile].name, sample_loc.x(), sample_loc.y()); - - const std::vector& rr_nodes_at_loc = device_ctx.rr_graph.node_lookup().find_grid_nodes_at_all_sides(sample_loc.x(), sample_loc.y(), rr_type); - for (RRNodeId node_id : rr_nodes_at_loc) { - int ptc = rr_graph.node_ptc_num(node_id); - // For the time being, we decide to not let the lookahead explore the node inside the clusters - if (!is_inter_cluster_node(&device_ctx.physical_tile_types[itile], - rr_type, - ptc)) { - continue; + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (size_t itile = 0; itile < device_ctx.physical_tile_types.size(); ++itile) { + if (device_ctx.grid.num_instances(&device_ctx.physical_tile_types[itile], layer_num) == 0) { + continue; + } + for (e_rr_type rr_type : {SOURCE, OPIN}) { + t_physical_tile_loc sample_loc(OPEN, OPEN, OPEN); + + size_t num_sampled_locs = 0; + bool ptcs_with_no_delays = true; + while (ptcs_with_no_delays) { //Haven't found wire connected to ptc + ptcs_with_no_delays = false; + + sample_loc = pick_sample_tile(layer_num, &device_ctx.physical_tile_types[itile], sample_loc); + + if (sample_loc.x == OPEN && sample_loc.y == OPEN && sample_loc.layer_num == OPEN) { + //No untried instances of the current tile type left + VTR_LOG_WARN("Found no %ssample locations for %s in %s\n", + (num_sampled_locs == 0) ? "" : "more ", + rr_node_typename[rr_type], + device_ctx.physical_tile_types[itile].name); + break; } - if (ptc >= int(src_opin_delays[itile].size())) { - src_opin_delays[itile].resize(ptc + 1); //Inefficient but functional... 
+ //VTR_LOG("Sampling %s at (%d,%d)\n", device_ctx.physical_tile_types[itile].name, sample_loc.x(), sample_loc.y()); + const std::vector& rr_nodes_at_loc = device_ctx.rr_graph.node_lookup().find_grid_nodes_at_all_sides(sample_loc.layer_num, sample_loc.x, sample_loc.y, rr_type); + for (RRNodeId node_id : rr_nodes_at_loc) { + int ptc = rr_graph.node_ptc_num(node_id); + // For the time being, we decide to not let the lookahead explore the node inside the clusters + if (!is_inter_cluster_node(&device_ctx.physical_tile_types[itile], + rr_type, + ptc)) { + continue; + } + + if (ptc >= int(src_opin_delays[layer_num][itile].size())) { + src_opin_delays[layer_num][itile].resize(ptc + 1); //Inefficient but functional... + } + + //Find the wire types which are reachable from inode and record them and + //the cost to reach them + dijkstra_flood_to_wires(itile, node_id, src_opin_delays); + + if (src_opin_delays[layer_num][itile][ptc].empty()) { + VTR_LOGV_DEBUG(f_router_debug, "Found no reachable wires from %s (%s) at (%d,%d)\n", + rr_node_typename[rr_type], + rr_node_arch_name(size_t(node_id), is_flat).c_str(), + sample_loc.x, + sample_loc.y, + is_flat); + + ptcs_with_no_delays = true; + } } - //Find the wire types which are reachable from inode and record them and - //the cost to reach them - dijkstra_flood_to_wires(itile, node_id, src_opin_delays); - - if (src_opin_delays[itile][ptc].empty()) { - VTR_LOGV_DEBUG(f_router_debug, "Found no reachable wires from %s (%s) at (%d,%d)\n", - rr_node_typename[rr_type], - rr_node_arch_name(size_t(node_id), is_flat).c_str(), - sample_loc.x(), - sample_loc.y(), - is_flat); - - ptcs_with_no_delays = true; - } + ++num_sampled_locs; + } + if (ptcs_with_no_delays) { + VPR_ERROR(VPR_ERROR_ROUTE, "Some SOURCE/OPINs have no reachable wires\n"); } - - ++num_sampled_locs; - } - if (ptcs_with_no_delays) { - VPR_ERROR(VPR_ERROR_ROUTE, "Some SOURCE/OPINs have no reachable wires\n"); } } } @@ -386,34 +393,42 @@ t_chan_ipins_delays 
compute_router_chan_ipin_lookahead() { t_chan_ipins_delays chan_ipins_delays; - chan_ipins_delays.resize(device_ctx.physical_tile_types.size()); + chan_ipins_delays.resize(device_ctx.grid.get_num_layers()); + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + chan_ipins_delays[layer_num].resize(device_ctx.physical_tile_types.size()); + } //We assume that the routing connectivity of each instance of a physical tile is the same, //and so only measure one instance of each type - for (auto tile_type : device_ctx.physical_tile_types) { - vtr::Point sample_loc(-1, -1); + for (int layer_num = 0; layer_num < device_ctx.grid.get_num_layers(); layer_num++) { + for (auto tile_type : device_ctx.physical_tile_types) { + if (device_ctx.grid.num_instances(&tile_type, layer_num) == 0) { + continue; + } + t_physical_tile_loc sample_loc(OPEN, OPEN, OPEN); - sample_loc = pick_sample_tile(&tile_type, sample_loc); + sample_loc = pick_sample_tile(layer_num, &tile_type, sample_loc); - if (sample_loc.x() == -1 && sample_loc.y() == -1) { - //No untried instances of the current tile type left - VTR_LOG_WARN("Found no sample locations for %s\n", - tile_type.name); - continue; - } + if (sample_loc.x == OPEN && sample_loc.y == OPEN && sample_loc.layer_num == OPEN) { + //No untried instances of the current tile type left + VTR_LOG_WARN("Found no sample locations for %s\n", + tile_type.name); + continue; + } - int min_x = std::max(0, sample_loc.x() - X_OFFSET); - int min_y = std::max(0, sample_loc.y() - Y_OFFSET); - int max_x = std::min(int(device_ctx.grid.width()), sample_loc.x() + X_OFFSET); - int max_y = std::min(int(device_ctx.grid.height()), sample_loc.y() + Y_OFFSET); - - for (int ix = min_x; ix < max_x; ix++) { - for (int iy = min_y; iy < max_y; iy++) { - for (auto rr_type : {CHANX, CHANY}) { - for (const RRNodeId& node_id : node_lookup.find_channel_nodes(ix, iy, rr_type)) { - //Find the IPINs which are reachable from the wires within the bounding box - 
//around the selected tile location - dijkstra_flood_to_ipins(node_id, chan_ipins_delays); + int min_x = std::max(0, sample_loc.x - X_OFFSET); + int min_y = std::max(0, sample_loc.y - Y_OFFSET); + int max_x = std::min(int(device_ctx.grid.width()), sample_loc.x + X_OFFSET); + int max_y = std::min(int(device_ctx.grid.height()), sample_loc.y + Y_OFFSET); + + for (int ix = min_x; ix < max_x; ix++) { + for (int iy = min_y; iy < max_y; iy++) { + for (auto rr_type : {CHANX, CHANY}) { + for (const RRNodeId& node_id : node_lookup.find_channel_nodes(sample_loc.layer_num, ix, iy, rr_type)) { + //Find the IPINs which are reachable from the wires within the bounding box + //around the selected tile location + dijkstra_flood_to_ipins(node_id, chan_ipins_delays); + } } } } @@ -425,6 +440,7 @@ t_chan_ipins_delays compute_router_chan_ipin_lookahead() { t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_graph, t_physical_tile_type_ptr physical_tile, + int layer, int x, int y) { auto tile_pins_vec = get_flat_tile_pins(physical_tile); @@ -436,6 +452,7 @@ t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_g for (int pin_physical_num : tile_pins_vec) { RRNodeId pin_node_id = get_pin_rr_node_id(rr_graph.node_lookup(), physical_tile, + layer, x, y, pin_physical_num); @@ -471,6 +488,7 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_d root.node = node; int ptc = rr_graph.node_ptc_num(node); + int node_layer_num = rr_graph.node_layer(node); /* * Perform Djikstra from the SOURCE/OPIN of interest, stopping at the the first @@ -517,12 +535,12 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_d } //Keep costs of the best path to reach each wire type - if (!src_opin_delays[itile][ptc].count(seg_index) - || curr.delay < src_opin_delays[itile][ptc][seg_index].delay) { - src_opin_delays[itile][ptc][seg_index].wire_rr_type = curr_rr_type; - 
src_opin_delays[itile][ptc][seg_index].wire_seg_index = seg_index; - src_opin_delays[itile][ptc][seg_index].delay = curr.delay; - src_opin_delays[itile][ptc][seg_index].congestion = curr.congestion; + if (!src_opin_delays[node_layer_num][itile][ptc].count(seg_index) + || curr.delay < src_opin_delays[node_layer_num][itile][ptc][seg_index].delay) { + src_opin_delays[node_layer_num][itile][ptc][seg_index].wire_rr_type = curr_rr_type; + src_opin_delays[node_layer_num][itile][ptc][seg_index].wire_seg_index = seg_index; + src_opin_delays[node_layer_num][itile][ptc][seg_index].delay = curr.delay; + src_opin_delays[node_layer_num][itile][ptc][seg_index].congestion = curr.congestion; } } else if (curr_rr_type == SOURCE || curr_rr_type == OPIN || curr_rr_type == IPIN) { @@ -536,10 +554,18 @@ static void dijkstra_flood_to_wires(int itile, RRNodeId node, util::t_src_opin_d RRNodeId next_node = rr_graph.rr_nodes().edge_sink_node(edge); // For the time being, we decide to not let the lookahead explore the node inside the clusters - t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(next_node), rr_graph.node_ylow(next_node)); + t_physical_tile_type_ptr physical_type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(next_node), + rr_graph.node_ylow(next_node), + rr_graph.node_layer(next_node)}); if (!is_inter_cluster_node(physical_type, rr_graph.node_type(next_node), rr_graph.node_ptc_num(next_node))) { + // Don't go inside the clusters + continue; + } + + if (rr_graph.node_layer(curr.node) != node_layer_num) { + //Don't change the layer continue; } @@ -579,6 +605,8 @@ static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& ch root.node = node; root.level = 0; + int root_layer = rr_graph.node_layer(node); + /* * Perform Djikstra from the CHAN of interest, stopping at the the first * reachable IPIN @@ -607,21 +635,22 @@ static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& ch if 
(curr_rr_type == IPIN) { int node_x = rr_graph.node_xlow(curr.node); int node_y = rr_graph.node_ylow(curr.node); + int node_layer = rr_graph.node_layer(curr.node); - auto tile_type = device_ctx.grid.get_physical_type(node_x, node_y); + auto tile_type = device_ctx.grid.get_physical_type({node_x, node_y, node_layer}); int itile = tile_type->index; int ptc = rr_graph.node_ptc_num(curr.node); - if (ptc >= int(chan_ipins_delays[itile].size())) { - chan_ipins_delays[itile].resize(ptc + 1); //Inefficient but functional... + if (ptc >= int(chan_ipins_delays[root_layer][itile].size())) { + chan_ipins_delays[root_layer][itile].resize(ptc + 1); //Inefficient but functional... } site_pin_delay = std::min(curr.delay, site_pin_delay); //Keep costs of the best path to reach each wire type - chan_ipins_delays[itile][ptc].wire_rr_type = curr_rr_type; - chan_ipins_delays[itile][ptc].delay = site_pin_delay; - chan_ipins_delays[itile][ptc].congestion = curr.congestion; + chan_ipins_delays[root_layer][itile][ptc].wire_rr_type = curr_rr_type; + chan_ipins_delays[root_layer][itile][ptc].delay = site_pin_delay; + chan_ipins_delays[root_layer][itile][ptc].congestion = curr.congestion; } else if (curr_rr_type == CHANX || curr_rr_type == CHANY) { if (curr.level >= MAX_EXPANSION_LEVEL) { continue; @@ -637,6 +666,11 @@ static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& ch RRNodeId next_node = rr_graph.rr_nodes().edge_sink_node(edge); + if (rr_graph.node_layer(next_node) != root_layer) { + //Don't change the layer + continue; + } + t_pq_entry next; next.congestion = new_cong; //Of current node next.delay = new_delay; //To reach next node @@ -651,18 +685,21 @@ static void dijkstra_flood_to_ipins(RRNodeId node, util::t_chan_ipins_delays& ch } } -static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr::Point prev) { +static t_physical_tile_loc pick_sample_tile(int layer_num, t_physical_tile_type_ptr tile_type, t_physical_tile_loc prev) { //Very simple 
for now, just pick the fist matching tile found - vtr::Point loc(OPEN, OPEN); + t_physical_tile_loc loc(OPEN, OPEN, OPEN); - //VTR_LOG("Prev: %d,%d\n", prev.x(), prev.y()); + //VTR_LOG("Prev: %d,%d\n", prev.x, prev.y); auto& device_ctx = g_vpr_ctx.device(); auto& grid = device_ctx.grid; - int y_init = prev.y() + 1; //Start searching next element above prev + int y_init = prev.y + 1; //Start searching next element above prev - for (int x = prev.x(); x < int(grid.width()); ++x) { + if (device_ctx.grid.num_instances(tile_type, layer_num) == 0) { + return loc; + } + for (int x = prev.x; x < int(grid.width()); ++x) { if (x < 0) continue; //VTR_LOG(" x: %d\n", x); @@ -671,20 +708,22 @@ static vtr::Point pick_sample_tile(t_physical_tile_type_ptr tile_type, vtr: if (y < 0) continue; //VTR_LOG(" y: %d\n", y); - if (grid.get_physical_type(x, y) == tile_type) { - loc.set_x(x); - loc.set_y(y); + if (grid.get_physical_type(t_physical_tile_loc(x, y, layer_num)) == tile_type) { + loc.x = x; + loc.y = y; + loc.layer_num = layer_num; break; } } - if (loc.x() != OPEN && loc.y() != OPEN) { + if (loc.x != OPEN && loc.y != OPEN && loc.layer_num != OPEN) { break; } else { y_init = 0; //Prepare to search next column } } - //VTR_LOG("Next: %d,%d\n", loc.x(), loc.y()); + + //VTR_LOG("Next: %d,%d\n", loc.x, loc.y); return loc; } diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h index 5a7a83aa9fd..f3a3d43249a 100644 --- a/vpr/src/route/router_lookahead_map_utils.h +++ b/vpr/src/route/router_lookahead_map_utils.h @@ -268,7 +268,7 @@ struct t_reachable_wire_inf { // // When querying this data structure, the minimum cost is computed for each delay/congestion pair, and returned // as the lookahead expected cost. 
-typedef std::vector>> t_src_opin_delays; +typedef std::vector>>> t_src_opin_delays; //[from pin ptc num][target src ptc num]->cost typedef std::vector> t_ipin_primitive_sink_delays; @@ -282,13 +282,14 @@ typedef std::vector> t_ipin_primitive_sink_d // // This data structure stores the minimum delay to reach a specific SINK from the last connection between the wire (CHANX/CHANY) // and the tile's IPIN. If there are many connections to the same IPIN, the one with the minimum delay is selected. -typedef std::vector> t_chan_ipins_delays; +typedef std::vector>> t_chan_ipins_delays; t_src_opin_delays compute_router_src_opin_lookahead(bool is_flat); t_chan_ipins_delays compute_router_chan_ipin_lookahead(); t_ipin_primitive_sink_delays compute_intra_tile_dijkstra(const RRGraphView& rr_graph, t_physical_tile_type_ptr physical_tile, + int layer, int x, int y); diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 96581ccc093..dc0d7a06d04 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -112,6 +112,7 @@ static vtr::NdMatrix, 4> alloc_and_load_track_to_pin_lookup(vtr static void build_bidir_rr_opins(RRGraphBuilder& rr_graph_builder, const RRGraphView& rr_graph, + const int layer, const int i, const int j, const e_side side, @@ -128,6 +129,7 @@ static void build_bidir_rr_opins(RRGraphBuilder& rr_graph_builder, static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, const RRGraphView& rr_graph, + const int layer, const int i, const int j, const e_side side, @@ -149,6 +151,7 @@ static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, const RRGraphView& rr_graph, + int layer, int x, int y, e_side side, @@ -207,12 +210,14 @@ static vtr::vector> get_pin_chains_flat( static void add_classes_rr_graph(RRGraphBuilder& rr_graph_builder, const std::vector& class_num_vec, + const int layer, const int root_x, const int root_y, 
t_physical_tile_type_ptr physical_type); static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, const std::vector& pin_num_vec, + const int layer, const int i, const int j, t_physical_tile_type_ptr physical_type); @@ -225,6 +230,7 @@ static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, * @param rr_graph_builder * @param arch_sw_inf_map * @param class_num_vec + * @param layer * @param i * @param j * @param rr_edges_to_create @@ -234,6 +240,7 @@ static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, std::map& arch_sw_inf_map, const std::vector& class_num_vec, + const int layer, const int i, const int j, t_rr_edge_info_set& rr_edges_to_create, @@ -242,6 +249,7 @@ static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, const std::vector& class_num_vec, + const int layer, const int i, const int j, t_rr_edge_info_set& rr_edges_to_create, @@ -251,6 +259,7 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, std::map& arch_sw_inf_map, t_physical_tile_type_ptr physical_tile, + int layer, int root_x, int root_y, const int delayless_switch); @@ -302,6 +311,7 @@ static void add_intra_cluster_edges_rr_graph(RRGraphBuilder& rr_graph_builder, static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create, t_physical_tile_type_ptr physical_tile, + int layer, int i, int j); @@ -311,6 +321,7 @@ static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, int& num_collapsed_nodes, ClusterBlockId cluster_blk_id, + const int layer, const int i, const int j, const int cap, @@ -332,6 +343,7 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, const 
t_pb* pb, const t_cluster_pin_chain& nodes_to_collapse, int rel_cap, + int layer, int i, int j); @@ -345,6 +357,7 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, * @param nodes_to_collapse * @param R_minW_nmos * @param R_minW_pmos + * @param layer * @param i * @param j * @return Number of the collapsed nodes @@ -357,6 +370,7 @@ static int add_edges_for_collapsed_nodes(RRGraphBuilder& rr_graph_builder, const t_cluster_pin_chain& nodes_to_collapse, float R_minW_nmos, float R_minW_pmos, + int layer, int i, int j); /** @@ -374,6 +388,7 @@ static int add_edges_for_collapsed_nodes(RRGraphBuilder& rr_graph_builder, * @param chain_idx * @param node_idx * @param sink_pin_num + * @param layer * @param i * @param j */ @@ -389,6 +404,7 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, float R_minW_pmos, int chain_idx, int node_idx, + int layer, int i, int j); @@ -412,6 +428,7 @@ static float get_min_delay_to_chain(t_physical_tile_type_ptr physical_type, static std::unordered_set get_chain_pins(std::vector chain); static void build_rr_chan(RRGraphBuilder& rr_graph_builder, + const int layer, const int i, const int j, const t_rr_type chan_type, @@ -744,10 +761,12 @@ static void add_intra_cluster_edges_rr_graph(RRGraphBuilder& rr_graph_builder, auto block_loc = place_ctx.block_locs[cluster_blk_id].loc; int i = block_loc.x; int j = block_loc.y; + int layer = block_loc.layer; int abs_cap = block_loc.sub_tile; build_cluster_internal_edges(rr_graph_builder, num_collapsed_nodes, cluster_blk_id, + layer, i, j, abs_cap, @@ -769,6 +788,7 @@ static void add_intra_cluster_edges_rr_graph(RRGraphBuilder& rr_graph_builder, static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create, t_physical_tile_type_ptr physical_tile, + int layer, int i, int j) { auto pin_num_vec = get_flat_tile_pins(physical_tile); @@ -778,6 +798,7 @@ static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, 
} auto pin_rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_tile, + layer, i, j, pin_physical_num); @@ -787,6 +808,7 @@ static void add_intra_tile_edges_rr_graph(RRGraphBuilder& rr_graph_builder, for (auto driving_pin : driving_pins) { auto driving_pin_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_tile, + layer, i, j, driving_pin); @@ -1069,7 +1091,7 @@ static void build_rr_graph(const t_graph_type graph_type, } device_ctx.rr_graph_builder.resize_nodes(num_rr_nodes); - /* These are data structures used by the the unidir opin mapping. They are used + /* These are data structures used by the unidir opin mapping. They are used * to spread connections evenly for each segment type among the available * wire start points */ vtr::NdMatrix Fc_xofs({grid.height() - 1, @@ -1224,7 +1246,7 @@ static void build_rr_graph(const t_graph_type graph_type, // Verify no incremental node allocation. /* AA: Note that in the case of dedicated networks, we are currently underestimating the additional node count due to the clock networks. - * Thus this below error is logged; it's not actually an error, the node estimation needs to get fixed for dedicated clock networks. */ + * Thus, this below error is logged; it's not actually an error, the node estimation needs to get fixed for dedicated clock networks. 
*/ if (rr_graph.num_nodes() > expected_node_count) { VTR_LOG_ERROR("Expected no more than %zu nodes, have %zu nodes\n", expected_node_count, rr_graph.num_nodes()); @@ -1361,6 +1383,7 @@ static void build_intra_cluster_rr_graph(const t_graph_type graph_type, void build_tile_rr_graph(RRGraphBuilder& rr_graph_builder, const t_det_routing_arch& det_routing_arch, t_physical_tile_type_ptr physical_tile, + int layer, int x, int y, const int delayless_switch) { @@ -1369,6 +1392,7 @@ void build_tile_rr_graph(RRGraphBuilder& rr_graph_builder, int num_rr_nodes = 0; alloc_and_load_tile_rr_node_indices(rr_graph_builder, physical_tile, + layer, x, y, &num_rr_nodes); @@ -1377,6 +1401,7 @@ void build_tile_rr_graph(RRGraphBuilder& rr_graph_builder, alloc_and_load_tile_rr_graph(rr_graph_builder, sw_map, physical_tile, + layer, x, y, delayless_switch); @@ -1863,39 +1888,44 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder int num_edges = 0; /* Connection SINKS and SOURCES to their pins - Initializing IPINs/OPINs. 
*/ - for (size_t i = 0; i < grid.width(); ++i) { - for (size_t j = 0; j < grid.height(); ++j) { - if (grid.get_width_offset(i, j) == 0 && grid.get_height_offset(i, j) == 0) { - t_physical_tile_type_ptr physical_tile = grid.get_physical_type(i, j); - std::vector class_num_vec; - std::vector pin_num_vec; - class_num_vec = get_tile_root_classes(physical_tile); - pin_num_vec = get_tile_root_pins(physical_tile); - add_classes_rr_graph(rr_graph_builder, - class_num_vec, - i, - j, - physical_tile); - - add_pins_rr_graph(rr_graph_builder, - pin_num_vec, - i, - j, - physical_tile); - - connect_src_sink_to_pins(rr_graph_builder, + for (int layer = 0; layer < grid.get_num_layers(); ++layer) { + for (int i = 0; i < (int)grid.width(); ++i) { + for (int j = 0; j < (int)grid.height(); ++j) { + if (grid.get_width_offset({i, j, layer}) == 0 && grid.get_height_offset({i, j, layer}) == 0) { + t_physical_tile_type_ptr physical_tile = grid.get_physical_type({i, j, layer}); + std::vector class_num_vec; + std::vector pin_num_vec; + class_num_vec = get_tile_root_classes(physical_tile); + pin_num_vec = get_tile_root_pins(physical_tile); + add_classes_rr_graph(rr_graph_builder, class_num_vec, + layer, i, j, - rr_edges_to_create, - delayless_switch, physical_tile); - //Create the actual SOURCE->OPIN, IPIN->SINK edges - uniquify_edges(rr_edges_to_create); - alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); - num_edges += rr_edges_to_create.size(); - rr_edges_to_create.clear(); + add_pins_rr_graph(rr_graph_builder, + pin_num_vec, + layer, + i, + j, + physical_tile); + + connect_src_sink_to_pins(rr_graph_builder, + class_num_vec, + layer, + i, + j, + rr_edges_to_create, + delayless_switch, + physical_tile); + + //Create the actual SOURCE->OPIN, IPIN->SINK edges + uniquify_edges(rr_edges_to_create); + alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); + num_edges += rr_edges_to_create.size(); + rr_edges_to_create.clear(); + } } } } @@ -1904,31 +1934,35 @@ static std::function 
alloc_and_load_rr_graph(RRGraphBuilder num_edges = 0; /* Build opins */ int rr_edges_before_directs = 0; - for (size_t i = 0; i < grid.width(); ++i) { - for (size_t j = 0; j < grid.height(); ++j) { - for (e_side side : SIDES) { - if (BI_DIRECTIONAL == directionality) { - build_bidir_rr_opins(rr_graph_builder, rr_graph, i, j, side, - opin_to_track_map, Fc_out, rr_edges_to_create, chan_details_x, chan_details_y, - grid, - directs, num_directs, clb_to_clb_directs, num_seg_types); - } else { - VTR_ASSERT(UNI_DIRECTIONAL == directionality); - bool clipped; - build_unidir_rr_opins(rr_graph_builder, rr_graph, i, j, side, grid, Fc_out, chan_width, - chan_details_x, chan_details_y, Fc_xofs, Fc_yofs, - rr_edges_to_create, &clipped, seg_index_map, - directs, num_directs, clb_to_clb_directs, num_seg_types, rr_edges_before_directs); - if (clipped) { - *Fc_clipped = true; + for (int layer = 0; layer < grid.get_num_layers(); layer++) { + for (size_t i = 0; i < grid.width(); ++i) { + for (size_t j = 0; j < grid.height(); ++j) { + for (e_side side : SIDES) { + if (BI_DIRECTIONAL == directionality) { + build_bidir_rr_opins(rr_graph_builder, rr_graph, layer, i, j, side, + opin_to_track_map, Fc_out, rr_edges_to_create, chan_details_x, + chan_details_y, + grid, + directs, num_directs, clb_to_clb_directs, num_seg_types); + } else { + VTR_ASSERT(UNI_DIRECTIONAL == directionality); + bool clipped; + build_unidir_rr_opins(rr_graph_builder, rr_graph, layer, i, j, side, grid, Fc_out, chan_width, + chan_details_x, chan_details_y, Fc_xofs, Fc_yofs, + rr_edges_to_create, &clipped, seg_index_map, + directs, num_directs, clb_to_clb_directs, num_seg_types, + rr_edges_before_directs); + if (clipped) { + *Fc_clipped = true; + } } - } - //Create the actual OPIN->CHANX/CHANY edges - uniquify_edges(rr_edges_to_create); - alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); - num_edges += rr_edges_to_create.size(); - rr_edges_to_create.clear(); + //Create the actual OPIN->CHANX/CHANY edges + 
uniquify_edges(rr_edges_to_create); + alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); + num_edges += rr_edges_to_create.size(); + rr_edges_to_create.clear(); + } } } } @@ -1939,41 +1973,43 @@ static std::function alloc_and_load_rr_graph(RRGraphBuilder num_edges = 0; /* Build channels */ VTR_ASSERT(Fs % 3 == 0); - for (size_t i = 0; i < grid.width() - 1; ++i) { - for (size_t j = 0; j < grid.height() - 1; ++j) { - if (i > 0) { - int tracks_per_chan = ((is_global_graph) ? 1 : chan_width.x_list[j]); - build_rr_chan(rr_graph_builder, i, j, CHANX, track_to_pin_lookup_x, sb_conn_map, switch_block_conn, - CHANX_COST_INDEX_START, - chan_width, grid, tracks_per_chan, - sblock_pattern, Fs / 3, chan_details_x, chan_details_y, - rr_edges_to_create, - wire_to_ipin_switch, - directionality); - - //Create the actual CHAN->CHAN edges - uniquify_edges(rr_edges_to_create); - alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); - num_edges += rr_edges_to_create.size(); - - rr_edges_to_create.clear(); - } - if (j > 0) { - int tracks_per_chan = ((is_global_graph) ? 1 : chan_width.y_list[i]); - build_rr_chan(rr_graph_builder, i, j, CHANY, track_to_pin_lookup_y, sb_conn_map, switch_block_conn, - CHANX_COST_INDEX_START + num_seg_types_x, - chan_width, grid, tracks_per_chan, - sblock_pattern, Fs / 3, chan_details_x, chan_details_y, - rr_edges_to_create, - wire_to_ipin_switch, - directionality); - - //Create the actual CHAN->CHAN edges - uniquify_edges(rr_edges_to_create); - alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); - num_edges += rr_edges_to_create.size(); - - rr_edges_to_create.clear(); + for (int layer = 0; layer < grid.get_num_layers(); ++layer) { + for (size_t i = 0; i < grid.width() - 1; ++i) { + for (size_t j = 0; j < grid.height() - 1; ++j) { + if (i > 0) { + int tracks_per_chan = ((is_global_graph) ? 
1 : chan_width.x_list[j]); + build_rr_chan(rr_graph_builder, layer, i, j, CHANX, track_to_pin_lookup_x, sb_conn_map, switch_block_conn, + CHANX_COST_INDEX_START, + chan_width, grid, tracks_per_chan, + sblock_pattern, Fs / 3, chan_details_x, chan_details_y, + rr_edges_to_create, + wire_to_ipin_switch, + directionality); + + //Create the actual CHAN->CHAN edges + uniquify_edges(rr_edges_to_create); + alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); + num_edges += rr_edges_to_create.size(); + + rr_edges_to_create.clear(); + } + if (j > 0) { + int tracks_per_chan = ((is_global_graph) ? 1 : chan_width.y_list[i]); + build_rr_chan(rr_graph_builder, layer, i, j, CHANY, track_to_pin_lookup_y, sb_conn_map, switch_block_conn, + CHANX_COST_INDEX_START + num_seg_types_x, + chan_width, grid, tracks_per_chan, + sblock_pattern, Fs / 3, chan_details_x, chan_details_y, + rr_edges_to_create, + wire_to_ipin_switch, + directionality); + + //Create the actual CHAN->CHAN edges + uniquify_edges(rr_edges_to_create); + alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); + num_edges += rr_edges_to_create.size(); + + rr_edges_to_create.clear(); + } } } } @@ -2054,43 +2090,49 @@ static void alloc_and_load_intra_cluster_rr_graph(RRGraphBuilder& rr_graph_build bool is_flat) { t_rr_edge_info_set rr_edges_to_create; int num_edges = 0; - for (size_t i = 0; i < grid.width(); ++i) { - for (size_t j = 0; j < grid.height(); ++j) { - if (grid.get_width_offset(i, j) == 0 && grid.get_height_offset(i, j) == 0) { - t_physical_tile_type_ptr physical_tile = grid.get_physical_type(i, j); - std::vector class_num_vec; - std::vector pin_num_vec; - class_num_vec = get_cluster_netlist_intra_tile_classes_at_loc(i, j, physical_tile); - pin_num_vec = get_cluster_netlist_intra_tile_pins_at_loc(i, - j, - pin_chains, - chain_pin_nums, - physical_tile); - add_classes_rr_graph(rr_graph_builder, - class_num_vec, - i, - j, - physical_tile); - - add_pins_rr_graph(rr_graph_builder, - pin_num_vec, - i, - j, - 
physical_tile); - - connect_src_sink_to_pins(rr_graph_builder, + for (int layer = 0; layer < grid.get_num_layers(); layer++) { + for (int i = 0; i < (int)grid.width(); ++i) { + for (int j = 0; j < (int)grid.height(); ++j) { + if (grid.get_width_offset({i, j, layer}) == 0 && grid.get_height_offset({i, j, layer}) == 0) { + t_physical_tile_type_ptr physical_tile = grid.get_physical_type({i, j, layer}); + std::vector class_num_vec; + std::vector pin_num_vec; + class_num_vec = get_cluster_netlist_intra_tile_classes_at_loc(layer, i, j, physical_tile); + pin_num_vec = get_cluster_netlist_intra_tile_pins_at_loc(layer, + i, + j, + pin_chains, + chain_pin_nums, + physical_tile); + add_classes_rr_graph(rr_graph_builder, class_num_vec, + layer, i, j, - rr_edges_to_create, - delayless_switch, physical_tile); - //Create the actual SOURCE->OPIN, IPIN->SINK edges - uniquify_edges(rr_edges_to_create); - alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); - num_edges += rr_edges_to_create.size(); - rr_edges_to_create.clear(); + add_pins_rr_graph(rr_graph_builder, + pin_num_vec, + layer, + i, + j, + physical_tile); + + connect_src_sink_to_pins(rr_graph_builder, + class_num_vec, + layer, + i, + j, + rr_edges_to_create, + delayless_switch, + physical_tile); + + //Create the actual SOURCE->OPIN, IPIN->SINK edges + uniquify_edges(rr_edges_to_create); + alloc_and_load_edges(rr_graph_builder, rr_edges_to_create); + num_edges += rr_edges_to_create.size(); + rr_edges_to_create.clear(); + } } } } @@ -2117,6 +2159,7 @@ static void alloc_and_load_intra_cluster_rr_graph(RRGraphBuilder& rr_graph_build static void add_classes_rr_graph(RRGraphBuilder& rr_graph_builder, const std::vector& class_num_vec, + const int layer, const int root_x, const int root_y, t_physical_tile_type_ptr physical_type) { @@ -2124,7 +2167,7 @@ static void add_classes_rr_graph(RRGraphBuilder& rr_graph_builder, for (auto class_num : class_num_vec) { auto class_type = 
get_class_type_from_class_physical_num(physical_type, class_num); - RRNodeId class_inode = get_class_rr_node_id(rr_graph_builder.node_lookup(), physical_type, root_x, root_y, class_num); + RRNodeId class_inode = get_class_rr_node_id(rr_graph_builder.node_lookup(), physical_type, layer, root_x, root_y, class_num); VTR_ASSERT(class_inode != RRNodeId::INVALID()); int class_num_pins = get_class_num_pins_from_class_physical_num(physical_type, class_num); if (class_type == DRIVER) { @@ -2140,6 +2183,8 @@ static void add_classes_rr_graph(RRGraphBuilder& rr_graph_builder, rr_graph_builder.set_node_capacity(class_inode, (short)class_num_pins); VTR_ASSERT(root_x <= std::numeric_limits::max() && root_y <= std::numeric_limits::max()); rr_graph_builder.set_node_coordinates(class_inode, (short)root_x, (short)root_y, (short)(root_x + physical_type->width - 1), (short)(root_y + physical_type->height - 1)); + VTR_ASSERT(layer <= std::numeric_limits::max()); + rr_graph_builder.set_node_layer(class_inode, layer); float R = 0.; float C = 0.; rr_graph_builder.set_node_rc_index(class_inode, NodeRCIndex(find_create_rr_rc_data(R, C, mutable_device_ctx.rr_rc_data))); @@ -2149,6 +2194,7 @@ static void add_classes_rr_graph(RRGraphBuilder& rr_graph_builder, static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, const std::vector& pin_num_vec, + const int layer, const int i, const int j, t_physical_tile_type_ptr physical_type) { @@ -2167,7 +2213,8 @@ static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, int y_offset = y_offset_vec[pin_coord]; e_side pin_side = pin_sides_vec[pin_coord]; auto node_type = (pin_type == DRIVER) ? 
OPIN : IPIN; - RRNodeId node_id = node_lookup.find_node(i + x_offset, + RRNodeId node_id = node_lookup.find_node(layer, + i + x_offset, j + y_offset, node_type, pin_num, @@ -2196,6 +2243,7 @@ static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, j + y_offset, i + x_offset, j + y_offset); + rr_graph_builder.set_node_layer(node_id, layer); rr_graph_builder.add_node_side(node_id, pin_side); } } @@ -2205,6 +2253,7 @@ static void add_pins_rr_graph(RRGraphBuilder& rr_graph_builder, static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, std::map& /*arch_sw_inf_map*/, const std::vector& class_num_vec, + const int layer, const int i, const int j, t_rr_edge_info_set& rr_edges_to_create, @@ -2213,14 +2262,15 @@ static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, for (auto class_num : class_num_vec) { const auto& pin_list = get_pin_list_from_class_physical_num(physical_type_ptr, class_num); auto class_type = get_class_type_from_class_physical_num(physical_type_ptr, class_num); - RRNodeId class_rr_node_id = get_class_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, i, j, class_num); + RRNodeId class_rr_node_id = get_class_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, layer, i, j, class_num); VTR_ASSERT(class_rr_node_id != RRNodeId::INVALID()); //bool is_primitive = is_primitive_pin(physical_type_ptr, pin_list[0]); //t_logical_block_type_ptr logical_block = is_primitive ? 
get_logical_block_from_pin_physical_num(physical_type_ptr, pin_list[0]) : nullptr; for (auto pin_num : pin_list) { - RRNodeId pin_rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, i, j, pin_num); + RRNodeId pin_rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, layer, i, j, pin_num); if (pin_rr_node_id == RRNodeId::INVALID()) { - VTR_LOG_ERROR("In block (%d, %d) pin num: %d doesn't exist to be connected to class %d\n", + VTR_LOG_ERROR("In block (%d, %d, %d) pin num: %d doesn't exist to be connected to class %d\n", + layer, i, j, pin_num, @@ -2253,6 +2303,7 @@ static void connect_tile_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, const std::vector& class_num_vec, + const int layer, const int i, const int j, t_rr_edge_info_set& rr_edges_to_create, @@ -2261,12 +2312,13 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, for (auto class_num : class_num_vec) { const auto& pin_list = get_pin_list_from_class_physical_num(physical_type_ptr, class_num); auto class_type = get_class_type_from_class_physical_num(physical_type_ptr, class_num); - RRNodeId class_rr_node_id = get_class_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, i, j, class_num); + RRNodeId class_rr_node_id = get_class_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, layer, i, j, class_num); VTR_ASSERT(class_rr_node_id != RRNodeId::INVALID()); for (auto pin_num : pin_list) { - RRNodeId pin_rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, i, j, pin_num); + RRNodeId pin_rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type_ptr, layer, i, j, pin_num); if (pin_rr_node_id == RRNodeId::INVALID()) { - VTR_LOG_ERROR("In block (%d, %d) pin num: %d doesn't exist to be connected to class %d\n", + VTR_LOG_ERROR("In block (%d, %d, %d) pin num: %d doesn't exist to be connected to 
class %d\n", + layer, i, j, pin_num, @@ -2289,6 +2341,7 @@ static void connect_src_sink_to_pins(RRGraphBuilder& rr_graph_builder, static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, std::map& arch_sw_inf_map, t_physical_tile_type_ptr physical_tile, + int layer, int root_x, int root_y, const int delayless_switch) { @@ -2302,12 +2355,14 @@ static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, add_classes_rr_graph(rr_graph_builder, class_num_vec, + layer, root_x, root_y, physical_tile); add_pins_rr_graph(rr_graph_builder, pin_num_vec, + layer, root_x, root_y, physical_tile); @@ -2315,6 +2370,7 @@ static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, connect_tile_src_sink_to_pins(rr_graph_builder, arch_sw_inf_map, class_num_vec, + layer, root_x, root_y, rr_edges_to_create, @@ -2328,6 +2384,7 @@ static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, add_intra_tile_edges_rr_graph(rr_graph_builder, rr_edges_to_create, physical_tile, + layer, root_x, root_y); @@ -2342,6 +2399,7 @@ static void alloc_and_load_tile_rr_graph(RRGraphBuilder& rr_graph_builder, static void build_bidir_rr_opins(RRGraphBuilder& rr_graph_builder, const RRGraphView& rr_graph, + const int layer, const int i, const int j, const e_side side, @@ -2363,9 +2421,9 @@ static void build_bidir_rr_opins(RRGraphBuilder& rr_graph_builder, return; } - auto type = grid.get_physical_type(i, j); - int width_offset = grid.get_width_offset(i, j); - int height_offset = grid.get_height_offset(i, j); + auto type = grid.get_physical_type({i, j, layer}); + int width_offset = grid.get_width_offset({i, j, layer}); + int height_offset = grid.get_height_offset({i, j, layer}); const vtr::Matrix& Fc = Fc_out[type->index]; @@ -2386,18 +2444,18 @@ static void build_bidir_rr_opins(RRGraphBuilder& rr_graph_builder, total_pin_Fc += Fc[pin_index][iseg]; } - RRNodeId node_index = rr_graph_builder.node_lookup().find_node(i, j, OPIN, pin_index, side); + 
RRNodeId node_index = rr_graph_builder.node_lookup().find_node(layer, i, j, OPIN, pin_index, side); VTR_ASSERT(node_index); if (total_pin_Fc > 0) { - get_bidir_opin_connections(rr_graph_builder, i, j, pin_index, + get_bidir_opin_connections(rr_graph_builder, layer, i, j, pin_index, node_index, rr_edges_to_create, opin_to_track_map, chan_details_x, chan_details_y); } /* Add in direct connections */ - get_opin_direct_connections(rr_graph_builder, rr_graph, i, j, side, pin_index, + get_opin_direct_connections(rr_graph_builder, rr_graph, layer, i, j, side, pin_index, node_index, rr_edges_to_create, directs, num_directs, clb_to_clb_directs); } @@ -2430,6 +2488,7 @@ void free_rr_graph() { static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, int& num_collapsed_nodes, ClusterBlockId cluster_blk_id, + const int layer, const int i, const int j, const int abs_cap, @@ -2441,8 +2500,8 @@ static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, bool is_flat) { VTR_ASSERT(is_flat); /* Internal edges are added from the start tile */ - int width_offset = grid.get_width_offset(i, j); - int height_offset = grid.get_height_offset(i, j); + int width_offset = grid.get_width_offset({i, j, layer}); + int height_offset = grid.get_height_offset({i, j, layer}); VTR_ASSERT(width_offset == 0 && height_offset == 0); auto& cluster_net_list = g_vpr_ctx.clustering().clb_nlist; @@ -2475,6 +2534,7 @@ static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, pb, nodes_to_collapse, rel_cap, + layer, i, j); @@ -2491,6 +2551,7 @@ static void build_cluster_internal_edges(RRGraphBuilder& rr_graph_builder, nodes_to_collapse, R_minW_nmos, R_minW_pmos, + layer, i, j); } @@ -2503,6 +2564,7 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, const t_pb* pb, const t_cluster_pin_chain& nodes_to_collapse, int rel_cap, + int layer, int i, int j) { auto pin_num_range = get_pb_pins(physical_type, @@ -2524,6 +2586,7 @@ static void 
add_pb_edges(RRGraphBuilder& rr_graph_builder, } auto parent_pin_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type, + layer, i, j, pin_physical_num); @@ -2543,6 +2606,7 @@ static void add_pb_edges(RRGraphBuilder& rr_graph_builder, } auto conn_pin_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type, + layer, i, j, conn_pin_physical_num); @@ -2569,6 +2633,7 @@ static int add_edges_for_collapsed_nodes(RRGraphBuilder& rr_graph_builder, const t_cluster_pin_chain& nodes_to_collapse, float R_minW_nmos, float R_minW_pmos, + int layer, int i, int j) { // Store the cluster pins in a set to make the search more run-time efficient @@ -2595,6 +2660,7 @@ static int add_edges_for_collapsed_nodes(RRGraphBuilder& rr_graph_builder, R_minW_pmos, chain_idx, node_idx, + layer, i, j); } @@ -2614,6 +2680,7 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, float R_minW_pmos, int chain_idx, int node_idx, + int layer, int i, int j) { // Chain node pin physical number @@ -2634,6 +2701,7 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, // Get the chain's sink node rr node it. 
RRNodeId sink_rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type, + layer, i, j, sink_pin_num); @@ -2664,6 +2732,7 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, sink_pin_num); RRNodeId rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type, + layer, i, j, pin_physical_num); @@ -2695,6 +2764,7 @@ static void add_chain_node_fan_in_edges(RRGraphBuilder& rr_graph_builder, sink_pin_num); RRNodeId rr_node_id = get_pin_rr_node_id(rr_graph_builder.node_lookup(), physical_type, + layer, i, j, src_pin); @@ -2763,6 +2833,7 @@ static std::unordered_set get_chain_pins(std::vector chai /* Allocates/loads edges for nodes belonging to specified channel segment and initializes * node properties such as cost, occupancy and capacity */ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, + const int layer, const int x_coord, const int y_coord, const t_rr_type chan_type, @@ -2786,7 +2857,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, auto& device_ctx = g_vpr_ctx.device(); auto& mutable_device_ctx = g_vpr_ctx.mutable_device(); - //Initally a assumes CHANX + //Initally assumes CHANX int seg_coord = x_coord; //The absolute coordinate of this segment within the channel int chan_coord = y_coord; //The absolute coordinate of this channel within the device int seg_dimension = device_ctx.grid.width() - 2; //-2 for no perim channels @@ -2833,7 +2904,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, from_seg_details = chan_details_x[start][y_coord].data(); } - RRNodeId node = rr_graph_builder.node_lookup().find_node(x_coord, y_coord, chan_type, track); + RRNodeId node = rr_graph_builder.node_lookup().find_node(layer, x_coord, y_coord, chan_type, track); if (!node) { continue; @@ -2841,7 +2912,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, /* Add the edges from this track to all it's connected pins into the list */ int num_edges = 0; - num_edges += 
get_track_to_pins(rr_graph_builder, start, chan_coord, track, tracks_per_chan, node, rr_edges_to_create, + num_edges += get_track_to_pins(rr_graph_builder, layer, start, chan_coord, track, tracks_per_chan, node, rr_edges_to_create, track_to_pin_lookup, seg_details, chan_type, seg_dimension, wire_to_ipin_switch, directionality); @@ -2858,7 +2929,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, max_opposite_chan_width = nodes_per_chan.x_max; } if (to_seg_details->length() > 0) { - num_edges += get_track_to_tracks(rr_graph_builder, chan_coord, start, track, chan_type, chan_coord, + num_edges += get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, chan_coord, opposite_chan_type, seg_dimension, max_opposite_chan_width, grid, Fs_per_side, sblock_pattern, node, rr_edges_to_create, from_seg_details, to_seg_details, opposite_chan_details, @@ -2878,7 +2949,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, max_opposite_chan_width = nodes_per_chan.x_max; } if (to_seg_details->length() > 0) { - num_edges += get_track_to_tracks(rr_graph_builder, chan_coord, start, track, chan_type, chan_coord + 1, + num_edges += get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, chan_coord + 1, opposite_chan_type, seg_dimension, max_opposite_chan_width, grid, Fs_per_side, sblock_pattern, node, rr_edges_to_create, from_seg_details, to_seg_details, opposite_chan_details, @@ -2911,7 +2982,7 @@ static void build_rr_chan(RRGraphBuilder& rr_graph_builder, max_chan_width = nodes_per_chan.y_max; } if (to_seg_details->length() > 0) { - num_edges += get_track_to_tracks(rr_graph_builder, chan_coord, start, track, chan_type, target_seg, + num_edges += get_track_to_tracks(rr_graph_builder, layer, chan_coord, start, track, chan_type, target_seg, chan_type, seg_dimension, max_chan_width, grid, Fs_per_side, sblock_pattern, node, rr_edges_to_create, from_seg_details, to_seg_details, from_chan_details, @@ -2937,6 +3008,8 @@ 
static void build_rr_chan(RRGraphBuilder& rr_graph_builder, rr_graph_builder.set_node_coordinates(node, x_coord, start, x_coord, end); } + rr_graph_builder.set_node_layer(node, layer); + int length = end - start + 1; float R = length * seg_details[track].Rmetal(); float C = length * seg_details[track].Cmetal(); @@ -3727,6 +3800,7 @@ static vtr::NdMatrix, 4> alloc_and_load_track_to_pin_lookup(vtr */ static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, const RRGraphView& rr_graph, + const int layer, const int i, const int j, const e_side side, @@ -3751,10 +3825,10 @@ static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, */ *Fc_clipped = false; - auto type = grid.get_physical_type(i, j); + auto type = grid.get_physical_type({i, j, layer}); - int width_offset = grid.get_width_offset(i, j); - int height_offset = grid.get_height_offset(i, j); + int width_offset = grid.get_width_offset({i, j, layer}); + int height_offset = grid.get_height_offset({i, j, layer}); /* Go through each pin and find its fanout. 
*/ for (int pin_index = 0; pin_index < type->num_pins; ++pin_index) { @@ -3767,7 +3841,7 @@ static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, continue; } - RRNodeId opin_node_index = rr_graph_builder.node_lookup().find_node(i, j, OPIN, pin_index, side); + RRNodeId opin_node_index = rr_graph_builder.node_lookup().find_node(layer, i, j, OPIN, pin_index, side); if (!opin_node_index) continue; //No valid from node for (int iseg = 0; iseg < num_seg_types; iseg++) { @@ -3827,7 +3901,7 @@ static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, //VTR_ASSERT_MSG(seg_index == 0 || seg_index > 0,"seg_index map not working properly"); - rr_edge_count += get_unidir_opin_connections(rr_graph_builder, chan, seg, + rr_edge_count += get_unidir_opin_connections(rr_graph_builder, layer, chan, seg, seg_type_Fc, seg_index, chan_type, seg_details, opin_node_index, rr_edges_to_create, @@ -3840,7 +3914,7 @@ static void build_unidir_rr_opins(RRGraphBuilder& rr_graph_builder, } /* Add in direct connections */ - get_opin_direct_connections(rr_graph_builder, rr_graph, i, j, side, pin_index, opin_node_index, rr_edges_to_create, + get_opin_direct_connections(rr_graph_builder, rr_graph, layer, i, j, side, pin_index, opin_node_index, rr_edges_to_create, directs, num_directs, clb_to_clb_directs); } } @@ -3961,10 +4035,11 @@ static t_clb_to_clb_directs* alloc_and_load_clb_to_clb_directs(const t_direct_in /* Add all direct clb-pin-to-clb-pin edges to given opin * - * The current opin is located at (x,y) along the specified side + * The current opin is located at (layer,x,y) along the specified side */ static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, const RRGraphView& rr_graph, + int layer, int x, int y, e_side side, @@ -3976,12 +4051,12 @@ static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, const t_clb_to_clb_directs* clb_to_clb_directs) { auto& device_ctx = g_vpr_ctx.device(); - t_physical_tile_type_ptr curr_type = 
device_ctx.grid.get_physical_type(x, y); + t_physical_tile_type_ptr curr_type = device_ctx.grid.get_physical_type({x, y, layer}); int num_pins = 0; - int width_offset = device_ctx.grid.get_width_offset(x, y); - int height_offset = device_ctx.grid.get_height_offset(x, y); + int width_offset = device_ctx.grid.get_width_offset({x, y, layer}); + int height_offset = device_ctx.grid.get_height_offset({x, y, layer}); if (!curr_type->pinloc[width_offset][height_offset][side][opin]) { return num_pins; //No source pin on this side } @@ -4004,7 +4079,9 @@ static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, && y + directs[i].y_offset < int(device_ctx.grid.height() - 1) && y + directs[i].y_offset > 0) { //Only add connections if the target clb type matches the type in the direct specification - t_physical_tile_type_ptr target_type = device_ctx.grid.get_physical_type(x + directs[i].x_offset, y + directs[i].y_offset); + t_physical_tile_type_ptr target_type = device_ctx.grid.get_physical_type({x + directs[i].x_offset, + y + directs[i].y_offset, + layer}); if (clb_to_clb_directs[i].to_clb_type == target_type && z + directs[i].sub_tile_offset < int(target_type->capacity) @@ -4064,13 +4141,13 @@ static int get_opin_direct_connections(RRGraphBuilder& rr_graph_builder, if (directs[i].to_side != NUM_SIDES) { //Explicit side specified, only create if pin exists on that side - RRNodeId inode = rr_graph_builder.node_lookup().find_node(x + directs[i].x_offset, y + directs[i].y_offset, IPIN, ipin, directs[i].to_side); + RRNodeId inode = rr_graph_builder.node_lookup().find_node(layer, x + directs[i].x_offset, y + directs[i].y_offset, IPIN, ipin, directs[i].to_side); if (inode) { inodes.push_back(inode); } } else { //No side specified, get all candidates - inodes = rr_graph_builder.node_lookup().find_nodes_at_all_sides(x + directs[i].x_offset, y + directs[i].y_offset, IPIN, ipin); + inodes = rr_graph_builder.node_lookup().find_nodes_at_all_sides(layer, x + 
directs[i].x_offset, y + directs[i].y_offset, IPIN, ipin); } if (inodes.size() > 0) { @@ -4286,6 +4363,7 @@ bool pins_connected(t_block_loc cluster_loc, int x = cluster_loc.loc.x; int y = cluster_loc.loc.y; + int layer = cluster_loc.loc.layer; int abs_cap = cluster_loc.loc.sub_tile; const t_sub_tile* sub_tile = nullptr; @@ -4316,10 +4394,10 @@ bool pins_connected(t_block_loc cluster_loc, VTR_ASSERT(to_pin_physical_num != OPEN); - RRNodeId from_node = get_pin_rr_node_id(rr_spatial_look_up, physical_type, x, y, from_pin_physical_num); + RRNodeId from_node = get_pin_rr_node_id(rr_spatial_look_up, physical_type, layer, x, y, from_pin_physical_num); VTR_ASSERT(from_node != RRNodeId::INVALID()); - RRNodeId to_node = get_pin_rr_node_id(rr_spatial_look_up, physical_type, x, y, to_pin_physical_num); + RRNodeId to_node = get_pin_rr_node_id(rr_spatial_look_up, physical_type, layer, x, y, to_pin_physical_num); VTR_ASSERT(to_node != RRNodeId::INVALID()); int num_edges = rr_graph.num_edges(from_node); diff --git a/vpr/src/route/rr_graph.h b/vpr/src/route/rr_graph.h index 821e0a0de01..af06257d98e 100644 --- a/vpr/src/route/rr_graph.h +++ b/vpr/src/route/rr_graph.h @@ -32,9 +32,12 @@ void create_rr_graph(const t_graph_type graph_type, int* Warnings, bool is_flat); +// Build a complete RR graph, including all modes, for the given tile. This is used by router lookahead when +// flat-routing is enabled. 
It allows it to store the cost from the border of a tile to a sink inside of it void build_tile_rr_graph(RRGraphBuilder& rr_graph_builder, const t_det_routing_arch& det_routing_arch, t_physical_tile_type_ptr physical_tile, + int layer, int x, int y, const int delayless_switch); diff --git a/vpr/src/route/rr_graph2.cpp b/vpr/src/route/rr_graph2.cpp index ecc695ac5e1..9ae222ca20c 100644 --- a/vpr/src/route/rr_graph2.cpp +++ b/vpr/src/route/rr_graph2.cpp @@ -27,6 +27,7 @@ static void get_switch_type(bool is_from_sb, short switch_types[2]); static void load_chan_rr_indices(const int max_chan_width, + const DeviceGrid& grid, const int chan_len, const int num_chans, const t_rr_type type, @@ -42,6 +43,7 @@ static void load_block_rr_indices(RRGraphBuilder& rr_graph_builder, static void add_pins_spatial_lookup(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_type_ptr, const std::vector& pin_num_vec, + int layer, int root_x, int root_y, int* index, @@ -50,6 +52,7 @@ static void add_pins_spatial_lookup(RRGraphBuilder& rr_graph_builder, static void add_classes_spatial_lookup(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_type_ptr, const std::vector& class_num_vec, + int layer, int x, int y, int block_width, @@ -58,6 +61,7 @@ static void add_classes_spatial_lookup(RRGraphBuilder& rr_graph_builder, static int get_bidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, const std::vector conn_tracks, + const int layer, const int to_chan, const int to_seg, const int to_sb, @@ -71,6 +75,7 @@ static int get_bidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create); static int get_unidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, + const int layer, const int from_track, const int to_chan, const int to_seg, @@ -89,6 +94,7 @@ static int get_unidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, t_rr_edge_info_set& rr_edges_to_create); static int get_track_to_chan_seg(RRGraphBuilder& 
rr_graph_builder, + const int layer, const int from_track, const int to_chan, const int to_seg, @@ -143,7 +149,7 @@ void dump_seg_details(t_seg_details* seg_details, // from_seg_coord: The horizontal or vertical location along the channel (i.e. y-coord for CHANY, x-coord for CHANX) // from_chan_type: The from channel type // to_chan_type: The to channel type -static int should_create_switchblock(const DeviceGrid& grid, int from_chan_coord, int from_seg_coord, t_rr_type from_chan_type, t_rr_type to_chan_type); +static int should_create_switchblock(const DeviceGrid& grid, int layer_num, int from_chan_coord, int from_seg_coord, t_rr_type from_chan_type, t_rr_type to_chan_type); static bool should_apply_switch_override(int switch_override); @@ -649,6 +655,7 @@ int get_seg_end(const t_chan_seg_details* seg_details, const int itrack, const i /* Returns the number of tracks to which clb opin #ipin at (i,j) connects. * * Also stores the nodes to which this pin connects in rr_edges_to_create */ int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, + const int layer, const int i, const int j, const int ipin, @@ -665,9 +672,9 @@ int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, auto& device_ctx = g_vpr_ctx.device(); - type = device_ctx.grid.get_physical_type(i, j); - int width_offset = device_ctx.grid.get_width_offset(i, j); - int height_offset = device_ctx.grid.get_height_offset(i, j); + type = device_ctx.grid.get_physical_type({i, j, layer}); + int width_offset = device_ctx.grid.get_width_offset({i, j, layer}); + int height_offset = device_ctx.grid.get_height_offset({i, j, layer}); num_conn = 0; @@ -717,7 +724,7 @@ int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, /* Only connect to wire if there is a CB */ if (is_cblock(chan, seg, to_track, seg_details)) { to_switch = seg_details[to_track].arch_wire_switch(); - RRNodeId to_node = rr_graph_builder.node_lookup().find_node(tr_i, tr_j, to_type, to_track); + RRNodeId to_node = 
rr_graph_builder.node_lookup().find_node(layer, tr_i, tr_j, to_type, to_track); if (!to_node) { continue; @@ -741,6 +748,7 @@ int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, * */ int get_unidir_opin_connections(RRGraphBuilder& rr_graph_builder, + const int layer, const int chan, const int seg, int Fc, @@ -802,8 +810,8 @@ int get_unidir_opin_connections(RRGraphBuilder& rr_graph_builder, dec_track = dec_muxes[dec_mux]; /* Figure the inodes of those muxes */ - RRNodeId inc_inode_index = rr_graph_builder.node_lookup().find_node(x, y, chan_type, inc_track); - RRNodeId dec_inode_index = rr_graph_builder.node_lookup().find_node(x, y, chan_type, dec_track); + RRNodeId inc_inode_index = rr_graph_builder.node_lookup().find_node(layer, x, y, chan_type, inc_track); + RRNodeId dec_inode_index = rr_graph_builder.node_lookup().find_node(layer, x, y, chan_type, dec_track); if (!inc_inode_index || !dec_inode_index) { continue; @@ -1039,48 +1047,50 @@ void dump_track_to_pin_map(t_track_to_pin_lookup& track_to_pin_map, } } static void load_chan_rr_indices(const int max_chan_width, + const DeviceGrid& grid, const int chan_len, const int num_chans, const t_rr_type type, const t_chan_details& chan_details, RRGraphBuilder& rr_graph_builder, int* index) { - for (int chan = 0; chan < num_chans - 1; ++chan) { - for (int seg = 1; seg < chan_len - 1; ++seg) { - /* Assign an inode to the starts of tracks */ - int x = (type == CHANX ? seg : chan); - int y = (type == CHANX ? 
chan : seg); - const t_chan_seg_details* seg_details = chan_details[x][y].data(); - - /* Reserve nodes in lookup to save memory */ - rr_graph_builder.node_lookup().reserve_nodes(chan, seg, type, max_chan_width); - - for (int track = 0; track < max_chan_width; ++track) { - /* TODO: May let the length() == 0 case go through, to model muxes */ - if (seg_details[track].length() <= 0) - continue; + for (int layer = 0; layer < grid.get_num_layers(); layer++) { + for (int chan = 0; chan < num_chans - 1; ++chan) { + for (int seg = 1; seg < chan_len - 1; ++seg) { + /* Assign an inode to the starts of tracks */ + int x = (type == CHANX ? seg : chan); + int y = (type == CHANX ? chan : seg); + const t_chan_seg_details* seg_details = chan_details[x][y].data(); - int start = get_seg_start(seg_details, track, chan, seg); + /* Reserve nodes in lookup to save memory */ + rr_graph_builder.node_lookup().reserve_nodes(layer, chan, seg, type, max_chan_width); - /* TODO: Now we still use the (y, x) convention here for CHANX. Should rework later */ - int node_x = chan; - int node_y = start; - if (CHANX == type) { - std::swap(node_x, node_y); - } + for (int track = 0; track < max_chan_width; ++track) { + /* TODO: May let the length() == 0 case go through, to model muxes */ + if (seg_details[track].length() <= 0) + continue; - /* If the start of the wire doesn't have a inode, - * assign one to it. */ - RRNodeId inode = rr_graph_builder.node_lookup().find_node(node_x, node_y, type, track); - if (!inode) { - inode = RRNodeId(*index); - ++(*index); + int start = get_seg_start(seg_details, track, chan, seg); - rr_graph_builder.node_lookup().add_node(inode, chan, start, type, track); - } + /* TODO: Now we still use the (y, x) convention here for CHANX. Should rework later */ + int node_x = chan; + int node_y = start; + if (CHANX == type) { + std::swap(node_x, node_y); + } + + /* If the start of the wire doesn't have an inode, + * assign one to it. 
*/ + RRNodeId inode = rr_graph_builder.node_lookup().find_node(layer, node_x, node_y, type, track); + if (!inode) { + inode = RRNodeId(*index); + ++(*index); + rr_graph_builder.node_lookup().add_node(inode, layer, chan, start, type, track); + } - /* Assign inode of start of wire to current position */ - rr_graph_builder.node_lookup().add_node(inode, chan, seg, type, track); + /* Assign inode of start of wire to current position */ + rr_graph_builder.node_lookup().add_node(inode, layer, chan, seg, type, track); + } } } } @@ -1094,88 +1104,93 @@ static void load_block_rr_indices(RRGraphBuilder& rr_graph_builder, int* index, bool /*is_flat*/) { //Walk through the grid assigning indices to SOURCE/SINK IPIN/OPIN + for (int layer = 0; layer < grid.get_num_layers(); layer++) { + for (int x = 0; x < (int)grid.width(); x++) { + for (int y = 0; y < (int)grid.height(); y++) { + //Process each block from its root location + if (grid.get_width_offset({x, y, layer}) == 0 && grid.get_height_offset({x, y, layer}) == 0) { + t_physical_tile_type_ptr physical_type = grid.get_physical_type({x, + y, + layer}); + //Assign indices for SINKs and SOURCEs + // Note that SINKS/SOURCES have no side, so we always use side 0 + std::vector class_num_vec; + std::vector pin_num_vec; + + class_num_vec = get_tile_root_classes(physical_type); + pin_num_vec = get_tile_root_pins(physical_type); + add_classes_spatial_lookup(rr_graph_builder, + physical_type, + class_num_vec, + layer, + x, + y, + physical_type->width, + physical_type->height, + index); + + /* Limited sides for grids + * The wanted side depends on the location of the grid. 
+ * In particular for perimeter grid, + * ------------------------------------------------------- + * Grid location | IPIN side + * ------------------------------------------------------- + * TOP | BOTTOM + * ------------------------------------------------------- + * RIGHT | LEFT + * ------------------------------------------------------- + * BOTTOM | TOP + * ------------------------------------------------------- + * LEFT | RIGHT + * ------------------------------------------------------- + * TOP-LEFT | BOTTOM & RIGHT + * ------------------------------------------------------- + * TOP-RIGHT | BOTTOM & LEFT + * ------------------------------------------------------- + * BOTTOM-LEFT | TOP & RIGHT + * ------------------------------------------------------- + * BOTTOM-RIGHT | TOP & LEFT + * ------------------------------------------------------- + * Other | First come first fit + * ------------------------------------------------------- + * + * Special for IPINs: + * If there are multiple wanted sides, first come first fit is applied + * This guarantee that there is only a unique rr_node + * for the same input pin on multiple sides, and thus avoid multiple driver problems + */ + std::vector wanted_sides; + if ((int)grid.height() - 1 == y) { /* TOP side */ + wanted_sides.push_back(BOTTOM); + } + if ((int)grid.width() - 1 == x) { /* RIGHT side */ + wanted_sides.push_back(LEFT); + } + if (0 == y) { /* BOTTOM side */ + wanted_sides.push_back(TOP); + } + if (0 == x) { /* LEFT side */ + wanted_sides.push_back(RIGHT); + } - for (size_t x = 0; x < grid.width(); x++) { - for (size_t y = 0; y < grid.height(); y++) { - //Process each block from it's root location - if (grid.get_width_offset(x, y) == 0 && grid.get_height_offset(x, y) == 0) { - t_physical_tile_type_ptr physical_type = grid.get_physical_type(x, y); - //Assign indices for SINKs and SOURCEs - // Note that SINKS/SOURCES have no side, so we always use side 0 - std::vector class_num_vec; - std::vector pin_num_vec; - - 
class_num_vec = get_tile_root_classes(physical_type); - pin_num_vec = get_tile_root_pins(physical_type); - add_classes_spatial_lookup(rr_graph_builder, - physical_type, - class_num_vec, - x, - y, - physical_type->width, - physical_type->height, - index); - - /* Limited sides for grids - * The wanted side depends on the location of the grid. - * In particular for perimeter grid, - * ------------------------------------------------------- - * Grid location | IPIN side - * ------------------------------------------------------- - * TOP | BOTTOM - * ------------------------------------------------------- - * RIGHT | LEFT - * ------------------------------------------------------- - * BOTTOM | TOP - * ------------------------------------------------------- - * LEFT | RIGHT - * ------------------------------------------------------- - * TOP-LEFT | BOTTOM & RIGHT - * ------------------------------------------------------- - * TOP-RIGHT | BOTTOM & LEFT - * ------------------------------------------------------- - * BOTTOM-LEFT | TOP & RIGHT - * ------------------------------------------------------- - * BOTTOM-RIGHT | TOP & LEFT - * ------------------------------------------------------- - * Other | First come first fit - * ------------------------------------------------------- - * - * Special for IPINs: - * If there are multiple wanted sides, first come first fit is applied - * This guarantee that there is only a unique rr_node - * for the same input pin on multiple sides, and thus avoid multiple driver problems - */ - std::vector wanted_sides; - if (grid.height() - 1 == y) { /* TOP side */ - wanted_sides.push_back(BOTTOM); - } - if (grid.width() - 1 == x) { /* RIGHT side */ - wanted_sides.push_back(LEFT); - } - if (0 == y) { /* BOTTOM side */ - wanted_sides.push_back(TOP); - } - if (0 == x) { /* LEFT side */ - wanted_sides.push_back(RIGHT); - } - - /* If wanted sides is empty still, this block does not have specific wanted sides, - * Deposit all the sides - */ - if 
(wanted_sides.empty()) { - for (e_side side : {TOP, BOTTOM, LEFT, RIGHT}) { - wanted_sides.push_back(side); + /* If wanted sides is empty still, this block does not have specific wanted sides, + * Deposit all the sides + */ + if (wanted_sides.empty()) { + for (e_side side : {TOP, BOTTOM, LEFT, RIGHT}) { + wanted_sides.push_back(side); + } } - } - add_pins_spatial_lookup(rr_graph_builder, - physical_type, - pin_num_vec, - x, - y, - index, - wanted_sides); + add_pins_spatial_lookup(rr_graph_builder, + physical_type, + pin_num_vec, + layer, + x, + y, + index, + wanted_sides); + } } } } @@ -1183,6 +1198,7 @@ static void load_block_rr_indices(RRGraphBuilder& rr_graph_builder, static void add_pins_spatial_lookup(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_type_ptr, const std::vector& pin_num_vec, + int layer, int root_x, int root_y, int* index, @@ -1193,8 +1209,8 @@ static void add_pins_spatial_lookup(RRGraphBuilder& rr_graph_builder, for (int height_offset = 0; height_offset < physical_type_ptr->height; ++height_offset) { int y_tile = root_y + height_offset; //only nodes on the tile may be located in a location other than the root-location - rr_graph_builder.node_lookup().reserve_nodes(x_tile, y_tile, OPIN, physical_type_ptr->num_pins, side); - rr_graph_builder.node_lookup().reserve_nodes(x_tile, y_tile, IPIN, physical_type_ptr->num_pins, side); + rr_graph_builder.node_lookup().reserve_nodes(layer, x_tile, y_tile, OPIN, physical_type_ptr->num_pins, side); + rr_graph_builder.node_lookup().reserve_nodes(layer, x_tile, y_tile, IPIN, physical_type_ptr->num_pins, side); } } } @@ -1210,13 +1226,12 @@ static void add_pins_spatial_lookup(RRGraphBuilder& rr_graph_builder, int x_tile = root_x + x_offset[pin_coord_idx]; int y_tile = root_y + y_offset[pin_coord_idx]; auto side = pin_sides[pin_coord_idx]; - if (pin_type == DRIVER) { - rr_graph_builder.node_lookup().add_node(RRNodeId(*index), x_tile, y_tile, OPIN, pin_num, side); + 
rr_graph_builder.node_lookup().add_node(RRNodeId(*index), layer, x_tile, y_tile, OPIN, pin_num, side); assigned_to_rr_node = true; } else { VTR_ASSERT(pin_type == RECEIVER); - rr_graph_builder.node_lookup().add_node(RRNodeId(*index), x_tile, y_tile, IPIN, pin_num, side); + rr_graph_builder.node_lookup().add_node(RRNodeId(*index), layer, x_tile, y_tile, IPIN, pin_num, side); assigned_to_rr_node = true; } } @@ -1240,6 +1255,7 @@ static void add_pins_spatial_lookup(RRGraphBuilder& rr_graph_builder, static void add_classes_spatial_lookup(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_type_ptr, const std::vector& class_num_vec, + int layer, int root_x, int root_y, int block_width, @@ -1247,18 +1263,19 @@ static void add_classes_spatial_lookup(RRGraphBuilder& rr_graph_builder, int* index) { for (int x_tile = root_x; x_tile < (root_x + block_width); x_tile++) { for (int y_tile = root_y; y_tile < (root_y + block_height); y_tile++) { - rr_graph_builder.node_lookup().reserve_nodes(x_tile, y_tile, SOURCE, class_num_vec.size(), SIDES[0]); - rr_graph_builder.node_lookup().reserve_nodes(x_tile, y_tile, SINK, class_num_vec.size(), SIDES[0]); + rr_graph_builder.node_lookup().reserve_nodes(layer, x_tile, y_tile, SOURCE, class_num_vec.size(), SIDES[0]); + rr_graph_builder.node_lookup().reserve_nodes(layer, x_tile, y_tile, SINK, class_num_vec.size(), SIDES[0]); } } for (auto class_num : class_num_vec) { auto class_type = get_class_type_from_class_physical_num(physical_type_ptr, class_num); if (class_type == DRIVER) { - rr_graph_builder.node_lookup().add_node(RRNodeId(*index), root_x, root_y, SOURCE, class_num); + rr_graph_builder.node_lookup().add_node(RRNodeId(*index), layer, root_x, root_y, SOURCE, class_num); } else { VTR_ASSERT(class_type == RECEIVER); - rr_graph_builder.node_lookup().add_node(RRNodeId(*index), root_x, root_y, SINK, class_num); + + rr_graph_builder.node_lookup().add_node(RRNodeId(*index), layer, root_x, root_y, SINK, class_num); } ++(*index); 
} @@ -1273,11 +1290,11 @@ static void add_classes_spatial_lookup(RRGraphBuilder& rr_graph_builder, } int curr_x = root_x + x_offset; int curr_y = root_y + y_offset; - rr_graph_builder.node_lookup().mirror_nodes(vtr::Point(root_x, root_y), + rr_graph_builder.node_lookup().mirror_nodes(layer, vtr::Point(root_x, root_y), vtr::Point(curr_x, curr_y), SOURCE, SIDES[0]); - rr_graph_builder.node_lookup().mirror_nodes(vtr::Point(root_x, root_y), + rr_graph_builder.node_lookup().mirror_nodes(layer, vtr::Point(root_x, root_y), vtr::Point(curr_x, curr_y), SINK, SIDES[0]); @@ -1309,9 +1326,9 @@ void alloc_and_load_rr_node_indices(RRGraphBuilder& rr_graph_builder, /* Alloc the lookup table */ for (t_rr_type rr_type : RR_TYPES) { if (rr_type == CHANX) { - rr_graph_builder.node_lookup().resize_nodes(grid.height(), grid.width(), rr_type, NUM_SIDES); + rr_graph_builder.node_lookup().resize_nodes(grid.get_num_layers(), grid.height(), grid.width(), rr_type, NUM_SIDES); } else { - rr_graph_builder.node_lookup().resize_nodes(grid.width(), grid.height(), rr_type, NUM_SIDES); + rr_graph_builder.node_lookup().resize_nodes(grid.get_num_layers(), grid.width(), grid.height(), rr_type, NUM_SIDES); } } @@ -1319,9 +1336,9 @@ void alloc_and_load_rr_node_indices(RRGraphBuilder& rr_graph_builder, load_block_rr_indices(rr_graph_builder, grid, index, is_flat); /* Load the data for x and y channels */ - load_chan_rr_indices(nodes_per_chan->x_max, grid.width(), grid.height(), + load_chan_rr_indices(nodes_per_chan->x_max, grid, grid.width(), grid.height(), CHANX, chan_details_x, rr_graph_builder, index); - load_chan_rr_indices(nodes_per_chan->y_max, grid.height(), grid.width(), + load_chan_rr_indices(nodes_per_chan->y_max, grid, grid.height(), grid.width(), CHANY, chan_details_y, rr_graph_builder, index); } @@ -1330,39 +1347,44 @@ void alloc_and_load_intra_cluster_rr_node_indices(RRGraphBuilder& rr_graph_build const vtr::vector& pin_chains, const vtr::vector>& pin_chains_num, int* index) { - for (size_t 
x = 0; x < grid.width(); x++) { - for (size_t y = 0; y < grid.height(); y++) { - //Process each block from it's root location - if (grid.get_width_offset(x, y) == 0 && grid.get_height_offset(x, y) == 0) { - t_physical_tile_type_ptr physical_type = grid.get_physical_type(x, y); - //Assign indices for SINKs and SOURCEs - // Note that SINKS/SOURCES have no side, so we always use side 0 - std::vector class_num_vec; - std::vector pin_num_vec; - class_num_vec = get_cluster_netlist_intra_tile_classes_at_loc(x, y, physical_type); - pin_num_vec = get_cluster_netlist_intra_tile_pins_at_loc(x, - y, - pin_chains, - pin_chains_num, - physical_type); - add_classes_spatial_lookup(rr_graph_builder, - physical_type, - class_num_vec, - x, - y, - physical_type->width, - physical_type->height, - index); - - std::vector wanted_sides; - wanted_sides.push_back(e_side::TOP); - add_pins_spatial_lookup(rr_graph_builder, - physical_type, - pin_num_vec, - x, - y, - index, - wanted_sides); + for (int layer = 0; layer < grid.get_num_layers(); layer++) { + for (int x = 0; x < (int)grid.width(); x++) { + for (int y = 0; y < (int)grid.height(); y++) { + //Process each block from it's root location + if (grid.get_width_offset({x, y, layer}) == 0 && grid.get_height_offset({x, y, layer}) == 0) { + t_physical_tile_type_ptr physical_type = grid.get_physical_type({x, y, layer}); + //Assign indices for SINKs and SOURCEs + // Note that SINKS/SOURCES have no side, so we always use side 0 + std::vector class_num_vec; + std::vector pin_num_vec; + class_num_vec = get_cluster_netlist_intra_tile_classes_at_loc(layer, x, y, physical_type); + pin_num_vec = get_cluster_netlist_intra_tile_pins_at_loc(layer, + x, + y, + pin_chains, + pin_chains_num, + physical_type); + add_classes_spatial_lookup(rr_graph_builder, + physical_type, + class_num_vec, + layer, + x, + y, + physical_type->width, + physical_type->height, + index); + + std::vector wanted_sides; + wanted_sides.push_back(e_side::TOP); + 
add_pins_spatial_lookup(rr_graph_builder, + physical_type, + pin_num_vec, + layer, + x, + y, + index, + wanted_sides); + } } } } @@ -1386,111 +1408,114 @@ bool verify_rr_node_indices(const DeviceGrid& grid, int width = grid.width(); int height = grid.height(); - - for (int x = 0; x < width; ++x) { - for (int y = 0; y < height; ++y) { - for (t_rr_type rr_type : RR_TYPES) { - /* Get the list of nodes at a specific location (x, y) */ - std::vector nodes_from_lookup; - if (rr_type == CHANX || rr_type == CHANY) { - nodes_from_lookup = rr_graph.node_lookup().find_channel_nodes(x, y, rr_type); - } else { - nodes_from_lookup = rr_graph.node_lookup().find_grid_nodes_at_all_sides(x, y, rr_type); - } - for (RRNodeId inode : nodes_from_lookup) { - rr_node_counts[inode]++; - - if (rr_graph.node_type(inode) != rr_type) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node type does not match between rr_nodes and rr_node_indices (%s/%s): %s", - rr_node_typename[rr_graph.node_type(inode)], - rr_node_typename[rr_type], - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + int layer = grid.get_num_layers(); + + for (int l = 0; l < layer; ++l) { + for (int x = 0; x < width; ++x) { + for (int y = 0; y < height; ++y) { + for (t_rr_type rr_type : RR_TYPES) { + /* Get the list of nodes at a specific location (x, y) */ + std::vector nodes_from_lookup; + if (rr_type == CHANX || rr_type == CHANY) { + nodes_from_lookup = rr_graph.node_lookup().find_channel_nodes(l, x, y, rr_type); + } else { + nodes_from_lookup = rr_graph.node_lookup().find_grid_nodes_at_all_sides(l, x, y, rr_type); } + for (RRNodeId inode : nodes_from_lookup) { + rr_node_counts[inode]++; - if (rr_graph.node_type(inode) == CHANX) { - VTR_ASSERT_MSG(rr_graph.node_ylow(inode) == rr_graph.node_yhigh(inode), "CHANX should be horizontal"); - - if (y != rr_graph.node_ylow(inode)) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node y position does not agree between rr_nodes (%d) and rr_node_indices (%d): %s", - 
rr_graph.node_ylow(inode), - y, + if (rr_graph.node_type(inode) != rr_type) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node type does not match between rr_nodes and rr_node_indices (%s/%s): %s", + rr_node_typename[rr_graph.node_type(inode)], + rr_node_typename[rr_type], describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); } - if (!rr_graph.x_in_node_range(x, inode)) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node x positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", - rr_graph.node_xlow(inode), - rr_graph.node_xlow(inode), - x, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); - } - } else if (rr_graph.node_type(inode) == CHANY) { - VTR_ASSERT_MSG(rr_graph.node_xlow(inode) == rr_graph.node_xhigh(inode), "CHANY should be veritcal"); + if (rr_graph.node_type(inode) == CHANX) { + VTR_ASSERT_MSG(rr_graph.node_ylow(inode) == rr_graph.node_yhigh(inode), "CHANX should be horizontal"); - if (x != rr_graph.node_xlow(inode)) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node x position does not agree between rr_nodes (%d) and rr_node_indices (%d): %s", - rr_graph.node_xlow(inode), - x, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); - } + if (y != rr_graph.node_ylow(inode)) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node y position does not agree between rr_nodes (%d) and rr_node_indices (%d): %s", + rr_graph.node_ylow(inode), + y, + describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + } - if (!rr_graph.y_in_node_range(y, inode)) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node y positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", - rr_graph.node_ylow(inode), - rr_graph.node_ylow(inode), - y, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); - } - } else if (rr_graph.node_type(inode) == SOURCE || rr_graph.node_type(inode) == SINK) { - //Sources have co-ordintes covering the entire block 
they are in - if (!rr_graph.x_in_node_range(x, inode)) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node x positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", - rr_graph.node_xlow(inode), - rr_graph.node_xlow(inode), - x, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); - } + if (!rr_graph.x_in_node_range(x, inode)) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node x positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", + rr_graph.node_xlow(inode), + rr_graph.node_xlow(inode), + x, + describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + } + } else if (rr_graph.node_type(inode) == CHANY) { + VTR_ASSERT_MSG(rr_graph.node_xlow(inode) == rr_graph.node_xhigh(inode), "CHANY should be veritcal"); + + if (x != rr_graph.node_xlow(inode)) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node x position does not agree between rr_nodes (%d) and rr_node_indices (%d): %s", + rr_graph.node_xlow(inode), + x, + describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + } - if (!rr_graph.y_in_node_range(y, inode)) { - VPR_ERROR(VPR_ERROR_ROUTE, "RR node y positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", - rr_graph.node_ylow(inode), - rr_graph.node_ylow(inode), - y, - describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); - } + if (!rr_graph.y_in_node_range(y, inode)) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node y positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", + rr_graph.node_ylow(inode), + rr_graph.node_ylow(inode), + y, + describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + } + } else if (rr_graph.node_type(inode) == SOURCE || rr_graph.node_type(inode) == SINK) { + //Sources have co-ordintes covering the entire block they are in + if (!rr_graph.x_in_node_range(x, inode)) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node x positions do not agree 
between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", + rr_graph.node_xlow(inode), + rr_graph.node_xlow(inode), + x, + describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + } - } else { - VTR_ASSERT(rr_graph.node_type(inode) == IPIN || rr_graph.node_type(inode) == OPIN); - /* As we allow a pin to be indexable on multiple sides, - * This check code should be invalid - * if (rr_node.xlow() != x) { - * VPR_ERROR(VPR_ERROR_ROUTE, "RR node xlow does not match between rr_nodes and rr_node_indices (%d/%d): %s", - * rr_node.xlow(), - * x, - * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); - * } - * - * if (rr_node.ylow() != y) { - * VPR_ERROR(VPR_ERROR_ROUTE, "RR node ylow does not match between rr_nodes and rr_node_indices (%d/%d): %s", - * rr_node.ylow(), - * y, - * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); - * } - */ - } + if (!rr_graph.y_in_node_range(y, inode)) { + VPR_ERROR(VPR_ERROR_ROUTE, "RR node y positions do not agree between rr_nodes (%d <-> %d) and rr_node_indices (%d): %s", + rr_graph.node_ylow(inode), + rr_graph.node_ylow(inode), + y, + describe_rr_node(rr_graph, grid, rr_indexed_data, size_t(inode), is_flat).c_str()); + } + + } else { + VTR_ASSERT(rr_graph.node_type(inode) == IPIN || rr_graph.node_type(inode) == OPIN); + /* As we allow a pin to be indexable on multiple sides, + * This check code should be invalid + * if (rr_node.xlow() != x) { + * VPR_ERROR(VPR_ERROR_ROUTE, "RR node xlow does not match between rr_nodes and rr_node_indices (%d/%d): %s", + * rr_node.xlow(), + * x, + * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); + * } + * + * if (rr_node.ylow() != y) { + * VPR_ERROR(VPR_ERROR_ROUTE, "RR node ylow does not match between rr_nodes and rr_node_indices (%d/%d): %s", + * rr_node.ylow(), + * y, + * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); + * } + */ + } - if (rr_type == IPIN || rr_type == OPIN) { - /* As we allow a pin 
to be indexable on multiple sides, - * This check code should be invalid - * if (rr_node.side() != side) { - * VPR_ERROR(VPR_ERROR_ROUTE, "RR node xlow does not match between rr_nodes and rr_node_indices (%s/%s): %s", - * SIDE_STRING[rr_node.side()], - * SIDE_STRING[side], - * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); - * } else { - * VTR_ASSERT(rr_node.side() == side); - * } - */ + if (rr_type == IPIN || rr_type == OPIN) { + /* As we allow a pin to be indexable on multiple sides, + * This check code should be invalid + * if (rr_node.side() != side) { + * VPR_ERROR(VPR_ERROR_ROUTE, "RR node xlow does not match between rr_nodes and rr_node_indices (%s/%s): %s", + * SIDE_STRING[rr_node.side()], + * SIDE_STRING[side], + * describe_rr_node(rr_graph, grid, rr_indexed_data, inode).c_str()); + * } else { + * VTR_ASSERT(rr_node.side() == side); + * } + */ + } } } } @@ -1537,6 +1562,7 @@ bool verify_rr_node_indices(const DeviceGrid& grid, } int get_track_to_pins(RRGraphBuilder& rr_graph_builder, + int layer, int seg, int chan, int track, @@ -1579,7 +1605,7 @@ int get_track_to_pins(RRGraphBuilder& rr_graph_builder, } /* PAJ - if the pointed to is an EMPTY then shouldn't look for ipins */ - auto type = device_ctx.grid.get_physical_type(x, y); + auto type = device_ctx.grid.get_physical_type({x, y, layer}); if (type == device_ctx.EMPTY_PHYSICAL_TILE_TYPE) continue; @@ -1592,8 +1618,8 @@ int get_track_to_pins(RRGraphBuilder& rr_graph_builder, /* We need the type to find the ipin map for this type */ - int width_offset = device_ctx.grid.get_width_offset(x, y); - int height_offset = device_ctx.grid.get_height_offset(x, y); + int width_offset = device_ctx.grid.get_width_offset({x, y, layer}); + int height_offset = device_ctx.grid.get_height_offset({x, y, layer}); max_conn = track_to_pin_lookup[type->index][phy_track][width_offset][height_offset][side].size(); for (iconn = 0; iconn < max_conn; iconn++) { @@ -1601,7 +1627,7 @@ int 
get_track_to_pins(RRGraphBuilder& rr_graph_builder, /* Check there is a connection and Fc map isn't wrong */ /*int to_node = get_rr_node_index(L_rr_node_indices, x + width_offset, y + height_offset, IPIN, ipin, side);*/ - RRNodeId to_node = rr_graph_builder.node_lookup().find_node(x, y, IPIN, ipin, side); + RRNodeId to_node = rr_graph_builder.node_lookup().find_node(layer, x, y, IPIN, ipin, side); if (to_node) { rr_edges_to_create.emplace_back(from_rr_node, to_node, wire_to_ipin_switch); ++num_conn; @@ -1632,6 +1658,7 @@ int get_track_to_pins(RRGraphBuilder& rr_graph_builder, * transistor. */ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, + const int layer, const int from_chan, const int from_seg, const int from_track, @@ -1714,7 +1741,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, from_is_sblock = true; } - auto switch_override = should_create_switchblock(grid, from_chan, sb_seg, from_type, to_type); + auto switch_override = should_create_switchblock(grid, layer, from_chan, sb_seg, from_type, to_type); if (switch_override == NO_SWITCH) { continue; //Do not create an SB here } @@ -1734,7 +1761,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, to_sb = from_chan; } - /* to_chan_details may correspond to an x-directed or y-directed channel, depending for which + /* to_chan_details may correspond to an x-directed or y-directed channel, depending on which * channel type this function is used; so coordinates are reversed as necessary */ if (to_type == CHANX) { to_seg_details = to_chan_details[to_seg][to_chan].data(); @@ -1778,7 +1805,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, if (sb_seg < end_sb_seg) { if (custom_switch_block) { if (Direction::DEC == from_seg_details[from_track].direction() || BI_DIRECTIONAL == directionality) { - num_conn += get_track_to_chan_seg(rr_graph_builder, from_track, to_chan, to_seg, + num_conn += get_track_to_chan_seg(rr_graph_builder, layer, from_track, to_chan, to_seg, to_type, 
from_side_a, to_side, switch_override, sb_conn_map, from_rr_node, rr_edges_to_create); @@ -1788,7 +1815,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, /* For bidir, the target segment might have an unbuffered (bidir pass transistor) * switchbox, so we follow through regardless of whether the current segment has an SB */ conn_tracks = switch_block_conn[from_side_a][to_side][from_track]; - num_conn += get_bidir_track_to_chan_seg(rr_graph_builder, conn_tracks, + num_conn += get_bidir_track_to_chan_seg(rr_graph_builder, conn_tracks, layer, to_chan, to_seg, to_sb, to_type, to_seg_details, from_is_sblock, from_switch, switch_override, @@ -1799,7 +1826,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, /* Also, we are connecting from the top or right of SB so it * makes the most sense to only get there from Direction::DEC wires. */ if ((from_is_sblock) && (Direction::DEC == from_seg_details[from_track].direction())) { - num_conn += get_unidir_track_to_chan_seg(rr_graph_builder, from_track, to_chan, + num_conn += get_unidir_track_to_chan_seg(rr_graph_builder, layer, from_track, to_chan, to_seg, to_sb, to_type, max_chan_width, grid, from_side_a, to_side, Fs_per_side, sblock_pattern, @@ -1816,7 +1843,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, if (sb_seg > start_sb_seg) { if (custom_switch_block) { if (Direction::INC == from_seg_details[from_track].direction() || BI_DIRECTIONAL == directionality) { - num_conn += get_track_to_chan_seg(rr_graph_builder, from_track, to_chan, to_seg, + num_conn += get_track_to_chan_seg(rr_graph_builder, layer, from_track, to_chan, to_seg, to_type, from_side_b, to_side, switch_override, sb_conn_map, from_rr_node, rr_edges_to_create); @@ -1826,7 +1853,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, /* For bidir, the target segment might have an unbuffered (bidir pass transistor) * switchbox, so we follow through regardless of whether the current segment has an SB */ conn_tracks = 
switch_block_conn[from_side_b][to_side][from_track]; - num_conn += get_bidir_track_to_chan_seg(rr_graph_builder, conn_tracks, + num_conn += get_bidir_track_to_chan_seg(rr_graph_builder, conn_tracks, layer, to_chan, to_seg, to_sb, to_type, to_seg_details, from_is_sblock, from_switch, switch_override, @@ -1838,7 +1865,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, * makes the most sense to only get there from Direction::INC wires. */ if ((from_is_sblock) && (Direction::INC == from_seg_details[from_track].direction())) { - num_conn += get_unidir_track_to_chan_seg(rr_graph_builder, from_track, to_chan, + num_conn += get_unidir_track_to_chan_seg(rr_graph_builder, layer, from_track, to_chan, to_seg, to_sb, to_type, max_chan_width, grid, from_side_b, to_side, Fs_per_side, sblock_pattern, @@ -1856,6 +1883,7 @@ int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, void alloc_and_load_tile_rr_node_indices(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_tile, + int layer, int x, int y, int* num_rr_nodes) { @@ -1869,6 +1897,7 @@ void alloc_and_load_tile_rr_node_indices(RRGraphBuilder& rr_graph_builder, add_classes_spatial_lookup(rr_graph_builder, physical_tile, class_num_vec, + layer, x, y, physical_tile->width, @@ -1878,6 +1907,7 @@ void alloc_and_load_tile_rr_node_indices(RRGraphBuilder& rr_graph_builder, add_pins_spatial_lookup(rr_graph_builder, physical_tile, pin_num_vec, + layer, x, y, num_rr_nodes, @@ -1886,6 +1916,7 @@ void alloc_and_load_tile_rr_node_indices(RRGraphBuilder& rr_graph_builder, static int get_bidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, const std::vector conn_tracks, + const int layer, const int to_chan, const int to_seg, const int to_sb, @@ -1916,7 +1947,7 @@ static int get_bidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, num_conn = 0; for (iconn = 0; iconn < conn_tracks.size(); ++iconn) { to_track = conn_tracks[iconn]; - RRNodeId to_node = rr_graph_builder.node_lookup().find_node(to_x, to_y, 
to_type, to_track); + RRNodeId to_node = rr_graph_builder.node_lookup().find_node(layer, to_x, to_y, to_type, to_track); if (!to_node) { continue; @@ -1953,6 +1984,7 @@ static int get_bidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, * See route/build_switchblocks.c for a detailed description of how the switch block * connection map sb_conn_map is generated. */ static int get_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, + const int layer, const int from_wire, const int to_chan, const int to_seg, @@ -1995,7 +2027,7 @@ static int get_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, if (conn_vector.at(iconn).from_wire != from_wire) continue; int to_wire = conn_vector.at(iconn).to_wire; - RRNodeId to_node = rr_graph_builder.node_lookup().find_node(to_x, to_y, to_chan_type, to_wire); + RRNodeId to_node = rr_graph_builder.node_lookup().find_node(layer, to_x, to_y, to_chan_type, to_wire); if (!to_node) { continue; @@ -2027,6 +2059,7 @@ static int get_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, } static int get_unidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, + const int layer, const int from_track, const int to_chan, const int to_seg, @@ -2090,8 +2123,7 @@ static int get_unidir_track_to_chan_seg(RRGraphBuilder& rr_graph_builder, to_track = mux_labels[(to_mux + i) % num_labels]; sblock_pattern[sb_x][sb_y][from_side][to_side][from_track][j + 1] = to_track; } - - RRNodeId to_node = rr_graph_builder.node_lookup().find_node(to_x, to_y, to_type, to_track); + RRNodeId to_node = rr_graph_builder.node_lookup().find_node(layer, to_x, to_y, to_type, to_track); if (!to_node) { continue; @@ -2709,7 +2741,7 @@ static int find_label_of_track(const std::vector& wire_mux_on_track, return i_label; } -static int should_create_switchblock(const DeviceGrid& grid, int from_chan_coord, int from_seg_coord, t_rr_type from_chan_type, t_rr_type to_chan_type) { +static int should_create_switchblock(const DeviceGrid& grid, int layer_num, int from_chan_coord, int 
from_seg_coord, t_rr_type from_chan_type, t_rr_type to_chan_type) { //Convert the chan/seg indices to real x/y coordinates int y_coord; int x_coord; @@ -2722,9 +2754,9 @@ static int should_create_switchblock(const DeviceGrid& grid, int from_chan_coord x_coord = from_chan_coord; } - auto blk_type = grid.get_physical_type(x_coord, y_coord); - int width_offset = grid.get_width_offset(x_coord, y_coord); - int height_offset = grid.get_height_offset(x_coord, y_coord); + auto blk_type = grid.get_physical_type({x_coord, y_coord, layer_num}); + int width_offset = grid.get_width_offset({x_coord, y_coord, layer_num}); + int height_offset = grid.get_height_offset({x_coord, y_coord, layer_num}); e_sb_type sb_type = blk_type->switchblock_locations[width_offset][height_offset]; auto switch_override = blk_type->switchblock_switch_overrides[width_offset][height_offset]; diff --git a/vpr/src/route/rr_graph2.h b/vpr/src/route/rr_graph2.h index 289521432dd..b2d47519739 100644 --- a/vpr/src/route/rr_graph2.h +++ b/vpr/src/route/rr_graph2.h @@ -31,6 +31,7 @@ void alloc_and_load_rr_node_indices(RRGraphBuilder& rr_graph_builder, void alloc_and_load_tile_rr_node_indices(RRGraphBuilder& rr_graph_builder, t_physical_tile_type_ptr physical_tile, + int layer, int x, int y, int* num_rr_nodes); @@ -125,6 +126,7 @@ bool is_sblock(const int chan, const enum e_directionality directionality); int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, + const int layer, const int i, const int j, const int ipin, @@ -135,6 +137,7 @@ int get_bidir_opin_connections(RRGraphBuilder& rr_graph_builder, const t_chan_details& chan_details_y); int get_unidir_opin_connections(RRGraphBuilder& rr_graph_builder, + const int layer, const int chan, const int seg, int Fc, @@ -149,6 +152,7 @@ int get_unidir_opin_connections(RRGraphBuilder& rr_graph_builder, bool* Fc_clipped); int get_track_to_pins(RRGraphBuilder& rr_graph_builder, + int layer, int seg, int chan, int track, @@ -163,6 +167,7 @@ int 
get_track_to_pins(RRGraphBuilder& rr_graph_builder, enum e_directionality directionality); int get_track_to_tracks(RRGraphBuilder& rr_graph_builder, + const int layer, const int from_chan, const int from_seg, const int from_track, diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 50584679ea1..e3815e4e006 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -128,17 +128,19 @@ void sync_grid_to_blocks() { auto& device_ctx = g_vpr_ctx.device(); auto& device_grid = device_ctx.grid; + int num_layers = device_ctx.grid.get_num_layers(); + /* Reset usage and allocate blocks list if needed */ + place_ctx.grid_blocks = GridBlock(device_grid.width(), + device_grid.height(), + device_ctx.grid.get_num_layers()); auto& grid_blocks = place_ctx.grid_blocks; - grid_blocks.resize({device_grid.width(), device_grid.height()}); - for (size_t x = 0; x < device_grid.width(); ++x) { - for (size_t y = 0; y < device_grid.height(); ++y) { - auto& grid_block = grid_blocks[x][y]; - const auto& type = device_ctx.grid.get_physical_type(x, y); - grid_block.blocks.resize(type->capacity); - - for (int z = 0; z < type->capacity; ++z) { - grid_block.blocks[z] = EMPTY_BLOCK_ID; + + for (int layer_num = 0; layer_num < num_layers; layer_num++) { + for (int x = 0; x < (int)device_grid.width(); ++x) { + for (int y = 0; y < (int)device_grid.height(); ++y) { + const auto& type = device_ctx.grid.get_physical_type({x, y, layer_num}); + grid_blocks.initialized_grid_block_at_location({x, y, layer_num}, type->capacity); } } } @@ -146,9 +148,11 @@ void sync_grid_to_blocks() { /* Go through each block */ auto& cluster_ctx = g_vpr_ctx.clustering(); for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { + const auto& blk_loc = place_ctx.block_locs[blk_id].loc; int blk_x = place_ctx.block_locs[blk_id].loc.x; int blk_y = place_ctx.block_locs[blk_id].loc.y; int blk_z = place_ctx.block_locs[blk_id].loc.sub_tile; + int blk_layer = place_ctx.block_locs[blk_id].loc.layer; auto 
type = physical_tile_type(blk_id); @@ -162,32 +166,42 @@ void sync_grid_to_blocks() { } /* Check types match */ - if (type != device_ctx.grid.get_physical_type(blk_x, blk_y)) { - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "A block is in a grid location (%d x %d) with a conflicting types '%s' and '%s' .\n", - blk_x, blk_y, + if (type != device_ctx.grid.get_physical_type({blk_x, blk_y, blk_layer})) { + VPR_FATAL_ERROR(VPR_ERROR_PLACE, "A block is in a grid location (%d x %d) layer (%d) with a conflicting types '%s' and '%s' .\n", + blk_x, blk_y, blk_layer, type->name, - device_ctx.grid.get_physical_type(blk_x, blk_y)->name); + device_ctx.grid.get_physical_type({blk_x, blk_y, blk_layer})->name); } /* Check already in use */ - if ((EMPTY_BLOCK_ID != place_ctx.grid_blocks[blk_x][blk_y].blocks[blk_z]) - && (INVALID_BLOCK_ID != place_ctx.grid_blocks[blk_x][blk_y].blocks[blk_z])) { - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Location (%d, %d, %d) is used more than once.\n", - blk_x, blk_y, blk_z); + if ((EMPTY_BLOCK_ID != place_ctx.grid_blocks.block_at_location(blk_loc)) + && (INVALID_BLOCK_ID != place_ctx.grid_blocks.block_at_location(blk_loc))) { + VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Location (%d, %d, %d, %d) is used more than once.\n", + blk_x, blk_y, blk_z, blk_layer); } - if (device_ctx.grid.get_width_offset(blk_x, blk_y) != 0 || device_ctx.grid.get_height_offset(blk_x, blk_y) != 0) { - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Large block not aligned in placment for cluster_ctx.blocks %lu at (%d, %d, %d).", - size_t(blk_id), blk_x, blk_y, blk_z); + if (device_ctx.grid.get_width_offset({blk_x, blk_y, blk_layer}) != 0 || device_ctx.grid.get_height_offset({blk_x, blk_y, blk_layer}) != 0) { + VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Large block not aligned in placment for cluster_ctx.blocks %lu at (%d, %d, %d, %d).", + size_t(blk_id), blk_x, blk_y, blk_z, blk_layer); } /* Set the block */ for (int width = 0; width < type->width; ++width) { for (int height = 0; height < type->height; ++height) { - 
place_ctx.grid_blocks[blk_x + width][blk_y + height].blocks[blk_z] = blk_id; - place_ctx.grid_blocks[blk_x + width][blk_y + height].usage++; - VTR_ASSERT(device_ctx.grid.get_width_offset(blk_x + width, blk_y + height) == width); - VTR_ASSERT(device_ctx.grid.get_height_offset(blk_x + width, blk_y + height) == height); + place_ctx.grid_blocks.set_block_at_location({blk_x + width, + blk_y + height, + blk_z, + blk_layer}, + blk_id); + place_ctx.grid_blocks.set_usage({blk_x + width, + blk_y + height, + blk_layer}, + place_ctx.grid_blocks.get_usage({blk_x + width, + blk_y + height, + blk_layer}) + + 1); + VTR_ASSERT(device_ctx.grid.get_width_offset({blk_x + width, blk_y + height, blk_layer}) == width); + VTR_ASSERT(device_ctx.grid.get_height_offset({blk_x + width, blk_y + height, blk_layer}) == height); } } } @@ -202,11 +216,15 @@ std::string rr_node_arch_name(int inode, bool is_flat) { std::string rr_node_arch_name; if (rr_graph.node_type(RRNodeId(inode)) == OPIN || rr_graph.node_type(RRNodeId(inode)) == IPIN) { //Pin names - auto type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(rr_node), rr_graph.node_ylow(rr_node)); + auto type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(rr_node), + rr_graph.node_ylow(rr_node), + rr_graph.node_layer(rr_node)}); rr_node_arch_name += block_type_pin_index_to_name(type, rr_graph.node_pin_num(rr_node), is_flat); } else if (rr_graph.node_type(RRNodeId(inode)) == SOURCE || rr_graph.node_type(RRNodeId(inode)) == SINK) { //Set of pins associated with SOURCE/SINK - auto type = device_ctx.grid.get_physical_type(rr_graph.node_xlow(rr_node), rr_graph.node_ylow(rr_node)); + auto type = device_ctx.grid.get_physical_type({rr_graph.node_xlow(rr_node), + rr_graph.node_ylow(rr_node), + rr_graph.node_layer(rr_node)}); auto pin_names = block_type_class_index_to_pin_names(type, rr_graph.node_class_num(rr_node), is_flat); if (pin_names.size() > 1) { rr_node_arch_name += rr_graph.node_type_string(RRNodeId(inode)); @@ -516,7 +534,7 @@ 
t_physical_tile_type_ptr physical_tile_type(ClusterBlockId blk) { auto block_loc = place_ctx.block_locs[blk]; auto loc = block_loc.loc; - return device_ctx.grid.get_physical_type(loc.x, loc.y); + return device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); } t_physical_tile_type_ptr physical_tile_type(AtomBlockId atom_blk) { @@ -546,7 +564,7 @@ int get_sub_tile_index(ClusterBlockId blk) { auto loc = block_loc.loc; int sub_tile_coordinate = loc.sub_tile; - auto type = device_ctx.grid.get_physical_type(loc.x, loc.y); + auto type = device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); for (const auto& sub_tile : type->sub_tiles) { if (sub_tile.capacity.is_in_range(sub_tile_coordinate)) { @@ -732,9 +750,9 @@ t_logical_block_type_ptr infer_logic_block_type(const DeviceGrid& grid) { int rhs_num_instances = 0; // Count number of instances for each type for (auto type : lhs->equivalent_tiles) - lhs_num_instances += grid.num_instances(type); + lhs_num_instances += grid.num_instances(type, -1); for (auto type : rhs->equivalent_tiles) - rhs_num_instances += grid.num_instances(type); + rhs_num_instances += grid.num_instances(type, -1); return lhs_num_instances > rhs_num_instances; }; std::stable_sort(logic_block_candidates.begin(), logic_block_candidates.end(), by_desc_grid_count); @@ -759,7 +777,7 @@ t_logical_block_type_ptr find_most_common_block_type(const DeviceGrid& grid) { for (const auto& logical_block : device_ctx.logical_block_types) { size_t inst_cnt = 0; for (const auto& equivalent_tile : logical_block.equivalent_tiles) { - inst_cnt += grid.num_instances(equivalent_tile); + inst_cnt += grid.num_instances(equivalent_tile, -1); } if (max_count < inst_cnt) { @@ -781,7 +799,7 @@ t_physical_tile_type_ptr find_most_common_tile_type(const DeviceGrid& grid) { t_physical_tile_type_ptr max_type = nullptr; size_t max_count = 0; for (const auto& physical_tile : device_ctx.physical_tile_types) { - size_t inst_cnt = grid.num_instances(&physical_tile); + size_t 
inst_cnt = grid.num_instances(&physical_tile, -1); if (max_count < inst_cnt) { max_count = inst_cnt; @@ -1332,11 +1350,9 @@ std::tuplecapacity); auto& cluster_net_list = g_vpr_ctx.clustering().clb_nlist; @@ -2148,7 +2164,7 @@ t_physical_tile_type_ptr get_physical_tile_type(const ClusterBlockId blk) { t_pl_loc loc = place_ctx.block_locs[blk].loc; - return device_ctx.grid.get_physical_type(loc.x, loc.y); + return device_ctx.grid.get_physical_type({loc.x, loc.y, loc.layer}); } } @@ -2283,7 +2299,9 @@ int get_rr_node_max_ptc(const RRGraphView& rr_graph_view, VTR_ASSERT(node_type == IPIN || node_type == OPIN || node_type == SINK || node_type == SOURCE); const DeviceContext& device_ctx = g_vpr_ctx.device(); - auto physical_type = device_ctx.grid.get_physical_type(rr_graph_view.node_xlow(node_id), rr_graph_view.node_ylow(node_id)); + auto physical_type = device_ctx.grid.get_physical_type({rr_graph_view.node_xlow(node_id), + rr_graph_view.node_ylow(node_id), + rr_graph_view.node_layer(node_id)}); if (node_type == SINK || node_type == SOURCE) { return get_tile_class_max_ptc(physical_type, is_flat); @@ -2294,6 +2312,7 @@ int get_rr_node_max_ptc(const RRGraphView& rr_graph_view, RRNodeId get_pin_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, t_physical_tile_type_ptr physical_tile, + const int layer, const int root_i, const int root_j, int pin_physical_num) { @@ -2306,7 +2325,8 @@ RRNodeId get_pin_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, VTR_ASSERT(!x_offset.empty()); RRNodeId node_id = RRNodeId::INVALID(); for (int coord_idx = 0; coord_idx < (int)pin_sides.size(); coord_idx++) { - node_id = rr_spatial_lookup.find_node(root_i + x_offset[coord_idx], + node_id = rr_spatial_lookup.find_node(layer, + root_i + x_offset[coord_idx], root_j + y_offset[coord_idx], node_type, pin_physical_num, @@ -2319,13 +2339,14 @@ RRNodeId get_pin_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, RRNodeId get_class_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, 
t_physical_tile_type_ptr physical_tile, + const int layer, const int i, const int j, int class_physical_num) { auto class_type = get_class_type_from_class_physical_num(physical_tile, class_physical_num); VTR_ASSERT(class_type == DRIVER || class_type == RECEIVER); t_rr_type node_type = (class_type == e_pin_type::DRIVER) ? t_rr_type::SOURCE : t_rr_type::SINK; - return rr_spatial_lookup.find_node(i, j, node_type, class_physical_num); + return rr_spatial_lookup.find_node(layer, i, j, node_type, class_physical_num); } bool node_in_same_physical_tile(RRNodeId node_first, RRNodeId node_second) { @@ -2340,17 +2361,19 @@ bool node_in_same_physical_tile(RRNodeId node_first, RRNodeId node_second) { } else { VTR_ASSERT(first_rr_type == t_rr_type::IPIN || first_rr_type == t_rr_type::OPIN || first_rr_type == t_rr_type::SINK || first_rr_type == t_rr_type::SOURCE); VTR_ASSERT(second_rr_type == t_rr_type::IPIN || second_rr_type == t_rr_type::OPIN || second_rr_type == t_rr_type::SINK || second_rr_type == t_rr_type::SOURCE); + int first_layer = rr_graph.node_layer(node_first); int first_x = rr_graph.node_xlow(node_first); int first_y = rr_graph.node_ylow(node_first); + int sec_layer = rr_graph.node_layer(node_second); int sec_x = rr_graph.node_xlow(node_second); int sec_y = rr_graph.node_ylow(node_second); // Get the root-location of the pin's block - int first_root_x = first_x - device_ctx.grid.get_width_offset(first_x, first_y); - int first_root_y = first_y - device_ctx.grid.get_height_offset(first_x, first_y); + int first_root_x = first_x - device_ctx.grid.get_width_offset({first_x, first_y, first_layer}); + int first_root_y = first_y - device_ctx.grid.get_height_offset({first_x, first_y, first_layer}); - int sec_root_x = sec_x - device_ctx.grid.get_width_offset(sec_x, sec_y); - int sec_root_y = sec_y - device_ctx.grid.get_height_offset(sec_x, sec_y); + int sec_root_x = sec_x - device_ctx.grid.get_width_offset({sec_x, sec_y, sec_layer}); + int sec_root_y = sec_y - 
device_ctx.grid.get_height_offset({sec_x, sec_y, sec_layer}); // If the root-location of the nodes are similar, they should be located in the same tile if (first_root_x == sec_root_x && first_root_y == sec_root_y) @@ -2360,23 +2383,24 @@ bool node_in_same_physical_tile(RRNodeId node_first, RRNodeId node_second) { } } -std::vector get_cluster_netlist_intra_tile_classes_at_loc(const int i, - const int j, +std::vector get_cluster_netlist_intra_tile_classes_at_loc(int layer, + int i, + int j, t_physical_tile_type_ptr physical_type) { std::vector class_num_vec; const auto& place_ctx = g_vpr_ctx.placement(); const auto& atom_lookup = g_vpr_ctx.atom().lookup; - const auto& grid_block = place_ctx.grid_blocks[i][j]; + const auto& grid_block = place_ctx.grid_blocks; class_num_vec.reserve(physical_type->primitive_class_inf.size()); //iterate over different sub tiles inside a tile for (int abs_cap = 0; abs_cap < physical_type->capacity; abs_cap++) { - if (grid_block.subtile_empty(abs_cap)) { + if (grid_block.is_sub_tile_empty({i, j, layer}, abs_cap)) { continue; } - auto cluster_blk_id = grid_block.blocks[abs_cap]; + auto cluster_blk_id = grid_block.block_at_location({i, j, abs_cap, layer}); VTR_ASSERT(cluster_blk_id != ClusterBlockId::INVALID() || cluster_blk_id != EMPTY_BLOCK_ID); auto primitive_classes = get_cluster_internal_class_pairs(atom_lookup, @@ -2391,13 +2415,14 @@ std::vector get_cluster_netlist_intra_tile_classes_at_loc(const int i, return class_num_vec; } -std::vector get_cluster_netlist_intra_tile_pins_at_loc(const int i, +std::vector get_cluster_netlist_intra_tile_pins_at_loc(const int layer, + const int i, const int j, const vtr::vector& pin_chains, const vtr::vector>& pin_chains_num, t_physical_tile_type_ptr physical_type) { auto& place_ctx = g_vpr_ctx.placement(); - auto grid_block = place_ctx.grid_blocks[i][j]; + auto grid_block = place_ctx.grid_blocks; std::vector pin_num_vec; pin_num_vec.reserve(get_tile_num_internal_pin(physical_type)); @@ -2405,10 
+2430,10 @@ std::vector get_cluster_netlist_intra_tile_pins_at_loc(const int i, for (int abs_cap = 0; abs_cap < physical_type->capacity; abs_cap++) { std::vector cluster_internal_pins; - if (grid_block.subtile_empty(abs_cap)) { + if (grid_block.is_sub_tile_empty({i, j, layer}, abs_cap)) { continue; } - auto cluster_blk_id = grid_block.blocks[abs_cap]; + auto cluster_blk_id = grid_block.block_at_location({i, j, abs_cap, layer}); VTR_ASSERT(cluster_blk_id != ClusterBlockId::INVALID() && cluster_blk_id != EMPTY_BLOCK_ID); cluster_internal_pins = get_cluster_internal_pins(cluster_blk_id); diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 67214a69a60..f3a8f8917e7 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -266,12 +266,14 @@ int get_rr_node_max_ptc(const RRGraphView& rr_graph_view, RRNodeId get_pin_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, t_physical_tile_type_ptr physical_tile, + const int layer, const int root_i, const int root_j, int pin_physical_num); RRNodeId get_class_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, t_physical_tile_type_ptr physical_tile, + const int layer, const int i, const int j, int class_physical_num); @@ -279,12 +281,14 @@ RRNodeId get_class_rr_node_id(const RRSpatialLookup& rr_spatial_lookup, // Check whether the given nodes are in the same cluster bool node_in_same_physical_tile(RRNodeId node_first, RRNodeId node_second); -std::vector get_cluster_netlist_intra_tile_classes_at_loc(const int i, - const int j, +std::vector get_cluster_netlist_intra_tile_classes_at_loc(int layer, + int i, + int j, t_physical_tile_type_ptr physical_type); /** - * @brief Returns the list of pins inside the tile located at (i, j), except fo the ones which are on a chain + * @brief Returns the list of pins inside the tile located at (layer, i, j), except for the ones which are on a chain + * @param layer * @param i * @param j * @param pin_chains @@ -292,7 +296,8 @@ std::vector 
get_cluster_netlist_intra_tile_classes_at_loc(const int i, * @param physical_type * @return */ -std::vector get_cluster_netlist_intra_tile_pins_at_loc(const int i, +std::vector get_cluster_netlist_intra_tile_pins_at_loc(const int layer, + const int i, const int j, const vtr::vector& pin_chains, const vtr::vector>& pin_chains_num, diff --git a/vpr/test/test_bfs_routing.cpp b/vpr/test/test_bfs_routing.cpp index e45c486665e..21d96d75d48 100644 --- a/vpr/test/test_bfs_routing.cpp +++ b/vpr/test/test_bfs_routing.cpp @@ -24,12 +24,12 @@ TEST_CASE("test_route_flow", "[vpr_noc_bfs_routing]") { // store the reference to device grid with // need to add this before routers are added - noc_model.set_device_grid_width((int)4); + noc_model.set_device_grid_spec((int)4, 0); // add all the routers for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { - noc_model.add_router((i * 4) + j, j, i); + noc_model.add_router((i * 4) + j, j, i, 0); } } diff --git a/vpr/test/test_map_lookahead_serdes.cpp b/vpr/test/test_map_lookahead_serdes.cpp index f9235bf340e..9beb03b3601 100644 --- a/vpr/test/test_map_lookahead_serdes.cpp +++ b/vpr/test/test_map_lookahead_serdes.cpp @@ -10,15 +10,17 @@ namespace { static constexpr const char kMapLookaheadBin[] = "test_map_lookahead.bin"; TEST_CASE("round_trip_map_lookahead", "[vpr]") { - constexpr std::array kDim({10, 12, 15, 16}); + constexpr std::array kDim({1, 10, 12, 15, 16}); f_wire_cost_map.resize(kDim); - for (size_t x = 0; x < kDim[0]; ++x) { - for (size_t y = 0; y < kDim[1]; ++y) { - for (size_t z = 0; z < kDim[2]; ++z) { - for (size_t w = 0; w < kDim[3]; ++w) { - f_wire_cost_map[x][y][z][w].delay = (x + 1) * (y + 1) * (z + 1) * (w + 1); - f_wire_cost_map[x][y][z][w].congestion = 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1); + for (size_t layer = 0; layer < kDim[0]; layer++) { + for (size_t x = 0; x < kDim[1]; ++x) { + for (size_t y = 0; y < kDim[2]; ++y) { + for (size_t z = 0; z < kDim[3]; ++z) { + for (size_t w = 0; w < kDim[4]; ++w) { + 
f_wire_cost_map[layer][x][y][z][w].delay = (x + 1) * (y + 1) * (z + 1) * (w + 1); + f_wire_cost_map[layer][x][y][z][w].congestion = 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1); + } } } } @@ -26,18 +28,20 @@ TEST_CASE("round_trip_map_lookahead", "[vpr]") { write_router_lookahead(kMapLookaheadBin); - for (size_t x = 0; x < kDim[0]; ++x) { - for (size_t y = 0; y < kDim[1]; ++y) { - for (size_t z = 0; z < kDim[2]; ++z) { - for (size_t w = 0; w < kDim[3]; ++w) { - f_wire_cost_map[x][y][z][w].delay = 0.f; - f_wire_cost_map[x][y][z][w].congestion = 0.f; + for (size_t layer = 0; layer < kDim[0]; layer++) { + for (size_t x = 0; x < kDim[1]; ++x) { + for (size_t y = 0; y < kDim[2]; ++y) { + for (size_t z = 0; z < kDim[3]; ++z) { + for (size_t w = 0; w < kDim[4]; ++w) { + f_wire_cost_map[layer][x][y][z][w].delay = 0.f; + f_wire_cost_map[layer][x][y][z][w].congestion = 0.f; + } } } } } - f_wire_cost_map.resize({0, 0, 0, 0}); + f_wire_cost_map.resize({0, 0, 0, 0, 0}); read_router_lookahead(kMapLookaheadBin); @@ -45,12 +49,14 @@ TEST_CASE("round_trip_map_lookahead", "[vpr]") { REQUIRE(f_wire_cost_map.dim_size(i) == kDim[i]); } - for (size_t x = 0; x < kDim[0]; ++x) { - for (size_t y = 0; y < kDim[1]; ++y) { - for (size_t z = 0; z < kDim[2]; ++z) { - for (size_t w = 0; w < kDim[3]; ++w) { - REQUIRE(f_wire_cost_map[x][y][z][w].delay == (x + 1) * (y + 1) * (z + 1) * (w + 1)); - REQUIRE(f_wire_cost_map[x][y][z][w].congestion == 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1)); + for (size_t layer = 0; layer < kDim[0]; layer++) { + for (size_t x = 0; x < kDim[1]; ++x) { + for (size_t y = 0; y < kDim[2]; ++y) { + for (size_t z = 0; z < kDim[3]; ++z) { + for (size_t w = 0; w < kDim[4]; ++w) { + REQUIRE(f_wire_cost_map[layer][x][y][z][w].delay == (x + 1) * (y + 1) * (z + 1) * (w + 1)); + REQUIRE(f_wire_cost_map[layer][x][y][z][w].congestion == 2 * (x + 1) * (y + 1) * (z + 1) * (w + 1)); + } } } } diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp index 
65e76fb4dbb..978cdbe9248 100644 --- a/vpr/test/test_noc_place_utils.cpp +++ b/vpr/test/test_noc_place_utils.cpp @@ -38,7 +38,7 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { // store the reference to device grid with // the grid width will be the size of the noc mesh - noc_ctx.noc_model.set_device_grid_width((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST); + noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0); // individual router parameters int curr_router_id; @@ -58,7 +58,10 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { id_of_all_hard_routers_in_device.push_back(router_number); // add the router to the noc - noc_ctx.noc_model.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + noc_ctx.noc_model.add_router(curr_router_id, + router_grid_position_x, + router_grid_position_y, + 0); } noc_ctx.noc_model.make_room_for_noc_router_link_list(); @@ -93,7 +96,10 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { const NocRouter& hard_router_block = noc_ctx.noc_model.get_single_noc_router((NocRouterId)cluster_block_number); t_block_loc current_cluster_block_location; current_cluster_block_location.is_fixed = true; - current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), hard_router_block.get_router_grid_position_y(), -1); + current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), + hard_router_block.get_router_grid_position_y(), + -1, + hard_router_block.get_router_layer_position()); // now add the cluster and its placed location to the placement datastructures place_ctx.block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); @@ -212,7 +218,7 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { // store the reference to device grid with // the grid width will be the size of the noc mesh - 
noc_ctx.noc_model.set_device_grid_width((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST); + noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0); // individual router parameters int curr_router_id; @@ -232,7 +238,10 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { id_of_all_hard_routers_in_device.push_back(router_number); // add the router to the noc - noc_ctx.noc_model.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + noc_ctx.noc_model.add_router(curr_router_id, + router_grid_position_x, + router_grid_position_y, + 0); } noc_ctx.noc_model.make_room_for_noc_router_link_list(); @@ -267,7 +276,10 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { const NocRouter& hard_router_block = noc_ctx.noc_model.get_single_noc_router((NocRouterId)cluster_block_number); t_block_loc current_cluster_block_location; current_cluster_block_location.is_fixed = true; - current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), hard_router_block.get_router_grid_position_y(), -1); + current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), + hard_router_block.get_router_grid_position_y(), + -1, + hard_router_block.get_router_layer_position()); // now add the cluster and its placed location to the placement datastructures place_ctx.block_locs.insert(ClusterBlockId(cluster_block_number), current_cluster_block_location); @@ -448,7 +460,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // store the reference to device grid with // the grid width will be the size of the noc mesh - noc_ctx.noc_model.set_device_grid_width((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST); + noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0); // individual router parameters int curr_router_id; @@ -480,7 +492,10 @@ 
TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ router_grid_position_y = router_number / MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST; // add the router to the noc - noc_ctx.noc_model.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + noc_ctx.noc_model.add_router(curr_router_id, + router_grid_position_x, + router_grid_position_y, + 0); } noc_ctx.noc_model.make_room_for_noc_router_link_list(); @@ -515,7 +530,10 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ const NocRouter& hard_router_block = noc_ctx.noc_model.get_single_noc_router((NocRouterId)cluster_block_number); t_block_loc current_cluster_block_location; current_cluster_block_location.is_fixed = true; - current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), hard_router_block.get_router_grid_position_y(), -1); + current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), + hard_router_block.get_router_grid_position_y(), + -1, + hard_router_block.get_router_layer_position()); router_where_cluster_is_placed.push_back((NocRouterId)cluster_block_number); @@ -658,12 +676,24 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ blocks_affected.num_moved_blocks = 2; blocks_affected.moved_blocks[0].block_num = swap_router_block_one; - blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), 
noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); blocks_affected.moved_blocks[1].block_num = swap_router_block_two; - blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, 
+ noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); // swap the hard router blocks where the two cluster blocks are placed on NocRouterId router_first_swap_cluster_location = router_where_cluster_is_placed[swap_router_block_one]; @@ -785,12 +815,24 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ blocks_affected.moved_blocks[0].block_num = swap_router_block_one; - blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + 
blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); blocks_affected.moved_blocks[1].block_num = swap_router_block_two; - blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); // 
swap the hard router blocks where the two cluster blocks are placed on NocRouterId router_first_swap_cluster_location = router_where_cluster_is_placed[swap_router_block_one]; @@ -892,12 +934,24 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ blocks_affected.moved_blocks[0].block_num = swap_router_block_one; - blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); blocks_affected.moved_blocks[1].block_num = swap_router_block_two; - blocks_affected.moved_blocks[1].old_loc = 
t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); // swap the hard router blocks where the two cluster blocks are placed on router_first_swap_cluster_location = router_where_cluster_is_placed[swap_router_block_one]; @@ -974,12 +1028,24 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ blocks_affected.moved_blocks[0].block_num = swap_router_block_one; - blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), 
noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); blocks_affected.moved_blocks[1].block_num = swap_router_block_two; - blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); + 
blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); // swap the hard router blocks where the two cluster blocks are placed on router_first_swap_cluster_location = router_where_cluster_is_placed[swap_router_block_one]; @@ -1137,7 +1203,7 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { // store the reference to device grid with // the grid width will be the size of the noc mesh - noc_ctx.noc_model.set_device_grid_width((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST); + noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0); // individual router parameters int curr_router_id; @@ -1166,7 +1232,10 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { router_grid_position_y = router_number / MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST; // add the router to the noc - noc_ctx.noc_model.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + noc_ctx.noc_model.add_router(curr_router_id, + router_grid_position_x, + router_grid_position_y, + 0); } noc_ctx.noc_model.make_room_for_noc_router_link_list(); @@ -1201,7 +1270,10 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", 
"[noc_place_utils]") { const NocRouter& hard_router_block = noc_ctx.noc_model.get_single_noc_router((NocRouterId)cluster_block_number); t_block_loc current_cluster_block_location; current_cluster_block_location.is_fixed = true; - current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), hard_router_block.get_router_grid_position_y(), -1); + current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), + hard_router_block.get_router_grid_position_y(), + -1, + hard_router_block.get_router_layer_position()); router_where_cluster_is_placed.push_back((NocRouterId)cluster_block_number); @@ -1319,12 +1391,24 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") { blocks_affected.num_moved_blocks = 2; blocks_affected.moved_blocks[0].block_num = swap_router_block_one; - blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[0].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[0].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); + blocks_affected.moved_blocks[0].new_loc = 
t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); blocks_affected.moved_blocks[1].block_num = swap_router_block_two; - blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), -1); - blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), -1); + blocks_affected.moved_blocks[1].old_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_two]).get_router_layer_position()); + blocks_affected.moved_blocks[1].new_loc = t_pl_loc(noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_x(), + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_grid_position_y(), + -1, + noc_ctx.noc_model.get_single_noc_router(router_where_cluster_is_placed[swap_router_block_one]).get_router_layer_position()); // swap the hard router blocks where the two 
cluster blocks are placed on // this is needed to that we can @@ -1444,7 +1528,7 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { // store the reference to device grid with // the grid width will be the size of the noc mesh - noc_ctx.noc_model.set_device_grid_width((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST); + noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0); // individual router parameters int curr_router_id; @@ -1481,7 +1565,10 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { router_grid_position_y = router_number / MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST; // add the router to the noc - noc_ctx.noc_model.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + noc_ctx.noc_model.add_router(curr_router_id, + router_grid_position_x, + router_grid_position_y, + 0); } noc_ctx.noc_model.make_room_for_noc_router_link_list(); @@ -1516,7 +1603,10 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") { const NocRouter& hard_router_block = noc_ctx.noc_model.get_single_noc_router((NocRouterId)cluster_block_number); t_block_loc current_cluster_block_location; current_cluster_block_location.is_fixed = true; - current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), hard_router_block.get_router_grid_position_y(), -1); + current_cluster_block_location.loc = t_pl_loc(hard_router_block.get_router_grid_position_x(), + hard_router_block.get_router_grid_position_y(), + -1, + hard_router_block.get_router_layer_position()); router_where_cluster_is_placed.push_back((NocRouterId)cluster_block_number); diff --git a/vpr/test/test_noc_storage.cpp b/vpr/test/test_noc_storage.cpp index 8ceec3f36e1..a0a3e2ee54b 100644 --- a/vpr/test/test_noc_storage.cpp +++ b/vpr/test/test_noc_storage.cpp @@ -32,7 +32,7 @@ TEST_CASE("test_adding_routers_to_noc_storage", "[vpr_noc]") { // store the reference to device grid with // this will be set to the 
total number of routers (and should be set before adding routers) - test_noc.set_device_grid_width((int)NUM_OF_ROUTERS); + test_noc.set_device_grid_spec((int)NUM_OF_ROUTERS, 0); NocRouterId converted_id; @@ -44,10 +44,10 @@ TEST_CASE("test_adding_routers_to_noc_storage", "[vpr_noc]") { router_grid_position_y = router_number + dist(rand_num_gen); // add router to the golden vector - golden_set.emplace_back(router_number, router_grid_position_x, router_grid_position_y); + golden_set.emplace_back(router_number, router_grid_position_x, router_grid_position_y, 0); // add tje router to the noc - test_noc.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + test_noc.add_router(curr_router_id, router_grid_position_x, router_grid_position_y, 0); } // now verify that the routers were added properly by reading the routers back from the noc and comparing them to the golden set @@ -84,7 +84,7 @@ TEST_CASE("test_router_id_conversion", "[vpr_noc]") { // store the reference to device grid with // this will be set to the total number of routers (and should be set before adding routers) - test_noc.set_device_grid_width((int)NUM_OF_ROUTERS); + test_noc.set_device_grid_spec((int)NUM_OF_ROUTERS, 0); NocRouterId converted_id; @@ -96,10 +96,10 @@ TEST_CASE("test_router_id_conversion", "[vpr_noc]") { router_grid_position_y = router_number + dist(rand_num_gen); // add router to the golden vector - golden_set.emplace_back(router_number, router_grid_position_x, router_grid_position_y); + golden_set.emplace_back(router_number, router_grid_position_x, router_grid_position_y, 0); // add tje router to the noc - test_noc.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + test_noc.add_router(curr_router_id, router_grid_position_x, router_grid_position_y, 0); } // now verify that the routers were added properly by reading the routers back from the noc and comparing them to the golden set @@ -131,7 +131,7 @@ TEST_CASE("test_add_link", "[vpr_noc]") 
{ // store the reference to device grid with // this will be set to the total number of routers (and should be set before adding routers) - test_noc.set_device_grid_width((int)NUM_OF_ROUTERS); + test_noc.set_device_grid_spec((int)NUM_OF_ROUTERS, 0); // keeps track of the number of links created int total_num_of_links = 0; @@ -147,7 +147,10 @@ TEST_CASE("test_add_link", "[vpr_noc]") { router_id = router_number; // add tje router to the noc - test_noc.add_router(router_id, curr_router_x_pos, curr_router_y_pos); + test_noc.add_router(router_id, + curr_router_x_pos, + curr_router_y_pos, + 0); } // allocate the size for outgoing link vector for each router @@ -209,7 +212,7 @@ TEST_CASE("test_router_link_list", "[vpr_noc]") { // store the reference to device grid with // this will be set to the total number of routers (and should be set before adding routers) - test_noc.set_device_grid_width((int)NUM_OF_ROUTERS); + test_noc.set_device_grid_spec((int)NUM_OF_ROUTERS, 0); // need to assign @@ -228,7 +231,7 @@ TEST_CASE("test_router_link_list", "[vpr_noc]") { router_id = router_number; // add tje router to the noc - test_noc.add_router(router_id, curr_router_x_pos, curr_router_y_pos); + test_noc.add_router(router_id, curr_router_x_pos, curr_router_y_pos, 0); } // allocate the size for outgoing link vector for each router @@ -284,7 +287,7 @@ TEST_CASE("test_remove_link", "[vpr_noc]") { // store the reference to device grid with // this will be set to the total number of routers (and should be set before adding routers) - test_noc.set_device_grid_width((int)NUM_OF_ROUTERS); + test_noc.set_device_grid_spec((int)NUM_OF_ROUTERS, 0); // temp variables that hold the routers involved within a link NocRouterId source; @@ -302,7 +305,10 @@ TEST_CASE("test_remove_link", "[vpr_noc]") { router_id = router_number; // add tje router to the noc - test_noc.add_router(router_id, curr_router_x_pos, curr_router_y_pos); + test_noc.add_router(router_id, + curr_router_x_pos, + curr_router_y_pos, + 
0); } // now go through and add the links to the NoC @@ -404,7 +410,7 @@ TEST_CASE("test_generate_router_key_from_grid_location", "[vpr_noc]") { // store the reference to device grid with // this will be set to the total number of routers (and should be set before adding routers) - test_noc.set_device_grid_width((int)NUM_OF_ROUTERS); + test_noc.set_device_grid_spec((int)NUM_OF_ROUTERS, 0); NocRouterId converted_id; @@ -420,15 +426,21 @@ TEST_CASE("test_generate_router_key_from_grid_location", "[vpr_noc]") { golden_set.emplace_back((NocRouterId)router_number); // add the router to the noc - test_noc.add_router(curr_router_id, router_grid_position_x, router_grid_position_y); + test_noc.add_router(curr_router_id, + router_grid_position_x, + router_grid_position_y, + 0); } // now verify the test function by identifying all the routers using their grid locations // the grid locations go from 0 to the total number of routers in the NoC for (int grid_location = 0; grid_location < NUM_OF_ROUTERS; grid_location++) { // contains the grid location of a router block seen during placement - // we don't care about the subtile so give it an arbitrary value - t_pl_loc placement_router_grid_location = t_pl_loc(grid_location, grid_location, -1); + // we dont care about the subtile so give it a arbritary value + t_pl_loc placement_router_grid_location = t_pl_loc(grid_location, + grid_location, + -1, + 0); NocRouterId found_router_at_grid_location = test_noc.get_router_at_grid_location(placement_router_grid_location); diff --git a/vpr/test/test_place_delay_model_serdes.cpp b/vpr/test/test_place_delay_model_serdes.cpp index dbf140c9017..818b5cc3dfe 100644 --- a/vpr/test/test_place_delay_model_serdes.cpp +++ b/vpr/test/test_place_delay_model_serdes.cpp @@ -9,16 +9,20 @@ static constexpr const char kDeltaDelayBin[] = "test_delta_delay.bin"; static constexpr const char kOverrideDelayBin[] = "test_override_delay.bin"; TEST_CASE("round_trip_delta_delay_model", "[vpr]") { + constexpr size_t 
kDimLayer = 1; constexpr size_t kDimX = 10; constexpr size_t kDimY = 10; - vtr::Matrix delays; - delays.resize({kDimX, kDimY}); - - for (size_t x = 0; x < kDimX; ++x) { - for (size_t y = 0; y < kDimY; ++y) { - delays[x][y] = (x + 1) * (y + 1); + vtr::NdMatrix delays; + delays.resize({kDimLayer, kDimX, kDimY}); + + for (size_t layer = 0; layer < kDimLayer; ++layer) { + for (size_t x = 0; x < kDimX; ++x) { + for (size_t y = 0; y < kDimY; ++y) { + delays[layer][x][y] = (x + 1) * (y + 1); + } } } + DeltaDelayModel model(std::move(delays), false); const auto& delays1 = model.delays(); @@ -35,22 +39,26 @@ TEST_CASE("round_trip_delta_delay_model", "[vpr]") { REQUIRE(delays1.dim_size(dim) == delays2.dim_size(dim)); } - for (size_t x = 0; x < kDimX; ++x) { - for (size_t y = 0; y < kDimY; ++y) { - CHECK(delays1[x][y] == delays2[x][y]); + for (size_t layer = 0; layer < kDimLayer; ++layer) { + for (size_t x = 0; x < kDimX; ++x) { + for (size_t y = 0; y < kDimY; ++y) { + CHECK(delays1[layer][x][y] == delays2[layer][x][y]); + } } } } TEST_CASE("round_trip_override_delay_model", "[vpr]") { + constexpr size_t kDimLayer = 1; constexpr size_t kDimX = 10; constexpr size_t kDimY = 10; - vtr::Matrix delays; - delays.resize({kDimX, kDimY}); - - for (size_t x = 0; x < kDimX; ++x) { - for (size_t y = 0; y < kDimY; ++y) { - delays[x][y] = (x + 1) * (y + 1); + vtr::NdMatrix delays; + delays.resize({kDimLayer, kDimX, kDimY}); + for (size_t layer = 0; layer < kDimLayer; ++layer) { + for (size_t x = 0; x < kDimX; ++x) { + for (size_t y = 0; y < kDimY; ++y) { + delays[layer][x][y] = (x + 1) * (y + 1); + } } } OverrideDelayModel model(false); @@ -73,9 +81,11 @@ TEST_CASE("round_trip_override_delay_model", "[vpr]") { REQUIRE(delays1.dim_size(dim) == delays2.dim_size(dim)); } - for (size_t x = 0; x < kDimX; ++x) { - for (size_t y = 0; y < kDimY; ++y) { - CHECK(delays1[x][y] == delays2[x][y]); + for (size_t layer = 0; layer < kDimLayer; ++layer) { + for (size_t x = 0; x < kDimX; ++x) { + for 
(size_t y = 0; y < kDimY; ++y) { + CHECK(delays1[layer][x][y] == delays2[layer][x][y]); + } } } diff --git a/vpr/test/test_setup_noc.cpp b/vpr/test/test_setup_noc.cpp index 5e81373cf0d..b88949b11f3 100644 --- a/vpr/test/test_setup_noc.cpp +++ b/vpr/test/test_setup_noc.cpp @@ -418,17 +418,17 @@ TEST_CASE("test_create_noc_routers", "[vpr_setup_noc]") { * - router 8: (4,8) * - router 9: (8,8) */ - list_of_routers.push_back({0, 0, 0.5, 1}); - list_of_routers.push_back({4, 0, 4.5, 1}); - list_of_routers.push_back({8, 0, 8.5, 1}); + list_of_routers.push_back({0, 0, 0, 0.5, 1}); + list_of_routers.push_back({4, 0, 0, 4.5, 1}); + list_of_routers.push_back({8, 0, 0, 8.5, 1}); - list_of_routers.push_back({0, 4, 0.5, 5}); - list_of_routers.push_back({4, 4, 4.5, 5}); - list_of_routers.push_back({8, 4, 8.5, 5}); + list_of_routers.push_back({0, 4, 0, 0.5, 5}); + list_of_routers.push_back({4, 4, 0, 4.5, 5}); + list_of_routers.push_back({8, 4, 0, 8.5, 5}); - list_of_routers.push_back({0, 8, 0.5, 9}); - list_of_routers.push_back({4, 8, 4.5, 9}); - list_of_routers.push_back({8, 8, 8.5, 9}); + list_of_routers.push_back({0, 8, 0, 0.5, 9}); + list_of_routers.push_back({4, 8, 0, 4.5, 9}); + list_of_routers.push_back({8, 8, 0, 8.5, 9}); // create the noc model (to store the routers) NocStorage noc_model; @@ -595,24 +595,24 @@ TEST_CASE("test_create_noc_links", "[vpr_setup_noc]") { * - router 8: (4,8) * - router 9: (8,8) */ - list_of_routers.push_back({0, 0, 0.5, 1}); - list_of_routers.push_back({4, 0, 4.5, 1}); - list_of_routers.push_back({8, 0, 8.5, 1}); + list_of_routers.push_back({0, 0, 0, 0.5, 1}); + list_of_routers.push_back({4, 0, 0, 4.5, 1}); + list_of_routers.push_back({8, 0, 0, 8.5, 1}); - list_of_routers.push_back({0, 4, 0.5, 5}); - list_of_routers.push_back({4, 4, 4.5, 5}); - list_of_routers.push_back({8, 4, 8.5, 5}); + list_of_routers.push_back({0, 4, 0, 0.5, 5}); + list_of_routers.push_back({4, 4, 0, 4.5, 5}); + list_of_routers.push_back({8, 4, 0, 8.5, 5}); - 
list_of_routers.push_back({0, 8, 0.5, 9}); - list_of_routers.push_back({4, 8, 4.5, 9}); - list_of_routers.push_back({8, 8, 8.5, 9}); + list_of_routers.push_back({0, 8, 0, 0.5, 9}); + list_of_routers.push_back({4, 8, 0, 4.5, 9}); + list_of_routers.push_back({8, 8, 0, 8.5, 9}); // create the noc model (to store the routers) NocStorage noc_model; // store the reference to device grid with // this will be set to the device grid width - noc_model.set_device_grid_width((int)3); + noc_model.set_device_grid_spec((int)3, 0); // create the logical router list t_noc_inf noc_info; @@ -632,7 +632,10 @@ TEST_CASE("test_create_noc_links", "[vpr_setup_noc]") { noc_info.router_list.push_back(*temp_router); // add the router to the NoC - noc_model.add_router(router_id, list_of_routers[router_id - 1].grid_width_position, list_of_routers[router_id - 1].grid_height_position); + noc_model.add_router(router_id, + list_of_routers[router_id - 1].grid_width_position, + list_of_routers[router_id - 1].grid_height_position, + list_of_routers[router_id - 1].layer_position); } delete temp_router; @@ -738,17 +741,17 @@ TEST_CASE("test_setup_noc", "[vpr_setup_noc]") { * - router 8: (4,8) * - router 9: (8,8) */ - list_of_routers.push_back({0, 0, 0.5, 1}); - list_of_routers.push_back({4, 0, 4.5, 1}); - list_of_routers.push_back({8, 0, 8.5, 1}); + list_of_routers.push_back({0, 0, 0, 0.5, 1}); + list_of_routers.push_back({4, 0, 0, 4.5, 1}); + list_of_routers.push_back({8, 0, 0, 8.5, 1}); - list_of_routers.push_back({0, 4, 0.5, 5}); - list_of_routers.push_back({4, 4, 4.5, 5}); - list_of_routers.push_back({8, 4, 8.5, 5}); + list_of_routers.push_back({0, 4, 0, 0.5, 5}); + list_of_routers.push_back({4, 4, 0, 4.5, 5}); + list_of_routers.push_back({8, 4, 0, 8.5, 5}); - list_of_routers.push_back({0, 8, 0.5, 9}); - list_of_routers.push_back({4, 8, 4.5, 9}); - list_of_routers.push_back({8, 8, 8.5, 9}); + list_of_routers.push_back({0, 8, 0, 0.5, 9}); + list_of_routers.push_back({4, 8, 0, 4.5, 9}); + 
list_of_routers.push_back({8, 8, 0, 8.5, 9}); for (int router_id = 1; router_id < 10; router_id++) { // we will have 9 logical routers that will take up all physical routers diff --git a/vpr/test/test_vpr_constraints.cpp b/vpr/test/test_vpr_constraints.cpp index ca58509468d..30772950e19 100644 --- a/vpr/test/test_vpr_constraints.cpp +++ b/vpr/test/test_vpr_constraints.cpp @@ -18,26 +18,27 @@ TEST_CASE("Region", "[vpr]") { Region r1; - r1.set_region_rect(1, 2, 3, 4); + r1.set_region_rect({1, 2, 3, 4, 5}); r1.set_sub_tile(2); - vtr::Rect rect; - rect = r1.get_region_rect(); + const auto r1_coord = r1.get_region_rect(); - REQUIRE(rect.xmin() == 1); - REQUIRE(rect.ymin() == 2); - REQUIRE(rect.xmax() == 3); - REQUIRE(rect.ymax() == 4); + REQUIRE(r1_coord.xmin == 1); + REQUIRE(r1_coord.ymin == 2); + REQUIRE(r1_coord.xmax == 3); + REQUIRE(r1_coord.ymax == 4); + REQUIRE(r1_coord.layer_num == 5); REQUIRE(r1.get_sub_tile() == 2); //checking that default constructor creates an empty rectangle (999, 999,-1,-1) Region def_region; bool is_def_empty = false; - vtr::Rect def_rect = def_region.get_region_rect(); - is_def_empty = def_rect.empty(); + const auto def_coord = def_region.get_region_rect(); + is_def_empty = def_region.empty(); REQUIRE(is_def_empty == true); - REQUIRE(def_rect.xmin() == 999); + REQUIRE(def_coord.xmin == 999); + REQUIRE(def_coord.layer_num == -1); REQUIRE(def_region.get_sub_tile() == -1); } @@ -45,7 +46,7 @@ TEST_CASE("Region", "[vpr]") { TEST_CASE("PartitionRegion", "[vpr]") { Region r1; - r1.set_region_rect(2, 3, 6, 7); + r1.set_region_rect({2, 3, 6, 7, 0}); r1.set_sub_tile(3); PartitionRegion pr1; @@ -54,12 +55,13 @@ TEST_CASE("PartitionRegion", "[vpr]") { std::vector pr_regions = pr1.get_partition_region(); REQUIRE(pr_regions[0].get_sub_tile() == 3); - vtr::Rect rect; - rect = pr_regions[0].get_region_rect(); - REQUIRE(rect.xmin() == 2); - REQUIRE(rect.ymin() == 3); - REQUIRE(rect.xmax() == 6); - REQUIRE(rect.ymax() == 7); + + const auto pr_reg_coord = 
pr_regions[0].get_region_rect(); + REQUIRE(pr_reg_coord.layer_num == 0); + REQUIRE(pr_reg_coord.xmin == 2); + REQUIRE(pr_reg_coord.ymin == 3); + REQUIRE(pr_reg_coord.xmax == 6); + REQUIRE(pr_reg_coord.ymax == 7); } //Test Partition class accessors and mutators @@ -71,7 +73,7 @@ TEST_CASE("Partition", "[vpr]") { //create region and partitionregions objects to test functions of the Partition class Region r1; - r1.set_region_rect(2, 3, 7, 8); + r1.set_region_rect({2, 3, 7, 8, 0}); r1.set_sub_tile(3); PartitionRegion part_reg; @@ -82,12 +84,13 @@ TEST_CASE("Partition", "[vpr]") { std::vector regions = part_reg_2.get_partition_region(); REQUIRE(regions[0].get_sub_tile() == 3); - vtr::Rect rect; - rect = regions[0].get_region_rect(); - REQUIRE(rect.xmin() == 2); - REQUIRE(rect.ymin() == 3); - REQUIRE(rect.xmax() == 7); - REQUIRE(rect.ymax() == 8); + + const auto pr_reg_coord = regions[0].get_region_rect(); + REQUIRE(pr_reg_coord.layer_num == 0); + REQUIRE(pr_reg_coord.xmin == 2); + REQUIRE(pr_reg_coord.ymin == 3); + REQUIRE(pr_reg_coord.xmax == 7); + REQUIRE(pr_reg_coord.ymax == 8); } //Test VprConstraints class accessors and mutators @@ -134,35 +137,37 @@ TEST_CASE("RegionIntersect", "[vpr]") { Region region1; Region region2; - region1.set_region_rect(1, 2, 3, 5); - region2.set_region_rect(2, 3, 4, 6); + region1.set_region_rect({1, 2, 3, 5, 0}); + region2.set_region_rect({2, 3, 4, 6, 0}); Region int_reg; int_reg = intersection(region1, region2); - vtr::Rect rect = int_reg.get_region_rect(); + auto intersect_reg_coord = int_reg.get_region_rect(); - REQUIRE(rect.xmin() == 2); - REQUIRE(rect.ymin() == 3); - REQUIRE(rect.xmax() == 3); - REQUIRE(rect.ymax() == 5); + REQUIRE(intersect_reg_coord.layer_num == 0); + REQUIRE(intersect_reg_coord.xmin == 2); + REQUIRE(intersect_reg_coord.ymin == 3); + REQUIRE(intersect_reg_coord.xmax == 3); + REQUIRE(intersect_reg_coord.ymax == 5); //Test full overlap Region region3; Region region4; - region3.set_region_rect(5, 1, 8, 6); - 
region4.set_region_rect(6, 3, 8, 6); + region3.set_region_rect({5, 1, 8, 6, 0}); + region4.set_region_rect({6, 3, 8, 6, 0}); Region int_reg_2; int_reg_2 = intersection(region3, region4); - vtr::Rect rect_2 = int_reg_2.get_region_rect(); + intersect_reg_coord = int_reg_2.get_region_rect(); - REQUIRE(rect_2.xmin() == 6); - REQUIRE(rect_2.ymin() == 3); - REQUIRE(rect_2.xmax() == 8); - REQUIRE(rect_2.ymax() == 6); + REQUIRE(intersect_reg_coord.layer_num == 0); + REQUIRE(intersect_reg_coord.xmin == 6); + REQUIRE(intersect_reg_coord.ymin == 3); + REQUIRE(intersect_reg_coord.xmax == 8); + REQUIRE(intersect_reg_coord.ymax == 6); //Test no intersection (rectangles don't overlap, intersect region will be returned empty) @@ -187,11 +192,12 @@ TEST_CASE("RegionIntersect", "[vpr]") { Region int_reg_5; int_reg_5 = intersection(region1, region2); - vtr::Rect rect_5 = int_reg_5.get_region_rect(); - REQUIRE(rect_5.xmin() == 2); - REQUIRE(rect_5.ymin() == 3); - REQUIRE(rect_5.xmax() == 3); - REQUIRE(rect_5.ymax() == 5); + const auto reg_5_coord = int_reg_5.get_region_rect(); + REQUIRE(reg_5_coord.layer_num == 0); + REQUIRE(reg_5_coord.xmin == 2); + REQUIRE(reg_5_coord.ymin == 3); + REQUIRE(reg_5_coord.xmax == 3); + REQUIRE(reg_5_coord.ymax == 5); } //The following six test cases test the intersection function for PartitionRegions @@ -204,9 +210,23 @@ TEST_CASE("PartRegionIntersect", "[vpr]") { Region r2; Region r3; - r1.set_region_rect(0, 0, 1, 1); - r2.set_region_rect(1, 1, 2, 2); - r3.set_region_rect(0, 0, 2, 2); + r1.set_region_rect({0, + 0, + 1, + 1, + 0}); + + r2.set_region_rect({1, + 1, + 2, + 2, + 0}); + + r3.set_region_rect({0, + 0, + 2, + 2, + 0}); pr1.add_to_part_region(r1); pr1.add_to_part_region(r2); @@ -219,8 +239,13 @@ TEST_CASE("PartRegionIntersect", "[vpr]") { vtr::Rect int_rect(0, 0, 1, 1); vtr::Rect int_rect_2(1, 1, 2, 2); - REQUIRE(regions[0].get_region_rect() == int_rect); - REQUIRE(regions[1].get_region_rect() == int_rect_2); + + const auto first_reg_coord = 
regions[0].get_region_rect(); + const auto second_reg_coord = regions[1].get_region_rect(); + REQUIRE(vtr::Rect(first_reg_coord.xmin, first_reg_coord.ymin, first_reg_coord.xmax, first_reg_coord.ymax) == int_rect); + REQUIRE(vtr::Rect(second_reg_coord.xmin, second_reg_coord.ymin, second_reg_coord.xmax, second_reg_coord.ymax) == int_rect_2); + REQUIRE(first_reg_coord.layer_num == 0); + REQUIRE(second_reg_coord.layer_num == 0); } //2x1 regions, 1 overlap @@ -232,9 +257,9 @@ TEST_CASE("PartRegionIntersect2", "[vpr]") { Region r2; Region r3; - r1.set_region_rect(0, 0, 2, 2); - r2.set_region_rect(4, 4, 6, 6); - r3.set_region_rect(0, 0, 2, 2); + r1.set_region_rect({0, 0, 2, 2, 0}); + r2.set_region_rect({4, 4, 6, 6, 0}); + r3.set_region_rect({0, 0, 2, 2, 0}); pr1.add_to_part_region(r1); pr1.add_to_part_region(r2); @@ -246,7 +271,9 @@ TEST_CASE("PartRegionIntersect2", "[vpr]") { std::vector regions = int_pr.get_partition_region(); vtr::Rect int_rect(0, 0, 2, 2); REQUIRE(regions.size() == 1); - REQUIRE(regions[0].get_region_rect() == int_rect); + const auto first_reg_coord = regions[0].get_region_rect(); + REQUIRE(vtr::Rect(first_reg_coord.xmin, first_reg_coord.ymin, first_reg_coord.xmax, first_reg_coord.ymax) == int_rect); + REQUIRE(first_reg_coord.layer_num == 0); } //2x2 regions, no overlaps @@ -259,14 +286,14 @@ TEST_CASE("PartRegionIntersect3", "[vpr]") { Region r3; Region r4; - r1.set_region_rect(1, 2, 3, 5); + r1.set_region_rect({1, 2, 3, 5, 0}); r1.set_sub_tile(2); - r2.set_region_rect(4, 2, 6, 4); + r2.set_region_rect({4, 2, 6, 4, 0}); - r3.set_region_rect(4, 5, 5, 7); + r3.set_region_rect({4, 5, 5, 7, 0}); - r4.set_region_rect(1, 2, 3, 5); + r4.set_region_rect({1, 2, 3, 5, 0}); r4.set_sub_tile(4); pr1.add_to_part_region(r1); @@ -292,14 +319,14 @@ TEST_CASE("PartRegionIntersect4", "[vpr]") { Region r3; Region r4; - r1.set_region_rect(1, 2, 3, 5); + r1.set_region_rect({1, 2, 3, 5, 0}); r1.set_sub_tile(2); - r2.set_region_rect(4, 2, 6, 4); + r2.set_region_rect({4, 2, 
6, 4, 0}); - r3.set_region_rect(4, 5, 5, 7); + r3.set_region_rect({4, 5, 5, 7, 0}); - r4.set_region_rect(1, 2, 3, 4); + r4.set_region_rect({1, 2, 3, 4, 0}); r4.set_sub_tile(2); pr1.add_to_part_region(r1); @@ -315,7 +342,9 @@ TEST_CASE("PartRegionIntersect4", "[vpr]") { vtr::Rect intersect(1, 2, 3, 4); REQUIRE(regions.size() == 1); - REQUIRE(regions[0].get_region_rect() == intersect); + const auto first_reg_coord = regions[0].get_region_rect(); + REQUIRE(first_reg_coord.layer_num == 0); + REQUIRE(first_reg_coord.get_rect() == intersect); REQUIRE(regions[0].get_sub_tile() == 2); } @@ -329,13 +358,13 @@ TEST_CASE("PartRegionIntersect5", "[vpr]") { Region r3; Region r4; - r1.set_region_rect(1, 5, 5, 7); + r1.set_region_rect({1, 5, 5, 7, 0}); - r2.set_region_rect(6, 3, 8, 5); + r2.set_region_rect({6, 3, 8, 5, 0}); - r3.set_region_rect(2, 6, 4, 9); + r3.set_region_rect({2, 6, 4, 9, 0}); - r4.set_region_rect(6, 4, 8, 7); + r4.set_region_rect({6, 4, 8, 7, 0}); pr1.add_to_part_region(r1); pr1.add_to_part_region(r2); @@ -351,8 +380,13 @@ TEST_CASE("PartRegionIntersect5", "[vpr]") { vtr::Rect int_r2r4(6, 4, 8, 5); REQUIRE(regions.size() == 2); - REQUIRE(regions[0].get_region_rect() == int_r1r3); - REQUIRE(regions[1].get_region_rect() == int_r2r4); + const auto first_reg_coord = regions[0].get_region_rect(); + const auto second_reg_coord = regions[1].get_region_rect(); + + REQUIRE(first_reg_coord.layer_num == 0); + REQUIRE(second_reg_coord.layer_num == 0); + REQUIRE(first_reg_coord.get_rect() == int_r1r3); + REQUIRE(second_reg_coord.get_rect() == int_r2r4); } //2x2 regions, 4 overlap @@ -365,13 +399,13 @@ TEST_CASE("PartRegionIntersect6", "[vpr]") { Region r3; Region r4; - r1.set_region_rect(2, 3, 4, 7); + r1.set_region_rect({2, 3, 4, 7, 0}); - r2.set_region_rect(5, 3, 7, 8); + r2.set_region_rect({5, 3, 7, 8, 0}); - r3.set_region_rect(2, 2, 7, 4); + r3.set_region_rect({2, 2, 7, 4, 0}); - r4.set_region_rect(2, 6, 7, 8); + r4.set_region_rect({2, 6, 7, 8, 0}); 
pr1.add_to_part_region(r1); pr1.add_to_part_region(r2); @@ -389,10 +423,15 @@ TEST_CASE("PartRegionIntersect6", "[vpr]") { vtr::Rect int_r2r4(5, 6, 7, 8); REQUIRE(regions.size() == 4); - REQUIRE(regions[0].get_region_rect() == int_r1r3); - REQUIRE(regions[1].get_region_rect() == int_r1r4); - REQUIRE(regions[2].get_region_rect() == int_r2r3); - REQUIRE(regions[3].get_region_rect() == int_r2r4); + REQUIRE(regions[0].get_region_rect().get_rect() == int_r1r3); + REQUIRE(regions[1].get_region_rect().get_rect() == int_r1r4); + REQUIRE(regions[2].get_region_rect().get_rect() == int_r2r3); + REQUIRE(regions[3].get_region_rect().get_rect() == int_r2r4); + + REQUIRE(regions[0].get_region_rect().layer_num == 0); + REQUIRE(regions[1].get_region_rect().layer_num == 0); + REQUIRE(regions[2].get_region_rect().layer_num == 0); + REQUIRE(regions[3].get_region_rect().layer_num == 0); } //Test calculation of macro constraints @@ -405,12 +444,12 @@ TEST_CASE("MacroConstraints", "[vpr]") { t_pl_offset offset(2, 1, 0); Region reg; - reg.set_region_rect(5, 2, 9, 6); + reg.set_region_rect({5, 2, 9, 6, 0}); head_pr.add_to_part_region(reg); Region grid_reg; - grid_reg.set_region_rect(0, 0, 20, 20); + grid_reg.set_region_rect({0, 0, 20, 20, 0}); PartitionRegion grid_pr; grid_pr.add_to_part_region(grid_reg); @@ -418,12 +457,13 @@ TEST_CASE("MacroConstraints", "[vpr]") { std::vector mac_regions = macro_pr.get_partition_region(); - vtr::Rect mac_rect = mac_regions[0].get_region_rect(); + const auto mac_first_reg_coord = mac_regions[0].get_region_rect(); - REQUIRE(mac_rect.xmin() == 7); - REQUIRE(mac_rect.ymin() == 3); - REQUIRE(mac_rect.xmax() == 11); - REQUIRE(mac_rect.ymax() == 7); + REQUIRE(mac_first_reg_coord.layer_num == 0); + REQUIRE(mac_first_reg_coord.xmin == 7); + REQUIRE(mac_first_reg_coord.ymin == 3); + REQUIRE(mac_first_reg_coord.xmax == 11); + REQUIRE(mac_first_reg_coord.ymax == 7); } #if 0 diff --git a/vpr/test/test_xy_routing.cpp b/vpr/test/test_xy_routing.cpp index 
bc163a7308f..67517271f43 100644 --- a/vpr/test/test_xy_routing.cpp +++ b/vpr/test/test_xy_routing.cpp @@ -54,7 +54,7 @@ TEST_CASE("test_route_flow", "[vpr_noc_xy_routing]") { // add all the routers for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { - noc_model.add_router((i * 4) + j, j, i); + noc_model.add_router((i * 4) + j, j, i, 0); } } @@ -222,12 +222,12 @@ TEST_CASE("test_route_flow when it fails in a mesh topology.", "[vpr_noc_xy_rout // store the reference to device grid with // this will be set to the device grid width - noc_model.set_device_grid_width((int)4); + noc_model.set_device_grid_spec((int)4, 0); // add all the routers for (int i = 0; i < 4; i++) { for (int j = 0; j < 4; j++) { - noc_model.add_router((i * 4) + j, j, i); + noc_model.add_router((i * 4) + j, j, i, 0); } } @@ -331,12 +331,12 @@ TEST_CASE("test_route_flow when it fails in a non mesh topology.", "[vpr_noc_xy_ // store the reference to device grid with // this will be set to the device grid width - noc_model.set_device_grid_width((int)4); + noc_model.set_device_grid_spec((int)4, 0); - noc_model.add_router(0, 0, 0); - noc_model.add_router(1, 2, 2); - noc_model.add_router(2, 1, 2); - noc_model.add_router(3, 3, 0); + noc_model.add_router(0, 0, 0, 0); + noc_model.add_router(1, 2, 2, 0); + noc_model.add_router(2, 1, 2, 0); + noc_model.add_router(3, 3, 0, 0); noc_model.make_room_for_noc_router_link_list(); diff --git a/vtr_flow/arch/multi_die/stratixiv_arch.timing_with_embedded_3X3_mesh_noc_topology_with_LAB_on_top.xml b/vtr_flow/arch/multi_die/3d_stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml similarity index 99% rename from vtr_flow/arch/multi_die/stratixiv_arch.timing_with_embedded_3X3_mesh_noc_topology_with_LAB_on_top.xml rename to vtr_flow/arch/multi_die/3d_stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml index bb403bb473e..0d7462a75e1 100644 --- 
a/vtr_flow/arch/multi_die/stratixiv_arch.timing_with_embedded_3X3_mesh_noc_topology_with_LAB_on_top.xml +++ b/vtr_flow/arch/multi_die/3d_stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml @@ -4417,19 +4417,37 @@ - - + + - - - + + + + + + + + + + + - + + + - - - + + + + + + + + + + + @@ -4743,29 +4761,69 @@ - - - + + + - + - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + noc_router_adapter.master_tready noc_router_adapter.master_tvalid noc_router_adapter.master_tdata[31:0] noc_router_adapter.clk noc_router_adapter.reset + noc_router_adapter.master_tstrb[3:0] noc_router_adapter.master_tkeep[3:0] noc_router_adapter.master_tid[7:0] noc_router_adapter.master_tdest[7:0] noc_router_adapter.master_tuser[7:0] noc_router_adapter.master_tlast + noc_router_adapter.slave_tready noc_router_adapter.slave_tvalid noc_router_adapter.slave_tdata[31:0] + noc_router_adapter.slave_tstrb[3:0] noc_router_adapter.slave_tkeep[3:0] noc_router_adapter.slave_tid[7:0] noc_router_adapter.slave_tdest[7:0] noc_router_adapter.slave_tuser[7:0] noc_router_adapter.slave_tlast + @@ -4837,109 +4895,81 @@ - + - - - - - - - - - - - - - + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + Note the use of different priorities to avoid ambiguity on small devices --> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - + + + + + + + + + + - + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + @@ -48312,7 +48433,7 @@ - - + + diff --git a/vtr_flow/arch/multi_die/sub_tiles_two_layers.xml b/vtr_flow/arch/multi_die/sub_tiles_two_layers.xml deleted file mode 100644 index 
280ac485dce..00000000000 --- a/vtr_flow/arch/multi_die/sub_tiles_two_layers.xml +++ /dev/null @@ -1,264 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - io_tile_0.in io_tile_0.out - io_tile_0.in io_tile_0.out - io_tile_0.in io_tile_0.out - io_tile_0.in io_tile_0.out - - - - - - - - - - - - - - io_tile_1.in io_tile_1.out - io_tile_1.in io_tile_1.out - io_tile_1.in io_tile_1.out - io_tile_1.in io_tile_1.out - - - - - - - - - - - - - - io_tile_2.in io_tile_2.out - io_tile_2.in io_tile_2.out - io_tile_2.in io_tile_2.out - io_tile_2.in io_tile_2.out - - - - - - - - - - - - - - - - - - - - pass_through_tile_0.in pass_through_tile_0.out - pass_through_tile_0.in pass_through_tile_0.out - pass_through_tile_0.in pass_through_tile_0.out - pass_through_tile_0.in pass_through_tile_0.out - - - - - - - - - - - - - - - - - - - - - pass_through_tile_1.in pass_through_tile_1.enable pass_through_tile_1.out - pass_through_tile_1.in pass_through_tile_1.enable pass_through_tile_1.out - pass_through_tile_1.in pass_through_tile_1.enable pass_through_tile_1.out - pass_through_tile_1.in pass_through_tile_1.enable pass_through_tile_1.out - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1 1 - 1 - - - diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1/task_list.txt index 5743c2bbe74..5714a36569d 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1/task_list.txt @@ -11,6 +11,6 @@ regression_tests/vtr_reg_nightly_test1/arithmetic_tasks/figure_8 regression_tests/vtr_reg_nightly_test1/arithmetic_tasks/multless_consts 
regression_tests/vtr_reg_nightly_test1/arithmetic_tasks/open_cores regression_tests/vtr_reg_nightly_test1/arithmetic_tasks/open_cores_frac -regression_tests/vtr_reg_nightly_test1/symbiflow +#regression_tests/vtr_reg_nightly_test1/symbiflow regression_tests/vtr_reg_nightly_test1/power_extended_arch_list regression_tests/vtr_reg_nightly_test1/power_extended_circuit_list diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/FIR_filters_frac/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/FIR_filters_frac/config/golden_results.txt index 436eece4174..c4ddf07fa25 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/FIR_filters_frac/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/FIR_filters_frac/config/golden_results.txt @@ -219,7 +219,7 @@ k6_frac_N8_22nm.xml fir_nopipe_35.v common 18.54 vpr 68.44 MiB 0.11 13884 -1 -1 k6_frac_N8_22nm.xml fir_nopipe_36.v common 27.33 vpr 68.89 MiB 0.16 13980 -1 -1 13 2.08 -1 -1 40032 -1 -1 153 22 0 10 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 70548 22 19 3011 2724 1 2050 204 22 22 484 mult_36 auto 31.5 MiB 0.99 14453 68.9 MiB 0.85 0.01 13.7707 -751.494 -13.7707 13.7707 1.96 0.0058713 0.00525496 0.38135 0.334677 76 29166 47 1.29336e+07 6.02122e+06 2.20457e+06 4554.90 15.06 2.28366 2.01976 56682 573177 -1 24174 21 12121 23445 2974695 614218 0 0 2974695 614218 21988 13845 0 0 122333 110950 0 0 168746 134322 0 0 22790 15583 0 0 1310893 167979 0 0 1327945 171539 0 0 21988 0 0 9898 19163 19575 82292 1497 212 14.9018 14.9018 -1391.02 -14.9018 0 0 2.73077e+06 5642.09 0.92 0.68 0.57 -1 -1 0.92 0.223405 0.203602 1168 1801 -1 -1 -1 -1 k6_frac_N8_22nm.xml fir_nopipe_37.v common 
20.49 vpr 70.30 MiB 0.16 14328 -1 -1 13 1.92 -1 -1 39096 -1 -1 158 22 0 11 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 71988 22 19 3132 2828 1 2123 210 24 24 576 mult_36 auto 32.1 MiB 0.83 15481 70.3 MiB 0.52 0.01 14.4868 -903.454 -14.4868 14.4868 1.47 0.00306847 0.00263064 0.207105 0.179949 74 29452 32 1.56141e+07 6.48458e+06 2.56259e+06 4448.94 8.43 1.17288 1.02856 66498 666725 -1 25337 21 11769 22731 2983928 599958 0 0 2983928 599958 21317 13737 0 0 114590 102878 0 0 157808 125386 0 0 21951 15152 0 0 1311616 173115 0 0 1356646 169690 0 0 21317 0 0 9577 19036 19669 80749 1448 22 14.8704 14.8704 -1523.2 -14.8704 0 0 3.19068e+06 5539.38 1.52 1.15 0.64 -1 -1 1.52 0.412289 0.377043 1192 1872 -1 -1 -1 -1 k6_frac_N8_22nm.xml fir_nopipe_38.v common 23.84 vpr 69.82 MiB 0.13 14456 -1 -1 13 2.29 -1 -1 39136 -1 -1 160 22 0 11 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 71496 22 19 3159 2855 1 2172 212 24 24 576 mult_36 auto 32.5 MiB 0.64 15337 69.8 MiB 0.44 0.01 14.4084 -940.203 -14.4084 14.4084 1.48 0.00302875 0.00265189 0.178818 0.156722 74 30259 35 1.56141e+07 6.51152e+06 2.56259e+06 4448.94 12.51 1.52698 1.35053 66498 666725 -1 25578 20 12078 22960 2674420 542857 0 0 2674420 542857 21228 13939 0 0 113706 101270 0 0 158343 125181 0 0 21777 15614 0 0 1174697 148697 0 0 1184669 138156 0 0 21228 0 0 9177 19896 21161 78183 1763 104 15.2386 15.2386 -1564.1 -15.2386 0 0 3.19068e+06 5539.38 1.40 0.63 0.66 -1 -1 1.40 0.210823 0.191438 1207 1880 -1 -1 -1 -1 -k6_frac_N8_22nm.xml fir_nopipe_39.v common 28.11 vpr 70.57 MiB 0.17 14744 -1 -1 13 2.23 -1 -1 39360 -1 -1 169 22 0 11 success v8.0.0-7665-g5d69764bf 
Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 72264 22 19 3284 2963 1 2259 221 24 24 576 mult_36 auto 33.3 MiB 1.01 16556 70.6 MiB 0.90 0.01 14.8416 -966.85 -14.8416 14.8416 1.59 0.00650907 0.00585699 0.393855 0.346342 76 32170 30 1.56141e+07 6.63277e+06 2.61600e+06 4541.67 14.41 2.18024 1.93456 67070 679911 -1 26884 21 13810 27680 3176058 652222 0 0 3176058 652222 25655 16135 0 0 137932 124280 0 0 192955 149921 0 0 26692 18056 0 0 1400623 174048 0 0 1392201 169782 0 0 25655 0 0 11873 26622 27157 100786 2051 376 15.4681 15.4681 -1620.41 -15.4681 0 0 3.24203e+06 5628.53 1.62 1.27 0.67 -1 -1 1.62 0.473982 0.431196 1267 1957 -1 -1 -1 -1 +k6_frac_N8_22nm.xml fir_nopipe_39.v common 28.11 vpr 70.57 MiB 0.17 14744 -1 -1 13 2.23 -1 -1 39360 -1 -1 169 22 0 11 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 72264 22 19 3284 2963 1 2259 221 24 24 576 mult_36 auto 33.3 MiB 1.01 16556 70.6 MiB 0.90 0.01 14.8416 -966.85 -14.8416 14.8416 1.59 0.00650907 0.00585699 0.393855 0.346342 76 32170 30 1.56141e+07 6.63277e+06 2.61600e+06 4541.67 19 2.18024 1.93456 67070 679911 -1 26884 21 13810 27680 3176058 652222 0 0 3176058 652222 25655 16135 0 0 137932 124280 0 0 192955 149921 0 0 26692 18056 0 0 1400623 174048 0 0 1392201 169782 0 0 25655 0 0 11873 26622 27157 100786 2051 376 15.4681 15.4681 -1620.41 -15.4681 0 0 3.24203e+06 5628.53 1.62 1.27 0.67 -1 -1 1.62 0.473982 0.431196 1267 1957 -1 -1 -1 -1 k6_frac_N8_22nm.xml fir_nopipe_40.v common 26.77 vpr 70.73 MiB 0.17 14836 -1 -1 13 2.00 -1 -1 39440 -1 -1 169 22 0 11 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 
betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 72424 22 19 3343 3022 1 2282 221 24 24 576 mult_36 auto 33.6 MiB 1.05 16103 70.7 MiB 0.58 0.01 14.5379 -829.329 -14.5379 14.5379 2.04 0.00343434 0.00300382 0.232702 0.203659 80 29222 36 1.56141e+07 6.63277e+06 2.72095e+06 4723.87 14.59 2.49836 2.21411 68798 719145 -1 25637 21 12426 23913 3552178 755815 0 0 3552178 755815 22411 14441 0 0 128209 114994 0 0 173105 138175 0 0 23278 16247 0 0 1607227 230429 0 0 1597948 241529 0 0 22411 0 0 10013 19456 19893 83655 1535 171 14.9564 14.9564 -1203.73 -14.9564 0 0 3.41546e+06 5929.62 1.10 0.91 0.59 -1 -1 1.10 0.2891 0.260412 1284 1997 -1 -1 -1 -1 k6_frac_N8_22nm.xml fir_nopipe_41.v common 37.06 vpr 71.28 MiB 0.18 15156 -1 -1 13 2.57 -1 -1 41032 -1 -1 175 22 0 12 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 72988 22 19 3448 3110 1 2364 228 24 24 576 mult_36 auto 34.2 MiB 1.07 17649 71.3 MiB 1.02 0.02 14.3188 -964.321 -14.3188 14.3188 2.43 0.00675759 0.00584811 0.444538 0.388722 78 32357 39 1.56141e+07 7.1096e+06 2.67122e+06 4637.53 22.91 3.03376 2.67828 68222 705597 -1 28453 21 13551 26328 3635002 733011 0 0 3635002 733011 24522 15496 0 0 141779 127416 0 0 197677 156706 0 0 25322 17215 0 0 1581279 210240 0 0 1664423 205938 0 0 24522 0 0 11003 24627 22161 93773 1842 81 14.7304 14.7304 -1614.39 -14.7304 0 0 3.35110e+06 5817.88 1.09 0.86 0.49 -1 -1 1.09 0.25542 0.230798 1333 2054 -1 -1 -1 -1 k6_frac_N8_22nm.xml fir_nopipe_42.v common 38.22 vpr 72.71 MiB 0.19 15256 -1 -1 13 2.42 -1 -1 41432 -1 -1 179 22 0 12 success v8.0.0-7665-g5d69764bf Release IPO VTR_ASSERT_LEVEL=2 GNU 9.4.0 on Linux-4.15.0-197-generic x86_64 2023-04-24T18:37:45 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/RLplace_2/vtr-verilog-to-routing/vtr_flow/tasks 
74460 22 19 3510 3172 1 2403 232 24 24 576 mult_36 auto 34.7 MiB 0.87 18000 72.7 MiB 0.63 0.01 14.4441 -997.144 -14.4441 14.4441 1.61 0.00364618 0.0032181 0.254867 0.222427 78 33010 27 1.56141e+07 7.16349e+06 2.67122e+06 4637.53 24.63 3.03733 2.68228 68222 705597 -1 29079 20 13363 25835 3098611 622458 0 0 3098611 622458 24053 15542 0 0 131352 117386 0 0 185001 145637 0 0 24858 17574 0 0 1358194 166877 0 0 1375153 159442 0 0 24053 0 0 10717 23329 23655 92480 1844 32 14.9453 14.9453 -1699.6 -14.9453 0 0 3.35110e+06 5817.88 1.62 1.21 0.73 -1 -1 1.62 0.445071 0.40401 1352 2097 -1 -1 -1 -1 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/task_list.txt index 82389f84a1e..54defeb95f6 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test1_odin/task_list.txt @@ -11,6 +11,6 @@ regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/FIR_filters_frac regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/multless_consts regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/open_cores regression_tests/vtr_reg_nightly_test1_odin/arithmetic_tasks/open_cores_frac -regression_tests/vtr_reg_nightly_test1_odin/symbiflow +#regression_tests/vtr_reg_nightly_test1_odin/symbiflow regression_tests/vtr_reg_nightly_test1_odin/power_extended_arch_list regression_tests/vtr_reg_nightly_test1_odin/power_extended_circuit_list diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_clique_topology/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_clique_topology/config/config.txt new file mode 100644 index 00000000000..2a773936a9a --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_clique_topology/config/config.txt @@ -0,0 +1,34 @@ +############################################## +# Configuration file for 
running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/noc/Synthetic_Designs/complex_64_noc_clique/ + +# Path to directory of architectures to use +archs_dir=arch/multi_die/ + +# Path to directory of NoC Traffic Patterns to use +noc_traffics_dir=benchmarks/noc/Synthetic_Designs/complex_64_noc_clique + +# Add circuits to list to sweep +circuit_list_add=complex_64_noc_clique.blif + +# Add architectures to list to sweep +arch_list_add=3d_stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml + +# Add NoC Traffic Patterns to list to sweep +noc_traffic_list_add=complex_64_noc_clique.flows + +# Parse info and how to parse +parse_file=vpr_noc.txt + +# How to parse QoR info +qor_parse_file=qor_noc_spec.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +# Script parameters +script_params =-starting_stage vpr --noc on --noc_routing_algorithm xy_routing --device "EP4SE820" --pack --place + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_clique_topology/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_clique_topology/config/golden_results.txt new file mode 100644 index 00000000000..86934fc4bc0 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_clique_topology/config/golden_results.txt @@ -0,0 +1,2 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est 
placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time NoC_agg_bandwidth NoC_latency +stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_clique.blif common 8722.02 vpr 7.77 GiB -1 2 -1 -1 success v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8148772 2 64 249332 210540 1 129121 8146 220 162 35640 -1 EP4SE820 2824.5 MiB 402.18 1227222 7957.8 MiB 792.01 4.20 6.60816 -853447 -6.60816 6.60816 2267.92 0.667678 0.54378 90.027 73.7401 154 1426225 49 0 0 3.59543e+08 10088.2 4276.17 411.681 346.038 1425419 20 357462 849967 447693681 43661832 7.19548 7.19548 -1.04483e+06 -7.19548 0 0 4.57197e+08 12828.2 417.73 79.91 33.4499 29.4545 8.4624e+09 8.0592e-05 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_nearest_neighbor_topology/config/config.txt 
b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_nearest_neighbor_topology/config/config.txt new file mode 100644 index 00000000000..e08d2586bc5 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_nearest_neighbor_topology/config/config.txt @@ -0,0 +1,34 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/noc/Synthetic_Designs/complex_64_noc_nearest_neighbor/ + +# Path to directory of architectures to use +archs_dir=arch/multi_die/ + +# Path to directory of NoC Traffic Patterns to use +noc_traffics_dir=benchmarks/noc/Synthetic_Designs/complex_64_noc_nearest_neighbor + +# Add circuits to list to sweep +circuit_list_add=complex_64_noc_nearest_neighbor.blif + +# Add architectures to list to sweep +arch_list_add=3d_stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml + +# Add NoC Traffic Patterns to list to sweep +noc_traffic_list_add=complex_64_noc_nearest_neighbor.flows + +# Parse info and how to parse +parse_file=vpr_noc.txt + +# How to parse QoR info +qor_parse_file=qor_noc_spec.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +# Script parameters +script_params =-starting_stage vpr --noc on --noc_routing_algorithm xy_routing --device "EP4SE820" --pack --place + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_nearest_neighbor_topology/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_nearest_neighbor_topology/config/golden_results.txt new file mode 100644 index 00000000000..7bfc23cd7a9 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_nearest_neighbor_topology/config/golden_results.txt @@ -0,0 +1,2 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error num_clb num_io num_memories 
num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time NoC_agg_bandwidth NoC_latency +stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_2D_chain.blif common 8560.06 vpr 7.77 GiB -1 2 -1 -1 success v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-wintermute.eecg.utoronto.ca /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8144244 2 32 245317 207097 1 127846 7926 220 162 35640 -1 EP4SE820 2807.4 MiB 400.67 1238130 7953.4 MiB 769.07 5.03 6.71786 -823307 -6.71786 6.71786 2196.38 0.600359 
0.532866 91.0284 76.9373 154 1432666 41 0 0 3.59543e+08 10088.2 4213.30 388.018 328.35 1435190 17 353532 839730 444668516 43599148 7.3303 7.3303 -1.03553e+06 -7.3303 0 0 4.57197e+08 12828.2 437.18 75.13 30.7833 27.281 7.4e+07 6.28e-07 diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_star_topology/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_star_topology/config/config.txt new file mode 100644 index 00000000000..83f46463453 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_star_topology/config/config.txt @@ -0,0 +1,38 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/noc/Synthetic_Designs/complex_64_noc_star/ + +# Path to directory of architectures to use +archs_dir=arch/multi_die/ + +# Path to directory of NoC Traffic Patterns to use +noc_traffics_dir=benchmarks/noc/Synthetic_Designs/complex_64_noc_star/ + +# Add circuits to list to sweep +circuit_list_add=complex_64_noc_star.blif + + +# Add architectures to list to sweep +arch_list_add=3d_stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml + +# Add NoC Traffic Patterns to list to sweep +noc_traffic_list_add=complex_64_noc_star_no_constraints.flows +noc_traffic_list_add=complex_64_noc_star_2_bandwidths.flows +noc_traffic_list_add=complex_64_noc_star_6_bandwidths.flows +noc_traffic_list_add=complex_64_noc_star_24_latency_constraints.flows +noc_traffic_list_add=complex_64_noc_star_63_latency_constraints.flows + +# Parse info and how to parse +parse_file=vpr_noc.txt + +# How to parse QoR info +qor_parse_file=qor_noc_spec.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +# Script parameters +script_params_common =-starting_stage vpr --noc on --noc_routing_algorithm xy_routing --device "EP4SE820" --pack --place diff 
--git a/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_star_topology/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_star_topology/config/golden_results.txt new file mode 100644 index 00000000000..785e33bf66a --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_nightly_test5/vpr_3d_noc_star_topology/config/golden_results.txt @@ -0,0 +1,6 @@ +arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time NoC_agg_bandwidth NoC_latency 
+stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_star.blif common_-noc_flows_file_../../../../../../../../benchmarks/noc/Synthetic_Designs/complex_64_noc_star/complex_64_noc_star_no_constraints.flows 9220.99 vpr 7.73 GiB -1 2 -1 -1 success v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-pchenry /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8108432 2 32 239118 200960 1 125464 7868 220 162 35640 -1 EP4SE820 2747.6 MiB 451.61 1045127 7918.4 MiB 842.47 4.96 7.29224 -793365 -7.29224 7.29224 2452.84 0.655239 0.571814 107.218 87.7599 154 1195521 42 0 0 3.59543e+08 10088.2 4553.02 410.733 344.98 1194844 18 337553 761991 386559130 36912927 7.62869 7.62869 -982536 -7.62869 0 0 4.57197e+08 12828.2 372.54 65.96 28.4745 25.3252 9.6e+07 5.43e-07 +stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_star.blif common_-noc_flows_file_../../../../../../../../benchmarks/noc/Synthetic_Designs/complex_64_noc_star/complex_64_noc_star_2_bandwidths.flows 9063.36 vpr 7.73 GiB -1 2 -1 -1 success v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-pchenry /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8108084 2 32 239118 200960 1 125464 7868 220 162 35640 -1 EP4SE820 2747.4 MiB 442.55 1135803 7918.1 MiB 787.79 4.52 6.81554 -811789 -6.81554 6.81554 2376.12 0.650221 0.521368 100.126 80.0299 156 1282477 29 0 0 3.63383e+08 10195.9 4524.00 344.409 286.954 1284251 18 340249 763996 395948352 37975896 7.19262 7.19262 -1.01766e+06 -7.19262 0 0 4.60857e+08 12930.9 374.68 69.31 29.5326 26.1693 1.056e+08 5.51e-07 +stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_star.blif 
common_-noc_flows_file_../../../../../../../../benchmarks/noc/Synthetic_Designs/complex_64_noc_star/complex_64_noc_star_6_bandwidths.flows 8608.75 vpr 7.73 GiB -1 2 -1 -1 success v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-pchenry /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8108236 2 32 239118 200960 1 125464 7868 220 162 35640 -1 EP4SE820 2747.7 MiB 426.92 1113432 7918.2 MiB 824.40 4.65 7.74124 -815509 -7.74124 7.74124 2331.45 0.653023 0.521789 103.46 83.6904 156 1266529 23 0 0 3.63383e+08 10195.9 4077.60 341.899 286.116 1270380 18 332509 742781 383975630 36523096 8.09967 8.09967 -988125 -8.09967 0 0 4.60857e+08 12930.9 407.61 68.56 30.06 26.6489 2.636e+08 5.51e-07 +stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_star.blif common_-noc_flows_file_../../../../../../../../benchmarks/noc/Synthetic_Designs/complex_64_noc_star/complex_64_noc_star_24_latency_constraints.flows 9546.78 vpr 7.73 GiB -1 2 -1 -1 success v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-pchenry /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8108132 2 32 239118 200960 1 125464 7868 220 162 35640 -1 EP4SE820 2747.6 MiB 423.91 1068621 7918.1 MiB 811.89 4.04 7.72717 -803754 -7.72717 7.72717 2426.52 0.710002 0.571345 102.84 83.4583 154 1224191 48 0 0 3.59543e+08 10088.2 5008.21 405.36 340.486 1223376 17 334021 746747 382394575 36624473 7.87541 7.87541 -971559 -7.87541 0 0 4.57197e+08 12828.2 337.96 63.14 27.0181 24.0375 9.6e+07 5.43e-07 +stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml complex_64_noc_star.blif common_-noc_flows_file_../../../../../../../../benchmarks/noc/Synthetic_Designs/complex_64_noc_star/complex_64_noc_star_63_latency_constraints.flows 8773.66 vpr 7.73 GiB -1 2 -1 -1 success 
v8.0.0-6827-g874e0cb8d-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-167-generic x86_64 2023-01-19T13:42:08 betzgrp-pchenry /home/mahmo494/Desktop/add_noc_testcases/vtr-verilog-to-routing/vtr_flow/tasks 8108316 2 32 239118 200960 1 125464 7868 220 162 35640 -1 EP4SE820 2747.6 MiB 429.96 1059490 7918.3 MiB 784.75 4.56 7.39441 -807678 -7.39441 7.39441 2390.87 0.606083 0.527822 95.4872 77.8918 156 1207682 34 0 0 3.63383e+08 10195.9 4234.99 357.12 300.047 1211566 17 332447 741871 381156942 36122392 7.96259 7.96259 -1.01178e+06 -7.96259 0 0 4.60857e+08 12930.9 391.24 65.60 28.0846 24.9773 9.6e+07 5.49e-07