diff --git a/libs/libarchfpga/src/physical_types.cpp b/libs/libarchfpga/src/physical_types.cpp index f604e291657..28deac94516 100644 --- a/libs/libarchfpga/src/physical_types.cpp +++ b/libs/libarchfpga/src/physical_types.cpp @@ -110,7 +110,12 @@ std::vector t_physical_tile_type::get_clock_pins_indices() const { int clock_pins_start_idx = 0; int clock_pins_stop_idx = 0; - for (int capacity_num = 0; capacity_num < this->capacity; capacity_num++) { + int num_capacity = 1; + if (capacity_type == e_capacity_type::DUPLICATE) { + num_capacity = this->capacity; + } + + for (int capacity_num = 0; capacity_num < num_capacity; capacity_num++) { // Ranges are picked on the basis that pins are ordered: inputs, outputs, then clock pins // This is because ProcessPb_type assigns pb_type port indices in that order and // SetupPinLocationsAndPinClasses assigns t_logical_block_type_ptr pin indices in the order of port indices diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index 36901e6f0ed..e28832bf918 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -523,6 +523,12 @@ enum class e_sb_type { }; +enum class e_capacity_type { + DUPLICATE, // Capacity duplicates ports. + EXPLICIT // Capacity increases the number of logical tiles, but does not + // modify the physical ports. +}; + constexpr int NO_SWITCH = -1; constexpr int DEFAULT_SWITCH = -2; @@ -577,6 +583,7 @@ struct t_physical_tile_type { int num_clock_pins = 0; int capacity = 0; + e_capacity_type capacity_type = e_capacity_type::DUPLICATE; int width = 0; int height = 0; @@ -625,18 +632,20 @@ struct t_physical_tile_type { * vtr::bimap container. 
*/ struct t_logical_pin { + int z_index = -1; int pin = -1; - t_logical_pin(int value) { + t_logical_pin(int z_index_value, int value) { + z_index = z_index_value; pin = value; } bool operator==(const t_logical_pin o) const { - return pin == o.pin; + return z_index == o.z_index && pin == o.pin; } bool operator<(const t_logical_pin o) const { - return pin < o.pin; + return std::make_pair(z_index, pin) < std::make_pair(o.z_index, o.pin); } }; diff --git a/libs/libarchfpga/src/read_xml_arch_file.cpp b/libs/libarchfpga/src/read_xml_arch_file.cpp index 32381facab6..bba80f64fda 100644 --- a/libs/libarchfpga/src/read_xml_arch_file.cpp +++ b/libs/libarchfpga/src/read_xml_arch_file.cpp @@ -86,10 +86,10 @@ static void LoadPinLoc(pugi::xml_node Locations, t_physical_tile_type* type, const pugiutil::loc_data& loc_data); template -static std::pair ProcessPinString(pugi::xml_node Locations, - T type, - const char* pin_loc_string, - const pugiutil::loc_data& loc_data); +static std::pair> ProcessPinString(pugi::xml_node Locations, + T type, + const char* pin_loc_string, + const pugiutil::loc_data& loc_data); /* Process XML hierarchy */ static void ProcessTiles(pugi::xml_node Node, @@ -427,7 +427,12 @@ static void SetupPinLocationsAndPinClasses(pugi::xml_node Locations, pugi::xml_node Cur; - capacity = PhysicalTileType->capacity; + if (PhysicalTileType->capacity_type == e_capacity_type::DUPLICATE) { + capacity = PhysicalTileType->capacity; + } else { + VTR_ASSERT(PhysicalTileType->capacity_type == e_capacity_type::EXPLICIT); + capacity = 1; + } if (!Locations) { PhysicalTileType->pin_location_distribution = E_SPREAD_PIN_DISTR; } else { @@ -811,23 +816,40 @@ static void LoadPinLoc(pugi::xml_node Locations, VTR_ASSERT(ipin == output_pins.size()); } else { + int capacity; + if (type->capacity_type == e_capacity_type::DUPLICATE) { + capacity = type->capacity; + } else { + VTR_ASSERT(type->capacity_type == e_capacity_type::EXPLICIT); + capacity = 1; + } + 
VTR_ASSERT(type->pin_location_distribution == E_CUSTOM_PIN_DISTR); for (int width = 0; width < type->width; ++width) { for (int height = 0; height < type->height; ++height) { for (e_side side : {TOP, RIGHT, BOTTOM, LEFT}) { for (int pin = 0; pin < type->num_pin_loc_assignments[width][height][side]; ++pin) { - auto pin_range = ProcessPinString(Locations, - type, - type->pin_loc_assignments[width][height][side][pin], - loc_data); + int instance_idx; + std::pair pin_range; + std::tie(instance_idx, pin_range) = ProcessPinString(Locations, + type, + type->pin_loc_assignments[width][height][side][pin], + loc_data); + if (instance_idx != 0) { + archfpga_throw( + loc_data.filename_c_str(), + loc_data.line(Locations), + "Instance index is not allocated on custom pin: %s\n", + type->pin_loc_assignments[width][height][side][pin]); + } for (int pin_num = pin_range.first; pin_num < pin_range.second; ++pin_num) { - VTR_ASSERT(pin_num < type->num_pins / type->capacity); - for (int capacity = 0; capacity < type->capacity; ++capacity) { - type->pinloc[width][height][side][pin_num + capacity * type->num_pins / type->capacity] = true; - type->pin_width_offset[pin_num + capacity * type->num_pins / type->capacity] += width; - type->pin_height_offset[pin_num + capacity * type->num_pins / type->capacity] += height; - physical_pin_counts[pin_num + capacity * type->num_pins / type->capacity] += 1; + VTR_ASSERT(pin_num < type->num_pins / capacity); + for (int icapacity = 0; icapacity < capacity; ++icapacity) { + type->pinloc[width][height][side][pin_num + icapacity * type->num_pins / capacity] = true; + type->pin_width_offset[pin_num + icapacity * type->num_pins / capacity] += width; + type->pin_height_offset[pin_num + icapacity * type->num_pins / capacity] += height; + physical_pin_counts[pin_num + icapacity * type->num_pins / capacity] += 1; } } } @@ -848,10 +870,11 @@ static void LoadPinLoc(pugi::xml_node Locations, } template -static std::pair ProcessPinString(pugi::xml_node Locations, 
- T type, - const char* pin_loc_string, - const pugiutil::loc_data& loc_data) { +static std::pair> ProcessPinString(pugi::xml_node Locations, + T type, + const char* pin_loc_string, + const pugiutil::loc_data& loc_data) { + int instance_idx = 0; int num_tokens; auto tokens = GetTokensFromString(pin_loc_string, &num_tokens); @@ -866,6 +889,31 @@ static std::pair ProcessPinString(pugi::xml_node Locations, token_index++; token = tokens[token_index]; + if (token.type == TOKEN_OPEN_SQUARE_BRACKET) { + token_index++; + token = tokens[token_index]; + + if (token.type != TOKEN_INT) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations), + "Instance index is not a valid number: %s\n", + pin_loc_string); + } + + instance_idx = vtr::atoi(token.data); + + token_index++; + token = tokens[token_index]; + + if (token.type != TOKEN_CLOSE_SQUARE_BRACKET) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations), + "Missing close brace on instance index: %s\n", + pin_loc_string); + } + + token_index++; + token = tokens[token_index]; + } + if (token.type != TOKEN_DOT) { archfpga_throw(loc_data.filename_c_str(), loc_data.line(Locations), "No dot is present to separate type name and port name: %s\n", pin_loc_string); @@ -893,7 +941,7 @@ static std::pair ProcessPinString(pugi::xml_node Locations, // All the pins of the port are taken or the port has a single pin if (token_index == num_tokens) { freeTokens(tokens, num_tokens); - return std::make_pair(abs_first_pin_idx, abs_first_pin_idx + port->num_pins); + return std::make_pair(instance_idx, std::make_pair(abs_first_pin_idx, abs_first_pin_idx + port->num_pins)); } token = tokens[token_index]; @@ -931,7 +979,7 @@ static std::pair ProcessPinString(pugi::xml_node Locations, } freeTokens(tokens, num_tokens); - return std::make_pair(abs_first_pin_idx + first_pin, abs_first_pin_idx + first_pin + 1); + return std::make_pair(instance_idx, std::make_pair(abs_first_pin_idx + first_pin, abs_first_pin_idx + first_pin + 
1)); } token_index++; @@ -964,7 +1012,7 @@ static std::pair ProcessPinString(pugi::xml_node Locations, } freeTokens(tokens, num_tokens); - return std::make_pair(abs_first_pin_idx + first_pin, abs_first_pin_idx + last_pin + 1); + return std::make_pair(instance_idx, std::make_pair(abs_first_pin_idx + first_pin, abs_first_pin_idx + last_pin + 1)); } static void ProcessPinToPinAnnotations(pugi::xml_node Parent, @@ -2065,9 +2113,16 @@ static void Process_Fc(pugi::xml_node Node, /* Go through all the port/segment combinations and create the (potentially * overriden) pin/seg Fc specifications */ - int pins_per_capacity_instance = PhysicalTileType->num_pins / PhysicalTileType->capacity; + int capacity; + if (PhysicalTileType->capacity_type == e_capacity_type::DUPLICATE) { + capacity = PhysicalTileType->capacity; + } else { + VTR_ASSERT(PhysicalTileType->capacity_type == e_capacity_type::EXPLICIT); + capacity = 1; + } + int pins_per_capacity_instance = PhysicalTileType->num_pins / capacity; for (size_t iseg = 0; iseg < segments.size(); ++iseg) { - for (int icapacity = 0; icapacity < PhysicalTileType->capacity; ++icapacity) { + for (int icapacity = 0; icapacity < capacity; ++icapacity) { //If capacity > 0, we need t offset the block index by the number of pins per instance //this ensures that all pins have an Fc specification int iblk_pin = icapacity * pins_per_capacity_instance; @@ -2998,12 +3053,21 @@ static void ProcessTiles(pugi::xml_node Node, /* Process tile port definitions */ ProcessTilePorts(CurTileType, &PhysicalTileType, loc_data); - PhysicalTileType.num_pins = PhysicalTileType.capacity - * (PhysicalTileType.num_input_pins - + PhysicalTileType.num_output_pins - + PhysicalTileType.num_clock_pins); - PhysicalTileType.num_receivers = PhysicalTileType.capacity * PhysicalTileType.num_input_pins; - PhysicalTileType.num_drivers = PhysicalTileType.capacity * PhysicalTileType.num_output_pins; + if (PhysicalTileType.capacity_type == e_capacity_type::DUPLICATE) { + 
PhysicalTileType.num_pins = PhysicalTileType.capacity + * (PhysicalTileType.num_input_pins + + PhysicalTileType.num_output_pins + + PhysicalTileType.num_clock_pins); + PhysicalTileType.num_receivers = PhysicalTileType.capacity * PhysicalTileType.num_input_pins; + PhysicalTileType.num_drivers = PhysicalTileType.capacity * PhysicalTileType.num_output_pins; + } else { + VTR_ASSERT(PhysicalTileType.capacity_type == e_capacity_type::EXPLICIT); + PhysicalTileType.num_pins = (PhysicalTileType.num_input_pins + + PhysicalTileType.num_output_pins + + PhysicalTileType.num_clock_pins); + PhysicalTileType.num_receivers = PhysicalTileType.num_input_pins; + PhysicalTileType.num_drivers = PhysicalTileType.num_output_pins; + } /* Assign Fc, Pin locations ans Switch Block locations to the Physical Tile Type */ @@ -3050,7 +3114,7 @@ static void ProcessTiles(pugi::xml_node Node, static void ProcessTileProps(pugi::xml_node Node, t_physical_tile_type* PhysicalTileType, const pugiutil::loc_data& loc_data) { - expect_only_attributes(Node, {"name", "capacity", "width", "height", "area"}, loc_data); + expect_only_attributes(Node, {"name", "capacity", "capacity_type", "width", "height", "area"}, loc_data); /* Load type name */ auto Prop = get_attribute(Node, "name", loc_data).value(); @@ -3066,6 +3130,20 @@ static void ProcessTileProps(pugi::xml_node Node, archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), "Area for type %s must be non-negative\n", PhysicalTileType->name); } + + const char* capacity_type = get_attribute(Node, "capacity_type", loc_data, ReqOpt::OPTIONAL).as_string(nullptr); + if (capacity_type) { + if (strcmp(capacity_type, "duplicate") == 0) { + PhysicalTileType->capacity_type = e_capacity_type::DUPLICATE; + } else if (strcmp(capacity_type, "explicit") == 0) { + PhysicalTileType->capacity_type = e_capacity_type::EXPLICIT; + } else { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), + "Unknown 'capacity_type' %s\n", capacity_type); + } + } else { + 
PhysicalTileType->capacity_type = e_capacity_type::DUPLICATE; + } } static void ProcessTilePorts(pugi::xml_node Parent, @@ -3261,11 +3339,16 @@ static void ProcessEquivalentSiteDirectConnection(pugi::xml_node Parent, "Pin definition differ between site %s and tile %s. User-defined pin mapping is required.\n", LogicalBlockType->pb_type->name, PhysicalTileType->name); } + if (PhysicalTileType->capacity_type != e_capacity_type::DUPLICATE) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent), + "Custom site pins are required if capacity_type == explicit\n"); + } + vtr::bimap directs_map; for (int npin = 0; npin < num_pins; npin++) { t_physical_pin physical_pin(npin); - t_logical_pin logical_pin(npin); + t_logical_pin logical_pin(/*z_index=*/0, npin); directs_map.insert(logical_pin, physical_pin); } @@ -3302,8 +3385,35 @@ static void ProcessEquivalentSiteCustomConnection(pugi::xml_node Parent, // `to` attribute is relative to the logical block pins to = std::string(get_attribute(CurDirect, "to", loc_data).value()); - auto from_pins = ProcessPinString(CurDirect, PhysicalTileType, from.c_str(), loc_data); - auto to_pins = ProcessPinString(CurDirect, LogicalBlockType, to.c_str(), loc_data); + int to_idx; + int from_idx; + std::pair from_pins; + std::pair to_pins; + std::tie(from_idx, from_pins) = ProcessPinString(CurDirect, PhysicalTileType, from.c_str(), loc_data); + std::tie(to_idx, to_pins) = ProcessPinString(CurDirect, LogicalBlockType, to.c_str(), loc_data); + + if (from_idx != 0) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent), + "No physical tile instance index is allowed: %s.\n", from.c_str()); + } + + if (PhysicalTileType->capacity_type == e_capacity_type::DUPLICATE) { + if (to_idx != 0) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent), + "Logical tile instances are not allowed with capacity_type == DUPLICATE: %s\n", + to.c_str()); + } + } else if (PhysicalTileType->capacity_type == e_capacity_type::EXPLICIT) { 
+ if (to_idx < 0 || to_idx >= PhysicalTileType->capacity) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent), + "Logical tile instance %d is out of bounds of capacity: %s\n", + to_idx, to.c_str()); + } + } else { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(Parent), + "Unknown capacity_type %d.\n", + static_cast<int>(PhysicalTileType->capacity_type)); /* scoped enums are not promoted to int for %d */ + } // Checking that the number of pins is exactly the same if (from_pins.second - from_pins.first != to_pins.second - to_pins.first) { @@ -3316,7 +3426,7 @@ static void ProcessEquivalentSiteCustomConnection(pugi::xml_node Parent, int num_pins = from_pins.second - from_pins.first; for (int i = 0; i < num_pins; i++) { t_physical_pin physical_pin(from_pins.first + i); - t_logical_pin logical_pin(to_pins.first + i); + t_logical_pin logical_pin(to_idx, to_pins.first + i); auto result = directs_map.insert(logical_pin, physical_pin); if (!result.second) { @@ -4861,33 +4971,43 @@ static void link_physical_logical_types(std::vector& Physi for (int pin = 0; pin < logical_block.pb_type->num_pins; pin++) { for (auto& tile : logical_block.equivalent_tiles) { - auto direct_map = tile->tile_block_pin_directs_map.at(logical_block.index); - auto result = direct_map.find(t_logical_pin(pin)); - if (result == direct_map.end()) { - archfpga_throw(__FILE__, __LINE__, - "Logical pin %d not present in pin mapping between Tile %s and Block %s.\n", - pin, tile->name, logical_block.name); + int capacity; + if (tile->capacity_type == e_capacity_type::DUPLICATE) { + capacity = 1; + } else { + VTR_ASSERT(tile->capacity_type == e_capacity_type::EXPLICIT); + capacity = tile->capacity; } - int phy_index = result->second.pin; + for (int z_index = 0; z_index < capacity; ++z_index) { + const auto& direct_map = tile->tile_block_pin_directs_map.at(logical_block.index); + auto result = direct_map.find(t_logical_pin(z_index, pin)); + if (result == direct_map.end()) { + archfpga_throw(__FILE__, __LINE__, + "Logical pin %d not present in pin 
mapping between Tile %s and Block %s.\n", + pin, tile->name, logical_block.name); + } - bool is_ignored = tile->is_ignored_pin[phy_index]; - bool is_global = tile->is_pin_global[phy_index]; + int phy_index = result->second.pin; - auto ignored_result = ignored_pins_check_map.insert(std::pair(pin, is_ignored)); - if (!ignored_result.second && ignored_result.first->second != is_ignored) { - archfpga_throw(__FILE__, __LINE__, - "Physical Tile %s has a different value for the ignored pin (physical pin: %d, logical pin: %d) " - "different from the corresponding pins of the other equivalent sites\n.", - tile->name, phy_index, pin); - } + bool is_ignored = tile->is_ignored_pin[phy_index]; + bool is_global = tile->is_pin_global[phy_index]; - auto global_result = global_pins_check_map.insert(std::pair(pin, is_global)); - if (!global_result.second && global_result.first->second != is_global) { - archfpga_throw(__FILE__, __LINE__, - "Physical Tile %s has a different value for the global pin (physical pin: %d, logical pin: %d) " - "different from the corresponding pins of the other equivalent sites\n.", - tile->name, phy_index, pin); + auto ignored_result = ignored_pins_check_map.insert(std::pair(pin, is_ignored)); + if (!ignored_result.second && ignored_result.first->second != is_ignored) { + archfpga_throw(__FILE__, __LINE__, + "Physical Tile %s has a different value for the ignored pin (physical pin: %d, logical pin: %d) " + "different from the corresponding pins of the other equivalent sites\n.", + tile->name, phy_index, pin); + } + + auto global_result = global_pins_check_map.insert(std::pair(pin, is_global)); + if (!global_result.second && global_result.first->second != is_global) { + archfpga_throw(__FILE__, __LINE__, + "Physical Tile %s has a different value for the global pin (physical pin: %d, logical pin: %d) " + "different from the corresponding pins of the other equivalent sites\n.", + tile->name, phy_index, pin); + } } } } @@ -4905,7 +5025,15 @@ static void 
check_port_direct_mappings(t_physical_tile_type_ptr physical_tile, t auto& pin_direct_mapping = physical_tile->tile_block_pin_directs_map.at(logical_block->index); - if (pb_type->num_pins != (int)pin_direct_mapping.size()) { + int capacity; + if (physical_tile->capacity_type == e_capacity_type::DUPLICATE) { + capacity = 1; + } else { + VTR_ASSERT(physical_tile->capacity_type == e_capacity_type::EXPLICIT); + capacity = physical_tile->capacity; + } + + if (pb_type->num_pins * capacity != (int)pin_direct_mapping.size()) { archfpga_throw(__FILE__, __LINE__, "Logical block (%s) and Physical tile (%s) have a different number of ports.\n", logical_block->name, physical_tile->name); diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index ac2f324eb92..dc9d1e51d01 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -79,7 +79,7 @@ void printClusteredNetlistStats() { num_blocks_type[logical_block->index]++; if (is_io_type(physical_tile)) { for (j = 0; j < logical_block->pb_type->num_pins; j++) { - int physical_pin = get_physical_pin(physical_tile, logical_block, j); + int physical_pin = get_physical_pin(physical_tile, /*z_index=*/0, logical_block, j); auto pin_class = physical_tile->pin_class[physical_pin]; auto class_inf = physical_tile->class_inf[pin_class]; diff --git a/vpr/src/base/check_netlist.cpp b/vpr/src/base/check_netlist.cpp index ee77a8b8fff..aeafa487e82 100644 --- a/vpr/src/base/check_netlist.cpp +++ b/vpr/src/base/check_netlist.cpp @@ -95,7 +95,7 @@ static int check_connections_to_global_clb_pins(ClusterNetId net_id, int verbosi auto physical_type = pick_best_physical_type(logical_type); int log_index = cluster_ctx.clb_nlist.pin_logical_index(pin_id); - int pin_index = get_physical_pin(physical_type, logical_type, log_index); + int pin_index = get_physical_pin(physical_type, /*z_index=*/0, logical_type, log_index); if (physical_type->is_ignored_pin[pin_index] != net_is_ignored && !is_io_type(physical_type)) { diff --git 
a/vpr/src/base/read_netlist.cpp b/vpr/src/base/read_netlist.cpp index a44ad17e6b3..c2b570740f4 100644 --- a/vpr/src/base/read_netlist.cpp +++ b/vpr/src/base/read_netlist.cpp @@ -951,7 +951,7 @@ static void load_external_nets_and_cb(ClusteredNetlist& clb_nlist) { block_type = clb_nlist.block_type(blk_id); auto tile_type = pick_best_physical_type(block_type); for (j = 0; j < block_type->pb_type->num_pins; j++) { - int physical_pin = get_physical_pin(tile_type, block_type, j); + int physical_pin = get_physical_pin(tile_type, /*z_index=*/0, block_type, j); //Iterate through each pin of the block, and see if there is a net allocated/used for it clb_net_id = clb_nlist.block_net(blk_id, j); @@ -1001,7 +1001,7 @@ static void load_external_nets_and_cb(ClusteredNetlist& clb_nlist) { block_type = clb_nlist.block_type(clb_nlist.pin_block(pin_id)); auto tile_type = pick_best_physical_type(block_type); int logical_pin = clb_nlist.pin_logical_index(pin_id); - int physical_pin = get_physical_pin(tile_type, block_type, logical_pin); + int physical_pin = get_physical_pin(tile_type, /*z_index=*/0, block_type, logical_pin); if (tile_type->is_ignored_pin[physical_pin] != is_ignored_net) { VTR_LOG_WARN( diff --git a/vpr/src/base/read_route.cpp b/vpr/src/base/read_route.cpp index 9ec4069fe2c..6d4dd1142ed 100644 --- a/vpr/src/base/read_route.cpp +++ b/vpr/src/base/read_route.cpp @@ -23,6 +23,7 @@ #include "rr_graph.h" #include "vtr_assert.h" #include "vtr_util.h" +#include "vtr_time.h" #include "tatum/echo_writer.hpp" #include "vtr_log.h" #include "check_route.h" @@ -40,6 +41,7 @@ #include "echo_files.h" #include "route_common.h" #include "read_route.h" +#include "rr_graph2.h" /*************Functions local to this module*************/ static void process_route(std::ifstream& fp, const char* filename, int& lineno); @@ -192,7 +194,7 @@ static void process_nets(std::ifstream& fp, ClusterNetId inet, std::string name, static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* 
filename, int& lineno) { /* Not a global net. Goes through every node and add it into trace.head*/ - auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + auto& cluster_ctx = g_vpr_ctx.clustering(); auto& device_ctx = g_vpr_ctx.mutable_device(); auto& route_ctx = g_vpr_ctx.mutable_routing(); auto& place_ctx = g_vpr_ctx.placement(); @@ -206,6 +208,45 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file std::string input; std::vector tokens; + // Build lookup from SOURCE/SINK node to ClusterBlockId. + std::unordered_map node_to_block; + + { + vtr::ScopedStartFinishTimer timer("Building ClusterBlockId lookup"); + + for (auto net_id : cluster_ctx.clb_nlist.nets()) { + int pin_count = 0; + for (auto pin_id : cluster_ctx.clb_nlist.net_pins(net_id)) { + auto block_id = cluster_ctx.clb_nlist.pin_block(pin_id); + + const auto* logical_tile = cluster_ctx.clb_nlist.block_type(block_id); + const auto* physical_tile = physical_tile_type(block_id); + VTR_ASSERT(block_id); + int i = place_ctx.block_locs[block_id].loc.x; + int j = place_ctx.block_locs[block_id].loc.y; + + int logical_pin_index = cluster_ctx.clb_nlist.pin_logical_index(pin_id); + int physical_pin_index = get_physical_pin( + physical_tile, place_ctx.block_locs[block_id].loc.z, + logical_tile, logical_pin_index); + int physical_pin_class = physical_tile->pin_class[physical_pin_index]; + int class_inode = get_rr_node_index(device_ctx.rr_node_indices, + i, j, (pin_count == 0 ? 
SOURCE : SINK), /* First pin is driver */ + physical_pin_class); + + auto result = node_to_block.insert(std::make_pair(class_inode, block_id)); + if (!result.second && result.first->second != block_id) { + vpr_throw(VPR_ERROR_ROUTE, filename, lineno, + "Clustered netlist has inconsistent rr node mapping, class rr node %d has two block ids %zu and %zu?", + class_inode, (size_t)block_id, (size_t)result.first->second); /* %zu requires size_t, so cast both block ids */ + } + pin_count++; + } + } + + VTR_LOG("ClusterBlockId lookup has %zu entries\n", node_to_block.size()); + } + /*Walk through every line that begins with Node:*/ while (std::getline(fp, input)) { ++lineno; @@ -285,9 +326,22 @@ static void process_nodes(std::ifstream& fp, ClusterNetId inet, const char* file if (tokens[6 + offset] != "Switch:") { /*This is an opin or ipin, process its pin nums*/ if (!is_io_type(device_ctx.grid[x][y].type) && (tokens[2] == "IPIN" || tokens[2] == "OPIN")) { + // Convert this IPIN/OPIN back to class. + auto rr_type = device_ctx.rr_nodes[inode].type(); + VTR_ASSERT(rr_type == IPIN || rr_type == OPIN); int pin_num = device_ctx.rr_nodes[inode].ptc_num(); - int height_offset = device_ctx.grid[x][y].height_offset; - ClusterBlockId iblock = place_ctx.grid_blocks[x][y - height_offset].blocks[0]; + int iclass = device_ctx.grid[x][y].type->pin_class[pin_num]; + int class_inode = get_rr_node_index(device_ctx.rr_node_indices, + x, y, (rr_type == OPIN ? 
SOURCE : SINK), iclass); + + auto itr = node_to_block.find(class_inode); + if (itr == node_to_block.end()) { + vpr_throw(VPR_ERROR_ROUTE, filename, lineno, + "Class RR node %d does not have an associated ClusterBlockId?", class_inode); + } + + ClusterBlockId iblock = itr->second; + VTR_ASSERT(iblock); t_pb_graph_pin* pb_pin = get_pb_graph_node_pin_from_block_pin(iblock, pin_num); t_pb_type* pb_type = pb_pin->parent_node->pb_type; diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 06b3731cf2b..9cd7e176bb0 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -286,6 +286,7 @@ struct RoutingContext : public Context { vtr::vector> trace_nodes; vtr::vector> net_rr_terminals; /* [0..num_nets-1][0..num_pins-1] */ + std::unordered_map rr_net_map; vtr::vector> rr_blk_source; /* [0..num_blocks-1][0..num_class-1] */ diff --git a/vpr/src/draw/draw.cpp b/vpr/src/draw/draw.cpp index d29ede76133..e260f8aacc9 100644 --- a/vpr/src/draw/draw.cpp +++ b/vpr/src/draw/draw.cpp @@ -2661,7 +2661,7 @@ void draw_highlight_blocks_color(t_logical_block_type_ptr type, ClusterBlockId b continue; auto physical_tile = physical_tile_type(blk_id); - int physical_pin = get_physical_pin(physical_tile, type, k); + int physical_pin = get_physical_pin(physical_tile, /*z_index=*/0, type, k); iclass = physical_tile->pin_class[physical_pin]; diff --git a/vpr/src/pack/output_clustering.cpp b/vpr/src/pack/output_clustering.cpp index bb8a98ae5d0..257793b86ff 100644 --- a/vpr/src/pack/output_clustering.cpp +++ b/vpr/src/pack/output_clustering.cpp @@ -66,7 +66,7 @@ static void print_stats() { auto logical_block = cluster_ctx.clb_nlist.block_type(blk_id); auto physical_tile = pick_best_physical_type(logical_block); for (ipin = 0; ipin < logical_block->pb_type->num_pins; ipin++) { - int physical_pin = get_physical_pin(physical_tile, logical_block, ipin); + int physical_pin = get_physical_pin(physical_tile, /*z_index=*/0, logical_block, ipin); auto pin_class = 
physical_tile->pin_class[physical_pin]; auto pin_class_inf = physical_tile->class_inf[pin_class]; diff --git a/vpr/src/place/place_macro.cpp b/vpr/src/place/place_macro.cpp index 5411e3223f8..18b89732735 100644 --- a/vpr/src/place/place_macro.cpp +++ b/vpr/src/place/place_macro.cpp @@ -82,7 +82,7 @@ static void find_all_the_macro(int* num_of_macro, std::vector& p num_blk_pins = cluster_ctx.clb_nlist.block_type(blk_id)->pb_type->num_pins; for (to_iblk_pin = 0; to_iblk_pin < num_blk_pins; to_iblk_pin++) { - int to_physical_pin = get_physical_pin(physical_tile, logical_block, to_iblk_pin); + int to_physical_pin = get_physical_pin(physical_tile, /*z_index=*/0, logical_block, to_iblk_pin); to_net_id = cluster_ctx.clb_nlist.block_net(blk_id, to_iblk_pin); to_idirect = f_idirect_from_blk_pin[physical_tile->index][to_physical_pin]; @@ -102,7 +102,7 @@ static void find_all_the_macro(int* num_of_macro, std::vector& p || (is_constant_clb_net(to_net_id) && !net_is_driven_by_direct(to_net_id)))) { for (from_iblk_pin = 0; from_iblk_pin < num_blk_pins; from_iblk_pin++) { - int from_physical_pin = get_physical_pin(physical_tile, logical_block, from_iblk_pin); + int from_physical_pin = get_physical_pin(physical_tile, /*z_index=*/0, logical_block, from_iblk_pin); from_net_id = cluster_ctx.clb_nlist.block_net(blk_id, from_iblk_pin); from_idirect = f_idirect_from_blk_pin[physical_tile->index][from_physical_pin]; diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index c6dd9449a4e..4da1df55862 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -86,7 +86,8 @@ static t_trace_branch traceback_branch(int node, std::unordered_set& main_b static std::pair add_trace_non_configurable(t_trace* head, t_trace* tail, int node, std::unordered_set& visited); static std::pair add_trace_non_configurable_recurr(int node, std::unordered_set& visited, int depth = 0); -static vtr::vector> load_net_rr_terminals(const t_rr_node_indices& 
L_rr_node_indices); +static vtr::vector> load_net_rr_terminals(const t_rr_node_indices& L_rr_node_indices, + std::unordered_map* rr_net_map); static vtr::vector> load_rr_clb_sources(const t_rr_node_indices& L_rr_node_indices); static t_clb_opins_used alloc_and_load_clb_opins_used_locally(); @@ -479,7 +480,7 @@ void init_route_structs(int bb_factor) { Bucket::init(device_ctx.grid); //Various look-ups - route_ctx.net_rr_terminals = load_net_rr_terminals(device_ctx.rr_node_indices); + route_ctx.net_rr_terminals = load_net_rr_terminals(device_ctx.rr_node_indices, &route_ctx.rr_net_map); route_ctx.route_bb = load_route_bb(bb_factor); route_ctx.rr_blk_source = load_rr_clb_sources(device_ctx.rr_node_indices); route_ctx.clb_opins_used_locally = alloc_and_load_clb_opins_used_locally(); @@ -990,7 +991,11 @@ void reset_rr_node_route_structs() { /* Allocates and loads the route_ctx.net_rr_terminals data structure. For each net it stores the rr_node * * index of the SOURCE of the net and all the SINKs of the net [clb_nlist.nets()][clb_nlist.net_pins()]. * * Entry [inet][pnum] stores the rr index corresponding to the SOURCE (opin) or SINK (ipin) of the pin. */ -static vtr::vector> load_net_rr_terminals(const t_rr_node_indices& L_rr_node_indices) { +static vtr::vector> load_net_rr_terminals( + const t_rr_node_indices& L_rr_node_indices, + std::unordered_map* rr_net_map) { + VTR_ASSERT(rr_net_map != nullptr); + rr_net_map->clear(); vtr::vector> net_rr_terminals; auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -1020,6 +1025,16 @@ static vtr::vector> load_net_rr_terminals(const t int inode = get_rr_node_index(L_rr_node_indices, i, j, (pin_count == 0 ? SOURCE : SINK), /* First pin is driver */ iclass); net_rr_terminals[net_id][pin_count] = inode; + + auto result = rr_net_map->insert(std::make_pair(inode, block_id)); + // If the map already contains an entry for inode, make sure it + // is consistent with the existing entry. 
+ if (!result.second && block_id != result.first->second) { + VPR_FATAL_ERROR(VPR_ERROR_ROUTE, + "Clustered netlist has inconsistent rr node mapping, class rr node %d has two block ids %zu and %zu?", + inode, (size_t)block_id, (size_t)result.first->second); + } + pin_count++; } } @@ -1263,11 +1278,18 @@ void print_route(FILE* fp, const vtr::vector& traceba fprintf(fp, "%d ", device_ctx.rr_nodes[inode].ptc_num()); if (!is_io_type(device_ctx.grid[ilow][jlow].type) && (rr_type == IPIN || rr_type == OPIN)) { + // Go from IPIN/OPIN to SOURCE/SINK + auto* type = device_ctx.grid[ilow][jlow].type; int pin_num = device_ctx.rr_nodes[inode].ptc_num(); - int xoffset = device_ctx.grid[ilow][jlow].width_offset; - int yoffset = device_ctx.grid[ilow][jlow].height_offset; - ClusterBlockId iblock = place_ctx.grid_blocks[ilow - xoffset][jlow - yoffset].blocks[0]; - VTR_ASSERT(iblock); + int iclass = type->pin_class[pin_num]; + int class_inode = get_rr_node_index(device_ctx.rr_node_indices, + ilow, jlow, (rr_type == OPIN ? SOURCE : SINK), iclass); + + // Use the rr_net_map to go from class inode back to ClusterBlockId. 
+ auto itr = route_ctx.rr_net_map.find(class_inode); + VTR_ASSERT(itr != route_ctx.rr_net_map.end()); + ClusterBlockId iblock = itr->second; + t_pb_graph_pin* pb_pin = get_pb_graph_node_pin_from_block_pin(iblock, pin_num); t_pb_type* pb_type = pb_pin->parent_node->pb_type; fprintf(fp, " %s.%s[%d] ", pb_type->name, pb_pin->port->name, pb_pin->pin_number); diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 13a0da96c0f..578d92ee68c 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -192,15 +192,21 @@ std::string block_type_pin_index_to_name(t_physical_tile_type_ptr type, int pin_ std::string pin_name = type->name; - if (type->capacity > 1) { - int pins_per_inst = type->num_pins / type->capacity; - int inst_num = pin_index / pins_per_inst; - pin_index %= pins_per_inst; + if (type->capacity_type == e_capacity_type::DUPLICATE) { + if (type->capacity > 1) { + int pins_per_inst = type->num_pins / type->capacity; + int inst_num = pin_index / pins_per_inst; + pin_index %= pins_per_inst; - pin_name += "[" + std::to_string(inst_num) + "]"; - } + pin_name += "[" + std::to_string(inst_num) + "]"; + } - pin_name += "."; + pin_name += "."; + } else { + VTR_ASSERT(type->capacity_type == e_capacity_type::EXPLICIT); + VTR_ASSERT(pin_index < type->num_pins); + pin_name += "."; + } int curr_index = 0; for (auto const& port : type->ports) { @@ -319,7 +325,7 @@ std::vector find_clb_pin_connected_atom_pins(ClusterBlockId clb, int auto logical_block = clb_nlist.block_type(clb); auto physical_tile = pick_best_physical_type(logical_block); - int physical_pin = get_physical_pin(physical_tile, logical_block, logical_pin); + int physical_pin = get_physical_pin(physical_tile, /*z_index=*/0, logical_block, logical_pin); if (is_opin(physical_pin, physical_tile)) { //output @@ -2067,21 +2073,18 @@ void place_sync_external_block_connections(ClusterBlockId iblk) { auto physical_tile = physical_tile_type(iblk); auto logical_block = 
clb_nlist.block_type(iblk); - VTR_ASSERT(physical_tile->num_pins % physical_tile->capacity == 0); - int max_num_block_pins = physical_tile->num_pins / physical_tile->capacity; - /* Logical location and physical location is offset by z * max_num_block_pins */ - for (auto pin : clb_nlist.block_pins(iblk)) { int logical_pin_index = clb_nlist.pin_logical_index(pin); - int physical_pin_index = get_physical_pin(physical_tile, logical_block, logical_pin_index); - - int new_physical_pin_index = physical_pin_index + place_ctx.block_locs[iblk].loc.z * max_num_block_pins; - auto result = place_ctx.physical_pins.find(pin); - if (result != place_ctx.physical_pins.end()) { - place_ctx.physical_pins[pin] = new_physical_pin_index; + int new_physical_pin = get_physical_pin( + physical_tile, place_ctx.block_locs[iblk].loc.z, + logical_block, logical_pin_index); + auto iter = place_ctx.physical_pins.find(pin); + if (iter != place_ctx.physical_pins.end()) { + *iter = new_physical_pin; } else { - place_ctx.physical_pins.insert(pin, new_physical_pin_index); + place_ctx.physical_pins.insert( + pin, new_physical_pin); } } } @@ -2123,32 +2126,34 @@ t_logical_block_type_ptr pick_best_logical_type(t_physical_tile_type_ptr physica return physical_tile->equivalent_sites[0]; } -int get_logical_pin(t_physical_tile_type_ptr physical_tile, - t_logical_block_type_ptr logical_block, - int pin) { - t_physical_pin physical_pin(pin); - - auto direct_map = physical_tile->tile_block_pin_directs_map.at(logical_block->index); - auto result = direct_map.find(physical_pin); +int get_physical_pin(const ClusterBlockId blk, + t_logical_block_type_ptr logical_block, + int pin) { + auto& place_ctx = g_vpr_ctx.placement(); + auto& device_ctx = g_vpr_ctx.device(); - if (result == direct_map.inverse_end()) { - VTR_LOG_WARN( - "Couldn't find the corresponding logical pin of the physical pin %d." 
- "Physical Tile: %s, Logical Block: %s.\n", - pin, physical_tile->name, logical_block->name); - return OPEN; - } + auto block_loc = place_ctx.block_locs[blk]; + auto loc = block_loc.loc; - return result->second.pin; + return get_physical_pin( + device_ctx.grid[loc.x][loc.y].type, + loc.z, + logical_block, + pin); } int get_physical_pin(t_physical_tile_type_ptr physical_tile, + int z_index, t_logical_block_type_ptr logical_block, int pin) { - t_logical_pin logical_pin(pin); - - auto direct_map = physical_tile->tile_block_pin_directs_map.at(logical_block->index); - auto result = direct_map.find(logical_pin); + const auto& direct_map = physical_tile->tile_block_pin_directs_map.at(logical_block->index); + auto result = direct_map.begin(); + if (physical_tile->capacity_type == e_capacity_type::DUPLICATE) { + result = direct_map.find(t_logical_pin(/*z_index=*/0, pin)); + } else { + VTR_ASSERT(physical_tile->capacity_type == e_capacity_type::EXPLICIT); + result = direct_map.find(t_logical_pin(z_index, pin)); + } if (result == direct_map.end()) { VTR_LOG_WARN( @@ -2158,7 +2163,15 @@ int get_physical_pin(t_physical_tile_type_ptr physical_tile, return OPEN; } - return result->second.pin; + int physical_pin_index = result->second.pin; + if (physical_tile->capacity_type == e_capacity_type::DUPLICATE) { + int max_num_block_pins = physical_tile->num_pins / physical_tile->capacity; + /* Logical location and physical location is offset by z * max_num_block_pins */ + return physical_pin_index + z_index * max_num_block_pins; + } else { + VTR_ASSERT(physical_tile->capacity_type == e_capacity_type::EXPLICIT); + return physical_pin_index; + } } int net_pin_to_tile_pin_index(const ClusterNetId net_id, int net_pin_index) { diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index c511bc243df..9bba08e3fb6 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -159,10 +159,11 @@ bool is_tile_compatible(t_physical_tile_type_ptr physical_tile, t_logical_block_ 
t_physical_tile_type_ptr pick_best_physical_type(t_logical_block_type_ptr logical_block); t_logical_block_type_ptr pick_best_logical_type(t_physical_tile_type_ptr physical_tile); -int get_logical_pin(t_physical_tile_type_ptr physical_tile, - t_logical_block_type_ptr logical_block, - int pin); int get_physical_pin(t_physical_tile_type_ptr physical_tile, + int z_index, + t_logical_block_type_ptr logical_block, + int pin); +int get_physical_pin(const ClusterBlockId blk, t_logical_block_type_ptr logical_block, int pin);