diff --git a/libs/EXTERNAL/libinterchange/interchange/DeviceResources.capnp b/libs/EXTERNAL/libinterchange/interchange/DeviceResources.capnp index 09c8d438555..111cf20f994 100644 --- a/libs/EXTERNAL/libinterchange/interchange/DeviceResources.capnp +++ b/libs/EXTERNAL/libinterchange/interchange/DeviceResources.capnp @@ -59,9 +59,8 @@ annotation wireRef(*) :WireRef; using WireIdx = UInt32; struct WireTypeRef { - type @0 :Ref.ReferenceType = parent; + type @0 :Ref.ReferenceType = rootValue; field @1 :Text = "wireTypes"; - depth @2 :Int32 = 1; } annotation wireTypeRef(*) :WireTypeRef; using WireTypeIdx = UInt32; @@ -81,19 +80,15 @@ using TileTypeSiteTypeIdx = UInt32; using TileTypeSubTileIdx = UInt16; struct PIPTimingRef { - type @0 :Ref.ReferenceType = parent; - field @1 :Text = "pipTimingList"; - depth @2 :Int32 = 1; - + type @0 :Ref.ReferenceType = rootValue; + field @1 :Text = "pipTimings"; } annotation pipTimingRef(*) :PIPTimingRef; using PipTimingIdx = UInt32; struct NodeTimingRef { - type @0 :Ref.ReferenceType = parent; - field @1 :Text = "nodeTimingList"; - depth @2 :Int32 = 1; - + type @0 :Ref.ReferenceType = rootValue; + field @1 :Text = "nodeTimings"; } annotation nodeTimingRef(*) :NodeTimingRef; using NodeTimingIdx = UInt32; @@ -632,7 +627,7 @@ struct Device { } struct PinDelay { - pin @0 : BELPinIdx $belPinRef(); + pin @0 : BELPinIdx; union { noClock @1 : Void; clockEdge @2 : ClockEdge; diff --git a/libs/libarchfpga/src/fpga_interchange_arch_utils.cpp b/libs/libarchfpga/src/fpga_interchange_arch_utils.cpp new file mode 100644 index 00000000000..014f1155596 --- /dev/null +++ b/libs/libarchfpga/src/fpga_interchange_arch_utils.cpp @@ -0,0 +1,186 @@ +#include "fpga_interchange_arch_utils.h" + +/****************** Utility functions ******************/ + +/** + * @brief The FPGA interchange timing model includes three different corners (min, typ and max) for each of the two + * speed_models (slow and fast). + * + * Timing data can be found on PIPs, nodes, site pins and bel pins. + * This function retrieves the timing value based on the wanted speed model and the wanted corner. + * + * Corner model is considered valid if at least one configuration is set. + * In that case this value shall be returned. + * + * More information on the FPGA Interchange timing model can be found here: + * - https://github.com/chipsalliance/fpga-interchange-schema/blob/main/interchange/DeviceResources.capnp + */ + +float get_corner_value(DeviceResources::Device::CornerModel::Reader model, const char* speed_model, const char* value) { + bool slow_model = std::string(speed_model) == std::string("slow"); + bool fast_model = std::string(speed_model) == std::string("fast"); + + bool min_corner = std::string(value) == std::string("min"); + bool typ_corner = std::string(value) == std::string("typ"); + bool max_corner = std::string(value) == std::string("max"); + + if (!slow_model && !fast_model) { + archfpga_throw("", __LINE__, "Wrong speed model `%s`. Expected `slow` or `fast`\n", speed_model); + } + + if (!min_corner && !typ_corner && !max_corner) { + archfpga_throw("", __LINE__, "Wrong corner model `%s`. Expected `min`, `typ` or `max`\n", value); + } + + bool has_fast = model.getFast().hasFast(); + bool has_slow = model.getSlow().hasSlow(); + + if ((slow_model || (fast_model && !has_fast)) && has_slow) { + auto half = model.getSlow().getSlow(); + if (min_corner && half.getMin().isMin()) { + return half.getMin().getMin(); + } else if (typ_corner && half.getTyp().isTyp()) { + return half.getTyp().getTyp(); + } else if (max_corner && half.getMax().isMax()) { + return half.getMax().getMax(); + } else { + if (half.getMin().isMin()) { + return half.getMin().getMin(); + } else if (half.getTyp().isTyp()) { + return half.getTyp().getTyp(); + } else if (half.getMax().isMax()) { + return half.getMax().getMax(); + } else { + archfpga_throw("", __LINE__, "Invalid speed model %s. No value found!\n", speed_model); + } + } + } else if ((fast_model || slow_model) && has_fast) { + auto half = model.getFast().getFast(); + if (min_corner && half.getMin().isMin()) { + return half.getMin().getMin(); + } else if (typ_corner && half.getTyp().isTyp()) { + return half.getTyp().getTyp(); + } else if (max_corner && half.getMax().isMax()) { + return half.getMax().getMax(); + } else { + if (half.getMin().isMin()) { + return half.getMin().getMin(); + } else if (half.getTyp().isTyp()) { + return half.getTyp().getTyp(); + } else if (half.getMax().isMax()) { + return half.getMax().getMax(); + } else { + archfpga_throw("", __LINE__, "Invalid speed model %s. No value found!\n", speed_model); + } + } + } + return 0.; +} + +char* int_to_string(char* buff, int value) { + if (value < 10) { + return &(*buff = '0' + value) + 1; + } else { + return &(*int_to_string(buff, value / 10) = '0' + value % 10) + 1; + } +} + +void pip_types(std::set>& seen, + DeviceResources::Device::Reader ar_) { + for (const auto& tile : ar_.getTileTypeList()) { + for (const auto& pip : tile.getPips()) { + /* + * FIXME + * right now we allow for pseudo pips even tho we don't check if they are avaiable + * if (pip.isPseudoCells()) + * continue; + */ + seen.emplace(pip.getTiming(), pip.getBuffered21()); + if (!pip.getDirectional()) { + seen.emplace(pip.getTiming(), pip.getBuffered20()); + } + } + } +} + +// This function is using templates for first argument +void fill_switch(t_arch_switch_inf* switch_, + float R, + float Cin, + float Cout, + float Cinternal, + float Tdel, + float mux_trans_size, + float buf_size, + char* name, + SwitchType type) { + switch_->R = R; + switch_->Cin = Cin; + switch_->Cout = Cout; + switch_->Cinternal = Cinternal; + switch_->set_Tdel(t_arch_switch_inf::UNDEFINED_FANIN, Tdel); + switch_->mux_trans_size = mux_trans_size; + switch_->buf_size = buf_size; + switch_->name = name; + switch_->set_type(type); +} + +void process_cell_bel_mappings(DeviceResources::Device::Reader ar_, + std::unordered_map>& bel_cell_mappings_, + std::function str) { + auto primLib = ar_.getPrimLibs(); + auto portList = primLib.getPortList(); + + for (auto cell_mapping : ar_.getCellBelMap()) { + size_t cell_name = cell_mapping.getCell(); + + int found_valid_prim = false; + for (auto primitive : primLib.getCellDecls()) { + if (cell_name != primitive.getName()) continue; + if (str(primitive.getLib()) != std::string("primitives")) continue; + + bool has_inout = false; + for (auto port_idx : primitive.getPorts()) { + auto port = portList[port_idx]; + + if (port.getDir() == INOUT) { + has_inout = true; + break; + } + } + + if (!has_inout) { + found_valid_prim = true; + break; + } + } + + if (!found_valid_prim) + continue; + + for (auto common_pins : cell_mapping.getCommonPins()) { + std::vector> pins; + + for (auto pin_map : common_pins.getPins()) + pins.emplace_back(pin_map.getCellPin(), pin_map.getBelPin()); + + for (auto site_type_entry : common_pins.getSiteTypes()) { + size_t site_type = site_type_entry.getSiteType(); + + for (auto bel : site_type_entry.getBels()) { + t_bel_cell_mapping mapping; + + mapping.cell = cell_name; + mapping.site = site_type; + mapping.pins = pins; + + std::set maps{mapping}; + auto res = bel_cell_mappings_.emplace(bel, maps); + if (!res.second) { + res.first->second.insert(mapping); + } + } + } + } + } +} diff --git a/libs/libarchfpga/src/fpga_interchange_arch_utils.h b/libs/libarchfpga/src/fpga_interchange_arch_utils.h new file mode 100644 index 00000000000..40d95ed6918 --- /dev/null +++ b/libs/libarchfpga/src/fpga_interchange_arch_utils.h @@ -0,0 +1,98 @@ +#ifndef FPGAINTERCHANGE_ARCH_UTILS_FILE_H +#define FPGAINTERCHANGE_ARCH_UTILS_FILE_H + +#include "arch_types.h" +#include "arch_util.h" +#include "arch_error.h" +#include "vtr_error.h" +#include "vtr_util.h" + +#include "DeviceResources.capnp.h" +#include "LogicalNetlist.capnp.h" +#include "capnp/serialize.h" +#include "capnp/serialize-packed.h" +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Necessary to reduce code verbosity when getting the pin directions +static const auto INPUT = LogicalNetlist::Netlist::Direction::INPUT; +static const auto OUTPUT = LogicalNetlist::Netlist::Direction::OUTPUT; +static const auto INOUT = LogicalNetlist::Netlist::Direction::INOUT; + +struct t_bel_cell_mapping { + size_t cell; + size_t site; + std::vector> pins; + + bool operator<(const t_bel_cell_mapping& other) const { + return cell < other.cell || (cell == other.cell && site < other.site); + } +}; + +/****************** Utility functions ******************/ + +/** + * @brief The FPGA interchange timing model includes three different corners (min, typ and max) for each of the two + * speed_models (slow and fast). + * + * Timing data can be found on PIPs, nodes, site pins and bel pins. + * This function retrieves the timing value based on the wanted speed model and the wanted corner. + * + * Corner model is considered valid if at least one configuration is set. + * In that case this value shall be returned. + * + * More information on the FPGA Interchange timing model can be found here: + * - https://github.com/chipsalliance/fpga-interchange-schema/blob/main/interchange/DeviceResources.capnp + */ + +float get_corner_value(DeviceResources::Device::CornerModel::Reader model, const char* speed_model, const char* value); + +char* int_to_string(char* buff, int value); + +void pip_types(std::set>& seen, + DeviceResources::Device::Reader ar_); + +void process_cell_bel_mappings(DeviceResources::Device::Reader ar_, + std::unordered_map>& bel_cell_mappings_, + std::function str); + +#ifdef __cplusplus +} +#endif +template +void fill_switch(T* switch_, + float R, + float Cin, + float Cout, + float Cinternal, + float Tdel, + float mux_trans_size, + float buf_size, + char* name, + SwitchType type); + +void fill_switch(t_arch_switch_inf* switch_, + float R, + float Cin, + float Cout, + float Cinternal, + float Tdel, + float mux_trans_size, + float buf_size, + char* name, + SwitchType type); + +template +void process_switches_array(DeviceResources::Device::Reader ar_, + std::set>& seen, + T1 switch_array, + std::vector, int>>& pips_models_); + +#include "fpga_interchange_arch_utils.impl.h" +#endif diff --git a/libs/libarchfpga/src/fpga_interchange_arch_utils.impl.h b/libs/libarchfpga/src/fpga_interchange_arch_utils.impl.h new file mode 100644 index 00000000000..52fe9206c55 --- /dev/null +++ b/libs/libarchfpga/src/fpga_interchange_arch_utils.impl.h @@ -0,0 +1,93 @@ +/****************** Utility functions ******************/ + +/** + * @brief The FPGA interchange timing model includes three different corners (min, typ and max) for each of the two + * speed_models (slow and fast). + * + * Timing data can be found on PIPs, nodes, site pins and bel pins. + * This function retrieves the timing value based on the wanted speed model and the wanted corner. + * + * Corner model is considered valid if at least one configuration is set. + * In that case this value shall be returned. + * + * More information on the FPGA Interchange timing model can be found here: + * - https://github.com/chipsalliance/fpga-interchange-schema/blob/main/interchange/DeviceResources.capnp + */ + +template +void fill_switch(T* switch_, + float R, + float Cin, + float Cout, + float Cinternal, + float Tdel, + float mux_trans_size, + float buf_size, + char* name, + SwitchType type) { + switch_->R = R; + switch_->Cin = Cin; + switch_->Cout = Cout; + switch_->Cinternal = Cinternal; + switch_->Tdel = Tdel; + switch_->mux_trans_size = mux_trans_size; + switch_->buf_size = buf_size; + switch_->name = name; + switch_->set_type(type); +} + +template +void process_switches_array(DeviceResources::Device::Reader ar_, + std::set>& seen, + T1 switch_array, + std::vector, int>>& pips_models_) { + fill_switch(&switch_array[T2(0)], 0, 0, 0, 0, 0, 0, 0, + vtr::strdup("short"), SwitchType::SHORT); + fill_switch(&switch_array[T2(1)], 0, 0, 0, 0, 0, 0, 0, + vtr::strdup("generic"), SwitchType::MUX); + + const auto& pip_models = ar_.getPipTimings(); + float R, Cin, Cout, Cint, Tdel; + std::string switch_name; + SwitchType type; + int id = 2; + for (const auto& value : seen) { + int timing_model_id; + int mux_trans_size; + bool buffered; + std::tie(timing_model_id, buffered) = value; + const auto& model = pip_models[timing_model_id]; + pips_models_.emplace_back(std::make_tuple(value, id)); + + R = Cin = Cint = Cout = Tdel = 0.0; + std::stringstream name; + std::string mux_type_string = buffered ? "mux_" : "passGate_"; + name << mux_type_string; + + R = get_corner_value(model.getOutputResistance(), "slow", "min"); + name << "R" << std::scientific << R; + + Cin = get_corner_value(model.getInputCapacitance(), "slow", "min"); + name << "Cin" << std::scientific << Cin; + + Cout = get_corner_value(model.getOutputCapacitance(), "slow", "min"); + name << "Cout" << std::scientific << Cout; + + if (buffered) { + Cint = get_corner_value(model.getInternalCapacitance(), "slow", "min"); + name << "Cinternal" << std::scientific << Cint; + } + + Tdel = get_corner_value(model.getInternalDelay(), "slow", "min"); + name << "Tdel" << std::scientific << Tdel; + + switch_name = name.str(); + type = buffered ? SwitchType::MUX : SwitchType::PASS_GATE; + mux_trans_size = buffered ? 1 : 0; + + fill_switch(&switch_array[T2(id)], R, Cin, Cout, Cint, Tdel, + mux_trans_size, 0, vtr::strdup(switch_name.c_str()), type); + + id++; + } +} diff --git a/libs/libarchfpga/src/read_fpga_interchange_arch.cpp b/libs/libarchfpga/src/read_fpga_interchange_arch.cpp index 4d56a3f535b..fe618c6aca4 100644 --- a/libs/libarchfpga/src/read_fpga_interchange_arch.cpp +++ b/libs/libarchfpga/src/read_fpga_interchange_arch.cpp @@ -21,6 +21,7 @@ #include "arch_types.h" #include "read_fpga_interchange_arch.h" +#include "fpga_interchange_arch_utils.h" /* * FPGA Interchange Device frontend @@ -36,11 +37,6 @@ using namespace DeviceResources; using namespace LogicalNetlist; using namespace capnp; -// Necessary to reduce code verbosity when getting the pin directions -static const auto INPUT = LogicalNetlist::Netlist::Direction::INPUT; -static const auto OUTPUT = LogicalNetlist::Netlist::Direction::OUTPUT; -static const auto INOUT = LogicalNetlist::Netlist::Direction::INOUT; - static const auto LOGIC = Device::BELCategory::LOGIC; static const auto ROUTING = Device::BELCategory::ROUTING; static const auto SITE_PORT = Device::BELCategory::SITE_PORT; @@ -58,16 +54,6 @@ struct t_package_pin { std::string bel_name; }; -struct t_bel_cell_mapping { - size_t cell; - size_t site; - std::vector> pins; - - bool operator<(const t_bel_cell_mapping& other) const { - return cell < other.cell || (cell == other.cell && site < other.site); - } -}; - // Intermediate data type to store information on interconnects to be created struct t_ic_data { std::string input; @@ -78,82 +64,6 @@ struct t_ic_data { /****************** Utility functions ******************/ -/** - * @brief The FPGA interchange timing model includes three different corners (min, typ and max) for each of the two - * speed_models (slow and fast). - * - * Timing data can be found on PIPs, nodes, site pins and bel pins. - * This function retrieves the timing value based on the wanted speed model and the wanted corner. - * - * More information on the FPGA Interchange timing model can be found here: - * - https://github.com/chipsalliance/fpga-interchange-schema/blob/main/interchange/DeviceResources.capnp - */ -static float get_corner_value(Device::CornerModel::Reader model, const char* speed_model, const char* value) { - bool slow_model = std::string(speed_model) == std::string("slow"); - bool fast_model = std::string(speed_model) == std::string("fast"); - - bool min_corner = std::string(value) == std::string("min"); - bool typ_corner = std::string(value) == std::string("typ"); - bool max_corner = std::string(value) == std::string("max"); - - if (!slow_model && !fast_model) { - archfpga_throw("", __LINE__, - "Wrong speed model `%s`. Expected `slow` or `fast`\n", speed_model); - } - - if (!min_corner && !typ_corner && !max_corner) { - archfpga_throw("", __LINE__, - "Wrong corner model `%s`. Expected `min`, `typ` or `max`\n", value); - } - - bool has_fast = model.getFast().hasFast(); - bool has_slow = model.getSlow().hasSlow(); - - if (slow_model && has_slow) { - auto half = model.getSlow().getSlow(); - if (min_corner && half.getMin().isMin()) { - return half.getMin().getMin(); - } else if (typ_corner && half.getTyp().isTyp()) { - return half.getTyp().getTyp(); - } else if (max_corner && half.getMax().isMax()) { - return half.getMax().getMax(); - } else { - if (half.getMin().isMin()) { - return half.getMin().getMin(); - } else if (half.getTyp().isTyp()) { - return half.getTyp().getTyp(); - } else if (half.getMax().isMax()) { - return half.getMax().getMax(); - } else { - archfpga_throw("", __LINE__, - "Invalid speed model %s. No value found!\n", speed_model); - } - } - } else if (fast_model && has_fast) { - auto half = model.getFast().getFast(); - if (min_corner && half.getMin().isMin()) { - return half.getMin().getMin(); - } else if (typ_corner && half.getTyp().isTyp()) { - return half.getTyp().getTyp(); - } else if (max_corner && half.getMax().isMax()) { - return half.getMax().getMax(); - } else { - if (half.getMin().isMin()) { - return half.getMin().getMin(); - } else if (half.getTyp().isTyp()) { - return half.getTyp().getTyp(); - } else if (half.getMax().isMax()) { - return half.getMax().getMax(); - } else { - archfpga_throw("", __LINE__, - "Invalid speed model %s. No value found!\n", speed_model); - } - } - } - - return 0.; -} - /** @brief Returns the port corresponding to the given model in the architecture */ static t_model_ports* get_model_port(t_arch* arch, std::string model, std::string port, bool fail = true) { for (t_model* m : {arch->models, arch->model_library}) { @@ -258,6 +168,29 @@ static t_pin_to_pin_annotation get_pack_pattern(std::string pp_name, std::string return pp; } +static void add_segment_with_default_values(t_segment_inf& seg, std::string name) { + // Use default values as we will populate rr_graph with correct values + // This segments are just declaration of future use + seg.name = name; + seg.length = 1; + seg.frequency = 1; + seg.Rmetal = 1e2; + seg.Cmetal = 1e-15; + seg.parallel_axis = BOTH_AXIS; + + // TODO: Only bi-directional segments are created, but it the interchange format + // has directionality information on PIPs, which may be used to infer the + // segments' directonality. + seg.directionality = BI_DIRECTIONAL; + seg.arch_wire_switch = 1; + seg.arch_opin_switch = 1; + seg.cb.resize(1); + seg.cb[0] = true; + seg.sb.resize(2); + seg.sb[0] = true; + seg.sb[1] = true; +} + /****************** End Utility functions ******************/ struct ArchReader { @@ -284,7 +217,8 @@ struct ArchReader { // Preprocess arch information process_luts(); process_package_pins(); - process_cell_bel_mappings(); + auto func = std::bind(&ArchReader::str, this, std::placeholders::_1); + process_cell_bel_mappings(ar_, bel_cell_mappings_, func); process_constants(); process_bels_and_sites(); @@ -334,7 +268,6 @@ struct ArchReader { // Bel Cell mappings std::unordered_map> bel_cell_mappings_; - std::unordered_map segment_name_to_segment_idx; // Utils @@ -448,6 +381,24 @@ struct ArchReader { return pad_bels_.count(name) != 0; } + void add_ltype(std::function map, + const char* name, + t_sub_tile& sub_tile, + t_physical_tile_type& type) { + vtr::bimap directs_map; + auto ltype = get_type_by_name(name, ltypes_); + + for (int npin = 0; npin < ltype->pb_type->num_ports; npin++) { + t_physical_pin physical_pin(map(npin)); + t_logical_pin logical_pin(npin); + + directs_map.insert(logical_pin, physical_pin); + } + + type.tile_block_pin_directs_map[ltype->index][sub_tile.index] = directs_map; + sub_tile.equivalent_sites.push_back(ltype); + } + /** @brief Utility function to fill in all the necessary information for the sub_tile * * Given a physical tile type and a corresponding sub tile with additional information on the IO pin count @@ -460,7 +411,13 @@ struct ArchReader { * - equivalent_sites * - tile_block_pin_directs_map **/ - void fill_sub_tile(t_physical_tile_type& type, t_sub_tile& sub_tile, int num_pins, int input_count, int output_count) { + void fill_sub_tile(t_physical_tile_type& type, + t_sub_tile& sub_tile, + int num_pins, + int input_count, + int output_count, + std::unordered_map* port_name_to_sub_tile_idx = nullptr, + Device::SiteTypeInTileType::Reader* site_in_tile = nullptr) { sub_tile.num_phy_pins += num_pins; type.num_pins += num_pins; type.num_inst_pins += num_pins; @@ -482,19 +439,35 @@ struct ArchReader { } } - vtr::bimap directs_map; + if (site_in_tile) { + auto site_types = ar_.getSiteTypeList(); + auto site_type = site_types[site_in_tile->getPrimaryType()]; + auto alt_sites_pins = site_in_tile->getAltPinsToPrimaryPins(); - for (int npin = 0; npin < type.num_pins; npin++) { - t_physical_pin physical_pin(npin); - t_logical_pin logical_pin(npin); + if (take_sites_.count(site_type.getName()) != 0) { + std::function map = [](int x) { return x; }; + add_ltype(map, sub_tile.name, sub_tile, type); + } - directs_map.insert(logical_pin, physical_pin); - } + for (int i = 0; i < (int)site_type.getAltSiteTypes().size(); ++i) { + auto alt_site = site_types[site_type.getAltSiteTypes()[i]]; - auto ltype = get_type_by_name(sub_tile.name, ltypes_); - sub_tile.equivalent_sites.push_back(ltype); + if (take_sites_.count(alt_site.getName()) == 0) + continue; - type.tile_block_pin_directs_map[ltype->index][sub_tile.index] = directs_map; + auto pin_map = alt_sites_pins[i]; + + std::function map = [pin_map, site_type, port_name_to_sub_tile_idx, this](int x) { + auto pin = site_type.getPins()[pin_map.getPins()[x]]; + return (*port_name_to_sub_tile_idx)[str(pin.getName())]; + }; + + add_ltype(map, str(alt_site.getName()).c_str(), sub_tile, type); + } + } else { + std::function map = [](int x) { return x; }; + add_ltype(map, sub_tile.name, sub_tile, type); + } // Assign FC specs int iblk_pin = 0; @@ -769,7 +742,22 @@ struct ArchReader { if (found) take_sites_.insert(site_type.getName()); - // TODO: Enable also alternative site types handling + for (auto alt_site_idx : site_type.getAltSiteTypes()) { + auto alt_site = site_types[alt_site_idx]; + found = false; + for (auto bel : alt_site.getBels()) { + auto bel_name = bel.getName(); + bool res = bel_cell_mappings_.find(bel_name) != bel_cell_mappings_.end(); + + found |= res; + + if (res || is_pad(str(bel_name))) + take_bels_.insert(bel_name); + } + + if (found) + take_sites_.insert(alt_site.getName()); + } } } } @@ -836,64 +824,6 @@ struct ArchReader { } } - void process_cell_bel_mappings() { - auto primLib = ar_.getPrimLibs(); - auto portList = primLib.getPortList(); - - for (auto cell_mapping : ar_.getCellBelMap()) { - size_t cell_name = cell_mapping.getCell(); - - int found_valid_prim = false; - for (auto primitive : primLib.getCellDecls()) { - bool is_prim = str(primitive.getLib()) == std::string("primitives"); - bool is_cell = cell_name == primitive.getName(); - - bool has_inout = false; - for (auto port_idx : primitive.getPorts()) { - auto port = portList[port_idx]; - - if (port.getDir() == INOUT) { - has_inout = true; - break; - } - } - - if (is_prim && is_cell && !has_inout) { - found_valid_prim = true; - break; - } - } - - if (!found_valid_prim) - continue; - - for (auto common_pins : cell_mapping.getCommonPins()) { - std::vector> pins; - - for (auto pin_map : common_pins.getPins()) - pins.emplace_back(pin_map.getCellPin(), pin_map.getBelPin()); - - for (auto site_type_entry : common_pins.getSiteTypes()) { - size_t site_type = site_type_entry.getSiteType(); - - for (auto bel : site_type_entry.getBels()) { - t_bel_cell_mapping mapping; - - mapping.cell = cell_name; - mapping.site = site_type; - mapping.pins = pins; - - std::set maps{mapping}; - auto res = bel_cell_mappings_.emplace(bel, maps); - if (!res.second) { - res.first->second.insert(mapping); - } - } - } - } - } - } - void process_constants() { auto consts = ar_.getConstants(); @@ -1957,8 +1887,14 @@ struct ArchReader { bool has_valid_sites = false; - for (auto site_type : tile.getSiteTypes()) - has_valid_sites |= take_sites_.count(siteTypeList[site_type.getPrimaryType()].getName()) != 0; + for (auto site_type : tile.getSiteTypes()) { + auto site_ = siteTypeList[site_type.getPrimaryType()]; + has_valid_sites |= take_sites_.count(site_.getName()) != 0; + for (auto alt_site_idx : site_.getAltSiteTypes()) { + auto alt_site_ = siteTypeList[alt_site_idx]; + has_valid_sites |= take_sites_.count(alt_site_.getName()) != 0; + } + } if (!has_valid_sites) continue; @@ -1991,18 +1927,22 @@ struct ArchReader { } void process_sub_tiles(t_physical_tile_type& type, Device::TileType::Reader& tile) { - // TODO: only one subtile at the moment auto siteTypeList = ar_.getSiteTypeList(); for (auto site_in_tile : tile.getSiteTypes()) { t_sub_tile sub_tile; + bool site_taken = false; + auto site = siteTypeList[site_in_tile.getPrimaryType()]; + site_taken |= take_sites_.count(site.getName()) != 0; + for (auto alt_site_idx : site.getAltSiteTypes()) { + auto alt_site = siteTypeList[alt_site_idx]; + site_taken |= take_sites_.count(alt_site.getName()) != 0; + } - if (take_sites_.count(site.getName()) == 0) + if (!site_taken) continue; - auto pins_to_wires = site_in_tile.getPrimaryPinsToTileWires(); - sub_tile.index = type.capacity; sub_tile.name = vtr::strdup(str(site.getName()).c_str()); sub_tile.capacity.set(type.capacity, type.capacity); @@ -2013,8 +1953,7 @@ struct ArchReader { int icount = 0; int ocount = 0; - std::unordered_map port_name_to_wire_name; - int idx = 0; + std::unordered_map port_name_to_sub_tile_idx; for (auto dir : {INPUT, OUTPUT}) { int port_idx_by_type = 0; for (auto pin : site.getPins()) { @@ -2025,9 +1964,9 @@ struct ArchReader { port.name = vtr::strdup(str(pin.getName()).c_str()); - port_name_to_wire_name[std::string(port.name)] = str(pins_to_wires[idx++]); - sub_tile.sub_tile_to_tile_pin_indices.push_back(type.num_pins + port_idx); + + port_name_to_sub_tile_idx[str(pin.getName())] = port_idx; port.index = port_idx++; port.absolute_first_pin_index = abs_first_pin_idx++; @@ -2046,9 +1985,9 @@ struct ArchReader { } auto pins_size = site.getPins().size(); - fill_sub_tile(type, sub_tile, pins_size, icount, ocount); + fill_sub_tile(type, sub_tile, pins_size, icount, ocount, &port_name_to_sub_tile_idx, &site_in_tile); - type.sub_tiles.push_back(sub_tile); + type.sub_tiles.emplace_back(sub_tile); } } @@ -2205,6 +2144,7 @@ struct ArchReader { t_sub_tile sub_tile; sub_tile.index = 0; sub_tile.name = vtr::strdup(const_block_.c_str()); + sub_tile.capacity.set(0, 0); int count = 0; for (auto const_cell : const_cells) { sub_tile.sub_tile_to_tile_pin_indices.push_back(count); @@ -2215,7 +2155,7 @@ struct ArchReader { port.name = vtr::strdup((const_cell.first + "_" + const_cell.second).c_str()); - port.index = port.absolute_first_pin_index = port.port_index_by_type = 0; + port.index = port.absolute_first_pin_index = port.port_index_by_type = count; sub_tile.ports.push_back(port); @@ -2295,10 +2235,10 @@ struct ArchReader { grid_def.loc_defs.emplace_back(std::move(single)); } - // The constant source tile will be placed at (0, 0) + // The constant source tile will be placed at (1, max_height) t_grid_loc_def constant(const_block_, 1); constant.x.start_expr = std::to_string(1); - constant.y.start_expr = std::to_string(1); + constant.y.start_expr = std::to_string(grid_def.height - 1); constant.x.end_expr = constant.x.start_expr + " + w - 1"; constant.y.end_expr = constant.y.start_expr + " + h - 1"; @@ -2345,101 +2285,37 @@ struct ArchReader { } void process_switches() { - std::set> pip_timing_models; - for (auto tile_type : ar_.getTileTypeList()) { - for (auto pip : tile_type.getPips()) { - pip_timing_models.insert(std::pair(pip.getBuffered21(), pip.getTiming())); - if (!pip.getDirectional()) - pip_timing_models.insert(std::pair(pip.getBuffered20(), pip.getTiming())); - } - } - - auto timing_data = ar_.getPipTimings(); + std::set> seen; + pip_types(seen, ar_); - std::vector> pip_timing_models_list; - pip_timing_models_list.reserve(pip_timing_models.size()); + /* + * Number of different switch types in architecture is equal to number + * of different and RR special switches: SHORT and MUX type ones. + * Process_switches_array() filles out arch->Switches array with + * RR specific switches alongside FPGA interchange defined ones. + */ + arch_->num_switches = seen.size() + 2; - for (auto entry : pip_timing_models) { - pip_timing_models_list.push_back(entry); + if (arch_->num_switches > 0) { + arch_->Switches = new t_arch_switch_inf[arch_->num_switches]; } - size_t num_switches = pip_timing_models.size() + 2; - std::string switch_name; - - arch_->num_switches = num_switches; - - if (num_switches > 0) { - arch_->Switches = new t_arch_switch_inf[num_switches]; - } + std::vector, int>> pips_models_; + /* + * This nuction is template function defined in + * libs/libarchfpga/src/fpga_interchange_arch_utils.impl.h + */ + process_switches_array(ar_, seen, arch_->Switches, pips_models_); - float R, Cin, Cint, Cout, Tdel; - for (size_t i = 0; i < num_switches; ++i) { + for (int i = 0; i < arch_->num_switches; ++i) { t_arch_switch_inf* as = &arch_->Switches[i]; - R = Cin = Cint = Cout = Tdel = 0.0; - SwitchType type; - - if (i == 0) { - switch_name = "short"; - type = SwitchType::SHORT; - R = 0.0; - } else if (i == 1) { - switch_name = "generic"; - type = SwitchType::MUX; - R = 0.0; - } else { - auto entry = pip_timing_models_list[i - 2]; - auto model = timing_data[entry.second]; - std::stringstream name; - std::string mux_type_string = entry.first ? "mux_" : "passGate_"; - name << mux_type_string; - - // FIXME: allow to dynamically choose different speed models and corners - R = get_corner_value(model.getOutputResistance(), "slow", "min"); - name << "R" << std::scientific << R; - - Cin = get_corner_value(model.getInputCapacitance(), "slow", "min"); - name << "Cin" << std::scientific << Cin; - - Cout = get_corner_value(model.getOutputCapacitance(), "slow", "min"); - name << "Cout" << std::scientific << Cout; - - if (entry.first) { - Cint = get_corner_value(model.getInternalCapacitance(), "slow", "min"); - name << "Cinternal" << std::scientific << Cint; - } - - Tdel = get_corner_value(model.getInternalDelay(), "slow", "min"); - name << "Tdel" << std::scientific << Tdel; - - switch_name = name.str() + std::to_string(i); - type = entry.first ? SwitchType::MUX : SwitchType::PASS_GATE; - } - - /* Should never happen */ - if (switch_name == std::string(VPR_DELAYLESS_SWITCH_NAME)) { - archfpga_throw(arch_file_, __LINE__, - "Switch name '%s' is a reserved name for VPR internal usage!", switch_name.c_str()); - } - - as->name = vtr::strdup(switch_name.c_str()); - as->set_type(type); - as->mux_trans_size = as->type() == SwitchType::MUX ? 1 : 0; - - as->R = R; - as->Cin = Cin; - as->Cout = Cout; - as->Cinternal = Cint; - as->set_Tdel(t_arch_switch_inf::UNDEFINED_FANIN, Tdel); - if (as->type() == SwitchType::SHORT || as->type() == SwitchType::PASS_GATE) { as->buf_size_type = BufferSize::ABSOLUTE; - as->buf_size = 0; as->power_buffer_type = POWER_BUFFER_TYPE_ABSOLUTE_SIZE; as->power_buffer_size = 0.; } else { as->buf_size_type = BufferSize::AUTO; - as->buf_size = 0.; as->power_buffer_type = POWER_BUFFER_TYPE_AUTO; } } @@ -2448,47 +2324,40 @@ struct ArchReader { void process_segments() { // Segment names will be taken from wires connected to pips // They are good representation for nodes + + auto wires = ar_.getWires(); + auto wire_types = ar_.getWireTypes(); + + std::unordered_map wire_map; + for (auto wire : wires) { + auto type = wire_types[wire.getType()]; + wire_map.emplace(wire.getWire(), type.getCategory()); + } + std::set wire_names; for (auto tile_type : ar_.getTileTypeList()) { - auto wires = tile_type.getWires(); + auto tile_wires = tile_type.getWires(); + for (auto pip : tile_type.getPips()) { - wire_names.insert(wires[pip.getWire0()]); - wire_names.insert(wires[pip.getWire1()]); + auto wire0 = tile_wires[pip.getWire0()]; + auto wire1 = tile_wires[pip.getWire1()]; + + if (wire_map[wire0] == Device::WireCategory::GENERAL) + wire_names.insert(wire0); + + if (wire_map[wire1] == Device::WireCategory::GENERAL) + wire_names.insert(wire1); } } - // FIXME: have only one segment type for the time being, so that - // the RR graph generation is correct. - // This can be removed once the RR graph reader from the interchange - // device is ready and functional. - size_t num_seg = 1; //wire_names.size(); + int num_seg = wire_names.size() + 1; arch_->Segments.resize(num_seg); - size_t index = 0; + + size_t index = 1; + add_segment_with_default_values(arch_->Segments[0], std::string("__generic__")); for (auto i : wire_names) { - if (index >= num_seg) break; - - // Use default values as we will populate rr_graph with correct values - // This segments are just declaration of future use - arch_->Segments[index].name = str(i); - arch_->Segments[index].length = 1; - arch_->Segments[index].frequency = 1; - arch_->Segments[index].Rmetal = 1e-12; - arch_->Segments[index].Cmetal = 1e-12; - arch_->Segments[index].parallel_axis = BOTH_AXIS; - - // TODO: Only bi-directional segments are created, but it the interchange format - // has directionality information on PIPs, which may be used to infer the - // segments' directonality. - arch_->Segments[index].directionality = BI_DIRECTIONAL; - arch_->Segments[index].arch_wire_switch = 1; - arch_->Segments[index].arch_opin_switch = 1; - arch_->Segments[index].cb.resize(1); - arch_->Segments[index].cb[0] = true; - arch_->Segments[index].sb.resize(2); - arch_->Segments[index].sb[0] = true; - arch_->Segments[index].sb[1] = true; - segment_name_to_segment_idx[str(i)] = index; + add_segment_with_default_values(arch_->Segments[index], str(i)); ++index; } } diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index f75745facae..c03fef2e119 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -422,6 +422,7 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts) RouterOpts->max_logged_overused_rr_nodes = Options.max_logged_overused_rr_nodes; RouterOpts->generate_rr_node_overuse_report = Options.generate_rr_node_overuse_report; + RouterOpts->FPGAInterchange = (Options.arch_format == e_arch_format::FPGAInterchange); } static void SetupAnnealSched(const t_options& Options, diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 074e84a2e8c..fa6e8283a9f 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -1480,7 +1480,9 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg .help( "File format for the input atom-level circuit/netlist.\n" " * vtr: Architecture expressed in the explicit VTR format" - " * fpga-interchage: Architecture expressed in the FPGA Interchange schema format\n") + " * fpga-interchage: Architecture expressed in the FPGA Interchange schema format\n" + " FPGA Interchange in curent version does not allow for auto size grids\n" + " Grid size is taken directly from device database file\n") .default_value("vtr") .show_in(argparse::ShowIn::HELP_ONLY); diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index f469e76dbc4..f4c72fdcadd 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1250,6 +1250,9 @@ struct t_router_opts { e_rr_node_reorder_algorithm reorder_rr_graph_nodes_algorithm = DONT_REORDER; int reorder_rr_graph_nodes_threshold = 0; int reorder_rr_graph_nodes_seed = 1; + + // Options related to FPGA Interchange + bool FPGAInterchange; }; struct t_analysis_opts { diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index 5172a66e2be..fb69a8e7190 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -31,6 +31,7 @@ #include "build_switchblocks.h" #include "rr_graph_writer.h" #include "rr_graph_reader.h" +#include "rr_graph_fpga_interchange.h" #include "router_lookahead_map.h" #include "rr_graph_clock.h" #include "edge_groups.h" @@ -315,7 +316,8 @@ void create_rr_graph(const t_graph_type graph_type, int* Warnings) { const auto& device_ctx = g_vpr_ctx.device(); auto& mutable_device_ctx = g_vpr_ctx.mutable_device(); - if (!det_routing_arch->read_rr_graph_filename.empty()) { + + if (!det_routing_arch->read_rr_graph_filename.empty() || router_opts.FPGAInterchange) { if (device_ctx.read_rr_graph_filename != det_routing_arch->read_rr_graph_filename) { free_rr_graph(); @@ -332,6 +334,22 @@ void create_rr_graph(const t_graph_type graph_type, router_opts.reorder_rr_graph_nodes_threshold, router_opts.reorder_rr_graph_nodes_seed); } + } else if (!device_ctx.read_rr_graph_filename.size() && router_opts.FPGAInterchange) { + free_rr_graph(); + + VTR_LOG("Custom RR_graph generator\n"); + build_rr_graph_fpga_interchange(graph_type, + grid, + segment_inf, + router_opts.base_cost_type, + &det_routing_arch->wire_to_rr_ipin_switch, + det_routing_arch->read_rr_graph_filename, + router_opts.do_check_rr_graph); + if (router_opts.reorder_rr_graph_nodes_algorithm != DONT_REORDER) { + mutable_device_ctx.rr_graph_builder.reorder_nodes(router_opts.reorder_rr_graph_nodes_algorithm, + router_opts.reorder_rr_graph_nodes_threshold, + router_opts.reorder_rr_graph_nodes_seed); + } } } else { if (channel_widths_unchanged(device_ctx.chan_width, nodes_per_chan) && !device_ctx.rr_graph.empty()) { diff --git a/vpr/src/route/rr_graph_fpga_interchange.cpp b/vpr/src/route/rr_graph_fpga_interchange.cpp new file mode 100644 index 00000000000..348e8fb25f0 --- /dev/null +++ b/vpr/src/route/rr_graph_fpga_interchange.cpp @@ -0,0 +1,1877 @@ +#include +#include +#include +#include +#include + +#include "rr_graph_fpga_interchange.h" + +#include "arch_util.h" +#include "vtr_time.h" +#include "vtr_error.h" +#include "globals.h" +#include "check_rr_graph.h" +#include "rr_metadata.h" +#include "rr_graph_indexed_data.h" +#include "fpga_interchange_arch_utils.h" + +//#define DEBUG + +using namespace DeviceResources; +using namespace LogicalNetlist; +using namespace capnp; + +// Helper types definitions + +// t_location in the grid: +typedef std::pair t_location; + +// edge between two wires: > +typedef std::tuple> t_pip; + +// Helper enumerations + +enum constant { + GND = 0, + VCC, +}; + +enum node_sides { + LEFT_EDGE = 0, + TOP_EDGE, + RIGHT_EDGE, + BOTTOM_EDGE, +}; + +const std::vector NODE_SIDES = {LEFT_EDGE, TOP_EDGE, RIGHT_EDGE, BOTTOM_EDGE}; +const std::vector OPPOSITE_NODE_SIDES = {RIGHT_EDGE, BOTTOM_EDGE, LEFT_EDGE, TOP_EDGE}; + +/** @brief Intermediate data type used to build and explore FPGA Interchange + * nodes graph and later convert them to VTR RR Nodes. + */ +struct intermediate_node { + public: + t_location loc; + int segment_id = -1; + std::vector links{false, false, false, false}; // left, up, right, down + intermediate_node* visited = nullptr; + bool back_bone = false; // Indicates whether the node is in the connecting line between two ends + bool on_route = false; + bool has_pins = false; + + intermediate_node() = delete; + + intermediate_node(int x, int y) + : loc{x, y} {} + + intermediate_node(t_location loc_) + : loc(loc_) {} +}; + +/** @brief Interchange RR Graph builder class + * + * This class contains methods and attributes to translate from the Interchange + * RR graph collection of nodes and wires to the VTR RR Graph representation. + * + * First it builds an intermediate representation, which is necessary to store all the + * information in a convenient way to use the VTR RR Graph builder API. + */ +struct InterchangeRRGraphBuilder { + public: + InterchangeRRGraphBuilder(Device::Reader& arch_reader, + const DeviceGrid& grid, + DeviceContext& device_ctx, + t_rr_graph_storage& rr_nodes, + const vtr::vector& segment_inf, + const std::vector& physical_tile_types, + const enum e_base_cost_type base_cost_type) + : ar_(arch_reader) + , device_ctx_(device_ctx) + , rr_nodes_(rr_nodes) + , segment_inf_(segment_inf) + , grid_(grid) + , base_cost_type_(base_cost_type) { + for (auto& type : physical_tile_types) { + tile_type_to_pb_type_[std::string(type.name)] = &type; + } + + auto wire_types = ar_.getWireTypes(); + for (auto wire : ar_.getWires()) + if (wire_types[wire.getType()].getCategory() != Device::WireCategory::GENERAL) + wire_name_to_seg_idx_[str(wire.getWire())] = 0; + + // segment at index 0 is the generic one + for (size_t i = 1; i < segment_inf.size(); ++i) + wire_name_to_seg_idx_[segment_inf_[RRSegmentId(i)].name] = i; + + std::unordered_map> temp_; + auto func = std::bind(&InterchangeRRGraphBuilder::str, this, std::placeholders::_1); + process_cell_bel_mappings(ar_, temp_, func); + + for (auto i : temp_) + bel_cell_mappings_.insert(i.first); + } + + void build_rr_graph() { + create_switch_inf(); + create_tile_id_to_loc(); + create_uniq_node_ids(); + create_sink_source_nodes(); + create_pips(); + process_nodes(); +#ifdef DEBUG + print_virtual_rr_graph(); +#endif + virtual_to_real(); + pack_to_rr_graph(); +#ifdef DEBUG + print_real_rr_graph(); +#endif + finish_rr_generation(); + } + + private: + Device::Reader& ar_; + DeviceContext& device_ctx_; + t_rr_graph_storage& rr_nodes_; + const vtr::vector& segment_inf_; + const DeviceGrid& grid_; + const enum e_base_cost_type base_cost_type_; + + std::unordered_map tile_type_to_pb_type_; + + std::unordered_map wire_name_to_seg_idx_; + + std::set bel_cell_mappings_; + + // Tile location maps + vtr::bimap tile_loc_bimap_; + + // PIP maps + std::unordered_map, int /*idx*/, hash_tuple::hash>> pips_models_; + std::unordered_map, t_pip, hash_tuple::hash>> pips_; + + // Node maps + std::unordered_map, int, hash_tuple::hash>> wire_to_node_; + std::unordered_map> node_to_locs_; + std::unordered_map /**/, int /*segment type*/, hash_tuple::hash>> node_tile_to_segment_; + std::unordered_map> node_to_RC_; + int total_node_count_; + + /* + * Offsets for FPGA Interchange node processing + */ + t_location offsets[4] = {t_location(-1, 0), t_location(0, 1), t_location(1, 0), t_location(0, -1)}; + + /* + * Intermediate data storage. + * - virtual_shorts_ represents connections between rr_nodes in a single FPGA Interchange node. + * - virtual_redirect_ is map from node_id at t_location to t_location channel and index of virtual_track in that channel. + * It's useful for ends of the nodes (FPGA Interchange) that don't have representation in CHANS. + * - virtual_chan_loc_map maps from t_location and CHAN to vector containing virtual_tracks description. + * - virtual_beg_to_real_ maps from virtual track to physical one + * - chan_loc_map_ maps from t_location and CHAN to vector containing tracks description. + * - node_id_count_ maps from node_id to its tile count + */ + std::vector> virtual_shorts_; + std::unordered_map, + std::tuple, + hash_tuple::hash>> + virtual_redirect_; + + std::unordered_map, + int, + hash_tuple::hash>> + virtual_beg_to_real_; + + std::unordered_map, + std::vector>, + hash_tuple::hash>> + virtual_chan_loc_map_; + + std::unordered_map, + std::unordered_map>, + hash_tuple::hash>> + chan_loc_map_; + + std::unordered_map node_id_count_; + + /* + * Sets contain tuples of node ids and t_location. + * Each value correspondence to node id n being used by either pip or pin at t_location l. + */ + std::unordered_set, + hash_tuple::hash>> + used_by_pip_; + + std::unordered_set, + hash_tuple::hash>> + used_by_pin_; + + std::unordered_set useful_node_; + + /* Sink_source_loc_map is used to create ink/source and ipin/opin rr_nodes, + * rr_edges from sink/source to ipin/opin and from ipin/opin to their coresponding segments + * uniq_id is used to create edge from segment to ipin/opin, edges from ipin/opin to sink/source are created simply as one to one + */ + std::unordered_map> /*idx = ptc,*/> sink_source_loc_map_; + + /* + * RR generation stuff + */ + vtr::vector seg_index_; + std::unordered_map, + int, + hash_tuple::hash>> + loc_type_idx_to_rr_idx_; + int rr_idx = 0; // Do not decrement! + + /// @brief Returns the sum of a location and a vector, resulting in a new location + t_location add_vec(t_location x, t_location dx) { + return t_location(x.first + dx.first, x.second + dx.second); + } + + /// @brief Returns the multiplication of a location and a scalar + t_location mul_vec_scal(t_location x, int s) { + return t_location(x.first * s, x.second * s); + } + + /// @brief Returns the string corresponding to an entry in the string list of the interchange database + std::string str(int idx) { + if (idx == -1) + return std::string("constant_block"); + return std::string(ar_.getStrList()[idx].cStr()); + } + + /* + * The following code iterates over each node independently when transforming + * an FPGA Interchange device resource into a VTR RR Graph. + * + * During the transformation, segments are created, which require + * starting and ending coordinates, as well as their track location. + * At any given time only one segment can occupy a single physical track. + * Solving the track assignment with an online algorithm would greatly impact + * on runtime, achieving a suboptimal maximal number of tracks in a single channel. + * + * This is solved by only storing a starting and ending location and + * the track number in the starting position, thus it is named "virtual". + * Then, when all FPGA Interchange nodes are taken care of, + * the physical track placement is computed. + * + * Much of the code runs on virtual assignment, as it's fast to create, + * use and is only translated when creating the final RR Graph. + */ + + /** @brief Debug print function to output the virtual RR graph information + */ + void print_virtual_rr_graph() { + VTR_LOG("Virtual\n"); + for (const auto& entry : sink_source_loc_map_) { + const auto& key = entry.first; + const auto& value = entry.second; + int x, y; + std::tie(x, y) = tile_loc_bimap_[key]; + VTR_LOG("t_location x:%d y:%d Name:%s\n", x, y, str(key).c_str()); + int it = 0; + for (const auto& pin : value) { + bool dir; + float RC; + int node_id; + std::tie(dir, RC, node_id) = pin; + VTR_LOG("\tpin:%d input:%d R/C:%e\n", it, dir, RC); + VTR_LOG("\ttile_name:%s node_id:%d\n", str(key).c_str(), node_id); + it++; + } + } + + int it = 0; + + for (auto const& switch_id : device_ctx_.rr_graph.rr_switch()) { + VTR_LOG("Switch: %d Name:%s\n", it++, switch_id.name); + } + + for (auto& entry : virtual_chan_loc_map_) { + t_location loc, loc2; + e_rr_type type; + std::tie(loc, type) = entry.first; + VTR_LOG("CHAN%c X:%d Y:%d\n", type == e_rr_type::CHANX ? 'X' : 'Y', loc.first, loc.second); + for (auto& seg : entry.second) { + loc2 = std::get<3>(seg); + VTR_LOG("\tSegment id:%d name:%s -> X:%d Y:%d\n", std::get<0>(seg), segment_inf_[RRSegmentId(std::get<0>(seg))].name.c_str(), loc2.first, loc2.second); + } + } + + VTR_LOG("Redirects:\n"); + for (auto& entry : virtual_redirect_) { + int node; + t_location loc; + std::tie(node, loc) = entry.first; + + t_location new_loc; + e_rr_type new_type; + int new_idx; + std::tie(new_loc, new_type, new_idx) = entry.second; + VTR_LOG("\t Node:%d X:%d Y:%d -> X:%d Y:%d CHAN%c idx:%d\n", node, loc.first, loc.second, new_loc.first, + new_loc.second, new_type == e_rr_type::CHANX ? 'X' : 'Y', new_idx); + } + + VTR_LOG("Shorts:\n"); + for (auto& entry : virtual_shorts_) { + t_location loc1, loc2; + e_rr_type type1, type2; + int id1, id2; + std::tie(loc1, type1, id1, loc2, type2, id2) = entry; + VTR_LOG("\tCHAN%c X:%d Y%d", type1 == e_rr_type::CHANX ? 'X' : 'Y', loc1.first, loc1.second); + VTR_LOG(" Segment id:%d name:%s ->", id1, + segment_inf_[RRSegmentId(std::get<0>(virtual_chan_loc_map_[std::make_tuple(loc1, type1)][id1]))].name.c_str()); + VTR_LOG(" CHAN%c X:%d Y:%d", type2 == e_rr_type::CHANX ? 'X' : 'Y', loc2.first, loc2.second); + VTR_LOG(" Segment id:%d name:%s\n", id2, + segment_inf_[RRSegmentId(std::get<0>(virtual_chan_loc_map_[std::make_tuple(loc2, type2)][id2]))].name.c_str()); + } + } + + void print_real_rr_graph() { + VTR_LOG("Printing real RR graph\n"); + VTR_LOG("Virtual to real mapping:\n"); + for (auto i : virtual_beg_to_real_) { + e_rr_type type; + t_location loc; + int virt_idx; + std::tie(loc, type, virt_idx) = i.first; + VTR_LOG("CHAN%c X:%d Y:%d virt_idx:%d -> idx:%d\n", type == CHANX ? 'X' : 'Y', loc.first, loc.second, virt_idx, i.second); + } + + VTR_LOG("RR Nodes\n"); + for (auto i : loc_type_idx_to_rr_idx_) { + t_location loc; + e_rr_type type; + int idx; + std::tie(loc, type, idx) = i.first; + VTR_LOG("\t X:%d Y:%d type:%d idx:%d -> rr_node:%d\n", loc.first, loc.second, type, idx, i.second); + } + } + + /** @brief Fill device_ctx rr_switch_inf structure and store id of each PIP type for future use + */ + void create_switch_inf() { + std::set> seen; + pip_types(seen, ar_); + + std::vector switches; + switches.resize(seen.size() + 2); + + std::vector, int>> temp_; + + /* + * This nuction is template function defined in + * libs/libarchfpga/src/fpga_interchange_arch_utils.impl.h + */ + process_switches_array&, int>(ar_, seen, switches, temp_); + + auto& rr_switches = device_ctx_.rr_graph_builder.rr_switch(); + rr_switches.resize(seen.size() + 2); + + for (int i = 0; i < (int)switches.size(); i++) + rr_switches[RRSwitchId(i)] = switches[i]; + + for (auto i : temp_) + pips_models_[std::get<0>(i)] = std::get<1>(i); + } + + /** @brief Creates mapping from tile_id to its location + */ + void create_tile_id_to_loc() { + int max_height = 0; + for (const auto& tile : ar_.getTileList()) { + tile_loc_bimap_.insert(tile.getName(), t_location(tile.getCol() + 1, tile.getRow() + 1)); + max_height = std::max(max_height, (int)(tile.getRow() + 1)); + } + /* tile with id name -1 is assosiated with constant source tile */ + tile_loc_bimap_.insert(-1, t_location(1, max_height + 1)); + } + + void fill_node_mappings(int tile_id, int wire_id, int node_id) { + wire_to_node_[std::make_tuple(tile_id, wire_id)] = node_id; + node_to_locs_[node_id].insert(tile_loc_bimap_[tile_id]); + + if (wire_name_to_seg_idx_.find(str(wire_id)) == wire_name_to_seg_idx_.end()) + wire_name_to_seg_idx_[str(wire_id)] = -1; + + node_tile_to_segment_[std::make_tuple(node_id, tile_id)] = wire_name_to_seg_idx_[str(wire_id)]; + } + + /** @brief Helper function for create_uniq_node_ids. It process constant sources into single node. + */ + void process_const_nodes() { + for (const auto& tile : ar_.getTileList()) { + int tile_id = tile.getName(); + auto tile_type = ar_.getTileTypeList()[tile.getType()]; + auto wires = tile_type.getWires(); + for (const auto& constant : tile_type.getConstants()) { + int const_id = constant.getConstant() == Device::ConstantType::VCC ? VCC : GND; + + for (const auto wire_id : constant.getWires()) + fill_node_mappings(tile_id, wires[wire_id], const_id); + } + } + + for (const auto& node_source : ar_.getConstants().getNodeSources()) { + int tile_id = node_source.getTile(); + int wire_id = node_source.getWire(); + int const_id = node_source.getConstant() == Device::ConstantType::VCC ? VCC : GND; + fill_node_mappings(tile_id, wire_id, const_id); + } + + for (auto const const_node : {GND, VCC}) { + wire_to_node_[std::make_tuple(-1, const_node)] = const_node; + node_to_locs_[const_node].insert(tile_loc_bimap_[-1]); + node_tile_to_segment_[std::make_tuple(const_node, -1)] = -1; + } + } + + /** @brief Create unique IDs for each FPGA Interchange node. + * + * In addition it creates mappings from wire to node ID and from node ID to its locations. + * These ids are used later in the RR graph generation for site pins and pip conections (rr_edges) + */ + void create_uniq_node_ids() { + process_const_nodes(); + + // Process nodes + int id = 2; // ids 0 and 1 are reserved respectively for GND and VCC constants + for (const auto& node : ar_.getNodes()) { + bool is_constant_node = false; + int node_id = OPEN; + + // Nodes connected to constant sources should be combined into single constant node + for (const auto& wire_id_ : node.getWires()) { + const auto& wire = ar_.getWires()[wire_id_]; + int tile_id = wire.getTile(); + int wire_id = wire.getWire(); + if (wire_to_node_.find(std::make_tuple(tile_id, wire_id)) != wire_to_node_.end() && wire_to_node_[std::make_tuple(tile_id, wire_id)] < 2) { + // At least one of node wires is marked as constant source, that make whole node constant source. + // Give this node id equal to const source type. + is_constant_node = true; + node_id = wire_to_node_[std::make_tuple(tile_id, wire_id)]; + } + } + + // If the node is not part of constant network give it unique ID + if (!is_constant_node) + node_id = id++; + + VTR_ASSERT(node_id != OPEN); + + float capacitance = 0.0, resistance = 0.0; + if (node_to_RC_.find(node_id) == node_to_RC_.end()) { + node_to_RC_[node_id] = std::pair(0, 0); + // TODO: these values are only temporarily hard-coded in case there is no timing information. + // Need to handle this better. + capacitance = 0.000000001; + resistance = 5.7; + } + + // Use timing model if present + if (ar_.hasNodeTimings()) { + auto model = ar_.getNodeTimings()[node.getNodeTiming()]; + capacitance = get_corner_value(model.getCapacitance(), "slow", "typ"); + resistance = get_corner_value(model.getResistance(), "slow", "typ"); + } + + node_to_RC_[node_id].first += resistance; + node_to_RC_[node_id].second += capacitance; + + for (const auto& wire_id_ : node.getWires()) { + const auto& wire = ar_.getWires()[wire_id_]; + int tile_id = wire.getTile(); + int wire_id = wire.getWire(); + + fill_node_mappings(tile_id, wire_id, node_id); + } + } + total_node_count_ = id; + } + + /** @brief Creates an entry for each pip in the FPGA device architecture, + * + * Additionally, it stores the following metadata for later physical netlist writing: + * - tile name + * - wire names + * - direction + */ + void create_pips() { + int pip_count = 0; + + for (const auto& tile : ar_.getTileList()) { + const auto& tile_type = ar_.getTileTypeList()[tile.getType()]; + pip_count += tile_type.getPips().size(); + } + + pips_.reserve((int)(pip_count * 1.05)); + + for (const auto& tile : ar_.getTileList()) { + int tile_id = tile.getName(); + t_location loc = tile_loc_bimap_[tile_id]; + const auto& tile_type = ar_.getTileTypeList()[tile.getType()]; + + for (const auto& pip : tile_type.getPips()) { + int wire0_name, wire1_name; + int node0, node1; + int switch_id; + + wire0_name = tile_type.getWires()[pip.getWire0()]; + wire1_name = tile_type.getWires()[pip.getWire1()]; + + if (wire_to_node_.find(std::make_tuple(tile_id, wire0_name)) == wire_to_node_.end()) + continue; + if (wire_to_node_.find(std::make_tuple(tile_id, wire1_name)) == wire_to_node_.end()) + continue; + + node0 = wire_to_node_[std::make_tuple(tile_id, wire0_name)]; + node1 = wire_to_node_[std::make_tuple(tile_id, wire1_name)]; + + // FIXME: Right now we allow for pseudo pips even tho we don't check if they are avaiable + //if (pip.isPseudoCells() && (node0 > 1 && node1 > 1)) + // continue; + + used_by_pip_.emplace(node0, loc); + used_by_pip_.emplace(node1, loc); + + useful_node_.insert(node0); + useful_node_.insert(node1); + + int name = tile_id; + if (tile.hasSubTilesPrefices()) + name = tile.getSubTilesPrefices()[pip.getSubTile()]; + + VTR_ASSERT(pips_models_.find(std::make_tuple(pip.getTiming(), pip.getBuffered21())) != pips_models_.end()); + + switch_id = pips_models_[std::make_tuple(pip.getTiming(), pip.getBuffered21())]; + + auto source_sink = std::make_tuple(node0, node1); + auto edge = t_pip(switch_id, tile_id, std::make_tuple(name, wire0_name, wire1_name, true)); + pips_.emplace(source_sink, edge); + + if (!pip.getBuffered21() && (pip.getDirectional() || pip.getBuffered20())) { + source_sink = std::make_tuple(node1, node0); + pips_.emplace(source_sink, t_pip(switch_id, tile_id, std::make_tuple(name, wire0_name, wire1_name, true))); + } + + if (!pip.getDirectional()) { + switch_id = pips_models_[std::make_tuple(pip.getTiming(), pip.getBuffered20())]; + source_sink = std::make_tuple(node1, node0); + pips_.emplace(source_sink, t_pip(switch_id, tile_id, std::make_tuple(name, wire0_name, wire1_name, true))); + if (!pip.getBuffered20() && pip.getBuffered21()) { + source_sink = std::make_tuple(node0, node1); + pips_.emplace(source_sink, t_pip(switch_id, tile_id, std::make_tuple(name, wire0_name, wire1_name, true))); + } + } + } + } + } + + bool pip_uses_node_loc(int node_id, t_location loc) { + return used_by_pip_.find(std::make_tuple(node_id, loc)) != used_by_pip_.end(); + } + + bool pin_uses_node_loc(int node_id, t_location loc) { + return used_by_pin_.find(std::make_tuple(node_id, loc)) != used_by_pin_.end(); + } + + /** @brief Builds graph of FPGA Interchange node for further computations + * + * An FPGA Interchange node graph can be interpreted as a set of wires + * that are electrically connected by non-configurable shorts. + * + * A direct translation of nodes from the Interchange Format to the VTR RR graph + * format is not possible, and some extra computation is required to build a node's + * representation suitable fro the RR graph. + * + * The node graph may be split in multiple unconnected sub-graphs, each with its own root nodes, + * and each sub-graph will need to be connected to each other, forming a single final node graph. + * + * The reason why a node graph is a forest, rather than a single tree (e.g. has multiple roots hence + * multiple unconnected sub-graphs), is that with absent information on the location of the various wires + * in the interchange format, the exact structure of the graph is not available and needs to be + * reconstructed to suite the VTR RR graph representation. + * + * All this procedure is to allow having nodes that represent complex-shaped wires, namely L-wires or U-wires. + */ + std::set build_node_graph(int node_id, + std::set nodes, + std::map& existing_nodes, + int& seg_id) { + std::set roots; + do { + intermediate_node* root_node = nullptr; + t_location key; + for (const auto& it : nodes) { + if (pip_uses_node_loc(node_id, it) || pin_uses_node_loc(node_id, it)) { + root_node = new intermediate_node(it); + key = it; + break; + } + } + + if (root_node == nullptr) { + nodes.clear(); + break; + } + + if (node_tile_to_segment_[std::make_tuple(node_id, tile_loc_bimap_[key])] != -1) + seg_id = node_tile_to_segment_[std::make_tuple(node_id, tile_loc_bimap_[key])]; + + nodes.erase(key); + + existing_nodes.emplace(key, root_node); + + std::queue builder; + builder.push(root_node); + + while (!builder.empty()) { + intermediate_node* vertex; + vertex = builder.front(); + builder.pop(); + t_location loc = vertex->loc; + for (auto i : NODE_SIDES) { + t_location other_loc = add_vec(loc, offsets[i]); + bool temp = false; + if (existing_nodes.find(other_loc) != existing_nodes.end()) { + temp = true; + } else if (nodes.find(other_loc) != nodes.end()) { + intermediate_node* new_node = new intermediate_node(other_loc); + temp = true; + + if (node_tile_to_segment_[std::make_tuple(node_id, tile_loc_bimap_[other_loc])] != -1) + seg_id = node_tile_to_segment_[std::make_tuple(node_id, tile_loc_bimap_[other_loc])]; + + nodes.erase(other_loc); + existing_nodes.emplace(other_loc, new_node); + builder.push(new_node); + } + vertex->links[i] = temp; + } + } + + roots.insert(root_node); + } while (!nodes.empty()); + + for (auto& node : existing_nodes) { + if (pip_uses_node_loc(node_id, node.second->loc)) + node.second->on_route = true; + + if (pin_uses_node_loc(node_id, node.second->loc)) { + node.second->has_pins = true; + node.second->on_route = true; + } + } + + return roots; + } + + /** @brief This function is used to update the bounding box of a node graph. + * + * parameters: + * - end: node at a side's frontier + * - node: node that might replace the end node + * - side: side where to check the bounding box expansion + */ + intermediate_node* update_end(intermediate_node* end, intermediate_node* node, enum node_sides side) { + intermediate_node* res = end; + int x, y; + x = end->loc.first > node->loc.first ? -1 : end->loc.first < node->loc.first ? 1 : 0; + y = end->loc.second > node->loc.second ? -1 : end->loc.second < node->loc.second ? 1 : 0; + switch (side) { + case LEFT_EDGE: + if (x < 0 || (x == 0 && y < 0)) + res = node; + break; + case TOP_EDGE: + if (y > 0 || (y == 0 && x > 0)) + res = node; + break; + case RIGHT_EDGE: + if (x > 0 || (x == 0 && y > 0)) + res = node; + break; + case BOTTOM_EDGE: + if (y < 0 || (y == 0 && x < 0)) + res = node; + break; + default: + VTR_ASSERT(false); + break; + } + + return res; + } + + /** @brief Creates a line of nodes connecting two points. + * + * parameters: + * - existing_nodes: mapping of nodes at locations + * - start: starting location + * - end: ending location + * - connections: sides that are used for this line + * + * If intermediate nodes are missing, they will be created. + * + * TODO: consider using segments of length > 1 and create one node only + * instead of multiple nodes on a straight line. + */ + void add_line(std::map& existing_nodes, + t_location start, + t_location end, + std::initializer_list connections) { + bool horizontal = start.second == end.second; + VTR_ASSERT(horizontal || start.first == end.first); + + int range_start = horizontal ? start.first + 1 : start.second + 1; + int range_end = horizontal ? end.first : end.second; + + for (int i = range_start; i < range_end; i++) { + t_location loc = horizontal ? t_location(i, end.second) : t_location(end.first, i); + + if (existing_nodes.find(loc) == existing_nodes.end()) + existing_nodes[loc] = new intermediate_node(loc); + + intermediate_node* loc_ = existing_nodes[loc]; + loc_->back_bone = true; + + for (auto j : connections) + loc_->links[j] = true; + } + } + + /** @brief Adds a middle node to allow connecting end nodes that would form a non-straight + * line of nodes. + * + * +----------+ + * | | +--------+ + * | End Node +----------+ Middle | + * | | | Node | + * +----------+ +---+----+ + * | + * | + * | + * +----+-----+ + * | | + * | End Node | + * | | + * +----------+ + */ + void add_middle_node(std::map& existing_nodes, + intermediate_node* ends[], + t_location node_loc, + bool is_up, + enum node_sides node_side, + enum node_sides end_side) { + if (existing_nodes.find(node_loc) == existing_nodes.end()) + existing_nodes[node_loc] = new intermediate_node(node_loc); + + intermediate_node* node = existing_nodes[node_loc]; + node->back_bone = true; + + if (node != ends[node_side]) { + node->links[node_side] = true; + ends[node_side]->links[end_side] = true; + + if (node->loc.first < ends[node_side]->loc.first) + add_line(existing_nodes, node->loc, ends[node_side]->loc, {RIGHT_EDGE, LEFT_EDGE}); + else + add_line(existing_nodes, ends[node_side]->loc, node->loc, {RIGHT_EDGE, LEFT_EDGE}); + } + + if (is_up && node->loc.second != ends[TOP_EDGE]->loc.second) { + node->links[TOP_EDGE] = true; + ends[TOP_EDGE]->links[BOTTOM_EDGE] = true; + add_line(existing_nodes, node->loc, ends[TOP_EDGE]->loc, {TOP_EDGE, BOTTOM_EDGE}); + } else if (!is_up && node->loc.second != ends[BOTTOM_EDGE]->loc.second) { + node->links[BOTTOM_EDGE] = true; + ends[BOTTOM_EDGE]->links[TOP_EDGE] = true; + add_line(existing_nodes, ends[BOTTOM_EDGE]->loc, node->loc, {TOP_EDGE, BOTTOM_EDGE}); + } + } + + /** @brief Creates the final root node that will be used to represent an Interchange node graph + * + * Given that there may be different root nodes for the same node graph, we need to elect one to + * be the new final root, so that we can than build the RR graph connections between all the + * generated nodes reliably. + */ + intermediate_node* create_final_root_node(std::map& existing_nodes, + bool left_node, + t_location root_node, + t_location up_node, + t_location middle_node, + enum node_sides side) { + if (existing_nodes.find(root_node) == existing_nodes.end()) + existing_nodes[root_node] = new intermediate_node(root_node); + + intermediate_node* temp = existing_nodes[root_node]; + + temp->back_bone = true; + temp->links[side] = true; + temp->links[BOTTOM_EDGE] = true; + + add_line(existing_nodes, up_node, root_node, {TOP_EDGE, BOTTOM_EDGE}); + + existing_nodes[up_node]->links[TOP_EDGE] = true; + existing_nodes[middle_node]->links[OPPOSITE_NODE_SIDES[side]] = true; + + if (left_node) + add_line(existing_nodes, root_node, middle_node, {RIGHT_EDGE, LEFT_EDGE}); + else + add_line(existing_nodes, middle_node, root_node, {RIGHT_EDGE, LEFT_EDGE}); + + return temp; + } + + /** @brief Connect all the remaining root nodes that are not yet connected to the corresponding + * node graph. + * + * This is to connect all the nodes that may be in the current situation: + * + * +---------+ + * | | + * | Node | + * | | + * +----+----+ + * | + * | +-------------+ + * | | Dangling | + * +----------+ | + * | | Root Node | + * | +-------------+ + * | + * +---------+ +----+----+ + * | | | Final | + * | Node +------------+ | + * | | | Root | + * +---------+ +----+----+ + * | + * | + * | + * | + * | + * +----+----+ + * | | + * | Node | + * | | + * +---------+ + */ + void connect_dangling_roots(std::set& roots, + std::map& existing_nodes, + intermediate_node* ends[]) { + /* connect not yet connected roots */ + std::vector del_list; + + int x_, y_; + x_ = ends[RIGHT_EDGE]->loc.first - ends[LEFT_EDGE]->loc.first; + y_ = ends[TOP_EDGE]->loc.second - ends[BOTTOM_EDGE]->loc.second; + + int max_length = std::max(x_, y_); + + for (const auto& root : roots) { + bool done = root->back_bone; + for (int j = 1; j < max_length; j++) { + if (done) { + del_list.push_back(root); + break; + } + + for (const auto& k : NODE_SIDES) { + t_location offset = mul_vec_scal(offsets[k], j); + t_location other_node_loc = add_vec(root->loc, offset); + if (existing_nodes.find(other_node_loc) == existing_nodes.end()) + continue; + intermediate_node* other_node; + other_node = existing_nodes[other_node_loc]; + if (!other_node->back_bone) + continue; + root->links[k] = true; + root->back_bone = true; + other_node->links[OPPOSITE_NODE_SIDES[k]] = true; + + if (0 < k && k < 3) + add_line(existing_nodes, root->loc, other_node_loc, {k, OPPOSITE_NODE_SIDES[k]}); + else + add_line(existing_nodes, other_node_loc, root->loc, {k, OPPOSITE_NODE_SIDES[k]}); + + done = true; + break; + } + } + } + + // Remove roots that have already been processed + for (const auto& i : del_list) { + if (roots.find(i) != roots.end()) + roots.erase(i); + } + + del_list.clear(); + } + + /** @brief Connects to unconnected graphs into a single node graph. + * + * If the node graph is composed of multiple unconnected trees, this procedure will connect all + * of the various roots of the sub-trees together, so that a single node graph is present. + */ + intermediate_node* connect_roots(std::set& roots, + std::map& existing_nodes) { + // The node graph is fully connected, therefore no extra computation is required + if (roots.size() == 1) { + intermediate_node* root = *roots.begin(); + root->back_bone = true; + roots.clear(); + return root; + } + + intermediate_node* ends[4]; + ends[0] = ends[1] = ends[2] = ends[3] = *roots.begin(); + + for (auto root : roots) + for (auto const& side : NODE_SIDES) + ends[side] = update_end(ends[side], root, side); + + for (auto const& side : NODE_SIDES) { + ends[side]->back_bone = true; + if (roots.find(ends[side]) != roots.end()) + roots.erase(ends[side]); + } + + bool right_to_top = ends[TOP_EDGE]->loc.first >= ends[BOTTOM_EDGE]->loc.first; + bool left_to_top = ends[TOP_EDGE]->loc.first < ends[BOTTOM_EDGE]->loc.first; + + t_location right_middle_node(right_to_top ? ends[TOP_EDGE]->loc.first : ends[BOTTOM_EDGE]->loc.first, ends[RIGHT_EDGE]->loc.second); + t_location left_middle_node(left_to_top ? ends[TOP_EDGE]->loc.first : ends[BOTTOM_EDGE]->loc.first, ends[LEFT_EDGE]->loc.second); + + add_middle_node(existing_nodes, ends, right_middle_node, right_to_top, RIGHT_EDGE, LEFT_EDGE); + add_middle_node(existing_nodes, ends, left_middle_node, left_to_top, LEFT_EDGE, RIGHT_EDGE); + + int y = std::max(right_middle_node.second, left_middle_node.second); + t_location loc1(left_middle_node.first, y); + t_location loc2(right_middle_node.first, y); + + intermediate_node* true_root = nullptr; + + // The final root node is chosen among the possible different ones, depending on the various intermediate nodes + // locations. + if (loc1 != left_middle_node && loc1 != loc2) { + true_root = create_final_root_node(existing_nodes, true, loc1, left_middle_node, right_middle_node, RIGHT_EDGE); + } else if (loc2 != right_middle_node && loc1 != loc2) { + true_root = create_final_root_node(existing_nodes, false, loc2, right_middle_node, left_middle_node, LEFT_EDGE); + } else if (loc1 != loc2) { + add_line(existing_nodes, loc1, loc2, {RIGHT_EDGE, LEFT_EDGE}); + existing_nodes[loc1]->links[RIGHT_EDGE] = true; + existing_nodes[loc2]->links[LEFT_EDGE] = true; + true_root = existing_nodes[loc1]; + } else { + loc1 = right_middle_node; + loc2 = left_middle_node; + + if (right_middle_node.second > left_middle_node.second) + std::swap(loc1, loc2); + + add_line(existing_nodes, loc1, loc2, {TOP_EDGE, BOTTOM_EDGE}); + existing_nodes[loc1]->links[TOP_EDGE] = true; + existing_nodes[loc2]->links[BOTTOM_EDGE] = true; + true_root = existing_nodes[loc1]; + } + + connect_dangling_roots(roots, existing_nodes, ends); + + VTR_ASSERT(true_root != nullptr); + return true_root; + } + + /** @brief Removes dangling nodes from a graph represented by the root node. + * + * Dangling nodes are nodes that do not connect to a pin, a pip or other non-dangling node. + */ + int reduce_graph_and_count_nodes_left(intermediate_node* root, + std::map& existing_nodes, + int seg_id) { + int cnt = 0; + std::queue walker; + std::stack back_walker; + + walker.push(root); + root->visited = root; + + while (!walker.empty()) { + intermediate_node* vertex = walker.front(); + walker.pop(); + back_walker.push(vertex); + vertex->segment_id = seg_id; + + bool single_node = true; + bool has_chanx = false; + + for (auto i : NODE_SIDES) { + if (vertex->links[i]) { + single_node = false; + intermediate_node* other_node = existing_nodes[add_vec(vertex->loc, offsets[i])]; + if (other_node->visited == nullptr || vertex->visited == other_node) { + if (i == node_sides::LEFT_EDGE) { + has_chanx = true; + cnt++; + } else if (i == node_sides::TOP_EDGE) { + cnt++; + } + + if (other_node->visited == nullptr) { + other_node->visited = vertex; + walker.push(other_node); + } + } else { + vertex->links[i] = false; + } + } + } + + if ((vertex->has_pins && !has_chanx) || single_node) + cnt++; + } + + while (!back_walker.empty()) { + intermediate_node* vertex = back_walker.top(); + back_walker.pop(); + + bool single_node = true; + + if (!vertex->has_pins && !vertex->on_route) { + for (auto i : NODE_SIDES) + if (vertex->links[i]) + if (i == node_sides::LEFT_EDGE || i == node_sides::TOP_EDGE) + cnt--; + + existing_nodes.erase(vertex->loc); + delete vertex; + } else { + vertex->visited->on_route = true; + for (auto i : NODE_SIDES) { + if (vertex->links[i]) { + if (existing_nodes.find(add_vec(vertex->loc, offsets[i])) == existing_nodes.end()) { + vertex->links[i] = false; + if ((i == node_sides::LEFT_EDGE && !vertex->has_pins) || i == node_sides::TOP_EDGE) + cnt--; + } + } + } + + for (auto i : NODE_SIDES) + single_node &= !vertex->links[i]; + + if (!vertex->has_pins && single_node) + cnt++; + } + } + + return cnt; + } + + /** @brief Processes the set of nodes to populate the virtual CHAN loation maps + */ + void process_set(std::unordered_set& set, + std::unordered_set>& nodes_used, + std::map& existing_nodes, + std::map>& local_redirect, + float R, + float C, + t_location offset, + node_sides side, + e_rr_type chan_type) { + for (auto const inode : set) { + int len = 0; + intermediate_node* start = inode; + intermediate_node* end = inode; + auto key = std::make_tuple(add_vec(start->loc, offset), chan_type); + int idx = virtual_chan_loc_map_[key].size(); + do { + nodes_used.insert(end->loc); + len++; + local_redirect.emplace(end->loc, std::make_tuple(add_vec(start->loc, offset), chan_type, idx)); + + if (!end->links[side]) + // Reached the end of this set of nodes exiting the loop + break; + + intermediate_node* neighbour = existing_nodes[add_vec(end->loc, offsets[side])]; + + if (set.find(neighbour) != set.end()) + break; + + end = neighbour; + } while (true); + + virtual_chan_loc_map_[key].emplace_back(start->segment_id, R * len, C * len, add_vec(end->loc, offset)); + } + } + + /** @brief Adds a short between two node locations between two nodes existing in the + * same Interchange node graph. This populates the virtual shorts map. + */ + void add_short(t_location n1, + t_location n2, + std::map>& r1, + std::map>& r2) { + auto red1 = r1[n1]; + auto red2 = r2[n2]; + virtual_shorts_.emplace_back(std::get<0>(red1), std::get<1>(red1), std::get<2>(red1), + std::get<0>(red2), std::get<1>(red2), std::get<2>(red2)); + } + + /** @brief Adds connections between nodes belonging to the same Interchange node graph + */ + void connect_base_on_redirects(std::unordered_set& set, + node_sides side, + std::map>& src, + std::map>& dist) { + for (auto const node : set) { + if (!node->links[side]) + continue; + + t_location other_node = add_vec(node->loc, offsets[side]); + + if (dist.find(other_node) == dist.end()) + continue; + + add_short(node->loc, other_node, src, dist); + } + } + + /** @brief Translates the Interchange node graph representation in a format suitable + * to be translated into the VTR RR graph representation. + * + * In practice this means populating the mappings to better access data in the + * real RR graph building stage. + */ + void graph_reduction_stage2(int node_id, + std::map& existing_nodes, + float R, + float C) { + std::unordered_set chanxs, chanys; + std::unordered_set> nodes_in_chanxs, nodes_in_chanys; + + // Populate sets containing starting point where horizontal (CHANX) or vertical (CHANY) lines of + // nodes are linked together. + // + // In the following there will be four sets of nodes: + // - three sets for CHANY to build the three veritcal sets of nodes + // - one set for CHANX to connect together all the bottom nodes + // + // +---+ +---+ +---+ + // | | | | | | + // +-+-+ +-+-+ +-+-+ + // | | | + // | | | + // | | | + // | | | + // +-+-+ +-+-+ +-+-+ + // | +--------------------+ +--------------------+ | + // +---+ +---+ +---+ + // + for (auto const& node : existing_nodes) { + bool single_node = true; + for (auto side : NODE_SIDES) + single_node &= !node.second->links[side]; + + bool chanx_start = false; + bool chany_start = false; + + if (node.second->links[LEFT_EDGE]) { + intermediate_node* left_node = existing_nodes[add_vec(node.second->loc, offsets[LEFT_EDGE])]; + chanx_start = pip_uses_node_loc(node_id, left_node->loc) || left_node->links[TOP_EDGE] || left_node->links[BOTTOM_EDGE]; + } else { + chanx_start = node.second->has_pins || single_node; + if (node.second->links[RIGHT_EDGE]) + nodes_in_chanxs.insert(node.first); + } + if (node.second->links[TOP_EDGE]) { + intermediate_node* top_node = existing_nodes[add_vec(node.second->loc, offsets[TOP_EDGE])]; + chany_start = pip_uses_node_loc(node_id, top_node->loc) || pin_uses_node_loc(node_id, top_node->loc); + chany_start |= top_node->links[LEFT_EDGE] || top_node->links[RIGHT_EDGE]; + } else if (node.second->links[BOTTOM_EDGE]) { + nodes_in_chanys.insert(node.first); + } + + if (chanx_start) + chanxs.insert(node.second); + if (chany_start) + chanys.insert(node.second); + } + + std::map> local_redirect_x, local_redirect_y; + + process_set(chanys, nodes_in_chanys, existing_nodes, local_redirect_y, R, C, t_location(0, 0), BOTTOM_EDGE, CHANY); + process_set(chanxs, nodes_in_chanxs, existing_nodes, local_redirect_x, R, C, offsets[BOTTOM_EDGE], RIGHT_EDGE, CHANX); + + connect_base_on_redirects(chanys, TOP_EDGE, local_redirect_y, local_redirect_y); + connect_base_on_redirects(chanxs, LEFT_EDGE, local_redirect_x, local_redirect_x); + + // Connect CHANY with CHANX nodes + for (auto i : nodes_in_chanys) { + t_location node = i; + if (nodes_in_chanxs.find(i) != nodes_in_chanxs.end()) { + bool ry = local_redirect_y.find(node) != local_redirect_y.end(); + bool rx = local_redirect_x.find(node) != local_redirect_x.end(); + t_location x, y; + if (rx) + x = node; + else + x = add_vec(node, offsets[RIGHT_EDGE]); + + if (ry) + y = node; + else + y = add_vec(node, offsets[BOTTOM_EDGE]); + + add_short(y, x, local_redirect_y, local_redirect_x); + } + } + + for (auto const& node : existing_nodes) { + bool ry = local_redirect_y.find(node.first) != local_redirect_y.end(); + bool rx = local_redirect_x.find(node.first) != local_redirect_x.end(); + + if (rx) { + virtual_redirect_.emplace(std::make_tuple(node_id, node.first), local_redirect_x[node.first]); + } else if (ry) { + virtual_redirect_.emplace(std::make_tuple(node_id, node.first), local_redirect_y[node.first]); + } else if (node.second->links[RIGHT_EDGE]) { + t_location right_node = add_vec(node.first, offsets[RIGHT_EDGE]); + virtual_redirect_.emplace(std::make_tuple(node_id, node.first), local_redirect_x[right_node]); + } else if (node.second->links[BOTTOM_EDGE]) { + t_location bottom_node = add_vec(node.first, offsets[BOTTOM_EDGE]); + virtual_redirect_.emplace(std::make_tuple(node_id, node.first), local_redirect_y[bottom_node]); + } else { + VTR_ASSERT(false); + } + } + } + + /** @brief Removes a node graph represented by a root node. + * + * This step is used to clean up of earlier intermediate stages. + */ + void delete_nodes(std::map& existing_nodes) { + for (auto i : existing_nodes) { + delete i.second; + } + existing_nodes.clear(); + } + + /** @brief Process FPGA Interchange nodes. + * + * This function processes all the wires that are connected together and are part of the same node graph. + */ + void process_nodes() { + for (int node_id = 0; node_id < total_node_count_; ++node_id) { + int seg_id; + if (useful_node_.find(node_id) == useful_node_.end()) { + continue; + } + std::set all_possible_tiles = node_to_locs_[node_id]; + + std::map existing_nodes; + + std::set roots = build_node_graph(node_id, all_possible_tiles, existing_nodes, seg_id); + intermediate_node* root = connect_roots(roots, existing_nodes); + + VTR_ASSERT(roots.empty()); + + int div = reduce_graph_and_count_nodes_left(root, existing_nodes, seg_id); + + if (existing_nodes.empty()) + continue; + + node_id_count_[node_id] = div; + VTR_ASSERT(div > 0); + + // Given that an Interchange graph node is composed of many electrically connected wires + // where each one is a separate VTR RR graph node, the resistance and capacitance values + // need to be split among all of the different nodes, as in the end, they are all part of + // the same Interchange "node". + float resistance = node_to_RC_[node_id].first / div; + float capacitance = node_to_RC_[node_id].second / div; + graph_reduction_stage2(node_id, existing_nodes, resistance, capacitance); + delete_nodes(existing_nodes); + } + } + + /** @brief Finds the next suitable site index to create SINK and SOURCE nodes + */ + int next_good_site(int first_idx, const Device::Tile::Reader tile) { + auto tile_type = ar_.getTileTypeList()[tile.getType()]; + auto site_types = ar_.getSiteTypeList(); + size_t ans = first_idx; + for (; ans < tile.getSites().size(); ans++) { + auto site = tile.getSites()[ans]; + auto site_type = site_types[tile_type.getSiteTypes()[site.getType()].getPrimaryType()]; + bool found = false; + for (auto bel : site_type.getBels()) + found |= bel_cell_mappings_.find(bel.getName()) != bel_cell_mappings_.end(); + + if (found) + break; + for (auto alt_site_idx : site_type.getAltSiteTypes()) { + auto alt_site = site_types[alt_site_idx]; + for (auto bel : alt_site.getBels()) { + auto bel_name = bel.getName(); + bool res = bel_cell_mappings_.find(bel_name) != bel_cell_mappings_.end(); + found |= res; + } + if (found) + break; + } + if (found) + break; + } + + return ans; + } + + /** @brief Creates site SINK/SOURCE IPIN/OPIN mappings. + */ + void create_sink_source_nodes() { + const auto& tile_types = ar_.getTileTypeList(); + std::map, std::tuple> tile_type_site_num_pin_name_model; + + // Fill tile pin to wire map + for (const auto& tileType : tile_types) { + int it = 0; + + if (!tile_type_to_pb_type_.count(str(tileType.getName()))) + continue; + + for (const auto& siteType : tileType.getSiteTypes()) { + int pin_count = 0; + for (const auto& pin_ : ar_.getSiteTypeList()[siteType.getPrimaryType()].getPins()) { + std::tuple pin(tileType.getName(), it, str(pin_.getName())); + const auto& model = pin_.getModel(); + float value = 0.0; + + if (pin_.getDir() == LogicalNetlist::Netlist::Direction::OUTPUT && model.hasResistance()) + value = get_corner_value(model.getResistance(), "slow", "max"); + else if (pin_.getDir() == LogicalNetlist::Netlist::Direction::INPUT && model.hasCapacitance()) + value = get_corner_value(model.getCapacitance(), "slow", "max"); + + int wire_id = siteType.getPrimaryPinsToTileWires()[pin_count]; + tile_type_site_num_pin_name_model[pin] = std::tuple(value, wire_id); + pin_count++; + } + it++; + } + } + + for (const auto& tile : ar_.getTileList()) { + const auto& tile_type_id = tile_types[tile.getType()].getName(); + const auto& tile_type_name = str(tile_type_id); + const auto& tile_id = tile.getName(); + + if (!tile_type_to_pb_type_.count(tile_type_name)) + continue; + + int it = next_good_site(0, tile); + + sink_source_loc_map_[tile_id].resize(tile_type_to_pb_type_[tile_type_name]->num_pins); + for (const auto& sub_tile : tile_type_to_pb_type_[tile_type_name]->sub_tiles) { + for (const auto& port : sub_tile.ports) { + int pin_id = sub_tile.sub_tile_to_tile_pin_indices[port.index]; + + std::tuple key{tile_type_id, it, std::string(port.name)}; + + float value; + int wire_id; + std::tie(value, wire_id) = tile_type_site_num_pin_name_model[key]; + + int node_id; + if (!wire_to_node_.count(std::make_tuple(tile_id, wire_id))) + node_id = -1; + else + node_id = wire_to_node_[std::make_tuple(tile_id, wire_id)]; + + t_location loc = tile_loc_bimap_[tile_id]; + + used_by_pin_.insert(std::make_tuple(node_id, loc)); + useful_node_.insert(node_id); + sink_source_loc_map_[tile_id][pin_id] = std::make_tuple(port.type == IN_PORT, value, node_id); + } + it = next_good_site(it + 1, tile); + } + } + + // Add constant sources. Tile ID "-1" corresponds to the constant synthetic tile + sink_source_loc_map_[-1].resize(2); + for (auto pin_id : {0, 1}) { + t_location loc = tile_loc_bimap_[-1]; + used_by_pin_.insert(std::make_tuple(pin_id, loc)); + useful_node_.insert(pin_id); + sink_source_loc_map_[-1][pin_id] = std::make_tuple(false, 0, pin_id); + } + } + + void sweep(t_location loc, + e_rr_type type, + int& used_tracks, + std::unordered_map>& sweeper, + std::list& avaiable_tracks) { + auto loc_type_tuple = std::make_tuple(loc, type); + for (size_t k = 0; k < virtual_chan_loc_map_[loc_type_tuple].size(); ++k) { + auto track = virtual_chan_loc_map_[loc_type_tuple][k]; + int new_id; + + if (!avaiable_tracks.empty()) { + new_id = avaiable_tracks.front(); + avaiable_tracks.pop_front(); + } else { + new_id = used_tracks++; + } + + virtual_beg_to_real_[std::make_tuple(loc, type, k)] = new_id; + chan_loc_map_[loc_type_tuple][new_id] = track; + int stop = type == CHANX ? std::get<3>(track).first : std::get<3>(track).second; + sweeper[stop].push_back(new_id); + } + + int pos = type == CHANX ? loc.first : loc.second; + + while (!sweeper[pos].empty()) { + avaiable_tracks.push_back(sweeper[pos].front()); + sweeper[pos].pop_front(); + } + + virtual_chan_loc_map_[loc_type_tuple].clear(); + } + + /** @brief Creates the mapping from the virtual to physical tracks. + * + * It works in O(N*M + L), where N and M are the device dimensions + * and L is the number of used segments in the various CHANs + */ + void virtual_to_real() { + device_ctx_.chan_width.x_list.resize(grid_.height(), 0); + device_ctx_.chan_width.y_list.resize(grid_.width(), 0); + + int min_x, min_y, max_x, max_y; + min_x = min_y = std::numeric_limits::max(); + max_x = max_y = 0; + + std::unordered_map> sweeper; + std::list avaiable_tracks; + + int used_tracks; + for (size_t i = 0; i < grid_.height(); ++i) { + used_tracks = 0; + avaiable_tracks.clear(); + + for (size_t j = 0; j < grid_.width(); ++j) + sweep(t_location(j, i), CHANX, used_tracks, sweeper, avaiable_tracks); + + min_x = std::min(min_x, used_tracks); + max_x = std::max(max_x, used_tracks); + device_ctx_.chan_width.x_list[i] = used_tracks; + } + + for (size_t i = 0; i < grid_.width(); ++i) { + used_tracks = 0; + avaiable_tracks.clear(); + + for (size_t j = grid_.height() - 1; j < (size_t)-1; --j) + sweep(t_location(i, j), CHANY, used_tracks, sweeper, avaiable_tracks); + + min_y = std::min(min_y, used_tracks); + max_y = std::max(max_y, used_tracks); + device_ctx_.chan_width.y_list[i] = used_tracks; + } + + device_ctx_.chan_width.max = std::max(max_y, max_x); + device_ctx_.chan_width.x_min = min_x; + device_ctx_.chan_width.x_max = max_x; + device_ctx_.chan_width.y_min = min_y; + device_ctx_.chan_width.y_max = max_y; + } + + void pack_tiles() { + for (const auto& node_loc : sink_source_loc_map_) { + int tile_id = node_loc.first; + int x, y; + t_location loc = tile_loc_bimap_[tile_id]; + std::tie(x, y) = loc; + + auto pin_vec = node_loc.second; + for (size_t pin_id = 0; pin_id < pin_vec.size(); ++pin_id) { + bool input; + float RC; + int interchange_node_id; + std::tie(input, RC, interchange_node_id) = pin_vec[pin_id]; + + e_rr_type pin = input ? e_rr_type::SINK : e_rr_type::SOURCE; + + float R, C; + std::tie(R, C) = input ? std::tuple(0, RC) : std::tuple(RC, 0); + + if (interchange_node_id != -1) { + t_location track_loc; + e_rr_type track_type; + int track_idx; + if (virtual_redirect_.find(std::make_tuple(interchange_node_id, loc)) == virtual_redirect_.end()) { + VTR_LOG("node_id:%d, loc->X:%d Y:%d\n", interchange_node_id, loc.first, loc.second); + VTR_ASSERT(false); + } + auto virtual_key = virtual_redirect_[std::make_tuple(interchange_node_id, loc)]; + track_idx = virtual_beg_to_real_[virtual_key]; + std::tie(track_loc, track_type, std::ignore) = virtual_key; + + auto val = chan_loc_map_[std::make_tuple(track_loc, track_type)][track_idx]; + int track_seg; + float track_R, track_C; + t_location end; + std::tie(track_seg, track_R, track_C, end) = val; + track_R += R; + track_C += C; + + if (node_id_count_[interchange_node_id] == 1) + track_seg = 0; //set __generic__ segment type for wires going to/from site and that have pips in tile + + chan_loc_map_[std::make_tuple(track_loc, track_type)][track_idx] = std::make_tuple(track_seg, track_R, track_C, end); + } + + rr_nodes_.make_room_for_node(RRNodeId(rr_idx)); + loc_type_idx_to_rr_idx_[std::make_tuple(loc, pin, pin_id)] = rr_idx; + auto node = rr_nodes_[rr_idx]; + RRNodeId node_id = node.id(); + + device_ctx_.rr_graph_builder.set_node_type(node_id, pin); + device_ctx_.rr_graph_builder.set_node_capacity(node_id, 1); + device_ctx_.rr_graph_builder.set_node_rc_index(node_id, NodeRCIndex(find_create_rr_rc_data(0, 0))); + device_ctx_.rr_graph_builder.set_node_coordinates(node_id, x, y, x, y); + device_ctx_.rr_graph_builder.set_node_ptc_num(node_id, pin_id); + + if (pin == e_rr_type::SOURCE) + device_ctx_.rr_graph_builder.set_node_cost_index(node_id, RRIndexedDataId(SOURCE_COST_INDEX)); + else + device_ctx_.rr_graph_builder.set_node_cost_index(node_id, RRIndexedDataId(SINK_COST_INDEX)); + + rr_idx++; + } + + for (size_t pin_id = 0; pin_id < pin_vec.size(); ++pin_id) { + bool input; + int interchange_node_id; + std::tie(input, std::ignore, interchange_node_id) = pin_vec[pin_id]; + + if (interchange_node_id == -1) + continue; + + e_rr_type pin = input ? e_rr_type::IPIN : e_rr_type::OPIN; + + rr_nodes_.make_room_for_node(RRNodeId(rr_idx)); + loc_type_idx_to_rr_idx_[std::make_tuple(loc, pin, pin_id)] = rr_idx; + auto node = rr_nodes_[rr_idx]; + RRNodeId node_id = node.id(); + + device_ctx_.rr_graph_builder.set_node_type(node_id, pin); + device_ctx_.rr_graph_builder.set_node_capacity(node_id, 1); + device_ctx_.rr_graph_builder.set_node_rc_index(node_id, NodeRCIndex(find_create_rr_rc_data(0, 0))); + device_ctx_.rr_graph_builder.set_node_coordinates(node_id, x, y, x, y); + device_ctx_.rr_graph_builder.set_node_ptc_num(node_id, pin_id); + device_ctx_.rr_graph_builder.add_node_side(node_id, e_side::BOTTOM); + + if (pin == e_rr_type::IPIN) + device_ctx_.rr_graph_builder.set_node_cost_index(node_id, RRIndexedDataId(IPIN_COST_INDEX)); + else + device_ctx_.rr_graph_builder.set_node_cost_index(node_id, RRIndexedDataId(OPIN_COST_INDEX)); + + rr_idx++; + } + } + } + + void pack_chans() { + for (auto i : chan_loc_map_) { + t_location loc; + e_rr_type type; + std::tie(loc, type) = i.first; + int x, y; + std::tie(x, y) = loc; + + auto tracks = i.second; + for (auto track : tracks) { + int seg; + float R, C; + t_location end; + int x1, y1; + std::tie(seg, R, C, end) = track.second; + std::tie(x1, y1) = end; + + rr_nodes_.make_room_for_node(RRNodeId(rr_idx)); + loc_type_idx_to_rr_idx_[std::make_tuple(loc, type, track.first)] = rr_idx; + auto node = rr_nodes_[rr_idx]; + RRNodeId node_id = node.id(); + + device_ctx_.rr_graph_builder.set_node_type(node_id, type); + device_ctx_.rr_graph_builder.set_node_capacity(node_id, 1); + device_ctx_.rr_graph_builder.set_node_rc_index(node_id, NodeRCIndex(find_create_rr_rc_data(R, C))); + device_ctx_.rr_graph_builder.set_node_direction(node_id, Direction::BIDIR); + + device_ctx_.rr_graph_builder.set_node_coordinates(node_id, x, y, x1, y1); + device_ctx_.rr_graph_builder.set_node_ptc_num(node_id, track.first); + + if (type == e_rr_type::CHANX) + device_ctx_.rr_graph_builder.set_node_cost_index(node_id, RRIndexedDataId(CHANX_COST_INDEX_START + seg)); + else + device_ctx_.rr_graph_builder.set_node_cost_index(node_id, RRIndexedDataId(CHANX_COST_INDEX_START + segment_inf_.size() + seg)); + + seg_index_[device_ctx_.rr_graph.node_cost_index(node_id)] = seg; + + rr_idx++; + } + } + } + + void pack_rr_nodes() { + rr_nodes_.clear(); + seg_index_.resize(CHANX_COST_INDEX_START + 2 * segment_inf_.size(), -1); + pack_tiles(); + pack_chans(); + rr_nodes_.shrink_to_fit(); + } + + void pack_tiles_edges() { + for (const auto& i : sink_source_loc_map_) { + int tile_id = i.first; + int x, y; + t_location loc = tile_loc_bimap_[tile_id]; + std::tie(x, y) = loc; + auto pin_vec = i.second; + for (size_t ipin = 0; ipin < pin_vec.size(); ++ipin) { + bool input; + int node_id; + std::tie(input, std::ignore, node_id) = pin_vec[ipin]; + + if (node_id == -1) + continue; + + e_rr_type pin = input ? e_rr_type::SINK : e_rr_type::SOURCE; + e_rr_type mux = input ? e_rr_type::IPIN : e_rr_type::OPIN; + + auto virtual_chan_key = virtual_redirect_[std::make_tuple(node_id, loc)]; + auto chan_key = std::make_tuple(std::get<0>(virtual_chan_key), + std::get<1>(virtual_chan_key), + virtual_beg_to_real_[virtual_chan_key]); + + int pin_id = loc_type_idx_to_rr_idx_[std::make_tuple(loc, pin, ipin)]; + int mux_id = loc_type_idx_to_rr_idx_[std::make_tuple(loc, mux, ipin)]; + + int track_id = loc_type_idx_to_rr_idx_[chan_key]; + + int sink = input ? pin_id : track_id; + int sink_src = mux_id; + int src = input ? track_id : pin_id; + + device_ctx_.rr_graph_builder.emplace_back_edge(RRNodeId(src), + RRNodeId(sink_src), + src == track_id ? 1 : 0); + device_ctx_.rr_graph_builder.emplace_back_edge(RRNodeId(sink_src), + RRNodeId(sink), + sink == track_id ? 1 : 0); + } + } + } + + void pack_chans_edges() { + for (auto& i : virtual_shorts_) { + t_location l1, l2; + e_rr_type t1, t2; + int virtual_idx1, idx1, virtual_idx2, idx2; + std::tie(l1, t1, virtual_idx1, l2, t2, virtual_idx2) = i; + idx1 = virtual_beg_to_real_[std::make_tuple(l1, t1, virtual_idx1)]; + idx2 = virtual_beg_to_real_[std::make_tuple(l2, t2, virtual_idx2)]; + + std::tuple key1(l1, t1, idx1), key2(l2, t2, idx2); + + int src, sink; + src = loc_type_idx_to_rr_idx_[key1]; + sink = loc_type_idx_to_rr_idx_[key2]; + + device_ctx_.rr_graph_builder.emplace_back_edge(RRNodeId(src), RRNodeId(sink), 0); + device_ctx_.rr_graph_builder.emplace_back_edge(RRNodeId(sink), RRNodeId(src), 0); + } + } + + void pack_pips() { + for (auto& i : pips_) { + int node1, node2; + std::tie(node1, node2) = i.first; + int sw_id, tile_id; + std::tuple metadata; + std::tie(sw_id, tile_id, metadata) = i.second; + VTR_ASSERT(sw_id > 1); + int name, wire0, wire1; + bool forward; + std::tie(name, wire0, wire1, forward) = metadata; + t_location loc = tile_loc_bimap_[tile_id]; + auto virtual_key1 = virtual_redirect_[std::make_tuple(node1, loc)]; + auto virtual_key2 = virtual_redirect_[std::make_tuple(node2, loc)]; + + auto key1 = std::make_tuple(std::get<0>(virtual_key1), + std::get<1>(virtual_key1), + virtual_beg_to_real_[virtual_key1]); + auto key2 = std::make_tuple(std::get<0>(virtual_key2), + std::get<1>(virtual_key2), + virtual_beg_to_real_[virtual_key2]); + + int src, sink; + src = loc_type_idx_to_rr_idx_[key1]; + sink = loc_type_idx_to_rr_idx_[key2]; + device_ctx_.rr_graph_builder.emplace_back_edge(RRNodeId(src), RRNodeId(sink), sw_id); + + // TODO: find a more efficient way to pack PIP information to build the physical netlist out + // of a routed design + char metadata_[100]; + char* temp = int_to_string(metadata_, name); + *temp++ = '.'; + temp = int_to_string(temp, wire0); + *temp++ = ','; + temp = int_to_string(temp, wire1); + *temp++ = '.'; + temp = int_to_string(temp, forward ? 1 : 0); + *temp++ = 0; + + vpr::add_rr_edge_metadata(src, sink, sw_id, vtr::string_view("FPGAInterchange"), vtr::string_view(metadata_)); + } + } + + void pack_rr_edges() { + pack_tiles_edges(); + pack_chans_edges(); + pack_pips(); + device_ctx_.rr_graph_builder.mark_edges_as_rr_switch_ids(); + device_ctx_.rr_graph_builder.partition_edges(); + } + + /** @brief Fills the device RR graph. + */ + void pack_to_rr_graph() { + pack_rr_nodes(); + pack_rr_edges(); + } + + /** @brief Finilizes the RR graph generation. + * + * - Builds the node lookup + * - Inits fan_ins + * - Loads indexed data + */ + void finish_rr_generation() { + for (t_rr_type rr_type : RR_TYPES) + if (rr_type == CHANX) + device_ctx_.rr_graph_builder.node_lookup().resize_nodes(grid_.height(), grid_.width(), rr_type, NUM_SIDES); + else + device_ctx_.rr_graph_builder.node_lookup().resize_nodes(grid_.width(), grid_.height(), rr_type, NUM_SIDES); + + for (size_t node = 0; node < rr_nodes_.size(); node++) { + auto rr_node = rr_nodes_[node]; + device_ctx_.rr_graph_builder.add_node_to_all_locs(rr_node.id()); + } + + device_ctx_.rr_graph_builder.init_fan_in(); + + // Creates a temporary copy to convert from vtr::vector to sitd::vector. + // + // This is required because the ``alloc_and_load_rr_indexed_data()`` function supports only std::vector data + // type for ``rr_segments`` + // Note that this is a dirty fix (to avoid massive code changes) + // TODO: The ``alloc_and_load_rr_indexed_data()`` function should embrace ``vtr::vector`` for ``rr_segments`` + std::vector temp_rr_segs; + temp_rr_segs.reserve(segment_inf_.size()); + for (auto& rr_seg : segment_inf_) { + temp_rr_segs.push_back(rr_seg); + } + + alloc_and_load_rr_indexed_data( + temp_rr_segs, + 0, //we connect ipins to tracks with shorts + base_cost_type_); + + VTR_ASSERT(device_ctx_.rr_indexed_data.size() == seg_index_.size()); + + for (size_t i = 0; i < seg_index_.size(); ++i) + device_ctx_.rr_indexed_data[RRIndexedDataId(i)].seg_index = seg_index_[RRIndexedDataId(i)]; + } +}; + +void build_rr_graph_fpga_interchange(const t_graph_type graph_type, + const DeviceGrid& grid, + const std::vector& segment_inf, + const enum e_base_cost_type base_cost_type, + int* wire_to_rr_ipin_switch, + std::string& read_rr_graph_filename, + bool do_check_rr_graph) { + vtr::ScopedStartFinishTimer timer("Building RR Graph from device database"); + + auto& device_ctx = g_vpr_ctx.mutable_device(); + size_t num_segments = segment_inf.size(); + + auto& rr_segments = device_ctx.rr_graph_builder.rr_segments(); + rr_segments.reserve(num_segments); + for (long unsigned int iseg = 0; iseg < num_segments; ++iseg) { + rr_segments.push_back(segment_inf[(iseg)]); + } + + // Decompress GZipped capnproto device file + gzFile file = gzopen(get_arch_file_name(), "r"); + VTR_ASSERT(file != Z_NULL); + + std::vector output_data; + output_data.resize(4096); + std::stringstream sstream(std::ios_base::in | std::ios_base::out | std::ios_base::binary); + while (true) { + int ret = gzread(file, output_data.data(), output_data.size()); + VTR_ASSERT(ret >= 0); + if (ret > 0) { + sstream.write((const char*)output_data.data(), ret); + VTR_ASSERT(sstream); + } else { + VTR_ASSERT(ret == 0); + int error; + gzerror(file, &error); + VTR_ASSERT(error == Z_OK); + break; + } + } + + VTR_ASSERT(gzclose(file) == Z_OK); + + sstream.seekg(0); + kj::std::StdInputStream istream(sstream); + + // Reader options + capnp::ReaderOptions reader_options; + reader_options.nestingLimit = std::numeric_limits::max(); + reader_options.traversalLimitInWords = std::numeric_limits::max(); + + capnp::InputStreamMessageReader message_reader(istream, reader_options); + + auto device_reader = message_reader.getRoot(); + + InterchangeRRGraphBuilder builder( + device_reader, + grid, + device_ctx, + device_ctx.rr_graph_builder.rr_nodes(), + device_ctx.rr_graph.rr_segments(), + device_ctx.physical_tile_types, + base_cost_type); + builder.build_rr_graph(); + *wire_to_rr_ipin_switch = 0; + device_ctx.read_rr_graph_filename.assign(get_arch_file_name()); + read_rr_graph_filename.assign(get_arch_file_name()); + + if (do_check_rr_graph) { + check_rr_graph(graph_type, grid, device_ctx.physical_tile_types); + } +} diff --git a/vpr/src/route/rr_graph_fpga_interchange.h b/vpr/src/route/rr_graph_fpga_interchange.h new file mode 100644 index 00000000000..e24127bc300 --- /dev/null +++ b/vpr/src/route/rr_graph_fpga_interchange.h @@ -0,0 +1,82 @@ +/* Defines the function used to load an rr graph written in xml format into vpr*/ + +#ifndef RR_GRAPH_FPGA_INTERCHANGE_H +#define RR_GRAPH_FPGA_INTERCHANGE_H + +#include "rr_graph.h" +#include "device_grid.h" +#include "DeviceResources.capnp.h" +#include "LogicalNetlist.capnp.h" +#include "capnp/serialize.h" +#include "capnp/serialize-packed.h" +#include + +void build_rr_graph_fpga_interchange(const t_graph_type graph_type, + const DeviceGrid& grid, + const std::vector& segment_inf, + const enum e_base_cost_type base_cost_type, + int* wire_to_rr_ipin_switch, + std::string& read_rr_graph_filename, + bool do_check_rr_graph); + +namespace hash_tuple { +namespace { + +// https://nullprogram.com/blog/2018/07/31/ +inline size_t splittable64(size_t x) { + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9U; + x ^= x >> 27; + x *= 0x94d049bb133111ebU; + x ^= x >> 31; + return x; +} + +template +struct hash { + size_t operator()(TT const& tt) const { + return splittable64(std::hash()(tt)); + } +}; + +template +struct hash> { + size_t operator()(const std::pair& p) const noexcept(true) { + size_t lhs, rhs; + lhs = splittable64(std::hash()(p.first)); + rhs = splittable64(std::hash()(p.second)); + return (lhs * lhs + 3UL * lhs + 2UL * lhs * rhs + rhs + rhs * rhs) / 2UL; + } +}; +template +inline void hash_combine(std::size_t& seed, T const& v) { + seed ^= hash_tuple::hash()(v) + 0x9e3779b97f4a7c15UL + (seed << 12) + (seed >> 4); +} +// Recursive template code derived from Matthieu M. +template::value - 1> +struct HashValueImpl { + static void apply(size_t& seed, Tuple const& tuple) { + HashValueImpl::apply(seed, tuple); + hash_combine(seed, std::get(tuple)); + } +}; + +template +struct HashValueImpl { + static void apply(size_t& seed, Tuple const& tuple) { + hash_combine(seed, std::get<0>(tuple)); + } +}; +} // namespace + +template +struct hash> { + size_t operator()(std::tuple const& tt) const { + size_t seed = 0; + HashValueImpl>::apply(seed, tt); + return seed; + } +}; +} // namespace hash_tuple + +#endif /* RR_GRAPH_FPGA_INTERCHANGE_H */