From 38800e181741cb64ef916e269d8e476c3a4095a0 Mon Sep 17 00:00:00 2001 From: Richard Ren Date: Tue, 7 May 2019 15:58:13 -0400 Subject: [PATCH 01/15] Added Cinternal instances The following are all the squashed commit messages: added instances of Cinternal to mux, tri, buffer added the Cinternal to struct definitions in physical_types.h Previous change with CINTERNAL_REQD was lost, reuploaded removed Cinternal element from buffer readjusted comment in physical_types.h Propagates c_internal from arch to rr_graph; reads c_internal; writes c_internal to output Signed-off-by: Alessandro Comodi --- libs/libarchfpga/src/physical_types.h | 4 ++++ libs/libarchfpga/src/read_xml_arch_file.cpp | 13 ++++++++----- vpr/src/route/rr_graph.cpp | 1 + vpr/src/route/rr_graph_reader.cpp | 2 ++ vpr/src/route/rr_graph_writer.cpp | 3 ++- 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index 36c1150cc2a..b48d6aaa1b1 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -1225,6 +1225,7 @@ enum class BufferSize { * R: Equivalent resistance of the buffer/switch. * * Cin: Input capacitance. * * Cout: Output capacitance. * + * Cinternal: Internal capacitance in a buffer with fanout. * * Tdel_map: A map where the key is the number of inputs and the entry * * is the corresponding delay. If there is only one entry at key * * UNDEFINED, then delay is a constant (doesn't vary with fan-in). * @@ -1242,6 +1243,7 @@ struct t_arch_switch_inf { float R = 0.; float Cin = 0.; float Cout = 0.; + float Cinternal = 0.; // defined the property Cinternal float mux_trans_size = 1.; BufferSize buf_size_type = BufferSize::AUTO; float buf_size = 0.; @@ -1293,6 +1295,7 @@ struct t_arch_switch_inf { * R: Equivalent resistance of the buffer/switch. * * Cin: Input capacitance. * * Cout: Output capacitance. * + * Cinternal: Internal capacitance in a buffer. * * Tdel: Intrinsic delay.
The delay through an unloaded switch is * * Tdel + R * Cout. * * mux_trans_size: The area of each transistor in the segment's driving mux * @@ -1303,6 +1306,7 @@ struct t_rr_switch_inf { float R = 0.; float Cin = 0.; float Cout = 0.; + float Cinternal = 0.; //defined the property Cinternal float Tdel = 0.; float mux_trans_size = 0.; float buf_size = 0.; diff --git a/libs/libarchfpga/src/read_xml_arch_file.cpp b/libs/libarchfpga/src/read_xml_arch_file.cpp index 519b0287b1d..e2c374261c9 100644 --- a/libs/libarchfpga/src/read_xml_arch_file.cpp +++ b/libs/libarchfpga/src/read_xml_arch_file.cpp @@ -3074,23 +3074,23 @@ static void ProcessSwitches(pugi::xml_node Parent, SwitchType type = SwitchType::MUX; if (0 == strcmp(type_name, "mux")) { type = SwitchType::MUX; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size", "mux_trans_size"}, " with type '"s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "buf_size", "power_buf_size", "mux_trans_size"}, " with type '"s + type_name + "'"s, loc_data); // buffered switch should have a Cinternal element } else if (0 == strcmp(type_name, "tristate")) { type = SwitchType::TRISTATE; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); // buffered switch should have a Cinternal element } else if (0 == strcmp(type_name, "buffer")) { type = SwitchType::BUFFER; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, 
loc_data); // buffer should not have a Cinternal element } else if (0 == strcmp(type_name, "pass_gate")) { type = SwitchType::PASS_GATE; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type '"s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type '"s + type_name + "'"s, loc_data); // unbuffered switch does not have Cinternal element } else if (0 == strcmp(type_name, "short")) { type = SwitchType::SHORT; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type "s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type '"s + type_name + "'"s, loc_data); // unbuffered switch does not have Cinternal element } else { archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), @@ -3102,6 +3102,8 @@ static void ProcessSwitches(pugi::xml_node Parent, ReqOpt COUT_REQD = TIMING_ENABLE_REQD; ReqOpt CIN_REQD = TIMING_ENABLE_REQD; + ReqOpt CINTERNAL_REQD = OPTIONAL; //defined the parameter + if (arch_switch.type() == SwitchType::SHORT) { //Cin/Cout are optional on shorts, since they really only have one capacitance CIN_REQD = OPTIONAL; @@ -3109,6 +3111,7 @@ static void ProcessSwitches(pugi::xml_node Parent, } arch_switch.Cin = get_attribute(Node, "Cin", loc_data, CIN_REQD).as_float(0); arch_switch.Cout = get_attribute(Node, "Cout", loc_data, COUT_REQD).as_float(0); + arch_switch.Cinternal = get_attribute(Node, "Cinternal", loc_data, CINTERNAL_REQD).as_float(0); // retrieve the optional parameter if (arch_switch.type() == SwitchType::MUX) { //Only muxes have mux transistors diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index d00df14ef95..2d644064ef7 100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -893,6 +893,7 @@ void load_rr_switch_from_arch_switch(int arch_switch_idx,
device_ctx.rr_switch_inf[rr_switch_idx].set_type(device_ctx.arch_switch_inf[arch_switch_idx].type()); device_ctx.rr_switch_inf[rr_switch_idx].R = device_ctx.arch_switch_inf[arch_switch_idx].R; device_ctx.rr_switch_inf[rr_switch_idx].Cin = device_ctx.arch_switch_inf[arch_switch_idx].Cin; + device_ctx.rr_switch_inf[rr_switch_idx].Cinternal = device_ctx.arch_switch_inf[arch_switch_idx].Cinternal; //now we can retrieve Cinternal from the arch and implement into the rr calculations. device_ctx.rr_switch_inf[rr_switch_idx].Cout = device_ctx.arch_switch_inf[arch_switch_idx].Cout; device_ctx.rr_switch_inf[rr_switch_idx].Tdel = rr_switch_Tdel; device_ctx.rr_switch_inf[rr_switch_idx].mux_trans_size = device_ctx.arch_switch_inf[arch_switch_idx].mux_trans_size; diff --git a/vpr/src/route/rr_graph_reader.cpp b/vpr/src/route/rr_graph_reader.cpp index 465ea6d92f9..6e8e481434b 100644 --- a/vpr/src/route/rr_graph_reader.cpp +++ b/vpr/src/route/rr_graph_reader.cpp @@ -234,11 +234,13 @@ void process_switches(pugi::xml_node parent, const pugiutil::loc_data& loc_data) rr_switch.R = get_attribute(SwitchSubnode, "R", loc_data).as_float(); rr_switch.Cin = get_attribute(SwitchSubnode, "Cin", loc_data).as_float(); rr_switch.Cout = get_attribute(SwitchSubnode, "Cout", loc_data).as_float(); + rr_switch.Cinternal = get_attribute(SwitchSubnode, "Cinternal", loc_data, pugiutil::OPTIONAL).as_float(0); // optional (defaults to 0) so rr_graph files without Cinternal still load rr_switch.Tdel = get_attribute(SwitchSubnode, "Tdel", loc_data).as_float(); } else { rr_switch.R = 0; rr_switch.Cin = 0; rr_switch.Cout = 0; + rr_switch.Cinternal = 0; rr_switch.Tdel = 0; } SwitchSubnode = get_single_child(Switch, "sizing", loc_data); diff --git a/vpr/src/route/rr_graph_writer.cpp b/vpr/src/route/rr_graph_writer.cpp index fccc0341136..48012725a65 100644 --- a/vpr/src/route/rr_graph_writer.cpp +++ b/vpr/src/route/rr_graph_writer.cpp @@ -189,7 +189,8 @@ void write_rr_switches(fstream& fp) { } fp << ">" << endl; - fp << "\t\t\t" << endl; + fp << "\t\t\t" << endl; fp << "\t\t\t" << endl; fp << "\t\t" <<
endl; } From d61276d460ba298f5e7d7995d26e18372f332d48 Mon Sep 17 00:00:00 2001 From: Michael Gielda Date: Sun, 16 Jun 2019 15:33:38 +0200 Subject: [PATCH 02/15] Add VTR change issue template Signed-off-by: Alessandro Comodi --- .github/ISSUE_TEMPLATE/bug_report.md | 3 +++ .github/ISSUE_TEMPLATE/feature_request.md | 3 +++ .github/ISSUE_TEMPLATE/vtr-change.md | 25 +++++++++++++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/vtr-change.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index dc1b48f28cc..1e5584ab1b6 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,6 +1,9 @@ --- name: Bug report about: Create a report to help us improve +title: '' +labels: '' +assignees: '' --- diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 96a3a6f322e..7523b9efbf8 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,6 +1,9 @@ --- name: Feature request about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' --- diff --git a/.github/ISSUE_TEMPLATE/vtr-change.md b/.github/ISSUE_TEMPLATE/vtr-change.md new file mode 100644 index 00000000000..74d5ec9e8f6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/vtr-change.md @@ -0,0 +1,25 @@ +--- +name: VTR change +about: Describe purpose and lifecycle of a local change we made to VTR +title: '' +labels: '' +assignees: '' + +--- + +### Why did we need this? (what does this change enable us to do) + + +### What did it change? + + +### Should it be merged upstream - if not, when can we delete it? + +### What is needed to get this merged / deleted? + +* [ ] is the implementation work to make suitable for merging / deletion completed? +* [ ] Is there an associated test? +* [ ] is this currently part of the Conda package? +* [ ] is this properly cleaned up in our local repositories? 
+ +### Tracker / branch / PR & other useful links From 6f3067882584ffc2ae6fc4929eefb6a6bd983544 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Mon, 8 Oct 2018 19:35:29 -0700 Subject: [PATCH 03/15] Avoid criticality issue. Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> Signed-off-by: Alessandro Comodi --- vpr/src/timing/timing_util.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp index e260b8f5cfa..8bc919dac3f 100644 --- a/vpr/src/timing/timing_util.cpp +++ b/vpr/src/timing/timing_util.cpp @@ -571,6 +571,10 @@ float calc_relaxed_criticality(const std::map& domains_max_re max_req += shift; } + if (!std::isfinite(slack)) { + continue; + } + float crit = std::numeric_limits::quiet_NaN(); if (max_req > 0.) { //Standard case From e684b0ee93299961cf3aec4c81a36cc9fd12aff0 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Mon, 10 Jun 2019 10:51:45 -0700 Subject: [PATCH 04/15] vpr: allow connection box map based algorithm during routing Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> --- vpr/src/base/echo_files.cpp | 2 + vpr/src/base/echo_files.h | 1 + vpr/src/base/read_options.cpp | 15 +- vpr/src/base/vpr_context.h | 3 + vpr/src/base/vpr_types.h | 5 +- vpr/src/place/timing_place_lookup.cpp | 11 +- vpr/src/route/connection_box.cpp | 127 +++++ vpr/src/route/connection_box.h | 76 +++ .../route/connection_box_lookahead_map.cpp | 460 ++++++++++++++++++ vpr/src/route/connection_box_lookahead_map.h | 14 + vpr/src/route/router_lookahead.cpp | 22 + vpr/src/route/router_lookahead.h | 5 + vpr/src/route/router_lookahead_map_utils.cpp | 192 ++++++++ vpr/src/route/router_lookahead_map_utils.h | 142 ++++++ vpr/src/route/rr_graph.cpp | 5 + vpr/src/route/rr_graph_reader.cpp | 54 ++ vpr/src/route/rr_node.h | 2 +- 17 files changed, 1127 insertions(+), 9 deletions(-) create mode 
100644 vpr/src/route/connection_box.cpp create mode 100644 vpr/src/route/connection_box.h create mode 100644 vpr/src/route/connection_box_lookahead_map.cpp create mode 100644 vpr/src/route/connection_box_lookahead_map.h create mode 100644 vpr/src/route/router_lookahead_map_utils.cpp create mode 100644 vpr/src/route/router_lookahead_map_utils.h diff --git a/vpr/src/base/echo_files.cpp b/vpr/src/base/echo_files.cpp index d195c7d3871..e35b04c6da0 100644 --- a/vpr/src/base/echo_files.cpp +++ b/vpr/src/base/echo_files.cpp @@ -112,6 +112,8 @@ void alloc_and_load_echo_file_info() { setEchoFileName(E_ECHO_CHAN_DETAILS, "chan_details.txt"); setEchoFileName(E_ECHO_SBLOCK_PATTERN, "sblock_pattern.txt"); setEchoFileName(E_ECHO_ENDPOINT_TIMING, "endpoint_timing.echo.json"); + + setEchoFileName(E_ECHO_LOOKAHEAD_MAP, "lookahead_map.echo"); } void free_echo_file_info() { diff --git a/vpr/src/base/echo_files.h b/vpr/src/base/echo_files.h index 2aa9253617b..3a3507f60ca 100644 --- a/vpr/src/base/echo_files.h +++ b/vpr/src/base/echo_files.h @@ -43,6 +43,7 @@ enum e_echo_files { E_ECHO_CHAN_DETAILS, E_ECHO_SBLOCK_PATTERN, E_ECHO_ENDPOINT_TIMING, + E_ECHO_LOOKAHEAD_MAP, //Timing Graphs E_ECHO_PRE_PACKING_TIMING_GRAPH, diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 28470df9142..e58f220eab6 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -648,6 +648,8 @@ struct ParseRouterLookahead { conv_value.set_value(e_router_lookahead::CLASSIC); else if (str == "map") conv_value.set_value(e_router_lookahead::MAP); + else if (str == "connection_box_map") + conv_value.set_value(e_router_lookahead::CONNECTION_BOX_MAP); else { std::stringstream msg; msg << "Invalid conversion from '" @@ -661,17 +663,22 @@ struct ParseRouterLookahead { ConvertedValue to_str(e_router_lookahead val) { ConvertedValue conv_value; - if (val == e_router_lookahead::CLASSIC) + if (val == e_router_lookahead::CLASSIC) { conv_value.set_value("classic"); - else { - 
VTR_ASSERT(val == e_router_lookahead::MAP); + } else if (val == e_router_lookahead::MAP) { conv_value.set_value("map"); + } else if (val == e_router_lookahead::CONNECTION_BOX_MAP) { + conv_value.set_value("connection_box_map"); + } else { + std::stringstream msg; + msg << "Unrecognized e_router_lookahead"; + conv_value.set_error(msg.str()); } return conv_value; } std::vector default_choices() { - return {"classic", "map"}; + return {"classic", "map", "connection_box_map"}; } }; diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 7ddc42ff3be..0ef875206f7 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -20,6 +20,7 @@ #include "clock_connection_builders.h" #include "route_traceback.h" #include "place_macro.h" +#include "connection_box.h" //A Context is collection of state relating to a particular part of VPR // @@ -194,6 +195,8 @@ struct DeviceContext : public Context { * Clock Network ********************************************************************/ t_clock_arch* clock_arch; + + ConnectionBoxes connection_boxes; }; //State relating to power analysis diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index a90f3f9f3fd..b46f726c532 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -103,7 +103,10 @@ constexpr const char* EMPTY_BLOCK_NAME = "EMPTY"; enum class e_router_lookahead { CLASSIC, //VPR's classic lookahead (assumes uniform wire types) MAP, //Lookahead considering different wire types (see Oleg Petelin's MASc Thesis) - NO_OP //A no-operation lookahead which always returns zero + NO_OP, //A no-operation lookahead which always returns zero + CONNECTION_BOX_MAP, + // Lookahead considering different wire types and IPIN + // connection box. 
}; enum class e_route_bb_update { diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index e6e0e1dccda..60c13a079e1 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -264,9 +264,14 @@ static float route_connection_delay(int source_x, int source_y, int sink_x, int VTR_ASSERT(sink_rr_node != OPEN); - successfully_routed = calculate_delay(source_rr_node, sink_rr_node, - router_opts, - &net_delay_value); + { + vtr::ScopedStartFinishTimer timer(vtr::string_fmt( + "Routing Src: %d Sink: %d", source_rr_node, + sink_rr_node)); + successfully_routed = calculate_delay(source_rr_node, sink_rr_node, + router_opts, + &net_delay_value); + } if (successfully_routed) break; } diff --git a/vpr/src/route/connection_box.cpp b/vpr/src/route/connection_box.cpp new file mode 100644 index 00000000000..85d554b4307 --- /dev/null +++ b/vpr/src/route/connection_box.cpp @@ -0,0 +1,127 @@ +#include "connection_box.h" +#include "vtr_assert.h" +#include "globals.h" + +ConnectionBoxes::ConnectionBoxes() + : size_(std::make_pair(0, 0)) { +} + +size_t ConnectionBoxes::num_connection_box_types() const { + return boxes_.size(); +} + +std::pair ConnectionBoxes::connection_box_grid_size() const { + return size_; +} + +const ConnectionBox* ConnectionBoxes::get_connection_box(ConnectionBoxId box) const { + if (!bool(box)) { // an invalid id has no box + return nullptr; + } + + size_t index = size_t(box); + if (index >= boxes_.size()) { + return nullptr; + } + + return &boxes_.at(index); +} + +bool ConnectionBoxes::find_connection_box(int inode, + ConnectionBoxId* box_id, + std::pair* box_location) const { + VTR_ASSERT(box_id != nullptr); + VTR_ASSERT(box_location != nullptr); + + const auto& conn_box_loc = ipin_map_[inode]; + if (conn_box_loc.box_id == ConnectionBoxId::INVALID()) { + return false; + } + + *box_id = conn_box_loc.box_id; + *box_location = conn_box_loc.box_location; + return true; +} + +// Clear IPIN map and set connection box grid
size and box ids. +void ConnectionBoxes::reset_boxes(std::pair size, + const std::vector boxes) { + clear(); + + size_ = size; + boxes_ = boxes; +} + +void ConnectionBoxes::resize_nodes(size_t rr_node_size) { + ipin_map_.resize(rr_node_size); + canonical_loc_map_.resize(rr_node_size, + std::make_pair(-1, -1)); +} + +void ConnectionBoxes::clear() { + ipin_map_.clear(); + size_ = std::make_pair(0, 0); + boxes_.clear(); + canonical_loc_map_.clear(); + sink_to_ipin_.clear(); +} + +void ConnectionBoxes::add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location) { + // Ensure that box location is in bounds + VTR_ASSERT(box_location.first < size_.first); + VTR_ASSERT(box_location.second < size_.second); + + // Bounds check box_id + VTR_ASSERT(bool(box_id)); + VTR_ASSERT(size_t(box_id) < boxes_.size()); + + // Make sure sink map will not be invalidated upon insertion. + VTR_ASSERT(sink_to_ipin_.size() == 0); + + ipin_map_[inode] = ConnBoxLoc(box_location, box_id); +} + +void ConnectionBoxes::add_canonical_loc(int inode, std::pair loc) { + VTR_ASSERT(loc.first < size_.first); + VTR_ASSERT(loc.second < size_.second); + canonical_loc_map_[inode] = loc; +} + +const std::pair* ConnectionBoxes::find_canonical_loc(int inode) const { + const auto& canon_loc = canonical_loc_map_[inode]; + if (canon_loc.first == size_t(-1)) { + return nullptr; + } + + return &canon_loc; +} + +void ConnectionBoxes::create_sink_back_ref() { + const auto& device_ctx = g_vpr_ctx.device(); + + sink_to_ipin_.resize(device_ctx.rr_nodes.size(), {{0, 0, 0, 0}, 0}); + + for (size_t i = 0; i < device_ctx.rr_nodes.size(); ++i) { + const auto& ipin_node = device_ctx.rr_nodes[i]; + if (ipin_node.type() != IPIN) { + continue; + } + + if (ipin_map_[i].box_id == ConnectionBoxId::INVALID()) { + continue; + } + + for (auto edge : ipin_node.edges()) { + int sink_inode = ipin_node.edge_sink_node(edge); + VTR_ASSERT(device_ctx.rr_nodes[sink_inode].type() == SINK); + 
VTR_ASSERT(sink_to_ipin_[sink_inode].ipin_count < 4); + auto& sink_to_ipin = sink_to_ipin_[sink_inode]; + sink_to_ipin.ipin_nodes[sink_to_ipin.ipin_count++] = i; + } + } +} + +const SinkToIpin& ConnectionBoxes::find_sink_connection_boxes( + int inode) const { + return sink_to_ipin_[inode]; +} diff --git a/vpr/src/route/connection_box.h b/vpr/src/route/connection_box.h new file mode 100644 index 00000000000..06217ac2a41 --- /dev/null +++ b/vpr/src/route/connection_box.h @@ -0,0 +1,76 @@ +#ifndef CONNECTION_BOX_H +#define CONNECTION_BOX_H +// Some routing graphs have connectivity driven by types of connection boxes. +// This class relates IPIN rr nodes with connection box type and locations, used +// for connection box driven map lookahead. + +#include +#include "vtr_strong_id.h" +#include "vtr_flat_map.h" +#include "vtr_range.h" +#include + +struct connection_box_tag {}; +typedef vtr::StrongId ConnectionBoxId; + +struct ConnectionBox { + std::string name; +}; + +struct ConnBoxLoc { + ConnBoxLoc() + : box_location(std::make_pair(-1, -1)) {} + ConnBoxLoc( + const std::pair& a_box_location, + ConnectionBoxId a_box_id) + : box_location(a_box_location) + , box_id(a_box_id) {} + + std::pair box_location; + ConnectionBoxId box_id; +}; + +struct SinkToIpin { + int ipin_nodes[4]; + int ipin_count; +}; + +class ConnectionBoxes { + public: + ConnectionBoxes(); + + size_t num_connection_box_types() const; + std::pair connection_box_grid_size() const; + const ConnectionBox* get_connection_box(ConnectionBoxId box) const; + + bool find_connection_box(int inode, + ConnectionBoxId* box_id, + std::pair* box_location) const; + const std::pair* find_canonical_loc(int inode) const; + + // Clear IPIN map and set connection box grid size and box ids. 
+ void clear(); + void reset_boxes(std::pair size, + const std::vector boxes); + void resize_nodes(size_t rr_node_size); + + void add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location); + void add_canonical_loc(int inode, std::pair loc); + + // Create map from SINK's back to IPIN's + // + // This must be called after all connection boxes have been added. + void create_sink_back_ref(); + const SinkToIpin& find_sink_connection_boxes( + int inode) const; + + private: + std::pair size_; + std::vector boxes_; + std::vector ipin_map_; + std::vector sink_to_ipin_; + std::vector> + canonical_loc_map_; +}; + +#endif diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp new file mode 100644 index 00000000000..fc806f67687 --- /dev/null +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -0,0 +1,460 @@ +#include "connection_box_lookahead_map.h" + +#include +#include + +#include "connection_box.h" +#include "rr_node.h" +#include "router_lookahead_map_utils.h" +#include "globals.h" +#include "vtr_math.h" +#include "vtr_time.h" +#include "echo_files.h" + +/* we're profiling routing cost over many tracks for each wire type, so we'll + * have many cost entries at each |dx|,|dy| offset. There are many ways to + * "boil down" the many costs at each offset to a single entry for a given + * (wire type, chan_type) combination we can take the smallest cost, the + * average, median, etc. This define selects the method we use. 
+ * + * See e_representative_entry_method */ +#define REPRESENTATIVE_ENTRY_METHOD SMALLEST + +#define REF_X 25 +#define REF_Y 23 + +static int signum(int x) { + if (x > 0) return 1; + if (x < 0) + return -1; + else + return 0; +} + +typedef std::vector, Cost_Entry>> t_routing_cost_map; +static void run_dijkstra(int start_node_ind, + t_routing_cost_map* cost_map); + +class CostMap { + public: + void set_segment_count(size_t seg_count) { + cost_map_.clear(); + offset_.clear(); + cost_map_.resize(seg_count); + offset_.resize(seg_count); + + const auto& device_ctx = g_vpr_ctx.device(); + segment_map_.resize(device_ctx.rr_nodes.size()); + for (size_t i = 0; i < segment_map_.size(); ++i) { + auto& from_node = device_ctx.rr_nodes[i]; + + int from_cost_index = from_node.cost_index(); + int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index; + + segment_map_[i] = from_seg_index; + } + } + + int node_to_segment(int from_node_ind) { + return segment_map_[from_node_ind]; + } + + Cost_Entry find_cost(int from_seg_index, int delta_x, int delta_y) const { + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); + int dx = delta_x - offset_[from_seg_index].first; + int dy = delta_y - offset_[from_seg_index].second; + const auto& cost_map = cost_map_[from_seg_index]; + + if (dx < 0) { + dx = 0; + } + if (dy < 0) { + dy = 0; + } + + if (dx >= (ssize_t)cost_map.dim_size(0)) { + dx = cost_map.dim_size(0) - 1; + } + if (dy >= (ssize_t)cost_map.dim_size(1)) { + dy = cost_map.dim_size(1) - 1; + } + + return cost_map_[from_seg_index][dx][dy]; + } + + void set_cost_map(int from_seg_index, + const t_routing_cost_map& cost_map, + e_representative_entry_method method) { + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); + + // Find coordinate offset for this segment. 
+ int min_dx = 0; + int min_dy = 0; + int max_dx = 0; + int max_dy = 0; + for (const auto& entry : cost_map) { + min_dx = std::min(entry.first.first, min_dx); + min_dy = std::min(entry.first.second, min_dy); + + max_dx = std::max(entry.first.first, max_dx); + max_dy = std::max(entry.first.second, max_dy); + } + + offset_[from_seg_index].first = min_dx; + offset_[from_seg_index].second = min_dy; + size_t dim_x = max_dx - min_dx + 1; + size_t dim_y = max_dy - min_dy + 1; + + vtr::NdMatrix expansion_cost_map( + {dim_x, dim_y}); + + for (const auto& entry : cost_map) { + int x = entry.first.first - min_dx; + int y = entry.first.second - min_dy; + expansion_cost_map[x][y].add_cost_entry( + method, entry.second.delay, + entry.second.congestion); + } + + cost_map_[from_seg_index] = vtr::NdMatrix( + {dim_x, dim_y}); + + /* set the lookahead cost map entries with a representative cost + * entry from routing_cost_map */ + for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { + for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { + cost_map_[from_seg_index][ix][iy] = expansion_cost_map[ix][iy].get_representative_cost_entry(method); + } + } + + /* find missing cost entries and fill them in by copying a nearby cost entry */ + for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { + for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { + Cost_Entry cost_entry = cost_map_[from_seg_index][ix][iy]; + + if (!cost_entry.valid()) { + Cost_Entry copied_entry = get_nearby_cost_entry( + from_seg_index, + offset_[from_seg_index].first + ix, + offset_[from_seg_index].second + iy); + cost_map_[from_seg_index][ix][iy] = copied_entry; + } + } + } + } + + Cost_Entry get_nearby_cost_entry(int segment_index, int x, int y) { + /* compute the slope from x,y to 0,0 and then move towards 0,0 by one + * unit to get the coordinates of the cost entry to be copied */ + + float slope; + int copy_x, copy_y; + if (x == 0 || y == 0) { + slope = 
std::numeric_limits::infinity(); + copy_x = x - signum(x); + copy_y = y - signum(y); + } else { + slope = (float)y / (float)x; + if (slope >= 1.0) { + copy_y = y - signum(y); + copy_x = vtr::nint((float)y / slope); + } else { + copy_x = x - signum(x); + copy_y = vtr::nint((float)x * slope); + } + } + + Cost_Entry copy_entry = find_cost(segment_index, copy_x, copy_y); + + /* if the entry to be copied is also empty, recurse */ + if (copy_entry.valid()) { + return copy_entry; + } else if (copy_x == 0 && copy_y == 0) { + return Cost_Entry(); + } + + return get_nearby_cost_entry(segment_index, copy_x, copy_y); + } + + void print_cost_map(const std::vector& segment_inf, + const char* fname) { + FILE* fp = vtr::fopen(fname, "w"); + for (size_t iseg = 0; iseg < cost_map_.size(); iseg++) { + fprintf(fp, "Seg %s(%zu) (%d, %d)\n", segment_inf.at(iseg).name.c_str(), + iseg, + offset_[iseg].first, + offset_[iseg].second); + for (size_t iy = 0; iy < cost_map_[iseg].dim_size(1); iy++) { + for (size_t ix = 0; ix < cost_map_[iseg].dim_size(0); ix++) { + fprintf(fp, "%.4g,\t", + cost_map_[iseg][ix][iy].delay); + } + fprintf(fp, "\n"); + } + fprintf(fp, "\n\n"); + } + + fclose(fp); + } + + private: + std::vector> cost_map_; + std::vector> offset_; + std::vector segment_map_; +}; + +static CostMap g_cost_map; + +class StartNode { + public: + StartNode(int start_x, int start_y, t_rr_type rr_type, int seg_index) + : start_x_(start_x) + , start_y_(start_y) + , rr_type_(rr_type) + , seg_index_(seg_index) + , index_(0) {} + int get_next_node() { + const auto& device_ctx = g_vpr_ctx.device(); + const std::vector& channel_node_list = device_ctx.rr_node_indices[rr_type_][start_x_][start_y_][0]; + + for (; index_ < channel_node_list.size(); index_++) { + int node_ind = channel_node_list[index_]; + + if (node_ind == OPEN || device_ctx.rr_nodes[node_ind].capacity() == 0) { + continue; + } + + const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(node_ind); + if (loc == nullptr) { 
+                continue;
+            }
+
+            int node_cost_ind = device_ctx.rr_nodes[node_ind].cost_index();
+            int node_seg_ind = device_ctx.rr_indexed_data[node_cost_ind].seg_index;
+            if (node_seg_ind == seg_index_) {
+                index_ += 1;
+                return node_ind;
+            }
+        }
+
+        return UNDEFINED;
+    }
+
+  private:
+    int start_x_;
+    int start_y_;
+    t_rr_type rr_type_;
+    int seg_index_;
+    size_t index_;
+};
+
+// Minimum size of search for channels to profile. kMinProfile results
+// in searching x = [0, kMinProfile], and y = [0, kMinProfile[.
+//
+// Making this value larger will increase the sample size, but also the runtime
+// to produce the lookahead.
+static constexpr int kMinProfile = 1;
+
+// Maximum size of search for channels to profile. Once search is outside of
+// kMinProfile distance, lookahead will stop searching once:
+// - At least one channel has been profiled
+// - kMaxProfile is exceeded.
+static constexpr int kMaxProfile = 7;
+
+/* Builds the connection-box lookahead for every segment type in segment_inf.
+ *
+ * For each segment index, CHANX and CHANY start nodes are requested from
+ * StartNode at offsets (dx, dy) from (REF_X, REF_Y). run_dijkstra() is run
+ * from every start node that is returned and appends its samples to one
+ * t_routing_cost_map per segment, which is then handed to
+ * g_cost_map.set_cost_map() together with REPRESENTATIVE_ENTRY_METHOD.
+ *
+ * A warning is logged for a segment that yields no start node at all.
+ * When echoing of E_ECHO_LOOKAHEAD_MAP is enabled the finished map is
+ * printed to the corresponding echo file. */
+void compute_connection_box_lookahead(
+    const std::vector& segment_inf) {
+    size_t num_segments = segment_inf.size();
+    vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map");
+
+    /* free previous delay map and allocate new one */
+    g_cost_map.set_segment_count(segment_inf.size());
+
+    /* run Dijkstra's algorithm for each segment type & channel type combination */
+    for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) {
+        VTR_LOG("Creating cost map for %s(%d)\n",
+                segment_inf[iseg].name.c_str(), iseg);
+        /* allocate the cost map for this iseg/chan_type */
+        t_routing_cost_map cost_map;
+
+        /* number of start nodes profiled so far for this segment */
+        int count = 0;
+
+        int dx = 0;
+        int dy = 0;
+        //int start_x = vtr::nint(device_ctx.grid.width()/2);
+        //int start_y = vtr::nint(device_ctx.grid.height()/2);
+        int start_x = REF_X;
+        int start_y = REF_Y;
+        /* The offsets advance as a staircase: dx is bumped first and dy then
+         * catches up, so (dx, dy) visits (0,0), (1,0), (1,1), (2,1), ...
+         * The walk always continues while dy <= kMinProfile; past that it
+         * only continues while no start node has been found yet and
+         * dx < kMaxProfile. */
+        while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) {
+            for (e_rr_type chan_type : {CHANX, CHANY}) {
+                StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg);
+
+                /* get_next_node() returns UNDEFINED once it has no further node to offer */
+                for (int start_node_ind = start_node.get_next_node();
+                     start_node_ind != UNDEFINED;
+                     start_node_ind = start_node.get_next_node()) {
+                    count += 1;
+
+                    /* run Dijkstra's algorithm */
+                    run_dijkstra(start_node_ind, &cost_map);
+                }
+            }
+
+            if (dy < dx) {
+                dy += 1;
+            } else {
+                dx += 1;
+            }
+        }
+
+        if (count == 0) {
+            VTR_LOG_WARN("Segment %s(%d) found no start_node_ind\n",
+                         segment_inf[iseg].name.c_str(), iseg);
+        }
+
+        /* boil down the cost list in routing_cost_map at each coordinate to a
+         * representative cost entry and store it in the lookahead cost map */
+        g_cost_map.set_cost_map(iseg, cost_map,
+                                REPRESENTATIVE_ENTRY_METHOD);
+    }
+
+    if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_LOOKAHEAD_MAP)) {
+        g_cost_map.print_cost_map(segment_inf, getEchoFileName(E_ECHO_LOOKAHEAD_MAP));
+    }
+}
+
+/* Looks up the expected cost of routing from from_node_ind to to_node_ind.
+ *
+ * Returns 0 when both indices are equal. A SINK target is first mapped to its
+ * IPIN(s) through find_sink_connection_boxes(): with several IPINs the
+ * cheapest per-IPIN cost is returned, with exactly one the lookup continues
+ * for that IPIN, and with none the result is +infinity.
+ *
+ * Otherwise the target location is the connection box location (IPIN
+ * targets) or the node's canonical location (any other target). The offset
+ * (source location - target location) is looked up in g_cost_map for the
+ * source node's segment and the result is
+ *     criticality_fac * delay + (1 - criticality_fac) * congestion.
+ *
+ * A missing connection box or canonical location is reported through
+ * VPR_THROW as VPR_ERROR_ROUTE. */
+float get_connection_box_lookahead_map_cost(int from_node_ind,
+                                            int to_node_ind,
+                                            float criticality_fac) {
+    if (from_node_ind == to_node_ind) {
+        return 0.f;
+    }
+
+    auto& device_ctx = g_vpr_ctx.device();
+
+    /* NOTE(review): from_location is declared but never read in this function */
+    std::pair from_location;
+    std::pair to_location;
+    auto to_node_type = device_ctx.rr_nodes[to_node_ind].type();
+
+    if (to_node_type == SINK) {
+        const auto& sink_to_ipin = device_ctx.connection_boxes.find_sink_connection_boxes(to_node_ind);
+        if (sink_to_ipin.ipin_count > 1) {
+            float cost = std::numeric_limits::infinity();
+            // Find cheapest cost from from_node_ind to IPINs for this SINK.
+            for (int i = 0; i < sink_to_ipin.ipin_count; ++i) {
+                cost = std::min(cost,
+                                get_connection_box_lookahead_map_cost(
+                                    from_node_ind,
+                                    sink_to_ipin.ipin_nodes[i], criticality_fac));
+            }
+
+            return cost;
+        } else if (sink_to_ipin.ipin_count == 1) {
+            /* single IPIN: continue the lookup as if that IPIN were the target */
+            to_node_ind = sink_to_ipin.ipin_nodes[0];
+            if (from_node_ind == to_node_ind) {
+                return 0.f;
+            }
+        } else {
+            /* no IPIN recorded for this SINK */
+            return std::numeric_limits::infinity();
+        }
+    }
+
+    if (device_ctx.rr_nodes[to_node_ind].type() == IPIN) {
+        ConnectionBoxId box_id;
+        std::pair box_location;
+        bool found = device_ctx.connection_boxes.find_connection_box(
+            to_node_ind, &box_id, &box_location);
+        if (!found) {
+            VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", to_node_ind);
+        }
+
+        to_location = box_location;
+    } else {
+        const std::pair* to_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(to_node_ind);
+        if (!to_canonical_loc) {
+            VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d", to_node_ind);
+        }
+
+        to_location = *to_canonical_loc;
+    }
+
+    const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(from_node_ind);
+    if (from_canonical_loc == nullptr) {
+        VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d (to %d)",
+                  from_node_ind, to_node_ind);
+    }
+
+    /* offset is source minus target, the same convention run_dijkstra() uses when recording samples */
+    ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(to_location.first);
+    ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(to_location.second);
+
+    int from_seg_index = g_cost_map.node_to_segment(from_node_ind);
+    Cost_Entry cost_entry = g_cost_map.find_cost(from_seg_index, dx, dy);
+    float expected_delay = cost_entry.delay;
+    float expected_congestion = cost_entry.congestion;
+
+    float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion;
+    return expected_cost;
+}
+
+/* runs Dijkstra's algorithm from specified node until all nodes have been
+ * visited.
Each time a pin is visited, the delay/congestion information
+ * to that pin is stored to an entry in the routing_cost_map */
+static void run_dijkstra(int start_node_ind,
+                         t_routing_cost_map* routing_cost_map) {
+    auto& device_ctx = g_vpr_ctx.device();
+
+    /* a list of boolean flags (one for each rr node) to figure out if a
+     * certain node has already been expanded */
+    std::vector node_expanded(device_ctx.rr_nodes.size(), false);
+    /* for each node keep a list of the cost with which that node has been
+     * visited (used to determine whether to push a candidate node onto the
+     * expansion queue */
+    std::vector node_visited_costs(device_ctx.rr_nodes.size(), -1.0);
+    /* a priority queue for expansion */
+    std::priority_queue pq;
+
+    /* first entry has no upstream delay or congestion */
+    PQ_Entry first_entry(start_node_ind, UNDEFINED, 0, 0, 0, true);
+
+    pq.push(first_entry);
+
+    /* every recorded offset is measured from the start node's canonical location */
+    const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(start_node_ind);
+    if (from_canonical_loc == nullptr) {
+        VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d",
+                  start_node_ind);
+    }
+
+    /* now do routing */
+    while (!pq.empty()) {
+        PQ_Entry current = pq.top();
+        pq.pop();
+
+        int node_ind = current.rr_node_ind;
+
+        /* check that we haven't already expanded from this node */
+        if (node_expanded[node_ind]) {
+            continue;
+        }
+
+        /* if this node is an ipin record its congestion/delay in the routing_cost_map */
+        if (device_ctx.rr_nodes[node_ind].type() == IPIN) {
+            ConnectionBoxId box_id;
+            std::pair box_location;
+            bool found = device_ctx.connection_boxes.find_connection_box(
+                node_ind, &box_id, &box_location);
+            if (!found) {
+                VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind);
+            }
+
+            /* start location minus connection box location */
+            int delta_x = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first);
+            int delta_y = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second);
+
+            routing_cost_map->push_back(std::make_pair(
+                std::make_pair(delta_x, delta_y),
+                Cost_Entry(
+                    current.delay,
+                    current.congestion_upstream)));
+        }
+
+        /* queue the children first, then mark this node so later queue entries for it are skipped */
+        expand_dijkstra_neighbours(current, node_visited_costs, node_expanded, pq);
+        node_expanded[node_ind] = true;
+    }
+}
diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h
new file mode 100644
index 00000000000..75771a1fd6b
--- /dev/null
+++ b/vpr/src/route/connection_box_lookahead_map.h
@@ -0,0 +1,14 @@
+#ifndef CONNECTION_BOX_LOOKAHEAD_H_
+#define CONNECTION_BOX_LOOKAHEAD_H_
+
+#include
+#include "physical_types.h"
+
+void compute_connection_box_lookahead(
+    const std::vector& segment_inf);
+
+float get_connection_box_lookahead_map_cost(int from_node_ind,
+                                            int to_node_ind,
+                                            float criticality_fac);
+
+#endif
diff --git a/vpr/src/route/router_lookahead.cpp b/vpr/src/route/router_lookahead.cpp
index 645aca4a7ee..96a824a0de2 100644
--- a/vpr/src/route/router_lookahead.cpp
+++ b/vpr/src/route/router_lookahead.cpp
@@ -1,6 +1,7 @@
 #include "router_lookahead.h"
 
 #include "router_lookahead_map.h"
+#include "connection_box_lookahead_map.h"
 #include "vpr_error.h"
 #include "globals.h"
 #include "route_timing.h"
@@ -13,6 +14,8 @@ std::unique_ptr make_router_lookahead(e_router_lookahead router
         return std::make_unique();
     } else if (router_lookahead_type == e_router_lookahead::MAP) {
         return std::make_unique();
+    } else if (router_lookahead_type == e_router_lookahead::CONNECTION_BOX_MAP) {
+        return std::make_unique();
     } else if (router_lookahead_type == e_router_lookahead::NO_OP) {
         return std::make_unique();
     }
@@ -81,6 +84,25 @@ float MapLookahead::get_expected_cost(int current_node, int target_node, const t
     }
 }
 
+/* Expected cost from current_node to target_node using the connection-box map.
+ * CHANX/CHANY nodes are answered by get_connection_box_lookahead_map_cost()
+ * with params.criticality; an IPIN gets the base cost stored at
+ * SINK_COST_INDEX; every other node type gets 0. R_upstream is not used. */
+float ConnectionBoxMapLookahead::get_expected_cost(
+    int current_node,
+    int target_node,
+    const t_conn_cost_params& params,
+    float /*R_upstream*/) const {
+    auto& device_ctx = g_vpr_ctx.device();
+
+    t_rr_type rr_type = device_ctx.rr_nodes[current_node].type();
+
+    if (rr_type == CHANX || rr_type == CHANY) {
+        return get_connection_box_lookahead_map_cost(
+            current_node, target_node, params.criticality);
+    } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */
+        return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost);
+    } else { /* Change this if you want to investigate route-throughs */
+        return (0.);
+    }
+}
+
 float NoOpLookahead::get_expected_cost(int /*current_node*/, int /*target_node*/, const t_conn_cost_params& /*params*/, float /*R_upstream*/) const {
     return 0.;
 }
diff --git a/vpr/src/route/router_lookahead.h b/vpr/src/route/router_lookahead.h
index 6880651e887..07138ddb4d5 100644
--- a/vpr/src/route/router_lookahead.h
+++ b/vpr/src/route/router_lookahead.h
@@ -27,6 +27,11 @@ class MapLookahead : public RouterLookahead {
     float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override;
 };
 
+class ConnectionBoxMapLookahead : public RouterLookahead {
+  protected:
+    float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override;
+};
+
 class NoOpLookahead : public RouterLookahead {
   protected:
     float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override;
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
new file mode 100644
index 00000000000..b76edcc7e70
--- /dev/null
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -0,0 +1,192 @@
+#include "router_lookahead_map_utils.h"
+
+#include "globals.h"
+#include "vpr_context.h"
+#include "vtr_math.h"
+
+/* Number of CLBs I think the average conn. goes.
*/
+static const int CLB_DIST = 3;
+
+/* Builds the queue entry for node set_rr_node_ind reached through switch
+ * switch_ind. The entry starts from the parent's accumulated delay, upstream
+ * resistance and congestion; unless this is the starting node, the delay of
+ * the incoming switch plus this node and the node's base cost are added on
+ * top. The entry's priority (cost) is its accumulated delay. */
+PQ_Entry::PQ_Entry(
+    int set_rr_node_ind,
+    int switch_ind,
+    float parent_delay,
+    float parent_R_upstream,
+    float parent_congestion_upstream,
+    bool starting_node) {
+    /* begin with exactly what the parent had accumulated */
+    this->rr_node_ind = set_rr_node_ind;
+    this->delay = parent_delay;
+    this->R_upstream = parent_R_upstream;
+    this->congestion_upstream = parent_congestion_upstream;
+
+    if (!starting_node) {
+        const auto& device_ctx = g_vpr_ctx.device();
+        const auto& incoming_switch = device_ctx.rr_switch_inf[switch_ind];
+        const auto& this_node = device_ctx.rr_nodes[set_rr_node_ind];
+        const int cost_index = this_node.cost_index();
+        const auto& indexed_data = device_ctx.rr_indexed_data[cost_index];
+
+        const float Tsw = incoming_switch.Tdel;
+        const float Rsw = incoming_switch.R;
+        const float Cnode = this_node.C();
+        const float Rnode = this_node.R();
+
+        /* a buffered switch contributes a linear term only; a pass
+         * transistor contributes both a linear and a quadratic term */
+        const bool is_buffered = incoming_switch.buffered();
+        const float T_linear = is_buffered
+                                   ? Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode
+                                   : Tsw + 0.5 * Rsw * Cnode;
+        const float T_quadratic = is_buffered
+                                      ? 0.
+                                      : (Rsw + Rnode) * 0.5 * Cnode;
+
+        /* a negative inv_length selects the stored base cost as-is;
+         * otherwise the cost is derived from the delay over CLB_DIST blocks */
+        float base_cost = indexed_data.base_cost;
+        if (!(indexed_data.inv_length < 0)) {
+            const float frac_num_seg = CLB_DIST * indexed_data.inv_length;
+            base_cost = frac_num_seg * T_linear
+                        + frac_num_seg * frac_num_seg * T_quadratic;
+        }
+
+        VTR_ASSERT(T_linear >= 0.);
+        VTR_ASSERT(base_cost >= 0.);
+
+        this->delay += T_linear;
+        this->congestion_upstream += base_cost;
+    }
+
+    /* the queue is ordered on accumulated delay */
+    this->cost = this->delay;
+}
+
+/* returns the recorded entry with the smallest delay (a default, invalid
+ * entry when nothing has been recorded) */
+Cost_Entry Expansion_Cost_Entry::get_smallest_entry() const {
+    Cost_Entry best;
+
+    for (const auto& candidate : this->cost_vector) {
+        const bool take_candidate = !best.valid() || candidate.delay < best.delay;
+        if (take_candidate) {
+            best = candidate;
+        }
+    }
+
+    return best;
+}
+
+/* returns a cost entry that represents the average of all the recorded entries */
+Cost_Entry Expansion_Cost_Entry::get_average_entry() const { + float avg_delay = 0; + float avg_congestion = 0; + + for (auto cost_entry : this->cost_vector) { + avg_delay += cost_entry.delay; + avg_congestion += cost_entry.congestion; + } + + avg_delay /= (float)this->cost_vector.size(); + avg_congestion /= (float)this->cost_vector.size(); + + return Cost_Entry(avg_delay, avg_congestion); +} + +/* returns a cost entry that represents the geomean of all the recorded entries */ +Cost_Entry Expansion_Cost_Entry::get_geomean_entry() const { + float geomean_delay = 0; + float geomean_cong = 0; + for (auto cost_entry : this->cost_vector) { + geomean_delay += log(cost_entry.delay); + geomean_cong += log(cost_entry.congestion); + } + + geomean_delay = exp(geomean_delay / (float)this->cost_vector.size()); + geomean_cong = exp(geomean_cong / (float)this->cost_vector.size()); + + return Cost_Entry(geomean_delay, geomean_cong); +} + +/* returns a cost entry that represents the medial of all recorded entries */ +Cost_Entry Expansion_Cost_Entry::get_median_entry() const { + /* find median by binning the delays of all entries and then chosing the bin + * with the largest number of entries */ + + int num_bins = 10; + + /* find entries with smallest and largest delays */ + Cost_Entry min_del_entry; + Cost_Entry max_del_entry; + for (auto entry : this->cost_vector) { + if (!min_del_entry.valid() || entry.delay < min_del_entry.delay) { + min_del_entry = entry; + } + if (!max_del_entry.valid() || entry.delay > max_del_entry.delay) { + max_del_entry = entry; + } + } + + /* get the bin size */ + float delay_diff = max_del_entry.delay - min_del_entry.delay; + float bin_size = delay_diff / (float)num_bins; + + /* sort the cost entries into bins */ + std::vector > entry_bins(num_bins, std::vector()); + for (auto entry : this->cost_vector) { + float bin_num = floor((entry.delay - min_del_entry.delay) / bin_size); + + VTR_ASSERT(vtr::nint(bin_num) >= 0 && vtr::nint(bin_num) <= num_bins); + 
if (vtr::nint(bin_num) == num_bins) { + /* largest entry will otherwise have an out-of-bounds bin number */ + bin_num -= 1; + } + entry_bins[vtr::nint(bin_num)].push_back(entry); + } + + /* find the bin with the largest number of elements */ + int largest_bin = 0; + int largest_size = 0; + for (int ibin = 0; ibin < num_bins; ibin++) { + if (entry_bins[ibin].size() > (unsigned)largest_size) { + largest_bin = ibin; + largest_size = (unsigned)entry_bins[ibin].size(); + } + } + + /* get the representative delay of the largest bin */ + Cost_Entry representative_entry = entry_bins[largest_bin][0]; + + return representative_entry; +} + +/* iterates over the children of the specified node and selectively pushes them onto the priority queue */ +void expand_dijkstra_neighbours(PQ_Entry parent_entry, + std::vector& node_visited_costs, + std::vector& node_expanded, + std::priority_queue& pq) { + auto& device_ctx = g_vpr_ctx.device(); + + int parent_ind = parent_entry.rr_node_ind; + + auto& parent_node = device_ctx.rr_nodes[parent_ind]; + + for (int iedge = 0; iedge < parent_node.num_edges(); iedge++) { + int child_node_ind = parent_node.edge_sink_node(iedge); + int switch_ind = parent_node.edge_switch(iedge); + + /* skip this child if it has already been expanded from */ + if (node_expanded[child_node_ind]) { + continue; + } + + PQ_Entry child_entry(child_node_ind, switch_ind, parent_entry.delay, + parent_entry.R_upstream, parent_entry.congestion_upstream, false); + + VTR_ASSERT(child_entry.cost >= 0); + + /* skip this child if it has been visited with smaller cost */ + if (node_visited_costs[child_node_ind] >= 0 && node_visited_costs[child_node_ind] < child_entry.cost) { + continue; + } + + /* finally, record the cost with which the child was visited and put the child entry on the queue */ + node_visited_costs[child_node_ind] = child_entry.cost; + pq.push(child_entry); + } +} diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h 
new file mode 100644
index 00000000000..d0077ccb9bc
--- /dev/null
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -0,0 +1,142 @@
+#ifndef ROUTER_LOOKAHEAD_MAP_UTILS_H_
+#define ROUTER_LOOKAHEAD_MAP_UTILS_H_
+/*
+ * The router lookahead provides an estimate of the cost from an intermediate node to the target node
+ * during directed (A*-like) routing.
+ *
+ * The VPR 7.0 lookahead (route/route_timing.c ==> get_timing_driven_expected_cost) lower-bounds the remaining delay and
+ * congestion by assuming that a minimum number of wires, of the same type as the current node being expanded, can be used
+ * to complete the route. While this method is efficient, it can run into trouble with architectures that use
+ * multiple interconnected wire types.
+ *
+ * The lookahead in this file pre-computes delay/congestion costs up and to the right of a starting tile. This generates
+ * delay/congestion tables for {CHANX, CHANY} channel types, over all wire types defined in the architecture file.
+ * See Section 3.2.4 in Oleg Petelin's MASc thesis (2016) for more discussion.
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include "vpr_types.h"
+
+/* when a list of delay/congestion entries at a coordinate in Cost_Entry is boiled down to a single
+ * representative entry, this enum is passed-in to specify how that representative entry should be
+ * calculated */
+enum e_representative_entry_method {
+    FIRST = 0, //the first cost that was recorded
+    SMALLEST,  //the smallest-delay cost recorded
+    AVERAGE,   //arithmetic mean of the recorded delays and of the recorded congestions
+    GEOMEAN,   //geometric mean of the recorded delays and of the recorded congestions
+    MEDIAN     //an entry taken from the most populated delay bin (see get_median_entry)
+};
+
+/* f_cost_map is an array of these cost entries that specifies delay/congestion estimates
+ * to travel relative x/y distances */
+class Cost_Entry {
+  public:
+    float delay;
+    float congestion;
+
+    /* a default-constructed entry is "unset": both fields are +infinity, so valid() is false */
+    Cost_Entry() {
+        delay = std::numeric_limits::infinity();
+        congestion = std::numeric_limits::infinity();
+    }
+    Cost_Entry(float set_delay, float set_congestion) {
+        delay = set_delay;
+        congestion = set_congestion;
+    }
+
+    /* true only when both delay and congestion are finite */
+    bool valid() const {
+        return std::isfinite(delay) && std::isfinite(congestion);
+    }
+};
+
+/* a class that stores delay/congestion information for a given relative coordinate during the Dijkstra expansion.
+ * since it stores multiple cost entries, it is later boiled down to a single representative cost entry to be stored
+ * in the final lookahead cost map */
+class Expansion_Cost_Entry {
+  private:
+    /* every cost recorded for this coordinate (a single element when recording with SMALLEST) */
+    std::vector cost_vector;
+
+    Cost_Entry get_smallest_entry() const;
+    Cost_Entry get_average_entry() const;
+    Cost_Entry get_geomean_entry() const;
+    Cost_Entry get_median_entry() const;
+
+  public:
+    /* records one delay/congestion sample; 'method' only decides whether all
+     * samples are kept or just the smallest-delay one */
+    void add_cost_entry(e_representative_entry_method method,
+                        float add_delay,
+                        float add_congestion) {
+        Cost_Entry cost_entry(add_delay, add_congestion);
+        if (method == SMALLEST) {
+            /* taking the smallest-delay entry anyway, so no need to push back multiple entries */
+            if (this->cost_vector.empty()) {
+                this->cost_vector.push_back(cost_entry);
+            } else {
+                if (add_delay < this->cost_vector[0].delay) {
+                    this->cost_vector[0] = cost_entry;
+                }
+            }
+        } else {
+            this->cost_vector.push_back(cost_entry);
+        }
+    }
+    void clear_cost_entries() {
+        this->cost_vector.clear();
+    }
+
+    /* reduces the recorded samples to one entry according to 'method';
+     * returns a default-constructed (invalid) Cost_Entry when nothing was
+     * recorded or 'method' is not one of the known enumerators */
+    Cost_Entry get_representative_cost_entry(e_representative_entry_method method) const {
+        Cost_Entry entry;
+
+        if (!cost_vector.empty()) {
+            switch (method) {
+                case FIRST:
+                    entry = cost_vector[0];
+                    break;
+                case SMALLEST:
+                    entry = this->get_smallest_entry();
+                    break;
+                case AVERAGE:
+                    entry = this->get_average_entry();
+                    break;
+                case GEOMEAN:
+                    entry = this->get_geomean_entry();
+                    break;
+                case MEDIAN:
+                    entry = this->get_median_entry();
+                    break;
+                default:
+                    break;
+            }
+        }
+        return entry;
+    }
+};
+
+/* a class that represents an entry in the Dijkstra expansion priority queue */
+class PQ_Entry {
+  public:
+    int rr_node_ind; //index in device_ctx.rr_nodes that this entry represents
+    float cost;      //the cost of the path to get to this node
+
+    /* store backward delay, R and congestion info */
+    float delay;
+    float R_upstream;
+    float congestion_upstream;
+
+    /* starting_node = true copies the parent values unchanged; otherwise the
+     * delay and base cost of reaching set_rr_node_ind are added to them */
+    PQ_Entry(int set_rr_node_ind, int /*switch_ind*/, float parent_delay, float parent_R_upstream, float parent_congestion_upstream, bool starting_node);
+
+    bool operator<(const PQ_Entry& obj) const {
+        /* inserted into max priority queue so want queue entries with a lower cost to be greater */
+        return (this->cost > obj.cost);
+    }
+};
+
+void expand_dijkstra_neighbours(PQ_Entry parent_entry,
+                                std::vector& node_visited_costs,
+                                std::vector& node_expanded,
+                                std::priority_queue& pq);
+
+#endif
diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp
index d00df14ef95..c264e73e0db 100644
--- a/vpr/src/route/rr_graph.cpp
+++ b/vpr/src/route/rr_graph.cpp
@@ -34,6 +34,7 @@ using namespace std;
 #include "rr_graph_writer.h"
 #include "rr_graph_reader.h"
 #include "router_lookahead_map.h"
+#include "connection_box_lookahead_map.h"
 #include "rr_graph_clock.h"
 
 #include "rr_types.h"
@@ -384,6 +385,10 @@ void create_rr_graph(const t_graph_type graph_type,
         compute_router_lookahead(segment_inf.size());
     }
 
+    if (router_lookahead_type == e_router_lookahead::CONNECTION_BOX_MAP) {
+        compute_connection_box_lookahead(segment_inf);
+    }
+
     //Write out rr graph file if needed
     if (!det_routing_arch->write_rr_graph_filename.empty()) {
         write_rr_graph(det_routing_arch->write_rr_graph_filename.c_str(), segment_inf);
diff --git a/vpr/src/route/rr_graph_reader.cpp b/vpr/src/route/rr_graph_reader.cpp
index 465ea6d92f9..7924b176b70 100644
--- a/vpr/src/route/rr_graph_reader.cpp
+++ b/vpr/src/route/rr_graph_reader.cpp
@@ -56,6 +56,7 @@ void verify_blocks(pugi::xml_node parent, const pugiutil::loc_data& loc_data);
 void process_blocks(pugi::xml_node parent, const pugiutil::loc_data& loc_data);
 void verify_grid(pugi::xml_node parent, const pugiutil::loc_data& loc_data, const DeviceGrid& grid);
 void process_nodes(pugi::xml_node parent, const pugiutil::loc_data& loc_data);
+void process_connection_boxes(pugi::xml_node parent, const pugiutil::loc_data& loc_data);
 void process_edges(pugi::xml_node parent, const pugiutil::loc_data& loc_data, int* wire_to_rr_ipin_switch, const int num_rr_switches);
 void
process_channels(t_chan_width& chan_width, pugi::xml_node parent, const pugiutil::loc_data& loc_data); void process_rr_node_indices(const DeviceGrid& grid); @@ -133,6 +134,13 @@ void load_rr_file(const t_graph_type graph_type, next_component = get_first_child(rr_graph, "channels", loc_data); process_channels(nodes_per_chan, next_component, loc_data); + next_component = get_first_child(rr_graph, "connection_boxes", loc_data, OPTIONAL); + if (next_component != nullptr) { + process_connection_boxes(next_component, loc_data); + } else { + device_ctx.connection_boxes.clear(); + } + /* Decode the graph_type */ bool is_global_graph = (GRAPH_GLOBAL == graph_type ? true : false); @@ -146,6 +154,7 @@ void load_rr_file(const t_graph_type graph_type, int num_rr_nodes = count_children(next_component, "node", loc_data); device_ctx.rr_nodes.resize(num_rr_nodes); + device_ctx.connection_boxes.resize_nodes(num_rr_nodes); process_nodes(next_component, loc_data); /* Loads edges, switches, and node look up tables*/ @@ -179,6 +188,7 @@ void load_rr_file(const t_graph_type graph_type, device_ctx.chan_width = nodes_per_chan; check_rr_graph(graph_type, grid, device_ctx.block_types); + device_ctx.connection_boxes.create_sink_back_ref(); } catch (XmlError& e) { vpr_throw(VPR_ERROR_ROUTE, read_rr_graph_name, e.line(), "%s", e.what()); @@ -304,6 +314,18 @@ void process_nodes(pugi::xml_node parent, const pugiutil::loc_data& loc_data) { node.set_type(OPIN); } else if (strcmp(node_type, "IPIN") == 0) { node.set_type(IPIN); + + pugi::xml_node connection_boxSubnode = get_single_child(rr_node, "connection_box", loc_data, OPTIONAL); + if (connection_boxSubnode) { + int x = get_attribute(connection_boxSubnode, "x", loc_data).as_int(); + int y = get_attribute(connection_boxSubnode, "y", loc_data).as_int(); + int id = get_attribute(connection_boxSubnode, "id", loc_data).as_int(); + + device_ctx.connection_boxes.add_connection_box(inode, + ConnectionBoxId(id), + std::make_pair(x, y)); + } + } else { 
vpr_throw(VPR_ERROR_OTHER, __FILE__, __LINE__,
                       "Valid inputs for class types are \"CHANX\", \"CHANY\",\"SOURCE\", \"SINK\",\"OPIN\", and \"IPIN\".");
@@ -323,6 +345,15 @@ void process_nodes(pugi::xml_node parent, const pugiutil::loc_data& loc_data) {
             }
         }
 
+        /* optional <canonical_loc x=".." y=".."> child; note the variable keeps the connection_box name */
+        pugi::xml_node connection_boxSubnode = get_single_child(rr_node, "canonical_loc", loc_data, OPTIONAL);
+        if (connection_boxSubnode) {
+            int x = get_attribute(connection_boxSubnode, "x", loc_data).as_int();
+            int y = get_attribute(connection_boxSubnode, "y", loc_data).as_int();
+
+            device_ctx.connection_boxes.add_canonical_loc(inode,
+                                                          std::make_pair(x, y));
+        }
+
         node.set_capacity(get_attribute(rr_node, "capacity", loc_data).as_float());
 
         //--------------
@@ -876,3 +907,26 @@ void set_cost_indices(pugi::xml_node parent, const pugiutil::loc_data& loc_data,
         rr_node = rr_node.next_sibling(rr_node.name());
     }
 }
+
+/* Loads the <connection_boxes> element of an rr graph file.
+ *
+ * Reads the x_dim, y_dim and num_boxes attributes (each falls back to 0 when
+ * it cannot be converted), then walks the <connection_box> children and stores
+ * each child's name at the slot given by its id. An id outside
+ * [0, num_boxes) or an id whose slot already has a name trips a VTR_ASSERT.
+ * The result is handed to device_ctx.connection_boxes.reset_boxes(). */
+void process_connection_boxes(pugi::xml_node parent, const pugiutil::loc_data& loc_data) {
+    auto& device_ctx = g_vpr_ctx.mutable_device();
+
+    int x_dim = get_attribute(parent, "x_dim", loc_data).as_int(0);
+    int y_dim = get_attribute(parent, "y_dim", loc_data).as_int(0);
+    int num_boxes = get_attribute(parent, "num_boxes", loc_data).as_int(0);
+    VTR_ASSERT(num_boxes >= 0);
+
+    pugi::xml_node connection_box = get_first_child(parent, "connection_box", loc_data);
+    std::vector boxes(num_boxes);
+    while (connection_box) {
+        /* -1 is the fallback id, which the range assertion below rejects */
+        int id = get_attribute(connection_box, "id", loc_data).as_int(-1);
+        const char* name = get_attribute(connection_box, "name", loc_data).as_string(nullptr);
+        VTR_ASSERT(id >= 0 && id < num_boxes);
+        VTR_ASSERT(boxes.at(id).name == "");
+        boxes.at(id).name = std::string(name);
+
+        connection_box = connection_box.next_sibling(connection_box.name());
+    }
+
+    device_ctx.connection_boxes.reset_boxes(std::make_pair(x_dim, y_dim), boxes);
+}
diff --git a/vpr/src/route/rr_node.h b/vpr/src/route/rr_node.h
index 16075b8b176..5d610824684 100644
--- a/vpr/src/route/rr_node.h
+++
b/vpr/src/route/rr_node.h @@ -173,7 +173,7 @@ class t_rr_node { uint16_t edges_capacity_ = 0; uint8_t num_non_configurable_edges_ = 0; - int8_t cost_index_ = -1; + uint16_t cost_index_ = -1; int16_t rc_index_ = -1; int16_t xlow_ = -1; From 6f18b6d0dd52ac450fc627afe2486ebab2587cc7 Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Tue, 7 May 2019 17:29:52 +0200 Subject: [PATCH 05/15] vpr options: added option to disable check_route Signed-off-by: Alessandro Comodi --- vpr/src/base/SetupVPR.cpp | 2 +- vpr/src/base/read_options.cpp | 5 +++++ vpr/src/base/read_options.h | 1 + vpr/src/base/vpr_api.cpp | 4 +++- vpr/src/base/vpr_types.h | 1 + 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 06ac0c2c237..879561b4af6 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -347,8 +347,8 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts) RouterOpts->max_convergence_count = Options.router_max_convergence_count; RouterOpts->reconvergence_cpd_threshold = Options.router_reconvergence_cpd_threshold; RouterOpts->first_iteration_timing_report_file = Options.router_first_iteration_timing_report_file; - RouterOpts->strict_checks = Options.strict_checks; + RouterOpts->disable_check_route = Options.disable_check_route; } static void SetupAnnealSched(const t_options& Options, diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 28470df9142..28768919808 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -1533,6 +1533,11 @@ static argparse::ArgumentParser create_arg_parser(std::string prog_name, t_optio .default_value("") .show_in(argparse::ShowIn::HELP_ONLY); + route_timing_grp.add_argument(args.disable_check_route, "--disable_check_route") + .help("Disables check_route once routing step has finished or when routing file is loaded") + .default_value("off") + .show_in(argparse::ShowIn::HELP_ONLY); + 
route_timing_grp.add_argument(args.router_debug_net, "--router_debug_net") .help( "Controls when router debugging is enabled.\n" diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 2227656c1af..886262ffd60 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -118,6 +118,7 @@ struct t_options { argparse::ArgValue verify_binary_search; argparse::ArgValue RouterAlgorithm; argparse::ArgValue min_incremental_reroute_fanout; + argparse::ArgValue disable_check_route; /* Timing-driven router options only */ argparse::ArgValue astar_fac; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index d08a5764405..4167c804637 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -637,7 +637,9 @@ RouteStatus vpr_route_flow(t_vpr_setup& vpr_setup, const t_arch& arch) { std::string graphics_msg; if (route_status.success()) { //Sanity check the routing - check_route(router_opts.route_type); + if (!router_opts.disable_check_route) { + check_route(router_opts.route_type); + } get_serial_num(); //Update status diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index a90f3f9f3fd..ffff9aed047 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -947,6 +947,7 @@ struct t_router_opts { float reconvergence_cpd_threshold; std::string first_iteration_timing_report_file; bool strict_checks; + bool disable_check_route; }; struct t_analysis_opts { From 990d96a6b6757dc907ee4e7210307fe0eb4a417e Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Fri, 31 May 2019 17:17:01 +0200 Subject: [PATCH 06/15] vpr: added optional disable errors and suppress warnings The errors can be disabled for entire functions. From the command line option, the developers can select which functions should not treat errors as warnings. The noisy warnings can be suppressed and redirected to a custom file. This can help to have a clearer output. 
Signed-off-by: Alessandro Comodi --- libs/libvtrutil/src/vtr_log.cpp | 36 +++++++++++++++++++++++++++- libs/libvtrutil/src/vtr_log.h | 27 ++++++++++++++++++--- vpr/src/base/read_options.cpp | 16 +++++++++++++ vpr/src/base/read_options.h | 2 ++ vpr/src/base/vpr_api.cpp | 23 ++++++++++++++++++ vpr/src/place/place.cpp | 12 ++-------- vpr/src/route/check_route.cpp | 2 +- vpr/src/route/check_rr_graph.cpp | 2 +- vpr/src/util/vpr_error.cpp | 41 ++++++++++++++++++++++++++++++++ vpr/src/util/vpr_error.h | 23 ++++++++++++++---- vpr/src/util/vpr_utils.cpp | 3 ++- vpr/src/util/vpr_utils.h | 2 ++ 12 files changed, 168 insertions(+), 21 deletions(-) diff --git a/libs/libvtrutil/src/vtr_log.cpp b/libs/libvtrutil/src/vtr_log.cpp index c0ae90759de..55d850c514a 100644 --- a/libs/libvtrutil/src/vtr_log.cpp +++ b/libs/libvtrutil/src/vtr_log.cpp @@ -1,5 +1,9 @@ -#include "vtr_log.h" +#include +#include +#include +#include "vtr_util.h" +#include "vtr_log.h" #include "log.h" namespace vtr { @@ -14,3 +18,33 @@ void set_log_file(const char* filename) { } } // namespace vtr + +void add_warnings_to_suppress(std::string function_name) { + warnings_to_suppress.insert(function_name); +} + +void set_noisy_warn_log_file(const char* log_file_name) { + std::ofstream log; + log.open(log_file_name, std::ifstream::out | std::ifstream::trunc); + log.close(); + noisy_warn_log_file = std::string(log_file_name); +} + +void suppress_warning(const char* pszFileName, unsigned int lineNum, const char* pszFuncName, const char* pszMessage, ...) 
{ + std::string function_name(pszFuncName); + + va_list va_args; + va_start(va_args, pszMessage); + std::string msg = vtr::vstring_fmt(pszMessage, va_args); + va_end(va_args); + + auto result = warnings_to_suppress.find(function_name); + if (result == warnings_to_suppress.end()) { + vtr::printf_warning(pszFileName, lineNum, msg.data()); + } else { + std::ofstream log; + log.open(noisy_warn_log_file.data(), std::ios_base::app); + log << "Warning:\n\tfile: " << pszFileName << "\n\tline: " << lineNum << "\n\tmessage: " << msg << std::endl; + log.close(); + } +} diff --git a/libs/libvtrutil/src/vtr_log.h b/libs/libvtrutil/src/vtr_log.h index 878653ba84d..b7bc2dceadc 100644 --- a/libs/libvtrutil/src/vtr_log.h +++ b/libs/libvtrutil/src/vtr_log.h @@ -1,6 +1,8 @@ #ifndef VTR_LOG_H #define VTR_LOG_H #include +#include +#include /* * This header defines useful logging macros for VTR projects. @@ -71,15 +73,18 @@ #define VTR_LOGF_ERROR(file, line, ...) VTR_LOGVF_ERROR(true, file, line, __VA_ARGS__) #define VTR_LOGF_NOP(file, line, ...) VTR_LOGVF_NOP(true, file, line, __VA_ARGS__) +//Custom file-line-func location logging macros +#define VTR_LOGFF_WARN(file, line, func, ...) VTR_LOGVFF_WARN(true, file, line, func, __VA_ARGS__) + //Conditional logging and custom file-line location macros #define VTR_LOGVF(expr, file, line, ...) \ do { \ if (expr) vtr::printf(__VA_ARGS__); \ } while (false) -#define VTR_LOGVF_WARN(expr, file, line, ...) \ - do { \ - if (expr) vtr::printf_warning(file, line, __VA_ARGS__); \ +#define VTR_LOGVF_WARN(expr, file, line, ...) \ + do { \ + if (expr) suppress_warning(file, line, __func__, __VA_ARGS__); \ } while (false) #define VTR_LOGVF_ERROR(expr, file, line, ...) \ @@ -87,6 +92,12 @@ if (expr) vtr::printf_error(file, line, __VA_ARGS__); \ } while (false) +// Conditional logging and custom file-line-func location macros +#define VTR_LOGVFF_WARN(expr, file, line, func, ...) 
\ + do { \ + if (expr) suppress_warning(file, line, func, __VA_ARGS__); \ + } while (false) + //No-op version of logging macro which avoids unused parameter warnings. // //Note that to avoid unused parameter warnings we call sizeof() and cast @@ -129,4 +140,14 @@ void set_log_file(const char* filename); } // namespace vtr +// The following data structure and functions allow to suppress noisy warnings +// and direct them into an external file. +static std::unordered_set warnings_to_suppress; +static std::string noisy_warn_log_file; + +void add_warnings_to_suppress(std::string function_name); +void set_noisy_warn_log_file(const char* log_file_name); + +void suppress_warning(const char* pszFileName, unsigned int lineNum, const char* pszFuncName, const char* pszMessage, ...); + #endif diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 28470df9142..4f5123c208e 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -931,6 +931,22 @@ static argparse::ArgumentParser create_arg_parser(std::string prog_name, t_optio .default_value("on") .show_in(argparse::ShowIn::HELP_ONLY); + gen_grp.add_argument(args.disable_errors, "--disable_errors") + .help( + "Parses a list of functions for which the errors are going to be treated as warnings.\n" + "Each function in the list is delimited by `:`\n" + "This option should be only used for development purposes.") + .default_value(""); + + gen_grp.add_argument(args.suppress_warnings, "--suppress_warnings") + .help( + "Parses a list of functions for which the warnings will be suppressed on stdout.\n" + "The first element of the list is the name of the output log file with the suppressed warnings.\n" + "The file name and the list of functions is separated by `,`\n" + "Each function in the list is delimited by `:`\n" + "This option should be only used for development purposes.") + .default_value(""); + auto& file_grp = parser.add_argument_group("file options"); 
file_grp.add_argument(args.BlifFile, "--circuit_file") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 2227656c1af..b586733b48b 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -50,6 +50,8 @@ struct t_options { argparse::ArgValue clock_modeling; argparse::ArgValue exit_before_pack; argparse::ArgValue strict_checks; + argparse::ArgValue disable_errors; + argparse::ArgValue suppress_warnings; /* Atom netlist options */ argparse::ArgValue absorb_buffer_luts; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index d08a5764405..e2b7143f2d1 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -214,6 +214,29 @@ void vpr_init(const int argc, const char** argv, t_options* options, t_vpr_setup /* Determine whether echo is on or off */ setEchoEnabled(options->CreateEchoFile); + /* + * Initialize the functions names for which VPR_THROWs + * are demoted to VTR_LOG_WARNs + */ + for (std::string func_name : vtr::split(options->disable_errors, std::string(":"))) { + map_error_activation_status(func_name); + } + + /* + * Initialize the functions names for which + * warnings are being suppressed + */ + std::vector split_warning_option = vtr::split(options->suppress_warnings, std::string(",")); + + // If the file or the list of functions is not provided + // no warning is suppressed + if (split_warning_option.size() == 2) { + set_noisy_warn_log_file(split_warning_option[0].data()); + for (std::string func_name : vtr::split(split_warning_option[1], std::string(":"))) { + add_warnings_to_suppress(func_name); + } + } + /* Read in arch and circuit */ SetupVPR(options, vpr_setup->TimingEnabled, diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 94ccc1a4788..d769a295809 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -963,11 +963,7 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const if (fabs(new_bb_cost - costs->bb_cost) > 
costs->bb_cost * ERROR_TOL) { std::string msg = vtr::string_fmt("in recompute_costs_from_scratch: new_bb_cost = %g, old bb_cost = %g\n", new_bb_cost, costs->bb_cost); - if (placer_opts.strict_checks) { - vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, msg.c_str()); - } else { - VTR_LOG_WARN(msg.c_str()); - } + VPR_THROW(VPR_ERROR_PLACE, msg.c_str()); } costs->bb_cost = new_bb_cost; @@ -977,11 +973,7 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const if (fabs(new_timing_cost - costs->timing_cost) > costs->timing_cost * ERROR_TOL) { std::string msg = vtr::string_fmt("in recompute_costs_from_scratch: new_timing_cost = %g, old timing_cost = %g, ERROR_TOL = %g\n", new_timing_cost, costs->timing_cost, ERROR_TOL); - if (placer_opts.strict_checks) { - vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, msg.c_str()); - } else { - VTR_LOG_WARN(msg.c_str()); - } + VPR_THROW(VPR_ERROR_PLACE, msg.c_str()); } costs->timing_cost = new_timing_cost; } else { diff --git a/vpr/src/route/check_route.cpp b/vpr/src/route/check_route.cpp index 84b6290a144..dbef691c861 100644 --- a/vpr/src/route/check_route.cpp +++ b/vpr/src/route/check_route.cpp @@ -118,7 +118,7 @@ void check_route(enum e_route_type route_type) { } else { //Continuing along existing branch connects = check_adjacent(prev_node, inode); if (!connects) { - vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__, + VPR_THROW(VPR_ERROR_ROUTE, "in check_route: found non-adjacent segments in traceback while checking net %d:\n" " %s\n" " %s\n", diff --git a/vpr/src/route/check_rr_graph.cpp b/vpr/src/route/check_rr_graph.cpp index 3699746fda5..846680ab69e 100644 --- a/vpr/src/route/check_rr_graph.cpp +++ b/vpr/src/route/check_rr_graph.cpp @@ -502,7 +502,7 @@ static void check_unbuffered_edges(int from_node) { } if (trans_matched == false) { - vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__, + VPR_THROW(VPR_ERROR_ROUTE, "in check_unbuffered_edges:\n" "connection from node %d to node %d uses an unbuffered switch 
(switch type %d '%s')\n" "but there is no corresponding unbuffered switch edge in the other direction.\n", diff --git a/vpr/src/util/vpr_error.cpp b/vpr/src/util/vpr_error.cpp index 96e0f4bba3f..2bad3b6f919 100644 --- a/vpr/src/util/vpr_error.cpp +++ b/vpr/src/util/vpr_error.cpp @@ -1,6 +1,8 @@ #include +#include #include "vtr_util.h" +#include "vtr_log.h" #include "vpr_error.h" /* Date:June 15th, 2013 @@ -11,6 +13,10 @@ * anything but throw an exception which will be caught * main.c. */ +void map_error_activation_status(std::string function_name) { + functions_to_demote.insert(function_name); +} + void vpr_throw(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, @@ -41,3 +47,38 @@ void vvpr_throw(enum e_vpr_error type, throw VprError(type, msg, psz_file_name, line_num); } + +void vpr_throw_msg(enum e_vpr_error type, + const char* psz_file_name, + unsigned int line_num, + std::string msg) { + throw VprError(type, msg, psz_file_name, line_num); +} + +void vpr_throw_opt(enum e_vpr_error type, + const char* psz_func_name, + const char* psz_file_name, + unsigned int line_num, + const char* psz_message, + ...) 
{ + std::string func_name(psz_func_name); + + // Make a variable argument list + va_list va_args; + + // Initialize variable argument list + va_start(va_args, psz_message); + + //Format the message + std::string msg = vtr::vstring_fmt(psz_message, va_args); + + auto result = functions_to_demote.find(func_name); + if (result != functions_to_demote.end()) { + VTR_LOGFF_WARN(psz_file_name, line_num, psz_func_name, msg.data()); + } else { + vpr_throw_msg(type, psz_file_name, line_num, msg); + } + + // Reset variable argument list + va_end(va_args); +} diff --git a/vpr/src/util/vpr_error.h b/vpr/src/util/vpr_error.h index f999889359b..32619251c49 100644 --- a/vpr/src/util/vpr_error.h +++ b/vpr/src/util/vpr_error.h @@ -1,8 +1,11 @@ #ifndef VPR_ERROR_H #define VPR_ERROR_H -#include "vtr_error.h" #include +#include +#include + +#include "vtr_error.h" enum e_vpr_error { VPR_ERROR_UNKNOWN = 0, @@ -45,6 +48,15 @@ class VprError : public vtr::VtrError { t_vpr_error_type type_; }; +// Set of function names for which the VPR_THROW errors are treated +// as VTR_LOG_WARN +static std::unordered_set functions_to_demote; + +// This function is used to save into the functions_to_demote set +// all the function names which contain VPR_THROW errors that are +// going to be demoted to be VTR_LOG_WARN +void map_error_activation_status(std::string function_name); + //VPR error reporting routines // //Note that we mark these functions with the C++11 attribute 'noreturn' @@ -52,14 +64,17 @@ class VprError : public vtr::VtrError { //reduce false-positive compiler warnings [[noreturn]] void vpr_throw(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, const char* psz_message, ...); [[noreturn]] void vvpr_throw(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, const char* psz_message, va_list args); +[[noreturn]] void vpr_throw_msg(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, std::string msg); + +void vpr_throw_opt(enum 
e_vpr_error type, const char* psz_func_name, const char* psz_file_name, unsigned int line_num, const char* psz_message, ...); /* * Macro wrapper around vpr_throw() which automatically * specifies file and line number of call site. */ -#define VPR_THROW(type, ...) \ - do { \ - vpr_throw(type, __FILE__, __LINE__, __VA_ARGS__); \ +#define VPR_THROW(type, ...) \ + do { \ + vpr_throw_opt(type, __func__, __FILE__, __LINE__, __VA_ARGS__); \ } while (false) #endif diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 34b15f205b4..3df718b6099 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -1,6 +1,8 @@ #include #include #include +#include + using namespace std; #include "vtr_assert.h" @@ -18,7 +20,6 @@ using namespace std; #include "string.h" #include "pack_types.h" #include "device_grid.h" -#include /* This module contains subroutines that are used in several unrelated parts * * of VPR. They are VPR-specific utility routines. */ diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 4b6cd5ff09e..bc53e60950e 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -2,7 +2,9 @@ #define VPR_UTILS_H #include +#include #include + #include "vpr_types.h" #include "atom_netlist.h" #include "clustered_netlist.h" From caf3e416669c8273d0dcf737622da544e33e1a38 Mon Sep 17 00:00:00 2001 From: Keith Rothman <537074+litghost@users.noreply.github.com> Date: Tue, 5 Mar 2019 16:17:00 -0800 Subject: [PATCH 07/15] Revert badge to the SymbiFlow Travis-CI. 
Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com> Updated README.md Signed-off-by: Alessandro Comodi --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index d57d1a35bc0..4a41e9f4868 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,14 @@ + +SymbiFlow WIP changes for Verilog to Routing (VTR) +================================================== + +This branch contains work in progress changes for using Verilog to Routing +(VTR) as part of SymbiFlow. + +--- + # Verilog to Routing (VTR) -[![Build Status](https://travis-ci.org/verilog-to-routing/vtr-verilog-to-routing.svg?branch=master)](https://travis-ci.org/verilog-to-routing/vtr-verilog-to-routing) [![Documentation Status](https://readthedocs.org/projects/vtr/badge/?version=latest)](http://docs.verilogtorouting.org/en/latest/?badge=latest) +[![Build Status](https://travis-ci.com/SymbiFlow/vtr-verilog-to-routing.svg?branch=master)](https://travis-ci.com/SymbiFlow/vtr-verilog-to-routing) [![Documentation Status](https://readthedocs.org/projects/vtr/badge/?version=latest)](http://docs.verilogtorouting.org/en/latest/?badge=latest) ## Introduction The Verilog-to-Routing (VTR) project is a world-wide collaborative effort to provide a open-source framework for conducting FPGA architecture and CAD research and development. 
From d48ea3004501fd03fb8574a422e20a10a01609ce Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Thu, 6 Jun 2019 22:58:22 +0200 Subject: [PATCH 08/15] vpr: added option for dangling comb nodes Signed-off-by: Alessandro Comodi --- vpr/src/base/read_options.cpp | 10 ++++++++++ vpr/src/base/read_options.h | 1 + vpr/src/base/vpr_api.cpp | 2 +- vpr/src/timing/timing_graph_builder.cpp | 10 +++++++--- vpr/src/timing/timing_graph_builder.h | 4 ++-- 5 files changed, 21 insertions(+), 6 deletions(-) diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 28470df9142..e28f6ed6473 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -931,6 +931,16 @@ static argparse::ArgumentParser create_arg_parser(std::string prog_name, t_optio .default_value("on") .show_in(argparse::ShowIn::HELP_ONLY); + gen_grp.add_argument(args.allow_dangling_combinational_nodes, "--allow_dangling_combinational_nodes") + .help( + "Option to allow dangling combinational nodes in the timing graph.\n" + "This option should normally be off, as dangling combinational nodes are unusual\n" + "in the timing graph and may indicate a problem in the circuit or architecture.\n" + "Unless you understand why your architecture/circuit can have valid dangling combinational nodes, this option should be off.\n" + "In general this is a dev-only option and should not be turned on by the end-user.") + .default_value("off") + .show_in(argparse::ShowIn::HELP_ONLY); + auto& file_grp = parser.add_argument_group("file options"); file_grp.add_argument(args.BlifFile, "--circuit_file") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 2227656c1af..fd3e97cf99e 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -50,6 +50,7 @@ struct t_options { argparse::ArgValue clock_modeling; argparse::ArgValue exit_before_pack; argparse::ArgValue strict_checks; + argparse::ArgValue allow_dangling_combinational_nodes; /* Atom netlist 
options */ argparse::ArgValue absorb_buffer_luts; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index d08a5764405..43374bde441 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -275,7 +275,7 @@ void vpr_init(const int argc, const char** argv, t_options* options, t_vpr_setup auto& timing_ctx = g_vpr_ctx.mutable_timing(); { vtr::ScopedStartFinishTimer t("Build Timing Graph"); - timing_ctx.graph = TimingGraphBuilder(atom_ctx.nlist, atom_ctx.lookup).timing_graph(); + timing_ctx.graph = TimingGraphBuilder(atom_ctx.nlist, atom_ctx.lookup).timing_graph(options->allow_dangling_combinational_nodes); VTR_LOG(" Timing Graph Nodes: %zu\n", timing_ctx.graph->nodes().size()); VTR_LOG(" Timing Graph Edges: %zu\n", timing_ctx.graph->edges().size()); VTR_LOG(" Timing Graph Levels: %zu\n", timing_ctx.graph->levels().size()); diff --git a/vpr/src/timing/timing_graph_builder.cpp b/vpr/src/timing/timing_graph_builder.cpp index 2ae02a020db..192b9ed8aaa 100644 --- a/vpr/src/timing/timing_graph_builder.cpp +++ b/vpr/src/timing/timing_graph_builder.cpp @@ -40,8 +40,8 @@ TimingGraphBuilder::TimingGraphBuilder(const AtomNetlist& netlist, //pass } -std::unique_ptr TimingGraphBuilder::timing_graph() { - build(); +std::unique_ptr TimingGraphBuilder::timing_graph(bool allow_dangling_combinational_nodes) { + build(allow_dangling_combinational_nodes); opt_memory_layout(); VTR_ASSERT(tg_); @@ -50,9 +50,13 @@ std::unique_ptr TimingGraphBuilder::timing_graph() { return std::move(tg_); } -void TimingGraphBuilder::build() { +void TimingGraphBuilder::build(bool allow_dangling_combinational_nodes) { tg_ = std::make_unique(); + // Optionally allow dangling combinational nodes. + // Set by `--allow_dangling_combinational_nodes on`. 
Default value is false + tg_->set_allow_dangling_combinational_nodes(allow_dangling_combinational_nodes); + for (AtomBlockId blk : netlist_.blocks()) { AtomBlockType blk_type = netlist_.block_type(blk); diff --git a/vpr/src/timing/timing_graph_builder.h b/vpr/src/timing/timing_graph_builder.h index 0ca93d1e19f..8e6745b7cb1 100644 --- a/vpr/src/timing/timing_graph_builder.h +++ b/vpr/src/timing/timing_graph_builder.h @@ -10,10 +10,10 @@ class TimingGraphBuilder { TimingGraphBuilder(const AtomNetlist& netlist, AtomLookup& netlist_lookup); - std::unique_ptr timing_graph(); + std::unique_ptr timing_graph(bool allow_dangling_combinational_nodes); private: - void build(); + void build(bool allow_dangling_combinational_nodes); void opt_memory_layout(); void add_io_to_timing_graph(const AtomBlockId blk); From 98142910f58eafc245351e8cdec271cb572a3db7 Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Wed, 3 Apr 2019 16:47:15 +0200 Subject: [PATCH 09/15] equivalent tiles: added equivalent tile placement capability This commit introduces two major features: - introduce the tile concept in the architecture XML. Top level pb_type is discarded and all the top level pb_type information are moved into the tile tags. This will cause CI build to fail as all the architectures do not include the tiles tag right now. - introduce the possibility to place blocks in equivalent tiles (SLICEL blocks into SLICEM ones). 
According to the XML architecture description there could be tiles equivalent to others that can be used during the placement step (this can bring to better placement solutions) Signed-off-by: Alessandro Comodi --- libs/libarchfpga/src/physical_types.cpp | 9 + libs/libarchfpga/src/physical_types.h | 31 +- libs/libarchfpga/src/read_xml_arch_file.cpp | 382 +++- utils/fasm/src/fasm.cpp | 6 +- vpr/src/base/clustered_netlist.cpp | 49 +- vpr/src/base/clustered_netlist.h | 34 +- vpr/src/pack/pack.cpp | 8 +- vpr/src/place/place.cpp | 288 ++- vpr/src/timing/clb_delay_calc.inl | 5 +- vpr/src/util/vpr_utils.cpp | 110 +- vtr_flow/arch/equivalent_tiles/slice.xml | 1625 +++++++++++++++++ .../strong_equivalent_tiles/config/config.txt | 31 + 12 files changed, 2386 insertions(+), 192 deletions(-) create mode 100644 vtr_flow/arch/equivalent_tiles/slice.xml create mode 100644 vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_equivalent_tiles/config/config.txt diff --git a/libs/libarchfpga/src/physical_types.cpp b/libs/libarchfpga/src/physical_types.cpp index 0e684e37b29..a345c9dd3ae 100644 --- a/libs/libarchfpga/src/physical_types.cpp +++ b/libs/libarchfpga/src/physical_types.cpp @@ -140,6 +140,15 @@ std::vector t_type_descriptor::get_clock_pins_indices() const { return indices; } +bool t_type_descriptor::is_available_tile_index(int index_to_check) const { + auto search = this->available_tiles_indices.find(index_to_check); + if (search != available_tiles_indices.end()) { + return true; + } + + return false; +} + /** * t_pb_graph_node */ diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index b48d6aaa1b1..0213261fecf 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -31,9 +31,9 @@ #include #include #include -#include #include #include +#include #include "vtr_ndmatrix.h" #include "vtr_hash.h" @@ -556,6 +556,20 @@ constexpr int DEFAULT_SWITCH = -2; * pb_type: Internal subblocks and routing 
information for this physical block * pb_graph_head: Head of DAG of pb_types_nodes and their edges * + * + * num_equivalent_tiles: Specifies the number of equivalent physical types that can be used during placement. + * If the value is `0` all the data structures relative to the equivalent tiles will be empty. + * equivalent_tiles: Array containing pointers to the equivalent tiles. The number of elements contained is specified + * by num_equivalent_tiles. + * equivalent_tile_pin_mapping: Multi-dimensional array that, for each different equivalent tile, contains a mapping between + * the pins of the two tiles. + * Example: equivalent_tile_pin_mapping[eq_tile_index][pin_index] = equivalent_pin_index + * This is necessary to maintain consistency between two equivalent tiles that have the same pins + * defined with different indices. + * equivalent_tile_inverse_pin_mapping: Multi-dimensional array that works as the previous one, but the mapping is inverse in this case. + * Example: equivalent_tile_inverse_pin_mapping[eq_tile_index][equivalent_pin_index] = pin_index + * available_tiles_indices: unordered set used to have a fast lookup on the available tiles. + * * area: Describes how much area this logic block takes, if undefined, use default * type_timing_inf: timing information unique to this type * num_drivers: Total number of output drivers supplied @@ -595,6 +609,13 @@ struct t_type_descriptor /* TODO rename this.
maybe physical type descriptor or t_pb_type* pb_type = nullptr; t_pb_graph_node* pb_graph_head = nullptr; + /* Equivalent tiles information */ + int num_equivalent_tiles = 0; + std::unordered_map equivalent_tiles; /* [0..num_equivalent_tiles-1] */ + std::unordered_map> equivalent_tile_pin_mapping; /* [0..num_equivalent_tiles-1][0..num_pins-1] */ + std::unordered_map> equivalent_tile_inverse_pin_mapping; /* [0..num_equivalent_tiles-1][0..num_pins-1] */ + std::unordered_set available_tiles_indices; + float area = 0; /* This info can be determined from class_inf and pin_class but stored for faster access */ @@ -603,8 +624,15 @@ struct t_type_descriptor /* TODO rename this. maybe physical type descriptor or int index = -1; /* index of type descriptor in array (allows for index referencing) */ + /*********** + * Methods * + ***********/ + /* Returns the indices of pins that contain a clock for this physical logic block */ std::vector get_clock_pins_indices() const; + + /* Returns a boolean set to True if the input index belongs to an available tile, False otherwise */ + bool is_available_tile_index(int index_to_check) const; }; typedef const t_type_descriptor* t_type_ptr; @@ -1200,6 +1228,7 @@ struct t_segment_inf { std::vector cb; std::vector sb; //float Cmetal_per_m; /* Wire capacitance (per meter) */ + t_metadata_dict* meta = nullptr; }; enum class SwitchType { diff --git a/libs/libarchfpga/src/read_xml_arch_file.cpp b/libs/libarchfpga/src/read_xml_arch_file.cpp index e2c374261c9..1a62ad7dcdd 100644 --- a/libs/libarchfpga/src/read_xml_arch_file.cpp +++ b/libs/libarchfpga/src/read_xml_arch_file.cpp @@ -101,7 +101,7 @@ static void Process_Fc(pugi::xml_node Node, t_type_descriptor* Type, std::vector static t_fc_override Process_Fc_override(pugi::xml_node node, const pugiutil::loc_data& loc_data); static void ProcessSwitchblockLocations(pugi::xml_node swtichblock_locations, t_type_descriptor* type, const t_arch& arch, const pugiutil::loc_data& loc_data); static 
e_fc_value_type string_to_fc_value_type(const std::string& str, pugi::xml_node node, const pugiutil::loc_data& loc_data); -static void ProcessComplexBlockProps(pugi::xml_node Node, t_type_descriptor* Type, const pugiutil::loc_data& loc_data); +static void ProcessTileProps(pugi::xml_node Node, t_type_descriptor* Type, const pugiutil::loc_data& loc_data); static void ProcessChanWidthDistr(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data); @@ -111,12 +111,29 @@ static void ProcessModelPorts(pugi::xml_node port_group, t_model* model, std::se static void ProcessLayout(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data); static t_grid_def ProcessGridLayout(pugi::xml_node layout_type_tag, const pugiutil::loc_data& loc_data); static void ProcessDevice(pugi::xml_node Node, t_arch* arch, t_default_fc_spec& arch_def_fc, const pugiutil::loc_data& loc_data); +static void ProcessTiles(pugi::xml_node Node, + t_type_descriptor** Types, + int* NumTypes, + std::unordered_map* TypeMap, + const pugiutil::loc_data& loc_data); +static void ProcessTilesTags(pugi::xml_node Node, + std::unordered_map TypeMap, + t_arch& arch, + const t_default_fc_spec& arch_def_fc, + const pugiutil::loc_data& loc_data); +static void ProcessTileExtraModes(pugi::xml_node Node, + t_type_descriptor* Type, + std::unordered_map TypeMap, + const pugiutil::loc_data& loc_data); +static void ProcessTileExtraModePinMapping(pugi::xml_node Node, + t_type_descriptor* Type, + t_type_descriptor* EquivalentType, + int imode, + const pugiutil::loc_data& loc_data); static void ProcessComplexBlocks(pugi::xml_node Node, - t_type_descriptor** Types, - int* NumTypes, + std::unordered_map TypeMap, t_arch& arch, const bool timing_enabled, - const t_default_fc_spec& arch_def_fc, const pugiutil::loc_data& loc_data); static void ProcessSwitches(pugi::xml_node Node, t_arch_switch_inf** Switches, @@ -176,6 +193,8 @@ int find_switch_by_name(const t_arch& arch, std::string switch_name); e_side 
string_to_side(std::string side_str); +static t_type_descriptor* get_corresponding_tile(std::unordered_map TypeMap, const char* type_name); +static int get_pin_index_by_name(t_type_descriptor* Type, const char* port_name, int offset); /* * * @@ -254,9 +273,18 @@ void XmlReadArch(const char* ArchFile, const bool timing_enabled, t_arch* arch, ProcessSwitchblocks(Next, arch, loc_data); } - /* Process types */ + /* Process tiles */ + std::unordered_map TypeMap; + Next = get_single_child(architecture, "tiles", loc_data); + ProcessTiles(Next, Types, NumTypes, &TypeMap, loc_data); + + /* Process pb_types */ Next = get_single_child(architecture, "complexblocklist", loc_data); - ProcessComplexBlocks(Next, Types, NumTypes, *arch, timing_enabled, arch_def_fc, loc_data); + ProcessComplexBlocks(Next, TypeMap, *arch, timing_enabled, loc_data); + + /* Process tile tags that after pb_type have been parsed */ + Next = get_single_child(architecture, "tiles", loc_data); + ProcessTilesTags(Next, TypeMap, *arch, arch_def_fc, loc_data); /* Process directs */ Next = get_single_child(architecture, "directlist", loc_data, OPTIONAL); @@ -969,14 +997,6 @@ static void ProcessPb_Type(pugi::xml_node Parent, t_pb_type* pb_type, t_mode* mo children_to_expect.push_back("model"); children_to_expect.push_back("pb_type"); children_to_expect.push_back("interconnect"); - - if (is_root_pb_type) { - VTR_ASSERT(!is_leaf_pb_type); - //Top level pb_type's may also have the following tag types - children_to_expect.push_back("fc"); - children_to_expect.push_back("pinlocations"); - children_to_expect.push_back("switchblock_locations"); - } } else { VTR_ASSERT(is_leaf_pb_type); VTR_ASSERT(!is_root_pb_type); @@ -1698,7 +1718,7 @@ static void Process_Fc(pugi::xml_node Node, t_type_descriptor* Type, std::vector /* Use the default value, if available */ if (!arch_def_fc.specified) { archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), - " is missing child , and no specified in architecture\n"); + " is 
missing child , and no specified in architecture\n"); } def_fc_spec = arch_def_fc; } @@ -2017,28 +2037,6 @@ static void ProcessSwitchblockLocations(pugi::xml_node switchblock_locations, t_ } } -/* Thie processes attributes of the 'type' tag */ -static void ProcessComplexBlockProps(pugi::xml_node Node, t_type_descriptor* Type, const pugiutil::loc_data& loc_data) { - const char* Prop; - - expect_only_attributes(Node, {"name", "capacity", "width", "height", "area"}, loc_data); - - /* Load type name */ - Prop = get_attribute(Node, "name", loc_data).value(); - Type->name = vtr::strdup(Prop); - - /* Load properties */ - Type->capacity = get_attribute(Node, "capacity", loc_data, OPTIONAL).as_uint(1); /* TODO: Any block with capacity > 1 that is not I/O has not been tested, must test */ - Type->width = get_attribute(Node, "width", loc_data, OPTIONAL).as_uint(1); - Type->height = get_attribute(Node, "height", loc_data, OPTIONAL).as_uint(1); - Type->area = get_attribute(Node, "area", loc_data, OPTIONAL).as_float(UNDEFINED); - - if (atof(Prop) < 0) { - archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), - "Area for type %s must be non-negative\n", Type->name); - } -} - /* Takes in node pointing to and loads all the * child type objects. */ static void ProcessModels(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data) { @@ -2620,16 +2618,36 @@ static void ProcessChanWidthDistrDir(pugi::xml_node Node, t_chan* chan, const pu chan->dc = get_attribute(Node, "dc", loc_data, hasDc).as_float(0); } -/* Takes in node pointing to and loads all the - * child type objects. 
*/ -static void ProcessComplexBlocks(pugi::xml_node Node, - t_type_descriptor** Types, - int* NumTypes, - t_arch& arch, - const bool timing_enabled, - const t_default_fc_spec& arch_def_fc, - const pugiutil::loc_data& loc_data) { - pugi::xml_node CurType, Prev; +/* This processes the attributes of a 'tile' tag */ +static void ProcessTileProps(pugi::xml_node Node, t_type_descriptor* Type, const pugiutil::loc_data& loc_data) { + const char* Prop; + + expect_only_attributes(Node, {"name", "capacity", "width", "height", "area"}, loc_data); + + /* Load type name */ + Prop = get_attribute(Node, "name", loc_data).value(); + Type->name = vtr::strdup(Prop); + + /* Load properties */ + Type->capacity = get_attribute(Node, "capacity", loc_data, OPTIONAL).as_uint(1); /* TODO: Any block with capacity > 1 that is not I/O has not been tested, must test */ + Type->width = get_attribute(Node, "width", loc_data, OPTIONAL).as_uint(1); + Type->height = get_attribute(Node, "height", loc_data, OPTIONAL).as_uint(1); + Type->area = get_attribute(Node, "area", loc_data, OPTIONAL).as_float(UNDEFINED); + + if (atof(Prop) < 0) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), + "Area for type %s must be non-negative\n", Type->name); + } +} + +/* Takes in node pointing to <tiles> and loads all the * + * child type objects. */ +static void ProcessTiles(pugi::xml_node Node, + t_type_descriptor** Types, + int* NumTypes, + std::unordered_map* TypeMap, + const pugiutil::loc_data& loc_data) { + pugi::xml_node CurType; pugi::xml_node Cur; t_type_descriptor* Type; int i; @@ -2638,7 +2656,7 @@ static void ProcessComplexBlocks(pugi::xml_node Node, /* Alloc the type list.
Need one additional t_type_desctiptors: * 1: empty psuedo-type */ - *NumTypes = count_children(Node, "pb_type", loc_data) + 1; + *NumTypes = count_children(Node, "tile", loc_data) + 1; *Types = new t_type_descriptor[*NumTypes]; cb_type_descriptors = *Types; @@ -2654,30 +2672,63 @@ static void ProcessComplexBlocks(pugi::xml_node Node, CurType = Node.first_child(); while (CurType) { - check_node(CurType, "pb_type", loc_data); + check_node(CurType, "tile", loc_data); /* Alias to current type */ Type = &(*Types)[i]; /* Parses the properties fields of the type */ - ProcessComplexBlockProps(CurType, Type, loc_data); + ProcessTileProps(CurType, Type, loc_data); ret_pb_type_descriptors = pb_type_descriptors.insert(pair(Type->name, 0)); if (!ret_pb_type_descriptors.second) { archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), - "Duplicate pb_type descriptor name: '%s'.\n", Type->name); + "Duplicate tile descriptor name: '%s'.\n", Type->name); } - /* Load pb_type info */ - Type->pb_type = new t_pb_type; - Type->pb_type->name = vtr::strdup(Type->name); - ProcessPb_Type(CurType, Type->pb_type, nullptr, timing_enabled, arch, loc_data); - Type->num_pins = Type->capacity - * (Type->pb_type->num_input_pins - + Type->pb_type->num_output_pins - + Type->pb_type->num_clock_pins); - Type->num_receivers = Type->capacity * Type->pb_type->num_input_pins; - Type->num_drivers = Type->capacity * Type->pb_type->num_output_pins; + Type->index = i; + Type->available_tiles_indices.insert(i); + + auto result = TypeMap->insert(std::make_pair(Type->name, Type)); + if (!result.second) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), + "Duplicate tile found: '%s'.\n", Type->name); + } + + /* Type fully read */ + ++i; + + /* Free this node and get its next sibling node */ + CurType = CurType.next_sibling(CurType.name()); + } + pb_type_descriptors.clear(); +} + +// This step has to be performed after the root pb_type has been parsed +static void 
ProcessTilesTags(pugi::xml_node Node, + std::unordered_map TypeMap, + t_arch& arch, + const t_default_fc_spec& arch_def_fc, + const pugiutil::loc_data& loc_data) { + pugi::xml_node Cur, CurType; + t_type_descriptor* Type; + + /* Process the types */ + CurType = Node.first_child(); + while (CurType) { + check_node(CurType, "tile", loc_data); + + /* Load type name */ + const char* NameProp = get_attribute(CurType, "name", loc_data).value(); + + /* Alias to current type */ + Type = get_corresponding_tile(TypeMap, vtr::strdup(NameProp)); + if (Type == nullptr) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), + "No tiles found corresponding to current root level pb type: '%s'.\n", Type->pb_type->name); + } + + VTR_ASSERT(Type->pb_type != nullptr); /* Load pin names and classes and locations */ Cur = get_single_child(CurType, "pinlocations", loc_data, OPTIONAL); @@ -2698,19 +2749,178 @@ static void ProcessComplexBlocks(pugi::xml_node Node, Cur = get_single_child(CurType, "fc", loc_data, OPTIONAL); Process_Fc(Cur, Type, arch.Segments, arch_def_fc, loc_data); - //Load switchblock type and location overrides + /* Load switchblock type and location overrides */ Cur = get_single_child(CurType, "switchblock_locations", loc_data, OPTIONAL); ProcessSwitchblockLocations(Cur, Type, arch, loc_data); - Type->index = i; - - /* Type fully read */ - ++i; + /* Load possible modes (pb_types which are compatible with the current tile) */ + Cur = get_single_child(CurType, "equivalent_tiles", loc_data, OPTIONAL); + if (Cur) { + ProcessTileExtraModes(Cur, Type, TypeMap, loc_data); + } /* Free this node and get its next sibling node */ CurType = CurType.next_sibling(CurType.name()); } - pb_type_descriptors.clear(); +} + +/* Processes the equivalent tiles defined in the XML arch definition + * + * + * + * + * + * + * + * + * + * + * In particular this function parses the `modes` (if they exist) of each tile + * and adds the equivalent tile information to the 
t_type_descriptor relative to + * the current tile. + * It populates the following t_type_descriptor members: + * - num_equivalent_tiles; + * - equivalent_tiles; + * - available_tiles_indices. + */ +static void ProcessTileExtraModes(pugi::xml_node Node, + t_type_descriptor* Type, + std::unordered_map TypeMap, + const pugiutil::loc_data& loc_data) { + pugi::xml_node CurType; + + Type->num_equivalent_tiles = count_children(Node, "mode", loc_data); + int index = 0; + CurType = Node.first_child(); + while (CurType && index < Type->num_equivalent_tiles) { + const char* equivalent_tile_name = get_attribute(CurType, "name", loc_data).value(); + auto EquivalentTile = get_corresponding_tile(TypeMap, equivalent_tile_name); + + if (EquivalentTile == nullptr) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), + "No tiles found corresponding to equivalent tile name: '%s'.\n", Type->pb_type->name); + } + + // Inserts equivalent tile as last element so the index points to the correct equivalent tile. + auto result = Type->equivalent_tiles.insert(std::make_pair(index, EquivalentTile)); + if (!result.second) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), + "Duplicate equivalent tile found: '%s'.\n", EquivalentTile->name); + } + + Type->available_tiles_indices.insert(EquivalentTile->index); + + ProcessTileExtraModePinMapping(CurType, Type, Type->equivalent_tiles[index], index, loc_data); + + index++; + CurType = CurType.next_sibling(CurType.name()); + } +} + +/* Processes the pin_mapping of each equivalent tile. + * It goes through each mode and populates the following t_type_descriptor members: + * - equivalent_tile_pin_mapping; + * - equivalent_tile_inverse_pin_mapping.
+ */ +static void ProcessTileExtraModePinMapping(pugi::xml_node Node, + t_type_descriptor* Type, + t_type_descriptor* EquivalentType, + int imode, + const pugiutil::loc_data& loc_data) { + pugi::xml_node CurType = Node.first_child(); + const char *from_port, *to_port; + int from_pin_index, to_pin_index; + int num_pins; + + std::unordered_map pin_mapping, inverse_pin_mapping; + + while (CurType) { + //Process each mode mapping + if (CurType.name() != std::string("map")) { + bad_tag(CurType, loc_data, Node, {"map"}); + } + + from_port = get_attribute(CurType, "from", loc_data).value(); + to_port = get_attribute(CurType, "to", loc_data).value(); + num_pins = get_attribute(CurType, "num_pins", loc_data, OPTIONAL).as_int(1); + + for (int offset = 0; offset < num_pins; offset++) { + from_pin_index = get_pin_index_by_name(Type, from_port, offset); + to_pin_index = get_pin_index_by_name(EquivalentType, to_port, offset); + + auto result = pin_mapping.insert(std::make_pair(from_pin_index, to_pin_index)); + if (!result.second) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), + "Duplicate equivalent tile 'from_pin': '%d' (in %s).\n", from_pin_index, Type->name); + } + + result = inverse_pin_mapping.insert(std::make_pair(to_pin_index, from_pin_index)); + if (!result.second) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), + "Duplicate equivalent tile 'to_pin': '%d' (in %s).\n", to_pin_index, Type->name); + } + } + + CurType = CurType.next_sibling(CurType.name()); + } + + Type->equivalent_tile_pin_mapping.insert(std::make_pair(imode, pin_mapping)); + Type->equivalent_tile_inverse_pin_mapping.insert(std::make_pair(imode, inverse_pin_mapping)); +} + +static void ProcessComplexBlocks(pugi::xml_node Node, + std::unordered_map TypeMap, + t_arch& arch, + const bool timing_enabled, + const pugiutil::loc_data& loc_data) { + pugi::xml_node CurPbType; + t_type_descriptor* Type; + + map pb_types; + pair::iterator, bool> ret_pb_types; + + CurPbType 
= Node.first_child(); + while (CurPbType) { + check_node(CurPbType, "pb_type", loc_data); + + char* type_name = nullptr; + + for (pugi::xml_attribute attr : CurPbType.attributes()) { + if (attr.name() != std::string("name")) { + bad_attribute(attr, CurPbType, loc_data); + } else { + type_name = vtr::strdup(attr.value()); + } + } + + Type = get_corresponding_tile(TypeMap, type_name); + if (Type == nullptr) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurPbType), + "No tiles found corresponding to current root level pb type: '%s'.\n", type_name); + } + + Type->pb_type = new t_pb_type; + Type->pb_type->name = vtr::strdup(type_name); + + ret_pb_types = pb_types.insert( + pair(Type->pb_type->name, 0)); + if (!ret_pb_types.second) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurPbType), + "Duplicate pb_type descriptor name: '%s'.\n", Type->pb_type->name); + } + + ProcessPb_Type(CurPbType, Type->pb_type, nullptr, timing_enabled, arch, loc_data); + Type->num_pins = Type->capacity + * (Type->pb_type->num_input_pins + + Type->pb_type->num_output_pins + + Type->pb_type->num_clock_pins); + Type->num_receivers = Type->capacity * Type->pb_type->num_input_pins; + Type->num_drivers = Type->capacity * Type->pb_type->num_output_pins; + + /* Load pin names and classes and locations */ + + CurPbType = CurPbType.next_sibling(CurPbType.name()); + } + pb_types.clear(); } static void ProcessSegments(pugi::xml_node Parent, @@ -4117,3 +4327,41 @@ e_side string_to_side(std::string side_str) { } return side; } + +static t_type_descriptor* get_corresponding_tile(std::unordered_map TypeMap, + const char* type_name) { + auto result = TypeMap.find(type_name); + + if (result == TypeMap.end()) { + return nullptr; + } + + return result->second; +} + +static int get_pin_index_by_name(t_type_descriptor* Type, const char* port_name, int pin_index_in_port) { + int ipin = OPEN; + + t_pb_type* pb_type = Type->pb_type; + t_port* matched_port = nullptr; + int port_base_ipin = 
0; + + for (int iport = 0; iport < pb_type->num_ports; ++iport) { + t_port* port = &pb_type->ports[iport]; + + if (0 == strcmp(port->name, port_name)) { + matched_port = port; + break; + } + port_base_ipin += port->num_pins; + } + + if (matched_port) { + VTR_ASSERT(0 == strcmp(matched_port->name, port_name)); + VTR_ASSERT(pin_index_in_port < matched_port->num_pins); + + ipin = port_base_ipin + pin_index_in_port; + } + + return ipin; +} diff --git a/utils/fasm/src/fasm.cpp b/utils/fasm/src/fasm.cpp index ee7c7cd8fd1..9797f97e81c 100644 --- a/utils/fasm/src/fasm.cpp +++ b/utils/fasm/src/fasm.cpp @@ -93,7 +93,11 @@ void FasmWriterVisitor::check_interconnect(const t_pb_routes &pb_routes, int ino return; } - t_pb_graph_pin *prev_pin = pb_graph_pin_lookup_from_index_by_type_.at(blk_type_->index)[prev_node]; + auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + + t_type_ptr original_blk_type = clb_nlist.block_type(current_blk_id_, false); + + t_pb_graph_pin *prev_pin = pb_graph_pin_lookup_from_index_by_type_.at(original_blk_type->index)[prev_node]; int prev_edge; for(prev_edge = 0; prev_edge < prev_pin->num_output_edges; prev_edge++) { diff --git a/vpr/src/base/clustered_netlist.cpp b/vpr/src/base/clustered_netlist.cpp index eb69053c404..de5f5f097e6 100644 --- a/vpr/src/base/clustered_netlist.cpp +++ b/vpr/src/base/clustered_netlist.cpp @@ -25,11 +25,30 @@ t_pb* ClusteredNetlist::block_pb(const ClusterBlockId id) const { } t_type_ptr ClusteredNetlist::block_type(const ClusterBlockId id) const { + return block_type(id, true); +} + +t_type_ptr ClusteredNetlist::block_type(const ClusterBlockId id, bool get_equivalent_if_set) const { VTR_ASSERT_SAFE(valid_block_id(id)); + if (block_eq_type_index(id) != OPEN && get_equivalent_if_set) { + return block_eq_type_[id]; + } return block_types_[id]; } +int ClusteredNetlist::block_eq_type_index(const ClusterBlockId id) const { + VTR_ASSERT_SAFE(valid_block_id(id)); + + return block_eq_type_index_[id]; +} + +bool 
ClusteredNetlist::block_eq_type_effective(const ClusterBlockId id) const { + VTR_ASSERT_SAFE(valid_block_id(id)); + + return block_eq_type_effective_[id]; +} + ClusterNetId ClusteredNetlist::block_net(const ClusterBlockId blk_id, const int phys_pin_index) const { auto pin_id = block_pin(blk_id, phys_pin_index); @@ -120,9 +139,12 @@ ClusterBlockId ClusteredNetlist::create_block(const char* name, t_pb* pb, t_type block_pbs_.insert(blk_id, pb); block_types_.insert(blk_id, type); - + block_eq_type_.insert(blk_id, type); + block_eq_type_index_.insert(blk_id, OPEN); + block_eq_type_effective_.insert(blk_id, false); //Allocate and initialize every potential pin of the block - block_logical_pins_.insert(blk_id, std::vector(type->num_pins, ClusterPinId::INVALID())); + int num_pins = get_max_num_pins(type); + block_logical_pins_.insert(blk_id, std::vector(num_pins, ClusterPinId::INVALID())); } //Check post-conditions: size @@ -135,6 +157,12 @@ ClusterBlockId ClusteredNetlist::create_block(const char* name, t_pb* pb, t_type return blk_id; } +void ClusteredNetlist::set_equivalent_block_type(const ClusterBlockId blk_id, int i_eq_type, t_type_ptr eq_type) { + block_eq_type_index_[blk_id] = i_eq_type; + block_eq_type_effective_[blk_id] = true; + block_eq_type_[blk_id] = eq_type; +} + void ClusteredNetlist::set_pin_physical_index(const ClusterPinId pin, const int phys_pin_index) { VTR_ASSERT_SAFE(valid_pin_id(pin)); auto blk = pin_block(pin); @@ -322,3 +350,20 @@ bool ClusteredNetlist::validate_net_sizes_impl(size_t num_nets) const { } return true; } + +/* + * Utilities + */ +int ClusteredNetlist::get_max_num_pins(t_type_ptr type) { + int max_pins = type->num_pins; + + for (int itype = 0; itype < type->num_equivalent_tiles; itype++) { + auto result = type->equivalent_tiles.find(itype); + VTR_ASSERT(result != type->equivalent_tiles.end()); + + int num_pins = result->second->num_pins; + max_pins = std::max(num_pins, max_pins); + } + + return max_pins; +} diff --git 
a/vpr/src/base/clustered_netlist.h b/vpr/src/base/clustered_netlist.h index 343cffaa9b9..8b3f34fb1ff 100644 --- a/vpr/src/base/clustered_netlist.h +++ b/vpr/src/base/clustered_netlist.h @@ -125,8 +125,25 @@ class ClusteredNetlist : public Netlistequivalent_tiles[index] + int block_eq_type_index(const ClusterBlockId id) const; + + //Returns true if the block has been placed in an equivalent tile + bool block_eq_type_effective(const ClusterBlockId id) const; //Returns the net of the block attached to the specific pin index ClusterNetId block_net(const ClusterBlockId blk_id, const int pin_index) const; @@ -174,6 +191,13 @@ class ClusteredNetlist : public Netlist block_pbs_; //Physical block representing the clustering & internal hierarchy of each CLB vtr::vector_map block_types_; //The type of physical block this user circuit block is mapped to + vtr::vector_map block_eq_type_; //The equivalent type (if any) selected for a CLB + vtr::vector_map block_eq_type_index_; //Index relative to the equivalent tile chosen during placement + vtr::vector_map block_eq_type_effective_; //Boolean to state if equivalent tile is used vtr::vector_map> block_logical_pins_; //The logical pin associated with each physical block pin //Pins diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index c841f16e453..5627135974d 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -279,9 +279,15 @@ static bool try_size_device_grid(const t_arch& arch, const std::mapsecond; + + int num_available_instances = device_ctx.grid.num_instances(type); + for (int itype = 0; itype < type->num_equivalent_tiles; itype++) { + num_available_instances += device_ctx.grid.num_instances(type->equivalent_tiles[itype]); + } + float util = 0.; if (num_instances != 0) { - util = num_instances / device_ctx.grid.num_instances(type); + util = num_instances / num_available_instances; } type_util[type] = util; diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index d769a295809..f0b2e8e3e16 
100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -287,7 +287,7 @@ static int try_place_macro(int itype, int ipos, int imacro); static void initial_placement_pl_macros(int macros_max_num_tries, int* free_locations); static void initial_placement_blocks(int* free_locations, enum e_pad_loc_type pad_loc_type); -static void initial_placement_location(const int* free_locations, ClusterBlockId blk_id, int& pipos, t_pl_loc& to); +static void initial_placement_location(const int* free_locations, int itype, int& pipos, t_pl_loc& to); static void initial_placement(enum e_pad_loc_type pad_loc_type, const char* pad_loc_file); @@ -375,7 +375,7 @@ static void comp_td_costs(const PlaceDelayModel& delay_model, double* timing_cos static e_swap_result assess_swap(double delta_c, double t); -static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc& to); +static bool find_to(t_type_ptr to_type, t_type_ptr from_type, float rlim, const t_pl_loc from, t_pl_loc& to); static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new); @@ -431,6 +431,7 @@ static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, static void log_move_abort(std::string reason); static void report_aborted_moves(); +std::vector get_available_tiles(t_type_ptr type); static int grid_to_compressed(const std::vector& coords, int point); static void print_place_status_header(); @@ -1657,18 +1658,44 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { // * on chip, and // * match the correct block type // - //Note that we need to explicitly check that the types match, since the device floorplan is not + //Note that we need to explicitly check that the types match or are equivalent, since the device floorplan is not //(neccessarily) translationally invariant for an arbitrary macro auto& device_ctx = g_vpr_ctx.device(); auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_ctx = g_vpr_ctx.placement(); if (to.x < 0 || to.x >= 
int(device_ctx.grid.width()) || to.y < 0 || to.y >= int(device_ctx.grid.height()) - || to.z < 0 || to.z >= device_ctx.grid[to.x][to.y].type->capacity - || (device_ctx.grid[to.x][to.y].type != cluster_ctx.clb_nlist.block_type(blk))) { + || to.z < 0 || to.z >= device_ctx.grid[to.x][to.y].type->capacity) { + return false; + } + + // Check if types are allowed to be swapped + auto blk_type_from = cluster_ctx.clb_nlist.block_type(blk); + auto blk_type_to = device_ctx.grid[to.x][to.y].type; + + // First check is to see if `from` type can be placed in `to` type + if (!blk_type_from->is_available_tile_index(blk_type_to->index)) { return false; } + + t_pl_loc from = place_ctx.block_locs[blk].loc; + ClusterBlockId blk_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.z]; + + // In case `blk_to` is empty we can skip the second check + if (blk_to == EMPTY_BLOCK_ID) { + return true; + } + + blk_type_from = device_ctx.grid[from.x][from.y].type; + blk_type_to = cluster_ctx.clb_nlist.block_type(blk_to); + + // Second check is to see if `to` type can be placed in `from` type + if (!blk_type_to->is_available_tile_index(blk_type_from->index)) { + return false; + } + return true; } @@ -1728,7 +1755,25 @@ static e_swap_result try_swap(float t, t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); auto grid_from_type = g_vpr_ctx.device().grid[from.x][from.y].type; - VTR_ASSERT(cluster_from_type == grid_from_type); + + VTR_ASSERT(cluster_from_type->is_available_tile_index(grid_from_type->index)); + + t_type_ptr to_block_type = cluster_ctx.clb_nlist.block_type(b_from); + + // Find random equivalent type (could be of the same type as the `from` one) + if (to_block_type->num_equivalent_tiles > 0) { + int irand_block_type = std::rand() % (to_block_type->num_equivalent_tiles + 1); + + // If random index is 0 do not use an equivalent tile. 
+ if (irand_block_type > 0) { + auto result = to_block_type->equivalent_tiles.find(irand_block_type - 1); + VTR_ASSERT(result != to_block_type->equivalent_tiles.end()); + + to_block_type = result->second; + } + } + + VTR_ASSERT(cluster_from_type->is_available_tile_index(to_block_type->index)); //Allow some fraction of moves to not be restricted by rlim, //in the hopes of better escaping local minima @@ -1737,7 +1782,7 @@ static e_swap_result try_swap(float t, } t_pl_loc to; - if (!find_to(cluster_ctx.clb_nlist.block_type(b_from), rlim, from, to)) + if (!find_to(to_block_type, grid_from_type, rlim, from, to)) return REJECTED; #if 0 @@ -1821,8 +1866,8 @@ static e_swap_result try_swap(float t, //VTR_ASSERT(check_macro_placement_consistency() == 0); #if 0 - //Check that each accepted swap yields a valid placement - check_place(*costs, delay_model, place_algorithm); + //Check that each accepted swap yields a valid placement + check_place(costs, *place_delay_model, place_algorithm); #endif return (keep_switch); @@ -1995,7 +2040,7 @@ static void update_td_delta_costs(const PlaceDelayModel& delay_model, const Clus } } -static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc& to) { +static bool find_to(t_type_ptr to_type, t_type_ptr from_type, float rlim, const t_pl_loc from, t_pl_loc& to) { //Finds a legal swap to location for the given type, starting from 'x_from' and 'y_from' // //Note that the range limit (rlim) is applied in a logical sense (i.e. 
'compressed' grid space consisting @@ -2005,29 +2050,39 @@ static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc& // //This ensures that such blocks don't get locked down too early during placement (as would be the //case with a physical distance rlim) - auto& grid = g_vpr_ctx.device().grid; - - auto grid_type = grid[from.x][from.y].type; - VTR_ASSERT(type == grid_type); //Retrieve the compressed block grid for this block type - const auto& compressed_block_grid = f_compressed_block_grids[type->index]; + const auto& to_compressed_block_grid = f_compressed_block_grids[to_type->index]; + const auto& from_compressed_block_grid = f_compressed_block_grids[from_type->index]; //Determine the rlim in each dimension - int rlim_x = min(compressed_block_grid.compressed_to_grid_x.size(), rlim); - int rlim_y = min(compressed_block_grid.compressed_to_grid_y.size(), rlim); /* for aspect_ratio != 1 case. */ + int rlim_x = min(to_compressed_block_grid.compressed_to_grid_x.size(), rlim); + int rlim_y = min(to_compressed_block_grid.compressed_to_grid_y.size(), rlim); /* for aspect_ratio != 1 case. 
*/ //Determine the coordinates in the compressed grid space of the current block - int cx_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_x, from.x); - int cy_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_y, from.y); + int cx_from = grid_to_compressed(from_compressed_block_grid.compressed_to_grid_x, from.x); + int cy_from = grid_to_compressed(from_compressed_block_grid.compressed_to_grid_y, from.y); + + int min_cx, max_cx; + int min_cy, max_cy; + int delta_cx; - //Determin the valid compressed grid location ranges - int min_cx = std::max(0, cx_from - rlim_x); - int max_cx = std::min(compressed_block_grid.compressed_to_grid_x.size() - 1, cx_from + rlim_x); - int delta_cx = max_cx - min_cx; + //Determine the valid compressed grid location ranges + if (to_type == from_type) { + min_cx = std::max(0, cx_from - rlim_x); + max_cx = std::min(to_compressed_block_grid.compressed_to_grid_x.size() - 1, cx_from + rlim_x); + delta_cx = max_cx - min_cx; - int min_cy = std::max(0, cy_from - rlim_y); - int max_cy = std::min(compressed_block_grid.compressed_to_grid_y.size() - 1, cy_from + rlim_y); + min_cy = std::max(0, cy_from - rlim_y); + max_cy = std::min(to_compressed_block_grid.compressed_to_grid_y.size() - 1, cy_from + rlim_y); + } else { + min_cx = 0; + max_cx = to_compressed_block_grid.compressed_to_grid_x.size() - 1; + delta_cx = max_cx - min_cx; + + min_cy = 0; + max_cy = to_compressed_block_grid.compressed_to_grid_y.size() - 1; + } int cx_to = OPEN; int cy_to = OPEN; @@ -2054,19 +2109,19 @@ static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc& // //The candidates are stored in a flat_map so we can efficiently find the set of valid //candidates with upper/lower bound. 
- auto y_lower_iter = compressed_block_grid.grid[cx_to].lower_bound(min_cy); - if (y_lower_iter == compressed_block_grid.grid[cx_to].end()) { + auto y_lower_iter = to_compressed_block_grid.grid[cx_to].lower_bound(min_cy); + if (y_lower_iter == to_compressed_block_grid.grid[cx_to].end()) { continue; } - auto y_upper_iter = compressed_block_grid.grid[cx_to].upper_bound(max_cy); + auto y_upper_iter = to_compressed_block_grid.grid[cx_to].upper_bound(max_cy); if (y_lower_iter->first > min_cy) { //No valid blocks at this x location which are within rlim_y // //Fall back to allow the whole y range - y_lower_iter = compressed_block_grid.grid[cx_to].begin(); - y_upper_iter = compressed_block_grid.grid[cx_to].end(); + y_lower_iter = to_compressed_block_grid.grid[cx_to].begin(); + y_upper_iter = to_compressed_block_grid.grid[cx_to].end(); min_cy = y_lower_iter->first; max_cy = (y_upper_iter - 1)->first; @@ -2112,15 +2167,15 @@ static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc& VTR_ASSERT(cy_to != OPEN); //Convert to true (uncompressed) grid locations - to.x = compressed_block_grid.compressed_to_grid_x[cx_to]; - to.y = compressed_block_grid.compressed_to_grid_y[cy_to]; + to.x = to_compressed_block_grid.compressed_to_grid_x[cx_to]; + to.y = to_compressed_block_grid.compressed_to_grid_y[cy_to]; //Each x/y location contains only a single type, so we can pick a random //z (capcity) location - to.z = vtr::irand(type->capacity - 1); + to.z = vtr::irand(to_type->capacity - 1); auto& device_ctx = g_vpr_ctx.device(); - VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].type == type, "Type must match"); + VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].type == to_type, "Type must match"); VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].width_offset == 0, "Should be at block base location"); VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].height_offset == 0, "Should be at block base location"); @@ -3153,11 +3208,10 @@ static int try_place_macro(int itype, int ipos, int imacro) { 
static void initial_placement_pl_macros(int macros_max_num_tries, int* free_locations) { int macro_placed; - int itype, itry, ipos; + int itry, ipos; ClusterBlockId blk_id; auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& device_ctx = g_vpr_ctx.device(); auto& place_ctx = g_vpr_ctx.placement(); auto& pl_macros = place_ctx.pl_macros; @@ -3169,49 +3223,62 @@ static void initial_placement_pl_macros(int macros_max_num_tries, int* free_loca // Assume that all the blocks in the macro are of the same type blk_id = pl_macros[imacro].members[0].blk_index; - itype = cluster_ctx.clb_nlist.block_type(blk_id)->index; - if (free_locations[itype] < int(pl_macros[imacro].members.size())) { - vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, - "Initial placement failed.\n" - "Could not place macro length %zu with head block %s (#%zu); not enough free locations of type %s (#%d).\n" - "VPR cannot auto-size for your circuit, please resize the FPGA manually.\n", - pl_macros[imacro].members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id), device_ctx.block_types[itype].name, itype); - } - // Try to place the macro first, if can be placed - place them, otherwise try again - for (itry = 0; itry < macros_max_num_tries && macro_placed == false; itry++) { - // Choose a random position for the head - ipos = vtr::irand(free_locations[itype] - 1); - - // Try to place the macro - macro_placed = try_place_macro(itype, ipos, imacro); + bool no_free_locations = true; + // Loop over all the possible equivalent tiles + for (int itype : get_available_tiles(cluster_ctx.clb_nlist.block_type(blk_id))) { + if (free_locations[itype] >= int(pl_macros[imacro].members.size())) { + no_free_locations = false; + } else { + continue; + } - } // Finished all tries + // Try to place the macro first, if can be placed - place them, otherwise try again + for (itry = 0; itry < macros_max_num_tries && macro_placed == false; itry++) { + // Choose a random position for the head + ipos = 
vtr::irand(free_locations[itype] - 1); - if (macro_placed == false) { - // if a macro still could not be placed after macros_max_num_tries times, - // go through the chip exhaustively to find a legal placement for the macro - // place the macro on the first location that is legal - // then set macro_placed = true; - // if there are no legal positions, error out - - // Exhaustive placement of carry macros - for (ipos = 0; ipos < free_locations[itype] && macro_placed == false; ipos++) { // Try to place the macro macro_placed = try_place_macro(itype, ipos, imacro); - } // Exhausted all the legal placement position for this macro + } // Finished all tries - // If macro could not be placed after exhaustive placement, error out if (macro_placed == false) { - // Error out - vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, - "Initial placement failed.\n" - "Could not place macro length %zu with head block %s (#%zu); not enough free locations of type %s (#%d).\n" - "Please manually size the FPGA because VPR can't do this yet.\n", - pl_macros[imacro].members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id), device_ctx.block_types[itype].name, itype); + // if a macro still could not be placed after macros_max_num_tries times, + // go through the chip exhaustively to find a legal placement for the macro + // place the macro on the first location that is legal + // then set macro_placed = true; + // if there are no legal positions, error out + + // Exhaustive placement of carry macros + for (ipos = 0; ipos < free_locations[itype] && macro_placed == false; ipos++) { + // Try to place the macro + macro_placed = try_place_macro(itype, ipos, imacro); + + } // Exhausted all the legal placement position for this macro + } + + if (macro_placed == true) { + break; } + } + if (no_free_locations) { + vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, + "Initial placement failed.\n" + "Could not place macro length %d with head block %s (#%zu); not enough free 
locations.\n" + "VPR cannot auto-size for your circuit, please resize the FPGA manually.\n", + pl_macros[imacro].members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id)); + } + + // If macro could not be placed even after exhaustive placement, error out + if (macro_placed == false) { + // Error out + vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, + "Initial placement failed.\n" + "Could not place macro length %d with head block %s (#%zu); not enough free locations.\n" + "Please manually size the FPGA because VPR can't do this yet.\n", + pl_macros[imacro].members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id)); } else { // This macro has been placed successfully, proceed to place the next macro continue; @@ -3222,10 +3289,9 @@ static void initial_placement_pl_macros(int macros_max_num_tries, int* free_loca /* Place blocks that are NOT a part of any macro. * We'll randomly place each block in the clustered netlist, one by one. */ static void initial_placement_blocks(int* free_locations, enum e_pad_loc_type pad_loc_type) { - int itype, ipos; + int ipos; auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.mutable_placement(); - auto& device_ctx = g_vpr_ctx.device(); for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { if (place_ctx.block_locs[blk_id].loc.x != -1) { // -1 is a sentinel for an empty block @@ -3241,45 +3307,54 @@ static void initial_placement_blocks(int* free_locations, enum e_pad_loc_type pa * Choose one randomly and put blk_id there. Then we don't want to pick * that location again, so remove it from the free_locations array. 
*/ - itype = cluster_ctx.clb_nlist.block_type(blk_id)->index; - if (free_locations[itype] <= 0) { - vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, - "Initial placement failed.\n" - "Could not place block %s (#%zu); no free locations of type %s (#%d).\n", - cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id), device_ctx.block_types[itype].name, itype); - } + bool no_free_locations = true; + // Loop over all the possible equivalent tiles + for (int itype : get_available_tiles(cluster_ctx.clb_nlist.block_type(blk_id))) { + if (free_locations[itype] > 0) { + no_free_locations = false; + } else { + continue; + } + + t_pl_loc to; + initial_placement_location(free_locations, itype, ipos, to); + + // Make sure that the position is EMPTY_BLOCK before placing the block down + VTR_ASSERT(place_ctx.grid_blocks[to.x][to.y].blocks[to.z] == EMPTY_BLOCK_ID); - t_pl_loc to; - initial_placement_location(free_locations, blk_id, ipos, to); + place_ctx.grid_blocks[to.x][to.y].blocks[to.z] = blk_id; + place_ctx.grid_blocks[to.x][to.y].usage++; - // Make sure that the position is EMPTY_BLOCK before placing the block down - VTR_ASSERT(place_ctx.grid_blocks[to.x][to.y].blocks[to.z] == EMPTY_BLOCK_ID); + place_ctx.block_locs[blk_id].loc = to; - place_ctx.grid_blocks[to.x][to.y].blocks[to.z] = blk_id; - place_ctx.grid_blocks[to.x][to.y].usage++; + //Mark IOs as fixed if specifying a (fixed) random placement + if (is_io_type(cluster_ctx.clb_nlist.block_type(blk_id)) && pad_loc_type == RANDOM) { + place_ctx.block_locs[blk_id].is_fixed = true; + } - place_ctx.block_locs[blk_id].loc = to; + /* Ensure randomizer doesn't pick this location again, since it's occupied. Could shift all the + * legal positions in legal_pos to remove the entry (choice) we just used, but faster to + * just move the last entry in legal_pos to the spot we just used and decrement the + * count of free_locations. 
*/ + legal_pos[itype][ipos] = legal_pos[itype][free_locations[itype] - 1]; /* overwrite used block position */ + free_locations[itype]--; - //Mark IOs as fixed if specifying a (fixed) random placement - if (is_io_type(cluster_ctx.clb_nlist.block_type(blk_id)) && pad_loc_type == RANDOM) { - place_ctx.block_locs[blk_id].is_fixed = true; + //Do not check other type as the block has already been placed + break; } - /* Ensure randomizer doesn't pick this location again, since it's occupied. Could shift all the - * legal positions in legal_pos to remove the entry (choice) we just used, but faster to - * just move the last entry in legal_pos to the spot we just used and decrement the - * count of free_locations. */ - legal_pos[itype][ipos] = legal_pos[itype][free_locations[itype] - 1]; /* overwrite used block position */ - free_locations[itype]--; + // Check if there were no available locations + if (no_free_locations) { + vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, + "Initial placement failed.\n" + "Could not place block %s (#%zu); no free locations\n", + cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id)); + } } } } -static void initial_placement_location(const int* free_locations, ClusterBlockId blk_id, int& ipos, t_pl_loc& to) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - - int itype = cluster_ctx.clb_nlist.block_type(blk_id)->index; - +static void initial_placement_location(const int* free_locations, int itype, int& ipos, t_pl_loc& to) { ipos = vtr::irand(free_locations[itype] - 1); to = legal_pos[itype][ipos]; } @@ -3536,7 +3611,7 @@ static int check_block_placement_consistency() { if (EMPTY_BLOCK_ID == bnum || INVALID_BLOCK_ID == bnum) continue; - if (cluster_ctx.clb_nlist.block_type(bnum) != device_ctx.grid[i][j].type) { + if (!cluster_ctx.clb_nlist.block_type(bnum)->is_available_tile_index(device_ctx.grid[i][j].type->index)) { VTR_LOG_ERROR("Block %zu type (%s) does not match grid location (%zu,%zu) type (%s).\n", size_t(bnum), 
cluster_ctx.clb_nlist.block_type(bnum)->name, i, j, device_ctx.grid[i][j].type->name); error++; @@ -3657,6 +3732,19 @@ static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, timing_reporter.report_timing_setup(placer_opts.post_place_timing_report_file, *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths); } +std::vector get_available_tiles(t_type_ptr type) { + std::vector types(1, type->index); + + for (int i = 0; i < type->num_equivalent_tiles; i++) { + auto result = type->equivalent_tiles.find(i); + VTR_ASSERT(result != type->equivalent_tiles.end()); + + types.push_back(result->second->index); + } + + return types; +} + #if 0 static void update_screen_debug(); diff --git a/vpr/src/timing/clb_delay_calc.inl b/vpr/src/timing/clb_delay_calc.inl index b19effad62b..47d76bf5d36 100644 --- a/vpr/src/timing/clb_delay_calc.inl +++ b/vpr/src/timing/clb_delay_calc.inl @@ -73,7 +73,8 @@ inline float ClbDelayCalc::pb_route_delay(ClusterBlockId clb, int pb_route_idx, inline const t_pb_graph_edge* ClbDelayCalc::find_pb_graph_edge(ClusterBlockId clb, int pb_route_idx) const { auto& cluster_ctx = g_vpr_ctx.clustering(); - int type_index = cluster_ctx.clb_nlist.block_type(clb)->index; + //Getting the original block type in case the CLB has been placed in an equivalent tile. 
+ int type_index = cluster_ctx.clb_nlist.block_type(clb, false)->index; const t_pb* pb = cluster_ctx.clb_nlist.block_pb(clb); if (pb->pb_route.count(pb_route_idx)) { @@ -84,7 +85,7 @@ inline const t_pb_graph_edge* ClbDelayCalc::find_pb_graph_edge(ClusterBlockId cl const t_pb_graph_pin* pb_gpin = intra_lb_pb_pin_lookup_.pb_gpin(type_index, pb_route_idx); const t_pb_graph_pin* upstream_pb_gpin = intra_lb_pb_pin_lookup_.pb_gpin(type_index, upstream_pb_route_idx); - return find_pb_graph_edge(upstream_pb_gpin, pb_gpin); + return find_pb_graph_edge(upstream_pb_gpin, pb_gpin); } } diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 3df718b6099..21ea0b3ca19 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -90,6 +90,10 @@ static AtomPinId find_atom_pin_for_pb_route_id(ClusterBlockId clb, int pb_route_ static bool block_type_contains_blif_model(t_type_ptr type, const std::regex& blif_model_regex); static bool pb_type_contains_blif_model(const t_pb_type* pb_type, const std::regex& blif_model_regex); +static t_type_ptr get_equivalent_tile(t_type_ptr type, int eq_itype); +static bool try_sync_equivalent_tiles(ClusterBlockId clb, t_type_ptr logic_type, t_type_ptr phyical_type); +static int get_type_pin(std::unordered_map> pin_mappings, int eq_type_index, int eq_pin); + /******************** Subroutine definitions *********************************/ const t_model* find_model(const t_model* models, const std::string& name, bool required) { @@ -134,6 +138,46 @@ void print_tabs(FILE* fpout, int num_tab) { } } +static t_type_ptr get_equivalent_tile(t_type_ptr type, int eq_itype) { + auto result = type->equivalent_tiles.find(eq_itype); + VTR_ASSERT(result != type->equivalent_tiles.end()); + + return result->second; +} + +static int get_type_pin(std::unordered_map> pin_mappings, int eq_type_index, int eq_pin) { + auto tile_result = pin_mappings.find(eq_type_index); + VTR_ASSERT(tile_result != pin_mappings.end()); + + auto pin_mapping = 
tile_result->second; + auto pin_result = pin_mapping.find(eq_pin); + VTR_ASSERT(pin_result != pin_mapping.end()); + + return pin_result->second; +} + +static bool try_sync_equivalent_tiles(ClusterBlockId clb, t_type_ptr logic_type, t_type_ptr physical_type) { + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + auto& clb_nlist = cluster_ctx.clb_nlist; + + //Searching for equivalent tiles in the logic_type + for (int itype = 0; itype < logic_type->num_equivalent_tiles; itype++) { + if (get_equivalent_tile(logic_type, itype)->index == physical_type->index) { + clb_nlist.set_equivalent_block_type(clb, itype, physical_type); + + //Setting new logical to physical pin mapping + for (auto pin : clb_nlist.block_pins(clb)) { + int original_ipin = clb_nlist.pin_physical_index(pin); + int new_ipin = get_type_pin(logic_type->equivalent_tile_pin_mapping, itype, original_ipin); + clb_nlist.set_pin_physical_index(pin, new_ipin); + } + return true; + } + } + + return false; +} + /* Points the place_ctx.grid_blocks structure back to the blocks list */ void sync_grid_to_blocks() { auto& place_ctx = g_vpr_ctx.mutable_placement(); @@ -171,11 +215,16 @@ void sync_grid_to_blocks() { } /* Check types match */ - if (cluster_ctx.clb_nlist.block_type(blk_id) != device_ctx.grid[blk_x][blk_y].type) { - VPR_THROW(VPR_ERROR_PLACE, "A block is in a grid location (%d x %d) with a conflicting types '%s' and '%s' .\n", - blk_x, blk_y, - cluster_ctx.clb_nlist.block_type(blk_id)->name, - device_ctx.grid[blk_x][blk_y].type->name); + auto logic_type = cluster_ctx.clb_nlist.block_type(blk_id); + auto physical_type = device_ctx.grid[blk_x][blk_y].type; + + if (logic_type != physical_type) { + if (!try_sync_equivalent_tiles(blk_id, logic_type, physical_type)) { + VPR_THROW(VPR_ERROR_PLACE, "A block is in a grid location (%d x %d) with a conflicting types '%s' and '%s' .\n", + blk_x, blk_y, + cluster_ctx.clb_nlist.block_type(blk_id)->name, + device_ctx.grid[blk_x][blk_y].type->name); + } } /* Check 
already in use */ @@ -444,7 +493,9 @@ static AtomPinId find_atom_pin_for_pb_route_id(ClusterBlockId clb, int pb_route_ VTR_ASSERT_MSG(cluster_ctx.clb_nlist.block_pb(clb)->pb_route[pb_route_id].atom_net_id, "PB route should correspond to a valid atom net"); //Find the graph pin associated with this pb_route - const t_pb_graph_pin* gpin = pb_gpin_lookup.pb_gpin(cluster_ctx.clb_nlist.block_type(clb)->index, pb_route_id); + int index = cluster_ctx.clb_nlist.block_type(clb, false)->index; + + const t_pb_graph_pin* gpin = pb_gpin_lookup.pb_gpin(index, pb_route_id); VTR_ASSERT(gpin); //Get the PB associated with this block @@ -542,25 +593,36 @@ int find_clb_pb_pin(ClusterBlockId clb, int clb_pin) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.placement(); - VTR_ASSERT_MSG(clb_pin < cluster_ctx.clb_nlist.block_type(clb)->num_pins, "Must be a valid top-level pin"); + auto& clb_nlist = cluster_ctx.clb_nlist; + + auto type = clb_nlist.block_type(clb); - int pb_pin = -1; + int pin = clb_pin; + + // In case an equivalent tile is selected, the CLB block type will be different (e.g. CLB logic type is LAB, CLB physical type is MLAB). + // Therefore, I need to retrieve the pin mapping from the LAB type by setting the `false` flag when calling block_type. 
+ if (clb_nlist.block_eq_type_effective(clb)) { + int eq_type_index = clb_nlist.block_eq_type_index(clb); + auto block_type = clb_nlist.block_type(clb, false); + + pin = get_type_pin(block_type->equivalent_tile_inverse_pin_mapping, eq_type_index, clb_pin); + } + + int pb_pin = OPEN; if (place_ctx.block_locs[clb].nets_and_pins_synced_to_z_coordinate) { //Pins have been offset by z-coordinate, need to remove offset - t_type_ptr type = cluster_ctx.clb_nlist.block_type(clb); VTR_ASSERT(type->num_pins % type->capacity == 0); int num_basic_block_pins = type->num_pins / type->capacity; /* Logical location and physical location is offset by z * max_num_block_pins */ - pb_pin = clb_pin - place_ctx.block_locs[clb].loc.z * num_basic_block_pins; + pb_pin = pin - place_ctx.block_locs[clb].loc.z * num_basic_block_pins; } else { //No offset - pb_pin = clb_pin; + pb_pin = pin; } VTR_ASSERT(pb_pin >= 0); - return pb_pin; } @@ -569,21 +631,35 @@ int find_pb_pin_clb_pin(ClusterBlockId clb, int pb_pin) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.placement(); - int clb_pin = -1; + auto& clb_nlist = cluster_ctx.clb_nlist; + + auto type = clb_nlist.block_type(clb); + + int pin = pb_pin; + + // In case an equivalent tile is selected, the CLB block type will be different (e.g. CLB logic type is LAB, CLB physical type is MLAB). + // Therefore, I need to retrieve the pin mapping from the LAB type by setting the `false` flag when calling block_type. 
+ if (clb_nlist.block_eq_type_effective(clb)) { + int eq_type_index = clb_nlist.block_eq_type_index(clb); + auto block_type = clb_nlist.block_type(clb, false); + + pin = get_type_pin(block_type->equivalent_tile_pin_mapping, eq_type_index, pb_pin); + } + + int clb_pin = OPEN; if (place_ctx.block_locs[clb].nets_and_pins_synced_to_z_coordinate) { //Pins have been offset by z-coordinate, need to remove offset - t_type_ptr type = cluster_ctx.clb_nlist.block_type(clb); VTR_ASSERT(type->num_pins % type->capacity == 0); int num_basic_block_pins = type->num_pins / type->capacity; /* Logical location and physical location is offset by z * max_num_block_pins */ - clb_pin = pb_pin + place_ctx.block_locs[clb].loc.z * num_basic_block_pins; + clb_pin = pin + place_ctx.block_locs[clb].loc.z * num_basic_block_pins; } else { //No offset - clb_pin = pb_pin; + clb_pin = pin; } - VTR_ASSERT(clb_pin >= 0); + VTR_ASSERT(clb_pin >= 0); return clb_pin; } diff --git a/vtr_flow/arch/equivalent_tiles/slice.xml b/vtr_flow/arch/equivalent_tiles/slice.xml new file mode 100644 index 00000000000..b8a16a781eb --- /dev/null +++ b/vtr_flow/arch/equivalent_tiles/slice.xml @@ -0,0 +1,1625 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io_tile.in io_tile.out + io_tile.in io_tile.out + io_tile.in io_tile.out + io_tile.in io_tile.out + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 1 1 1 1 1 1 1 1 1 1 1 1 + 1 1 1 1 1 1 1 1 1 1 1 1 + + + diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_equivalent_tiles/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_equivalent_tiles/config/config.txt new file mode 100644 index 00000000000..7ec5b84e895 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_equivalent_tiles/config/config.txt @@ -0,0 +1,31 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/microbenchmarks + +# Path to directory of architectures to use +archs_dir=arch/equivalent_tiles + +# Path to directory of SDC files to use +sdc_dir = sdc + +# Add circuits to list to sweep +circuit_list_add=carry_chain.blif + +# Add architectures to list to sweep +arch_list_add=slice.xml + +# Parse info and how to parse +parse_file=vpr_standard.txt + +# How to 
parse QoR info +qor_parse_file=qor_standard.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +# Script parameters +#script_params="" +script_params = -track_memory_usage -lut_size 1 -starting_stage vpr From 980dc679343bda94e3ef25b22b50b5c4cb7db47e Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Wed, 15 May 2019 18:04:01 +0200 Subject: [PATCH 10/15] vtr_flow: added script to add tiles to architecture xml I have also changed .travis.yml to install the lxml python package needed by the script Signed-off-by: Alessandro Comodi --- .travis.yml | 1 + vtr_flow/scripts/add_tiles.py | 142 +++++++++++++++++++++++++++++++ vtr_flow/scripts/run_vtr_flow.pl | 5 +- 3 files changed, 146 insertions(+), 2 deletions(-) create mode 100755 vtr_flow/scripts/add_tiles.py diff --git a/.travis.yml b/.travis.yml index 2fe883ecece..186a0779a85 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,6 +36,7 @@ addons: - libxml++2.6-dev - perl - python + - python-lxml - texinfo - time - valgrind diff --git a/vtr_flow/scripts/add_tiles.py b/vtr_flow/scripts/add_tiles.py new file mode 100755 index 00000000000..efcf092a17a --- /dev/null +++ b/vtr_flow/scripts/add_tiles.py @@ -0,0 +1,142 @@ +#!/usr/bin/env python + +""" +This script is intended to modify the architecture description file to be compliant with +the new format. + +It moves the top level pb_types attributes and tags to the tiles high-level tag. + +BEFORE: + + + + + + + + + + + + + + + +AFTER: + + + + + + + + + + + + + + + + + +""" + +""" +This script is intended to modify the architecture description file to be compliant with +the new format. + +It moves the top level pb_types attributes and tags to the tiles high-level tag.
+ +BEFORE: + + + + + + + + + + + + + + + +AFTER: + + + + + + + + + + + + + + + + + +""" + +from lxml import etree as ET +import argparse + +TAGS_TO_SWAP = ['fc', 'pinlocations', 'switchblock_locations'] +ATTR_TO_REMOVE = ['area', 'height', 'width', 'capacity'] + +def swap_tags(tile, pb_type): + # Moving tags from top level pb_type to tile + for child in pb_type: + if child.tag in TAGS_TO_SWAP: + pb_type.remove(child) + tile.append(child) + + +def main(): + parser = argparse.ArgumentParser( + description="Moves top level pb_types to tiles tag." + ) + parser.add_argument( + '--arch_xml', + required=True, + help="Input arch.xml that needs to be modified to move the top level pb_types to the `tiles` tag." + ) + + args = parser.parse_args() + + arch_xml = ET.ElementTree() + root_element = arch_xml.parse(args.arch_xml) + + tiles = ET.SubElement(root_element, 'tiles') + + top_pb_types = [] + for pb_type in root_element.iter('pb_type'): + if pb_type.getparent().tag == 'complexblocklist': + top_pb_types.append(pb_type) + + for pb_type in top_pb_types: + tile = ET.SubElement(tiles, 'tile') + attrs = pb_type.attrib + + for attr in attrs: + tile.set(attr, pb_type.get(attr)) + + # Remove attributes of top level pb_types only + for attr in ATTR_TO_REMOVE: + pb_type.attrib.pop(attr, None) + + swap_tags(tile, pb_type) + + print(ET.tostring(arch_xml, pretty_print=True).decode('utf-8')) + + +if __name__ == '__main__': + main() diff --git a/vtr_flow/scripts/run_vtr_flow.pl b/vtr_flow/scripts/run_vtr_flow.pl index 1d59a1e9bac..2db3b4d50a7 100755 --- a/vtr_flow/scripts/run_vtr_flow.pl +++ b/vtr_flow/scripts/run_vtr_flow.pl @@ -363,7 +363,7 @@ # Read arch XML my $tpp = XML::TreePP->new(); -my $xml_tree = $tpp->parsefile($architecture_file_path); +my $xml_tree = $tpp->parsefile("$architecture_file_path"); # Get lut size if undefined if (!defined $lut_size) { @@ -416,7 +416,8 @@ #system "cp $odin2_base_config" my $architecture_file_path_new = "$temp_dir$architecture_file_name"; -copy( 
$architecture_file_path, $architecture_file_path_new ); +my $ret = `$vtr_flow_path/scripts/add_tiles.py --arch_xml $architecture_file_path > $architecture_file_path_new`; +#copy( "$architecture_file_path", $architecture_file_path_new ); $architecture_file_path = $architecture_file_path_new; my $circuit_file_path_new = "$temp_dir$benchmark_name" . file_ext_for_stage($starting_stage - 1, $circuit_suffix); From bc50eee4605ee779d2a3f7e1a482e05d6ee5b5fe Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Thu, 16 May 2019 18:28:10 +0200 Subject: [PATCH 11/15] vtr_flow: added comment Signed-off-by: Alessandro Comodi --- vtr_flow/scripts/run_vtr_flow.pl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vtr_flow/scripts/run_vtr_flow.pl b/vtr_flow/scripts/run_vtr_flow.pl index 2db3b4d50a7..c851dc4886a 100755 --- a/vtr_flow/scripts/run_vtr_flow.pl +++ b/vtr_flow/scripts/run_vtr_flow.pl @@ -363,7 +363,7 @@ # Read arch XML my $tpp = XML::TreePP->new(); -my $xml_tree = $tpp->parsefile("$architecture_file_path"); +my $xml_tree = $tpp->parsefile($architecture_file_path); # Get lut size if undefined if (!defined $lut_size) { @@ -417,6 +417,8 @@ my $architecture_file_path_new = "$temp_dir$architecture_file_name"; my $ret = `$vtr_flow_path/scripts/add_tiles.py --arch_xml $architecture_file_path > $architecture_file_path_new`; + +# There is no need to copy the arch description file as it is produced by the add_tiles.py script #copy( "$architecture_file_path", $architecture_file_path_new ); $architecture_file_path = $architecture_file_path_new; From 76376574394b344cea33162764cdc926f50be0f3 Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Thu, 16 May 2019 20:30:00 +0200 Subject: [PATCH 12/15] ODIN II: regression script modifies arch.xml with tiles Signed-off-by: Alessandro Comodi --- ODIN_II/verify_odin.sh | 98 +++++++++++++++++++++--------------------- 1 file changed, 50 insertions(+), 48 deletions(-) diff --git a/ODIN_II/verify_odin.sh
b/ODIN_II/verify_odin.sh index 0416d9b78f8..fc2b09bb418 100755 --- a/ODIN_II/verify_odin.sh +++ b/ODIN_II/verify_odin.sh @@ -64,9 +64,9 @@ function exit_program() { if [ -f ${NEW_RUN_DIR}/test_failures.log ]; then FAIL_COUNT=$(wc -l ${NEW_RUN_DIR}/test_failures.log | cut -d ' ' -f 1) fi - + FAILURE=$(( ${FAIL_COUNT} )) - + if [ "_${FAILURE}" != "_0" ] then echo "Failed ${FAILURE} benchmarks" @@ -107,7 +107,7 @@ _prt_cur_arg() { function help() { printf "Called program with $INPUT - Usage: + Usage: ${THIS_SCRIPT_EXEC} [ OPTIONS / FLAGS ] @@ -209,7 +209,7 @@ function cleanup_temp() { fi for runs in ${OUTPUT_DIRECTORY}/run* - do + do rm -Rf ${runs} done @@ -267,14 +267,14 @@ function mv_failed() { # Helper Functions function flag_is_number() { case "_$2" in - _) + _) echo "Passed an empty value for $1" help exit 120 ;; *) case $2 in - ''|*[!0-9]*) + ''|*[!0-9]*) echo "Passed a non number value [$2] for $1" help exit 120 @@ -312,7 +312,7 @@ function _set_flag() { _batch_sim_flag=$(_set_if ${_BATCH_SIM} "--batch") _use_best_coverage_flag=$(_set_if ${_BEST_COVERAGE_OFF} "--best_coverage") _perf_flag=$(_set_if ${_USE_PERF} "--tool perf") - + _vector_flag="-g ${_VECTORS}" _timeout_flag="--time_limit ${_TIMEOUT}s" _simulation_threads_flag=$([ "${_SIM_THREADS}" != "1" ] && echo "-j ${_SIM_THREADS}") @@ -323,20 +323,20 @@ function _set_flag() { function parse_args() { while [[ "$#" > 0 ]] - do - case $1 in + do + case $1 in # Help Desk -h|--help) echo "Printing Help information" help exit_program - + ## directory in benchmark ;;-t|--test) # this is handled down stream if [ "_$2" == "_" ] - then + then echo "empty argument for $1" exit 120 fi @@ -349,11 +349,11 @@ function parse_args() { ;;-a|--adder_def) if [ "_$2" == "_" ] - then + then echo "empty argument for $1" exit 120 fi - + _ADDER_DEF=$2 if [ "${_ADDER_DEF}" != "default" ] && [ "${_ADDER_DEF}" != "optimized" ] && [ ! 
-f "$(readlink -f ${_ADDER_DEF})" ] @@ -367,11 +367,11 @@ function parse_args() { ;;-d|--output_dir) if [ "_$2" == "_" ] - then + then echo "empty argument for $1" exit 120 fi - + _RUN_DIR_OVERRIDE=$2 if [ ! -d "${_RUN_DIR_OVERRIDE}" ] @@ -409,45 +409,45 @@ function parse_args() { shift # Boolean flags - ;;-g|--generate_bench) + ;;-g|--generate_bench) _GENERATE_BENCH="on" echo "generating output vector for test given predefined input" - ;;-o|--generate_output) + ;;-o|--generate_output) _GENERATE_OUTPUT="on" echo "generating input and output vector for test" - ;;-c|--clean) + ;;-c|--clean) echo "Cleaning temporary run in directory" cleanup_temp - ;;-l|--limit_ressource) + ;;-l|--limit_ressource) _LIMIT_RESSOURCE="on" echo "limiting ressources for benchmark, this can help with small hardware" - ;;-v|--valgrind) + ;;-v|--valgrind) _VALGRIND="on" echo "Using Valgrind for benchmarks" - ;;-B|--best_coverage_off) + ;;-B|--best_coverage_off) _BEST_COVERAGE_OFF="off" echo "turning off using best coverage for benchmark vector generation" - ;;-b|--batch_sim) + ;;-b|--batch_sim) _BATCH_SIM="on" echo "Using Batch multithreaded simulation with -j threads" ;;-p|--perf) _USE_PERF="on" echo "Using perf for synthesis and simulation" - - ;;-f|--force_simulate) + + ;;-f|--force_simulate) _FORCE_SIM="on" - echo "Forcing Simulation" + echo "Forcing Simulation" - ;;*) + ;;*) echo "Unknown parameter passed: $1" - help + help ctrl_c esac shift @@ -477,9 +477,9 @@ function sim() { shift while [[ "$#" > 0 ]] - do + do case $1 in - --custom_args_file) + --custom_args_file) with_custom_args=1 ;; @@ -517,7 +517,7 @@ function sim() { *) echo "Unknown internal parameter passed: $1" - config_help + config_help ctrl_c ;; esac @@ -553,15 +553,15 @@ function sim() { ${_timeout_flag} ${_low_ressource_flag} ${_valgrind_flag}" - + if [ "${_USE_PERF}" == "on" ] then wrapper_odin_command="${wrapper_odin_command} ${_perf_flag} ${DIR}/perf.data" fi odin_command="${DEFAULT_CMD_PARAM} - $(cat ${dir}/odin.args 
| tr '\n' ' ') - -o ${blif_file} + $(cat ${dir}/odin.args | tr '\n' ' ') + -o ${blif_file} -sim_dir ${DIR}" echo $(echo "${wrapper_odin_command} ${odin_command}" | tr '\n' ' ' | tr -s ' ' ) > ${DIR}/odin_param @@ -607,13 +607,6 @@ function sim() { for arches in ${arch_list} do - - arch_cmd="" - if [ -e ${arches} ] - then - arch_cmd="-a ${arches}" - fi - arch_basename=${arches%.xml} arch_name=${arch_basename##*/} @@ -622,6 +615,14 @@ function sim() { DIR="${NEW_RUN_DIR}/${TEST_FULL_REF}" blif_file="${DIR}/odin.blif" + arch_cmd="" + if [ -e ${arches} ] + then + tiles_cmd="../vtr_flow/scripts/add_tiles.py" + arch_file="${arch_name}.xml" + ${tiles_cmd} --arch_xml ${arches} > ${DIR}/${arch_name}.xml + arch_cmd="-a ${DIR}/${arch_name}.xml" + fi #build commands mkdir -p $DIR @@ -643,6 +644,7 @@ function sim() { wrapper_synthesis_command="${wrapper_synthesis_command} ${_perf_flag} ${DIR}/perf.data" fi + synthesis_command="${DEFAULT_CMD_PARAM} ${arch_cmd} -V ${benchmark} @@ -712,7 +714,7 @@ function sim() { #run the simulation find ${NEW_RUN_DIR}/${bench_type}/ -name sim_param | xargs -n1 -P$threads -I sim_cmd ${SHELL} -c '$(cat sim_cmd)' - + # move the log for sim_log in $(find ${NEW_RUN_DIR}/${bench_type}/ -name "simulation.log") do @@ -722,7 +724,7 @@ function sim() { disable_failed ${global_simulation_failure} done - + mkdir -p ${NEW_RUN_DIR}/${bench_type}/vectors # move the vectors @@ -733,7 +735,7 @@ function sim() { cp ${sim_input_vectors} ${NEW_RUN_DIR}/${bench_type}/vectors/${BM_NAME} mv ${sim_input_vectors} ${BM_DIR}/${BM_NAME} - + done @@ -803,7 +805,7 @@ function debug_failures() { echo "Which benchmark would you like to debug (type 'quit' or 'q' to exit)?" 
echo "============" - echo "${FAILURES_LIST}" + echo "${FAILURES_LIST}" echo "============" printf "enter a substring: " @@ -813,7 +815,7 @@ function debug_failures() { echo "exiting" break ;; - *) + *) BM="${FAILED_RUN_DIR}/$(echo "${FAILURES_LIST}" | grep ${INPUT_BM} | tail -n 1)" if [ "_${BM}" != "_" ] && [ -f "${BM}/${CMD_FILE_NAME}" ] @@ -854,7 +856,7 @@ LIGHT_LIST=( "operators" "arch" "other" - "micro" + "micro" "syntax" "FIR" ) @@ -934,8 +936,8 @@ case ${_TEST} in full_suite) run_all - ;; - + ;; + heavy_suite) run_heavy_suite ;; From 15ebc1324521a1d04d4f19826fe2287e6c060af9 Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Thu, 16 May 2019 23:03:31 +0200 Subject: [PATCH 13/15] vtr_flow: added tiles step to upgrade arch Signed-off-by: Alessandro Comodi --- vtr_flow/scripts/add_tiles.py | 43 ------------ vtr_flow/scripts/upgrade_arch.py | 115 +++++++++++++++++++++++++++---- 2 files changed, 102 insertions(+), 56 deletions(-) diff --git a/vtr_flow/scripts/add_tiles.py b/vtr_flow/scripts/add_tiles.py index efcf092a17a..14794bae284 100755 --- a/vtr_flow/scripts/add_tiles.py +++ b/vtr_flow/scripts/add_tiles.py @@ -43,49 +43,6 @@ """ -""" -This script is intended to modify the architecture description file to be compliant with -the new format. - -It moves the top level pb_types attributes and tags to the tiles high-level tag. 
- -BEFORE: - - - - - - - - - - - - - - - -AFTER: - - - - - - - - - - - - - - - - - -""" - from lxml import etree as ET import argparse diff --git a/vtr_flow/scripts/upgrade_arch.py b/vtr_flow/scripts/upgrade_arch.py index ef6dd8f7310..64cba982360 100755 --- a/vtr_flow/scripts/upgrade_arch.py +++ b/vtr_flow/scripts/upgrade_arch.py @@ -39,6 +39,7 @@ def __init__(self): "upgrade_port_equivalence", "upgrade_complex_sb_num_conns", "add_missing_comb_model_internal_timing_edges", + "move_top_level_pb_type_to_tiles", ] def parse_args(): @@ -137,6 +138,11 @@ def main(): if result: modified = True + if "move_top_level_pb_type_to_tiles" in args.features: + result = move_top_level_pb_type_to_tiles(arch) + if result: + modified = True + if modified: if args.debug: root.write(sys.stdout, pretty_print=args.pretty) @@ -155,7 +161,7 @@ def add_model_timing(arch): #Find all primitive pb types prim_pbs = arch.findall(".//pb_type[@blif_model]") - #Build up the timing specifications from + #Build up the timing specifications from default_models = frozenset([".input", ".output", ".latch", ".names"]) primitive_timing_specs = {} for prim_pb in prim_pbs: @@ -237,7 +243,7 @@ def upgrade_fc_overrides(arch): port = old_pin_override.attrib['name'] fc_type = old_pin_override.attrib['fc_type'] fc_val = old_pin_override.attrib['fc_val'] - + fc_tag.remove(old_pin_override) new_attrib = OrderedDict() @@ -285,7 +291,7 @@ def upgrade_fc_overrides(arch): new_attrib["fc_val"] = out_val fc_override = ET.SubElement(fc_tag, "fc_override", attrib=new_attrib) - + changed = True return changed @@ -350,7 +356,7 @@ def upgrade_device_layout(arch): device_auto.attrib['height'] = height else: assert False, "Unrecognized specification" - + if 0: for type, locs in type_to_grid_specs.iteritems(): print "Type:", type @@ -370,7 +376,7 @@ def upgrade_device_layout(arch): device_auto.text = "\n" + 2*INDENT device_auto.tail = "\n" - + for type_name, locs in type_to_grid_specs.iteritems(): for loc in locs: assert loc.tag 
== "loc" @@ -408,8 +414,8 @@ def upgrade_device_layout(arch): col_spec.attrib['priority'] = str(priority) col_spec.tail = "\n" + 2*INDENT - #Classic VPR fills blank spaces (e.g. where a height > 1 block won't fit) with "EMPTY" - #instead of with the underlying type. To replicate that we create a col spec with the same + #Classic VPR fills blank spaces (e.g. where a height > 1 block won't fit) with "EMPTY" + #instead of with the underlying type. To replicate that we create a col spec with the same #location information, but of type 'EMPTY' and with slightly lower priority than the real type. col_empty_spec = ET.SubElement(device_auto, 'col') @@ -451,8 +457,8 @@ def upgrade_device_layout(arch): col_spec.attrib['priority'] = str(priority) col_spec.tail = "\n" + 2*INDENT - #Classic VPR fills blank spaces (e.g. where a height > 1 block won't fit) with "EMPTY" - #instead of with the underlying type. To replicate that we create a col spec with the same + #Classic VPR fills blank spaces (e.g. where a height > 1 block won't fit) with "EMPTY" + #instead of with the underlying type. To replicate that we create a col spec with the same #location information, but of type 'EMPTY' and with slightly lower priority than the real type. 
col_empty_spec = ET.SubElement(device_auto, 'col') col_empty_spec.attrib['type'] = "EMPTY" @@ -496,7 +502,7 @@ def upgrade_device_layout(arch): assert False, "Unrecognzied type tag {}".format(loc_type) return changed - + def remove_io_chan_distr(arch): """ Removes the legacy '' channel width distribution tags @@ -631,7 +637,7 @@ def upgrade_connection_block_input_switch(arch): # #Create the switch # - + switch_name = "ipin_cblock" #Make sure the switch name doesn't already exist @@ -673,7 +679,7 @@ def upgrade_switch_types(arch): assert switchlist_tag is not None for switch_tag in switchlist_tag.findall("./switch"): - + switch_type = switch_tag.attrib['type'] if switch_type in ['buffered', 'pass_trans']: @@ -710,7 +716,7 @@ def rename_fc_attributes(arch): def remove_longline_sb_cb(arch): """ Drops and of any types with length="longline", - since we now assume longlines have full switch block/connection block + since we now assume longlines have full switch block/connection block populations """ @@ -867,5 +873,88 @@ def add_missing_comb_model_internal_timing_edges(arch): return changed +def move_top_level_pb_type_to_tiles(arch): + """ + This script is intended to modify the architecture description file to be compliant with + the new format. + + It moves the top level pb_types attributes and tags to the tiles high-level tag. 
+ + BEFORE: + + + + + + + + + + + + + + + + AFTER: + + + + + + + + + + + + + + + + + + """ + + changed = False + + TAGS_TO_SWAP = ['fc', 'pinlocations', 'switchblock_locations'] + ATTR_TO_REMOVE = ['area', 'height', 'width', 'capacity'] + + def swap_tags(tile, pb_type): + # Moving tags from top level pb_type to tile + for child in pb_type: + if child.tag in TAGS_TO_SWAP: + pb_type.remove(child) + tile.append(child) + + tiles = arch.find('tiles') + + if tiles is None: + tiles = ET.SubElement(arch, 'tiles') + + top_pb_types = [] + for pb_type in arch.iter('pb_type'): + if pb_type.getparent().tag == 'complexblocklist': + top_pb_types.append(pb_type) + + for pb_type in top_pb_types: + tile = ET.SubElement(tiles, 'tile') + attrs = pb_type.attrib + + for attr in attrs: + tile.set(attr, pb_type.get(attr)) + + # Remove attributes of top level pb_types only + for attr in ATTR_TO_REMOVE: + pb_type.attrib.pop(attr, None) + + swap_tags(tile, pb_type) + + changed = True + + return changed + if __name__ == "__main__": main() From e1200cd8afa52629364686db0658522caeae8d0f Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Fri, 31 May 2019 18:29:04 +0200 Subject: [PATCH 14/15] ODIN_II: corrected arch.xml file generation Signed-off-by: Alessandro Comodi --- ODIN_II/verify_odin.sh | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/ODIN_II/verify_odin.sh b/ODIN_II/verify_odin.sh index fc2b09bb418..089bed1dd82 100755 --- a/ODIN_II/verify_odin.sh +++ b/ODIN_II/verify_odin.sh @@ -109,8 +109,6 @@ function help() { printf "Called program with $INPUT Usage: ${THIS_SCRIPT_EXEC} [ OPTIONS / FLAGS ] - - OPTIONS: -h|--help $(_prt_cur_arg off) print this -t|--test < test name > $(_prt_cur_arg ${_TEST}) Test name is one of ( ${TEST_DIR_LIST} heavy_suite light_suite full_suite vtr_basic vtr_strong pre_commit failures debug_sim debug_synth) @@ -121,7 +119,6 @@ printf "Called program with $INPUT -a|--adder_def < /abs/path > $(_prt_cur_arg 
${_ADDER_DEF}) Use template to build adders -n|--simulation_count < N > $(_prt_cur_arg ${_SIM_COUNT}) Allow to run the simulation N times to benchmark the simulator -d|--output_dir < /abs/path > $(_prt_cur_arg ${_RUN_DIR_OVERRIDE}) Change the run directory output - FLAGS: -g|--generate_bench $(_prt_cur_arg ${_GENERATE_BENCH}) Generate input and output vector for test -o|--generate_output $(_prt_cur_arg ${_GENERATE_OUTPUT}) Generate output vector for test given its input vector @@ -132,7 +129,6 @@ printf "Called program with $INPUT -b|--batch_sim $(_prt_cur_arg ${_BATCH_SIM}) Use Batch mode multithreaded simulation -p|--perf $(_prt_cur_arg ${_USE_PERF}) Use Perf for monitoring execution -f|--force_simulate $(_prt_cur_arg ${_FORCE_SIM}) Force the simulation to be executed regardless of the config - " } @@ -607,6 +603,7 @@ function sim() { for arches in ${arch_list} do + arch_basename=${arches%.xml} arch_name=${arch_basename##*/} @@ -615,18 +612,18 @@ function sim() { DIR="${NEW_RUN_DIR}/${TEST_FULL_REF}" blif_file="${DIR}/odin.blif" + #build commands + mkdir -p $DIR + arch_cmd="" if [ -e ${arches} ] then tiles_cmd="../vtr_flow/scripts/add_tiles.py" arch_file="${arch_name}.xml" - ${tiles_cmd} --arch_xml ${arches} > ${DIR}/${arch_name}.xml - arch_cmd="-a ${DIR}/${arch_name}.xml" + ${tiles_cmd} --arch_xml ${arches} > $DIR/${arch_name}.xml + arch_cmd="-a $DIR/${arch_name}.xml" fi - #build commands - mkdir -p $DIR - ############################### # Synthesis if [ "${_SYNTHESIS}" == "on" ] @@ -644,7 +641,6 @@ function sim() { wrapper_synthesis_command="${wrapper_synthesis_command} ${_perf_flag} ${DIR}/perf.data" fi - synthesis_command="${DEFAULT_CMD_PARAM} ${arch_cmd} -V ${benchmark} @@ -973,3 +969,16 @@ print_time_since $START exit_program ### end here +© 2019 GitHub, Inc. 
+Terms +Privacy +Security +Status +Help +Contact GitHub +Pricing +API +Training +Blog +About + From a59408a779911b6e1fae0c19d5b283f3af434ba1 Mon Sep 17 00:00:00 2001 From: Alessandro Comodi Date: Thu, 27 Jun 2019 14:42:03 +0200 Subject: [PATCH 15/15] vpr: corrected bug in equivalent placement Signed-off-by: Alessandro Comodi --- vpr/src/place/place.cpp | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index f0b2e8e3e16..96de3266233 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -318,6 +318,7 @@ static e_find_affected_blocks_result identify_macro_self_swap_affected_macros(st static e_find_affected_blocks_result record_macro_self_swaps(const int imacro, t_pl_offset swap_offset); bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to); +bool is_legal_blk_swap(t_pl_loc from, t_pl_loc to); std::set determine_locations_emptied_by_move(); @@ -1675,12 +1676,27 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { auto blk_type_from = cluster_ctx.clb_nlist.block_type(blk); auto blk_type_to = device_ctx.grid[to.x][to.y].type; - // First check is to see if `from` type can be placed in `to` type + // Check whether the `from` block type can be placed in the `to` tile type if (!blk_type_from->is_available_tile_index(blk_type_to->index)) { return false; } t_pl_loc from = place_ctx.block_locs[blk].loc; + if (!is_legal_blk_swap(from, to)) { + return false; + } + + return true; +} + +bool is_legal_blk_swap(t_pl_loc from, t_pl_loc to) { + // Make sure that when swapping, the block in the `to` location + // can be moved into the `from` location + + auto& device_ctx = g_vpr_ctx.device(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_ctx = g_vpr_ctx.placement(); + ClusterBlockId blk_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.z]; // In case `blk_to` is empty we can skip the second check @@ -1688,10 +1704,10 @@ bool
is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { return true; } - blk_type_from = device_ctx.grid[from.x][from.y].type; - blk_type_to = cluster_ctx.clb_nlist.block_type(blk_to); + auto blk_type_from = device_ctx.grid[from.x][from.y].type; + auto blk_type_to = cluster_ctx.clb_nlist.block_type(blk_to); - // Second check is to see if `to` type can be placed in `from` type + // Check whether the `to` block type can be placed in the `from` tile type if (!blk_type_to->is_available_tile_index(blk_type_from->index)) { return false; } @@ -2179,7 +2195,7 @@ static bool find_to(t_type_ptr to_type, t_type_ptr from_type, float rlim, const VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].width_offset == 0, "Should be at block base location"); VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].height_offset == 0, "Should be at block base location"); - return true; + return is_legal_blk_swap(from, to); } static e_swap_result assess_swap(double delta_c, double t) {