From 38800e181741cb64ef916e269d8e476c3a4095a0 Mon Sep 17 00:00:00 2001
From: Richard Ren <rich.ren@mail.utoronto.ca>
Date: Tue, 7 May 2019 15:58:13 -0400
Subject: [PATCH 01/15] Added Cinternal instances

The following are all the squashed commit messages:

added instances of Cinternal to mux, tri, buffer

added the Cinternal to struct definitions in physical_types.h

Previous change with CINTERNAL_REQD was lost, reuploaded

removed Cinternal element from buffer

readjusted comment in physical_types.h

Propagates c_internal from arch to rr_graph; reads c_internal; writes c_internal to output

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 libs/libarchfpga/src/physical_types.h       |  4 ++++
 libs/libarchfpga/src/read_xml_arch_file.cpp | 13 ++++++++-----
 vpr/src/route/rr_graph.cpp                  |  1 +
 vpr/src/route/rr_graph_reader.cpp           |  2 ++
 vpr/src/route/rr_graph_writer.cpp           |  3 ++-
 5 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h
index 36c1150cc2a..b48d6aaa1b1 100644
--- a/libs/libarchfpga/src/physical_types.h
+++ b/libs/libarchfpga/src/physical_types.h
@@ -1225,6 +1225,7 @@ enum class BufferSize {
  * R:  Equivalent resistance of the buffer/switch.                           *
  * Cin:  Input capacitance.                                                  *
  * Cout:  Output capacitance.                                                *
+ * Cinternal: Internal capacitance in a buffer with fanout.                  *
  * Tdel_map: A map where the key is the number of inputs and the entry       *
  *           is the corresponding delay. If there is only one entry at key   *
  *           UNDEFINED, then delay is a constant (doesn't vary with fan-in). *
@@ -1242,6 +1243,7 @@ struct t_arch_switch_inf {
     float R = 0.;
     float Cin = 0.;
     float Cout = 0.;
+    float Cinternal = 0.; // defined the property Cinternal
     float mux_trans_size = 1.;
     BufferSize buf_size_type = BufferSize::AUTO;
     float buf_size = 0.;
@@ -1293,6 +1295,7 @@ struct t_arch_switch_inf {
  * R:  Equivalent resistance of the buffer/switch.                           *
  * Cin:  Input capacitance.                                                  *
  * Cout:  Output capacitance.                                                *
+ * Cinternal: Internal capacitance in a buffer.                              *
  * Tdel:  Intrinsic delay.  The delay through an unloaded switch is          *
  *        Tdel + R * Cout.                                                   *
  * mux_trans_size:  The area of each transistor in the segment's driving mux *
@@ -1303,6 +1306,7 @@ struct t_rr_switch_inf {
     float R = 0.;
     float Cin = 0.;
     float Cout = 0.;
+    float Cinternal = 0.; //defined the property Cinternal
     float Tdel = 0.;
     float mux_trans_size = 0.;
     float buf_size = 0.;
diff --git a/libs/libarchfpga/src/read_xml_arch_file.cpp b/libs/libarchfpga/src/read_xml_arch_file.cpp
index 519b0287b1d..e2c374261c9 100644
--- a/libs/libarchfpga/src/read_xml_arch_file.cpp
+++ b/libs/libarchfpga/src/read_xml_arch_file.cpp
@@ -3074,23 +3074,23 @@ static void ProcessSwitches(pugi::xml_node Parent,
         SwitchType type = SwitchType::MUX;
         if (0 == strcmp(type_name, "mux")) {
             type = SwitchType::MUX;
-            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size", "mux_trans_size"}, " with type '"s + type_name + "'"s, loc_data);
+            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "buf_size", "power_buf_size", "mux_trans_size"}, " with type '"s + type_name + "'"s, loc_data); // buffered switch should have a Cinternal element
 
         } else if (0 == strcmp(type_name, "tristate")) {
             type = SwitchType::TRISTATE;
-            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data);
+            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); // buffered switch should have a Cinternal element
 
         } else if (0 == strcmp(type_name, "buffer")) {
             type = SwitchType::BUFFER;
-            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data);
+            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); // buffer should not have a Cinternal element
 
         } else if (0 == strcmp(type_name, "pass_gate")) {
             type = SwitchType::PASS_GATE;
-            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type '"s + type_name + "'"s, loc_data);
+            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type '"s + type_name + "'"s, loc_data); // unbuffered switch does not have Cinternal element
 
         } else if (0 == strcmp(type_name, "short")) {
             type = SwitchType::SHORT;
-            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type "s + type_name + "'"s, loc_data);
+            expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type '"s + type_name + "'"s, loc_data); // unbuffered switch does not have Cinternal element; message quoting matches the other switch types
 
         } else {
             archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
@@ -3102,6 +3102,8 @@ static void ProcessSwitches(pugi::xml_node Parent,
 
         ReqOpt COUT_REQD = TIMING_ENABLE_REQD;
         ReqOpt CIN_REQD = TIMING_ENABLE_REQD;
+        ReqOpt CINTERNAL_REQD = OPTIONAL; //defined the parameter
+
         if (arch_switch.type() == SwitchType::SHORT) {
             //Cin/Cout are optional on shorts, since they really only have one capacitance
             CIN_REQD = OPTIONAL;
@@ -3109,6 +3111,7 @@ static void ProcessSwitches(pugi::xml_node Parent,
         }
         arch_switch.Cin = get_attribute(Node, "Cin", loc_data, CIN_REQD).as_float(0);
         arch_switch.Cout = get_attribute(Node, "Cout", loc_data, COUT_REQD).as_float(0);
+        arch_switch.Cinternal = get_attribute(Node, "Cinternal", loc_data, CINTERNAL_REQD).as_float(0); // retrieve the optional parameter
 
         if (arch_switch.type() == SwitchType::MUX) {
             //Only muxes have mux transistors
diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp
index d00df14ef95..2d644064ef7 100644
--- a/vpr/src/route/rr_graph.cpp
+++ b/vpr/src/route/rr_graph.cpp
@@ -893,6 +893,7 @@ void load_rr_switch_from_arch_switch(int arch_switch_idx,
     device_ctx.rr_switch_inf[rr_switch_idx].set_type(device_ctx.arch_switch_inf[arch_switch_idx].type());
     device_ctx.rr_switch_inf[rr_switch_idx].R = device_ctx.arch_switch_inf[arch_switch_idx].R;
     device_ctx.rr_switch_inf[rr_switch_idx].Cin = device_ctx.arch_switch_inf[arch_switch_idx].Cin;
+    device_ctx.rr_switch_inf[rr_switch_idx].Cinternal = device_ctx.arch_switch_inf[arch_switch_idx].Cinternal; //now we can retrieve Cinternal from the arch and implement into the rr calculations.
     device_ctx.rr_switch_inf[rr_switch_idx].Cout = device_ctx.arch_switch_inf[arch_switch_idx].Cout;
     device_ctx.rr_switch_inf[rr_switch_idx].Tdel = rr_switch_Tdel;
     device_ctx.rr_switch_inf[rr_switch_idx].mux_trans_size = device_ctx.arch_switch_inf[arch_switch_idx].mux_trans_size;
diff --git a/vpr/src/route/rr_graph_reader.cpp b/vpr/src/route/rr_graph_reader.cpp
index 465ea6d92f9..6e8e481434b 100644
--- a/vpr/src/route/rr_graph_reader.cpp
+++ b/vpr/src/route/rr_graph_reader.cpp
@@ -234,11 +234,13 @@ void process_switches(pugi::xml_node parent, const pugiutil::loc_data& loc_data)
             rr_switch.R = get_attribute(SwitchSubnode, "R", loc_data).as_float();
             rr_switch.Cin = get_attribute(SwitchSubnode, "Cin", loc_data).as_float();
             rr_switch.Cout = get_attribute(SwitchSubnode, "Cout", loc_data).as_float();
+            rr_switch.Cinternal = get_attribute(SwitchSubnode, "Cinternal", loc_data).as_float();
             rr_switch.Tdel = get_attribute(SwitchSubnode, "Tdel", loc_data).as_float();
         } else {
             rr_switch.R = 0;
             rr_switch.Cin = 0;
             rr_switch.Cout = 0;
+            rr_switch.Cinternal = 0;
             rr_switch.Tdel = 0;
         }
         SwitchSubnode = get_single_child(Switch, "sizing", loc_data);
diff --git a/vpr/src/route/rr_graph_writer.cpp b/vpr/src/route/rr_graph_writer.cpp
index fccc0341136..48012725a65 100644
--- a/vpr/src/route/rr_graph_writer.cpp
+++ b/vpr/src/route/rr_graph_writer.cpp
@@ -189,7 +189,8 @@ void write_rr_switches(fstream& fp) {
         }
         fp << ">" << endl;
 
-        fp << "\t\t\t<timing R=\"" << setprecision(FLOAT_PRECISION) << rr_switch.R << "\" Cin=\"" << setprecision(FLOAT_PRECISION) << rr_switch.Cin << "\" Cout=\"" << setprecision(FLOAT_PRECISION) << rr_switch.Cout << "\" Tdel=\"" << setprecision(FLOAT_PRECISION) << rr_switch.Tdel << "\"/>" << endl;
+        fp << "\t\t\t<timing R=\"" << setprecision(FLOAT_PRECISION) << rr_switch.R << "\" Cin=\"" << setprecision(FLOAT_PRECISION) << rr_switch.Cin << "\" Cout=\"" << setprecision(FLOAT_PRECISION) << rr_switch.Cout << "\" Cinternal=\"" << setprecision(FLOAT_PRECISION) << rr_switch.Cinternal << // write Cinternal alongside the other timing attributes
+            "\" Tdel=\"" << setprecision(FLOAT_PRECISION) << rr_switch.Tdel << "\"/>" << endl;
         fp << "\t\t\t<sizing mux_trans_size=\"" << setprecision(FLOAT_PRECISION) << rr_switch.mux_trans_size << "\" buf_size=\"" << setprecision(FLOAT_PRECISION) << rr_switch.buf_size << "\"/>" << endl;
         fp << "\t\t</switch>" << endl;
     }

From d61276d460ba298f5e7d7995d26e18372f332d48 Mon Sep 17 00:00:00 2001
From: Michael Gielda <mgielda@antmicro.com>
Date: Sun, 16 Jun 2019 15:33:38 +0200
Subject: [PATCH 02/15] Add VTR change issue template

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 .github/ISSUE_TEMPLATE/bug_report.md      |  3 +++
 .github/ISSUE_TEMPLATE/feature_request.md |  3 +++
 .github/ISSUE_TEMPLATE/vtr-change.md      | 25 +++++++++++++++++++++++
 3 files changed, 31 insertions(+)
 create mode 100644 .github/ISSUE_TEMPLATE/vtr-change.md

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index dc1b48f28cc..1e5584ab1b6 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -1,6 +1,9 @@
 ---
 name: Bug report
 about: Create a report to help us improve
+title: ''
+labels: ''
+assignees: ''
 
 ---
 
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
index 96a3a6f322e..7523b9efbf8 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -1,6 +1,9 @@
 ---
 name: Feature request
 about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
 
 ---
 
diff --git a/.github/ISSUE_TEMPLATE/vtr-change.md b/.github/ISSUE_TEMPLATE/vtr-change.md
new file mode 100644
index 00000000000..74d5ec9e8f6
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/vtr-change.md
@@ -0,0 +1,25 @@
+---
+name: VTR change
+about: Describe purpose and lifecycle of a local change we made to VTR
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+### Why did we need this? (what does this change enable us to do)
+<!--- i.e. what does this change enable us to do? -->
+
+### What did it change?
+<!--- i.e. technical description what the change does -->
+
+### Should it be merged upstream - if not, when can we delete it?
+
+### What is needed to get this merged / deleted? 
+
+* [ ] Is the implementation work needed to make this suitable for merging / deletion completed?
+* [ ] Is there an associated test? <!--- i.e. how will we prevent it from regressing? -->
+* [ ] Is this currently part of the Conda package?
+* [ ] Is this properly cleaned up in our local repositories? <!--- add subtasks here if needed -->
+
+### Tracker / branch / PR & other useful links

From 6f3067882584ffc2ae6fc4929eefb6a6bd983544 Mon Sep 17 00:00:00 2001
From: Keith Rothman <537074+litghost@users.noreply.github.com>
Date: Mon, 8 Oct 2018 19:35:29 -0700
Subject: [PATCH 03/15] Avoid criticality issue.

Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com>
Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 vpr/src/timing/timing_util.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp
index e260b8f5cfa..8bc919dac3f 100644
--- a/vpr/src/timing/timing_util.cpp
+++ b/vpr/src/timing/timing_util.cpp
@@ -571,6 +571,10 @@ float calc_relaxed_criticality(const std::map<DomainPair, float>& domains_max_re
             max_req += shift;
         }
 
+        if (!std::isfinite(slack)) {
+            continue;
+        }
+
         float crit = std::numeric_limits<float>::quiet_NaN();
         if (max_req > 0.) {
             //Standard case

From e684b0ee93299961cf3aec4c81a36cc9fd12aff0 Mon Sep 17 00:00:00 2001
From: Keith Rothman <537074+litghost@users.noreply.github.com>
Date: Mon, 10 Jun 2019 10:51:45 -0700
Subject: [PATCH 04/15] vpr: allow connection box map based algorithm during
 routing

Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com>
---
 vpr/src/base/echo_files.cpp                   |   2 +
 vpr/src/base/echo_files.h                     |   1 +
 vpr/src/base/read_options.cpp                 |  15 +-
 vpr/src/base/vpr_context.h                    |   3 +
 vpr/src/base/vpr_types.h                      |   5 +-
 vpr/src/place/timing_place_lookup.cpp         |  11 +-
 vpr/src/route/connection_box.cpp              | 127 +++++
 vpr/src/route/connection_box.h                |  76 +++
 .../route/connection_box_lookahead_map.cpp    | 460 ++++++++++++++++++
 vpr/src/route/connection_box_lookahead_map.h  |  14 +
 vpr/src/route/router_lookahead.cpp            |  22 +
 vpr/src/route/router_lookahead.h              |   5 +
 vpr/src/route/router_lookahead_map_utils.cpp  | 192 ++++++++
 vpr/src/route/router_lookahead_map_utils.h    | 142 ++++++
 vpr/src/route/rr_graph.cpp                    |   5 +
 vpr/src/route/rr_graph_reader.cpp             |  54 ++
 vpr/src/route/rr_node.h                       |   2 +-
 17 files changed, 1127 insertions(+), 9 deletions(-)
 create mode 100644 vpr/src/route/connection_box.cpp
 create mode 100644 vpr/src/route/connection_box.h
 create mode 100644 vpr/src/route/connection_box_lookahead_map.cpp
 create mode 100644 vpr/src/route/connection_box_lookahead_map.h
 create mode 100644 vpr/src/route/router_lookahead_map_utils.cpp
 create mode 100644 vpr/src/route/router_lookahead_map_utils.h

diff --git a/vpr/src/base/echo_files.cpp b/vpr/src/base/echo_files.cpp
index d195c7d3871..e35b04c6da0 100644
--- a/vpr/src/base/echo_files.cpp
+++ b/vpr/src/base/echo_files.cpp
@@ -112,6 +112,8 @@ void alloc_and_load_echo_file_info() {
     setEchoFileName(E_ECHO_CHAN_DETAILS, "chan_details.txt");
     setEchoFileName(E_ECHO_SBLOCK_PATTERN, "sblock_pattern.txt");
     setEchoFileName(E_ECHO_ENDPOINT_TIMING, "endpoint_timing.echo.json");
+
+    setEchoFileName(E_ECHO_LOOKAHEAD_MAP, "lookahead_map.echo");
 }
 
 void free_echo_file_info() {
diff --git a/vpr/src/base/echo_files.h b/vpr/src/base/echo_files.h
index 2aa9253617b..3a3507f60ca 100644
--- a/vpr/src/base/echo_files.h
+++ b/vpr/src/base/echo_files.h
@@ -43,6 +43,7 @@ enum e_echo_files {
     E_ECHO_CHAN_DETAILS,
     E_ECHO_SBLOCK_PATTERN,
     E_ECHO_ENDPOINT_TIMING,
+    E_ECHO_LOOKAHEAD_MAP,
 
     //Timing Graphs
     E_ECHO_PRE_PACKING_TIMING_GRAPH,
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 28470df9142..e58f220eab6 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -648,6 +648,8 @@ struct ParseRouterLookahead {
             conv_value.set_value(e_router_lookahead::CLASSIC);
         else if (str == "map")
             conv_value.set_value(e_router_lookahead::MAP);
+        else if (str == "connection_box_map")
+            conv_value.set_value(e_router_lookahead::CONNECTION_BOX_MAP);
         else {
             std::stringstream msg;
             msg << "Invalid conversion from '"
@@ -661,17 +663,22 @@ struct ParseRouterLookahead {
 
     ConvertedValue<std::string> to_str(e_router_lookahead val) {
         ConvertedValue<std::string> conv_value;
-        if (val == e_router_lookahead::CLASSIC)
+        if (val == e_router_lookahead::CLASSIC) {
             conv_value.set_value("classic");
-        else {
-            VTR_ASSERT(val == e_router_lookahead::MAP);
+        } else if (val == e_router_lookahead::MAP) {
             conv_value.set_value("map");
+        } else if (val == e_router_lookahead::CONNECTION_BOX_MAP) {
+            conv_value.set_value("connection_box_map");
+        } else {
+            std::stringstream msg;
+            msg << "Unrecognized e_router_lookahead";
+            conv_value.set_error(msg.str());
         }
         return conv_value;
     }
 
     std::vector<std::string> default_choices() {
-        return {"classic", "map"};
+        return {"classic", "map", "connection_box_map"};
     }
 };
 
diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h
index 7ddc42ff3be..0ef875206f7 100644
--- a/vpr/src/base/vpr_context.h
+++ b/vpr/src/base/vpr_context.h
@@ -20,6 +20,7 @@
 #include "clock_connection_builders.h"
 #include "route_traceback.h"
 #include "place_macro.h"
+#include "connection_box.h"
 
 //A Context is collection of state relating to a particular part of VPR
 //
@@ -194,6 +195,8 @@ struct DeviceContext : public Context {
      * Clock Network
      ********************************************************************/
     t_clock_arch* clock_arch;
+
+    ConnectionBoxes connection_boxes;
 };
 
 //State relating to power analysis
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index a90f3f9f3fd..b46f726c532 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -103,7 +103,10 @@ constexpr const char* EMPTY_BLOCK_NAME = "EMPTY";
 enum class e_router_lookahead {
     CLASSIC, //VPR's classic lookahead (assumes uniform wire types)
     MAP,     //Lookahead considering different wire types (see Oleg Petelin's MASc Thesis)
-    NO_OP    //A no-operation lookahead which always returns zero
+    NO_OP,   //A no-operation lookahead which always returns zero
+    CONNECTION_BOX_MAP,
+    // Lookahead considering different wire types and IPIN
+    // connection box.
 };
 
 enum class e_route_bb_update {
diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp
index e6e0e1dccda..60c13a079e1 100644
--- a/vpr/src/place/timing_place_lookup.cpp
+++ b/vpr/src/place/timing_place_lookup.cpp
@@ -264,9 +264,14 @@ static float route_connection_delay(int source_x, int source_y, int sink_x, int
 
             VTR_ASSERT(sink_rr_node != OPEN);
 
-            successfully_routed = calculate_delay(source_rr_node, sink_rr_node,
-                                                  router_opts,
-                                                  &net_delay_value);
+            {
+                vtr::ScopedStartFinishTimer timer(vtr::string_fmt(
+                    "Routing Src: %d Sink: %d", source_rr_node,
+                    sink_rr_node));
+                successfully_routed = calculate_delay(source_rr_node, sink_rr_node,
+                                                      router_opts,
+                                                      &net_delay_value);
+            }
 
             if (successfully_routed) break;
         }
diff --git a/vpr/src/route/connection_box.cpp b/vpr/src/route/connection_box.cpp
new file mode 100644
index 00000000000..85d554b4307
--- /dev/null
+++ b/vpr/src/route/connection_box.cpp
@@ -0,0 +1,127 @@
+#include "connection_box.h"
+#include "vtr_assert.h"
+#include "globals.h"
+
+ConnectionBoxes::ConnectionBoxes()
+    : size_(std::make_pair(0, 0)) {
+}
+
+size_t ConnectionBoxes::num_connection_box_types() const {
+    return boxes_.size();
+}
+
+std::pair<size_t, size_t> ConnectionBoxes::connection_box_grid_size() const {
+    return size_;
+}
+
+// Returns the box for `box`, or nullptr when the id is invalid or out of range.
+const ConnectionBox* ConnectionBoxes::get_connection_box(ConnectionBoxId box) const {
+    // bool(id) is true for a valid id (cf. add_connection_box), so reject !bool.
+    if (!bool(box)) {
+        return nullptr;
+    }
+    const size_t index = size_t(box);
+    if (index >= boxes_.size()) {
+        return nullptr;
+    }
+    return &boxes_.at(index);
+}
+
+bool ConnectionBoxes::find_connection_box(int inode,
+                                          ConnectionBoxId* box_id,
+                                          std::pair<size_t, size_t>* box_location) const {
+    VTR_ASSERT(box_id != nullptr);
+    VTR_ASSERT(box_location != nullptr);
+
+    const auto& conn_box_loc = ipin_map_[inode];
+    if (conn_box_loc.box_id == ConnectionBoxId::INVALID()) {
+        return false;
+    }
+
+    *box_id = conn_box_loc.box_id;
+    *box_location = conn_box_loc.box_location;
+    return true;
+}
+
+// Clear IPIN map and set connection box grid size and box ids.
+void ConnectionBoxes::reset_boxes(std::pair<size_t, size_t> size,
+                                  const std::vector<ConnectionBox> boxes) {
+    clear();
+
+    size_ = size;
+    boxes_ = boxes;
+}
+
+void ConnectionBoxes::resize_nodes(size_t rr_node_size) {
+    ipin_map_.resize(rr_node_size);
+    canonical_loc_map_.resize(rr_node_size,
+                              std::make_pair(-1, -1));
+}
+
+void ConnectionBoxes::clear() {
+    ipin_map_.clear();
+    size_ = std::make_pair(0, 0);
+    boxes_.clear();
+    canonical_loc_map_.clear();
+    sink_to_ipin_.clear();
+}
+
+void ConnectionBoxes::add_connection_box(int inode, ConnectionBoxId box_id, std::pair<size_t, size_t> box_location) {
+    // Ensure that box location is in bounds
+    VTR_ASSERT(box_location.first < size_.first);
+    VTR_ASSERT(box_location.second < size_.second);
+
+    // Bounds check box_id
+    VTR_ASSERT(bool(box_id));
+    VTR_ASSERT(size_t(box_id) < boxes_.size());
+
+    // Make sure sink map will not be invalidated upon insertion.
+    VTR_ASSERT(sink_to_ipin_.size() == 0);
+
+    ipin_map_[inode] = ConnBoxLoc(box_location, box_id);
+}
+
+void ConnectionBoxes::add_canonical_loc(int inode, std::pair<size_t, size_t> loc) {
+    VTR_ASSERT(loc.first < size_.first);
+    VTR_ASSERT(loc.second < size_.second);
+    canonical_loc_map_[inode] = loc;
+}
+
+const std::pair<size_t, size_t>* ConnectionBoxes::find_canonical_loc(int inode) const {
+    const auto& canon_loc = canonical_loc_map_[inode];
+    if (canon_loc.first == size_t(-1)) {
+        return nullptr;
+    }
+
+    return &canon_loc;
+}
+
+void ConnectionBoxes::create_sink_back_ref() {
+    const auto& device_ctx = g_vpr_ctx.device();
+
+    sink_to_ipin_.resize(device_ctx.rr_nodes.size(), {{0, 0, 0, 0}, 0});
+
+    for (size_t i = 0; i < device_ctx.rr_nodes.size(); ++i) {
+        const auto& ipin_node = device_ctx.rr_nodes[i];
+        if (ipin_node.type() != IPIN) {
+            continue;
+        }
+
+        if (ipin_map_[i].box_id == ConnectionBoxId::INVALID()) {
+            continue;
+        }
+
+        for (auto edge : ipin_node.edges()) {
+            int sink_inode = ipin_node.edge_sink_node(edge);
+            VTR_ASSERT(device_ctx.rr_nodes[sink_inode].type() == SINK);
+            VTR_ASSERT(sink_to_ipin_[sink_inode].ipin_count < 4);
+            auto& sink_to_ipin = sink_to_ipin_[sink_inode];
+            sink_to_ipin.ipin_nodes[sink_to_ipin.ipin_count++] = i;
+        }
+    }
+}
+
+const SinkToIpin& ConnectionBoxes::find_sink_connection_boxes(
+    int inode) const {
+    return sink_to_ipin_[inode];
+}
diff --git a/vpr/src/route/connection_box.h b/vpr/src/route/connection_box.h
new file mode 100644
index 00000000000..06217ac2a41
--- /dev/null
+++ b/vpr/src/route/connection_box.h
@@ -0,0 +1,76 @@
+#ifndef CONNECTION_BOX_H
+#define CONNECTION_BOX_H
+// Some routing graphs have connectivity driven by types of connection boxes.
+// This class relates IPIN rr nodes with connection box type and locations, used
+// for connection box driven map lookahead.
+
+#include <tuple>
+#include "vtr_strong_id.h"
+#include "vtr_flat_map.h"
+#include "vtr_range.h"
+#include <map>
+
+struct connection_box_tag {};
+typedef vtr::StrongId<connection_box_tag> ConnectionBoxId;
+
+struct ConnectionBox {
+    std::string name;
+};
+
+struct ConnBoxLoc {
+    ConnBoxLoc()
+        : box_location(std::make_pair(-1, -1)) {}
+    ConnBoxLoc(
+        const std::pair<size_t, size_t>& a_box_location,
+        ConnectionBoxId a_box_id)
+        : box_location(a_box_location)
+        , box_id(a_box_id) {}
+
+    std::pair<size_t, size_t> box_location;
+    ConnectionBoxId box_id;
+};
+
+struct SinkToIpin {
+    int ipin_nodes[4];
+    int ipin_count;
+};
+
+class ConnectionBoxes {
+  public:
+    ConnectionBoxes();
+
+    size_t num_connection_box_types() const;
+    std::pair<size_t, size_t> connection_box_grid_size() const;
+    const ConnectionBox* get_connection_box(ConnectionBoxId box) const;
+
+    bool find_connection_box(int inode,
+                             ConnectionBoxId* box_id,
+                             std::pair<size_t, size_t>* box_location) const;
+    const std::pair<size_t, size_t>* find_canonical_loc(int inode) const;
+
+    // Clear IPIN map and set connection box grid size and box ids.
+    void clear();
+    void reset_boxes(std::pair<size_t, size_t> size,
+                     const std::vector<ConnectionBox> boxes);
+    void resize_nodes(size_t rr_node_size);
+
+    void add_connection_box(int inode, ConnectionBoxId box_id, std::pair<size_t, size_t> box_location);
+    void add_canonical_loc(int inode, std::pair<size_t, size_t> loc);
+
+    // Create map from SINK's back to IPIN's
+    //
+    // This must be called after all connection boxes have been added.
+    void create_sink_back_ref();
+    const SinkToIpin& find_sink_connection_boxes(
+        int inode) const;
+
+  private:
+    std::pair<size_t, size_t> size_;
+    std::vector<ConnectionBox> boxes_;
+    std::vector<ConnBoxLoc> ipin_map_;
+    std::vector<SinkToIpin> sink_to_ipin_;
+    std::vector<std::pair<size_t, size_t>>
+        canonical_loc_map_;
+};
+
+#endif
diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp
new file mode 100644
index 00000000000..fc806f67687
--- /dev/null
+++ b/vpr/src/route/connection_box_lookahead_map.cpp
@@ -0,0 +1,460 @@
+#include "connection_box_lookahead_map.h"
+
+#include <vector>
+#include <queue>
+
+#include "connection_box.h"
+#include "rr_node.h"
+#include "router_lookahead_map_utils.h"
+#include "globals.h"
+#include "vtr_math.h"
+#include "vtr_time.h"
+#include "echo_files.h"
+
+/* we're profiling routing cost over many tracks for each wire type, so we'll
+ * have many cost entries at each |dx|,|dy| offset. There are many ways to
+ * "boil down" the many costs at each offset to a single entry for a given
+ * (wire type, chan_type) combination we can take the smallest cost, the
+ * average, median, etc. This define selects the method we use.
+ *
+ * See e_representative_entry_method */
+#define REPRESENTATIVE_ENTRY_METHOD SMALLEST
+
+#define REF_X 25
+#define REF_Y 23
+
+static int signum(int x) {
+    if (x > 0) return 1;
+    if (x < 0)
+        return -1;
+    else
+        return 0;
+}
+
+typedef std::vector<std::pair<std::pair<int, int>, Cost_Entry>> t_routing_cost_map;
+static void run_dijkstra(int start_node_ind,
+                         t_routing_cost_map* cost_map);
+
+class CostMap {
+  public:
+    void set_segment_count(size_t seg_count) {
+        cost_map_.clear();
+        offset_.clear();
+        cost_map_.resize(seg_count);
+        offset_.resize(seg_count);
+
+        const auto& device_ctx = g_vpr_ctx.device();
+        segment_map_.resize(device_ctx.rr_nodes.size());
+        for (size_t i = 0; i < segment_map_.size(); ++i) {
+            auto& from_node = device_ctx.rr_nodes[i];
+
+            int from_cost_index = from_node.cost_index();
+            int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index;
+
+            segment_map_[i] = from_seg_index;
+        }
+    }
+
+    int node_to_segment(int from_node_ind) {
+        return segment_map_[from_node_ind];
+    }
+
+    Cost_Entry find_cost(int from_seg_index, int delta_x, int delta_y) const {
+        VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size());
+        int dx = delta_x - offset_[from_seg_index].first;
+        int dy = delta_y - offset_[from_seg_index].second;
+        const auto& cost_map = cost_map_[from_seg_index];
+
+        if (dx < 0) {
+            dx = 0;
+        }
+        if (dy < 0) {
+            dy = 0;
+        }
+
+        if (dx >= (ssize_t)cost_map.dim_size(0)) {
+            dx = cost_map.dim_size(0) - 1;
+        }
+        if (dy >= (ssize_t)cost_map.dim_size(1)) {
+            dy = cost_map.dim_size(1) - 1;
+        }
+
+        return cost_map_[from_seg_index][dx][dy];
+    }
+
+    void set_cost_map(int from_seg_index,
+                      const t_routing_cost_map& cost_map,
+                      e_representative_entry_method method) {
+        VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size());
+
+        // Find coordinate offset for this segment.
+        int min_dx = 0;
+        int min_dy = 0;
+        int max_dx = 0;
+        int max_dy = 0;
+        for (const auto& entry : cost_map) {
+            min_dx = std::min(entry.first.first, min_dx);
+            min_dy = std::min(entry.first.second, min_dy);
+
+            max_dx = std::max(entry.first.first, max_dx);
+            max_dy = std::max(entry.first.second, max_dy);
+        }
+
+        offset_[from_seg_index].first = min_dx;
+        offset_[from_seg_index].second = min_dy;
+        size_t dim_x = max_dx - min_dx + 1;
+        size_t dim_y = max_dy - min_dy + 1;
+
+        vtr::NdMatrix<Expansion_Cost_Entry, 2> expansion_cost_map(
+            {dim_x, dim_y});
+
+        for (const auto& entry : cost_map) {
+            int x = entry.first.first - min_dx;
+            int y = entry.first.second - min_dy;
+            expansion_cost_map[x][y].add_cost_entry(
+                method, entry.second.delay,
+                entry.second.congestion);
+        }
+
+        cost_map_[from_seg_index] = vtr::NdMatrix<Cost_Entry, 2>(
+            {dim_x, dim_y});
+
+        /* set the lookahead cost map entries with a representative cost
+         * entry from routing_cost_map */
+        for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) {
+            for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) {
+                cost_map_[from_seg_index][ix][iy] = expansion_cost_map[ix][iy].get_representative_cost_entry(method);
+            }
+        }
+
+        /* find missing cost entries and fill them in by copying a nearby cost entry */
+        for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) {
+            for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) {
+                Cost_Entry cost_entry = cost_map_[from_seg_index][ix][iy];
+
+                if (!cost_entry.valid()) {
+                    Cost_Entry copied_entry = get_nearby_cost_entry(
+                        from_seg_index,
+                        offset_[from_seg_index].first + ix,
+                        offset_[from_seg_index].second + iy);
+                    cost_map_[from_seg_index][ix][iy] = copied_entry;
+                }
+            }
+        }
+    }
+
+    Cost_Entry get_nearby_cost_entry(int segment_index, int x, int y) {
+        /* compute the slope from x,y to 0,0 and then move towards 0,0 by one
+         * unit to get the coordinates of the cost entry to be copied */
+
+        float slope;
+        int copy_x, copy_y;
+        if (x == 0 || y == 0) {
+            slope = std::numeric_limits<float>::infinity();
+            copy_x = x - signum(x);
+            copy_y = y - signum(y);
+        } else {
+            slope = (float)y / (float)x;
+            if (slope >= 1.0) {
+                copy_y = y - signum(y);
+                copy_x = vtr::nint((float)y / slope);
+            } else {
+                copy_x = x - signum(x);
+                copy_y = vtr::nint((float)x * slope);
+            }
+        }
+
+        Cost_Entry copy_entry = find_cost(segment_index, copy_x, copy_y);
+
+        /* if the entry to be copied is also empty, recurse */
+        if (copy_entry.valid()) {
+            return copy_entry;
+        } else if (copy_x == 0 && copy_y == 0) {
+            return Cost_Entry();
+        }
+
+        return get_nearby_cost_entry(segment_index, copy_x, copy_y);
+    }
+
+    void print_cost_map(const std::vector<t_segment_inf>& segment_inf,
+                        const char* fname) {
+        FILE* fp = vtr::fopen(fname, "w");
+        for (size_t iseg = 0; iseg < cost_map_.size(); iseg++) {
+            fprintf(fp, "Seg %s(%zu) (%d, %d)\n", segment_inf.at(iseg).name.c_str(),
+                    iseg,
+                    offset_[iseg].first,
+                    offset_[iseg].second);
+            for (size_t iy = 0; iy < cost_map_[iseg].dim_size(1); iy++) {
+                for (size_t ix = 0; ix < cost_map_[iseg].dim_size(0); ix++) {
+                    fprintf(fp, "%.4g,\t",
+                            cost_map_[iseg][ix][iy].delay);
+                }
+                fprintf(fp, "\n");
+            }
+            fprintf(fp, "\n\n");
+        }
+
+        fclose(fp);
+    }
+
+  private:
+    std::vector<vtr::NdMatrix<Cost_Entry, 2>> cost_map_;
+    std::vector<std::pair<int, int>> offset_;
+    std::vector<int> segment_map_;
+};
+
+static CostMap g_cost_map;
+
+class StartNode {
+  public:
+    StartNode(int start_x, int start_y, t_rr_type rr_type, int seg_index)
+        : start_x_(start_x)
+        , start_y_(start_y)
+        , rr_type_(rr_type)
+        , seg_index_(seg_index)
+        , index_(0) {}
+    int get_next_node() {
+        const auto& device_ctx = g_vpr_ctx.device();
+        const std::vector<int>& channel_node_list = device_ctx.rr_node_indices[rr_type_][start_x_][start_y_][0];
+
+        for (; index_ < channel_node_list.size(); index_++) {
+            int node_ind = channel_node_list[index_];
+
+            if (node_ind == OPEN || device_ctx.rr_nodes[node_ind].capacity() == 0) {
+                continue;
+            }
+
+            const std::pair<size_t, size_t>* loc = device_ctx.connection_boxes.find_canonical_loc(node_ind);
+            if (loc == nullptr) {
+                continue;
+            }
+
+            int node_cost_ind = device_ctx.rr_nodes[node_ind].cost_index();
+            int node_seg_ind = device_ctx.rr_indexed_data[node_cost_ind].seg_index;
+            if (node_seg_ind == seg_index_) {
+                index_ += 1;
+                return node_ind;
+            }
+        }
+
+        return UNDEFINED;
+    }
+
+  private:
+    int start_x_;
+    int start_y_;
+    t_rr_type rr_type_;
+    int seg_index_;
+    size_t index_;
+};
+
+// Minimum size of search for channels to profile.  The search alternately steps
+// dx and dy away from (REF_X, REF_Y) and always covers dy = [0, kMinProfile].
+//
+// Making this value larger will increase the sample size, but also the runtime
+// to produce the lookahead.
+static constexpr int kMinProfile = 1;
+
+// Maximum size of search for channels to profile.  Once search is outside of
+// kMinProfile distance, lookahead will stop searching once:
+//  - At least one channel has been profiled
+//  - kMaxProfile is exceeded.
+static constexpr int kMaxProfile = 7;
+
+void compute_connection_box_lookahead(
+    const std::vector<t_segment_inf>& segment_inf) {
+    size_t num_segments = segment_inf.size();
+    vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map");
+
+    /* free previous delay map and allocate new one */
+    g_cost_map.set_segment_count(segment_inf.size());
+
+    /* run Dijkstra's algorithm for each segment type & channel type combination */
+    for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) {
+        VTR_LOG("Creating cost map for %s(%d)\n",
+                segment_inf[iseg].name.c_str(), iseg);
+        /* allocate the cost map for this iseg/chan_type */
+        t_routing_cost_map cost_map;
+
+        int count = 0;
+
+        int dx = 0;
+        int dy = 0;
+        //int start_x = vtr::nint(device_ctx.grid.width()/2);
+        //int start_y = vtr::nint(device_ctx.grid.height()/2);
+        int start_x = REF_X;
+        int start_y = REF_Y;
+        while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) {
+            for (e_rr_type chan_type : {CHANX, CHANY}) {
+                StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg);
+
+                for (int start_node_ind = start_node.get_next_node();
+                     start_node_ind != UNDEFINED;
+                     start_node_ind = start_node.get_next_node()) {
+                    count += 1;
+
+                    /* run Dijkstra's algorithm */
+                    run_dijkstra(start_node_ind, &cost_map);
+                }
+            }
+
+            if (dy < dx) {
+                dy += 1;
+            } else {
+                dx += 1;
+            }
+        }
+
+        if (count == 0) {
+            VTR_LOG_WARN("Segment %s(%d) found no start_node_ind\n",
+                         segment_inf[iseg].name.c_str(), iseg);
+        }
+
+        /* boil down the cost list in routing_cost_map at each coordinate to a
+         * representative cost entry and store it in the lookahead cost map */
+        g_cost_map.set_cost_map(iseg, cost_map,
+                                REPRESENTATIVE_ENTRY_METHOD);
+    }
+
+    if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_LOOKAHEAD_MAP)) {
+        g_cost_map.print_cost_map(segment_inf, getEchoFileName(E_ECHO_LOOKAHEAD_MAP));
+    }
+}
+
+/* Returns the lookahead cost estimate for routing from from_node_ind to to_node_ind,
+ * blending the mapped delay and congestion by criticality_fac.  Returns infinity
+ * when no estimate exists (SINK without IPINs, or no valid cost map entry). */
+float get_connection_box_lookahead_map_cost(int from_node_ind,
+                                            int to_node_ind,
+                                            float criticality_fac) {
+    if (from_node_ind == to_node_ind) {
+        return 0.f;
+    }
+
+    auto& device_ctx = g_vpr_ctx.device();
+
+    std::pair<size_t, size_t> to_location;
+    auto to_node_type = device_ctx.rr_nodes[to_node_ind].type();
+
+    if (to_node_type == SINK) {
+        const auto& sink_to_ipin = device_ctx.connection_boxes.find_sink_connection_boxes(to_node_ind);
+        if (sink_to_ipin.ipin_count > 1) {
+            float cost = std::numeric_limits<float>::infinity();
+            // Find cheapest cost from from_node_ind to IPINs for this SINK.
+            for (int i = 0; i < sink_to_ipin.ipin_count; ++i) {
+                cost = std::min(cost, get_connection_box_lookahead_map_cost(
+                                          from_node_ind, sink_to_ipin.ipin_nodes[i], criticality_fac));
+            }
+
+            return cost;
+        } else if (sink_to_ipin.ipin_count == 1) {
+            to_node_ind = sink_to_ipin.ipin_nodes[0];
+            if (from_node_ind == to_node_ind) {
+                return 0.f;
+            }
+        } else {
+            return std::numeric_limits<float>::infinity();
+        }
+    }
+
+    if (device_ctx.rr_nodes[to_node_ind].type() == IPIN) {
+        ConnectionBoxId box_id;
+        std::pair<size_t, size_t> box_location;
+        bool found = device_ctx.connection_boxes.find_connection_box(
+            to_node_ind, &box_id, &box_location);
+        if (!found) {
+            VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", to_node_ind);
+        }
+
+        to_location = box_location;
+    } else {
+        const std::pair<size_t, size_t>* to_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(to_node_ind);
+        if (!to_canonical_loc) {
+            VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d", to_node_ind);
+        }
+
+        to_location = *to_canonical_loc;
+    }
+
+    const std::pair<size_t, size_t>* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(from_node_ind);
+    if (from_canonical_loc == nullptr) {
+        VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d (to %d)", from_node_ind, to_node_ind);
+    }
+
+    ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(to_location.first);
+    ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(to_location.second);
+
+    int from_seg_index = g_cost_map.node_to_segment(from_node_ind);
+    Cost_Entry cost_entry = g_cost_map.find_cost(from_seg_index, dx, dy);
+    if (!cost_entry.valid()) {
+        /* no estimate: blending would compute 0 * inf = NaN when criticality_fac is 0 or 1 */
+        return std::numeric_limits<float>::infinity();
+    }
+
+    return criticality_fac * cost_entry.delay + (1.0 - criticality_fac) * cost_entry.congestion;
+}
+
+/* runs Dijkstra's algorithm from specified node until all nodes have been
+ * visited. Each time a pin is visited, the delay/congestion information
+ * to that pin is stored to an entry in the routing_cost_map */
+static void run_dijkstra(int start_node_ind,
+                         t_routing_cost_map* routing_cost_map) {
+    auto& device_ctx = g_vpr_ctx.device();
+
+    /* a list of boolean flags (one for each rr node) to figure out if a
+     * certain node has already been expanded */
+    std::vector<bool> node_expanded(device_ctx.rr_nodes.size(), false);
+    /* for each node keep a list of the cost with which that node has been
+     * visited (used to determine whether to push a candidate node onto the
+     * expansion queue) */
+    std::vector<float> node_visited_costs(device_ctx.rr_nodes.size(), -1.0);
+    /* a priority queue for expansion */
+    std::priority_queue<PQ_Entry> pq;
+
+    /* first entry has no upstream delay or congestion */
+    PQ_Entry first_entry(start_node_ind, UNDEFINED, 0, 0, 0, true);
+
+    pq.push(first_entry);
+
+    const std::pair<size_t, size_t>* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(start_node_ind);
+    if (from_canonical_loc == nullptr) {
+        VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d",
+                  start_node_ind);
+    }
+
+    /* now do routing */
+    while (!pq.empty()) {
+        PQ_Entry current = pq.top();
+        pq.pop();
+
+        int node_ind = current.rr_node_ind;
+
+        /* check that we haven't already expanded from this node */
+        if (node_expanded[node_ind]) {
+            continue;
+        }
+
+        /* if this node is an ipin record its congestion/delay in the routing_cost_map */
+        if (device_ctx.rr_nodes[node_ind].type() == IPIN) {
+            ConnectionBoxId box_id;
+            std::pair<size_t, size_t> box_location;
+            bool found = device_ctx.connection_boxes.find_connection_box(
+                node_ind, &box_id, &box_location);
+            if (!found) {
+                VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind);
+            }
+
+            int delta_x = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first);
+            int delta_y = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second);
+
+            routing_cost_map->push_back(std::make_pair(
+                std::make_pair(delta_x, delta_y),
+                Cost_Entry(
+                    current.delay,
+                    current.congestion_upstream)));
+        }
+
+        expand_dijkstra_neighbours(current, node_visited_costs, node_expanded, pq);
+        node_expanded[node_ind] = true;
+    }
+}
diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h
new file mode 100644
index 00000000000..75771a1fd6b
--- /dev/null
+++ b/vpr/src/route/connection_box_lookahead_map.h
@@ -0,0 +1,14 @@
+#ifndef CONNECTION_BOX_LOOKAHEAD_H_
+#define CONNECTION_BOX_LOOKAHEAD_H_
+
+#include <vector>
+#include "physical_types.h"
+
+void compute_connection_box_lookahead(
+    const std::vector<t_segment_inf>& segment_inf);
+
+float get_connection_box_lookahead_map_cost(int from_node_ind,
+                                            int to_node_ind,
+                                            float criticality_fac);
+
+#endif
diff --git a/vpr/src/route/router_lookahead.cpp b/vpr/src/route/router_lookahead.cpp
index 645aca4a7ee..96a824a0de2 100644
--- a/vpr/src/route/router_lookahead.cpp
+++ b/vpr/src/route/router_lookahead.cpp
@@ -1,6 +1,7 @@
 #include "router_lookahead.h"
 
 #include "router_lookahead_map.h"
+#include "connection_box_lookahead_map.h"
 #include "vpr_error.h"
 #include "globals.h"
 #include "route_timing.h"
@@ -13,6 +14,8 @@ std::unique_ptr<RouterLookahead> make_router_lookahead(e_router_lookahead router
         return std::make_unique<ClassicLookahead>();
     } else if (router_lookahead_type == e_router_lookahead::MAP) {
         return std::make_unique<MapLookahead>();
+    } else if (router_lookahead_type == e_router_lookahead::CONNECTION_BOX_MAP) {
+        return std::make_unique<ConnectionBoxMapLookahead>();
     } else if (router_lookahead_type == e_router_lookahead::NO_OP) {
         return std::make_unique<NoOpLookahead>();
     }
@@ -81,6 +84,25 @@ float MapLookahead::get_expected_cost(int current_node, int target_node, const t
     }
 }
 
+float ConnectionBoxMapLookahead::get_expected_cost(
+    int current_node,
+    int target_node,
+    const t_conn_cost_params& params,
+    float /*R_upstream*/) const {
+    auto& device_ctx = g_vpr_ctx.device();
+
+    t_rr_type rr_type = device_ctx.rr_nodes[current_node].type();
+
+    if (rr_type == CHANX || rr_type == CHANY) {
+        return get_connection_box_lookahead_map_cost(
+            current_node, target_node, params.criticality);
+    } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */
+        return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost);
+    } else { /* Change this if you want to investigate route-throughs */
+        return (0.);
+    }
+}
+
 float NoOpLookahead::get_expected_cost(int /*current_node*/, int /*target_node*/, const t_conn_cost_params& /*params*/, float /*R_upstream*/) const {
     return 0.;
 }
diff --git a/vpr/src/route/router_lookahead.h b/vpr/src/route/router_lookahead.h
index 6880651e887..07138ddb4d5 100644
--- a/vpr/src/route/router_lookahead.h
+++ b/vpr/src/route/router_lookahead.h
@@ -27,6 +27,11 @@ class MapLookahead : public RouterLookahead {
     float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override;
 };
 
+class ConnectionBoxMapLookahead : public RouterLookahead {
+  protected:
+    float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override;
+};
+
 class NoOpLookahead : public RouterLookahead {
   protected:
     float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override;
diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp
new file mode 100644
index 00000000000..b76edcc7e70
--- /dev/null
+++ b/vpr/src/route/router_lookahead_map_utils.cpp
@@ -0,0 +1,192 @@
+#include "router_lookahead_map_utils.h"
+
+#include "globals.h"
+#include "vpr_context.h"
+#include "vtr_math.h"
+
+/* Number of CLBs I think the average conn. goes. */
+static const int CLB_DIST = 3;
+
+PQ_Entry::PQ_Entry(
+    int set_rr_node_ind,
+    int switch_ind,
+    float parent_delay,
+    float parent_R_upstream,
+    float parent_congestion_upstream,
+    bool starting_node) {
+    this->rr_node_ind = set_rr_node_ind;
+
+    auto& device_ctx = g_vpr_ctx.device();
+    this->delay = parent_delay;
+    this->congestion_upstream = parent_congestion_upstream;
+    this->R_upstream = parent_R_upstream;
+    if (!starting_node) {
+        int cost_index = device_ctx.rr_nodes[set_rr_node_ind].cost_index();
+
+        float Tsw = device_ctx.rr_switch_inf[switch_ind].Tdel;
+        float Rsw = device_ctx.rr_switch_inf[switch_ind].R;
+        float Cnode = device_ctx.rr_nodes[set_rr_node_ind].C();
+        float Rnode = device_ctx.rr_nodes[set_rr_node_ind].R();
+
+        float T_linear = 0.f;
+        float T_quadratic = 0.f;
+        if (device_ctx.rr_switch_inf[switch_ind].buffered()) {
+            T_linear = Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode;
+            T_quadratic = 0.;
+        } else { /* Pass transistor */
+            T_linear = Tsw + 0.5 * Rsw * Cnode;
+            T_quadratic = (Rsw + Rnode) * 0.5 * Cnode;
+        }
+
+        float base_cost;
+        if (device_ctx.rr_indexed_data[cost_index].inv_length < 0) {
+            base_cost = device_ctx.rr_indexed_data[cost_index].base_cost;
+        } else {
+            float frac_num_seg = CLB_DIST * device_ctx.rr_indexed_data[cost_index].inv_length;
+
+            base_cost = frac_num_seg * T_linear
+                        + frac_num_seg * frac_num_seg * T_quadratic;
+        }
+
+        VTR_ASSERT(T_linear >= 0.);
+        VTR_ASSERT(base_cost >= 0.);
+        this->delay += T_linear;
+
+        this->congestion_upstream += base_cost;
+    }
+
+    /* set the cost of this node */
+    this->cost = this->delay;
+}
+
+/* returns cost entry with the smallest delay */
+Cost_Entry Expansion_Cost_Entry::get_smallest_entry() const {
+    Cost_Entry smallest_entry;
+
+    for (auto entry : this->cost_vector) {
+        if (!smallest_entry.valid() || entry.delay < smallest_entry.delay) {
+            smallest_entry = entry;
+        }
+    }
+
+    return smallest_entry;
+}
+
+/* returns a cost entry that represents the average of all the recorded entries */
+Cost_Entry Expansion_Cost_Entry::get_average_entry() const {
+    float avg_delay = 0;
+    float avg_congestion = 0;
+
+    for (auto cost_entry : this->cost_vector) {
+        avg_delay += cost_entry.delay;
+        avg_congestion += cost_entry.congestion;
+    }
+
+    avg_delay /= (float)this->cost_vector.size();
+    avg_congestion /= (float)this->cost_vector.size();
+
+    return Cost_Entry(avg_delay, avg_congestion);
+}
+
+/* returns a cost entry that represents the geomean of all the recorded entries */
+Cost_Entry Expansion_Cost_Entry::get_geomean_entry() const {
+    float geomean_delay = 0;
+    float geomean_cong = 0;
+    for (auto cost_entry : this->cost_vector) {
+        geomean_delay += log(cost_entry.delay);
+        geomean_cong += log(cost_entry.congestion);
+    }
+
+    geomean_delay = exp(geomean_delay / (float)this->cost_vector.size());
+    geomean_cong = exp(geomean_cong / (float)this->cost_vector.size());
+
+    return Cost_Entry(geomean_delay, geomean_cong);
+}
+
+/* returns a cost entry that represents the median of all recorded entries */
+Cost_Entry Expansion_Cost_Entry::get_median_entry() const {
+    /* pick the representative entry by binning the delays of all entries and then
+     * choosing the bin with the largest number of entries */
+    const int num_bins = 10;
+
+    /* find entries with smallest and largest delays */
+    Cost_Entry min_del_entry;
+    Cost_Entry max_del_entry;
+    for (const auto& entry : this->cost_vector) {
+        if (!min_del_entry.valid() || entry.delay < min_del_entry.delay) {
+            min_del_entry = entry;
+        }
+        if (!max_del_entry.valid() || entry.delay > max_del_entry.delay) {
+            max_del_entry = entry;
+        }
+    }
+
+    /* get the bin size */
+    float delay_diff = max_del_entry.delay - min_del_entry.delay;
+    float bin_size = delay_diff / (float)num_bins;
+    if (!(bin_size > 0.f)) {
+        /* all delays are equal (or nothing was recorded): binning would divide by zero */
+        return min_del_entry;
+    }
+
+    /* sort the cost entries into bins */
+    std::vector<std::vector<Cost_Entry> > entry_bins(num_bins, std::vector<Cost_Entry>());
+    for (const auto& entry : this->cost_vector) {
+        float bin_num = floor((entry.delay - min_del_entry.delay) / bin_size);
+        VTR_ASSERT(vtr::nint(bin_num) >= 0 && vtr::nint(bin_num) <= num_bins);
+        if (vtr::nint(bin_num) == num_bins) {
+            /* largest entry will otherwise have an out-of-bounds bin number */
+            bin_num -= 1;
+        }
+        entry_bins[vtr::nint(bin_num)].push_back(entry);
+    }
+
+    /* find the bin with the largest number of elements */
+    int largest_bin = 0;
+    int largest_size = 0;
+    for (int ibin = 0; ibin < num_bins; ibin++) {
+        if (entry_bins[ibin].size() > (unsigned)largest_size) {
+            largest_bin = ibin;
+            largest_size = (unsigned)entry_bins[ibin].size();
+        }
+    }
+
+    /* the first entry of the fullest bin is the representative entry */
+    return entry_bins[largest_bin][0];
+}
+
+/* iterates over the children of the specified node and selectively pushes them onto the priority queue */
+void expand_dijkstra_neighbours(PQ_Entry parent_entry,
+                                std::vector<float>& node_visited_costs,
+                                std::vector<bool>& node_expanded,
+                                std::priority_queue<PQ_Entry>& pq) {
+    auto& device_ctx = g_vpr_ctx.device();
+
+    int parent_ind = parent_entry.rr_node_ind;
+
+    auto& parent_node = device_ctx.rr_nodes[parent_ind];
+
+    for (int iedge = 0; iedge < parent_node.num_edges(); iedge++) {
+        int child_node_ind = parent_node.edge_sink_node(iedge);
+        int switch_ind = parent_node.edge_switch(iedge);
+
+        /* skip this child if it has already been expanded from */
+        if (node_expanded[child_node_ind]) {
+            continue;
+        }
+
+        PQ_Entry child_entry(child_node_ind, switch_ind, parent_entry.delay,
+                             parent_entry.R_upstream, parent_entry.congestion_upstream, false);
+
+        VTR_ASSERT(child_entry.cost >= 0);
+
+        /* skip this child if it has been visited with smaller cost */
+        if (node_visited_costs[child_node_ind] >= 0 && node_visited_costs[child_node_ind] < child_entry.cost) {
+            continue;
+        }
+
+        /* finally, record the cost with which the child was visited and put the child entry on the queue */
+        node_visited_costs[child_node_ind] = child_entry.cost;
+        pq.push(child_entry);
+    }
+}
diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h
new file mode 100644
index 00000000000..d0077ccb9bc
--- /dev/null
+++ b/vpr/src/route/router_lookahead_map_utils.h
@@ -0,0 +1,142 @@
+#ifndef ROUTER_LOOKAHEAD_MAP_UTILS_H_
+#define ROUTER_LOOKAHEAD_MAP_UTILS_H_
+/*
+ * The router lookahead provides an estimate of the cost from an intermediate node to the target node
+ * during directed (A*-like) routing.
+ *
+ * The VPR 7.0 lookahead (route/route_timing.c ==> get_timing_driven_expected_cost) lower-bounds the remaining delay and
+ * congestion by assuming that a minimum number of wires, of the same type as the current node being expanded, can be used
+ * to complete the route. While this method is efficient, it can run into trouble with architectures that use
+ * multiple interconnected wire types.
+ *
+ * The lookahead in this file pre-computes delay/congestion costs up and to the right of a starting tile. This generates
+ * delay/congestion tables for {CHANX, CHANY} channel types, over all wire types defined in the architecture file.
+ * See Section 3.2.4 in Oleg Petelin's MASc thesis (2016) for more discussion.
+ *
+ */
+
+#include <cmath>
+#include <limits>
+#include <vector>
+#include <queue>
+#include "vpr_types.h"
+
+/* when a list of delay/congestion entries at a coordinate in Expansion_Cost_Entry is boiled down to a single
+ * representative entry, this enum is passed-in to specify how that representative entry should be
+ * calculated */
+enum e_representative_entry_method {
+    FIRST = 0, //the first cost that was recorded
+    SMALLEST,  //the smallest-delay cost recorded
+    AVERAGE,
+    GEOMEAN,
+    MEDIAN
+};
+
+/* f_cost_map is an array of these cost entries that specifies delay/congestion estimates
+ * to travel relative x/y distances */
+class Cost_Entry {
+  public:
+    float delay;
+    float congestion;
+
+    Cost_Entry() {
+        delay = std::numeric_limits<float>::infinity();
+        congestion = std::numeric_limits<float>::infinity();
+    }
+    Cost_Entry(float set_delay, float set_congestion) {
+        delay = set_delay;
+        congestion = set_congestion;
+    }
+
+    bool valid() const {
+        return std::isfinite(delay) && std::isfinite(congestion);
+    }
+};
+
+/* a class that stores delay/congestion information for a given relative coordinate during the Dijkstra expansion.
+ * since it stores multiple cost entries, it is later boiled down to a single representative cost entry to be stored
+ * in the final lookahead cost map */
+class Expansion_Cost_Entry {
+  private:
+    std::vector<Cost_Entry> cost_vector;
+
+    Cost_Entry get_smallest_entry() const;
+    Cost_Entry get_average_entry() const;
+    Cost_Entry get_geomean_entry() const;
+    Cost_Entry get_median_entry() const;
+
+  public:
+    void add_cost_entry(e_representative_entry_method method,
+                        float add_delay,
+                        float add_congestion) {
+        Cost_Entry cost_entry(add_delay, add_congestion);
+        if (method == SMALLEST) {
+            /* taking the smallest-delay entry anyway, so no need to push back multiple entries */
+            if (this->cost_vector.empty()) {
+                this->cost_vector.push_back(cost_entry);
+            } else {
+                if (add_delay < this->cost_vector[0].delay) {
+                    this->cost_vector[0] = cost_entry;
+                }
+            }
+        } else {
+            this->cost_vector.push_back(cost_entry);
+        }
+    }
+    void clear_cost_entries() {
+        this->cost_vector.clear();
+    }
+
+    Cost_Entry get_representative_cost_entry(e_representative_entry_method method) const {
+        Cost_Entry entry;
+
+        if (!cost_vector.empty()) {
+            switch (method) {
+                case FIRST:
+                    entry = cost_vector[0];
+                    break;
+                case SMALLEST:
+                    entry = this->get_smallest_entry();
+                    break;
+                case AVERAGE:
+                    entry = this->get_average_entry();
+                    break;
+                case GEOMEAN:
+                    entry = this->get_geomean_entry();
+                    break;
+                case MEDIAN:
+                    entry = this->get_median_entry();
+                    break;
+                default:
+                    break;
+            }
+        }
+        return entry;
+    }
+};
+
+/* a class that represents an entry in the Dijkstra expansion priority queue */
+class PQ_Entry {
+  public:
+    int rr_node_ind; //index in device_ctx.rr_nodes that this entry represents
+    float cost;      //the cost of the path to get to this node
+
+    /* store backward delay, R and congestion info */
+    float delay;
+    float R_upstream;
+    float congestion_upstream;
+
+    PQ_Entry(int set_rr_node_ind, int /*switch_ind*/, float parent_delay, float parent_R_upstream, float parent_congestion_upstream, bool starting_node);
+
+    bool operator<(const PQ_Entry& obj) const {
+        /* inserted into max priority queue so want queue entries with a lower cost to be greater */
+        return (this->cost > obj.cost);
+    }
+};
+
+void expand_dijkstra_neighbours(PQ_Entry parent_entry,
+                                std::vector<float>& node_visited_costs,
+                                std::vector<bool>& node_expanded,
+                                std::priority_queue<PQ_Entry>& pq);
+
+#endif
diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp
index d00df14ef95..c264e73e0db 100644
--- a/vpr/src/route/rr_graph.cpp
+++ b/vpr/src/route/rr_graph.cpp
@@ -34,6 +34,7 @@ using namespace std;
 #include "rr_graph_writer.h"
 #include "rr_graph_reader.h"
 #include "router_lookahead_map.h"
+#include "connection_box_lookahead_map.h"
 #include "rr_graph_clock.h"
 
 #include "rr_types.h"
@@ -384,6 +385,10 @@ void create_rr_graph(const t_graph_type graph_type,
         compute_router_lookahead(segment_inf.size());
     }
 
+    if (router_lookahead_type == e_router_lookahead::CONNECTION_BOX_MAP) {
+        compute_connection_box_lookahead(segment_inf);
+    }
+
     //Write out rr graph file if needed
     if (!det_routing_arch->write_rr_graph_filename.empty()) {
         write_rr_graph(det_routing_arch->write_rr_graph_filename.c_str(), segment_inf);
diff --git a/vpr/src/route/rr_graph_reader.cpp b/vpr/src/route/rr_graph_reader.cpp
index 465ea6d92f9..7924b176b70 100644
--- a/vpr/src/route/rr_graph_reader.cpp
+++ b/vpr/src/route/rr_graph_reader.cpp
@@ -56,6 +56,7 @@ void verify_blocks(pugi::xml_node parent, const pugiutil::loc_data& loc_data);
 void process_blocks(pugi::xml_node parent, const pugiutil::loc_data& loc_data);
 void verify_grid(pugi::xml_node parent, const pugiutil::loc_data& loc_data, const DeviceGrid& grid);
 void process_nodes(pugi::xml_node parent, const pugiutil::loc_data& loc_data);
+void process_connection_boxes(pugi::xml_node parent, const pugiutil::loc_data& loc_data);
 void process_edges(pugi::xml_node parent, const pugiutil::loc_data& loc_data, int* wire_to_rr_ipin_switch, const int num_rr_switches);
 void process_channels(t_chan_width& chan_width, pugi::xml_node parent, const pugiutil::loc_data& loc_data);
 void process_rr_node_indices(const DeviceGrid& grid);
@@ -133,6 +134,13 @@ void load_rr_file(const t_graph_type graph_type,
         next_component = get_first_child(rr_graph, "channels", loc_data);
         process_channels(nodes_per_chan, next_component, loc_data);
 
+        next_component = get_first_child(rr_graph, "connection_boxes", loc_data, OPTIONAL);
+        if (next_component != nullptr) {
+            process_connection_boxes(next_component, loc_data);
+        } else {
+            device_ctx.connection_boxes.clear();
+        }
+
         /* Decode the graph_type */
         bool is_global_graph = (GRAPH_GLOBAL == graph_type ? true : false);
 
@@ -146,6 +154,7 @@ void load_rr_file(const t_graph_type graph_type,
         int num_rr_nodes = count_children(next_component, "node", loc_data);
 
         device_ctx.rr_nodes.resize(num_rr_nodes);
+        device_ctx.connection_boxes.resize_nodes(num_rr_nodes);
         process_nodes(next_component, loc_data);
 
         /* Loads edges, switches, and node look up tables*/
@@ -179,6 +188,7 @@ void load_rr_file(const t_graph_type graph_type,
         device_ctx.chan_width = nodes_per_chan;
 
         check_rr_graph(graph_type, grid, device_ctx.block_types);
+        device_ctx.connection_boxes.create_sink_back_ref();
 
     } catch (XmlError& e) {
         vpr_throw(VPR_ERROR_ROUTE, read_rr_graph_name, e.line(), "%s", e.what());
@@ -304,6 +314,18 @@ void process_nodes(pugi::xml_node parent, const pugiutil::loc_data& loc_data) {
             node.set_type(OPIN);
         } else if (strcmp(node_type, "IPIN") == 0) {
             node.set_type(IPIN);
+
+            pugi::xml_node connection_boxSubnode = get_single_child(rr_node, "connection_box", loc_data, OPTIONAL);
+            if (connection_boxSubnode) {
+                int x = get_attribute(connection_boxSubnode, "x", loc_data).as_int();
+                int y = get_attribute(connection_boxSubnode, "y", loc_data).as_int();
+                int id = get_attribute(connection_boxSubnode, "id", loc_data).as_int();
+
+                device_ctx.connection_boxes.add_connection_box(inode,
+                                                               ConnectionBoxId(id),
+                                                               std::make_pair(x, y));
+            }
+
         } else {
             vpr_throw(VPR_ERROR_OTHER, __FILE__, __LINE__,
                       "Valid inputs for class types are \"CHANX\", \"CHANY\",\"SOURCE\", \"SINK\",\"OPIN\", and \"IPIN\".");
@@ -323,6 +345,15 @@ void process_nodes(pugi::xml_node parent, const pugiutil::loc_data& loc_data) {
             }
         }
 
+        pugi::xml_node connection_boxSubnode = get_single_child(rr_node, "canonical_loc", loc_data, OPTIONAL);
+        if (connection_boxSubnode) {
+            int x = get_attribute(connection_boxSubnode, "x", loc_data).as_int();
+            int y = get_attribute(connection_boxSubnode, "y", loc_data).as_int();
+
+            device_ctx.connection_boxes.add_canonical_loc(inode,
+                                                          std::make_pair(x, y));
+        }
+
         node.set_capacity(get_attribute(rr_node, "capacity", loc_data).as_float());
 
         //--------------
@@ -876,3 +907,26 @@ void set_cost_indices(pugi::xml_node parent, const pugiutil::loc_data& loc_data,
         rr_node = rr_node.next_sibling(rr_node.name());
     }
 }
+// Loads the connection box table described by `parent` into device_ctx.connection_boxes.
+void process_connection_boxes(pugi::xml_node parent, const pugiutil::loc_data& loc_data) {
+    auto& device_ctx = g_vpr_ctx.mutable_device();
+    // x_dim, y_dim and num_boxes are read with as_int(0), i.e. 0 is the fallback value.
+    int x_dim = get_attribute(parent, "x_dim", loc_data).as_int(0);
+    int y_dim = get_attribute(parent, "y_dim", loc_data).as_int(0);
+    int num_boxes = get_attribute(parent, "num_boxes", loc_data).as_int(0);
+    VTR_ASSERT(num_boxes >= 0);
+    // One slot per box; each connection_box child fills the slot selected by its id.
+    pugi::xml_node connection_box = get_first_child(parent, "connection_box", loc_data);
+    std::vector<ConnectionBox> boxes(num_boxes);
+    while (connection_box) {
+        int id = get_attribute(connection_box, "id", loc_data).as_int(-1);
+        const char* name = get_attribute(connection_box, "name", loc_data).as_string(nullptr);
+        VTR_ASSERT(id >= 0 && id < num_boxes); // id must address one of the num_boxes slots
+        VTR_ASSERT(boxes.at(id).name == ""); // presumably rejects a repeated id - confirm ConnectionBox::name defaults to ""
+        boxes.at(id).name = std::string(name);
+        // Advance to the next sibling element that has the same tag name.
+        connection_box = connection_box.next_sibling(connection_box.name());
+    }
+    // Hand the parsed grid dimensions and box names over to the device context.
+    device_ctx.connection_boxes.reset_boxes(std::make_pair(x_dim, y_dim), boxes);
+}
diff --git a/vpr/src/route/rr_node.h b/vpr/src/route/rr_node.h
index 16075b8b176..5d610824684 100644
--- a/vpr/src/route/rr_node.h
+++ b/vpr/src/route/rr_node.h
@@ -173,7 +173,7 @@ class t_rr_node {
     uint16_t edges_capacity_ = 0;
     uint8_t num_non_configurable_edges_ = 0;
 
-    int8_t cost_index_ = -1;
+    uint16_t cost_index_ = -1;
     int16_t rc_index_ = -1;
 
     int16_t xlow_ = -1;

From 6f18b6d0dd52ac450fc627afe2486ebab2587cc7 Mon Sep 17 00:00:00 2001
From: Alessandro Comodi <acomodi@antmicro.com>
Date: Tue, 7 May 2019 17:29:52 +0200
Subject: [PATCH 05/15] vpr options: added option to disable check_route

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 vpr/src/base/SetupVPR.cpp     | 2 +-
 vpr/src/base/read_options.cpp | 5 +++++
 vpr/src/base/read_options.h   | 1 +
 vpr/src/base/vpr_api.cpp      | 4 +++-
 vpr/src/base/vpr_types.h      | 1 +
 5 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index 06ac0c2c237..879561b4af6 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -347,8 +347,8 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts)
     RouterOpts->max_convergence_count = Options.router_max_convergence_count;
     RouterOpts->reconvergence_cpd_threshold = Options.router_reconvergence_cpd_threshold;
     RouterOpts->first_iteration_timing_report_file = Options.router_first_iteration_timing_report_file;
-
     RouterOpts->strict_checks = Options.strict_checks;
+    RouterOpts->disable_check_route = Options.disable_check_route;
 }
 
 static void SetupAnnealSched(const t_options& Options,
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 28470df9142..28768919808 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -1533,6 +1533,11 @@ static argparse::ArgumentParser create_arg_parser(std::string prog_name, t_optio
         .default_value("")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
+    route_timing_grp.add_argument<bool, ParseOnOff>(args.disable_check_route, "--disable_check_route")
+        .help("Disables check_route once routing step has finished or when routing file is loaded")
+        .default_value("off")
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
     route_timing_grp.add_argument(args.router_debug_net, "--router_debug_net")
         .help(
             "Controls when router debugging is enabled.\n"
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index 2227656c1af..886262ffd60 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -118,6 +118,7 @@ struct t_options {
     argparse::ArgValue<bool> verify_binary_search;
     argparse::ArgValue<e_router_algorithm> RouterAlgorithm;
     argparse::ArgValue<int> min_incremental_reroute_fanout;
+    argparse::ArgValue<bool> disable_check_route;
 
     /* Timing-driven router options only */
     argparse::ArgValue<float> astar_fac;
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index d08a5764405..4167c804637 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -637,7 +637,9 @@ RouteStatus vpr_route_flow(t_vpr_setup& vpr_setup, const t_arch& arch) {
         std::string graphics_msg;
         if (route_status.success()) {
             //Sanity check the routing
-            check_route(router_opts.route_type);
+            if (!router_opts.disable_check_route) {
+                check_route(router_opts.route_type);
+            }
             get_serial_num();
 
             //Update status
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index a90f3f9f3fd..ffff9aed047 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -947,6 +947,7 @@ struct t_router_opts {
     float reconvergence_cpd_threshold;
     std::string first_iteration_timing_report_file;
     bool strict_checks;
+    bool disable_check_route;
 };
 
 struct t_analysis_opts {

From 990d96a6b6757dc907ee4e7210307fe0eb4a417e Mon Sep 17 00:00:00 2001
From: Alessandro Comodi <acomodi@antmicro.com>
Date: Fri, 31 May 2019 17:17:01 +0200
Subject: [PATCH 06/15] vpr: added optional disable errors and suppress
 warnings

Errors can be demoted to warnings for entire functions.
Through a command line option, developers can select the functions
whose errors should be treated as warnings.

Noisy warnings can be suppressed on stdout and redirected to a custom file,
which helps keep the output clear.

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 libs/libvtrutil/src/vtr_log.cpp  | 36 +++++++++++++++++++++++++++-
 libs/libvtrutil/src/vtr_log.h    | 27 ++++++++++++++++++---
 vpr/src/base/read_options.cpp    | 16 +++++++++++++
 vpr/src/base/read_options.h      |  2 ++
 vpr/src/base/vpr_api.cpp         | 23 ++++++++++++++++++
 vpr/src/place/place.cpp          | 12 ++--------
 vpr/src/route/check_route.cpp    |  2 +-
 vpr/src/route/check_rr_graph.cpp |  2 +-
 vpr/src/util/vpr_error.cpp       | 41 ++++++++++++++++++++++++++++++++
 vpr/src/util/vpr_error.h         | 23 ++++++++++++++----
 vpr/src/util/vpr_utils.cpp       |  3 ++-
 vpr/src/util/vpr_utils.h         |  2 ++
 12 files changed, 168 insertions(+), 21 deletions(-)

diff --git a/libs/libvtrutil/src/vtr_log.cpp b/libs/libvtrutil/src/vtr_log.cpp
index c0ae90759de..55d850c514a 100644
--- a/libs/libvtrutil/src/vtr_log.cpp
+++ b/libs/libvtrutil/src/vtr_log.cpp
@@ -1,5 +1,9 @@
-#include "vtr_log.h"
+#include <string>
+#include <fstream>
+#include <cstdarg>
 
+#include "vtr_util.h"
+#include "vtr_log.h"
 #include "log.h"
 
 namespace vtr {
@@ -14,3 +18,33 @@ void set_log_file(const char* filename) {
 }
 
 } // namespace vtr
+// Registers function_name so that suppress_warning() diverts warnings raised from it.
+void add_warnings_to_suppress(std::string function_name) {
+    warnings_to_suppress.insert(function_name);
+}
+
+// Truncates (or creates) log_file_name and records it as the noisy-warning log file.
+void set_noisy_warn_log_file(const char* log_file_name) {
+    // Opening with trunc empties the file; the stream is closed when it goes out of scope.
+    std::ofstream truncated(log_file_name, std::ios_base::out | std::ios_base::trunc);
+    noisy_warn_log_file = log_file_name;
+}
+
+// Emits a warning, or appends it to the noisy-warning log file when pszFuncName was
+// registered through add_warnings_to_suppress(). pszMessage is a printf-style format.
+void suppress_warning(const char* pszFileName, unsigned int lineNum, const char* pszFuncName, const char* pszMessage, ...) {
+    va_list va_args;
+    va_start(va_args, pszMessage);
+    std::string msg = vtr::vstring_fmt(pszMessage, va_args);
+    va_end(va_args);
+
+    if (warnings_to_suppress.count(pszFuncName) == 0) {
+        // msg is already formatted: pass it as an argument, never as the format
+        // string, so a literal '%' in the text is not parsed as a conversion.
+        vtr::printf_warning(pszFileName, lineNum, "%s", msg.c_str());
+    } else {
+        // Append mode keeps earlier entries; the stream closes itself on scope exit.
+        std::ofstream log(noisy_warn_log_file, std::ios_base::app);
+        log << "Warning:\n\tfile: " << pszFileName << "\n\tline: " << lineNum << "\n\tmessage: " << msg << std::endl;
+    }
+}
diff --git a/libs/libvtrutil/src/vtr_log.h b/libs/libvtrutil/src/vtr_log.h
index 878653ba84d..b7bc2dceadc 100644
--- a/libs/libvtrutil/src/vtr_log.h
+++ b/libs/libvtrutil/src/vtr_log.h
@@ -1,6 +1,8 @@
 #ifndef VTR_LOG_H
 #define VTR_LOG_H
 #include <tuple>
+#include <unordered_set>
+#include <string>
 
 /*
  * This header defines useful logging macros for VTR projects.
@@ -71,15 +73,18 @@
 #define VTR_LOGF_ERROR(file, line, ...) VTR_LOGVF_ERROR(true, file, line, __VA_ARGS__)
 #define VTR_LOGF_NOP(file, line, ...) VTR_LOGVF_NOP(true, file, line, __VA_ARGS__)
 
+//Unconditional warning with a caller-supplied file, line and function name (expands to VTR_LOGVFF_WARN with expr = true)
+#define VTR_LOGFF_WARN(file, line, func, ...) VTR_LOGVFF_WARN(true, file, line, func, __VA_ARGS__)
+
 //Conditional logging and custom file-line location macros
 #define VTR_LOGVF(expr, file, line, ...)    \
     do {                                    \
         if (expr) vtr::printf(__VA_ARGS__); \
     } while (false)
 
-#define VTR_LOGVF_WARN(expr, file, line, ...)                   \
-    do {                                                        \
-        if (expr) vtr::printf_warning(file, line, __VA_ARGS__); \
+#define VTR_LOGVF_WARN(expr, file, line, ...)                          \
+    do {                                                               \
+        if (expr) suppress_warning(file, line, __func__, __VA_ARGS__); \
     } while (false)
 
 #define VTR_LOGVF_ERROR(expr, file, line, ...)                \
@@ -87,6 +92,12 @@
         if (expr) vtr::printf_error(file, line, __VA_ARGS__); \
     } while (false)
 
+// Conditional warning with a caller-supplied file, line and function name; when expr holds it calls suppress_warning()
+#define VTR_LOGVFF_WARN(expr, file, line, func, ...)               \
+    do {                                                           \
+        if (expr) suppress_warning(file, line, func, __VA_ARGS__); \
+    } while (false)
+
 //No-op version of logging macro which avoids unused parameter warnings.
 //
 //Note that to avoid unused parameter warnings we call sizeof() and cast
@@ -129,4 +140,14 @@ void set_log_file(const char* filename);
 
 } // namespace vtr
 
+// Machinery for diverting noisy warnings from the normal log into an external file.
+// NOTE(review): both variables are `static` in a header, so each including .cpp gets its own copy.
+static std::unordered_set<std::string> warnings_to_suppress;
+static std::string noisy_warn_log_file;
+// Register a function name whose warnings are diverted / set the file they are diverted to.
+void add_warnings_to_suppress(std::string function_name);
+void set_noisy_warn_log_file(const char* log_file_name);
+// Printf-style warning entry point; pszFuncName is looked up in warnings_to_suppress.
+void suppress_warning(const char* pszFileName, unsigned int lineNum, const char* pszFuncName, const char* pszMessage, ...);
+
 #endif
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 28470df9142..4f5123c208e 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -931,6 +931,22 @@ static argparse::ArgumentParser create_arg_parser(std::string prog_name, t_optio
         .default_value("on")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
+    gen_grp.add_argument<std::string>(args.disable_errors, "--disable_errors")
+        .help(
+            "Parses a list of functions for which the errors are going to be treated as warnings.\n"
+            "Each function in the list is delimited by `:`\n"
+            "This option should be only used for development purposes.")
+        .default_value("");
+
+    gen_grp.add_argument<std::string>(args.suppress_warnings, "--suppress_warnings")
+        .help(
+            "Parses a list of functions for which the warnings will be suppressed on stdout.\n"
+            "The first element of the list is the name of the output log file with the suppressed warnings.\n"
+            "The file name and the list of functions is separated by `,`\n"
+            "Each function in the list is delimited by `:`\n"
+            "This option should be only used for development purposes.")
+        .default_value("");
+
     auto& file_grp = parser.add_argument_group("file options");
 
     file_grp.add_argument(args.BlifFile, "--circuit_file")
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index 2227656c1af..b586733b48b 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -50,6 +50,8 @@ struct t_options {
     argparse::ArgValue<e_clock_modeling> clock_modeling;
     argparse::ArgValue<bool> exit_before_pack;
     argparse::ArgValue<bool> strict_checks;
+    argparse::ArgValue<std::string> disable_errors;
+    argparse::ArgValue<std::string> suppress_warnings;
 
     /* Atom netlist options */
     argparse::ArgValue<bool> absorb_buffer_luts;
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index d08a5764405..e2b7143f2d1 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -214,6 +214,29 @@ void vpr_init(const int argc, const char** argv, t_options* options, t_vpr_setup
     /* Determine whether echo is on or off */
     setEchoEnabled(options->CreateEchoFile);
 
+    /*
+     * Initialize the functions names for which VPR_THROWs
+     * are demoted to VTR_LOG_WARNs
+     */
+    for (std::string func_name : vtr::split(options->disable_errors, std::string(":"))) {
+        map_error_activation_status(func_name);
+    }
+
+    /*
+     * Initialize the functions names for which
+     * warnings are being suppressed
+     */
+    std::vector<std::string> split_warning_option = vtr::split(options->suppress_warnings, std::string(","));
+
+    // If the file or the list of functions is not provided
+    // no warning is suppressed
+    if (split_warning_option.size() == 2) {
+        set_noisy_warn_log_file(split_warning_option[0].data());
+        for (std::string func_name : vtr::split(split_warning_option[1], std::string(":"))) {
+            add_warnings_to_suppress(func_name);
+        }
+    }
+
     /* Read in arch and circuit */
     SetupVPR(options,
              vpr_setup->TimingEnabled,
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 94ccc1a4788..d769a295809 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -963,11 +963,7 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const
     if (fabs(new_bb_cost - costs->bb_cost) > costs->bb_cost * ERROR_TOL) {
         std::string msg = vtr::string_fmt("in recompute_costs_from_scratch: new_bb_cost = %g, old bb_cost = %g\n",
                                           new_bb_cost, costs->bb_cost);
-        if (placer_opts.strict_checks) {
-            vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, msg.c_str());
-        } else {
-            VTR_LOG_WARN(msg.c_str());
-        }
+        VPR_THROW(VPR_ERROR_PLACE, msg.c_str());
     }
     costs->bb_cost = new_bb_cost;
 
@@ -977,11 +973,7 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const
         if (fabs(new_timing_cost - costs->timing_cost) > costs->timing_cost * ERROR_TOL) {
             std::string msg = vtr::string_fmt("in recompute_costs_from_scratch: new_timing_cost = %g, old timing_cost = %g, ERROR_TOL = %g\n",
                                               new_timing_cost, costs->timing_cost, ERROR_TOL);
-            if (placer_opts.strict_checks) {
-                vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, msg.c_str());
-            } else {
-                VTR_LOG_WARN(msg.c_str());
-            }
+            VPR_THROW(VPR_ERROR_PLACE, msg.c_str());
         }
         costs->timing_cost = new_timing_cost;
     } else {
diff --git a/vpr/src/route/check_route.cpp b/vpr/src/route/check_route.cpp
index 84b6290a144..dbef691c861 100644
--- a/vpr/src/route/check_route.cpp
+++ b/vpr/src/route/check_route.cpp
@@ -118,7 +118,7 @@ void check_route(enum e_route_type route_type) {
             } else { //Continuing along existing branch
                 connects = check_adjacent(prev_node, inode);
                 if (!connects) {
-                    vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
+                    VPR_THROW(VPR_ERROR_ROUTE,
                               "in check_route: found non-adjacent segments in traceback while checking net %d:\n"
                               "  %s\n"
                               "  %s\n",
diff --git a/vpr/src/route/check_rr_graph.cpp b/vpr/src/route/check_rr_graph.cpp
index 3699746fda5..846680ab69e 100644
--- a/vpr/src/route/check_rr_graph.cpp
+++ b/vpr/src/route/check_rr_graph.cpp
@@ -502,7 +502,7 @@ static void check_unbuffered_edges(int from_node) {
         }
 
         if (trans_matched == false) {
-            vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__,
+            VPR_THROW(VPR_ERROR_ROUTE,
                       "in check_unbuffered_edges:\n"
                       "connection from node %d to node %d uses an unbuffered switch (switch type %d '%s')\n"
                       "but there is no corresponding unbuffered switch edge in the other direction.\n",
diff --git a/vpr/src/util/vpr_error.cpp b/vpr/src/util/vpr_error.cpp
index 96e0f4bba3f..2bad3b6f919 100644
--- a/vpr/src/util/vpr_error.cpp
+++ b/vpr/src/util/vpr_error.cpp
@@ -1,6 +1,8 @@
 #include <cstdarg>
+#include <string>
 
 #include "vtr_util.h"
+#include "vtr_log.h"
 #include "vpr_error.h"
 
 /* Date:June 15th, 2013
@@ -11,6 +13,10 @@
  *			anything but throw an exception which will be caught
  *			main.c.
  */
+void map_error_activation_status(std::string function_name) {
+    functions_to_demote.insert(function_name); // vpr_throw_opt() logs a warning instead of throwing for names in this set
+}
+
 void vpr_throw(enum e_vpr_error type,
                const char* psz_file_name,
                unsigned int line_num,
@@ -41,3 +47,38 @@ void vvpr_throw(enum e_vpr_error type,
 
     throw VprError(type, msg, psz_file_name, line_num);
 }
+// Throws a VprError of the given type built from an already-formatted message and a file/line location.
+void vpr_throw_msg(enum e_vpr_error type,
+                   const char* psz_file_name,
+                   unsigned int line_num,
+                   std::string msg) {
+    throw VprError(type, msg, psz_file_name, line_num);
+}
+
+// Formats psz_message (printf-style) with the trailing arguments, then either
+// logs the result as a warning, when psz_func_name was registered through
+// map_error_activation_status(), or throws a VprError of the given type that
+// carries psz_file_name and line_num.
+void vpr_throw_opt(enum e_vpr_error type,
+                   const char* psz_func_name,
+                   const char* psz_file_name,
+                   unsigned int line_num,
+                   const char* psz_message,
+                   ...) {
+    // Format first and release the va_list right away, so that va_end() is
+    // also reached on the path that throws below.
+    va_list va_args;
+    va_start(va_args, psz_message);
+    std::string msg = vtr::vstring_fmt(psz_message, va_args);
+    va_end(va_args);
+
+    // Demoted functions only warn; every other caller gets the exception.
+    if (functions_to_demote.count(psz_func_name) != 0) {
+        // msg is already formatted: forward it through "%s" so that a '%'
+        // it contains is not parsed as a conversion specifier.
+        VTR_LOGFF_WARN(psz_file_name, line_num, psz_func_name, "%s", msg.c_str());
+    } else {
+        // Not demoted: raise the error as usual.
+        vpr_throw_msg(type, psz_file_name, line_num, msg);
+    }
+}
diff --git a/vpr/src/util/vpr_error.h b/vpr/src/util/vpr_error.h
index f999889359b..32619251c49 100644
--- a/vpr/src/util/vpr_error.h
+++ b/vpr/src/util/vpr_error.h
@@ -1,8 +1,11 @@
 #ifndef VPR_ERROR_H
 #define VPR_ERROR_H
 
-#include "vtr_error.h"
 #include <cstdarg>
+#include <string>
+#include <unordered_set>
+
+#include "vtr_error.h"
 
 enum e_vpr_error {
     VPR_ERROR_UNKNOWN = 0,
@@ -45,6 +48,15 @@ class VprError : public vtr::VtrError {
     t_vpr_error_type type_;
 };
 
+// Names of the functions whose VPR_THROW errors are demoted to warnings.
+// NOTE(review): `static` in a header gives every including .cpp a private copy of this set.
+static std::unordered_set<std::string> functions_to_demote;
+
+// Adds function_name to the functions_to_demote set, i.e. marks the
+// function as one whose VPR_THROW errors are going to be reported
+// as warnings instead of being thrown.
+void map_error_activation_status(std::string function_name);
+
 //VPR error reporting routines
 //
 //Note that we mark these functions with the C++11 attribute 'noreturn'
@@ -52,14 +64,17 @@ class VprError : public vtr::VtrError {
 //reduce false-positive compiler warnings
 [[noreturn]] void vpr_throw(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, const char* psz_message, ...);
 [[noreturn]] void vvpr_throw(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, const char* psz_message, va_list args);
+[[noreturn]] void vpr_throw_msg(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, std::string msg);
+
+void vpr_throw_opt(enum e_vpr_error type, const char* psz_func_name, const char* psz_file_name, unsigned int line_num, const char* psz_message, ...);
 
 /*
  * Macro wrapper around vpr_throw() which automatically
  * specifies file and line number of call site.
  */
-#define VPR_THROW(type, ...)                              \
-    do {                                                  \
-        vpr_throw(type, __FILE__, __LINE__, __VA_ARGS__); \
+#define VPR_THROW(type, ...)                                            \
+    do {                                                                \
+        vpr_throw_opt(type, __func__, __FILE__, __LINE__, __VA_ARGS__); \
     } while (false)
 
 #endif
diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp
index 34b15f205b4..3df718b6099 100644
--- a/vpr/src/util/vpr_utils.cpp
+++ b/vpr/src/util/vpr_utils.cpp
@@ -1,6 +1,8 @@
 #include <cstring>
 #include <unordered_set>
 #include <regex>
+#include <algorithm>
+
 using namespace std;
 
 #include "vtr_assert.h"
@@ -18,7 +20,6 @@ using namespace std;
 #include "string.h"
 #include "pack_types.h"
 #include "device_grid.h"
-#include <algorithm>
 
 /* This module contains subroutines that are used in several unrelated parts *
  * of VPR.  They are VPR-specific utility routines.                          */
diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h
index 4b6cd5ff09e..bc53e60950e 100644
--- a/vpr/src/util/vpr_utils.h
+++ b/vpr/src/util/vpr_utils.h
@@ -2,7 +2,9 @@
 #define VPR_UTILS_H
 
 #include <vector>
+#include <string>
 #include <regex>
+
 #include "vpr_types.h"
 #include "atom_netlist.h"
 #include "clustered_netlist.h"

From caf3e416669c8273d0dcf737622da544e33e1a38 Mon Sep 17 00:00:00 2001
From: Keith Rothman <537074+litghost@users.noreply.github.com>
Date: Tue, 5 Mar 2019 16:17:00 -0800
Subject: [PATCH 07/15] Revert badge to the SymbiFlow Travis-CI.

Signed-off-by: Keith Rothman <537074+litghost@users.noreply.github.com>

Updated README.md

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 README.md | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d57d1a35bc0..4a41e9f4868 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,14 @@
+
+SymbiFlow WIP changes for Verilog to Routing (VTR)
+==================================================
+
+This branch contains work in progress changes for using Verilog to Routing
+(VTR) as part of SymbiFlow.
+
+---
+
 # Verilog to Routing (VTR)
-[![Build Status](https://travis-ci.org/verilog-to-routing/vtr-verilog-to-routing.svg?branch=master)](https://travis-ci.org/verilog-to-routing/vtr-verilog-to-routing) [![Documentation Status](https://readthedocs.org/projects/vtr/badge/?version=latest)](http://docs.verilogtorouting.org/en/latest/?badge=latest)
+[![Build Status](https://travis-ci.com/SymbiFlow/vtr-verilog-to-routing.svg?branch=master)](https://travis-ci.com/SymbiFlow/vtr-verilog-to-routing) [![Documentation Status](https://readthedocs.org/projects/vtr/badge/?version=latest)](http://docs.verilogtorouting.org/en/latest/?badge=latest)
 
 ## Introduction
 The Verilog-to-Routing (VTR) project is a world-wide collaborative effort to provide a open-source framework for conducting FPGA architecture and CAD research and development.

From d48ea3004501fd03fb8574a422e20a10a01609ce Mon Sep 17 00:00:00 2001
From: Alessandro Comodi <acomodi@antmicro.com>
Date: Thu, 6 Jun 2019 22:58:22 +0200
Subject: [PATCH 08/15] vpr: added option for dangling comb nodes

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 vpr/src/base/read_options.cpp           | 10 ++++++++++
 vpr/src/base/read_options.h             |  1 +
 vpr/src/base/vpr_api.cpp                |  2 +-
 vpr/src/timing/timing_graph_builder.cpp | 10 +++++++---
 vpr/src/timing/timing_graph_builder.h   |  4 ++--
 5 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 28470df9142..e28f6ed6473 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -931,6 +931,16 @@ static argparse::ArgumentParser create_arg_parser(std::string prog_name, t_optio
         .default_value("on")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
+    gen_grp.add_argument<bool, ParseOnOff>(args.allow_dangling_combinational_nodes, "--allow_dangling_combinational_nodes")
+        .help(
+            "Option to allow dangling combinational nodes in the timing graph.\n"
+            "This option should normally be off, as dangling combinational nodes are unusual\n"
+            "in the timing graph and may indicate a problem in the circuit or architecture.\n"
+            "Unless you understand why your architecture/circuit can have valid dangling combinational nodes, this option should be off.\n"
+            "In general this is a dev-only option and should not be turned on by the end-user.")
+        .default_value("off")
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
     auto& file_grp = parser.add_argument_group("file options");
 
     file_grp.add_argument(args.BlifFile, "--circuit_file")
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index 2227656c1af..fd3e97cf99e 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -50,6 +50,7 @@ struct t_options {
     argparse::ArgValue<e_clock_modeling> clock_modeling;
     argparse::ArgValue<bool> exit_before_pack;
     argparse::ArgValue<bool> strict_checks;
+    argparse::ArgValue<bool> allow_dangling_combinational_nodes;
 
     /* Atom netlist options */
     argparse::ArgValue<bool> absorb_buffer_luts;
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index d08a5764405..43374bde441 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -275,7 +275,7 @@ void vpr_init(const int argc, const char** argv, t_options* options, t_vpr_setup
         auto& timing_ctx = g_vpr_ctx.mutable_timing();
         {
             vtr::ScopedStartFinishTimer t("Build Timing Graph");
-            timing_ctx.graph = TimingGraphBuilder(atom_ctx.nlist, atom_ctx.lookup).timing_graph();
+            timing_ctx.graph = TimingGraphBuilder(atom_ctx.nlist, atom_ctx.lookup).timing_graph(options->allow_dangling_combinational_nodes);
             VTR_LOG("  Timing Graph Nodes: %zu\n", timing_ctx.graph->nodes().size());
             VTR_LOG("  Timing Graph Edges: %zu\n", timing_ctx.graph->edges().size());
             VTR_LOG("  Timing Graph Levels: %zu\n", timing_ctx.graph->levels().size());
diff --git a/vpr/src/timing/timing_graph_builder.cpp b/vpr/src/timing/timing_graph_builder.cpp
index 2ae02a020db..192b9ed8aaa 100644
--- a/vpr/src/timing/timing_graph_builder.cpp
+++ b/vpr/src/timing/timing_graph_builder.cpp
@@ -40,8 +40,8 @@ TimingGraphBuilder::TimingGraphBuilder(const AtomNetlist& netlist,
     //pass
 }
 
-std::unique_ptr<TimingGraph> TimingGraphBuilder::timing_graph() {
-    build();
+std::unique_ptr<TimingGraph> TimingGraphBuilder::timing_graph(bool allow_dangling_combinational_nodes) {
+    build(allow_dangling_combinational_nodes);
     opt_memory_layout();
 
     VTR_ASSERT(tg_);
@@ -50,9 +50,13 @@ std::unique_ptr<TimingGraph> TimingGraphBuilder::timing_graph() {
     return std::move(tg_);
 }
 
-void TimingGraphBuilder::build() {
+void TimingGraphBuilder::build(bool allow_dangling_combinational_nodes) {
     tg_ = std::make_unique<tatum::TimingGraph>();
 
+    // Optionally allow dangling combinational nodes.
+    // Set by `--allow_dangling_combinational_nodes on`. Default value is false
+    tg_->set_allow_dangling_combinational_nodes(allow_dangling_combinational_nodes);
+
     for (AtomBlockId blk : netlist_.blocks()) {
         AtomBlockType blk_type = netlist_.block_type(blk);
 
diff --git a/vpr/src/timing/timing_graph_builder.h b/vpr/src/timing/timing_graph_builder.h
index 0ca93d1e19f..8e6745b7cb1 100644
--- a/vpr/src/timing/timing_graph_builder.h
+++ b/vpr/src/timing/timing_graph_builder.h
@@ -10,10 +10,10 @@ class TimingGraphBuilder {
     TimingGraphBuilder(const AtomNetlist& netlist,
                        AtomLookup& netlist_lookup);
 
-    std::unique_ptr<tatum::TimingGraph> timing_graph();
+    std::unique_ptr<tatum::TimingGraph> timing_graph(bool allow_dangling_combinational_nodes);
 
   private:
-    void build();
+    void build(bool allow_dangling_combinational_nodes);
     void opt_memory_layout();
 
     void add_io_to_timing_graph(const AtomBlockId blk);

From 98142910f58eafc245351e8cdec271cb572a3db7 Mon Sep 17 00:00:00 2001
From: Alessandro Comodi <acomodi@antmicro.com>
Date: Wed, 3 Apr 2019 16:47:15 +0200
Subject: [PATCH 09/15] equivalent tiles: added equivalent tile placement
 capability

This commit introduces two major features:
- the tile concept in the architecture XML. The top-level pb_type is
discarded and all of the top-level pb_type information is moved into
the tile tags. This will cause the CI build to fail, because none of
the architectures include the tiles tag right now.

- the possibility to place blocks in equivalent tiles (SLICEL
blocks into SLICEM ones). According to the XML architecture description,
some tiles can be equivalent to others and can be used during the
placement step (this can lead to better placement solutions).

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 libs/libarchfpga/src/physical_types.cpp       |    9 +
 libs/libarchfpga/src/physical_types.h         |   31 +-
 libs/libarchfpga/src/read_xml_arch_file.cpp   |  382 +++-
 utils/fasm/src/fasm.cpp                       |    6 +-
 vpr/src/base/clustered_netlist.cpp            |   49 +-
 vpr/src/base/clustered_netlist.h              |   34 +-
 vpr/src/pack/pack.cpp                         |    8 +-
 vpr/src/place/place.cpp                       |  288 ++-
 vpr/src/timing/clb_delay_calc.inl             |    5 +-
 vpr/src/util/vpr_utils.cpp                    |  110 +-
 vtr_flow/arch/equivalent_tiles/slice.xml      | 1625 +++++++++++++++++
 .../strong_equivalent_tiles/config/config.txt |   31 +
 12 files changed, 2386 insertions(+), 192 deletions(-)
 create mode 100644 vtr_flow/arch/equivalent_tiles/slice.xml
 create mode 100644 vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_equivalent_tiles/config/config.txt

diff --git a/libs/libarchfpga/src/physical_types.cpp b/libs/libarchfpga/src/physical_types.cpp
index 0e684e37b29..a345c9dd3ae 100644
--- a/libs/libarchfpga/src/physical_types.cpp
+++ b/libs/libarchfpga/src/physical_types.cpp
@@ -140,6 +140,15 @@ std::vector<int> t_type_descriptor::get_clock_pins_indices() const {
     return indices;
 }
 
+// Reports whether index_to_check is present in available_tiles_indices.
+bool t_type_descriptor::is_available_tile_index(int index_to_check) const {
+    // A find() result different from end() means the index was found.
+    const auto not_found = available_tiles_indices.end();
+    const bool is_present = available_tiles_indices.find(index_to_check) != not_found;
+
+    return is_present;
+}
+
 /**
  * t_pb_graph_node
  */
diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h
index b48d6aaa1b1..0213261fecf 100644
--- a/libs/libarchfpga/src/physical_types.h
+++ b/libs/libarchfpga/src/physical_types.h
@@ -31,9 +31,9 @@
 #include <unordered_map>
 #include <string>
 #include <map>
-#include <unordered_map>
 #include <limits>
 #include <numeric>
+#include <unordered_set>
 
 #include "vtr_ndmatrix.h"
 #include "vtr_hash.h"
@@ -556,6 +556,20 @@ constexpr int DEFAULT_SWITCH = -2;
  * pb_type: Internal subblocks and routing information for this physical block
  * pb_graph_head: Head of DAG of pb_types_nodes and their edges
  *
+ *
+ * num_equivalent_tiles: Specifies the number of equivalent physical types that can be used during placement.
+ *                       If the value is `0` all the data structures relative to the equivalent tiles will be empty.
+ * equivalent_tiles: Array containing pointers to the equivalent tiles. The number of elements contained is specified
+ *                   by num_equivalent_tiles.
+ * equivalent_tile_pin_mapping: Multi-dimensional array that, for each different equivalent tile contains a mapping between
+ *                              the pins of the two tiles.
+ *                              Example: equivalent_tile_pin_mapping[eq_tile_index][pin_index] = equivalent_pin_index
+ *                              This is necessary to maintain consistency between two equivalent tiles that have the same pins
+ *                              defined with different indices.
+ * equivalent_tile_inverse_pin_mapping: Multi-dimensional array that works as the previous one, but the mapping is inverse in this case.
+ *                                      Example: equivalent_tile_inverse_pin_mapping[eq_tile_index][equivalent_pin_index] = pin_index
+ * available_tiles_indices: unordered set used to have a fast lookup on the available tiles.
+ *
  * area: Describes how much area this logic block takes, if undefined, use default
  * type_timing_inf: timing information unique to this type
  * num_drivers: Total number of output drivers supplied
@@ -595,6 +609,13 @@ struct t_type_descriptor /* TODO rename this.  maybe physical type descriptor or
     t_pb_type* pb_type = nullptr;
     t_pb_graph_node* pb_graph_head = nullptr;
 
+    /* Equivalent tiles information */
+    int num_equivalent_tiles = 0;
+    std::unordered_map<int, t_type_descriptor*> equivalent_tiles;                              /* [0..num_equivalent_tiles-1] */
+    std::unordered_map<int, std::unordered_map<int, int>> equivalent_tile_pin_mapping;         /* [0..num_equivalent_tiles-1][0..num_pins-1] */
+    std::unordered_map<int, std::unordered_map<int, int>> equivalent_tile_inverse_pin_mapping; /* [0..num_equivalent_tiles-1][0..num_pins-1] */
+    std::unordered_set<int> available_tiles_indices;
+
     float area = 0;
 
     /* This info can be determined from class_inf and pin_class but stored for faster access */
@@ -603,8 +624,15 @@ struct t_type_descriptor /* TODO rename this.  maybe physical type descriptor or
 
     int index = -1; /* index of type descriptor in array (allows for index referencing) */
 
+    /***********
+     * Methods *
+     ***********/
+
     /* Returns the indices of pins that contain a clock for this physical logic block */
     std::vector<int> get_clock_pins_indices() const;
+
+    /* Returns a boolean set to True if the input index belongs to an available tile, False otherwise */
+    bool is_available_tile_index(int index_to_check) const;
 };
 typedef const t_type_descriptor* t_type_ptr;
 
@@ -1200,6 +1228,7 @@ struct t_segment_inf {
     std::vector<bool> cb;
     std::vector<bool> sb;
     //float Cmetal_per_m; /* Wire capacitance (per meter) */
+    t_metadata_dict* meta = nullptr;
 };
 
 enum class SwitchType {
diff --git a/libs/libarchfpga/src/read_xml_arch_file.cpp b/libs/libarchfpga/src/read_xml_arch_file.cpp
index e2c374261c9..1a62ad7dcdd 100644
--- a/libs/libarchfpga/src/read_xml_arch_file.cpp
+++ b/libs/libarchfpga/src/read_xml_arch_file.cpp
@@ -101,7 +101,7 @@ static void Process_Fc(pugi::xml_node Node, t_type_descriptor* Type, std::vector
 static t_fc_override Process_Fc_override(pugi::xml_node node, const pugiutil::loc_data& loc_data);
 static void ProcessSwitchblockLocations(pugi::xml_node swtichblock_locations, t_type_descriptor* type, const t_arch& arch, const pugiutil::loc_data& loc_data);
 static e_fc_value_type string_to_fc_value_type(const std::string& str, pugi::xml_node node, const pugiutil::loc_data& loc_data);
-static void ProcessComplexBlockProps(pugi::xml_node Node, t_type_descriptor* Type, const pugiutil::loc_data& loc_data);
+static void ProcessTileProps(pugi::xml_node Node, t_type_descriptor* Type, const pugiutil::loc_data& loc_data);
 static void ProcessChanWidthDistr(pugi::xml_node Node,
                                   t_arch* arch,
                                   const pugiutil::loc_data& loc_data);
@@ -111,12 +111,29 @@ static void ProcessModelPorts(pugi::xml_node port_group, t_model* model, std::se
 static void ProcessLayout(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data);
 static t_grid_def ProcessGridLayout(pugi::xml_node layout_type_tag, const pugiutil::loc_data& loc_data);
 static void ProcessDevice(pugi::xml_node Node, t_arch* arch, t_default_fc_spec& arch_def_fc, const pugiutil::loc_data& loc_data);
+static void ProcessTiles(pugi::xml_node Node,
+                         t_type_descriptor** Types,
+                         int* NumTypes,
+                         std::unordered_map<std::string, t_type_descriptor*>* TypeMap,
+                         const pugiutil::loc_data& loc_data);
+static void ProcessTilesTags(pugi::xml_node Node,
+                             std::unordered_map<std::string, t_type_descriptor*> TypeMap,
+                             t_arch& arch,
+                             const t_default_fc_spec& arch_def_fc,
+                             const pugiutil::loc_data& loc_data);
+static void ProcessTileExtraModes(pugi::xml_node Node,
+                                  t_type_descriptor* Type,
+                                  std::unordered_map<std::string, t_type_descriptor*> TypeMap,
+                                  const pugiutil::loc_data& loc_data);
+static void ProcessTileExtraModePinMapping(pugi::xml_node Node,
+                                           t_type_descriptor* Type,
+                                           t_type_descriptor* EquivalentType,
+                                           int imode,
+                                           const pugiutil::loc_data& loc_data);
 static void ProcessComplexBlocks(pugi::xml_node Node,
-                                 t_type_descriptor** Types,
-                                 int* NumTypes,
+                                 std::unordered_map<std::string, t_type_descriptor*> TypeMap,
                                  t_arch& arch,
                                  const bool timing_enabled,
-                                 const t_default_fc_spec& arch_def_fc,
                                  const pugiutil::loc_data& loc_data);
 static void ProcessSwitches(pugi::xml_node Node,
                             t_arch_switch_inf** Switches,
@@ -176,6 +193,8 @@ int find_switch_by_name(const t_arch& arch, std::string switch_name);
 
 e_side string_to_side(std::string side_str);
 
+static t_type_descriptor* get_corresponding_tile(std::unordered_map<std::string, t_type_descriptor*> TypeMap, const char* type_name);
+static int get_pin_index_by_name(t_type_descriptor* Type, const char* port_name, int offset);
 /*
  *
  *
@@ -254,9 +273,18 @@ void XmlReadArch(const char* ArchFile, const bool timing_enabled, t_arch* arch,
             ProcessSwitchblocks(Next, arch, loc_data);
         }
 
-        /* Process types */
+        /* Process tiles */
+        std::unordered_map<std::string, t_type_descriptor*> TypeMap;
+        Next = get_single_child(architecture, "tiles", loc_data);
+        ProcessTiles(Next, Types, NumTypes, &TypeMap, loc_data);
+
+        /* Process pb_types */
         Next = get_single_child(architecture, "complexblocklist", loc_data);
-        ProcessComplexBlocks(Next, Types, NumTypes, *arch, timing_enabled, arch_def_fc, loc_data);
+        ProcessComplexBlocks(Next, TypeMap, *arch, timing_enabled, loc_data);
+
+        /* Process the tile tags that can only be read after the pb_types have been parsed */
+        Next = get_single_child(architecture, "tiles", loc_data);
+        ProcessTilesTags(Next, TypeMap, *arch, arch_def_fc, loc_data);
 
         /* Process directs */
         Next = get_single_child(architecture, "directlist", loc_data, OPTIONAL);
@@ -969,14 +997,6 @@ static void ProcessPb_Type(pugi::xml_node Parent, t_pb_type* pb_type, t_mode* mo
         children_to_expect.push_back("model");
         children_to_expect.push_back("pb_type");
         children_to_expect.push_back("interconnect");
-
-        if (is_root_pb_type) {
-            VTR_ASSERT(!is_leaf_pb_type);
-            //Top level pb_type's may also have the following tag types
-            children_to_expect.push_back("fc");
-            children_to_expect.push_back("pinlocations");
-            children_to_expect.push_back("switchblock_locations");
-        }
     } else {
         VTR_ASSERT(is_leaf_pb_type);
         VTR_ASSERT(!is_root_pb_type);
@@ -1698,7 +1718,7 @@ static void Process_Fc(pugi::xml_node Node, t_type_descriptor* Type, std::vector
         /* Use the default value, if available */
         if (!arch_def_fc.specified) {
             archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                           "<pb_type> is missing child <fc>, and no <default_fc> specified in architecture\n");
+                           "<tile> is missing child <fc>, and no <default_fc> specified in architecture\n");
         }
         def_fc_spec = arch_def_fc;
     }
@@ -2017,28 +2037,6 @@ static void ProcessSwitchblockLocations(pugi::xml_node switchblock_locations, t_
     }
 }
 
-/* Thie processes attributes of the 'type' tag */
-static void ProcessComplexBlockProps(pugi::xml_node Node, t_type_descriptor* Type, const pugiutil::loc_data& loc_data) {
-    const char* Prop;
-
-    expect_only_attributes(Node, {"name", "capacity", "width", "height", "area"}, loc_data);
-
-    /* Load type name */
-    Prop = get_attribute(Node, "name", loc_data).value();
-    Type->name = vtr::strdup(Prop);
-
-    /* Load properties */
-    Type->capacity = get_attribute(Node, "capacity", loc_data, OPTIONAL).as_uint(1); /* TODO: Any block with capacity > 1 that is not I/O has not been tested, must test */
-    Type->width = get_attribute(Node, "width", loc_data, OPTIONAL).as_uint(1);
-    Type->height = get_attribute(Node, "height", loc_data, OPTIONAL).as_uint(1);
-    Type->area = get_attribute(Node, "area", loc_data, OPTIONAL).as_float(UNDEFINED);
-
-    if (atof(Prop) < 0) {
-        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
-                       "Area for type %s must be non-negative\n", Type->name);
-    }
-}
-
 /* Takes in node pointing to <models> and loads all the
  * child type objects.  */
 static void ProcessModels(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data) {
@@ -2620,16 +2618,36 @@ static void ProcessChanWidthDistrDir(pugi::xml_node Node, t_chan* chan, const pu
     chan->dc = get_attribute(Node, "dc", loc_data, hasDc).as_float(0);
 }
 
-/* Takes in node pointing to <typelist> and loads all the
- * child type objects. */
-static void ProcessComplexBlocks(pugi::xml_node Node,
-                                 t_type_descriptor** Types,
-                                 int* NumTypes,
-                                 t_arch& arch,
-                                 const bool timing_enabled,
-                                 const t_default_fc_spec& arch_def_fc,
-                                 const pugiutil::loc_data& loc_data) {
-    pugi::xml_node CurType, Prev;
+/* Processes the attributes of a <tile> tag: stores its name, capacity, width, height and area in Type */
+static void ProcessTileProps(pugi::xml_node Node, t_type_descriptor* Type, const pugiutil::loc_data& loc_data) {
+    expect_only_attributes(Node, {"name", "capacity", "width", "height", "area"}, loc_data);
+
+    /* Load type name */
+    const char* Prop = get_attribute(Node, "name", loc_data).value();
+    Type->name = vtr::strdup(Prop);
+
+    /* Load properties (each of them is optional and has a default) */
+    Type->capacity = get_attribute(Node, "capacity", loc_data, OPTIONAL).as_uint(1); /* TODO: Any block with capacity > 1 that is not I/O has not been tested, must test */
+    Type->width = get_attribute(Node, "width", loc_data, OPTIONAL).as_uint(1);
+    Type->height = get_attribute(Node, "height", loc_data, OPTIONAL).as_uint(1);
+    Type->area = get_attribute(Node, "area", loc_data, OPTIONAL).as_float(UNDEFINED);
+
+    /* The check used to run atof() on the tile name rather than on the area. An omitted
+     * area reads as 0 here, so only an explicitly negative value is rejected. */
+    if (get_attribute(Node, "area", loc_data, OPTIONAL).as_float(0.f) < 0) {
+        archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node),
+                       "Area for type %s must be non-negative\n", Type->name);
+    }
+}
+
+/* Takes in node pointing to <tiles> and loads all the  *
+ * child type objects.                                  */
+static void ProcessTiles(pugi::xml_node Node,
+                         t_type_descriptor** Types,
+                         int* NumTypes,
+                         std::unordered_map<std::string, t_type_descriptor*>* TypeMap,
+                         const pugiutil::loc_data& loc_data) {
+    pugi::xml_node CurType;
     pugi::xml_node Cur;
     t_type_descriptor* Type;
     int i;
@@ -2638,7 +2656,7 @@ static void ProcessComplexBlocks(pugi::xml_node Node,
     /* Alloc the type list. Need one additional t_type_desctiptors:
      * 1: empty psuedo-type
      */
-    *NumTypes = count_children(Node, "pb_type", loc_data) + 1;
+    *NumTypes = count_children(Node, "tile", loc_data) + 1;
     *Types = new t_type_descriptor[*NumTypes];
 
     cb_type_descriptors = *Types;
@@ -2654,30 +2672,63 @@ static void ProcessComplexBlocks(pugi::xml_node Node,
 
     CurType = Node.first_child();
     while (CurType) {
-        check_node(CurType, "pb_type", loc_data);
+        check_node(CurType, "tile", loc_data);
 
         /* Alias to current type */
         Type = &(*Types)[i];
 
         /* Parses the properties fields of the type */
-        ProcessComplexBlockProps(CurType, Type, loc_data);
+        ProcessTileProps(CurType, Type, loc_data);
 
         ret_pb_type_descriptors = pb_type_descriptors.insert(pair<string, int>(Type->name, 0));
         if (!ret_pb_type_descriptors.second) {
             archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType),
-                           "Duplicate pb_type descriptor name: '%s'.\n", Type->name);
+                           "Duplicate tile descriptor name: '%s'.\n", Type->name);
         }
 
-        /* Load pb_type info */
-        Type->pb_type = new t_pb_type;
-        Type->pb_type->name = vtr::strdup(Type->name);
-        ProcessPb_Type(CurType, Type->pb_type, nullptr, timing_enabled, arch, loc_data);
-        Type->num_pins = Type->capacity
-                         * (Type->pb_type->num_input_pins
-                            + Type->pb_type->num_output_pins
-                            + Type->pb_type->num_clock_pins);
-        Type->num_receivers = Type->capacity * Type->pb_type->num_input_pins;
-        Type->num_drivers = Type->capacity * Type->pb_type->num_output_pins;
+        Type->index = i;
+        Type->available_tiles_indices.insert(i);
+
+        auto result = TypeMap->insert(std::make_pair(Type->name, Type));
+        if (!result.second) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType),
+                           "Duplicate tile found: '%s'.\n", Type->name);
+        }
+
+        /* Type fully read */
+        ++i;
+
+        /* Free this node and get its next sibling node */
+        CurType = CurType.next_sibling(CurType.name());
+    }
+    pb_type_descriptors.clear();
+}
+
+// This step has to be performed after the root pb_type has been parsed
+static void ProcessTilesTags(pugi::xml_node Node,
+                             std::unordered_map<std::string, t_type_descriptor*> TypeMap,
+                             t_arch& arch,
+                             const t_default_fc_spec& arch_def_fc,
+                             const pugiutil::loc_data& loc_data) {
+    pugi::xml_node Cur, CurType;
+    t_type_descriptor* Type;
+
+    /* Process the types */
+    CurType = Node.first_child();
+    while (CurType) {
+        check_node(CurType, "tile", loc_data);
+
+        /* Load type name */
+        const char* NameProp = get_attribute(CurType, "name", loc_data).value();
+
+        /* Alias to current type */
+        Type = get_corresponding_tile(TypeMap, vtr::strdup(NameProp));
+        if (Type == nullptr) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType),
+                           "No tiles found corresponding to current root level pb type: '%s'.\n", Type->pb_type->name);
+        }
+
+        VTR_ASSERT(Type->pb_type != nullptr);
 
         /* Load pin names and classes and locations */
         Cur = get_single_child(CurType, "pinlocations", loc_data, OPTIONAL);
@@ -2698,19 +2749,178 @@ static void ProcessComplexBlocks(pugi::xml_node Node,
         Cur = get_single_child(CurType, "fc", loc_data, OPTIONAL);
         Process_Fc(Cur, Type, arch.Segments, arch_def_fc, loc_data);
 
-        //Load switchblock type and location overrides
+        /* Load switchblock type and location overrides */
         Cur = get_single_child(CurType, "switchblock_locations", loc_data, OPTIONAL);
         ProcessSwitchblockLocations(Cur, Type, arch, loc_data);
 
-        Type->index = i;
-
-        /* Type fully read */
-        ++i;
+        /* Load possible modes (pb_types which are compatible with the current tile) */
+        Cur = get_single_child(CurType, "equivalent_tiles", loc_data, OPTIONAL);
+        if (Cur) {
+            ProcessTileExtraModes(Cur, Type, TypeMap, loc_data);
+        }
 
         /* Free this node and get its next sibling node */
         CurType = CurType.next_sibling(CurType.name());
     }
-    pb_type_descriptors.clear();
+}
+
+/* Processes the equivalent tiles defined in the XML arch definition
+ * <tiles>
+ *   <tile name="LAB">
+ *     <equivalent_tiles>
+ *       <mode name="MLAB">
+ *         <map .../>
+ *       </mode>
+ *     </equivalent_tiles>
+ *   </tile>
+ * </tiles>
+ *
+ * Node is the <equivalent_tiles> tag of a tile: each <mode> child names another
+ * tile and is recorded in the t_type_descriptor of the current tile.
+ * It populates the following t_type_descriptor members:
+ *     - num_equivalent_tiles;
+ *     - equivalent_tiles;
+ *     - available_tiles_indices (plus the pin mappings, via ProcessTileExtraModePinMapping).
+ */
+static void ProcessTileExtraModes(pugi::xml_node Node,
+                                  t_type_descriptor* Type,
+                                  std::unordered_map<std::string, t_type_descriptor*> TypeMap,
+                                  const pugiutil::loc_data& loc_data) {
+    Type->num_equivalent_tiles = count_children(Node, "mode", loc_data);
+    // The running index is the key under which each equivalent tile (and its pin mappings) is stored.
+    int index = 0;
+    pugi::xml_node CurType = Node.first_child();
+    while (CurType && index < Type->num_equivalent_tiles) {
+        const char* equivalent_tile_name = get_attribute(CurType, "name", loc_data).value();
+        auto EquivalentTile = get_corresponding_tile(TypeMap, equivalent_tile_name);
+
+        if (EquivalentTile == nullptr) {
+            // Report the name that failed the lookup, not the pb_type of the tile being processed.
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType),
+                           "No tiles found corresponding to equivalent tile name: '%s'.\n", equivalent_tile_name);
+        }
+
+        // Inserts equivalent tile as last element so the index points to the correct equivalent tile.
+        auto result = Type->equivalent_tiles.insert(std::make_pair(index, EquivalentTile));
+        if (!result.second) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType),
+                           "Duplicate equivalent tile found: '%s'.\n", EquivalentTile->name);
+        }
+
+        Type->available_tiles_indices.insert(EquivalentTile->index);
+
+        ProcessTileExtraModePinMapping(CurType, Type, EquivalentTile, index, loc_data);
+
+        index++;
+        CurType = CurType.next_sibling(CurType.name());
+    }
+}
+
+/* Processes the pin mapping of one equivalent tile (one <mode> tag, stored under key imode).
+ * Every <map> child pairs a port of Type ("from") with a port of EquivalentType ("to"); the
+ * resulting pin indices populate equivalent_tile_pin_mapping and equivalent_tile_inverse_pin_mapping.
+ * Unknown ports and pins that are mapped twice are reported as architecture errors.
+ */
+static void ProcessTileExtraModePinMapping(pugi::xml_node Node,
+                                           t_type_descriptor* Type,
+                                           t_type_descriptor* EquivalentType,
+                                           int imode,
+                                           const pugiutil::loc_data& loc_data) {
+    std::unordered_map<int, int> pin_mapping, inverse_pin_mapping;
+
+    for (pugi::xml_node CurType = Node.first_child(); CurType; CurType = CurType.next_sibling(CurType.name())) {
+        //Process each mode mapping
+        if (CurType.name() != std::string("map")) {
+            bad_tag(CurType, loc_data, Node, {"map"});
+        }
+
+        const char* from_port = get_attribute(CurType, "from", loc_data).value();
+        const char* to_port = get_attribute(CurType, "to", loc_data).value();
+        const int num_pins = get_attribute(CurType, "num_pins", loc_data, OPTIONAL).as_int(1);
+
+        for (int offset = 0; offset < num_pins; offset++) {
+            const int from_pin_index = get_pin_index_by_name(Type, from_port, offset);
+            const int to_pin_index = get_pin_index_by_name(EquivalentType, to_port, offset);
+
+            // get_pin_index_by_name() returns OPEN when the port name is unknown; refuse to record such a pin.
+            if (from_pin_index == OPEN || to_pin_index == OPEN) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType),
+                               "Unknown port in equivalent tile mapping from '%s' (in %s) to '%s' (in %s).\n",
+                               from_port, Type->name, to_port, EquivalentType->name);
+            }
+
+            auto result = pin_mapping.insert(std::make_pair(from_pin_index, to_pin_index));
+            if (!result.second) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType),
+                               "Duplicate equivalent tile 'from_pin': '%d' (in %s).\n", from_pin_index, Type->name);
+            }
+
+            result = inverse_pin_mapping.insert(std::make_pair(to_pin_index, from_pin_index));
+            if (!result.second) {
+                archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType),
+                               "Duplicate equivalent tile 'to_pin': '%d' (in %s).\n", to_pin_index, Type->name);
+            }
+        }
+    }
+
+    Type->equivalent_tile_pin_mapping.insert(std::make_pair(imode, pin_mapping));
+    Type->equivalent_tile_inverse_pin_mapping.insert(std::make_pair(imode, inverse_pin_mapping));
+}
+
+/* Parses the root level <pb_type> tags found under Node. Each of them is attached to the tile
+ * registered under the same name in TypeMap, then the pin counts of that tile are derived from it. */
+static void ProcessComplexBlocks(pugi::xml_node Node,
+                                 std::unordered_map<std::string, t_type_descriptor*> TypeMap,
+                                 t_arch& arch,
+                                 const bool timing_enabled,
+                                 const pugiutil::loc_data& loc_data) {
+    map<string, int> pb_types;
+
+    pugi::xml_node CurPbType = Node.first_child();
+    while (CurPbType) {
+        check_node(CurPbType, "pb_type", loc_data);
+
+        /* "name" is the only attribute accepted here; its value is read in place instead of duplicated */
+        const char* type_name = nullptr;
+        for (pugi::xml_attribute attr : CurPbType.attributes()) {
+            if (attr.name() != std::string("name")) {
+                bad_attribute(attr, CurPbType, loc_data);
+            } else {
+                type_name = attr.value();
+            }
+        }
+
+        /* Without a name there is no key for the tile lookup below */
+        if (type_name == nullptr) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurPbType),
+                           "Root level pb_type is missing the 'name' attribute.\n");
+        }
+
+        t_type_descriptor* Type = get_corresponding_tile(TypeMap, type_name);
+        if (Type == nullptr) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurPbType),
+                           "No tiles found corresponding to current root level pb type: '%s'.\n", type_name);
+        }
+
+        /* Reject a repeated name before allocating, so an already attached pb_type is not overwritten */
+        if (!pb_types.insert(pair<string, int>(type_name, 0)).second) {
+            archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurPbType),
+                           "Duplicate pb_type descriptor name: '%s'.\n", type_name);
+        }
+
+        Type->pb_type = new t_pb_type;
+        Type->pb_type->name = vtr::strdup(type_name);
+
+        ProcessPb_Type(CurPbType, Type->pb_type, nullptr, timing_enabled, arch, loc_data);
+        Type->num_pins = Type->capacity
+                         * (Type->pb_type->num_input_pins
+                            + Type->pb_type->num_output_pins
+                            + Type->pb_type->num_clock_pins);
+        Type->num_receivers = Type->capacity * Type->pb_type->num_input_pins;
+        Type->num_drivers = Type->capacity * Type->pb_type->num_output_pins;
+
+        CurPbType = CurPbType.next_sibling(CurPbType.name());
+    }
 }
 
 static void ProcessSegments(pugi::xml_node Parent,
@@ -4117,3 +4327,41 @@ e_side string_to_side(std::string side_str) {
     }
     return side;
 }
+
+static t_type_descriptor* get_corresponding_tile(std::unordered_map<std::string, t_type_descriptor*> TypeMap,
+                                                 const char* type_name) {
+    // Looks up the tile registered under type_name; returns nullptr when there is no such tile.
+    // A null name cannot be converted to a std::string key, so it is reported as "not found".
+    if (type_name == nullptr) {
+        return nullptr;
+    }
+    auto result = TypeMap.find(type_name);
+    return (result == TypeMap.end()) ? nullptr : result->second;
+}
+
+/* Returns the index (counted over the ports of Type's pb_type, in order) of pin pin_index_in_port
+ * of the port called port_name, or OPEN when Type has no pb_type or no port of that name. */
+static int get_pin_index_by_name(t_type_descriptor* Type, const char* port_name, int pin_index_in_port) {
+    t_pb_type* pb_type = Type->pb_type;
+    // A tile that never received a pb_type has no ports to search
+    if (pb_type == nullptr) {
+        return OPEN;
+    }
+
+    // Pins of the ports preceding the match come first, so their sizes accumulate into the base
+    int port_base_ipin = 0;
+    for (int iport = 0; iport < pb_type->num_ports; ++iport) {
+        t_port* port = &pb_type->ports[iport];
+
+        if (0 == strcmp(port->name, port_name)) {
+            // The requested pin must lie inside the matched port
+            VTR_ASSERT(pin_index_in_port >= 0);
+            VTR_ASSERT(pin_index_in_port < port->num_pins);
+
+            return port_base_ipin + pin_index_in_port;
+        }
+        port_base_ipin += port->num_pins;
+    }
+
+    return OPEN;
+}
diff --git a/utils/fasm/src/fasm.cpp b/utils/fasm/src/fasm.cpp
index ee7c7cd8fd1..9797f97e81c 100644
--- a/utils/fasm/src/fasm.cpp
+++ b/utils/fasm/src/fasm.cpp
@@ -93,7 +93,11 @@ void FasmWriterVisitor::check_interconnect(const t_pb_routes &pb_routes, int ino
     return;
   }
 
-  t_pb_graph_pin *prev_pin = pb_graph_pin_lookup_from_index_by_type_.at(blk_type_->index)[prev_node];
+  auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
+
+  t_type_ptr original_blk_type = clb_nlist.block_type(current_blk_id_, false);
+
+  t_pb_graph_pin *prev_pin = pb_graph_pin_lookup_from_index_by_type_.at(original_blk_type->index)[prev_node];
 
   int prev_edge;
   for(prev_edge = 0; prev_edge < prev_pin->num_output_edges; prev_edge++) {
diff --git a/vpr/src/base/clustered_netlist.cpp b/vpr/src/base/clustered_netlist.cpp
index eb69053c404..de5f5f097e6 100644
--- a/vpr/src/base/clustered_netlist.cpp
+++ b/vpr/src/base/clustered_netlist.cpp
@@ -25,11 +25,30 @@ t_pb* ClusteredNetlist::block_pb(const ClusterBlockId id) const {
 }
 
 t_type_ptr ClusteredNetlist::block_type(const ClusterBlockId id) const {
+    return block_type(id, true);
+}
+
+t_type_ptr ClusteredNetlist::block_type(const ClusterBlockId id, bool get_equivalent_if_set) const {
     VTR_ASSERT_SAFE(valid_block_id(id));
+    if (block_eq_type_index(id) != OPEN && get_equivalent_if_set) {
+        return block_eq_type_[id];
+    }
 
     return block_types_[id];
 }
 
+int ClusteredNetlist::block_eq_type_index(const ClusterBlockId id) const {
+    VTR_ASSERT_SAFE(valid_block_id(id));
+    // Index stored for this block: OPEN from create_block() until set_equivalent_block_type() overwrites it
+    return block_eq_type_index_[id];
+}
+
+bool ClusteredNetlist::block_eq_type_effective(const ClusterBlockId id) const {
+    VTR_ASSERT_SAFE(valid_block_id(id));
+    // Flag stored for this block: false from create_block(), true once set_equivalent_block_type() has run
+    return block_eq_type_effective_[id];
+}
+
 ClusterNetId ClusteredNetlist::block_net(const ClusterBlockId blk_id, const int phys_pin_index) const {
     auto pin_id = block_pin(blk_id, phys_pin_index);
 
@@ -120,9 +139,12 @@ ClusterBlockId ClusteredNetlist::create_block(const char* name, t_pb* pb, t_type
 
         block_pbs_.insert(blk_id, pb);
         block_types_.insert(blk_id, type);
-
+        block_eq_type_.insert(blk_id, type);
+        block_eq_type_index_.insert(blk_id, OPEN);
+        block_eq_type_effective_.insert(blk_id, false);
         //Allocate and initialize every potential pin of the block
-        block_logical_pins_.insert(blk_id, std::vector<ClusterPinId>(type->num_pins, ClusterPinId::INVALID()));
+        int num_pins = get_max_num_pins(type);
+        block_logical_pins_.insert(blk_id, std::vector<ClusterPinId>(num_pins, ClusterPinId::INVALID()));
     }
 
     //Check post-conditions: size
@@ -135,6 +157,12 @@ ClusterBlockId ClusteredNetlist::create_block(const char* name, t_pb* pb, t_type
     return blk_id;
 }
 
+void ClusteredNetlist::set_equivalent_block_type(const ClusterBlockId blk_id, int i_eq_type, t_type_ptr eq_type) {
+    block_eq_type_index_[blk_id] = i_eq_type; // value later reported by block_eq_type_index()
+    block_eq_type_effective_[blk_id] = true; // value later reported by block_eq_type_effective()
+    block_eq_type_[blk_id] = eq_type; // returned by block_type(id) while the stored index is not OPEN
+}
+
 void ClusteredNetlist::set_pin_physical_index(const ClusterPinId pin, const int phys_pin_index) {
     VTR_ASSERT_SAFE(valid_pin_id(pin));
     auto blk = pin_block(pin);
@@ -322,3 +350,20 @@ bool ClusteredNetlist::validate_net_sizes_impl(size_t num_nets) const {
     }
     return true;
 }
+
+/*
+ * Utilities
+ */
+int ClusteredNetlist::get_max_num_pins(t_type_ptr type) {
+    // Largest num_pins among the type itself and the equivalent tiles stored under keys 0..num_equivalent_tiles-1
+    int widest = type->num_pins;
+
+    int eq_index = 0;
+    while (eq_index < type->num_equivalent_tiles) {
+        const auto eq_entry = type->equivalent_tiles.find(eq_index);
+        VTR_ASSERT(eq_entry != type->equivalent_tiles.end());
+        widest = std::max(widest, eq_entry->second->num_pins);
+        ++eq_index;
+    }
+    return widest;
+}
diff --git a/vpr/src/base/clustered_netlist.h b/vpr/src/base/clustered_netlist.h
index 343cffaa9b9..8b3f34fb1ff 100644
--- a/vpr/src/base/clustered_netlist.h
+++ b/vpr/src/base/clustered_netlist.h
@@ -125,8 +125,25 @@ class ClusteredNetlist : public Netlist<ClusterBlockId, ClusterPortId, ClusterPi
     //Returns the physical block
     t_pb* block_pb(const ClusterBlockId id) const;
 
-    //Returns the type of CLB (Logic block, RAM, DSP, etc.)
+    /*
+     * Returns the type of CLB (Logic block, RAM, DSP, etc.). There are two different overloaded block_type functions
+     * 1) The first one (without the boolean) is used to retrieve the actual physical type corresponding to the placed
+     *    block. This function will call the one with the boolean, setting `get_equivalent_if_set` to true.
+     * 2) The second one (with the boolean) is used to retrieve the physical or logical type relative to a block.
+     *    Depending on the value of `get_equivalent_if_set` we will get a different block_type:
+     *        - True: the function returns the physical placed tile (can be an equivalent tile) of the block;
+     *        - False: the function returns the logical tile of the block (even if it was placed in an equivalent tile).
+     */
     t_type_ptr block_type(const ClusterBlockId id) const;
+    t_type_ptr block_type(const ClusterBlockId id, bool get_equivalent_if_set) const;
+
+    //Returns the equivalent type index (if any) of a CLB
+    //The index is used to retrieve the equivalent type from the t_type_descriptor structure
+    //Example: type->equivalent_tiles[index]
+    int block_eq_type_index(const ClusterBlockId id) const;
+
+    //Returns true if the block has been placed in an equivalent tile
+    bool block_eq_type_effective(const ClusterBlockId id) const;
 
     //Returns the net of the block attached to the specific pin index
     ClusterNetId block_net(const ClusterBlockId blk_id, const int pin_index) const;
@@ -174,6 +191,13 @@ class ClusteredNetlist : public Netlist<ClusterBlockId, ClusterPortId, ClusterPi
     //  t_type_ptr  : The type of the CLB
     ClusterBlockId create_block(const char* name, t_pb* pb, t_type_ptr type);
 
+    //Add the equivalent block type for a CLB. This mutator adds both the equivalent type index and
+    //the equivalent type to the corresponding vector_maps
+    //  blk_id      : The block placed in an equivalent tile location
+    //  i_eq_type   : The index used to retrieve the equivalent tile from the t_type_ptr structure
+    //  eq_type     : The equivalent type associated with this block
+    void set_equivalent_block_type(const ClusterBlockId blk_id, int i_eq_type, t_type_ptr eq_type);
+
     //Create or return an existing port in the netlist
     //  blk_id      : The block the port is associated with
     //  name        : The name of the port (must match the name of a port in the block's model)
@@ -242,10 +266,18 @@ class ClusteredNetlist : public Netlist<ClusterBlockId, ClusterPortId, ClusterPi
     bool validate_pin_sizes_impl(size_t num_pins) const override;
     bool validate_net_sizes_impl(size_t num_nets) const override;
 
+    /*
+     * Utilities
+     */
+    int get_max_num_pins(t_type_ptr type);
+
   private: //Private Data
     //Blocks
     vtr::vector_map<ClusterBlockId, t_pb*> block_pbs_;                              //Physical block representing the clustering & internal hierarchy of each CLB
     vtr::vector_map<ClusterBlockId, t_type_ptr> block_types_;                       //The type of physical block this user circuit block is mapped to
+    vtr::vector_map<ClusterBlockId, t_type_ptr> block_eq_type_;                     //The equivalent type (if any) selected for a CLB
+    vtr::vector_map<ClusterBlockId, int> block_eq_type_index_;                      //Index relative to the equivalent tile chosen during placement
+    vtr::vector_map<ClusterBlockId, bool> block_eq_type_effective_;                 //Boolean to state if equivalent tile is used
     vtr::vector_map<ClusterBlockId, std::vector<ClusterPinId>> block_logical_pins_; //The logical pin associated with each physical block pin
 
     //Pins
diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp
index c841f16e453..5627135974d 100644
--- a/vpr/src/pack/pack.cpp
+++ b/vpr/src/pack/pack.cpp
@@ -279,9 +279,15 @@ static bool try_size_device_grid(const t_arch& arch, const std::map<t_type_ptr,
         if (itr == num_type_instances.end()) continue;
 
         float num_instances = itr->second;
+
+        int num_available_instances = device_ctx.grid.num_instances(type);
+        for (int itype = 0; itype < type->num_equivalent_tiles; itype++) {
+            num_available_instances += device_ctx.grid.num_instances(type->equivalent_tiles[itype]);
+        }
+
         float util = 0.;
         if (num_instances != 0) {
-            util = num_instances / device_ctx.grid.num_instances(type);
+            util = num_instances / num_available_instances;
         }
         type_util[type] = util;
 
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index d769a295809..f0b2e8e3e16 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -287,7 +287,7 @@ static int try_place_macro(int itype, int ipos, int imacro);
 static void initial_placement_pl_macros(int macros_max_num_tries, int* free_locations);
 
 static void initial_placement_blocks(int* free_locations, enum e_pad_loc_type pad_loc_type);
-static void initial_placement_location(const int* free_locations, ClusterBlockId blk_id, int& pipos, t_pl_loc& to);
+static void initial_placement_location(const int* free_locations, int itype, int& pipos, t_pl_loc& to);
 
 static void initial_placement(enum e_pad_loc_type pad_loc_type,
                               const char* pad_loc_file);
@@ -375,7 +375,7 @@ static void comp_td_costs(const PlaceDelayModel& delay_model, double* timing_cos
 
 static e_swap_result assess_swap(double delta_c, double t);
 
-static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc& to);
+static bool find_to(t_type_ptr to_type, t_type_ptr from_type, float rlim, const t_pl_loc from, t_pl_loc& to);
 
 static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new);
 
@@ -431,6 +431,7 @@ static void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
 
 static void log_move_abort(std::string reason);
 static void report_aborted_moves();
+std::vector<int> get_available_tiles(t_type_ptr type);
 static int grid_to_compressed(const std::vector<int>& coords, int point);
 
 static void print_place_status_header();
@@ -1657,18 +1658,44 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) {
     // * on chip, and
     // * match the correct block type
     //
-    //Note that we need to explicitly check that the types match, since the device floorplan is not
+    //Note that we need to explicitly check that the types match or are equivalent, since the device floorplan is not
     //(neccessarily) translationally invariant for an arbitrary macro
 
     auto& device_ctx = g_vpr_ctx.device();
     auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_ctx = g_vpr_ctx.placement();
 
     if (to.x < 0 || to.x >= int(device_ctx.grid.width())
         || to.y < 0 || to.y >= int(device_ctx.grid.height())
-        || to.z < 0 || to.z >= device_ctx.grid[to.x][to.y].type->capacity
-        || (device_ctx.grid[to.x][to.y].type != cluster_ctx.clb_nlist.block_type(blk))) {
+        || to.z < 0 || to.z >= device_ctx.grid[to.x][to.y].type->capacity) {
+        return false;
+    }
+
+    // Check if types are allowed to be swapped
+    auto blk_type_from = cluster_ctx.clb_nlist.block_type(blk);
+    auto blk_type_to = device_ctx.grid[to.x][to.y].type;
+
+    // First check is to see if `from` type can be placed in `to` type
+    if (!blk_type_from->is_available_tile_index(blk_type_to->index)) {
         return false;
     }
+
+    t_pl_loc from = place_ctx.block_locs[blk].loc;
+    ClusterBlockId blk_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.z];
+
+    // In case `blk_to` is empty we can skip the second check
+    if (blk_to == EMPTY_BLOCK_ID) {
+        return true;
+    }
+
+    blk_type_from = device_ctx.grid[from.x][from.y].type;
+    blk_type_to = cluster_ctx.clb_nlist.block_type(blk_to);
+
+    // Second check is to see if `to` type can be placed in `from` type
+    if (!blk_type_to->is_available_tile_index(blk_type_from->index)) {
+        return false;
+    }
+
     return true;
 }
 
@@ -1728,7 +1755,25 @@ static e_swap_result try_swap(float t,
     t_pl_loc from = place_ctx.block_locs[b_from].loc;
     auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from);
     auto grid_from_type = g_vpr_ctx.device().grid[from.x][from.y].type;
-    VTR_ASSERT(cluster_from_type == grid_from_type);
+
+    VTR_ASSERT(cluster_from_type->is_available_tile_index(grid_from_type->index));
+
+    t_type_ptr to_block_type = cluster_ctx.clb_nlist.block_type(b_from);
+
+    // Find random equivalent type (could be of the same type as the `from` one)
+    if (to_block_type->num_equivalent_tiles > 0) {
+        int irand_block_type = vtr::irand(to_block_type->num_equivalent_tiles); // in [0, num_equivalent_tiles], drawn from VPR's seeded RNG
+
+        // If random index is 0 do not use an equivalent tile.
+        if (irand_block_type > 0) {
+            auto result = to_block_type->equivalent_tiles.find(irand_block_type - 1);
+            VTR_ASSERT(result != to_block_type->equivalent_tiles.end());
+
+            to_block_type = result->second;
+        }
+    }
+
+    VTR_ASSERT(cluster_from_type->is_available_tile_index(to_block_type->index));
 
     //Allow some fraction of moves to not be restricted by rlim,
     //in the hopes of better escaping local minima
@@ -1737,7 +1782,7 @@ static e_swap_result try_swap(float t,
     }
 
     t_pl_loc to;
-    if (!find_to(cluster_ctx.clb_nlist.block_type(b_from), rlim, from, to))
+    if (!find_to(to_block_type, grid_from_type, rlim, from, to))
         return REJECTED;
 
 #if 0
@@ -1821,8 +1866,8 @@ static e_swap_result try_swap(float t,
 
         //VTR_ASSERT(check_macro_placement_consistency() == 0);
 #if 0
-        //Check that each accepted swap yields a valid placement
-        check_place(*costs, delay_model, place_algorithm);
+        //Check that each accepted swap yields a valid placement
+        check_place(*costs, delay_model, place_algorithm);
 #endif
 
         return (keep_switch);
@@ -1995,7 +2040,7 @@ static void update_td_delta_costs(const PlaceDelayModel& delay_model, const Clus
     }
 }
 
-static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc& to) {
+static bool find_to(t_type_ptr to_type, t_type_ptr from_type, float rlim, const t_pl_loc from, t_pl_loc& to) {
     //Finds a legal swap to location for the given type, starting from 'x_from' and 'y_from'
     //
     //Note that the range limit (rlim) is applied in a logical sense (i.e. 'compressed' grid space consisting
@@ -2005,29 +2050,39 @@ static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc&
     //
     //This ensures that such blocks don't get locked down too early during placement (as would be the
     //case with a physical distance rlim)
-    auto& grid = g_vpr_ctx.device().grid;
-
-    auto grid_type = grid[from.x][from.y].type;
-    VTR_ASSERT(type == grid_type);
 
     //Retrieve the compressed block grid for this block type
-    const auto& compressed_block_grid = f_compressed_block_grids[type->index];
+    const auto& to_compressed_block_grid = f_compressed_block_grids[to_type->index];
+    const auto& from_compressed_block_grid = f_compressed_block_grids[from_type->index];
 
     //Determine the rlim in each dimension
-    int rlim_x = min<int>(compressed_block_grid.compressed_to_grid_x.size(), rlim);
-    int rlim_y = min<int>(compressed_block_grid.compressed_to_grid_y.size(), rlim); /* for aspect_ratio != 1 case. */
+    int rlim_x = min<int>(to_compressed_block_grid.compressed_to_grid_x.size(), rlim);
+    int rlim_y = min<int>(to_compressed_block_grid.compressed_to_grid_y.size(), rlim); /* for aspect_ratio != 1 case. */
 
     //Determine the coordinates in the compressed grid space of the current block
-    int cx_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_x, from.x);
-    int cy_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_y, from.y);
+    int cx_from = grid_to_compressed(from_compressed_block_grid.compressed_to_grid_x, from.x);
+    int cy_from = grid_to_compressed(from_compressed_block_grid.compressed_to_grid_y, from.y);
+
+    int min_cx, max_cx;
+    int min_cy, max_cy;
+    int delta_cx;
 
-    //Determin the valid compressed grid location ranges
-    int min_cx = std::max(0, cx_from - rlim_x);
-    int max_cx = std::min<int>(compressed_block_grid.compressed_to_grid_x.size() - 1, cx_from + rlim_x);
-    int delta_cx = max_cx - min_cx;
+    //Determine the valid compressed grid location ranges
+    if (to_type == from_type) {
+        min_cx = std::max(0, cx_from - rlim_x);
+        max_cx = std::min<int>(to_compressed_block_grid.compressed_to_grid_x.size() - 1, cx_from + rlim_x);
+        delta_cx = max_cx - min_cx;
 
-    int min_cy = std::max(0, cy_from - rlim_y);
-    int max_cy = std::min<int>(compressed_block_grid.compressed_to_grid_y.size() - 1, cy_from + rlim_y);
+        min_cy = std::max(0, cy_from - rlim_y);
+        max_cy = std::min<int>(to_compressed_block_grid.compressed_to_grid_y.size() - 1, cy_from + rlim_y);
+    } else {
+        min_cx = 0;
+        max_cx = to_compressed_block_grid.compressed_to_grid_x.size() - 1;
+        delta_cx = max_cx - min_cx;
+
+        min_cy = 0;
+        max_cy = to_compressed_block_grid.compressed_to_grid_y.size() - 1;
+    }
 
     int cx_to = OPEN;
     int cy_to = OPEN;
@@ -2054,19 +2109,19 @@ static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc&
         //
         //The candidates are stored in a flat_map so we can efficiently find the set of valid
         //candidates with upper/lower bound.
-        auto y_lower_iter = compressed_block_grid.grid[cx_to].lower_bound(min_cy);
-        if (y_lower_iter == compressed_block_grid.grid[cx_to].end()) {
+        auto y_lower_iter = to_compressed_block_grid.grid[cx_to].lower_bound(min_cy);
+        if (y_lower_iter == to_compressed_block_grid.grid[cx_to].end()) {
             continue;
         }
 
-        auto y_upper_iter = compressed_block_grid.grid[cx_to].upper_bound(max_cy);
+        auto y_upper_iter = to_compressed_block_grid.grid[cx_to].upper_bound(max_cy);
 
         if (y_lower_iter->first > min_cy) {
             //No valid blocks at this x location which are within rlim_y
             //
             //Fall back to allow the whole y range
-            y_lower_iter = compressed_block_grid.grid[cx_to].begin();
-            y_upper_iter = compressed_block_grid.grid[cx_to].end();
+            y_lower_iter = to_compressed_block_grid.grid[cx_to].begin();
+            y_upper_iter = to_compressed_block_grid.grid[cx_to].end();
 
             min_cy = y_lower_iter->first;
             max_cy = (y_upper_iter - 1)->first;
@@ -2112,15 +2167,15 @@ static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc&
     VTR_ASSERT(cy_to != OPEN);
 
     //Convert to true (uncompressed) grid locations
-    to.x = compressed_block_grid.compressed_to_grid_x[cx_to];
-    to.y = compressed_block_grid.compressed_to_grid_y[cy_to];
+    to.x = to_compressed_block_grid.compressed_to_grid_x[cx_to];
+    to.y = to_compressed_block_grid.compressed_to_grid_y[cy_to];
 
     //Each x/y location contains only a single type, so we can pick a random
     //z (capcity) location
-    to.z = vtr::irand(type->capacity - 1);
+    to.z = vtr::irand(to_type->capacity - 1);
 
     auto& device_ctx = g_vpr_ctx.device();
-    VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].type == type, "Type must match");
+    VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].type == to_type, "Type must match");
     VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].width_offset == 0, "Should be at block base location");
     VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].height_offset == 0, "Should be at block base location");
 
@@ -3153,11 +3208,10 @@ static int try_place_macro(int itype, int ipos, int imacro) {
 
 static void initial_placement_pl_macros(int macros_max_num_tries, int* free_locations) {
     int macro_placed;
-    int itype, itry, ipos;
+    int itry, ipos;
     ClusterBlockId blk_id;
 
     auto& cluster_ctx = g_vpr_ctx.clustering();
-    auto& device_ctx = g_vpr_ctx.device();
     auto& place_ctx = g_vpr_ctx.placement();
 
     auto& pl_macros = place_ctx.pl_macros;
@@ -3169,49 +3223,62 @@ static void initial_placement_pl_macros(int macros_max_num_tries, int* free_loca
 
         // Assume that all the blocks in the macro are of the same type
         blk_id = pl_macros[imacro].members[0].blk_index;
-        itype = cluster_ctx.clb_nlist.block_type(blk_id)->index;
-        if (free_locations[itype] < int(pl_macros[imacro].members.size())) {
-            vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__,
-                      "Initial placement failed.\n"
-                      "Could not place macro length %zu with head block %s (#%zu); not enough free locations of type %s (#%d).\n"
-                      "VPR cannot auto-size for your circuit, please resize the FPGA manually.\n",
-                      pl_macros[imacro].members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id), device_ctx.block_types[itype].name, itype);
-        }
 
-        // Try to place the macro first, if can be placed - place them, otherwise try again
-        for (itry = 0; itry < macros_max_num_tries && macro_placed == false; itry++) {
-            // Choose a random position for the head
-            ipos = vtr::irand(free_locations[itype] - 1);
-
-            // Try to place the macro
-            macro_placed = try_place_macro(itype, ipos, imacro);
+        bool no_free_locations = true;
+        // Loop over all the possible equivalent tiles
+        for (int itype : get_available_tiles(cluster_ctx.clb_nlist.block_type(blk_id))) {
+            if (free_locations[itype] >= int(pl_macros[imacro].members.size())) {
+                no_free_locations = false;
+            } else {
+                continue;
+            }
 
-        } // Finished all tries
+            // Try to place the macro first, if can be placed - place them, otherwise try again
+            for (itry = 0; itry < macros_max_num_tries && macro_placed == false; itry++) {
+                // Choose a random position for the head
+                ipos = vtr::irand(free_locations[itype] - 1);
 
-        if (macro_placed == false) {
-            // if a macro still could not be placed after macros_max_num_tries times,
-            // go through the chip exhaustively to find a legal placement for the macro
-            // place the macro on the first location that is legal
-            // then set macro_placed = true;
-            // if there are no legal positions, error out
-
-            // Exhaustive placement of carry macros
-            for (ipos = 0; ipos < free_locations[itype] && macro_placed == false; ipos++) {
                 // Try to place the macro
                 macro_placed = try_place_macro(itype, ipos, imacro);
 
-            } // Exhausted all the legal placement position for this macro
+            } // Finished all tries
 
-            // If macro could not be placed after exhaustive placement, error out
             if (macro_placed == false) {
-                // Error out
-                vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__,
-                          "Initial placement failed.\n"
-                          "Could not place macro length %zu with head block %s (#%zu); not enough free locations of type %s (#%d).\n"
-                          "Please manually size the FPGA because VPR can't do this yet.\n",
-                          pl_macros[imacro].members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id), device_ctx.block_types[itype].name, itype);
+                // if a macro still could not be placed after macros_max_num_tries times,
+                // go through the chip exhaustively to find a legal placement for the macro
+                // place the macro on the first location that is legal
+                // then set macro_placed = true;
+                // if there are no legal positions, error out
+
+                // Exhaustive placement of carry macros
+                for (ipos = 0; ipos < free_locations[itype] && macro_placed == false; ipos++) {
+                    // Try to place the macro
+                    macro_placed = try_place_macro(itype, ipos, imacro);
+
+                } // Exhausted all the legal placement position for this macro
+            }
+
+            if (macro_placed == true) {
+                break;
             }
+        }
 
+        if (no_free_locations) {
+            vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__,
+                      "Initial placement failed.\n"
+                      "Could not place macro length %zu with head block %s (#%zu); not enough free locations.\n"
+                      "VPR cannot auto-size for your circuit, please resize the FPGA manually.\n",
+                      pl_macros[imacro].members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id));
+        }
+
+        // If macro could not be placed even after exhaustive placement, error out
+        if (macro_placed == false) {
+            // Error out
+            vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__,
+                      "Initial placement failed.\n"
+                      "Could not place macro length %zu with head block %s (#%zu); not enough free locations.\n"
+                      "Please manually size the FPGA because VPR can't do this yet.\n",
+                      pl_macros[imacro].members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id));
         } else {
             // This macro has been placed successfully, proceed to place the next macro
             continue;
@@ -3222,10 +3289,9 @@ static void initial_placement_pl_macros(int macros_max_num_tries, int* free_loca
 /* Place blocks that are NOT a part of any macro.
  * We'll randomly place each block in the clustered netlist, one by one. */
 static void initial_placement_blocks(int* free_locations, enum e_pad_loc_type pad_loc_type) {
-    int itype, ipos;
+    int ipos;
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_ctx = g_vpr_ctx.mutable_placement();
-    auto& device_ctx = g_vpr_ctx.device();
 
     for (auto blk_id : cluster_ctx.clb_nlist.blocks()) {
         if (place_ctx.block_locs[blk_id].loc.x != -1) { // -1 is a sentinel for an empty block
@@ -3241,45 +3307,54 @@ static void initial_placement_blocks(int* free_locations, enum e_pad_loc_type pa
              * Choose one randomly and put blk_id there. Then we don't want to pick
              * that location again, so remove it from the free_locations array.
              */
-            itype = cluster_ctx.clb_nlist.block_type(blk_id)->index;
-            if (free_locations[itype] <= 0) {
-                vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__,
-                          "Initial placement failed.\n"
-                          "Could not place block %s (#%zu); no free locations of type %s (#%d).\n",
-                          cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id), device_ctx.block_types[itype].name, itype);
-            }
+            bool no_free_locations = true;
+            // Loop over all the possible equivalent tiles
+            for (int itype : get_available_tiles(cluster_ctx.clb_nlist.block_type(blk_id))) {
+                if (free_locations[itype] > 0) {
+                    no_free_locations = false;
+                } else {
+                    continue;
+                }
+
+                t_pl_loc to;
+                initial_placement_location(free_locations, itype, ipos, to);
+
+                // Make sure that the position is EMPTY_BLOCK before placing the block down
+                VTR_ASSERT(place_ctx.grid_blocks[to.x][to.y].blocks[to.z] == EMPTY_BLOCK_ID);
 
-            t_pl_loc to;
-            initial_placement_location(free_locations, blk_id, ipos, to);
+                place_ctx.grid_blocks[to.x][to.y].blocks[to.z] = blk_id;
+                place_ctx.grid_blocks[to.x][to.y].usage++;
 
-            // Make sure that the position is EMPTY_BLOCK before placing the block down
-            VTR_ASSERT(place_ctx.grid_blocks[to.x][to.y].blocks[to.z] == EMPTY_BLOCK_ID);
+                place_ctx.block_locs[blk_id].loc = to;
 
-            place_ctx.grid_blocks[to.x][to.y].blocks[to.z] = blk_id;
-            place_ctx.grid_blocks[to.x][to.y].usage++;
+                //Mark IOs as fixed if specifying a (fixed) random placement
+                if (is_io_type(cluster_ctx.clb_nlist.block_type(blk_id)) && pad_loc_type == RANDOM) {
+                    place_ctx.block_locs[blk_id].is_fixed = true;
+                }
 
-            place_ctx.block_locs[blk_id].loc = to;
+                /* Ensure randomizer doesn't pick this location again, since it's occupied. Could shift all the
+                 * legal positions in legal_pos to remove the entry (choice) we just used, but faster to
+                 * just move the last entry in legal_pos to the spot we just used and decrement the
+                 * count of free_locations. */
+                legal_pos[itype][ipos] = legal_pos[itype][free_locations[itype] - 1]; /* overwrite used block position */
+                free_locations[itype]--;
 
-            //Mark IOs as fixed if specifying a (fixed) random placement
-            if (is_io_type(cluster_ctx.clb_nlist.block_type(blk_id)) && pad_loc_type == RANDOM) {
-                place_ctx.block_locs[blk_id].is_fixed = true;
+                //Do not check other type as the block has already been placed
+                break;
             }
 
-            /* Ensure randomizer doesn't pick this location again, since it's occupied. Could shift all the
-             * legal positions in legal_pos to remove the entry (choice) we just used, but faster to
-             * just move the last entry in legal_pos to the spot we just used and decrement the
-             * count of free_locations. */
-            legal_pos[itype][ipos] = legal_pos[itype][free_locations[itype] - 1]; /* overwrite used block position */
-            free_locations[itype]--;
+            // Check if there were no available locations
+            if (no_free_locations) {
+                vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__,
+                          "Initial placement failed.\n"
+                          "Could not place block %s (#%zu); no free locations\n",
+                          cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id));
+            }
         }
     }
 }
 
-static void initial_placement_location(const int* free_locations, ClusterBlockId blk_id, int& ipos, t_pl_loc& to) {
-    auto& cluster_ctx = g_vpr_ctx.clustering();
-
-    int itype = cluster_ctx.clb_nlist.block_type(blk_id)->index;
-
+static void initial_placement_location(const int* free_locations, int itype, int& ipos, t_pl_loc& to) {
     ipos = vtr::irand(free_locations[itype] - 1);
     to = legal_pos[itype][ipos];
 }
@@ -3536,7 +3611,7 @@ static int check_block_placement_consistency() {
                 if (EMPTY_BLOCK_ID == bnum || INVALID_BLOCK_ID == bnum)
                     continue;
 
-                if (cluster_ctx.clb_nlist.block_type(bnum) != device_ctx.grid[i][j].type) {
+                if (!cluster_ctx.clb_nlist.block_type(bnum)->is_available_tile_index(device_ctx.grid[i][j].type->index)) {
                     VTR_LOG_ERROR("Block %zu type (%s) does not match grid location (%zu,%zu) type (%s).\n",
                                   size_t(bnum), cluster_ctx.clb_nlist.block_type(bnum)->name, i, j, device_ctx.grid[i][j].type->name);
                     error++;
@@ -3657,6 +3732,19 @@ static void generate_post_place_timing_reports(const t_placer_opts& placer_opts,
     timing_reporter.report_timing_setup(placer_opts.post_place_timing_report_file, *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths);
 }
 
+std::vector<int> get_available_tiles(t_type_ptr type) {
+    std::vector<int> types(1, type->index);
+
+    for (int i = 0; i < type->num_equivalent_tiles; i++) {
+        auto result = type->equivalent_tiles.find(i);
+        VTR_ASSERT(result != type->equivalent_tiles.end());
+
+        types.push_back(result->second->index);
+    }
+
+    return types;
+}
+
 #if 0
 static void update_screen_debug();
 
diff --git a/vpr/src/timing/clb_delay_calc.inl b/vpr/src/timing/clb_delay_calc.inl
index b19effad62b..47d76bf5d36 100644
--- a/vpr/src/timing/clb_delay_calc.inl
+++ b/vpr/src/timing/clb_delay_calc.inl
@@ -73,7 +73,8 @@ inline float ClbDelayCalc::pb_route_delay(ClusterBlockId clb, int pb_route_idx,
 inline const t_pb_graph_edge* ClbDelayCalc::find_pb_graph_edge(ClusterBlockId clb, int pb_route_idx) const {
     auto& cluster_ctx = g_vpr_ctx.clustering();
 
-    int type_index = cluster_ctx.clb_nlist.block_type(clb)->index;
+    //Getting the original block type in case the CLB has been placed in an equivalent tile.
+    int type_index = cluster_ctx.clb_nlist.block_type(clb, false)->index;
 
     const t_pb* pb = cluster_ctx.clb_nlist.block_pb(clb);
     if (pb->pb_route.count(pb_route_idx)) {
@@ -84,7 +85,7 @@ inline const t_pb_graph_edge* ClbDelayCalc::find_pb_graph_edge(ClusterBlockId cl
             const t_pb_graph_pin* pb_gpin = intra_lb_pb_pin_lookup_.pb_gpin(type_index, pb_route_idx);
             const t_pb_graph_pin* upstream_pb_gpin = intra_lb_pb_pin_lookup_.pb_gpin(type_index, upstream_pb_route_idx);
 
-            return find_pb_graph_edge(upstream_pb_gpin, pb_gpin); 
+            return find_pb_graph_edge(upstream_pb_gpin, pb_gpin);
         }
     }
 
diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp
index 3df718b6099..21ea0b3ca19 100644
--- a/vpr/src/util/vpr_utils.cpp
+++ b/vpr/src/util/vpr_utils.cpp
@@ -90,6 +90,10 @@ static AtomPinId find_atom_pin_for_pb_route_id(ClusterBlockId clb, int pb_route_
 static bool block_type_contains_blif_model(t_type_ptr type, const std::regex& blif_model_regex);
 static bool pb_type_contains_blif_model(const t_pb_type* pb_type, const std::regex& blif_model_regex);
 
+static t_type_ptr get_equivalent_tile(t_type_ptr type, int eq_itype);
+static bool try_sync_equivalent_tiles(ClusterBlockId clb, t_type_ptr logic_type, t_type_ptr physical_type);
+static int get_type_pin(const std::unordered_map<int, std::unordered_map<int, int>>& pin_mappings, int eq_type_index, int eq_pin);
+
 /******************** Subroutine definitions *********************************/
 
 const t_model* find_model(const t_model* models, const std::string& name, bool required) {
@@ -134,6 +138,46 @@ void print_tabs(FILE* fpout, int num_tab) {
     }
 }
 
+static t_type_ptr get_equivalent_tile(t_type_ptr type, int eq_itype) {
+    auto result = type->equivalent_tiles.find(eq_itype);
+    VTR_ASSERT(result != type->equivalent_tiles.end());
+
+    return result->second;
+}
+
+static int get_type_pin(const std::unordered_map<int, std::unordered_map<int, int>>& pin_mappings, int eq_type_index, int eq_pin) { //const ref: avoids deep-copying the nested map on every lookup
+    auto tile_result = pin_mappings.find(eq_type_index);
+    VTR_ASSERT(tile_result != pin_mappings.end());
+
+    const auto& pin_mapping = tile_result->second; //Bind by reference: no copy of the per-tile pin map
+    auto pin_result = pin_mapping.find(eq_pin);
+    VTR_ASSERT(pin_result != pin_mapping.end());
+
+    return pin_result->second;
+}
+
+static bool try_sync_equivalent_tiles(ClusterBlockId clb, t_type_ptr logic_type, t_type_ptr physical_type) {
+    auto& cluster_ctx = g_vpr_ctx.mutable_clustering();
+    auto& clb_nlist = cluster_ctx.clb_nlist;
+
+    //Searching for equivalent tiles in the logic_type
+    for (int itype = 0; itype < logic_type->num_equivalent_tiles; itype++) {
+        if (get_equivalent_tile(logic_type, itype)->index == physical_type->index) {
+            clb_nlist.set_equivalent_block_type(clb, itype, physical_type);
+
+            //Setting new logical to physical pin mapping
+            for (auto pin : clb_nlist.block_pins(clb)) {
+                int original_ipin = clb_nlist.pin_physical_index(pin);
+                int new_ipin = get_type_pin(logic_type->equivalent_tile_pin_mapping, itype, original_ipin);
+                clb_nlist.set_pin_physical_index(pin, new_ipin);
+            }
+            return true;
+        }
+    }
+
+    return false;
+}
+
 /* Points the place_ctx.grid_blocks structure back to the blocks list */
 void sync_grid_to_blocks() {
     auto& place_ctx = g_vpr_ctx.mutable_placement();
@@ -171,11 +215,16 @@ void sync_grid_to_blocks() {
         }
 
         /* Check types match */
-        if (cluster_ctx.clb_nlist.block_type(blk_id) != device_ctx.grid[blk_x][blk_y].type) {
-            VPR_THROW(VPR_ERROR_PLACE, "A block is in a grid location (%d x %d) with a conflicting types '%s' and '%s' .\n",
-                      blk_x, blk_y,
-                      cluster_ctx.clb_nlist.block_type(blk_id)->name,
-                      device_ctx.grid[blk_x][blk_y].type->name);
+        auto logic_type = cluster_ctx.clb_nlist.block_type(blk_id);
+        auto physical_type = device_ctx.grid[blk_x][blk_y].type;
+
+        if (logic_type != physical_type) {
+            if (!try_sync_equivalent_tiles(blk_id, logic_type, physical_type)) {
+                VPR_THROW(VPR_ERROR_PLACE, "A block is in a grid location (%d x %d) with a conflicting types '%s' and '%s' .\n",
+                          blk_x, blk_y,
+                          cluster_ctx.clb_nlist.block_type(blk_id)->name,
+                          device_ctx.grid[blk_x][blk_y].type->name);
+            }
         }
 
         /* Check already in use */
@@ -444,7 +493,9 @@ static AtomPinId find_atom_pin_for_pb_route_id(ClusterBlockId clb, int pb_route_
     VTR_ASSERT_MSG(cluster_ctx.clb_nlist.block_pb(clb)->pb_route[pb_route_id].atom_net_id, "PB route should correspond to a valid atom net");
 
     //Find the graph pin associated with this pb_route
-    const t_pb_graph_pin* gpin = pb_gpin_lookup.pb_gpin(cluster_ctx.clb_nlist.block_type(clb)->index, pb_route_id);
+    int index = cluster_ctx.clb_nlist.block_type(clb, false)->index;
+
+    const t_pb_graph_pin* gpin = pb_gpin_lookup.pb_gpin(index, pb_route_id);
     VTR_ASSERT(gpin);
 
     //Get the PB associated with this block
@@ -542,25 +593,36 @@ int find_clb_pb_pin(ClusterBlockId clb, int clb_pin) {
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_ctx = g_vpr_ctx.placement();
 
-    VTR_ASSERT_MSG(clb_pin < cluster_ctx.clb_nlist.block_type(clb)->num_pins, "Must be a valid top-level pin");
+    auto& clb_nlist = cluster_ctx.clb_nlist;
+
+    auto type = clb_nlist.block_type(clb);
 
-    int pb_pin = -1;
+    int pin = clb_pin;
+
+    // When an equivalent tile is selected, the CLB's physical type differs from its logical type (e.g. logical type LAB, physical type MLAB).
+    // The pin mapping must therefore be retrieved from the logical (LAB) type, which is obtained by passing `false` to block_type.
+    if (clb_nlist.block_eq_type_effective(clb)) {
+        int eq_type_index = clb_nlist.block_eq_type_index(clb);
+        auto block_type = clb_nlist.block_type(clb, false);
+
+        pin = get_type_pin(block_type->equivalent_tile_inverse_pin_mapping, eq_type_index, clb_pin);
+    }
+
+    int pb_pin = OPEN;
     if (place_ctx.block_locs[clb].nets_and_pins_synced_to_z_coordinate) {
         //Pins have been offset by z-coordinate, need to remove offset
 
-        t_type_ptr type = cluster_ctx.clb_nlist.block_type(clb);
         VTR_ASSERT(type->num_pins % type->capacity == 0);
         int num_basic_block_pins = type->num_pins / type->capacity;
         /* Logical location and physical location is offset by z * max_num_block_pins */
 
-        pb_pin = clb_pin - place_ctx.block_locs[clb].loc.z * num_basic_block_pins;
+        pb_pin = pin - place_ctx.block_locs[clb].loc.z * num_basic_block_pins;
     } else {
         //No offset
-        pb_pin = clb_pin;
+        pb_pin = pin;
     }
 
     VTR_ASSERT(pb_pin >= 0);
-
     return pb_pin;
 }
 
@@ -569,21 +631,35 @@ int find_pb_pin_clb_pin(ClusterBlockId clb, int pb_pin) {
     auto& cluster_ctx = g_vpr_ctx.clustering();
     auto& place_ctx = g_vpr_ctx.placement();
 
-    int clb_pin = -1;
+    auto& clb_nlist = cluster_ctx.clb_nlist;
+
+    auto type = clb_nlist.block_type(clb);
+
+    int pin = pb_pin;
+
+    // When an equivalent tile is selected, the CLB's physical type differs from its logical type (e.g. logical type LAB, physical type MLAB).
+    // The pin mapping must therefore be retrieved from the logical (LAB) type, which is obtained by passing `false` to block_type.
+    if (clb_nlist.block_eq_type_effective(clb)) {
+        int eq_type_index = clb_nlist.block_eq_type_index(clb);
+        auto block_type = clb_nlist.block_type(clb, false);
+
+        pin = get_type_pin(block_type->equivalent_tile_pin_mapping, eq_type_index, pb_pin);
+    }
+
+    int clb_pin = OPEN;
     if (place_ctx.block_locs[clb].nets_and_pins_synced_to_z_coordinate) {
         //Pins have been offset by z-coordinate, need to remove offset
-        t_type_ptr type = cluster_ctx.clb_nlist.block_type(clb);
         VTR_ASSERT(type->num_pins % type->capacity == 0);
         int num_basic_block_pins = type->num_pins / type->capacity;
         /* Logical location and physical location is offset by z * max_num_block_pins */
 
-        clb_pin = pb_pin + place_ctx.block_locs[clb].loc.z * num_basic_block_pins;
+        clb_pin = pin + place_ctx.block_locs[clb].loc.z * num_basic_block_pins;
     } else {
         //No offset
-        clb_pin = pb_pin;
+        clb_pin = pin;
     }
-    VTR_ASSERT(clb_pin >= 0);
 
+    VTR_ASSERT(clb_pin >= 0);
     return clb_pin;
 }
 
diff --git a/vtr_flow/arch/equivalent_tiles/slice.xml b/vtr_flow/arch/equivalent_tiles/slice.xml
new file mode 100644
index 00000000000..b8a16a781eb
--- /dev/null
+++ b/vtr_flow/arch/equivalent_tiles/slice.xml
@@ -0,0 +1,1625 @@
+<?xml version="1.0"?>
+<!-- This architecture definition represents a simplified version of a SLICEM site -->
+<architecture xmlns:xi="http://www.w3.org/2001/XInclude">
+  <models>
+    <model name="CARRY">
+      <input_ports>
+        <port combinational_sink_ports="CO_CHAIN CO_FABRIC O" name="CI"/>
+        <port combinational_sink_ports="CO_CHAIN CO_FABRIC" name="DI"/>
+        <port combinational_sink_ports="CO_CHAIN CO_FABRIC O" name="S"/>
+      </input_ports>
+      <output_ports>
+        <port name="CO_CHAIN"/>
+        <port name="CO_FABRIC"/>
+        <port name="O"/>
+      </output_ports>
+    </model>
+    <model name="CARRY0">
+      <input_ports>
+        <port combinational_sink_ports="CO_CHAIN CO_FABRIC O" name="CI"/>
+        <port combinational_sink_ports="CO_CHAIN CO_FABRIC O" name="CI_INIT"/>
+        <port combinational_sink_ports="CO_CHAIN CO_FABRIC" name="DI"/>
+        <port combinational_sink_ports="CO_CHAIN CO_FABRIC O" name="S"/>
+      </input_ports>
+      <output_ports>
+        <port name="CO_CHAIN"/>
+        <port name="CO_FABRIC"/>
+        <port name="O"/>
+      </output_ports>
+    </model>
+    <model name="FDRE">
+      <input_ports>
+        <port is_clock="1" name="C"/>
+        <port clock="C" name="CE"/>
+        <port clock="C" name="R"/>
+        <port clock="C" name="D"/>
+      </input_ports>
+      <output_ports>
+        <port clock="C" name="Q"/>
+      </output_ports>
+    </model>
+    <model name="DRAM_2_OUTPUT_STUB">
+      <input_ports>
+        <port combinational_sink_ports="DPO_OUT" name="DPO"/>
+        <port combinational_sink_ports="SPO_OUT" name="SPO"/>
+      </input_ports>
+      <output_ports>
+        <port name="DPO_OUT"/>
+        <port name="SPO_OUT"/>
+      </output_ports>
+    </model>
+    <model name="DRAM_4_OUTPUT_STUB">
+      <input_ports>
+        <port combinational_sink_ports="DOA_OUT" name="DOA"/>
+        <port combinational_sink_ports="DOB_OUT" name="DOB"/>
+        <port combinational_sink_ports="DOC_OUT" name="DOC"/>
+        <port combinational_sink_ports="DOD_OUT" name="DOD"/>
+      </input_ports>
+      <output_ports>
+        <port name="DOA_OUT"/>
+        <port name="DOB_OUT"/>
+        <port name="DOC_OUT"/>
+        <port name="DOD_OUT"/>
+      </output_ports>
+    </model>
+  </models>
+  <tiles>
+    <tile name="io_tile">
+      <pinlocations pattern="custom">
+        <loc side="top" xoffset="0" yoffset="0">io_tile.in io_tile.out</loc>
+        <loc side="left" xoffset="0" yoffset="0">io_tile.in io_tile.out</loc>
+        <loc side="bottom" xoffset="0" yoffset="0">io_tile.in io_tile.out</loc>
+        <loc side="right" xoffset="0" yoffset="0">io_tile.in io_tile.out</loc>
+      </pinlocations>
+      <fc in_type="frac" in_val="1.0" out_type="frac" out_val="1.0"/>
+    </tile>
+    <tile name="BLK_IG-SLICEM">
+      <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+    </tile>
+    <tile name="BLK_IG-SLICEL">
+      <equivalent_tiles>
+        <mode name="BLK_IG-SLICEM">
+          <map from="DX" to="DX" num_pins="1"/>
+          <map from="D1" to="D1" num_pins="1"/>
+          <map from="D2" to="D2" num_pins="1"/>
+          <map from="D3" to="D3" num_pins="1"/>
+          <map from="D4" to="D4" num_pins="1"/>
+          <map from="D5" to="D5" num_pins="1"/>
+          <map from="D6" to="D6" num_pins="1"/>
+          <map from="CX" to="CX" num_pins="1"/>
+          <map from="C1" to="C1" num_pins="1"/>
+          <map from="C2" to="C2" num_pins="1"/>
+          <map from="C3" to="C3" num_pins="1"/>
+          <map from="C4" to="C4" num_pins="1"/>
+          <map from="C5" to="C5" num_pins="1"/>
+          <map from="C6" to="C6" num_pins="1"/>
+          <map from="BX" to="BX" num_pins="1"/>
+          <map from="B1" to="B1" num_pins="1"/>
+          <map from="B2" to="B2" num_pins="1"/>
+          <map from="B3" to="B3" num_pins="1"/>
+          <map from="B4" to="B4" num_pins="1"/>
+          <map from="B5" to="B5" num_pins="1"/>
+          <map from="B6" to="B6" num_pins="1"/>
+          <map from="AX" to="AX" num_pins="1"/>
+          <map from="A1" to="A1" num_pins="1"/>
+          <map from="A2" to="A2" num_pins="1"/>
+          <map from="A3" to="A3" num_pins="1"/>
+          <map from="A4" to="A4" num_pins="1"/>
+          <map from="A5" to="A5" num_pins="1"/>
+          <map from="A6" to="A6" num_pins="1"/>
+          <map from="SR" to="SR" num_pins="1"/>
+          <map from="CE" to="CE" num_pins="1"/>
+          <map from="CLK" to="CLK" num_pins="1"/>
+          <map from="CIN" to="CIN" num_pins="1"/>
+          <map from="COUT" to="COUT" num_pins="1"/>
+          <map from="DMUX" to="DMUX" num_pins="1"/>
+          <map from="D" to="D" num_pins="1"/>
+          <map from="DQ" to="DQ" num_pins="1"/>
+          <map from="CMUX" to="CMUX" num_pins="1"/>
+          <map from="C" to="C" num_pins="1"/>
+          <map from="CQ" to="CQ" num_pins="1"/>
+          <map from="BMUX" to="BMUX" num_pins="1"/>
+          <map from="B" to="B" num_pins="1"/>
+          <map from="BQ" to="BQ" num_pins="1"/>
+          <map from="AMUX" to="AMUX" num_pins="1"/>
+          <map from="A" to="A" num_pins="1"/>
+          <map from="AQ" to="AQ" num_pins="1"/>
+        </mode>
+      </equivalent_tiles>
+      <fc in_type="frac" in_val="0.15" out_type="frac" out_val="0.10"/>
+    </tile>
+  </tiles>
+  <complexblocklist>
+    <pb_type name="io_tile">
+      <input name="in" num_pins="1"/>
+      <output name="out" num_pins="1"/>
+      <mode name="OUTPUT">
+        <pb_type blif_model=".output" name="pad" num_pb="1">
+          <input name="outpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct input="io_tile.in" name="-" output="pad.outpad"/>
+        </interconnect>
+      </mode>
+      <mode name="INPUT">
+        <pb_type blif_model=".input" name="pad" num_pb="1">
+          <output name="inpad" num_pins="1"/>
+        </pb_type>
+        <interconnect>
+          <direct input="pad.inpad" name="-" output="io_tile.out"/>
+        </interconnect>
+      </mode>
+    </pb_type>
+    <pb_type name="BLK_IG-SLICEM">
+      <input name="DI" num_pins="1"/>
+      <input name="DX" num_pins="1"/>
+      <input name="D1" num_pins="1"/>
+      <input name="D2" num_pins="1"/>
+      <input name="D3" num_pins="1"/>
+      <input name="D4" num_pins="1"/>
+      <input name="D5" num_pins="1"/>
+      <input name="D6" num_pins="1"/>
+      <input name="CI" num_pins="1"/>
+      <input name="CX" num_pins="1"/>
+      <input name="C1" num_pins="1"/>
+      <input name="C2" num_pins="1"/>
+      <input name="C3" num_pins="1"/>
+      <input name="C4" num_pins="1"/>
+      <input name="C5" num_pins="1"/>
+      <input name="C6" num_pins="1"/>
+      <input name="BI" num_pins="1"/>
+      <input name="BX" num_pins="1"/>
+      <input name="B1" num_pins="1"/>
+      <input name="B2" num_pins="1"/>
+      <input name="B3" num_pins="1"/>
+      <input name="B4" num_pins="1"/>
+      <input name="B5" num_pins="1"/>
+      <input name="B6" num_pins="1"/>
+      <input name="AI" num_pins="1"/>
+      <input name="AX" num_pins="1"/>
+      <input name="A1" num_pins="1"/>
+      <input name="A2" num_pins="1"/>
+      <input name="A3" num_pins="1"/>
+      <input name="A4" num_pins="1"/>
+      <input name="A5" num_pins="1"/>
+      <input name="A6" num_pins="1"/>
+      <input name="SR" num_pins="1"/>
+      <input name="CE" num_pins="1"/>
+      <input name="WE" num_pins="1"/>
+      <clock name="CLK" num_pins="1"/>
+      <input name="CIN" num_pins="1"/>
+      <output name="COUT" num_pins="1"/>
+      <output name="DMUX" num_pins="1"/>
+      <output name="D" num_pins="1"/>
+      <output name="DQ" num_pins="1"/>
+      <output name="CMUX" num_pins="1"/>
+      <output name="C" num_pins="1"/>
+      <output name="CQ" num_pins="1"/>
+      <output name="BMUX" num_pins="1"/>
+      <output name="B" num_pins="1"/>
+      <output name="BQ" num_pins="1"/>
+      <output name="AMUX" num_pins="1"/>
+      <output name="A" num_pins="1"/>
+      <output name="AQ" num_pins="1"/>
+      <pb_type name="BLK_IG-COMMON_SLICE" num_pb="1">
+        <input name="DX" num_pins="1"/>
+        <input name="CX" num_pins="1"/>
+        <input name="BX" num_pins="1"/>
+        <input name="AX" num_pins="1"/>
+        <input name="DO6" num_pins="1"/>
+        <input name="CO6" num_pins="1"/>
+        <input name="BO6" num_pins="1"/>
+        <input name="AO6" num_pins="1"/>
+        <input name="DO5" num_pins="1"/>
+        <input name="CO5" num_pins="1"/>
+        <input name="BO5" num_pins="1"/>
+        <input name="AO5" num_pins="1"/>
+        <input name="SR" num_pins="1"/>
+        <input name="CE" num_pins="1"/>
+        <input name="AMC31" num_pins="1"/>
+        <clock name="CLK" num_pins="1"/>
+        <input name="CIN" num_pins="1"/>
+        <output name="COUT" num_pins="1"/>
+        <output name="DMUX" num_pins="1"/>
+        <output name="D" num_pins="1"/>
+        <output name="DQ" num_pins="1"/>
+        <output name="CMUX" num_pins="1"/>
+        <output name="C" num_pins="1"/>
+        <output name="CQ" num_pins="1"/>
+        <output name="BMUX" num_pins="1"/>
+        <output name="B" num_pins="1"/>
+        <output name="BQ" num_pins="1"/>
+        <output name="AMUX" num_pins="1"/>
+        <output name="A" num_pins="1"/>
+        <output name="AQ" num_pins="1"/>
+        <!-- Model of FF group in SLICEL and SLICEM -->
+        <pb_type name="BLK_BB-SLICE_FF" num_pb="1">
+          <!-- CK, CE and SR are slice wide. -->
+          <input name="CE" num_pins="1"/>
+          <input name="SR" num_pins="1"/>
+          <clock name="CK" num_pins="1"/>
+          <input name="D" num_pins="4"/>
+          <output name="Q" num_pins="4"/>
+          <input name="D5" num_pins="4"/>
+          <output name="Q5" num_pins="4"/>
+          <!-- |      |FFSYNC|LATCH|ZRST | -->
+          <!-- |FDRE  |   X  |     |  X  | -->
+          <mode name="FDRE">
+            <pb_type blif_model=".subckt FDRE" name="BEL_FF-FDRE" num_pb="8">
+              <input name="D" num_pins="1"/>
+              <input name="CE" num_pins="1"/>
+              <clock name="C" num_pins="1"/>
+              <input name="R" num_pins="1"/>
+              <output name="Q" num_pins="1"/>
+              <T_setup clock="C" port="BEL_FF-FDRE.D" value="10e-12"/>
+              <T_setup clock="C" port="BEL_FF-FDRE.CE" value="10e-12"/>
+              <T_setup clock="C" port="BEL_FF-FDRE.R" value="10e-12"/>
+              <T_clock_to_Q clock="C" max="10e-12" port="BEL_FF-FDRE.Q"/>
+            </pb_type>
+            <interconnect>
+              <complete input="BLK_BB-SLICE_FF.CE" name="CE" output="BEL_FF-FDRE.CE"/>
+              <complete input="BLK_BB-SLICE_FF.CK" name="C" output="BEL_FF-FDRE.C"/>
+              <complete input="BLK_BB-SLICE_FF.SR" name="SR" output="BEL_FF-FDRE.R"/>
+              <direct input="BLK_BB-SLICE_FF.D[3:0]" name="D" output="BEL_FF-FDRE[3:0].D"/>
+              <direct input="BEL_FF-FDRE[3:0].Q" name="Q" output="BLK_BB-SLICE_FF.Q[3:0]"/>
+              <direct input="BLK_BB-SLICE_FF.D5[3:0]" name="D5" output="BEL_FF-FDRE[7:4].D"/>
+              <direct input="BEL_FF-FDRE[7:4].Q" name="Q5" output="BLK_BB-SLICE_FF.Q5[3:0]"/>
+            </interconnect>
+          </mode>
+        </pb_type>
+        <!-- CARRY4 logic -->
+        <pb_type blif_model=".subckt CARRY0" name="BEL_BB-CARRY0" num_pb="1">
+          <input name="CI" num_pins="1"/>
+          <input name="CI_INIT" num_pins="1"/>
+          <output name="CO_CHAIN" num_pins="1"/>
+          <output name="CO_FABRIC" num_pins="1"/>
+          <input name="DI" num_pins="1"/>
+          <output name="O" num_pins="1"/>
+          <input name="S" num_pins="1"/>
+          <delay_constant in_port="BEL_BB-CARRY0.CI" max="10e-12" out_port="BEL_BB-CARRY0.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY0.CI_INIT" max="10e-12" out_port="BEL_BB-CARRY0.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY0.DI" max="10e-12" out_port="BEL_BB-CARRY0.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY0.S" max="10e-12" out_port="BEL_BB-CARRY0.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY0.CI" max="10e-12" out_port="BEL_BB-CARRY0.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY0.CI_INIT" max="10e-12" out_port="BEL_BB-CARRY0.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY0.DI" max="10e-12" out_port="BEL_BB-CARRY0.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY0.S" max="10e-12" out_port="BEL_BB-CARRY0.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY0.CI" max="10e-12" out_port="BEL_BB-CARRY0.O"/>
+          <delay_constant in_port="BEL_BB-CARRY0.CI_INIT" max="10e-12" out_port="BEL_BB-CARRY0.O"/>
+          <delay_constant in_port="BEL_BB-CARRY0.S" max="10e-12" out_port="BEL_BB-CARRY0.O"/>
+        </pb_type>
+        <pb_type blif_model=".subckt CARRY" name="BEL_BB-CARRY" num_pb="3">
+          <input name="CI" num_pins="1"/>
+          <output name="CO_CHAIN" num_pins="1"/>
+          <output name="CO_FABRIC" num_pins="1"/>
+          <input name="DI" num_pins="1"/>
+          <output name="O" num_pins="1"/>
+          <input name="S" num_pins="1"/>
+          <delay_constant in_port="BEL_BB-CARRY.CI" max="10e-12" out_port="BEL_BB-CARRY.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY.DI" max="10e-12" out_port="BEL_BB-CARRY.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY.S" max="10e-12" out_port="BEL_BB-CARRY.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY.CI" max="10e-12" out_port="BEL_BB-CARRY.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY.DI" max="10e-12" out_port="BEL_BB-CARRY.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY.S" max="10e-12" out_port="BEL_BB-CARRY.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY.CI" max="10e-12" out_port="BEL_BB-CARRY.O"/>
+          <delay_constant in_port="BEL_BB-CARRY.S" max="10e-12" out_port="BEL_BB-CARRY.O"/>
+        </pb_type>
+        <interconnect>
+          <direct input="BLK_IG-COMMON_SLICE.DX" name="DX" output="BLK_BB-SLICE_FF.D5[3]"/>
+          <direct input="BLK_IG-COMMON_SLICE.CX" name="CX" output="BLK_BB-SLICE_FF.D5[2]"/>
+          <direct input="BLK_IG-COMMON_SLICE.BX" name="BX" output="BLK_BB-SLICE_FF.D5[1]"/>
+          <direct input="BLK_IG-COMMON_SLICE.AX" name="AX" output="BLK_BB-SLICE_FF.D5[0]"/>
+          <mux input="BLK_IG-COMMON_SLICE.AMC31 BLK_BB-SLICE_FF.Q5[3] BEL_BB-CARRY[2].O BEL_BB-CARRY[2].CO_FABRIC BLK_IG-COMMON_SLICE.DO6 BLK_IG-COMMON_SLICE.DO5" name="DMUX" output="BLK_IG-COMMON_SLICE.DMUX"/>
+          <mux input="BLK_BB-SLICE_FF.Q5[2] BEL_BB-CARRY[1].O BEL_BB-CARRY[1].CO_FABRIC BLK_IG-COMMON_SLICE.CO6 BLK_IG-COMMON_SLICE.CO5" name="CMUX" output="BLK_IG-COMMON_SLICE.CMUX"/>
+          <mux input="BLK_BB-SLICE_FF.Q5[1] BEL_BB-CARRY[0].O BEL_BB-CARRY[0].CO_FABRIC BLK_IG-COMMON_SLICE.BO6 BLK_IG-COMMON_SLICE.BO5" name="BMUX" output="BLK_IG-COMMON_SLICE.BMUX"/>
+          <mux input="BLK_BB-SLICE_FF.Q5[0] BEL_BB-CARRY0.O BEL_BB-CARRY0.CO_FABRIC BLK_IG-COMMON_SLICE.AO6 BLK_IG-COMMON_SLICE.AO5" name="AMUX" output="BLK_IG-COMMON_SLICE.AMUX"/>
+          <mux input="BEL_BB-CARRY[2].O BEL_BB-CARRY[2].CO_FABRIC BLK_IG-COMMON_SLICE.DO6 BLK_IG-COMMON_SLICE.DO5 BLK_IG-COMMON_SLICE.DX" name="DFFMUX" output="BLK_BB-SLICE_FF.D[3]"/>
+          <mux input="BEL_BB-CARRY[1].O BEL_BB-CARRY[1].CO_FABRIC BLK_IG-COMMON_SLICE.CO6 BLK_IG-COMMON_SLICE.CO5 BLK_IG-COMMON_SLICE.CX" name="CFFMUX" output="BLK_BB-SLICE_FF.D[2]"/>
+          <mux input="BEL_BB-CARRY[0].O BEL_BB-CARRY[0].CO_FABRIC BLK_IG-COMMON_SLICE.BO6 BLK_IG-COMMON_SLICE.BO5 BLK_IG-COMMON_SLICE.BX" name="BFFMUX" output="BLK_BB-SLICE_FF.D[1]"/>
+          <mux input="BEL_BB-CARRY0.O BEL_BB-CARRY0.CO_FABRIC BLK_IG-COMMON_SLICE.AO6 BLK_IG-COMMON_SLICE.AO5 BLK_IG-COMMON_SLICE.AX" name="AFFMUX" output="BLK_BB-SLICE_FF.D[0]"/>
+          <direct input="BLK_BB-SLICE_FF.Q[0]" name="AFF" output="BLK_IG-COMMON_SLICE.AQ"/>
+          <direct input="BLK_BB-SLICE_FF.Q[1]" name="BFF" output="BLK_IG-COMMON_SLICE.BQ"/>
+          <direct input="BLK_BB-SLICE_FF.Q[2]" name="CFF" output="BLK_IG-COMMON_SLICE.CQ"/>
+          <direct input="BLK_BB-SLICE_FF.Q[3]" name="DFF" output="BLK_IG-COMMON_SLICE.DQ"/>
+          <!-- LUT O6 output -->
+          <direct input="BLK_IG-COMMON_SLICE.DO6" name="BLK_IG-COMMON_SLICE_DOUT" output="BLK_IG-COMMON_SLICE.D"/>
+          <direct input="BLK_IG-COMMON_SLICE.CO6" name="BLK_IG-COMMON_SLICE_COUT" output="BLK_IG-COMMON_SLICE.C"/>
+          <direct input="BLK_IG-COMMON_SLICE.BO6" name="BLK_IG-COMMON_SLICE_BOUT" output="BLK_IG-COMMON_SLICE.B"/>
+          <direct input="BLK_IG-COMMON_SLICE.AO6" name="BLK_IG-COMMON_SLICE_AOUT" output="BLK_IG-COMMON_SLICE.A"/>
+          <!-- Carry -->
+          <!-- Carry initialization -->
+          <direct input="BLK_IG-COMMON_SLICE.AX" name="PRECYINIT_MUX" output="BEL_BB-CARRY0.CI_INIT"/>
+
+          <direct input="BLK_IG-COMMON_SLICE.CIN" name="CIN_TO_CARRY0" output="BEL_BB-CARRY0.CI"/>
+          <!-- Tile internal carry -->
+          <direct input="BEL_BB-CARRY0.CO_CHAIN" name="CARRY0_TO_CARRY1" output="BEL_BB-CARRY[0].CI"/>
+          <direct input="BEL_BB-CARRY[0].CO_CHAIN" name="CARRY1_TO_CARRY2" output="BEL_BB-CARRY[1].CI"/>
+          <direct input="BEL_BB-CARRY[1].CO_CHAIN" name="CARRY2_TO_CARRY3" output="BEL_BB-CARRY[2].CI"/>
+          <!-- Carry selects -->
+          <direct input="BLK_IG-COMMON_SLICE.DO6" name="CARRY_S3" output="BEL_BB-CARRY[2].S"/>
+          <direct input="BLK_IG-COMMON_SLICE.CO6" name="CARRY_S2" output="BEL_BB-CARRY[1].S"/>
+          <direct input="BLK_IG-COMMON_SLICE.BO6" name="CARRY_S1" output="BEL_BB-CARRY[0].S"/>
+          <direct input="BLK_IG-COMMON_SLICE.AO6" name="CARRY_S0" output="BEL_BB-CARRY0.S"/>
+          <!-- Carry MUXCY.DI -->
+          <mux input="BLK_IG-COMMON_SLICE.DO5 BLK_IG-COMMON_SLICE.DX" name="CARRY_DI3" output="BEL_BB-CARRY[2].DI"/>
+          <mux input="BLK_IG-COMMON_SLICE.CO5 BLK_IG-COMMON_SLICE.CX" name="CARRY_DI2" output="BEL_BB-CARRY[1].DI"/>
+          <mux input="BLK_IG-COMMON_SLICE.BO5 BLK_IG-COMMON_SLICE.BX" name="CARRY_DI1" output="BEL_BB-CARRY[0].DI"/>
+          <mux input="BLK_IG-COMMON_SLICE.AO5 BLK_IG-COMMON_SLICE.AX" name="CARRY_DI0" output="BEL_BB-CARRY0.DI"/>
+          <direct input="BEL_BB-CARRY[2].CO_CHAIN" name="COUT" output="BLK_IG-COMMON_SLICE.COUT"/>
+          <!-- Clock, Clock Enable and Reset -->
+          <direct input="BLK_IG-COMMON_SLICE.CLK" name="CK" output="BLK_BB-SLICE_FF.CK"/>
+          <direct input="BLK_IG-COMMON_SLICE.CE" name="CE" output="BLK_BB-SLICE_FF.CE"/>
+          <direct input="BLK_IG-COMMON_SLICE.SR" name="SR" output="BLK_BB-SLICE_FF.SR"/>
+        </interconnect>
+      </pb_type>
+      <pb_type name="BLK_IG-SLICEM_MODES" num_pb="1">
+        <input name="DI" num_pins="1"/>
+        <input name="DX" num_pins="1"/>
+        <input name="D1" num_pins="1"/>
+        <input name="D2" num_pins="1"/>
+        <input name="D3" num_pins="1"/>
+        <input name="D4" num_pins="1"/>
+        <input name="D5" num_pins="1"/>
+        <input name="D6" num_pins="1"/>
+        <input name="CI" num_pins="1"/>
+        <input name="CX" num_pins="1"/>
+        <input name="C1" num_pins="1"/>
+        <input name="C2" num_pins="1"/>
+        <input name="C3" num_pins="1"/>
+        <input name="C4" num_pins="1"/>
+        <input name="C5" num_pins="1"/>
+        <input name="C6" num_pins="1"/>
+        <input name="BI" num_pins="1"/>
+        <input name="BX" num_pins="1"/>
+        <input name="B1" num_pins="1"/>
+        <input name="B2" num_pins="1"/>
+        <input name="B3" num_pins="1"/>
+        <input name="B4" num_pins="1"/>
+        <input name="B5" num_pins="1"/>
+        <input name="B6" num_pins="1"/>
+        <input name="AI" num_pins="1"/>
+        <input name="AX" num_pins="1"/>
+        <input name="A1" num_pins="1"/>
+        <input name="A2" num_pins="1"/>
+        <input name="A3" num_pins="1"/>
+        <input name="A4" num_pins="1"/>
+        <input name="A5" num_pins="1"/>
+        <input name="A6" num_pins="1"/>
+        <input name="WA7" num_pins="1"/>
+        <input name="WA8" num_pins="1"/>
+        <input name="CE" num_pins="1"/>
+        <input name="WE" num_pins="1"/>
+        <output name="DO6" num_pins="1"/>
+        <output name="DO5" num_pins="1"/>
+        <output name="CO6" num_pins="1"/>
+        <output name="CO5" num_pins="1"/>
+        <output name="BO6" num_pins="1"/>
+        <output name="BO5" num_pins="1"/>
+        <output name="AO6" num_pins="1"/>
+        <output name="AO5" num_pins="1"/>
+        <clock name="CLK" num_pins="1"/>
+        <mode name="LUTs">
+          <pb_type name="BLK_IG-COMMON_LUT_AND_F78MUX" num_pb="1">
+            <input name="D1" num_pins="1"/>
+            <input name="D2" num_pins="1"/>
+            <input name="D3" num_pins="1"/>
+            <input name="D4" num_pins="1"/>
+            <input name="D5" num_pins="1"/>
+            <input name="D6" num_pins="1"/>
+            <input name="CX" num_pins="1"/>
+            <input name="C1" num_pins="1"/>
+            <input name="C2" num_pins="1"/>
+            <input name="C3" num_pins="1"/>
+            <input name="C4" num_pins="1"/>
+            <input name="C5" num_pins="1"/>
+            <input name="C6" num_pins="1"/>
+            <input name="BX" num_pins="1"/>
+            <input name="B1" num_pins="1"/>
+            <input name="B2" num_pins="1"/>
+            <input name="B3" num_pins="1"/>
+            <input name="B4" num_pins="1"/>
+            <input name="B5" num_pins="1"/>
+            <input name="B6" num_pins="1"/>
+            <input name="AX" num_pins="1"/>
+            <input name="A1" num_pins="1"/>
+            <input name="A2" num_pins="1"/>
+            <input name="A3" num_pins="1"/>
+            <input name="A4" num_pins="1"/>
+            <input name="A5" num_pins="1"/>
+            <input name="A6" num_pins="1"/>
+            <output name="DO6" num_pins="1"/>
+            <output name="CO6" num_pins="1"/>
+            <output name="BO6" num_pins="1"/>
+            <output name="AO6" num_pins="1"/>
+            <output name="DO5" num_pins="1"/>
+            <output name="CO5" num_pins="1"/>
+            <output name="BO5" num_pins="1"/>
+            <output name="AO5" num_pins="1"/>
+            <pb_type name="BLK_IG-ALUT" num_pb="1">
+              <input name="A1" num_pins="1"/>
+              <input name="A2" num_pins="1"/>
+              <input name="A3" num_pins="1"/>
+              <input name="A4" num_pins="1"/>
+              <input name="A5" num_pins="1"/>
+              <input name="A6" num_pins="1"/>
+              <output name="O5" num_pins="1"/>
+              <output name="O6" num_pins="1"/>
+              <!-- LUT5+LUT5+F6MUX with two outputs -->
+              <mode name="BLK_IG-ALUT-LUT5_MUX">
+                <pb_type blif_model=".names" class="lut" name="BEL_LT-A5LUT" num_pb="2">
+                  <input name="in" num_pins="5" port_class="lut_in"/>
+                  <output name="out" num_pins="1" port_class="lut_out"/>
+                  <delay_matrix in_port="BEL_LT-A5LUT.in" out_port="BEL_LT-A5LUT.out" type="max">
+                    0.068e-9
+                    0.068e-9
+                    0.068e-9
+                    0.068e-9
+                    0.068e-9
+                  </delay_matrix>
+                </pb_type>
+                <interconnect>
+                  <!-- LUT5 (upper) -> O6 -->
+                  <direct input="BLK_IG-ALUT.A5" name="ALUT_A5_0" output="BEL_LT-A5LUT[0].in[4]"/>
+                  <direct input="BLK_IG-ALUT.A4" name="ALUT_A4_0" output="BEL_LT-A5LUT[0].in[3]"/>
+                  <direct input="BLK_IG-ALUT.A3" name="ALUT_A3_0" output="BEL_LT-A5LUT[0].in[2]"/>
+                  <direct input="BLK_IG-ALUT.A2" name="ALUT_A2_0" output="BEL_LT-A5LUT[0].in[1]"/>
+                  <direct input="BLK_IG-ALUT.A1" name="ALUT_A1_0" output="BEL_LT-A5LUT[0].in[0]"/>
+                  <!-- LUT5 (lower) -> O5 -->
+                  <direct input="BLK_IG-ALUT.A5" name="ALUT_A5_1" output="BEL_LT-A5LUT[1].in[4]"/>
+                  <direct input="BLK_IG-ALUT.A4" name="ALUT_A4_1" output="BEL_LT-A5LUT[1].in[3]"/>
+                  <direct input="BLK_IG-ALUT.A3" name="ALUT_A3_1" output="BEL_LT-A5LUT[1].in[2]"/>
+                  <direct input="BLK_IG-ALUT.A2" name="ALUT_A2_1" output="BEL_LT-A5LUT[1].in[1]"/>
+                  <direct input="BLK_IG-ALUT.A1" name="ALUT_A1_1" output="BEL_LT-A5LUT[1].in[0]"/>
+                  <!-- MUX used for LUT6 -->
+                  <!-- LUT outputs -->
+                  <direct input="BEL_LT-A5LUT[0].out" name="O5" output="BLK_IG-ALUT.O5">
+                    <pack_pattern in_port="BEL_LT-A5LUT[0].out" name="LUT5x2" out_port="BLK_IG-ALUT.O5"/>
+                  </direct>
+                  <direct input="BEL_LT-A5LUT[1].out" name="O6" output="BLK_IG-ALUT.O6">
+                    <pack_pattern in_port="BEL_LT-A5LUT[1].out" name="LUT5x2" out_port="BLK_IG-ALUT.O6"/>
+                  </direct>
+                </interconnect>
+              </mode>
+            </pb_type>
+            <pb_type name="BLK_IG-BLUT" num_pb="1">
+              <input name="A1" num_pins="1"/>
+              <input name="A2" num_pins="1"/>
+              <input name="A3" num_pins="1"/>
+              <input name="A4" num_pins="1"/>
+              <input name="A5" num_pins="1"/>
+              <input name="A6" num_pins="1"/>
+              <output name="O5" num_pins="1"/>
+              <output name="O6" num_pins="1"/>
+              <!-- LUT5+LUT5+F6MUX with two outputs -->
+              <mode name="BLK_IG-BLUT-LUT5_MUX">
+                <pb_type blif_model=".names" class="lut" name="BEL_LT-B5LUT" num_pb="2">
+                  <input name="in" num_pins="5" port_class="lut_in"/>
+                  <output name="out" num_pins="1" port_class="lut_out"/>
+                  <delay_matrix in_port="BEL_LT-B5LUT.in" out_port="BEL_LT-B5LUT.out" type="max">
+                    0.068e-9
+                    0.068e-9
+                    0.068e-9
+                    0.068e-9
+                    0.068e-9
+                  </delay_matrix>
+                </pb_type>
+                <interconnect>
+                  <!-- LUT5 (upper) -> O6 -->
+                  <direct input="BLK_IG-BLUT.A5" name="BLUT_A5_0" output="BEL_LT-B5LUT[0].in[4]"/>
+                  <direct input="BLK_IG-BLUT.A4" name="BLUT_A4_0" output="BEL_LT-B5LUT[0].in[3]"/>
+                  <direct input="BLK_IG-BLUT.A3" name="BLUT_A3_0" output="BEL_LT-B5LUT[0].in[2]"/>
+                  <direct input="BLK_IG-BLUT.A2" name="BLUT_A2_0" output="BEL_LT-B5LUT[0].in[1]"/>
+                  <direct input="BLK_IG-BLUT.A1" name="BLUT_A1_0" output="BEL_LT-B5LUT[0].in[0]"/>
+                  <!-- LUT5 (lower) -> O5 -->
+                  <direct input="BLK_IG-BLUT.A5" name="BLUT_A5_1" output="BEL_LT-B5LUT[1].in[4]"/>
+                  <direct input="BLK_IG-BLUT.A4" name="BLUT_A4_1" output="BEL_LT-B5LUT[1].in[3]"/>
+                  <direct input="BLK_IG-BLUT.A3" name="BLUT_A3_1" output="BEL_LT-B5LUT[1].in[2]"/>
+                  <direct input="BLK_IG-BLUT.A2" name="BLUT_A2_1" output="BEL_LT-B5LUT[1].in[1]"/>
+                  <direct input="BLK_IG-BLUT.A1" name="BLUT_A1_1" output="BEL_LT-B5LUT[1].in[0]"/>
+                  <!-- LUT outputs -->
+                  <direct input="BEL_LT-B5LUT[0].out" name="O5" output="BLK_IG-BLUT.O5">
+                    <pack_pattern in_port="BEL_LT-B5LUT[0].out" name="LUT5x2" out_port="BLK_IG-BLUT.O5"/>
+                  </direct>
+                  <direct input="BEL_LT-B5LUT[1].out" name="O6" output="BLK_IG-BLUT.O6">
+                    <pack_pattern in_port="BEL_LT-B5LUT[1].out" name="LUT5x2" out_port="BLK_IG-BLUT.O6"/>
+                  </direct>
+                </interconnect>
+              </mode>
+            </pb_type>
+            <pb_type name="BLK_IG-CLUT" num_pb="1">
+              <input name="A1" num_pins="1"/>
+              <input name="A2" num_pins="1"/>
+              <input name="A3" num_pins="1"/>
+              <input name="A4" num_pins="1"/>
+              <input name="A5" num_pins="1"/>
+              <input name="A6" num_pins="1"/>
+              <output name="O5" num_pins="1"/>
+              <output name="O6" num_pins="1"/>
+              <!-- LUT5+LUT5+F6MUX with two outputs -->
+              <mode name="BLK_IG-CLUT-LUT5_MUX">
+                <pb_type blif_model=".names" class="lut" name="BEL_LT-C5LUT" num_pb="2">
+                  <input name="in" num_pins="5" port_class="lut_in"/>
+                  <output name="out" num_pins="1" port_class="lut_out"/>
+                  <delay_matrix in_port="BEL_LT-C5LUT.in" out_port="BEL_LT-C5LUT.out" type="max">
+                    0.068e-9
+                    0.068e-9
+                    0.068e-9
+                    0.068e-9
+                    0.068e-9
+                  </delay_matrix>
+                </pb_type>
+                <interconnect>
+                  <!-- LUT5 [0] inputs (its output drives O5) -->
+                  <direct input="BLK_IG-CLUT.A5" name="CLUT_A5_0" output="BEL_LT-C5LUT[0].in[4]"/>
+                  <direct input="BLK_IG-CLUT.A4" name="CLUT_A4_0" output="BEL_LT-C5LUT[0].in[3]"/>
+                  <direct input="BLK_IG-CLUT.A3" name="CLUT_A3_0" output="BEL_LT-C5LUT[0].in[2]"/>
+                  <direct input="BLK_IG-CLUT.A2" name="CLUT_A2_0" output="BEL_LT-C5LUT[0].in[1]"/>
+                  <direct input="BLK_IG-CLUT.A1" name="CLUT_A1_0" output="BEL_LT-C5LUT[0].in[0]"/>
+                  <!-- LUT5 [1] inputs (its output drives O6) -->
+                  <direct input="BLK_IG-CLUT.A5" name="CLUT_A5_1" output="BEL_LT-C5LUT[1].in[4]"/>
+                  <direct input="BLK_IG-CLUT.A4" name="CLUT_A4_1" output="BEL_LT-C5LUT[1].in[3]"/>
+                  <direct input="BLK_IG-CLUT.A3" name="CLUT_A3_1" output="BEL_LT-C5LUT[1].in[2]"/>
+                  <direct input="BLK_IG-CLUT.A2" name="CLUT_A2_1" output="BEL_LT-C5LUT[1].in[1]"/>
+                  <direct input="BLK_IG-CLUT.A1" name="CLUT_A1_1" output="BEL_LT-C5LUT[1].in[0]"/>
+                  <!-- LUT outputs -->
+                  <direct input="BEL_LT-C5LUT[0].out" name="O5" output="BLK_IG-CLUT.O5">
+                    <pack_pattern in_port="BEL_LT-C5LUT[0].out" name="LUT5x2" out_port="BLK_IG-CLUT.O5"/>
+                  </direct>
+                  <direct input="BEL_LT-C5LUT[1].out" name="O6" output="BLK_IG-CLUT.O6">
+                    <pack_pattern in_port="BEL_LT-C5LUT[1].out" name="LUT5x2" out_port="BLK_IG-CLUT.O6"/>
+                  </direct>
+                </interconnect>
+              </mode>
+            </pb_type>
+            <pb_type name="BLK_IG-DLUT" num_pb="1">
+              <input name="A1" num_pins="1"/>
+              <input name="A2" num_pins="1"/>
+              <input name="A3" num_pins="1"/>
+              <input name="A4" num_pins="1"/>
+              <input name="A5" num_pins="1"/>
+              <input name="A6" num_pins="1"/>
+              <output name="O5" num_pins="1"/>
+              <output name="O6" num_pins="1"/>
+              <!-- LUT5+LUT5+F6MUX with two outputs -->
+              <mode name="BLK_IG-DLUT-LUT5_MUX">
+                <pb_type blif_model=".names" class="lut" name="BEL_LT-D5LUT" num_pb="2">
+                  <input name="in" num_pins="5" port_class="lut_in"/>
+                  <output name="out" num_pins="1" port_class="lut_out"/>
+                  <delay_matrix in_port="BEL_LT-D5LUT.in" out_port="BEL_LT-D5LUT.out" type="max">
+                    0.068e-9
+                    0.068e-9
+                    0.068e-9
+                    0.068e-9
+                    0.068e-9
+                  </delay_matrix>
+                </pb_type>
+                <interconnect>
+                  <!-- LUT5 [0] inputs (its output drives O5) -->
+                  <direct input="BLK_IG-DLUT.A5" name="DLUT_A5_0" output="BEL_LT-D5LUT[0].in[4]"/>
+                  <direct input="BLK_IG-DLUT.A4" name="DLUT_A4_0" output="BEL_LT-D5LUT[0].in[3]"/>
+                  <direct input="BLK_IG-DLUT.A3" name="DLUT_A3_0" output="BEL_LT-D5LUT[0].in[2]"/>
+                  <direct input="BLK_IG-DLUT.A2" name="DLUT_A2_0" output="BEL_LT-D5LUT[0].in[1]"/>
+                  <direct input="BLK_IG-DLUT.A1" name="DLUT_A1_0" output="BEL_LT-D5LUT[0].in[0]"/>
+                  <!-- LUT5 [1] inputs (its output drives O6) -->
+                  <direct input="BLK_IG-DLUT.A5" name="DLUT_A5_1" output="BEL_LT-D5LUT[1].in[4]"/>
+                  <direct input="BLK_IG-DLUT.A4" name="DLUT_A4_1" output="BEL_LT-D5LUT[1].in[3]"/>
+                  <direct input="BLK_IG-DLUT.A3" name="DLUT_A3_1" output="BEL_LT-D5LUT[1].in[2]"/>
+                  <direct input="BLK_IG-DLUT.A2" name="DLUT_A2_1" output="BEL_LT-D5LUT[1].in[1]"/>
+                  <direct input="BLK_IG-DLUT.A1" name="DLUT_A1_1" output="BEL_LT-D5LUT[1].in[0]"/>
+                  <!-- LUT outputs -->
+                  <direct input="BEL_LT-D5LUT[0].out" name="O5" output="BLK_IG-DLUT.O5">
+                    <pack_pattern in_port="BEL_LT-D5LUT[0].out" name="LUT5x2" out_port="BLK_IG-DLUT.O5"/>
+                  </direct>
+                  <direct input="BEL_LT-D5LUT[1].out" name="O6" output="BLK_IG-DLUT.O6">
+                    <pack_pattern in_port="BEL_LT-D5LUT[1].out" name="LUT5x2" out_port="BLK_IG-DLUT.O6"/>
+                  </direct>
+                </interconnect>
+              </mode>
+            </pb_type>
+            <interconnect>
+              <!-- LUT input pins -->
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.D1" name="D1" output="BLK_IG-DLUT.A1"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.D2" name="D2" output="BLK_IG-DLUT.A2"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.D3" name="D3" output="BLK_IG-DLUT.A3"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.D4" name="D4" output="BLK_IG-DLUT.A4"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.D5" name="D5" output="BLK_IG-DLUT.A5"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.D6" name="D6" output="BLK_IG-DLUT.A6"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.C1" name="C1" output="BLK_IG-CLUT.A1"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.C2" name="C2" output="BLK_IG-CLUT.A2"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.C3" name="C3" output="BLK_IG-CLUT.A3"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.C4" name="C4" output="BLK_IG-CLUT.A4"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.C5" name="C5" output="BLK_IG-CLUT.A5"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.C6" name="C6" output="BLK_IG-CLUT.A6"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.B1" name="B1" output="BLK_IG-BLUT.A1"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.B2" name="B2" output="BLK_IG-BLUT.A2"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.B3" name="B3" output="BLK_IG-BLUT.A3"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.B4" name="B4" output="BLK_IG-BLUT.A4"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.B5" name="B5" output="BLK_IG-BLUT.A5"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.B6" name="B6" output="BLK_IG-BLUT.A6"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.A1" name="A1" output="BLK_IG-ALUT.A1"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.A2" name="A2" output="BLK_IG-ALUT.A2"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.A3" name="A3" output="BLK_IG-ALUT.A3"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.A4" name="A4" output="BLK_IG-ALUT.A4"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.A5" name="A5" output="BLK_IG-ALUT.A5"/>
+              <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.A6" name="A6" output="BLK_IG-ALUT.A6"/>
+              <direct input="BLK_IG-DLUT.O6" name="DO6" output="BLK_IG-COMMON_LUT_AND_F78MUX.DO6"/>
+              <direct input="BLK_IG-DLUT.O5" name="DO5" output="BLK_IG-COMMON_LUT_AND_F78MUX.DO5"/>
+              <direct input="BLK_IG-CLUT.O6" name="CO6" output="BLK_IG-COMMON_LUT_AND_F78MUX.CO6"/>
+              <direct input="BLK_IG-CLUT.O5" name="CO5" output="BLK_IG-COMMON_LUT_AND_F78MUX.CO5"/>
+              <direct input="BLK_IG-BLUT.O6" name="BO6" output="BLK_IG-COMMON_LUT_AND_F78MUX.BO6"/>
+              <direct input="BLK_IG-BLUT.O5" name="BO5" output="BLK_IG-COMMON_LUT_AND_F78MUX.BO5"/>
+              <direct input="BLK_IG-ALUT.O6" name="AO6" output="BLK_IG-COMMON_LUT_AND_F78MUX.AO6"/>
+              <direct input="BLK_IG-ALUT.O5" name="AO5" output="BLK_IG-COMMON_LUT_AND_F78MUX.AO5"/>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <!-- Normal LUT input pins -->
+            <direct input="BLK_IG-SLICEM_MODES.D1" name="D1" output="BLK_IG-COMMON_LUT_AND_F78MUX.D1"/>
+            <direct input="BLK_IG-SLICEM_MODES.D2" name="D2" output="BLK_IG-COMMON_LUT_AND_F78MUX.D2"/>
+            <direct input="BLK_IG-SLICEM_MODES.D3" name="D3" output="BLK_IG-COMMON_LUT_AND_F78MUX.D3"/>
+            <direct input="BLK_IG-SLICEM_MODES.D4" name="D4" output="BLK_IG-COMMON_LUT_AND_F78MUX.D4"/>
+            <direct input="BLK_IG-SLICEM_MODES.D5" name="D5" output="BLK_IG-COMMON_LUT_AND_F78MUX.D5"/>
+            <direct input="BLK_IG-SLICEM_MODES.D6" name="D6" output="BLK_IG-COMMON_LUT_AND_F78MUX.D6"/>
+            <direct input="BLK_IG-SLICEM_MODES.C1" name="C1" output="BLK_IG-COMMON_LUT_AND_F78MUX.C1"/>
+            <direct input="BLK_IG-SLICEM_MODES.C2" name="C2" output="BLK_IG-COMMON_LUT_AND_F78MUX.C2"/>
+            <direct input="BLK_IG-SLICEM_MODES.C3" name="C3" output="BLK_IG-COMMON_LUT_AND_F78MUX.C3"/>
+            <direct input="BLK_IG-SLICEM_MODES.C4" name="C4" output="BLK_IG-COMMON_LUT_AND_F78MUX.C4"/>
+            <direct input="BLK_IG-SLICEM_MODES.C5" name="C5" output="BLK_IG-COMMON_LUT_AND_F78MUX.C5"/>
+            <direct input="BLK_IG-SLICEM_MODES.C6" name="C6" output="BLK_IG-COMMON_LUT_AND_F78MUX.C6"/>
+            <direct input="BLK_IG-SLICEM_MODES.B1" name="B1" output="BLK_IG-COMMON_LUT_AND_F78MUX.B1"/>
+            <direct input="BLK_IG-SLICEM_MODES.B2" name="B2" output="BLK_IG-COMMON_LUT_AND_F78MUX.B2"/>
+            <direct input="BLK_IG-SLICEM_MODES.B3" name="B3" output="BLK_IG-COMMON_LUT_AND_F78MUX.B3"/>
+            <direct input="BLK_IG-SLICEM_MODES.B4" name="B4" output="BLK_IG-COMMON_LUT_AND_F78MUX.B4"/>
+            <direct input="BLK_IG-SLICEM_MODES.B5" name="B5" output="BLK_IG-COMMON_LUT_AND_F78MUX.B5"/>
+            <direct input="BLK_IG-SLICEM_MODES.B6" name="B6" output="BLK_IG-COMMON_LUT_AND_F78MUX.B6"/>
+            <direct input="BLK_IG-SLICEM_MODES.A1" name="A1" output="BLK_IG-COMMON_LUT_AND_F78MUX.A1"/>
+            <direct input="BLK_IG-SLICEM_MODES.A2" name="A2" output="BLK_IG-COMMON_LUT_AND_F78MUX.A2"/>
+            <direct input="BLK_IG-SLICEM_MODES.A3" name="A3" output="BLK_IG-COMMON_LUT_AND_F78MUX.A3"/>
+            <direct input="BLK_IG-SLICEM_MODES.A4" name="A4" output="BLK_IG-COMMON_LUT_AND_F78MUX.A4"/>
+            <direct input="BLK_IG-SLICEM_MODES.A5" name="A5" output="BLK_IG-COMMON_LUT_AND_F78MUX.A5"/>
+            <direct input="BLK_IG-SLICEM_MODES.A6" name="A6" output="BLK_IG-COMMON_LUT_AND_F78MUX.A6"/>
+            <direct input="BLK_IG-SLICEM_MODES.CX" name="CX" output="BLK_IG-COMMON_LUT_AND_F78MUX.CX"/>
+            <direct input="BLK_IG-SLICEM_MODES.BX" name="BX" output="BLK_IG-COMMON_LUT_AND_F78MUX.BX"/>
+            <direct input="BLK_IG-SLICEM_MODES.AX" name="AX" output="BLK_IG-COMMON_LUT_AND_F78MUX.AX"/>
+            <!-- COMMON_SLICE inputs -->
+            <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.DO6" name="DO6" output="BLK_IG-SLICEM_MODES.DO6"/>
+            <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.DO5" name="DO5" output="BLK_IG-SLICEM_MODES.DO5"/>
+            <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.CO6" name="CO6" output="BLK_IG-SLICEM_MODES.CO6"/>
+            <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.CO5" name="CO5" output="BLK_IG-SLICEM_MODES.CO5"/>
+            <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.BO6" name="BO6" output="BLK_IG-SLICEM_MODES.BO6"/>
+            <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.BO5" name="BO5" output="BLK_IG-SLICEM_MODES.BO5"/>
+            <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.AO6" name="AO6" output="BLK_IG-SLICEM_MODES.AO6"/>
+            <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.AO5" name="AO5" output="BLK_IG-SLICEM_MODES.AO5"/>
+          </interconnect>
+        </mode>
+        <mode name="DRAMs">
+          <pb_type name="BLK_IG-A_DRAM" num_pb="1">
+            <clock name="CLK" num_pins="1"/>
+            <input name="A" num_pins="6"/>
+            <input name="WA" num_pins="8"/>
+            <input name="AI" num_pins="1"/>
+            <input name="PARENT_DI" num_pins="1"/>
+            <input name="DI2" num_pins="1"/>
+            <input name="WE" num_pins="1"/>
+            <output name="DO6" num_pins="1"/>
+            <output name="DO6_32" num_pins="1"/>
+            <output name="SO6" num_pins="1"/>
+            <output name="SO6_32" num_pins="1"/>
+            <output name="O6" num_pins="1"/>
+            <output name="O5" num_pins="1"/>
+            <!-- Only the LUT mode is modeled in this test architecture. A full architecture would also define the DRAM modes (e.g. 64_DUAL_PORT, 32_SINGLE_PORT, ...).
+                 All the DRAM modes have been omitted here to improve readability. -->
+            <mode name="LUT">
+              <pb_type blif_model=".names" class="lut" name="BEL_LT-A5LUT" num_pb="2">
+                <input name="in" num_pins="5" port_class="lut_in"/>
+                <output name="out" num_pins="1" port_class="lut_out"/>
+                <delay_matrix in_port="BEL_LT-A5LUT.in" out_port="BEL_LT-A5LUT.out" type="max">
+                  0.068e-9
+                  0.068e-9
+                  0.068e-9
+                  0.068e-9
+                  0.068e-9
+                </delay_matrix>
+              </pb_type>
+              <interconnect>
+                <direct input="BLK_IG-A_DRAM.A[4:0]" name="ALUT_A5_0" output="BEL_LT-A5LUT[0].in[4:0]"/>
+                <direct input="BLK_IG-A_DRAM.A[4:0]" name="ALUT_A5_1" output="BEL_LT-A5LUT[1].in[4:0]"/>
+                <direct input="BEL_LT-A5LUT[0].out" name="O5" output="BLK_IG-A_DRAM.O5">
+                  <pack_pattern in_port="BEL_LT-A5LUT[0].out" name="LUT5x2" out_port="BLK_IG-A_DRAM.O5"/>
+                </direct>
+                <direct input="BEL_LT-A5LUT[1].out" name="O6" output="BLK_IG-A_DRAM.O6">
+                  <pack_pattern in_port="BEL_LT-A5LUT[1].out" name="LUT5x2" out_port="BLK_IG-A_DRAM.O6"/>
+                </direct>
+              </interconnect>
+            </mode>
+          </pb_type>
+          <pb_type name="BLK_IG-B_DRAM" num_pb="1">
+            <clock name="CLK" num_pins="1"/>
+            <input name="A" num_pins="6"/>
+            <input name="WA" num_pins="8"/>
+            <input name="BI" num_pins="1"/>
+            <input name="PARENT_DI" num_pins="1"/>
+            <input name="DI2" num_pins="1"/>
+            <input name="WE" num_pins="1"/>
+            <output name="DO6" num_pins="1"/>
+            <output name="DO6_32" num_pins="1"/>
+            <output name="SO6" num_pins="1"/>
+            <output name="SO6_32" num_pins="1"/>
+            <output name="O6" num_pins="1"/>
+            <output name="O5" num_pins="1"/>
+            <!-- Only the LUT mode is modeled in this test architecture. A full architecture would also define the DRAM modes (e.g. 64_DUAL_PORT, 32_SINGLE_PORT, ...).
+                 All the DRAM modes have been omitted here to improve readability. -->
+            <mode name="LUT">
+              <pb_type blif_model=".names" class="lut" name="BEL_LT-B5LUT" num_pb="2">
+                <input name="in" num_pins="5" port_class="lut_in"/>
+                <output name="out" num_pins="1" port_class="lut_out"/>
+                <delay_matrix in_port="BEL_LT-B5LUT.in" out_port="BEL_LT-B5LUT.out" type="max">
+                  0.068e-9
+                  0.068e-9
+                  0.068e-9
+                  0.068e-9
+                  0.068e-9
+                </delay_matrix>
+              </pb_type>
+              <interconnect>
+                <direct input="BLK_IG-B_DRAM.A[4:0]" name="BLUT_A5_0" output="BEL_LT-B5LUT[0].in[4:0]"/>
+                <direct input="BLK_IG-B_DRAM.A[4:0]" name="BLUT_A5_1" output="BEL_LT-B5LUT[1].in[4:0]"/>
+                <direct input="BEL_LT-B5LUT[0].out" name="O5" output="BLK_IG-B_DRAM.O5">
+                  <pack_pattern in_port="BEL_LT-B5LUT[0].out" name="LUT5x2" out_port="BLK_IG-B_DRAM.O5"/>
+                </direct>
+                <direct input="BEL_LT-B5LUT[1].out" name="O6" output="BLK_IG-B_DRAM.O6">
+                  <pack_pattern in_port="BEL_LT-B5LUT[1].out" name="LUT5x2" out_port="BLK_IG-B_DRAM.O6"/>
+                </direct>
+              </interconnect>
+            </mode>
+          </pb_type>
+          <pb_type name="BLK_IG-C_DRAM" num_pb="1">
+            <clock name="CLK" num_pins="1"/>
+            <input name="A" num_pins="6"/>
+            <input name="WA" num_pins="8"/>
+            <input name="CI" num_pins="1"/>
+            <input name="PARENT_DI" num_pins="1"/>
+            <input name="DI2" num_pins="1"/>
+            <input name="WE" num_pins="1"/>
+            <output name="DO6" num_pins="1"/>
+            <output name="DO6_32" num_pins="1"/>
+            <output name="SO6" num_pins="1"/>
+            <output name="SO6_32" num_pins="1"/>
+            <output name="O6" num_pins="1"/>
+            <output name="O5" num_pins="1"/>
+            <!-- Only the LUT mode is modeled in this test architecture. A full architecture would also define the DRAM modes (e.g. 64_DUAL_PORT, 32_SINGLE_PORT, ...).
+                 All the DRAM modes have been omitted here to improve readability. -->
+            <mode name="LUT">
+              <pb_type blif_model=".names" class="lut" name="BEL_LT-C5LUT" num_pb="2">
+                <input name="in" num_pins="5" port_class="lut_in"/>
+                <output name="out" num_pins="1" port_class="lut_out"/>
+                <delay_matrix in_port="BEL_LT-C5LUT.in" out_port="BEL_LT-C5LUT.out" type="max">
+                  0.068e-9
+                  0.068e-9
+                  0.068e-9
+                  0.068e-9
+                  0.068e-9
+                </delay_matrix>
+              </pb_type>
+              <interconnect>
+                <direct input="BLK_IG-C_DRAM.A[4:0]" name="CLUT_A5_0" output="BEL_LT-C5LUT[0].in[4:0]"/>
+                <direct input="BLK_IG-C_DRAM.A[4:0]" name="CLUT_A5_1" output="BEL_LT-C5LUT[1].in[4:0]"/>
+                <direct input="BEL_LT-C5LUT[0].out" name="O5" output="BLK_IG-C_DRAM.O5">
+                  <pack_pattern in_port="BEL_LT-C5LUT[0].out" name="LUT5x2" out_port="BLK_IG-C_DRAM.O5"/>
+                </direct>
+                <direct input="BEL_LT-C5LUT[1].out" name="O6" output="BLK_IG-C_DRAM.O6">
+                  <pack_pattern in_port="BEL_LT-C5LUT[1].out" name="LUT5x2" out_port="BLK_IG-C_DRAM.O6"/>
+                </direct>
+              </interconnect>
+            </mode>
+          </pb_type>
+
+          <!-- D_DRAM does not have a LUT mode: when the DRAMs mode of the BLK_IG-SLICEM_MODES pb_type is selected, the DLUT can only operate in DRAM mode.
+               For the purpose of this test, all the DRAM modes of the {N}_DRAM pb_types have been omitted to improve readability. -->
+          <pb_type name="BLK_IG-D_DRAM" num_pb="1">
+            <clock name="CLK" num_pins="1"/>
+            <input name="A" num_pins="6"/>
+            <input name="WA7" num_pins="1"/>
+            <input name="WA8" num_pins="1"/>
+            <input name="DI1" num_pins="1"/>
+            <input name="DI2" num_pins="1"/>
+            <input name="WE" num_pins="1"/>
+            <output name="SO6" num_pins="1"/>
+            <output name="SO6_32" num_pins="1"/>
+            <output name="O6" num_pins="1"/>
+            <output name="O5" num_pins="1"/>
+            <interconnect/>
+          </pb_type>
+          <pb_type blif_model=".subckt DRAM_4_OUTPUT_STUB" name="BEL_BB-DRAM_4_OUTPUT_STUB" num_pb="2">
+            <input name="DOA" num_pins="1"/>
+            <output name="DOA_OUT" num_pins="1"/>
+            <input name="DOB" num_pins="1"/>
+            <output name="DOB_OUT" num_pins="1"/>
+            <input name="DOC" num_pins="1"/>
+            <output name="DOC_OUT" num_pins="1"/>
+            <input name="DOD" num_pins="1"/>
+            <output name="DOD_OUT" num_pins="1"/>
+            <delay_constant in_port="BEL_BB-DRAM_4_OUTPUT_STUB.DOA" max="0" out_port="BEL_BB-DRAM_4_OUTPUT_STUB.DOA_OUT"/>
+            <delay_constant in_port="BEL_BB-DRAM_4_OUTPUT_STUB.DOB" max="0" out_port="BEL_BB-DRAM_4_OUTPUT_STUB.DOB_OUT"/>
+            <delay_constant in_port="BEL_BB-DRAM_4_OUTPUT_STUB.DOC" max="0" out_port="BEL_BB-DRAM_4_OUTPUT_STUB.DOC_OUT"/>
+            <delay_constant in_port="BEL_BB-DRAM_4_OUTPUT_STUB.DOD" max="0" out_port="BEL_BB-DRAM_4_OUTPUT_STUB.DOD_OUT"/>
+          </pb_type>
+          <pb_type blif_model=".subckt DRAM_2_OUTPUT_STUB" name="BEL_BB-DRAM_2_OUTPUT_STUB" num_pb="4">
+            <input name="DPO" num_pins="1"/>
+            <output name="DPO_OUT" num_pins="1"/>
+            <input name="SPO" num_pins="1"/>
+            <output name="SPO_OUT" num_pins="1"/>
+            <delay_constant in_port="BEL_BB-DRAM_2_OUTPUT_STUB.DPO" max="0" out_port="BEL_BB-DRAM_2_OUTPUT_STUB.DPO_OUT"/>
+            <delay_constant in_port="BEL_BB-DRAM_2_OUTPUT_STUB.SPO" max="0" out_port="BEL_BB-DRAM_2_OUTPUT_STUB.SPO_OUT"/>
+          </pb_type>
+          <pb_type name="BLK_MM-WE_MUX" num_pb="1">
+            <input name="CE" num_pins="1"/>
+            <input name="WE" num_pins="1"/>
+            <output name="WE_OUT" num_pins="1"/>
+            <interconnect>
+              <mux input="BLK_MM-WE_MUX.CE BLK_MM-WE_MUX.WE" name="WE_MUX" output="BLK_MM-WE_MUX.WE_OUT">
+              </mux>
+            </interconnect>
+          </pb_type>
+          <interconnect>
+            <direct input="BLK_IG-SLICEM_MODES.CLK" name="AMEMCLK" output="BLK_IG-A_DRAM.CLK"/>
+            <direct input="BLK_IG-SLICEM_MODES.CLK" name="BMEMCLK" output="BLK_IG-B_DRAM.CLK"/>
+            <direct input="BLK_IG-SLICEM_MODES.CLK" name="CMEMCLK" output="BLK_IG-C_DRAM.CLK"/>
+            <direct input="BLK_IG-SLICEM_MODES.CLK" name="DMEMCLK" output="BLK_IG-D_DRAM.CLK"/>
+            <direct input="BLK_IG-SLICEM_MODES.D1" name="D1" output="BLK_IG-D_DRAM.A[0]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D2" name="D2" output="BLK_IG-D_DRAM.A[1]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D3" name="D3" output="BLK_IG-D_DRAM.A[2]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D4" name="D4" output="BLK_IG-D_DRAM.A[3]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D5" name="D5" output="BLK_IG-D_DRAM.A[4]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D6" name="D6" output="BLK_IG-D_DRAM.A[5]"/>
+            <direct input="BLK_IG-SLICEM_MODES.C1" name="C1" output="BLK_IG-C_DRAM.A[0]"/>
+            <direct input="BLK_IG-SLICEM_MODES.C2" name="C2" output="BLK_IG-C_DRAM.A[1]"/>
+            <direct input="BLK_IG-SLICEM_MODES.C3" name="C3" output="BLK_IG-C_DRAM.A[2]"/>
+            <direct input="BLK_IG-SLICEM_MODES.C4" name="C4" output="BLK_IG-C_DRAM.A[3]"/>
+            <direct input="BLK_IG-SLICEM_MODES.C5" name="C5" output="BLK_IG-C_DRAM.A[4]"/>
+            <direct input="BLK_IG-SLICEM_MODES.C6" name="C6" output="BLK_IG-C_DRAM.A[5]"/>
+            <direct input="BLK_IG-SLICEM_MODES.B1" name="B1" output="BLK_IG-B_DRAM.A[0]"/>
+            <direct input="BLK_IG-SLICEM_MODES.B2" name="B2" output="BLK_IG-B_DRAM.A[1]"/>
+            <direct input="BLK_IG-SLICEM_MODES.B3" name="B3" output="BLK_IG-B_DRAM.A[2]"/>
+            <direct input="BLK_IG-SLICEM_MODES.B4" name="B4" output="BLK_IG-B_DRAM.A[3]"/>
+            <direct input="BLK_IG-SLICEM_MODES.B5" name="B5" output="BLK_IG-B_DRAM.A[4]"/>
+            <direct input="BLK_IG-SLICEM_MODES.B6" name="B6" output="BLK_IG-B_DRAM.A[5]"/>
+            <direct input="BLK_IG-SLICEM_MODES.A1" name="A1" output="BLK_IG-A_DRAM.A[0]"/>
+            <direct input="BLK_IG-SLICEM_MODES.A2" name="A2" output="BLK_IG-A_DRAM.A[1]"/>
+            <direct input="BLK_IG-SLICEM_MODES.A3" name="A3" output="BLK_IG-A_DRAM.A[2]"/>
+            <direct input="BLK_IG-SLICEM_MODES.A4" name="A4" output="BLK_IG-A_DRAM.A[3]"/>
+            <direct input="BLK_IG-SLICEM_MODES.A5" name="A5" output="BLK_IG-A_DRAM.A[4]"/>
+            <direct input="BLK_IG-SLICEM_MODES.A6" name="A6" output="BLK_IG-A_DRAM.A[5]"/>
+            <!-- Write-address lines come in on the DLUT pins (D1 to D6) and fan out to WA[5:0] of the A, B and C DRAMs -->
+            <direct input="BLK_IG-SLICEM_MODES.D1" name="WC1" output="BLK_IG-C_DRAM.WA[0]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D2" name="WC2" output="BLK_IG-C_DRAM.WA[1]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D3" name="WC3" output="BLK_IG-C_DRAM.WA[2]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D4" name="WC4" output="BLK_IG-C_DRAM.WA[3]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D5" name="WC5" output="BLK_IG-C_DRAM.WA[4]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D6" name="WC6" output="BLK_IG-C_DRAM.WA[5]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D1" name="WB1" output="BLK_IG-B_DRAM.WA[0]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D2" name="WB2" output="BLK_IG-B_DRAM.WA[1]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D3" name="WB3" output="BLK_IG-B_DRAM.WA[2]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D4" name="WB4" output="BLK_IG-B_DRAM.WA[3]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D5" name="WB5" output="BLK_IG-B_DRAM.WA[4]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D6" name="WB6" output="BLK_IG-B_DRAM.WA[5]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D1" name="WA1" output="BLK_IG-A_DRAM.WA[0]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D2" name="WA2" output="BLK_IG-A_DRAM.WA[1]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D3" name="WA3" output="BLK_IG-A_DRAM.WA[2]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D4" name="WA4" output="BLK_IG-A_DRAM.WA[3]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D5" name="WA5" output="BLK_IG-A_DRAM.WA[4]"/>
+            <direct input="BLK_IG-SLICEM_MODES.D6" name="WA6" output="BLK_IG-A_DRAM.WA[5]"/>
+            <direct input="BLK_IG-SLICEM_MODES.WA7" name="D_WA7" output="BLK_IG-D_DRAM.WA7"/>
+            <direct input="BLK_IG-SLICEM_MODES.WA7" name="C_WA7" output="BLK_IG-C_DRAM.WA[6]"/>
+            <direct input="BLK_IG-SLICEM_MODES.WA7" name="B_WA7" output="BLK_IG-B_DRAM.WA[6]"/>
+            <direct input="BLK_IG-SLICEM_MODES.WA7" name="A_WA7" output="BLK_IG-A_DRAM.WA[6]"/>
+            <direct input="BLK_IG-SLICEM_MODES.WA8" name="D_WA8" output="BLK_IG-D_DRAM.WA8"/>
+            <direct input="BLK_IG-SLICEM_MODES.WA8" name="C_WA8" output="BLK_IG-C_DRAM.WA[7]"/>
+            <direct input="BLK_IG-SLICEM_MODES.WA8" name="B_WA8" output="BLK_IG-B_DRAM.WA[7]"/>
+            <direct input="BLK_IG-SLICEM_MODES.WA8" name="A_WA8" output="BLK_IG-A_DRAM.WA[7]"/>
+            <!-- Direct DI1 inputs -->
+            <direct input="BLK_IG-SLICEM_MODES.DI" name="DI" output="BLK_IG-D_DRAM.DI1"/>
+            <direct input="BLK_IG-SLICEM_MODES.CI" name="CI" output="BLK_IG-C_DRAM.CI"/>
+            <direct input="BLK_IG-SLICEM_MODES.BI" name="BI" output="BLK_IG-B_DRAM.BI"/>
+            <direct input="BLK_IG-SLICEM_MODES.AI" name="AI" output="BLK_IG-A_DRAM.AI"/>
+            <!-- Parent DI1 inputs -->
+            <direct input="BLK_IG-SLICEM_MODES.DI" name="P_CI" output="BLK_IG-C_DRAM.PARENT_DI"/>
+            <direct input="BLK_IG-SLICEM_MODES.DI" name="P_BI" output="BLK_IG-B_DRAM.PARENT_DI"/>
+            <mux input="BLK_IG-SLICEM_MODES.DI BLK_IG-SLICEM_MODES.BI" name="P_AI" output="BLK_IG-A_DRAM.PARENT_DI"/>
+            <!-- DI2 inputs -->
+            <direct input="BLK_IG-SLICEM_MODES.DX" name="D_DI2" output="BLK_IG-D_DRAM.DI2"/>
+            <direct input="BLK_IG-SLICEM_MODES.CX" name="C_DI2" output="BLK_IG-C_DRAM.DI2"/>
+            <direct input="BLK_IG-SLICEM_MODES.BX" name="B_DI2" output="BLK_IG-B_DRAM.DI2"/>
+            <direct input="BLK_IG-SLICEM_MODES.AX" name="A_DI2" output="BLK_IG-A_DRAM.DI2"/>
+            <!-- WE inputs -->
+            <direct input="BLK_IG-SLICEM_MODES.CE" name="CE_TO_WE_MUX" output="BLK_MM-WE_MUX.CE"/>
+            <direct input="BLK_IG-SLICEM_MODES.WE" name="WE_TO_WE_MUX" output="BLK_MM-WE_MUX.WE"/>
+            <direct input="BLK_MM-WE_MUX.WE_OUT" name="WE1" output="BLK_IG-A_DRAM.WE"/>
+            <direct input="BLK_MM-WE_MUX.WE_OUT" name="WE2" output="BLK_IG-B_DRAM.WE"/>
+            <direct input="BLK_MM-WE_MUX.WE_OUT" name="WE3" output="BLK_IG-C_DRAM.WE"/>
+            <direct input="BLK_MM-WE_MUX.WE_OUT" name="WE4" output="BLK_IG-D_DRAM.WE"/>
+            <!-- Outputs -->
+            <direct input="BLK_IG-D_DRAM.SO6_32" name="SPO_0" output="BEL_BB-DRAM_2_OUTPUT_STUB[0].SPO"/>
+            <direct input="BLK_IG-C_DRAM.DO6_32" name="DPO_0" output="BEL_BB-DRAM_2_OUTPUT_STUB[0].DPO"/>
+            <direct input="BLK_IG-B_DRAM.SO6_32" name="SPO_1" output="BEL_BB-DRAM_2_OUTPUT_STUB[1].SPO"/>
+            <direct input="BLK_IG-A_DRAM.DO6_32" name="DPO_1" output="BEL_BB-DRAM_2_OUTPUT_STUB[1].DPO"/>
+            <direct input="BLK_IG-D_DRAM.SO6" name="SPO_2" output="BEL_BB-DRAM_2_OUTPUT_STUB[2].SPO"/>
+            <direct input="BLK_IG-C_DRAM.DO6" name="DPO_2" output="BEL_BB-DRAM_2_OUTPUT_STUB[2].DPO"/>
+            <direct input="BLK_IG-B_DRAM.SO6" name="SPO_3" output="BEL_BB-DRAM_2_OUTPUT_STUB[3].SPO"/>
+            <direct input="BLK_IG-A_DRAM.DO6" name="DPO_3" output="BEL_BB-DRAM_2_OUTPUT_STUB[3].DPO"/>
+            <direct input="BLK_IG-D_DRAM.SO6_32" name="DOD32" output="BEL_BB-DRAM_4_OUTPUT_STUB[0].DOD"/>
+            <direct input="BLK_IG-C_DRAM.DO6_32" name="DOC32" output="BEL_BB-DRAM_4_OUTPUT_STUB[0].DOC"/>
+            <direct input="BLK_IG-B_DRAM.DO6_32" name="DOB32" output="BEL_BB-DRAM_4_OUTPUT_STUB[0].DOB"/>
+            <direct input="BLK_IG-A_DRAM.DO6_32" name="DOA32" output="BEL_BB-DRAM_4_OUTPUT_STUB[0].DOA"/>
+            <direct input="BLK_IG-D_DRAM.SO6" name="DOD" output="BEL_BB-DRAM_4_OUTPUT_STUB[1].DOD"/>
+            <direct input="BLK_IG-C_DRAM.DO6" name="DOC" output="BEL_BB-DRAM_4_OUTPUT_STUB[1].DOC"/>
+            <direct input="BLK_IG-B_DRAM.DO6" name="DOB" output="BEL_BB-DRAM_4_OUTPUT_STUB[1].DOB"/>
+            <direct input="BLK_IG-A_DRAM.DO6" name="DOA" output="BEL_BB-DRAM_4_OUTPUT_STUB[1].DOA"/>
+            <mux input="BLK_IG-D_DRAM.O6 BEL_BB-DRAM_2_OUTPUT_STUB[0].SPO_OUT BEL_BB-DRAM_2_OUTPUT_STUB[2].SPO_OUT BEL_BB-DRAM_4_OUTPUT_STUB[0].DOD_OUT BEL_BB-DRAM_4_OUTPUT_STUB[1].DOD_OUT" name="DO6" output="BLK_IG-SLICEM_MODES.DO6"/>
+            <direct input="BLK_IG-D_DRAM.O5" name="DO5" output="BLK_IG-SLICEM_MODES.DO5"/>
+            <mux input="BLK_IG-C_DRAM.O6 BEL_BB-DRAM_2_OUTPUT_STUB[0].DPO_OUT BEL_BB-DRAM_2_OUTPUT_STUB[2].DPO_OUT BEL_BB-DRAM_4_OUTPUT_STUB[0].DOC_OUT BEL_BB-DRAM_4_OUTPUT_STUB[1].DOC_OUT" name="CO6" output="BLK_IG-SLICEM_MODES.CO6"/>
+            <direct input="BLK_IG-C_DRAM.O5" name="CO5" output="BLK_IG-SLICEM_MODES.CO5"/>
+            <mux input="BLK_IG-B_DRAM.O6 BEL_BB-DRAM_2_OUTPUT_STUB[1].SPO_OUT BEL_BB-DRAM_2_OUTPUT_STUB[3].SPO_OUT BEL_BB-DRAM_4_OUTPUT_STUB[0].DOB_OUT BEL_BB-DRAM_4_OUTPUT_STUB[1].DOB_OUT" name="BO6" output="BLK_IG-SLICEM_MODES.BO6"/>
+            <direct input="BLK_IG-B_DRAM.O5" name="BO5" output="BLK_IG-SLICEM_MODES.BO5"/>
+            <mux input="BLK_IG-A_DRAM.O6 BEL_BB-DRAM_2_OUTPUT_STUB[1].DPO_OUT BEL_BB-DRAM_2_OUTPUT_STUB[3].DPO_OUT BEL_BB-DRAM_4_OUTPUT_STUB[0].DOA_OUT BEL_BB-DRAM_4_OUTPUT_STUB[1].DOA_OUT" name="AO6" output="BLK_IG-SLICEM_MODES.AO6"/>
+            <direct input="BLK_IG-A_DRAM.O5" name="AO5" output="BLK_IG-SLICEM_MODES.AO5"/>
+          </interconnect>
+        </mode>
+      </pb_type>
+      <interconnect>
+        <!-- SLICEM_MODES inputs -->
+        <direct input="BLK_IG-SLICEM.DI" name="DI" output="BLK_IG-SLICEM_MODES.DI"/>
+        <direct input="BLK_IG-SLICEM.DX" name="DX2" output="BLK_IG-SLICEM_MODES.DX"/>
+        <direct input="BLK_IG-SLICEM.D1" name="D1" output="BLK_IG-SLICEM_MODES.D1"/>
+        <direct input="BLK_IG-SLICEM.D2" name="D2" output="BLK_IG-SLICEM_MODES.D2"/>
+        <direct input="BLK_IG-SLICEM.D3" name="D3" output="BLK_IG-SLICEM_MODES.D3"/>
+        <direct input="BLK_IG-SLICEM.D4" name="D4" output="BLK_IG-SLICEM_MODES.D4"/>
+        <direct input="BLK_IG-SLICEM.D5" name="D5" output="BLK_IG-SLICEM_MODES.D5"/>
+        <direct input="BLK_IG-SLICEM.D6" name="D6" output="BLK_IG-SLICEM_MODES.D6"/>
+        <direct input="BLK_IG-SLICEM.CI" name="CI" output="BLK_IG-SLICEM_MODES.CI"/>
+        <direct input="BLK_IG-SLICEM.CX" name="CX2" output="BLK_IG-SLICEM_MODES.CX"/>
+        <direct input="BLK_IG-SLICEM.C1" name="C1" output="BLK_IG-SLICEM_MODES.C1"/>
+        <direct input="BLK_IG-SLICEM.C2" name="C2" output="BLK_IG-SLICEM_MODES.C2"/>
+        <direct input="BLK_IG-SLICEM.C3" name="C3" output="BLK_IG-SLICEM_MODES.C3"/>
+        <direct input="BLK_IG-SLICEM.C4" name="C4" output="BLK_IG-SLICEM_MODES.C4"/>
+        <direct input="BLK_IG-SLICEM.C5" name="C5" output="BLK_IG-SLICEM_MODES.C5"/>
+        <direct input="BLK_IG-SLICEM.C6" name="C6" output="BLK_IG-SLICEM_MODES.C6"/>
+        <direct input="BLK_IG-SLICEM.BI" name="BI" output="BLK_IG-SLICEM_MODES.BI"/>
+        <direct input="BLK_IG-SLICEM.BX" name="BX2" output="BLK_IG-SLICEM_MODES.BX"/>
+        <direct input="BLK_IG-SLICEM.B1" name="B1" output="BLK_IG-SLICEM_MODES.B1"/>
+        <direct input="BLK_IG-SLICEM.B2" name="B2" output="BLK_IG-SLICEM_MODES.B2"/>
+        <direct input="BLK_IG-SLICEM.B3" name="B3" output="BLK_IG-SLICEM_MODES.B3"/>
+        <direct input="BLK_IG-SLICEM.B4" name="B4" output="BLK_IG-SLICEM_MODES.B4"/>
+        <direct input="BLK_IG-SLICEM.B5" name="B5" output="BLK_IG-SLICEM_MODES.B5"/>
+        <direct input="BLK_IG-SLICEM.B6" name="B6" output="BLK_IG-SLICEM_MODES.B6"/>
+        <direct input="BLK_IG-SLICEM.AI" name="AI" output="BLK_IG-SLICEM_MODES.AI"/>
+        <direct input="BLK_IG-SLICEM.AX" name="AX2" output="BLK_IG-SLICEM_MODES.AX"/>
+        <direct input="BLK_IG-SLICEM.A1" name="A1" output="BLK_IG-SLICEM_MODES.A1"/>
+        <direct input="BLK_IG-SLICEM.A2" name="A2" output="BLK_IG-SLICEM_MODES.A2"/>
+        <direct input="BLK_IG-SLICEM.A3" name="A3" output="BLK_IG-SLICEM_MODES.A3"/>
+        <direct input="BLK_IG-SLICEM.A4" name="A4" output="BLK_IG-SLICEM_MODES.A4"/>
+        <direct input="BLK_IG-SLICEM.A5" name="A5" output="BLK_IG-SLICEM_MODES.A5"/>
+        <direct input="BLK_IG-SLICEM.A6" name="A6" output="BLK_IG-SLICEM_MODES.A6"/>
+        <direct input="BLK_IG-SLICEM.CLK" name="CK2" output="BLK_IG-SLICEM_MODES.CLK"/>
+        <direct input="BLK_IG-SLICEM.CE" name="CE2" output="BLK_IG-SLICEM_MODES.CE"/>
+        <direct input="BLK_IG-SLICEM.WE" name="WE2" output="BLK_IG-SLICEM_MODES.WE"/>
+        <!-- SLICEM_MODES Outputs -->
+        <direct input="BLK_IG-SLICEM_MODES.DO6" name="DO6" output="BLK_IG-COMMON_SLICE.DO6"/>
+        <direct input="BLK_IG-SLICEM_MODES.DO5" name="DO5" output="BLK_IG-COMMON_SLICE.DO5"/>
+        <direct input="BLK_IG-SLICEM_MODES.CO6" name="CO6" output="BLK_IG-COMMON_SLICE.CO6"/>
+        <direct input="BLK_IG-SLICEM_MODES.CO5" name="CO5" output="BLK_IG-COMMON_SLICE.CO5"/>
+        <direct input="BLK_IG-SLICEM_MODES.BO6" name="BO6" output="BLK_IG-COMMON_SLICE.BO6"/>
+        <direct input="BLK_IG-SLICEM_MODES.BO5" name="BO5" output="BLK_IG-COMMON_SLICE.BO5"/>
+        <direct input="BLK_IG-SLICEM_MODES.AO6" name="AO6" output="BLK_IG-COMMON_SLICE.AO6"/>
+        <direct input="BLK_IG-SLICEM_MODES.AO5" name="AO5" output="BLK_IG-COMMON_SLICE.AO5"/>
+        <!-- [A-D]X inputs -->
+        <direct input="BLK_IG-SLICEM.DX" name="DX" output="BLK_IG-COMMON_SLICE.DX"/>
+        <direct input="BLK_IG-SLICEM.CX" name="CX" output="BLK_IG-COMMON_SLICE.CX"/>
+        <direct input="BLK_IG-SLICEM.BX" name="BX" output="BLK_IG-COMMON_SLICE.BX"/>
+        <direct input="BLK_IG-SLICEM.AX" name="AX" output="BLK_IG-COMMON_SLICE.AX"/>
+        <!-- [A-D]Q outputs -->
+        <direct input="BLK_IG-COMMON_SLICE.AQ" name="AQ" output="BLK_IG-SLICEM.AQ"/>
+        <direct input="BLK_IG-COMMON_SLICE.BQ" name="BQ" output="BLK_IG-SLICEM.BQ"/>
+        <direct input="BLK_IG-COMMON_SLICE.CQ" name="CQ" output="BLK_IG-SLICEM.CQ"/>
+        <direct input="BLK_IG-COMMON_SLICE.DQ" name="DQ" output="BLK_IG-SLICEM.DQ"/>
+        <!-- A-D output -->
+        <direct input="BLK_IG-COMMON_SLICE.D" name="BLK_IG-SLICEM_DOUT" output="BLK_IG-SLICEM.D"/>
+        <direct input="BLK_IG-COMMON_SLICE.C" name="BLK_IG-SLICEM_COUT" output="BLK_IG-SLICEM.C"/>
+        <direct input="BLK_IG-COMMON_SLICE.B" name="BLK_IG-SLICEM_BOUT" output="BLK_IG-SLICEM.B"/>
+        <direct input="BLK_IG-COMMON_SLICE.A" name="BLK_IG-SLICEM_AOUT" output="BLK_IG-SLICEM.A"/>
+        <!-- AMUX-DMUX output -->
+        <direct input="BLK_IG-COMMON_SLICE.DMUX" name="BLK_IG-SLICEM_DMUX" output="BLK_IG-SLICEM.DMUX"/>
+        <direct input="BLK_IG-COMMON_SLICE.CMUX" name="BLK_IG-SLICEM_CMUX" output="BLK_IG-SLICEM.CMUX"/>
+        <direct input="BLK_IG-COMMON_SLICE.BMUX" name="BLK_IG-SLICEM_BMUX" output="BLK_IG-SLICEM.BMUX"/>
+        <direct input="BLK_IG-COMMON_SLICE.AMUX" name="BLK_IG-SLICEM_AMUX" output="BLK_IG-SLICEM.AMUX"/>
+        <!-- Carry -->
+        <direct input="BLK_IG-SLICEM.CIN" name="CIN" output="BLK_IG-COMMON_SLICE.CIN"/>
+        <direct input="BLK_IG-COMMON_SLICE.COUT" name="COUT" output="BLK_IG-SLICEM.COUT"/>
+        <!-- Clock, Clock Enable and Reset -->
+        <direct input="BLK_IG-SLICEM.CLK" name="CK" output="BLK_IG-COMMON_SLICE.CLK"/>
+        <direct input="BLK_IG-SLICEM.CE" name="CE" output="BLK_IG-COMMON_SLICE.CE"/>
+        <direct input="BLK_IG-SLICEM.SR" name="SR" output="BLK_IG-COMMON_SLICE.SR"/>
+        <!-- WA7 and WA8 -->
+        <direct input="BLK_IG-SLICEM.CX" name="WA7" output="BLK_IG-SLICEM_MODES.WA7">
+        </direct>
+        <direct input="BLK_IG-SLICEM.BX" name="WA8" output="BLK_IG-SLICEM_MODES.WA8">
+        </direct>
+      </interconnect>
+    </pb_type>
+    <pb_type name="BLK_IG-SLICEL">
+      <input name="DX" num_pins="1"/>
+      <input name="D1" num_pins="1"/>
+      <input name="D2" num_pins="1"/>
+      <input name="D3" num_pins="1"/>
+      <input name="D4" num_pins="1"/>
+      <input name="D5" num_pins="1"/>
+      <input name="D6" num_pins="1"/>
+      <input name="CX" num_pins="1"/>
+      <input name="C1" num_pins="1"/>
+      <input name="C2" num_pins="1"/>
+      <input name="C3" num_pins="1"/>
+      <input name="C4" num_pins="1"/>
+      <input name="C5" num_pins="1"/>
+      <input name="C6" num_pins="1"/>
+      <input name="BX" num_pins="1"/>
+      <input name="B1" num_pins="1"/>
+      <input name="B2" num_pins="1"/>
+      <input name="B3" num_pins="1"/>
+      <input name="B4" num_pins="1"/>
+      <input name="B5" num_pins="1"/>
+      <input name="B6" num_pins="1"/>
+      <input name="AX" num_pins="1"/>
+      <input name="A1" num_pins="1"/>
+      <input name="A2" num_pins="1"/>
+      <input name="A3" num_pins="1"/>
+      <input name="A4" num_pins="1"/>
+      <input name="A5" num_pins="1"/>
+      <input name="A6" num_pins="1"/>
+      <input name="SR" num_pins="1"/>
+      <input name="CE" num_pins="1"/>
+      <clock name="CLK" num_pins="1"/>
+      <input name="CIN" num_pins="1"/>
+      <output name="COUT" num_pins="1"/>
+      <output name="DMUX" num_pins="1"/>
+      <output name="D" num_pins="1"/>
+      <output name="DQ" num_pins="1"/>
+      <output name="CMUX" num_pins="1"/>
+      <output name="C" num_pins="1"/>
+      <output name="CQ" num_pins="1"/>
+      <output name="BMUX" num_pins="1"/>
+      <output name="B" num_pins="1"/>
+      <output name="BQ" num_pins="1"/>
+      <output name="AMUX" num_pins="1"/>
+      <output name="A" num_pins="1"/>
+      <output name="AQ" num_pins="1"/>
+      <pb_type name="BLK_IG-COMMON_LUT_AND_F78MUX" num_pb="1">
+        <input name="D1" num_pins="1"/>
+        <input name="D2" num_pins="1"/>
+        <input name="D3" num_pins="1"/>
+        <input name="D4" num_pins="1"/>
+        <input name="D5" num_pins="1"/>
+        <input name="D6" num_pins="1"/>
+        <input name="CX" num_pins="1"/>
+        <input name="C1" num_pins="1"/>
+        <input name="C2" num_pins="1"/>
+        <input name="C3" num_pins="1"/>
+        <input name="C4" num_pins="1"/>
+        <input name="C5" num_pins="1"/>
+        <input name="C6" num_pins="1"/>
+        <input name="BX" num_pins="1"/>
+        <input name="B1" num_pins="1"/>
+        <input name="B2" num_pins="1"/>
+        <input name="B3" num_pins="1"/>
+        <input name="B4" num_pins="1"/>
+        <input name="B5" num_pins="1"/>
+        <input name="B6" num_pins="1"/>
+        <input name="AX" num_pins="1"/>
+        <input name="A1" num_pins="1"/>
+        <input name="A2" num_pins="1"/>
+        <input name="A3" num_pins="1"/>
+        <input name="A4" num_pins="1"/>
+        <input name="A5" num_pins="1"/>
+        <input name="A6" num_pins="1"/>
+        <output name="DO6" num_pins="1"/>
+        <output name="CO6" num_pins="1"/>
+        <output name="BO6" num_pins="1"/>
+        <output name="AO6" num_pins="1"/>
+        <output name="DO5" num_pins="1"/>
+        <output name="CO5" num_pins="1"/>
+        <output name="BO5" num_pins="1"/>
+        <output name="AO5" num_pins="1"/>
+        <pb_type name="BLK_IG-ALUT" num_pb="1">
+          <input name="A1" num_pins="1"/>
+          <input name="A2" num_pins="1"/>
+          <input name="A3" num_pins="1"/>
+          <input name="A4" num_pins="1"/>
+          <input name="A5" num_pins="1"/>
+          <input name="A6" num_pins="1"/>
+          <output name="O5" num_pins="1"/>
+          <output name="O6" num_pins="1"/>
+          <!-- LUT5+LUT5+F6MUX with two outputs -->
+          <mode name="BLK_IG-ALUT-LUT5_MUX">
+            <pb_type blif_model=".names" class="lut" name="BEL_LT-A5LUT" num_pb="2">
+              <input name="in" num_pins="5" port_class="lut_in"/>
+              <output name="out" num_pins="1" port_class="lut_out"/>
+              <delay_matrix in_port="BEL_LT-A5LUT.in" out_port="BEL_LT-A5LUT.out" type="max">
+                0.068e-9
+                0.068e-9
+                0.068e-9
+                0.068e-9
+                0.068e-9
+              </delay_matrix>
+            </pb_type>
+            <interconnect>
+              <!-- BEL_LT-A5LUT[0] inputs (this instance drives O5) -->
+              <direct input="BLK_IG-ALUT.A5" name="ALUT_A5_0" output="BEL_LT-A5LUT[0].in[4]"/>
+              <direct input="BLK_IG-ALUT.A4" name="ALUT_A4_0" output="BEL_LT-A5LUT[0].in[3]"/>
+              <direct input="BLK_IG-ALUT.A3" name="ALUT_A3_0" output="BEL_LT-A5LUT[0].in[2]"/>
+              <direct input="BLK_IG-ALUT.A2" name="ALUT_A2_0" output="BEL_LT-A5LUT[0].in[1]"/>
+              <direct input="BLK_IG-ALUT.A1" name="ALUT_A1_0" output="BEL_LT-A5LUT[0].in[0]"/>
+              <!-- BEL_LT-A5LUT[1] inputs (this instance drives O6) -->
+              <direct input="BLK_IG-ALUT.A5" name="ALUT_A5_1" output="BEL_LT-A5LUT[1].in[4]"/>
+              <direct input="BLK_IG-ALUT.A4" name="ALUT_A4_1" output="BEL_LT-A5LUT[1].in[3]"/>
+              <direct input="BLK_IG-ALUT.A3" name="ALUT_A3_1" output="BEL_LT-A5LUT[1].in[2]"/>
+              <direct input="BLK_IG-ALUT.A2" name="ALUT_A2_1" output="BEL_LT-A5LUT[1].in[1]"/>
+              <direct input="BLK_IG-ALUT.A1" name="ALUT_A1_1" output="BEL_LT-A5LUT[1].in[0]"/>
+              <!-- LUT outputs -->
+              <direct input="BEL_LT-A5LUT[0].out" name="O5" output="BLK_IG-ALUT.O5">
+                <pack_pattern in_port="BEL_LT-A5LUT[0].out" name="LUT5x2" out_port="BLK_IG-ALUT.O5"/>
+              </direct>
+              <direct input="BEL_LT-A5LUT[1].out" name="O6" output="BLK_IG-ALUT.O6">
+                <pack_pattern in_port="BEL_LT-A5LUT[1].out" name="LUT5x2" out_port="BLK_IG-ALUT.O6"/>
+              </direct>
+            </interconnect>
+          </mode>
+        </pb_type>
+        <pb_type name="BLK_IG-BLUT" num_pb="1">
+          <input name="A1" num_pins="1"/>
+          <input name="A2" num_pins="1"/>
+          <input name="A3" num_pins="1"/>
+          <input name="A4" num_pins="1"/>
+          <input name="A5" num_pins="1"/>
+          <input name="A6" num_pins="1"/>
+          <output name="O5" num_pins="1"/>
+          <output name="O6" num_pins="1"/>
+          <!-- LUT5+LUT5+F6MUX with two outputs -->
+          <mode name="BLK_IG-BLUT-LUT5_MUX">
+            <pb_type blif_model=".names" class="lut" name="BEL_LT-B5LUT" num_pb="2">
+              <input name="in" num_pins="5" port_class="lut_in"/>
+              <output name="out" num_pins="1" port_class="lut_out"/>
+              <delay_matrix in_port="BEL_LT-B5LUT.in" out_port="BEL_LT-B5LUT.out" type="max">
+                0.068e-9
+                0.068e-9
+                0.068e-9
+                0.068e-9
+                0.068e-9
+              </delay_matrix>
+            </pb_type>
+            <interconnect>
+              <!-- BEL_LT-B5LUT[0] inputs (this instance drives O5) -->
+              <direct input="BLK_IG-BLUT.A5" name="BLUT_A5_0" output="BEL_LT-B5LUT[0].in[4]"/>
+              <direct input="BLK_IG-BLUT.A4" name="BLUT_A4_0" output="BEL_LT-B5LUT[0].in[3]"/>
+              <direct input="BLK_IG-BLUT.A3" name="BLUT_A3_0" output="BEL_LT-B5LUT[0].in[2]"/>
+              <direct input="BLK_IG-BLUT.A2" name="BLUT_A2_0" output="BEL_LT-B5LUT[0].in[1]"/>
+              <direct input="BLK_IG-BLUT.A1" name="BLUT_A1_0" output="BEL_LT-B5LUT[0].in[0]"/>
+              <!-- BEL_LT-B5LUT[1] inputs (this instance drives O6) -->
+              <direct input="BLK_IG-BLUT.A5" name="BLUT_A5_1" output="BEL_LT-B5LUT[1].in[4]"/>
+              <direct input="BLK_IG-BLUT.A4" name="BLUT_A4_1" output="BEL_LT-B5LUT[1].in[3]"/>
+              <direct input="BLK_IG-BLUT.A3" name="BLUT_A3_1" output="BEL_LT-B5LUT[1].in[2]"/>
+              <direct input="BLK_IG-BLUT.A2" name="BLUT_A2_1" output="BEL_LT-B5LUT[1].in[1]"/>
+              <direct input="BLK_IG-BLUT.A1" name="BLUT_A1_1" output="BEL_LT-B5LUT[1].in[0]"/>
+              <!-- LUT outputs -->
+              <direct input="BEL_LT-B5LUT[0].out" name="O5" output="BLK_IG-BLUT.O5">
+                <pack_pattern in_port="BEL_LT-B5LUT[0].out" name="LUT5x2" out_port="BLK_IG-BLUT.O5"/>
+              </direct>
+              <direct input="BEL_LT-B5LUT[1].out" name="O6" output="BLK_IG-BLUT.O6">
+                <pack_pattern in_port="BEL_LT-B5LUT[1].out" name="LUT5x2" out_port="BLK_IG-BLUT.O6"/>
+              </direct>
+            </interconnect>
+          </mode>
+        </pb_type>
+        <pb_type name="BLK_IG-CLUT" num_pb="1">
+          <input name="A1" num_pins="1"/>
+          <input name="A2" num_pins="1"/>
+          <input name="A3" num_pins="1"/>
+          <input name="A4" num_pins="1"/>
+          <input name="A5" num_pins="1"/>
+          <input name="A6" num_pins="1"/>
+          <output name="O5" num_pins="1"/>
+          <output name="O6" num_pins="1"/>
+          <!-- LUT5+LUT5+F6MUX with two outputs -->
+          <mode name="BLK_IG-CLUT-LUT5_MUX">
+            <pb_type blif_model=".names" class="lut" name="BEL_LT-C5LUT" num_pb="2">
+              <input name="in" num_pins="5" port_class="lut_in"/>
+              <output name="out" num_pins="1" port_class="lut_out"/>
+              <delay_matrix in_port="BEL_LT-C5LUT.in" out_port="BEL_LT-C5LUT.out" type="max">
+                0.068e-9
+                0.068e-9
+                0.068e-9
+                0.068e-9
+                0.068e-9
+              </delay_matrix>
+            </pb_type>
+            <interconnect>
+              <!-- BEL_LT-C5LUT[0] inputs (this instance drives O5) -->
+              <direct input="BLK_IG-CLUT.A5" name="CLUT_A5_0" output="BEL_LT-C5LUT[0].in[4]"/>
+              <direct input="BLK_IG-CLUT.A4" name="CLUT_A4_0" output="BEL_LT-C5LUT[0].in[3]"/>
+              <direct input="BLK_IG-CLUT.A3" name="CLUT_A3_0" output="BEL_LT-C5LUT[0].in[2]"/>
+              <direct input="BLK_IG-CLUT.A2" name="CLUT_A2_0" output="BEL_LT-C5LUT[0].in[1]"/>
+              <direct input="BLK_IG-CLUT.A1" name="CLUT_A1_0" output="BEL_LT-C5LUT[0].in[0]"/>
+              <!-- BEL_LT-C5LUT[1] inputs (this instance drives O6) -->
+              <direct input="BLK_IG-CLUT.A5" name="CLUT_A5_1" output="BEL_LT-C5LUT[1].in[4]"/>
+              <direct input="BLK_IG-CLUT.A4" name="CLUT_A4_1" output="BEL_LT-C5LUT[1].in[3]"/>
+              <direct input="BLK_IG-CLUT.A3" name="CLUT_A3_1" output="BEL_LT-C5LUT[1].in[2]"/>
+              <direct input="BLK_IG-CLUT.A2" name="CLUT_A2_1" output="BEL_LT-C5LUT[1].in[1]"/>
+              <direct input="BLK_IG-CLUT.A1" name="CLUT_A1_1" output="BEL_LT-C5LUT[1].in[0]"/>
+              <!-- LUT outputs -->
+              <direct input="BEL_LT-C5LUT[0].out" name="O5" output="BLK_IG-CLUT.O5">
+                <pack_pattern in_port="BEL_LT-C5LUT[0].out" name="LUT5x2" out_port="BLK_IG-CLUT.O5"/>
+              </direct>
+              <direct input="BEL_LT-C5LUT[1].out" name="O6" output="BLK_IG-CLUT.O6">
+                <pack_pattern in_port="BEL_LT-C5LUT[1].out" name="LUT5x2" out_port="BLK_IG-CLUT.O6"/>
+              </direct>
+            </interconnect>
+          </mode>
+        </pb_type>
+        <pb_type name="BLK_IG-DLUT" num_pb="1">
+          <input name="A1" num_pins="1"/>
+          <input name="A2" num_pins="1"/>
+          <input name="A3" num_pins="1"/>
+          <input name="A4" num_pins="1"/>
+          <input name="A5" num_pins="1"/>
+          <input name="A6" num_pins="1"/>
+          <output name="O5" num_pins="1"/>
+          <output name="O6" num_pins="1"/>
+          <!-- LUT5+LUT5+F6MUX with two outputs -->
+          <mode name="BLK_IG-DLUT-LUT5_MUX">
+            <pb_type blif_model=".names" class="lut" name="BEL_LT-D5LUT" num_pb="2">
+              <input name="in" num_pins="5" port_class="lut_in"/>
+              <output name="out" num_pins="1" port_class="lut_out"/>
+              <delay_matrix in_port="BEL_LT-D5LUT.in" out_port="BEL_LT-D5LUT.out" type="max">
+                0.068e-9
+                0.068e-9
+                0.068e-9
+                0.068e-9
+                0.068e-9
+              </delay_matrix>
+            </pb_type>
+            <interconnect>
+              <!-- BEL_LT-D5LUT[0] inputs (this instance drives O5) -->
+              <direct input="BLK_IG-DLUT.A5" name="DLUT_A5_0" output="BEL_LT-D5LUT[0].in[4]"/>
+              <direct input="BLK_IG-DLUT.A4" name="DLUT_A4_0" output="BEL_LT-D5LUT[0].in[3]"/>
+              <direct input="BLK_IG-DLUT.A3" name="DLUT_A3_0" output="BEL_LT-D5LUT[0].in[2]"/>
+              <direct input="BLK_IG-DLUT.A2" name="DLUT_A2_0" output="BEL_LT-D5LUT[0].in[1]"/>
+              <direct input="BLK_IG-DLUT.A1" name="DLUT_A1_0" output="BEL_LT-D5LUT[0].in[0]"/>
+              <!-- BEL_LT-D5LUT[1] inputs (this instance drives O6) -->
+              <direct input="BLK_IG-DLUT.A5" name="DLUT_A5_1" output="BEL_LT-D5LUT[1].in[4]"/>
+              <direct input="BLK_IG-DLUT.A4" name="DLUT_A4_1" output="BEL_LT-D5LUT[1].in[3]"/>
+              <direct input="BLK_IG-DLUT.A3" name="DLUT_A3_1" output="BEL_LT-D5LUT[1].in[2]"/>
+              <direct input="BLK_IG-DLUT.A2" name="DLUT_A2_1" output="BEL_LT-D5LUT[1].in[1]"/>
+              <direct input="BLK_IG-DLUT.A1" name="DLUT_A1_1" output="BEL_LT-D5LUT[1].in[0]"/>
+              <!-- LUT outputs -->
+              <direct input="BEL_LT-D5LUT[0].out" name="O5" output="BLK_IG-DLUT.O5">
+                <pack_pattern in_port="BEL_LT-D5LUT[0].out" name="LUT5x2" out_port="BLK_IG-DLUT.O5"/>
+              </direct>
+              <direct input="BEL_LT-D5LUT[1].out" name="O6" output="BLK_IG-DLUT.O6">
+                <pack_pattern in_port="BEL_LT-D5LUT[1].out" name="LUT5x2" out_port="BLK_IG-DLUT.O6"/>
+              </direct>
+            </interconnect>
+          </mode>
+        </pb_type>
+        <interconnect>
+          <!-- LUT input pins -->
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.D1" name="D1" output="BLK_IG-DLUT.A1"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.D2" name="D2" output="BLK_IG-DLUT.A2"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.D3" name="D3" output="BLK_IG-DLUT.A3"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.D4" name="D4" output="BLK_IG-DLUT.A4"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.D5" name="D5" output="BLK_IG-DLUT.A5"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.D6" name="D6" output="BLK_IG-DLUT.A6"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.C1" name="C1" output="BLK_IG-CLUT.A1"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.C2" name="C2" output="BLK_IG-CLUT.A2"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.C3" name="C3" output="BLK_IG-CLUT.A3"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.C4" name="C4" output="BLK_IG-CLUT.A4"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.C5" name="C5" output="BLK_IG-CLUT.A5"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.C6" name="C6" output="BLK_IG-CLUT.A6"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.B1" name="B1" output="BLK_IG-BLUT.A1"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.B2" name="B2" output="BLK_IG-BLUT.A2"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.B3" name="B3" output="BLK_IG-BLUT.A3"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.B4" name="B4" output="BLK_IG-BLUT.A4"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.B5" name="B5" output="BLK_IG-BLUT.A5"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.B6" name="B6" output="BLK_IG-BLUT.A6"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.A1" name="A1" output="BLK_IG-ALUT.A1"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.A2" name="A2" output="BLK_IG-ALUT.A2"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.A3" name="A3" output="BLK_IG-ALUT.A3"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.A4" name="A4" output="BLK_IG-ALUT.A4"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.A5" name="A5" output="BLK_IG-ALUT.A5"/>
+          <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.A6" name="A6" output="BLK_IG-ALUT.A6"/>
+          <direct input="BLK_IG-DLUT.O6" name="DO6" output="BLK_IG-COMMON_LUT_AND_F78MUX.DO6"/>
+          <direct input="BLK_IG-DLUT.O5" name="DO5" output="BLK_IG-COMMON_LUT_AND_F78MUX.DO5"/>
+          <direct input="BLK_IG-CLUT.O6" name="CO6" output="BLK_IG-COMMON_LUT_AND_F78MUX.CO6"/>
+          <direct input="BLK_IG-CLUT.O5" name="CO5" output="BLK_IG-COMMON_LUT_AND_F78MUX.CO5"/>
+          <direct input="BLK_IG-BLUT.O6" name="BO6" output="BLK_IG-COMMON_LUT_AND_F78MUX.BO6"/>
+          <direct input="BLK_IG-BLUT.O5" name="BO5" output="BLK_IG-COMMON_LUT_AND_F78MUX.BO5"/>
+          <direct input="BLK_IG-ALUT.O6" name="AO6" output="BLK_IG-COMMON_LUT_AND_F78MUX.AO6"/>
+          <direct input="BLK_IG-ALUT.O5" name="AO5" output="BLK_IG-COMMON_LUT_AND_F78MUX.AO5"/>
+        </interconnect>
+      </pb_type>
+      <pb_type name="BLK_IG-COMMON_SLICE" num_pb="1">
+        <input name="DX" num_pins="1"/>
+        <input name="CX" num_pins="1"/>
+        <input name="BX" num_pins="1"/>
+        <input name="AX" num_pins="1"/>
+        <input name="DO6" num_pins="1"/>
+        <input name="CO6" num_pins="1"/>
+        <input name="BO6" num_pins="1"/>
+        <input name="AO6" num_pins="1"/>
+        <input name="DO5" num_pins="1"/>
+        <input name="CO5" num_pins="1"/>
+        <input name="BO5" num_pins="1"/>
+        <input name="AO5" num_pins="1"/>
+        <input name="SR" num_pins="1"/>
+        <input name="CE" num_pins="1"/>
+        <!-- This input is unconnected on SLICEL -->
+        <input name="AMC31" num_pins="1"/>
+        <clock name="CLK" num_pins="1"/>
+        <input name="CIN" num_pins="1"/>
+        <output name="COUT" num_pins="1"/>
+        <output name="DMUX" num_pins="1"/>
+        <output name="D" num_pins="1"/>
+        <output name="DQ" num_pins="1"/>
+        <output name="CMUX" num_pins="1"/>
+        <output name="C" num_pins="1"/>
+        <output name="CQ" num_pins="1"/>
+        <output name="BMUX" num_pins="1"/>
+        <output name="B" num_pins="1"/>
+        <output name="BQ" num_pins="1"/>
+        <output name="AMUX" num_pins="1"/>
+        <output name="A" num_pins="1"/>
+        <output name="AQ" num_pins="1"/>
+        <!-- Model of FF group in SLICEL and SLICEM -->
+        <pb_type name="BLK_BB-SLICE_FF" num_pb="1">
+          <!-- CK, CE and SR are slice wide. -->
+          <input name="CE" num_pins="1"/>
+          <input name="SR" num_pins="1"/>
+          <clock name="CK" num_pins="1"/>
+          <input name="D" num_pins="4"/>
+          <output name="Q" num_pins="4"/>
+          <input name="D5" num_pins="4"/>
+          <output name="Q5" num_pins="4"/>
+          <!-- |      |FFSYNC|LATCH|ZRST | -->
+          <!-- |FDRE  |   X  |     |  X  | -->
+          <mode name="FDRE">
+            <pb_type blif_model=".subckt FDRE" name="BEL_FF-FDRE" num_pb="8">
+              <input name="D" num_pins="1"/>
+              <input name="CE" num_pins="1"/>
+              <clock name="C" num_pins="1"/>
+              <input name="R" num_pins="1"/>
+              <output name="Q" num_pins="1"/>
+              <T_setup clock="C" port="BEL_FF-FDRE.D" value="10e-12"/>
+              <T_setup clock="C" port="BEL_FF-FDRE.CE" value="10e-12"/>
+              <T_setup clock="C" port="BEL_FF-FDRE.R" value="10e-12"/>
+              <T_clock_to_Q clock="C" max="10e-12" port="BEL_FF-FDRE.Q"/>
+            </pb_type>
+            <interconnect>
+              <complete input="BLK_BB-SLICE_FF.CE" name="CE" output="BEL_FF-FDRE.CE"/>
+              <complete input="BLK_BB-SLICE_FF.CK" name="C" output="BEL_FF-FDRE.C"/>
+              <complete input="BLK_BB-SLICE_FF.SR" name="SR" output="BEL_FF-FDRE.R"/>
+              <direct input="BLK_BB-SLICE_FF.D[3:0]" name="D" output="BEL_FF-FDRE[3:0].D"/>
+              <direct input="BEL_FF-FDRE[3:0].Q" name="Q" output="BLK_BB-SLICE_FF.Q[3:0]"/>
+              <direct input="BLK_BB-SLICE_FF.D5[3:0]" name="D5" output="BEL_FF-FDRE[7:4].D"/>
+              <direct input="BEL_FF-FDRE[7:4].Q" name="Q5" output="BLK_BB-SLICE_FF.Q5[3:0]"/>
+            </interconnect>
+          </mode>
+        </pb_type>
+        <!-- CARRY4 logic -->
+        <pb_type blif_model=".subckt CARRY0" name="BEL_BB-CARRY0" num_pb="1">
+          <input name="CI" num_pins="1"/>
+          <input name="CI_INIT" num_pins="1"/>
+          <output name="CO_CHAIN" num_pins="1"/>
+          <output name="CO_FABRIC" num_pins="1"/>
+          <input name="DI" num_pins="1"/>
+          <output name="O" num_pins="1"/>
+          <input name="S" num_pins="1"/>
+          <delay_constant in_port="BEL_BB-CARRY0.CI" max="10e-12" out_port="BEL_BB-CARRY0.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY0.CI_INIT" max="10e-12" out_port="BEL_BB-CARRY0.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY0.DI" max="10e-12" out_port="BEL_BB-CARRY0.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY0.S" max="10e-12" out_port="BEL_BB-CARRY0.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY0.CI" max="10e-12" out_port="BEL_BB-CARRY0.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY0.CI_INIT" max="10e-12" out_port="BEL_BB-CARRY0.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY0.DI" max="10e-12" out_port="BEL_BB-CARRY0.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY0.S" max="10e-12" out_port="BEL_BB-CARRY0.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY0.CI" max="10e-12" out_port="BEL_BB-CARRY0.O"/>
+          <delay_constant in_port="BEL_BB-CARRY0.CI_INIT" max="10e-12" out_port="BEL_BB-CARRY0.O"/>
+          <delay_constant in_port="BEL_BB-CARRY0.S" max="10e-12" out_port="BEL_BB-CARRY0.O"/>
+        </pb_type>
+        <pb_type blif_model=".subckt CARRY" name="BEL_BB-CARRY" num_pb="3">
+          <input name="CI" num_pins="1"/>
+          <output name="CO_CHAIN" num_pins="1"/>
+          <output name="CO_FABRIC" num_pins="1"/>
+          <input name="DI" num_pins="1"/>
+          <output name="O" num_pins="1"/>
+          <input name="S" num_pins="1"/>
+          <delay_constant in_port="BEL_BB-CARRY.CI" max="10e-12" out_port="BEL_BB-CARRY.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY.DI" max="10e-12" out_port="BEL_BB-CARRY.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY.S" max="10e-12" out_port="BEL_BB-CARRY.CO_CHAIN"/>
+          <delay_constant in_port="BEL_BB-CARRY.CI" max="10e-12" out_port="BEL_BB-CARRY.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY.DI" max="10e-12" out_port="BEL_BB-CARRY.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY.S" max="10e-12" out_port="BEL_BB-CARRY.CO_FABRIC"/>
+          <delay_constant in_port="BEL_BB-CARRY.CI" max="10e-12" out_port="BEL_BB-CARRY.O"/>
+          <delay_constant in_port="BEL_BB-CARRY.S" max="10e-12" out_port="BEL_BB-CARRY.O"/>
+        </pb_type>
+        <interconnect>
+          <!-- 5FF MUXs -->
+          <mux input="BLK_IG-COMMON_SLICE.DX BLK_IG-COMMON_SLICE.DO5" name="D5FFMUX" output="BLK_BB-SLICE_FF.D5[3]"/>
+          <mux input="BLK_IG-COMMON_SLICE.CX BLK_IG-COMMON_SLICE.CO5" name="C5FFMUX" output="BLK_BB-SLICE_FF.D5[2]"/>
+          <mux input="BLK_IG-COMMON_SLICE.BX BLK_IG-COMMON_SLICE.BO5" name="B5FFMUX" output="BLK_BB-SLICE_FF.D5[1]"/>
+          <mux input="BLK_IG-COMMON_SLICE.AX BLK_IG-COMMON_SLICE.AO5" name="A5FFMUX" output="BLK_BB-SLICE_FF.D5[0]"/>
+          <!-- [A-D]MUX -->
+          <mux input="BLK_IG-COMMON_SLICE.AMC31 BLK_BB-SLICE_FF.Q5[3] BEL_BB-CARRY[2].O BEL_BB-CARRY[2].CO_FABRIC BLK_IG-COMMON_SLICE.DO6 BLK_IG-COMMON_SLICE.DO5" name="DMUX" output="BLK_IG-COMMON_SLICE.DMUX"/>
+          <mux input="BLK_BB-SLICE_FF.Q5[2] BEL_BB-CARRY[1].O BEL_BB-CARRY[1].CO_FABRIC BLK_IG-COMMON_SLICE.CO6 BLK_IG-COMMON_SLICE.CO5" name="CMUX" output="BLK_IG-COMMON_SLICE.CMUX"/>
+          <mux input="BLK_BB-SLICE_FF.Q5[1] BEL_BB-CARRY[0].O BEL_BB-CARRY[0].CO_FABRIC BLK_IG-COMMON_SLICE.BO6 BLK_IG-COMMON_SLICE.BO5" name="BMUX" output="BLK_IG-COMMON_SLICE.BMUX"/>
+          <mux input="BLK_BB-SLICE_FF.Q5[0] BEL_BB-CARRY0.O BEL_BB-CARRY0.CO_FABRIC BLK_IG-COMMON_SLICE.AO6 BLK_IG-COMMON_SLICE.AO5" name="AMUX" output="BLK_IG-COMMON_SLICE.AMUX"/>
+          <!-- [A-D]FFMUX -->
+          <mux input="BEL_BB-CARRY[2].O BEL_BB-CARRY[2].CO_FABRIC BLK_IG-COMMON_SLICE.DO6 BLK_IG-COMMON_SLICE.DO5 BLK_IG-COMMON_SLICE.DX" name="DFFMUX" output="BLK_BB-SLICE_FF.D[3]"/>
+          <mux input="BEL_BB-CARRY[1].O BEL_BB-CARRY[1].CO_FABRIC BLK_IG-COMMON_SLICE.CO6 BLK_IG-COMMON_SLICE.CO5 BLK_IG-COMMON_SLICE.CX" name="CFFMUX" output="BLK_BB-SLICE_FF.D[2]"/>
+          <mux input="BEL_BB-CARRY[0].O BEL_BB-CARRY[0].CO_FABRIC BLK_IG-COMMON_SLICE.BO6 BLK_IG-COMMON_SLICE.BO5 BLK_IG-COMMON_SLICE.BX" name="BFFMUX" output="BLK_BB-SLICE_FF.D[1]"/>
+          <mux input="BEL_BB-CARRY0.O BEL_BB-CARRY0.CO_FABRIC BLK_IG-COMMON_SLICE.AO6 BLK_IG-COMMON_SLICE.AO5 BLK_IG-COMMON_SLICE.AX" name="AFFMUX" output="BLK_BB-SLICE_FF.D[0]"/>
+          <!-- [A-F]Q outputs -->
+          <direct input="BLK_BB-SLICE_FF.Q[0]" name="AFF" output="BLK_IG-COMMON_SLICE.AQ"/>
+          <direct input="BLK_BB-SLICE_FF.Q[1]" name="BFF" output="BLK_IG-COMMON_SLICE.BQ"/>
+          <direct input="BLK_BB-SLICE_FF.Q[2]" name="CFF" output="BLK_IG-COMMON_SLICE.CQ"/>
+          <direct input="BLK_BB-SLICE_FF.Q[3]" name="DFF" output="BLK_IG-COMMON_SLICE.DQ"/>
+          <!-- LUT O6 output -->
+          <direct input="BLK_IG-COMMON_SLICE.DO6" name="BLK_IG-COMMON_SLICE_DOUT" output="BLK_IG-COMMON_SLICE.D"/>
+          <direct input="BLK_IG-COMMON_SLICE.CO6" name="BLK_IG-COMMON_SLICE_COUT" output="BLK_IG-COMMON_SLICE.C"/>
+          <direct input="BLK_IG-COMMON_SLICE.BO6" name="BLK_IG-COMMON_SLICE_BOUT" output="BLK_IG-COMMON_SLICE.B"/>
+          <direct input="BLK_IG-COMMON_SLICE.AO6" name="BLK_IG-COMMON_SLICE_AOUT" output="BLK_IG-COMMON_SLICE.A"/>
+          <!-- Carry -->
+          <!-- Carry initialization -->
+          <direct input="BLK_IG-COMMON_SLICE.AX" name="PRECYINIT_MUX" output="BEL_BB-CARRY0.CI_INIT"/>
+          <direct input="BLK_IG-COMMON_SLICE.CIN" name="CIN_TO_CARRY0" output="BEL_BB-CARRY0.CI">
+            <pack_pattern in_port="BLK_IG-COMMON_SLICE.CIN" name="BLK_TI-CLBLL_R.BLK_IG-SLICEL.CARRYCHAIN" out_port="BEL_BB-CARRY0.CI"/>
+          </direct>
+          <!-- Tile internal carry -->
+          <direct input="BEL_BB-CARRY0.CO_CHAIN" name="CARRY0_TO_CARRY1" output="BEL_BB-CARRY[0].CI">
+            <pack_pattern in_port="BEL_BB-CARRY0.CO_CHAIN" name="BLK_TI-CLBLL_R.BLK_IG-SLICEL.CARRYCHAIN" out_port="BEL_BB-CARRY[0].CI"/>
+          </direct>
+          <direct input="BEL_BB-CARRY[0].CO_CHAIN" name="CARRY1_TO_CARRY2" output="BEL_BB-CARRY[1].CI">
+            <pack_pattern in_port="BEL_BB-CARRY[0].CO_CHAIN" name="BLK_TI-CLBLL_R.BLK_IG-SLICEL.CARRYCHAIN" out_port="BEL_BB-CARRY[1].CI"/>
+          </direct>
+          <direct input="BEL_BB-CARRY[1].CO_CHAIN" name="CARRY2_TO_CARRY3" output="BEL_BB-CARRY[2].CI">
+            <pack_pattern in_port="BEL_BB-CARRY[1].CO_CHAIN" name="BLK_TI-CLBLL_R.BLK_IG-SLICEL.CARRYCHAIN" out_port="BEL_BB-CARRY[2].CI"/>
+          </direct>
+          <!-- Carry selects -->
+          <direct input="BLK_IG-COMMON_SLICE.DO6" name="CARRY_S3" output="BEL_BB-CARRY[2].S"/>
+          <direct input="BLK_IG-COMMON_SLICE.CO6" name="CARRY_S2" output="BEL_BB-CARRY[1].S"/>
+          <direct input="BLK_IG-COMMON_SLICE.BO6" name="CARRY_S1" output="BEL_BB-CARRY[0].S"/>
+          <direct input="BLK_IG-COMMON_SLICE.AO6" name="CARRY_S0" output="BEL_BB-CARRY0.S"/>
+          <!-- Carry MUXCY.DI -->
+          <mux input="BLK_IG-COMMON_SLICE.DO5 BLK_IG-COMMON_SLICE.DX" name="CARRY_DI3" output="BEL_BB-CARRY[2].DI"/>
+          <mux input="BLK_IG-COMMON_SLICE.CO5 BLK_IG-COMMON_SLICE.CX" name="CARRY_DI2" output="BEL_BB-CARRY[1].DI"/>
+          <mux input="BLK_IG-COMMON_SLICE.BO5 BLK_IG-COMMON_SLICE.BX" name="CARRY_DI1" output="BEL_BB-CARRY[0].DI"/>
+          <mux input="BLK_IG-COMMON_SLICE.AO5 BLK_IG-COMMON_SLICE.AX" name="CARRY_DI0" output="BEL_BB-CARRY0.DI"/>
+          <direct input="BEL_BB-CARRY[2].CO_CHAIN" name="COUT" output="BLK_IG-COMMON_SLICE.COUT">
+            <pack_pattern in_port="BEL_BB-CARRY[2].CO_CHAIN" name="BLK_TI-CLBLL_R.BLK_IG-SLICEL.CARRYCHAIN" out_port="BLK_IG-COMMON_SLICE.COUT"/>
+          </direct>
+          <!-- Clock, Clock Enable and Reset -->
+          <direct input="BLK_IG-COMMON_SLICE.CLK" name="CK" output="BLK_BB-SLICE_FF.CK"/>
+          <direct input="BLK_IG-COMMON_SLICE.CE" name="CE" output="BLK_BB-SLICE_FF.CE"/>
+          <direct input="BLK_IG-COMMON_SLICE.SR" name="SR" output="BLK_BB-SLICE_FF.SR"/>
+        </interconnect>
+      </pb_type>
+      <interconnect>
+        <!-- LUT input pins -->
+        <direct input="BLK_IG-SLICEL.D1" name="D1" output="BLK_IG-COMMON_LUT_AND_F78MUX.D1"/>
+        <direct input="BLK_IG-SLICEL.D2" name="D2" output="BLK_IG-COMMON_LUT_AND_F78MUX.D2"/>
+        <direct input="BLK_IG-SLICEL.D3" name="D3" output="BLK_IG-COMMON_LUT_AND_F78MUX.D3"/>
+        <direct input="BLK_IG-SLICEL.D4" name="D4" output="BLK_IG-COMMON_LUT_AND_F78MUX.D4"/>
+        <direct input="BLK_IG-SLICEL.D5" name="D5" output="BLK_IG-COMMON_LUT_AND_F78MUX.D5"/>
+        <direct input="BLK_IG-SLICEL.D6" name="D6" output="BLK_IG-COMMON_LUT_AND_F78MUX.D6"/>
+        <direct input="BLK_IG-SLICEL.C1" name="C1" output="BLK_IG-COMMON_LUT_AND_F78MUX.C1"/>
+        <direct input="BLK_IG-SLICEL.C2" name="C2" output="BLK_IG-COMMON_LUT_AND_F78MUX.C2"/>
+        <direct input="BLK_IG-SLICEL.C3" name="C3" output="BLK_IG-COMMON_LUT_AND_F78MUX.C3"/>
+        <direct input="BLK_IG-SLICEL.C4" name="C4" output="BLK_IG-COMMON_LUT_AND_F78MUX.C4"/>
+        <direct input="BLK_IG-SLICEL.C5" name="C5" output="BLK_IG-COMMON_LUT_AND_F78MUX.C5"/>
+        <direct input="BLK_IG-SLICEL.C6" name="C6" output="BLK_IG-COMMON_LUT_AND_F78MUX.C6"/>
+        <direct input="BLK_IG-SLICEL.B1" name="B1" output="BLK_IG-COMMON_LUT_AND_F78MUX.B1"/>
+        <direct input="BLK_IG-SLICEL.B2" name="B2" output="BLK_IG-COMMON_LUT_AND_F78MUX.B2"/>
+        <direct input="BLK_IG-SLICEL.B3" name="B3" output="BLK_IG-COMMON_LUT_AND_F78MUX.B3"/>
+        <direct input="BLK_IG-SLICEL.B4" name="B4" output="BLK_IG-COMMON_LUT_AND_F78MUX.B4"/>
+        <direct input="BLK_IG-SLICEL.B5" name="B5" output="BLK_IG-COMMON_LUT_AND_F78MUX.B5"/>
+        <direct input="BLK_IG-SLICEL.B6" name="B6" output="BLK_IG-COMMON_LUT_AND_F78MUX.B6"/>
+        <direct input="BLK_IG-SLICEL.A1" name="A1" output="BLK_IG-COMMON_LUT_AND_F78MUX.A1"/>
+        <direct input="BLK_IG-SLICEL.A2" name="A2" output="BLK_IG-COMMON_LUT_AND_F78MUX.A2"/>
+        <direct input="BLK_IG-SLICEL.A3" name="A3" output="BLK_IG-COMMON_LUT_AND_F78MUX.A3"/>
+        <direct input="BLK_IG-SLICEL.A4" name="A4" output="BLK_IG-COMMON_LUT_AND_F78MUX.A4"/>
+        <direct input="BLK_IG-SLICEL.A5" name="A5" output="BLK_IG-COMMON_LUT_AND_F78MUX.A5"/>
+        <direct input="BLK_IG-SLICEL.A6" name="A6" output="BLK_IG-COMMON_LUT_AND_F78MUX.A6"/>
+        <direct input="BLK_IG-SLICEL.CX" name="CX" output="BLK_IG-COMMON_LUT_AND_F78MUX.CX"/>
+        <direct input="BLK_IG-SLICEL.BX" name="BX" output="BLK_IG-COMMON_LUT_AND_F78MUX.BX"/>
+        <direct input="BLK_IG-SLICEL.AX" name="AX" output="BLK_IG-COMMON_LUT_AND_F78MUX.AX"/>
+        <!-- COMMON_SLICE inputs -->
+        <direct input="BLK_IG-SLICEL.DX" name="DX2" output="BLK_IG-COMMON_SLICE.DX"/>
+        <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.DO6" name="DO6" output="BLK_IG-COMMON_SLICE.DO6"/>
+        <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.DO5" name="DO5" output="BLK_IG-COMMON_SLICE.DO5"/>
+        <direct input="BLK_IG-SLICEL.CX" name="CX2" output="BLK_IG-COMMON_SLICE.CX"/>
+        <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.CO6" name="CO6" output="BLK_IG-COMMON_SLICE.CO6"/>
+        <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.CO5" name="CO5" output="BLK_IG-COMMON_SLICE.CO5"/>
+        <direct input="BLK_IG-SLICEL.BX" name="BX2" output="BLK_IG-COMMON_SLICE.BX"/>
+        <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.BO6" name="BO6" output="BLK_IG-COMMON_SLICE.BO6"/>
+        <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.BO5" name="BO5" output="BLK_IG-COMMON_SLICE.BO5"/>
+        <direct input="BLK_IG-SLICEL.AX" name="AX2" output="BLK_IG-COMMON_SLICE.AX"/>
+        <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.AO6" name="AO6" output="BLK_IG-COMMON_SLICE.AO6"/>
+        <direct input="BLK_IG-COMMON_LUT_AND_F78MUX.AO5" name="AO5" output="BLK_IG-COMMON_SLICE.AO5"/>
+        <!-- [A-F]Q outputs -->
+        <direct input="BLK_IG-COMMON_SLICE.AQ" name="AQ" output="BLK_IG-SLICEL.AQ"/>
+        <direct input="BLK_IG-COMMON_SLICE.BQ" name="BQ" output="BLK_IG-SLICEL.BQ"/>
+        <direct input="BLK_IG-COMMON_SLICE.CQ" name="CQ" output="BLK_IG-SLICEL.CQ"/>
+        <direct input="BLK_IG-COMMON_SLICE.DQ" name="DQ" output="BLK_IG-SLICEL.DQ"/>
+        <!-- A-D output -->
+        <direct input="BLK_IG-COMMON_SLICE.D" name="BLK_IG-SLICEL_DOUT" output="BLK_IG-SLICEL.D"/>
+        <direct input="BLK_IG-COMMON_SLICE.C" name="BLK_IG-SLICEL_COUT" output="BLK_IG-SLICEL.C"/>
+        <direct input="BLK_IG-COMMON_SLICE.B" name="BLK_IG-SLICEL_BOUT" output="BLK_IG-SLICEL.B"/>
+        <direct input="BLK_IG-COMMON_SLICE.A" name="BLK_IG-SLICEL_AOUT" output="BLK_IG-SLICEL.A"/>
+        <!-- AMUX-DMUX output -->
+        <direct input="BLK_IG-COMMON_SLICE.DMUX" name="BLK_IG-SLICEL_DMUX" output="BLK_IG-SLICEL.DMUX"/>
+        <direct input="BLK_IG-COMMON_SLICE.CMUX" name="BLK_IG-SLICEL_CMUX" output="BLK_IG-SLICEL.CMUX"/>
+        <direct input="BLK_IG-COMMON_SLICE.BMUX" name="BLK_IG-SLICEL_BMUX" output="BLK_IG-SLICEL.BMUX"/>
+        <direct input="BLK_IG-COMMON_SLICE.AMUX" name="BLK_IG-SLICEL_AMUX" output="BLK_IG-SLICEL.AMUX"/>
+        <!-- Carry -->
+        <direct input="BLK_IG-SLICEL.CIN" name="CIN" output="BLK_IG-COMMON_SLICE.CIN"/>
+        <direct input="BLK_IG-COMMON_SLICE.COUT" name="COUT" output="BLK_IG-SLICEL.COUT"/>
+        <!-- Clock, Clock Enable and Reset -->
+        <direct input="BLK_IG-SLICEL.CLK" name="CK" output="BLK_IG-COMMON_SLICE.CLK"/>
+        <direct input="BLK_IG-SLICEL.CE" name="CE" output="BLK_IG-COMMON_SLICE.CE"/>
+        <direct input="BLK_IG-SLICEL.SR" name="SR" output="BLK_IG-COMMON_SLICE.SR"/>
+      </interconnect>
+    </pb_type>
+  </complexblocklist>
+  <layout>
+    <fixed_layout name="TEST" width="6" height="6">
+      <single priority="1" type="io_tile" x="0" y="1"/>
+      <single priority="1" type="io_tile" x="0" y="2"/>
+      <single priority="1" type="io_tile" x="0" y="3"/>
+      <single priority="1" type="io_tile" x="0" y="4"/>
+      <single priority="1" type="io_tile" x="3" y="1"/>
+      <single priority="1" type="io_tile" x="3" y="2"/>
+      <single priority="1" type="io_tile" x="3" y="3"/>
+      <single priority="1" type="io_tile" x="3" y="4"/>
+      <single priority="1" type="io_tile" x="1" y="0"/>
+      <single priority="1" type="io_tile" x="2" y="0"/>
+      <single priority="1" type="io_tile" x="1" y="5"/>
+      <single priority="1" type="io_tile" x="2" y="5"/>
+      <single priority="1" type="BLK_IG-SLICEM" x="1" y="1"/>
+      <single priority="1" type="BLK_IG-SLICEM" x="1" y="2"/>
+      <single priority="1" type="BLK_IG-SLICEM" x="1" y="3"/>
+      <single priority="1" type="BLK_IG-SLICEM" x="1" y="4"/>
+      <single priority="1" type="BLK_IG-SLICEL" x="2" y="1"/>
+      <single priority="1" type="BLK_IG-SLICEL" x="2" y="2"/>
+      <single priority="1" type="BLK_IG-SLICEL" x="2" y="3"/>
+      <single priority="1" type="BLK_IG-SLICEL" x="2" y="4"/>
+    </fixed_layout>
+  </layout>
+  <device>
+    <sizing R_minW_nmos="6065.520020" R_minW_pmos="18138.500000"/>
+    <area grid_logic_tile_area="14813.392"/>
+    <connection_block input_switch_name="buffer"/>
+    <switch_block fs="3" type="wilton"/>
+    <chan_width_distr>
+      <x distr="uniform" peak="1.0"/>
+      <y distr="uniform" peak="1.0"/>
+    </chan_width_distr>
+  </device>
+  <switchlist>
+    <switch Cin=".77e-15" Cout="4e-15" R="551" Tdel="6.8e-12" buf_size="27.645901" mux_trans_size="2.630740" name="routing" type="mux"/>
+    <switch Cin=".77e-15" Cout="4e-15" R="551" Tdel="6.8e-12" buf_size="27.645901" mux_trans_size="2.630740" name="buffer" type="mux"/>
+  </switchlist>
+  <segmentlist>
+    <segment Cmetal="22.5e-15" Rmetal="101" freq="1.0" length="12" name="dummy" type="bidir">
+      <wire_switch name="routing"/>
+      <opin_switch name="routing"/>
+      <sb type="pattern">1 1 1 1 1 1 1 1 1 1 1 1 1</sb>
+      <cb type="pattern">1 1 1 1 1 1 1 1 1 1 1 1</cb>
+    </segment>
+  </segmentlist>
+</architecture>
diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_equivalent_tiles/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_equivalent_tiles/config/config.txt
new file mode 100644
index 00000000000..7ec5b84e895
--- /dev/null
+++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_equivalent_tiles/config/config.txt
@@ -0,0 +1,31 @@
+##############################################
+# Configuration file for running experiments
+##############################################
+
+# Path to directory of circuits to use
+circuits_dir=benchmarks/microbenchmarks
+
+# Path to directory of architectures to use
+archs_dir=arch/equivalent_tiles
+
+# Path to directory of SDC files to use
+sdc_dir = sdc
+
+# Add circuits to list to sweep
+circuit_list_add=carry_chain.blif
+
+# Add architectures to list to sweep
+arch_list_add=slice.xml
+
+# Parse info and how to parse
+parse_file=vpr_standard.txt
+
+# How to parse QoR info
+qor_parse_file=qor_standard.txt
+
+# Pass requirements
+pass_requirements_file=pass_requirements.txt
+
+# Script parameters
+#script_params=""
+script_params = -track_memory_usage -lut_size 1 -starting_stage vpr

From 980dc679343bda94e3ef25b22b50b5c4cb7db47e Mon Sep 17 00:00:00 2001
From: Alessandro Comodi <acomodi@antmicro.com>
Date: Wed, 15 May 2019 18:04:01 +0200
Subject: [PATCH 10/15] vtr_flow: added script to add tiles to architecture xml

I have also changed .travis.yml to install the lxml Python package
needed by the script.

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 .travis.yml                      |   1 +
 vtr_flow/scripts/add_tiles.py    | 142 +++++++++++++++++++++++++++++++
 vtr_flow/scripts/run_vtr_flow.pl |   5 +-
 3 files changed, 146 insertions(+), 2 deletions(-)
 create mode 100755 vtr_flow/scripts/add_tiles.py

diff --git a/.travis.yml b/.travis.yml
index 2fe883ecece..186a0779a85 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -36,6 +36,7 @@ addons:
     - libxml++2.6-dev
     - perl
     - python
+    - python-lxml
     - texinfo
     - time
     - valgrind
diff --git a/vtr_flow/scripts/add_tiles.py b/vtr_flow/scripts/add_tiles.py
new file mode 100755
index 00000000000..efcf092a17a
--- /dev/null
+++ b/vtr_flow/scripts/add_tiles.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+
+"""
+This script is intended to modify the architecture description file to be compliant with
+the new format.
+
+It moves the top level pb_types attributes and tags to the tiles high-level tag.
+
+BEFORE:
+<complexblocklist>
+    <pb_type name="BRAM" area="2" height="4" width="1" capacity="1">
+        <input ... />
+        <input ... />
+        <input ... />
+        <output ... />
+        <output ... />
+        <output ... />
+        <interconnect ... />
+        <fc ... />
+        <pinlocations ... />
+        <switchblock_locations ... />
+    </pb_type>
+</complexblocklist>
+
+AFTER:
+<tiles>
+    <tile name="BRAM" area="2" height="4" width="1" capacity="1">
+        <fc ... />
+        <pinlocations ... />
+        <switchblock_locations ... />
+    </tile>
+</tiles>
+<complexblocklist>
+    <pb_type name="BRAM">
+        <input ... />
+        <input ... />
+        <input ... />
+        <output ... />
+        <output ... />
+        <output ... />
+        <interconnect ... />
+    </pb_type>
+</complexblocklist>
+"""
+
+"""
+This script is intended to modify the architecture description file to be compliant with
+the new format.
+
+It moves the top level pb_types attributes and tags to the tiles high-level tag.
+
+BEFORE:
+<complexblocklist>
+    <pb_type name="BRAM" area="2" height="4" width="1" capacity="1">
+        <input ... />
+        <input ... />
+        <input ... />
+        <output ... />
+        <output ... />
+        <output ... />
+        <interconnect ... />
+        <fc ... />
+        <pinlocations ... />
+        <switchblock_locations ... />
+    </pb_type>
+</complexblocklist>
+
+AFTER:
+<tiles>
+    <tile name="BRAM" area="2" height="4" width="1" capacity="1">
+        <interconnect ... />
+        <fc ... />
+        <pinlocations ... />
+        <switchblock_locations ... />
+    </tile>
+</tiles>
+<complexblocklist
+    <pb_type name="BRAM">
+        <input ... />
+        <input ... />
+        <input ... />
+        <output ... />
+        <output ... />
+        <output ... />
+    </pb_type>
+</complexblocklist>
+"""
+
+from lxml import etree as ET
+import argparse
+
+TAGS_TO_SWAP = ['fc', 'pinlocations', 'switchblock_locations']
+ATTR_TO_REMOVE = ['area', 'height', 'width', 'capacity']
+
+def swap_tags(tile, pb_type):
+    """Move every child of pb_type whose tag is listed in TAGS_TO_SWAP under tile."""
+    for child in list(pb_type):  # snapshot: pb_type is mutated inside the loop
+        if child.tag in TAGS_TO_SWAP:
+            pb_type.remove(child)
+            tile.append(child)
+
+
+def main():
+    """Emit (on stdout) the arch XML with a <tile> generated for each top-level pb_type."""
+    arg_parser = argparse.ArgumentParser(
+        description="Moves top level pb_types to tiles tag."
+    )
+    arg_parser.add_argument(
+        '--arch_xml',
+        required=True,
+        help="Input arch.xml that needs to be modified to move the top level pb_types to the `tiles` tag."
+    )
+    args = arg_parser.parse_args()
+
+    arch_xml = ET.ElementTree()
+    root_element = arch_xml.parse(args.arch_xml)
+
+    # <tiles> is appended as the last child of the document root.
+    tiles = ET.SubElement(root_element, 'tiles')
+
+    # Top-level pb_types are the ones whose direct parent is <complexblocklist>.
+    top_pb_types = [
+        candidate for candidate in root_element.iter('pb_type')
+        if candidate.getparent().tag == 'complexblocklist'
+    ]
+
+    for pb_type in top_pb_types:
+        # The tile starts out with a copy of every attribute of its pb_type ...
+        tile = ET.SubElement(tiles, 'tile')
+        for attr_name, attr_value in pb_type.attrib.items():
+            tile.set(attr_name, attr_value)
+
+        # ... after which the ATTR_TO_REMOVE ones are stripped from the pb_type.
+        for attr_name in ATTR_TO_REMOVE:
+            pb_type.attrib.pop(attr_name, None)
+        swap_tags(tile, pb_type)
+
+    print(ET.tostring(arch_xml, pretty_print=True).decode('utf-8'))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/vtr_flow/scripts/run_vtr_flow.pl b/vtr_flow/scripts/run_vtr_flow.pl
index 1d59a1e9bac..2db3b4d50a7 100755
--- a/vtr_flow/scripts/run_vtr_flow.pl
+++ b/vtr_flow/scripts/run_vtr_flow.pl
@@ -363,7 +363,7 @@
 
 # Read arch XML
 my $tpp      = XML::TreePP->new();
-my $xml_tree = $tpp->parsefile($architecture_file_path);
+my $xml_tree = $tpp->parsefile("$architecture_file_path");
 
 # Get lut size if undefined
 if (!defined $lut_size) {
@@ -416,7 +416,8 @@
 #system "cp $odin2_base_config"
 
 my $architecture_file_path_new = "$temp_dir$architecture_file_name";
-copy( $architecture_file_path, $architecture_file_path_new );
+my $ret = `$vtr_flow_path/scripts/add_tiles.py --arch_xml $architecture_file_path > $architecture_file_path_new`;
+#copy( "$architecture_file_path", $architecture_file_path_new );
 $architecture_file_path = $architecture_file_path_new;
 
 my $circuit_file_path_new = "$temp_dir$benchmark_name" . file_ext_for_stage($starting_stage - 1, $circuit_suffix);

From bc50eee4605ee779d2a3f7e1a482e05d6ee5b5fe Mon Sep 17 00:00:00 2001
From: Alessandro Comodi <acomodi@antmicro.com>
Date: Thu, 16 May 2019 18:28:10 +0200
Subject: [PATCH 11/15] vtr_flow: added comment

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 vtr_flow/scripts/run_vtr_flow.pl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vtr_flow/scripts/run_vtr_flow.pl b/vtr_flow/scripts/run_vtr_flow.pl
index 2db3b4d50a7..c851dc4886a 100755
--- a/vtr_flow/scripts/run_vtr_flow.pl
+++ b/vtr_flow/scripts/run_vtr_flow.pl
@@ -363,7 +363,7 @@
 
 # Read arch XML
 my $tpp      = XML::TreePP->new();
-my $xml_tree = $tpp->parsefile("$architecture_file_path");
+my $xml_tree = $tpp->parsefile($architecture_file_path);
 
 # Get lut size if undefined
 if (!defined $lut_size) {
@@ -417,6 +417,8 @@
 
 my $architecture_file_path_new = "$temp_dir$architecture_file_name";
 my $ret = `$vtr_flow_path/scripts/add_tiles.py --arch_xml $architecture_file_path > $architecture_file_path_new`;
+
+# There is no need to copy the arch description file as it is produced by the add_tiles.py script
 #copy( "$architecture_file_path", $architecture_file_path_new );
 $architecture_file_path = $architecture_file_path_new;
 

From 76376574394b344cea33162764cdc926f50be0f3 Mon Sep 17 00:00:00 2001
From: Alessandro Comodi <acomodi@antmicro.com>
Date: Thu, 16 May 2019 20:30:00 +0200
Subject: [PATCH 12/15] ODIN II: regression script modifies arch.xml with tiles

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 ODIN_II/verify_odin.sh | 98 +++++++++++++++++++++---------------------
 1 file changed, 50 insertions(+), 48 deletions(-)

diff --git a/ODIN_II/verify_odin.sh b/ODIN_II/verify_odin.sh
index 0416d9b78f8..fc2b09bb418 100755
--- a/ODIN_II/verify_odin.sh
+++ b/ODIN_II/verify_odin.sh
@@ -64,9 +64,9 @@ function exit_program() {
 	if [ -f ${NEW_RUN_DIR}/test_failures.log ]; then
 		FAIL_COUNT=$(wc -l ${NEW_RUN_DIR}/test_failures.log | cut -d ' ' -f 1)
 	fi
-	
+
 	FAILURE=$(( ${FAIL_COUNT} ))
-	
+
 	if [ "_${FAILURE}" != "_0" ]
 	then
 		echo "Failed ${FAILURE} benchmarks"
@@ -107,7 +107,7 @@ _prt_cur_arg() {
 function help() {
 
 printf "Called program with $INPUT
-	Usage: 
+	Usage:
 		${THIS_SCRIPT_EXEC} [ OPTIONS / FLAGS ]
 
 
@@ -209,7 +209,7 @@ function cleanup_temp() {
 	fi
 
 	for runs in ${OUTPUT_DIRECTORY}/run*
-	do 
+	do
 		rm -Rf ${runs}
 	done
 
@@ -267,14 +267,14 @@ function mv_failed() {
 # Helper Functions
 function flag_is_number() {
 	case "_$2" in
-		_) 
+		_)
 			echo "Passed an empty value for $1"
 			help
 			exit 120
 		;;
 		*)
 			case $2 in
-				''|*[!0-9]*) 
+				''|*[!0-9]*)
 					echo "Passed a non number value [$2] for $1"
 					help
 					exit 120
@@ -312,7 +312,7 @@ function _set_flag() {
 	_batch_sim_flag=$(_set_if ${_BATCH_SIM} "--batch")
 	_use_best_coverage_flag=$(_set_if ${_BEST_COVERAGE_OFF} "--best_coverage")
 	_perf_flag=$(_set_if ${_USE_PERF} "--tool perf")
-	
+
 	_vector_flag="-g ${_VECTORS}"
 	_timeout_flag="--time_limit ${_TIMEOUT}s"
 	_simulation_threads_flag=$([ "${_SIM_THREADS}" != "1" ] && echo "-j ${_SIM_THREADS}")
@@ -323,20 +323,20 @@ function _set_flag() {
 
 function parse_args() {
 	while [[ "$#" > 0 ]]
-	do 
-		case $1 in 
+	do
+		case $1 in
 
 		# Help Desk
 			-h|--help)
 				echo "Printing Help information"
 				help
 				exit_program
-			
+
 		## directory in benchmark
 			;;-t|--test)
 				# this is handled down stream
 				if [ "_$2" == "_" ]
-				then 
+				then
 					echo "empty argument for $1"
 					exit 120
 				fi
@@ -349,11 +349,11 @@ function parse_args() {
 			;;-a|--adder_def)
 
 				if [ "_$2" == "_" ]
-				then 
+				then
 					echo "empty argument for $1"
 					exit 120
 				fi
-				
+
 				_ADDER_DEF=$2
 
 				if [ "${_ADDER_DEF}" != "default" ] && [ "${_ADDER_DEF}" != "optimized" ] && [ ! -f "$(readlink -f ${_ADDER_DEF})" ]
@@ -367,11 +367,11 @@ function parse_args() {
 			;;-d|--output_dir)
 
 				if [ "_$2" == "_" ]
-				then 
+				then
 					echo "empty argument for $1"
 					exit 120
 				fi
-				
+
 				_RUN_DIR_OVERRIDE=$2
 
 				if [ ! -d "${_RUN_DIR_OVERRIDE}" ]
@@ -409,45 +409,45 @@ function parse_args() {
 				shift
 
 		# Boolean flags
-			;;-g|--generate_bench)		
+			;;-g|--generate_bench)
 				_GENERATE_BENCH="on"
 				echo "generating output vector for test given predefined input"
 
-			;;-o|--generate_output)		
+			;;-o|--generate_output)
 				_GENERATE_OUTPUT="on"
 				echo "generating input and output vector for test"
 
-			;;-c|--clean)				
+			;;-c|--clean)
 				echo "Cleaning temporary run in directory"
 				cleanup_temp
 
-			;;-l|--limit_ressource)		
+			;;-l|--limit_ressource)
 				_LIMIT_RESSOURCE="on"
 				echo "limiting ressources for benchmark, this can help with small hardware"
 
-			;;-v|--valgrind)			
+			;;-v|--valgrind)
 				_VALGRIND="on"
 				echo "Using Valgrind for benchmarks"
 
-			;;-B|--best_coverage_off)	
+			;;-B|--best_coverage_off)
 				_BEST_COVERAGE_OFF="off"
 				echo "turning off using best coverage for benchmark vector generation"
 
-			;;-b|--batch_sim)			
+			;;-b|--batch_sim)
 				_BATCH_SIM="on"
 				echo "Using Batch multithreaded simulation with -j threads"
 
 			;;-p|--perf)
 				_USE_PERF="on"
 				echo "Using perf for synthesis and simulation"
-			
-			;;-f|--force_simulate)   
+
+			;;-f|--force_simulate)
 				_FORCE_SIM="on"
-				echo "Forcing Simulation"         
+				echo "Forcing Simulation"
 
-			;;*) 
+			;;*)
 				echo "Unknown parameter passed: $1"
-				help 
+				help
 				ctrl_c
 		esac
 		shift
@@ -477,9 +477,9 @@ function sim() {
 	shift
 
 	while [[ "$#" > 0 ]]
-	do 
+	do
 		case $1 in
-			--custom_args_file) 
+			--custom_args_file)
 				with_custom_args=1
 				;;
 
@@ -517,7 +517,7 @@ function sim() {
 
 			*)
 				echo "Unknown internal parameter passed: $1"
-				config_help 
+				config_help
 				ctrl_c
 				;;
 		esac
@@ -553,15 +553,15 @@ function sim() {
 											${_timeout_flag}
 											${_low_ressource_flag}
 											${_valgrind_flag}"
-											
+
 				if [ "${_USE_PERF}" == "on" ]
 				then
 					wrapper_odin_command="${wrapper_odin_command} ${_perf_flag} ${DIR}/perf.data"
 				fi
 
 				odin_command="${DEFAULT_CMD_PARAM}
-								$(cat ${dir}/odin.args | tr '\n' ' ') 
-								-o ${blif_file} 
+								$(cat ${dir}/odin.args | tr '\n' ' ')
+								-o ${blif_file}
 								-sim_dir ${DIR}"
 
 				echo $(echo "${wrapper_odin_command} ${odin_command}" | tr '\n' ' ' | tr -s ' ' ) > ${DIR}/odin_param
@@ -607,13 +607,6 @@ function sim() {
 
 			for arches in ${arch_list}
 			do
-
-				arch_cmd=""
-				if [ -e ${arches} ]
-				then
-					arch_cmd="-a ${arches}"
-				fi
-
 				arch_basename=${arches%.xml}
 				arch_name=${arch_basename##*/}
 
@@ -622,6 +615,14 @@ function sim() {
 				DIR="${NEW_RUN_DIR}/${TEST_FULL_REF}"
 				blif_file="${DIR}/odin.blif"
 
+				arch_cmd=""
+				if [ -e ${arches} ]
+				then
+					tiles_cmd="../vtr_flow/scripts/add_tiles.py"
+					arch_file="${arch_name}.xml"
+					mkdir -p ${DIR} && ${tiles_cmd} --arch_xml ${arches} > ${DIR}/${arch_file} # ${DIR} is not created yet at this point
+					arch_cmd="-a ${DIR}/${arch_file}"
+				fi
 
 				#build commands
 				mkdir -p $DIR
@@ -643,6 +644,7 @@ function sim() {
 						wrapper_synthesis_command="${wrapper_synthesis_command} ${_perf_flag} ${DIR}/perf.data"
 					fi
 
+
 					synthesis_command="${DEFAULT_CMD_PARAM}
 										${arch_cmd}
 										-V ${benchmark}
@@ -712,7 +714,7 @@ function sim() {
 
 				#run the simulation
 				find ${NEW_RUN_DIR}/${bench_type}/ -name sim_param | xargs -n1 -P$threads -I sim_cmd ${SHELL} -c '$(cat sim_cmd)'
-				
+
 				# move the log
 				for sim_log in $(find ${NEW_RUN_DIR}/${bench_type}/ -name "simulation.log")
 				do
@@ -722,7 +724,7 @@ function sim() {
 				disable_failed ${global_simulation_failure}
 
 			done
-			
+
 			mkdir -p ${NEW_RUN_DIR}/${bench_type}/vectors
 
 			# move the vectors
@@ -733,7 +735,7 @@ function sim() {
 
 				cp ${sim_input_vectors} ${NEW_RUN_DIR}/${bench_type}/vectors/${BM_NAME}
 				mv ${sim_input_vectors} ${BM_DIR}/${BM_NAME}
-				
+
 			done
 
 
@@ -803,7 +805,7 @@ function debug_failures() {
 
 			echo "Which benchmark would you like to debug (type 'quit' or 'q' to exit)?"
 			echo "============"
-			echo "${FAILURES_LIST}"	
+			echo "${FAILURES_LIST}"
 			echo "============"
 			printf "enter a substring: "
 
@@ -813,7 +815,7 @@ function debug_failures() {
 					echo "exiting"
 					break
 					;;
-				*)					
+				*)
 					BM="${FAILED_RUN_DIR}/$(echo "${FAILURES_LIST}" | grep ${INPUT_BM} | tail -n 1)"
 
 					if [ "_${BM}" != "_" ] && [ -f "${BM}/${CMD_FILE_NAME}" ]
@@ -854,7 +856,7 @@ LIGHT_LIST=(
 	"operators"
 	"arch"
 	"other"
-	"micro"	
+	"micro"
 	"syntax"
 	"FIR"
 )
@@ -934,8 +936,8 @@ case ${_TEST} in
 
 	full_suite)
 		run_all
-		;;	
-		
+		;;
+
 	heavy_suite)
 		run_heavy_suite
 		;;

From 15ebc1324521a1d04d4f19826fe2287e6c060af9 Mon Sep 17 00:00:00 2001
From: Alessandro Comodi <acomodi@antmicro.com>
Date: Thu, 16 May 2019 23:03:31 +0200
Subject: [PATCH 13/15] vtr_flow: added tiles step to upgrade arch

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 vtr_flow/scripts/add_tiles.py    |  43 ------------
 vtr_flow/scripts/upgrade_arch.py | 115 +++++++++++++++++++++++++++----
 2 files changed, 102 insertions(+), 56 deletions(-)

diff --git a/vtr_flow/scripts/add_tiles.py b/vtr_flow/scripts/add_tiles.py
index efcf092a17a..14794bae284 100755
--- a/vtr_flow/scripts/add_tiles.py
+++ b/vtr_flow/scripts/add_tiles.py
@@ -43,49 +43,6 @@
 </complexblocklist>
 """
 
-"""
-This script is intended to modify the architecture description file to be compliant with
-the new format.
-
-It moves the top level pb_types attributes and tags to the tiles high-level tag.
-
-BEFORE:
-<complexblocklist>
-    <pb_type name="BRAM" area="2" height="4" width="1" capacity="1">
-        <input ... />
-        <input ... />
-        <input ... />
-        <output ... />
-        <output ... />
-        <output ... />
-        <interconnect ... />
-        <fc ... />
-        <pinlocations ... />
-        <switchblock_locations ... />
-    </pb_type>
-</complexblocklist>
-
-AFTER:
-<tiles>
-    <tile name="BRAM" area="2" height="4" width="1" capacity="1">
-        <interconnect ... />
-        <fc ... />
-        <pinlocations ... />
-        <switchblock_locations ... />
-    </tile>
-</tiles>
-<complexblocklist
-    <pb_type name="BRAM">
-        <input ... />
-        <input ... />
-        <input ... />
-        <output ... />
-        <output ... />
-        <output ... />
-    </pb_type>
-</complexblocklist>
-"""
-
 from lxml import etree as ET
 import argparse
 
diff --git a/vtr_flow/scripts/upgrade_arch.py b/vtr_flow/scripts/upgrade_arch.py
index ef6dd8f7310..64cba982360 100755
--- a/vtr_flow/scripts/upgrade_arch.py
+++ b/vtr_flow/scripts/upgrade_arch.py
@@ -39,6 +39,7 @@ def __init__(self):
     "upgrade_port_equivalence",
     "upgrade_complex_sb_num_conns",
     "add_missing_comb_model_internal_timing_edges",
+    "move_top_level_pb_type_to_tiles",
 ]
 
 def parse_args():
@@ -137,6 +138,11 @@ def main():
         if result:
             modified = True
 
+    if "move_top_level_pb_type_to_tiles" in args.features:
+        result = move_top_level_pb_type_to_tiles(arch)
+        if result:
+            modified = True
+
     if modified:
         if args.debug:
             root.write(sys.stdout, pretty_print=args.pretty)
@@ -155,7 +161,7 @@ def add_model_timing(arch):
     #Find all primitive pb types
     prim_pbs = arch.findall(".//pb_type[@blif_model]")
 
-    #Build up the timing specifications from 
+    #Build up the timing specifications from
     default_models = frozenset([".input", ".output", ".latch", ".names"])
     primitive_timing_specs = {}
     for prim_pb in prim_pbs:
@@ -237,7 +243,7 @@ def upgrade_fc_overrides(arch):
             port = old_pin_override.attrib['name']
             fc_type = old_pin_override.attrib['fc_type']
             fc_val = old_pin_override.attrib['fc_val']
-            
+
             fc_tag.remove(old_pin_override)
 
             new_attrib = OrderedDict()
@@ -285,7 +291,7 @@ def upgrade_fc_overrides(arch):
                 new_attrib["fc_val"] = out_val
 
                 fc_override = ET.SubElement(fc_tag, "fc_override", attrib=new_attrib)
-        
+
             changed = True
     return changed
 
@@ -350,7 +356,7 @@ def upgrade_device_layout(arch):
         device_auto.attrib['height'] = height
     else:
         assert False, "Unrecognized <layout> specification"
-    
+
     if 0:
         for type, locs in type_to_grid_specs.iteritems():
             print "Type:", type
@@ -370,7 +376,7 @@ def upgrade_device_layout(arch):
         device_auto.text = "\n" + 2*INDENT
         device_auto.tail = "\n"
 
-    
+
     for type_name, locs in type_to_grid_specs.iteritems():
         for loc in locs:
             assert loc.tag == "loc"
@@ -408,8 +414,8 @@ def upgrade_device_layout(arch):
                 col_spec.attrib['priority'] = str(priority)
                 col_spec.tail = "\n" + 2*INDENT
 
-                #Classic VPR fills blank spaces (e.g. where a height > 1 block won't fit) with "EMPTY" 
-                #instead of with the underlying type. To replicate that we create a col spec with the same 
+                #Classic VPR fills blank spaces (e.g. where a height > 1 block won't fit) with "EMPTY"
+                #instead of with the underlying type. To replicate that we create a col spec with the same
                 #location information, but of type 'EMPTY' and with slightly lower priority than the real type.
 
                 col_empty_spec = ET.SubElement(device_auto, 'col')
@@ -451,8 +457,8 @@ def upgrade_device_layout(arch):
                 col_spec.attrib['priority'] = str(priority)
                 col_spec.tail = "\n" + 2*INDENT
 
-                #Classic VPR fills blank spaces (e.g. where a height > 1 block won't fit) with "EMPTY" 
-                #instead of with the underlying type. To replicate that we create a col spec with the same 
+                #Classic VPR fills blank spaces (e.g. where a height > 1 block won't fit) with "EMPTY"
+                #instead of with the underlying type. To replicate that we create a col spec with the same
                 #location information, but of type 'EMPTY' and with slightly lower priority than the real type.
                 col_empty_spec = ET.SubElement(device_auto, 'col')
                 col_empty_spec.attrib['type'] = "EMPTY"
@@ -496,7 +502,7 @@ def upgrade_device_layout(arch):
                 assert False, "Unrecognzied <loc> type tag {}".format(loc_type)
 
     return changed
-        
+
 def remove_io_chan_distr(arch):
     """
     Removes the legacy '<io>' channel width distribution tags
@@ -631,7 +637,7 @@ def upgrade_connection_block_input_switch(arch):
         #
         #Create the switch
         #
-        
+
         switch_name = "ipin_cblock"
 
         #Make sure the switch name doesn't already exist
@@ -673,7 +679,7 @@ def upgrade_switch_types(arch):
     assert switchlist_tag is not None
 
     for switch_tag in switchlist_tag.findall("./switch"):
-        
+
         switch_type = switch_tag.attrib['type']
 
         if switch_type in ['buffered', 'pass_trans']:
@@ -710,7 +716,7 @@ def rename_fc_attributes(arch):
 def remove_longline_sb_cb(arch):
     """
     Drops <sb> and <cb> of any <segment> types with length="longline",
-    since we now assume longlines have full switch block/connection block 
+    since we now assume longlines have full switch block/connection block
     populations
     """
 
@@ -867,5 +873,88 @@ def add_missing_comb_model_internal_timing_edges(arch):
 
     return changed
 
+def move_top_level_pb_type_to_tiles(arch):
+    """
+    This script is intended to modify the architecture description file to be compliant with
+    the new format.
+
+    It moves the top level pb_types attributes and tags to the tiles high-level tag.
+
+    BEFORE:
+    <complexblocklist>
+        <pb_type name="BRAM" area="2" height="4" width="1" capacity="1">
+            <input ... />
+            <input ... />
+            <input ... />
+            <output ... />
+            <output ... />
+            <output ... />
+            <interconnect ... />
+            <fc ... />
+            <pinlocations ... />
+            <switchblock_locations ... />
+        </pb_type>
+    </complexblocklist>
+
+    AFTER:
+    <tiles>
+        <tile name="BRAM" area="2" height="4" width="1" capacity="1">
+            <fc ... />
+            <pinlocations ... />
+            <switchblock_locations ... />
+        </tile>
+    </tiles>
+    <complexblocklist>
+        <pb_type name="BRAM">
+            <input ... />
+            <input ... />
+            <input ... />
+            <output ... />
+            <output ... />
+            <output ... />
+            <interconnect ... />
+        </pb_type>
+    </complexblocklist>
+    """
+
+    changed = False
+
+    TAGS_TO_SWAP = ['fc', 'pinlocations', 'switchblock_locations']
+    ATTR_TO_REMOVE = ['area', 'height', 'width', 'capacity']
+
+    def swap_tags(tile, pb_type):
+        # Moving tags from top level pb_type to tile
+        for child in pb_type:
+            if child.tag in TAGS_TO_SWAP:
+                pb_type.remove(child)
+                tile.append(child)
+
+    tiles = arch.find('tiles')
+
+    if tiles is None:
+        tiles = ET.SubElement(arch, 'tiles')
+
+        top_pb_types = []
+        for pb_type in arch.iter('pb_type'):
+            if pb_type.getparent().tag == 'complexblocklist':
+                top_pb_types.append(pb_type)
+
+        for pb_type in top_pb_types:
+            tile = ET.SubElement(tiles, 'tile')
+            attrs = pb_type.attrib
+
+            for attr in attrs:
+                tile.set(attr, pb_type.get(attr))
+
+            # Remove attributes of top level pb_types only
+            for attr in ATTR_TO_REMOVE:
+                pb_type.attrib.pop(attr, None)
+
+            swap_tags(tile, pb_type)
+
+        changed = True
+
+    return changed
+
 if __name__ == "__main__":
     main()

From e1200cd8afa52629364686db0658522caeae8d0f Mon Sep 17 00:00:00 2001
From: Alessandro Comodi <acomodi@antmicro.com>
Date: Fri, 31 May 2019 18:29:04 +0200
Subject: [PATCH 14/15] ODIN_II: corrected arch.xml file generation

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 ODIN_II/verify_odin.sh | 29 +++++++++++++++++++----------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/ODIN_II/verify_odin.sh b/ODIN_II/verify_odin.sh
index fc2b09bb418..089bed1dd82 100755
--- a/ODIN_II/verify_odin.sh
+++ b/ODIN_II/verify_odin.sh
@@ -109,8 +109,6 @@ function help() {
 printf "Called program with $INPUT
 	Usage:
 		${THIS_SCRIPT_EXEC} [ OPTIONS / FLAGS ]
-
-
 	OPTIONS:
 		-h|--help                       $(_prt_cur_arg off) print this
 		-t|--test < test name >         $(_prt_cur_arg ${_TEST}) Test name is one of ( ${TEST_DIR_LIST} heavy_suite light_suite full_suite vtr_basic vtr_strong pre_commit failures debug_sim debug_synth)
@@ -121,7 +119,6 @@ printf "Called program with $INPUT
 		-a|--adder_def < /abs/path >    $(_prt_cur_arg ${_ADDER_DEF}) Use template to build adders
 		-n|--simulation_count < N >     $(_prt_cur_arg ${_SIM_COUNT}) Allow to run the simulation N times to benchmark the simulator
 		-d|--output_dir < /abs/path >   $(_prt_cur_arg ${_RUN_DIR_OVERRIDE}) Change the run directory output
-
 	FLAGS:
 		-g|--generate_bench             $(_prt_cur_arg ${_GENERATE_BENCH}) Generate input and output vector for test
 		-o|--generate_output            $(_prt_cur_arg ${_GENERATE_OUTPUT}) Generate output vector for test given its input vector
@@ -132,7 +129,6 @@ printf "Called program with $INPUT
 		-b|--batch_sim                  $(_prt_cur_arg ${_BATCH_SIM}) Use Batch mode multithreaded simulation
 		-p|--perf                       $(_prt_cur_arg ${_USE_PERF}) Use Perf for monitoring execution
 		-f|--force_simulate             $(_prt_cur_arg ${_FORCE_SIM}) Force the simulation to be executed regardless of the config
-
 "
 }
 
@@ -607,6 +603,7 @@ function sim() {
 
 			for arches in ${arch_list}
 			do
+
 				arch_basename=${arches%.xml}
 				arch_name=${arch_basename##*/}
 
@@ -615,18 +612,18 @@ function sim() {
 				DIR="${NEW_RUN_DIR}/${TEST_FULL_REF}"
 				blif_file="${DIR}/odin.blif"
 
+				#build commands
+				mkdir -p $DIR
+
 				arch_cmd=""
 				if [ -e ${arches} ]
 				then
 					tiles_cmd="../vtr_flow/scripts/add_tiles.py"
 					arch_file="${arch_name}.xml"
-					${tiles_cmd} --arch_xml ${arches} > ${DIR}/${arch_name}.xml
-					arch_cmd="-a ${DIR}/${arch_name}.xml"
+					${tiles_cmd} --arch_xml ${arches} > $DIR/${arch_name}.xml
+					arch_cmd="-a $DIR/${arch_name}.xml"
 				fi
 
-				#build commands
-				mkdir -p $DIR
-
 				###############################
 				# Synthesis
 				if [ "${_SYNTHESIS}" == "on" ]
@@ -644,7 +641,6 @@ function sim() {
 						wrapper_synthesis_command="${wrapper_synthesis_command} ${_perf_flag} ${DIR}/perf.data"
 					fi
 
-
 					synthesis_command="${DEFAULT_CMD_PARAM}
 										${arch_cmd}
 										-V ${benchmark}

From a59408a779911b6e1fae0c19d5b283f3af434ba1 Mon Sep 17 00:00:00 2001
From: Alessandro Comodi <acomodi@antmicro.com>
Date: Thu, 27 Jun 2019 14:42:03 +0200
Subject: [PATCH 15/15] vpr: corrected bug in equivalent placement

Signed-off-by: Alessandro Comodi <acomodi@antmicro.com>
---
 vpr/src/place/place.cpp | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index f0b2e8e3e16..96de3266233 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -318,6 +318,7 @@ static e_find_affected_blocks_result identify_macro_self_swap_affected_macros(st
 static e_find_affected_blocks_result record_macro_self_swaps(const int imacro, t_pl_offset swap_offset);
 
 bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to);
+bool is_legal_blk_swap(t_pl_loc from, t_pl_loc to);
 
 std::set<t_pl_loc> determine_locations_emptied_by_move();
 
@@ -1675,12 +1676,27 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) {
     auto blk_type_from = cluster_ctx.clb_nlist.block_type(blk);
     auto blk_type_to = device_ctx.grid[to.x][to.y].type;
 
-    // First check is to see if `from` type can be placed in `to` type
+    // Check whether the type of `blk` can be placed in the tile type at `to`
     if (!blk_type_from->is_available_tile_index(blk_type_to->index)) {
         return false;
     }
 
     t_pl_loc from = place_ctx.block_locs[blk].loc;
+    if (!is_legal_blk_swap(from, to)) {
+        return false;
+    }
+
+    return true;
+}
+
+bool is_legal_blk_swap(t_pl_loc from, t_pl_loc to) {
+    // Make sure that, when swapping, the block currently at the `to`
+    // location can be moved to the `from` location
+
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_ctx = g_vpr_ctx.placement();
+
     ClusterBlockId blk_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.z];
 
     // In case `blk_to` is empty we can skip the second check
@@ -1688,10 +1704,10 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) {
         return true;
     }
 
-    blk_type_from = device_ctx.grid[from.x][from.y].type;
-    blk_type_to = cluster_ctx.clb_nlist.block_type(blk_to);
+    auto blk_type_from = device_ctx.grid[from.x][from.y].type;
+    auto blk_type_to = cluster_ctx.clb_nlist.block_type(blk_to);
 
-    // Second check is to see if `to` type can be placed in `from` type
+    // Check whether the type of the block at `to` can be placed in the tile type at `from`
     if (!blk_type_to->is_available_tile_index(blk_type_from->index)) {
         return false;
     }
@@ -2179,7 +2195,7 @@ static bool find_to(t_type_ptr to_type, t_type_ptr from_type, float rlim, const
     VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].width_offset == 0, "Should be at block base location");
     VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].height_offset == 0, "Should be at block base location");
 
-    return true;
+    return is_legal_blk_swap(from, to);
 }
 
 static e_swap_result assess_swap(double delta_c, double t) {